Source code for stactools.testing.test_data

import os
import shutil
from dataclasses import dataclass
from tempfile import TemporaryDirectory
from typing import Any, Dict, Optional, Union
from zipfile import ZipFile

import fsspec
import requests


@dataclass
class ExternalData:
    """Configuration describing one remote file and how to retrieve it.

    Attributes:
        url (str): Location of the remote data.
        compress (str): Compression scheme applied to the remote data. When
            set, the payload is extracted after download. ``"zip"`` is the
            only supported value. Defaults to None.
        s3 (Dict[str, Any]): Keyword arguments forwarded to
            ``s3fs.S3FileSystem`` when the data is fetched via s3.
            Defaults to None.
        planetary_computer (bool): True when the data is hosted on
            planetary computer and the URL must be signed before it is
            fetched. Defaults to False.
    """

    # Required: where the remote data lives.
    url: str

    # Optional compression of the remote payload; only "zip" is supported.
    compress: Optional[str] = None

    # Optional keyword arguments for ``s3fs.S3FileSystem``.
    s3: Optional[Dict[str, Any]] = None

    # Whether the URL has to be signed through planetary computer.
    planetary_computer: bool = False
# NOTE(review): ``@dataclass`` is applied to a class with a hand-written
# ``__init__`` and no annotated fields; it is kept so the generated
# ``__eq__``/``__repr__`` behavior stays as it was -- confirm this is intended.
@dataclass
class TestData:
    """A structure for getting paths to test data files, and fetching
    external data for local testing.

    Initializing this from, e.g., ``/home/user/my-package/tests/__init__.py``:

    .. code-block:: python

        test_data = TestData(__file__)

    Means that ``get_path`` will be relative to
    ``/home/user/my-package/tests``.

    .. code-block:: python

        test_data.get_path("data-files/basic")
        # "/home/user/my-package/tests/data-files/basic"

    When caching external data that base path is appended with
    ``test_data.external_subpath`` which by default is
    'data-files/external'.

    For instance with the following external data configuration the external
    data file will be fetched from the URL, extracted from its zip file and
    locally stored at:
    ``/home/user/my-package/tests/data-files/external/AST_L1T_00305032000040446_20150409135350_78838.hdf``

    .. code-block:: python

        test_data.external_data = {
            'AST_L1T_00305032000040446_20150409135350_78838.hdf': {
                'url':
                    ('https://ai4epublictestdata.blob.core.windows.net/'
                     'stactools/aster/AST_L1T_00305032000040446_20150409135350_78838.zip'),
                'compress': 'zip'
            }
        }
        test_data.get_external_data("AST_L1T_00305032000040446_20150409135350_78838.hdf")

    Args:
        path (str): The path to any file in the directory where data is (or
            will be) stored. The directory information is taken from this
            path and used as the base for relative paths for the local data.
            It is stored on the class as ``self.base_path``
        external_data (Dict[str, ExternalData]): External data configurations
            for fetching and storing remote files. This is defined as a
            dictionary with the following structure: the key is the relative
            path (relative to ``self.base_path / self.external_subpath``) for
            cached data after it is fetched from remote and the value is the
            configuration as defined in :class:`ExternalData`. When omitted
            (or None), each instance gets its own empty dictionary.
        external_subpath (str): The subpath under ``self.base_path`` that is
            used for storing external data files. Defaults to
            'data-files/external'
    """

    # Keeps pytest from collecting this class as a test case because of its
    # ``Test`` name prefix.
    __test__ = False

    def __init__(
        self,
        path: str,
        external_data: Optional[
            Dict[str, Union[Dict[str, Any], "ExternalData"]]
        ] = None,
        external_subpath: str = "data-files/external",
    ) -> None:
        self.base_path = os.path.abspath(os.path.dirname(path))
        self.external_subpath = external_subpath
        # A literal ``{}`` default would be one dict shared by every instance
        # created without this argument, so entries added through one
        # instance would show up in all the others. A dict passed in by the
        # caller is stored as-is (not copied).
        self.external_data = {} if external_data is None else external_data

    def get_path(self, rel_path: str) -> str:
        """Returns an absolute path to a local data file.

        Args:
            rel_path (str): The relative path to the test data file. The path
                is assumed to be relative to ``self.base_path``.

        Returns:
            str: The absolute path joining ``self.base_path`` and ``rel_path``
        """
        return os.path.join(self.base_path, rel_path)

    def get_external_data(self, rel_path: str) -> str:
        """Returns the path to the local cached version of the external data.

        If data is not yet cached, this method fetches it, caches it, then
        returns the path to the local cached version.

        Args:
            rel_path (str): This is both the filename that the local data
                will be stored at _and_ a key in the ``external_data``
                dictionary where the corresponding value is the configuration
                information for the external data.

        Returns:
            str: The absolute path to the local cached version of the
            external data file.

        Raises:
            Exception: If the file is not cached locally and ``rel_path`` is
                not a key of ``external_data``.
            ImportError: If the configuration requests s3 access but ``s3fs``
                is not installed.
        """
        path = self.get_path(os.path.join(self.external_subpath, rel_path))
        if os.path.exists(path):
            # Already cached: nothing to download.
            return path

        config = self.external_data.get(rel_path)
        if config is None:
            raise Exception(
                f"Local path {path} does not exist and there is no key "
                f"in ``external_data`` that matches {rel_path}"
            )

        print(f"Downloading external test data {rel_path}...")
        os.makedirs(os.path.dirname(path), exist_ok=True)

        # Configurations may be given as plain dictionaries.
        if not isinstance(config, ExternalData):
            config = ExternalData(**config)

        data = self._fetch_external_data(config)
        self._store_external_data(data, path, config.compress)
        return path

    @staticmethod
    def _fetch_external_data(config: "ExternalData") -> bytes:
        """Downloads and returns the raw bytes described by ``config``."""
        if config.s3:
            try:
                import s3fs
            except ImportError:
                print(
                    "Trying to download external test data via s3, "
                    "but s3fs is not installed and the download requires "
                    "configuring the s3fs filesystem. Install stactools "
                    "with s3fs via `pip install stactools[s3]` and try again."
                )
                raise

            s3 = s3fs.S3FileSystem(**config.s3)
            with s3.open(config.url) as f:
                return f.read()

        url = config.url
        if config.planetary_computer:
            # Pass the href through ``params`` so it is URL-encoded; pasting
            # it into the query string verbatim breaks when the href itself
            # contains characters such as ``?``, ``&`` or ``#``.
            r = requests.get(
                "https://planetarycomputer.microsoft.com/api/sas/v1/sign",
                params={"href": config.url},
            )
            r.raise_for_status()
            url = r.json()["href"]

        with fsspec.open(url) as f:
            return f.read()

    @staticmethod
    def _store_external_data(
        data: bytes, path: str, compress: Optional[str]
    ) -> None:
        """Writes ``data`` to ``path``, extracting it first if it is a zip."""
        if compress != "zip":
            with open(path, "wb") as f:
                f.write(data)
            return

        with TemporaryDirectory() as tmp_dir:
            tmp_path = os.path.join(tmp_dir, "file.zip")
            with open(tmp_path, "wb") as f:
                f.write(data)

            # Extract inside the temporary directory (not the current
            # working directory) so nothing is left behind if a later step
            # fails, and close the archive before moving the member.
            extract_dir = os.path.join(tmp_dir, "extracted")
            with ZipFile(tmp_path) as z:
                # Only the first member of the archive is used.
                name = z.namelist()[0]
                extracted_path = z.extract(name, path=extract_dir)

            shutil.move(extracted_path, path)