# Source code for stactools.core.utils.convert
"""Format conversion utilities."""
import os
from typing import Any, Dict, List, Optional, Tuple, cast
import rasterio
import rasterio.shutil
from rasterio.errors import DriverRegistrationError
from stactools.core import utils
DEFAULT_PROFILE = dict(
    compress="deflate",
    driver="COG",
    blocksize=512,
)
"""The default profile to use when writing Cloud-Optimized GeoTIFFs (COGs)."""
def assert_cog_driver_is_enabled() -> None:
    """Raises an error unless GDAL's COG driver is available.

    Raises:
        DriverRegistrationError: If ``utils.gdal_driver_is_enabled("COG")``
            reports that the driver is not enabled.
    """
    if utils.gdal_driver_is_enabled("COG"):
        return

    message = "GDAL's COG driver is not enabled, make sure you're using GDAL >= 3.1"
    raise DriverRegistrationError(message)
# [docs]
def cogify(
    infile: str,
    outfile: str,
    band: Optional[int] = None,
    profile: Optional[Dict[str, Any]] = None,
) -> None:
    """Creates a Cloud-Optimized GeoTIFF (COG) from a GDAL-readable file.

    A band number can optionally be provided to extract a single band from a
    multiband file. To create COGs from subdatasets, use
    :py:meth:`stactools.core.utils.convert.cogify_subdatasets`.

    Args:
        infile (str): The input file.
        outfile (str): The output COG to be written.
        band (Optional[int]): The band number in the input file to extract.
            If not provided, a multi-band COG will be created.
        profile (Optional[dict[str, Any]]):
            An optional profile to use on the
            output file. If not provided,
            :py:const:`stactools.core.utils.convert.DEFAULT_PROFILE` will be
            used. When provided, its entries are layered on top of the
            default profile and the source's width, height, crs and
            transform.

    Raises:
        DriverRegistrationError: If GDAL's COG driver is not enabled.
    """
    assert_cog_driver_is_enabled()

    # Open the source with a context manager so the dataset handle is
    # released on every path, including when reading or writing fails.
    with rasterio.open(infile) as src:
        dest_profile = DEFAULT_PROFILE.copy()
        dest_profile.update(
            {
                "width": src.width,
                "height": src.height,
                "crs": src.crs,
                "transform": src.transform,
            }
        )

        # Caller-supplied options override the defaults and source geometry.
        if profile:
            dest_profile.update(profile)

        # If a band number was provided, create a single-band COG.
        # NOTE: this is a truthiness check, so ``band=0`` is handled the same
        # way as ``band=None`` and falls through to the multi-band branch.
        if band:
            single_band = src.read(band)
            dest_profile.update({"count": 1, "dtype": single_band.dtype})
            with rasterio.open(outfile, "w", **dest_profile) as dest:
                dest.write(single_band, 1)
        # If no band number was provided, create a multi-band COG
        else:
            # The first band's dtype is used for the whole output profile.
            dest_profile.update({"count": src.count, "dtype": src.dtypes[0]})
            rasterio.shutil.copy(infile, outfile, **dest_profile)
# [docs]
def cogify_subdatasets(
    infile: str, outdir: str, subdataset_names: Optional[List[str]] = None
) -> Tuple[List[str], List[str]]:
    """Creates Cloud-Optimized GeoTIFFs for all subdatasets in a multi-dataset
    raster file.

    The created files will be named the same as the source file, with a
    ``_SUBDATASET`` suffix. E.g. if the source file is named ``foo.hdf`` and
    the subdataset is named ``bar``, the output COG will be named
    ``foo_bar.tif``. Only 2D (and not 3D) subdatasets are supported; any
    other subdataset is skipped.

    Args:
        infile (str): The input file containing subdatasets.
        outdir (str): The output directory where the COGs will be created.
        subdataset_names (Optional[List[str]]): If provided and non-empty,
            only subdatasets whose name is in this list are converted. The
            name compared is the text after the last ``:`` in the subdataset
            identifier, before any sanitizing. If ``None`` or empty, every
            subdataset is considered.

    Returns:
        Tuple[List[str], List[str]]:
            A two tuple (paths, names):
                - The first element is a list of the output COG paths
                - The second element is a list of subdataset names, sanitized
                  the same way as in the output file names (surrounding
                  whitespace and slashes stripped, remaining spaces and
                  slashes replaced with underscores)

    Raises:
        DriverRegistrationError: If GDAL's COG driver is not enabled.
    """
    assert_cog_driver_is_enabled()

    with rasterio.open(infile) as dataset:
        subdatasets = cast(List[str], dataset.subdatasets)

    base_file_name = os.path.splitext(os.path.basename(infile))[0]
    # Build the lookup once; an empty or missing list means "no filtering".
    wanted = set(subdataset_names) if subdataset_names else None

    paths: List[str] = []
    names: List[str] = []
    for subdataset in subdatasets:
        subdataset_name = subdataset.split(":")[-1]

        # Apply the cheap name filter first so subdatasets the caller did not
        # ask for are never opened.
        if wanted is not None and subdataset_name not in wanted:
            continue

        # Only the shape is needed from the opened subdataset, so the handle
        # is released before the copy below.
        with rasterio.open(subdataset) as subd:
            is_2d = len(subd.shape) == 2
        if not is_2d:
            continue

        sanitized_subdataset_name = (
            subdataset_name.strip()
            .strip("/")
            .replace(" ", "_")
            .replace("/", "_")
        )
        file_name = f"{base_file_name}_{sanitized_subdataset_name}.tif"
        outfile = os.path.join(outdir, file_name)
        rasterio.shutil.copy(subdataset, outfile, **DEFAULT_PROFILE)

        names.append(sanitized_subdataset_name)
        paths.append(outfile)

    return (paths, names)