"""Copy valid pixels from input files to an output file.""" from contextlib import ExitStack, contextmanager import logging import os import math import cmath import warnings import numbers import numpy as np import rasterio from rasterio.enums import Resampling from rasterio.errors import ( MergeError, RasterioDeprecationWarning, RasterioError, WindowError, ) from rasterio.io import DatasetWriter from rasterio import windows from rasterio.transform import Affine from rasterio.windows import subdivide logger = logging.getLogger(__name__) def copy_first(merged_data, new_data, merged_mask, new_mask, **kwargs): """Returns the first available pixel.""" mask = np.empty_like(merged_mask, dtype="bool") np.logical_not(new_mask, out=mask) np.logical_and(merged_mask, mask, out=mask) np.copyto(merged_data, new_data, where=mask, casting="unsafe") def copy_last(merged_data, new_data, merged_mask, new_mask, **kwargs): """Returns the last available pixel.""" mask = np.empty_like(merged_mask, dtype="bool") np.logical_not(new_mask, out=mask) np.copyto(merged_data, new_data, where=mask, casting="unsafe") def copy_min(merged_data, new_data, merged_mask, new_mask, **kwargs): """Returns the minimum value pixel.""" mask = np.empty_like(merged_mask, dtype="bool") np.logical_or(merged_mask, new_mask, out=mask) np.logical_not(mask, out=mask) np.minimum(merged_data, new_data, out=merged_data, where=mask, casting="unsafe") np.logical_not(new_mask, out=mask) np.logical_and(merged_mask, mask, out=mask) np.copyto(merged_data, new_data, where=mask, casting="unsafe") def copy_max(merged_data, new_data, merged_mask, new_mask, **kwargs): """Returns the maximum value pixel.""" mask = np.empty_like(merged_mask, dtype="bool") np.logical_or(merged_mask, new_mask, out=mask) np.logical_not(mask, out=mask) np.maximum(merged_data, new_data, out=merged_data, where=mask, casting="unsafe") np.logical_not(new_mask, out=mask) np.logical_and(merged_mask, mask, out=mask) np.copyto(merged_data, new_data, where=mask, casting="unsafe") def copy_sum(merged_data, new_data, merged_mask, new_mask, **kwargs): """Returns the sum of all pixel values.""" mask = np.empty_like(merged_mask, dtype="bool") np.logical_or(merged_mask, new_mask, out=mask) np.logical_not(mask, out=mask) np.add(merged_data, new_data, out=merged_data, where=mask, casting="unsafe") np.logical_not(new_mask, out=mask) np.logical_and(merged_mask, mask, out=mask) np.copyto(merged_data, new_data, where=mask, casting="unsafe") def copy_count(merged_data, new_data, merged_mask, new_mask, **kwargs): """Returns the count of valid pixels.""" mask = np.empty_like(merged_mask, dtype="bool") np.logical_or(merged_mask, new_mask, out=mask) np.logical_not(mask, out=mask) np.add(merged_data, mask, out=merged_data, where=mask, casting="unsafe") np.logical_not(new_mask, out=mask) np.logical_and(merged_mask, mask, out=mask) np.copyto(merged_data, mask, where=mask, casting="unsafe") MERGE_METHODS = { "first": copy_first, "last": copy_last, "min": copy_min, "max": copy_max, "sum": copy_sum, "count": copy_count, } def merge( sources, bounds=None, res=None, nodata=None, dtype=None, precision=None, indexes=None, output_count=None, resampling=Resampling.nearest, method="first", target_aligned_pixels=False, mem_limit=64, use_highest_res=False, masked=False, dst_path=None, dst_kwds=None, ): """Copy valid pixels from input files to an output file. All files must have the same number of bands, data type, and coordinate reference system. Rotated, flipped, or upside-down rasters cannot be merged. Input files are merged in their listed order using the reverse painter's algorithm (default) or another method. If the output file exists, its values will be overwritten by input values. Geospatial bounds and resolution of a new output file in the units of the input file coordinate reference system may be provided and are otherwise taken from the first input file. Parameters ---------- sources : list A sequence of dataset objects opened in 'r' mode or Path-like objects. bounds: tuple, optional Bounds of the output image (left, bottom, right, top). If not set, bounds are determined from bounds of input rasters. res: tuple, optional Output resolution in units of coordinate reference system. If not set, a source resolution will be used. If a single value is passed, output pixels will be square. use_highest_res: bool, optional. Default: False. If True, the highest resolution of all sources will be used. If False, the first source's resolution will be used. nodata: float, optional nodata value to use in output file. If not set, uses the nodata value in the first input raster. masked: bool, optional. Default: False. If True, return a masked array. Note: nodata is always set in the case of file output. dtype: numpy.dtype or string dtype to use in outputfile. If not set, uses the dtype value in the first input raster. precision: int, optional This parameters is unused, deprecated in rasterio 1.3.0, and will be removed in version 2.0.0. indexes : list of ints or a single int, optional bands to read and merge output_count: int, optional If using callable it may be useful to have additional bands in the output in addition to the indexes specified for read resampling : Resampling, optional Resampling algorithm used when reading input files. Default: `Resampling.nearest`. method : str or callable pre-defined method: first: reverse painting last: paint valid new on top of existing min: pixel-wise min of existing and new max: pixel-wise max of existing and new or custom callable with signature: merged_data : array_like array to update with new_data new_data : array_like data to merge same shape as merged_data merged_mask, new_mask : array_like boolean masks where merged/new data pixels are invalid same shape as merged_data index: int index of the current dataset within the merged dataset collection roff: int row offset in base array coff: int column offset in base array target_aligned_pixels : bool, optional Whether to adjust output image bounds so that pixel coordinates are integer multiples of pixel size, matching the ``-tap`` options of GDAL utilities. Default: False. mem_limit : int, optional Process merge output in chunks of mem_limit MB in size. dst_path : str or PathLike, optional Path of output dataset dst_kwds : dict, optional Dictionary of creation options and other parameters that will be overlaid on the profile of the output dataset. Returns ------- tuple Two elements: dest: numpy.ndarray Contents of all input rasters in single array out_transform: affine.Affine() Information for mapping pixel coordinates in `dest` to another coordinate system Raises ------ MergeError When sources cannot be merged due to incompatibility between them or limitations of the tool. """ if precision is not None: warnings.warn( "The precision parameter is unused, deprecated, and will be removed in 2.0.0.", RasterioDeprecationWarning, ) if method in MERGE_METHODS: copyto = MERGE_METHODS[method] elif callable(method): copyto = method else: raise ValueError( "Unknown method {}, must be one of {} or callable".format( method, list(MERGE_METHODS.keys()) ) ) # Create a dataset_opener object to use in several places in this function. if isinstance(sources[0], (str, os.PathLike)): dataset_opener = rasterio.open else: @contextmanager def nullcontext(obj): try: yield obj finally: pass dataset_opener = nullcontext dst = None with ExitStack() as exit_stack: with dataset_opener(sources[0]) as first: first_profile = first.profile first_crs = first.crs best_res = first.res first_nodataval = first.nodatavals[0] nodataval = first_nodataval dt = first.dtypes[0] if indexes is None: src_count = first.count elif isinstance(indexes, int): src_count = indexes else: src_count = len(indexes) try: first_colormap = first.colormap(1) except ValueError: first_colormap = None if not output_count: output_count = src_count # Extent from option or extent of all inputs if bounds: dst_w, dst_s, dst_e, dst_n = bounds else: # scan input files xs = [] ys = [] for i, dataset in enumerate(sources): with dataset_opener(dataset) as src: src_transform = src.transform if use_highest_res: best_res = min( best_res, src.res, key=lambda x: x if isinstance(x, numbers.Number) else math.sqrt(x[0] ** 2 + x[1] ** 2), ) # The merge tool requires non-rotated rasters with origins at their # upper left corner. This limitation may be lifted in the future. if not src_transform.is_rectilinear: raise MergeError( "Rotated, non-rectilinear rasters cannot be merged." ) if src_transform.a < 0: raise MergeError( 'Rasters with negative pixel width ("flipped" rasters) cannot be merged.' ) if src_transform.e > 0: raise MergeError( 'Rasters with negative pixel height ("upside down" rasters) cannot be merged.' ) left, bottom, right, top = src.bounds xs.extend([left, right]) ys.extend([bottom, top]) dst_w, dst_s, dst_e, dst_n = min(xs), min(ys), max(xs), max(ys) # Resolution/pixel size if not res: res = best_res elif isinstance(res, numbers.Number): res = (res, res) elif len(res) == 1: res = (res[0], res[0]) if target_aligned_pixels: dst_w = math.floor(dst_w / res[0]) * res[0] dst_e = math.ceil(dst_e / res[0]) * res[0] dst_s = math.floor(dst_s / res[1]) * res[1] dst_n = math.ceil(dst_n / res[1]) * res[1] # Compute output array shape. We guarantee it will cover the output # bounds completely output_width = int(round((dst_e - dst_w) / res[0])) output_height = int(round((dst_n - dst_s) / res[1])) output_transform = Affine.translation(dst_w, dst_n) * Affine.scale( res[0], -res[1] ) if dtype is not None: dt = dtype logger.debug("Set dtype: %s", dt) if nodata is not None: nodataval = nodata logger.debug("Set nodataval: %r", nodataval) inrange = False if nodataval is not None: # Only fill if the nodataval is within dtype's range if np.issubdtype(dt, np.integer): info = np.iinfo(dt) inrange = info.min <= nodataval <= info.max else: if cmath.isfinite(nodataval): info = np.finfo(dt) inrange = info.min <= nodataval <= info.max nodata_dt = np.min_scalar_type(nodataval) inrange = inrange & np.can_cast(nodata_dt, dt) else: inrange = True if not inrange: warnings.warn( f"Ignoring nodata value. The nodata value, {nodataval}, cannot safely be represented " f"in the chosen data type, {dt}. Consider overriding it " "using the --nodata option for better results. " "Falling back to first source's nodata value." ) nodataval = first_nodataval else: logger.debug("Set nodataval to 0") nodataval = 0 # When dataset output is selected, we might need to create one # and will also provide the option of merging by chunks. dout_window = windows.Window(0, 0, output_width, output_height) if dst_path is not None: if isinstance(dst_path, DatasetWriter): dst = dst_path else: out_profile = first_profile out_profile.update(**(dst_kwds or {})) out_profile["transform"] = output_transform out_profile["height"] = output_height out_profile["width"] = output_width out_profile["count"] = output_count out_profile["dtype"] = dt if nodata is not None: out_profile["nodata"] = nodata dst = rasterio.open(dst_path, "w", **out_profile) exit_stack.enter_context(dst) max_pixels = mem_limit * 1.0e6 / (np.dtype(dt).itemsize * output_count) if output_width * output_height < max_pixels: chunks = [dout_window] else: n = math.floor(math.sqrt(max_pixels)) chunks = subdivide(dout_window, n, n) else: chunks = [dout_window] def _intersect_bounds(bounds1, bounds2, transform): """Based on gdal_merge.py.""" int_w = max(bounds1[0], bounds2[0]) int_e = min(bounds1[2], bounds2[2]) if int_w >= int_e: raise ValueError if transform.e < 0: # north up int_s = max(bounds1[1], bounds2[1]) int_n = min(bounds1[3], bounds2[3]) if int_s >= int_n: raise ValueError else: int_s = min(bounds1[1], bounds2[1]) int_n = max(bounds1[3], bounds2[3]) if int_n >= int_s: raise ValueError return int_w, int_s, int_e, int_n for chunk in chunks: dst_w, dst_s, dst_e, dst_n = windows.bounds(chunk, output_transform) dest = np.zeros((output_count, chunk.height, chunk.width), dtype=dt) if inrange: dest.fill(nodataval) # From gh-2221 chunk_bounds = windows.bounds(chunk, output_transform) chunk_transform = windows.transform(chunk, output_transform) def win_align(window): """Equivalent to rounding both offsets and lengths. This method computes offsets, width, and height that are useful for compositing arrays into larger arrays and datasets without seams. It is used by Rasterio's merge tool and is based on the logic in gdal_merge.py. Returns ------- Window """ row_off = math.floor(window.row_off + 0.1) col_off = math.floor(window.col_off + 0.1) height = math.floor(window.height + 0.5) width = math.floor(window.width + 0.5) return windows.Window(col_off, row_off, width, height) for idx, dataset in enumerate(sources): with dataset_opener(dataset) as src: # Intersect source bounds and tile bounds if first_crs != src.crs: raise RasterioError(f"CRS mismatch with source: {dataset}") try: ibounds = _intersect_bounds( src.bounds, chunk_bounds, chunk_transform ) sw = windows.from_bounds(*ibounds, src.transform) cw = windows.from_bounds(*ibounds, chunk_transform) except (ValueError, WindowError): logger.info( "Skipping source: src=%r, bounds=%r", src, src.bounds ) continue cw = win_align(cw) rows, cols = cw.toslices() region = dest[:, rows, cols] if cmath.isnan(nodataval): region_mask = np.isnan(region) elif not np.issubdtype(region.dtype, np.integer): region_mask = np.isclose(region, nodataval) else: region_mask = region == nodataval data = src.read( out_shape=(src_count, cw.height, cw.width), indexes=indexes, masked=True, window=sw, resampling=resampling, ) copyto( region, data, region_mask, data.mask, index=idx, roff=cw.row_off, coff=cw.col_off, ) if dst: dw = windows.from_bounds(*chunk_bounds, output_transform) dw = win_align(dw) dst.write(dest, window=dw) if dst is None: if masked: dest = np.ma.masked_equal(dest, nodataval, copy=False) return dest, output_transform else: if first_colormap: dst.write_colormap(1, first_colormap) dst.close()