Source code for yatsm.io.readers

""" Helper functions for reading various types of imagery data
"""
import logging
import time

import numpy as np
from osgeo import gdal, gdal_array

from .stack_line_readers import bip_reader, gdal_reader
from .. import cache

logger = logging.getLogger('yatsm')


def get_image_attribute(image_filename):
    """ Use GDAL to open image and return some attributes

    Args:
        image_filename (str): image filename

    Returns:
        tuple: nrow (int), ncol (int), nband (int), NumPy datatype (type)

    """
    try:
        image_ds = gdal.Open(image_filename, gdal.GA_ReadOnly)
    except Exception as e:
        logger.error('Could not open example image dataset ({f}): {e}'
                     .format(f=image_filename, e=str(e)))
        raise

    nrow = image_ds.RasterYSize
    ncol = image_ds.RasterXSize
    nband = image_ds.RasterCount
    dtype = gdal_array.GDALTypeCodeToNumericTypeCode(
        image_ds.GetRasterBand(1).DataType)

    return (nrow, ncol, nband, dtype)
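
A minimal usage sketch: query one image's size and datatype so they can be
reused when preallocating arrays for the rest of the stack. The filename is a
hypothetical placeholder, not part of the package:

    from yatsm.io.readers import get_image_attribute

    # 'example_stack.gtif' is a placeholder path
    nrow, ncol, nband, dtype = get_image_attribute('example_stack.gtif')
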
def read_image(image_filename, bands=None, dtype=None):
    """ Return raster image bands as a sequence of NumPy arrays

    Args:
        image_filename (str): Image filename
        bands (iterable, optional): A sequence of bands to read from image.
            If `bands` is None, function returns all bands in raster. Note
            that bands are indexed on 1 (default: None)
        dtype (np.dtype): NumPy datatype to use for image bands. If `dtype`
            is None, arrays are kept as the image datatype (default: None)

    Returns:
        list: list of NumPy arrays for each band specified

    Raises:
        IOError: raised if bands specified are not contained within raster
        RuntimeError: raised if GDAL encounters errors

    """
    try:
        ds = gdal.Open(image_filename, gdal.GA_ReadOnly)
    except Exception:
        logger.error('Could not read image {i}'.format(i=image_filename))
        raise

    if bands:
        if not all([b in range(1, ds.RasterCount + 1) for b in bands]):
            raise IOError('Image {i} ({n} bands) does not contain bands '
                          'specified (requested {b})'.
                          format(i=image_filename, n=ds.RasterCount, b=bands))
    else:
        bands = range(1, ds.RasterCount + 1)

    if not dtype:
        dtype = gdal_array.GDALTypeCodeToNumericTypeCode(
            ds.GetRasterBand(1).DataType)

    output = []
    for b in bands:
        output.append(ds.GetRasterBand(b).ReadAsArray().astype(dtype))

    return output
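
A small usage sketch, assuming a hypothetical multispectral image with at
least three bands; it reads only the first three (1-indexed) bands and
promotes them to float32:

    import numpy as np
    from yatsm.io.readers import read_image

    # 'example_stack.gtif' is a placeholder path
    blue, green, red = read_image('example_stack.gtif',
                                  bands=[1, 2, 3],
                                  dtype=np.float32)
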
def read_pixel_timeseries(files, px, py):
    """ Returns NumPy array containing timeseries values for one pixel

    Args:
        files (list): List of filenames to read from
        px (int): Pixel X location
        py (int): Pixel Y location

    Returns:
        np.ndarray: Array (nband x n_images) containing all timeseries data
            from one pixel

    """
    nrow, ncol, nband, dtype = get_image_attribute(files[0])

    if px < 0 or px >= ncol or py < 0 or py >= nrow:
        raise IndexError('Row/column {r}/{c} is outside of image '
                         '(nrow/ncol: {nrow}/{ncol})'.
                         format(r=py, c=px, nrow=nrow, ncol=ncol))

    Y = np.zeros((nband, len(files)), dtype=dtype)

    for i, f in enumerate(files):
        ds = gdal.Open(f, gdal.GA_ReadOnly)
        for b in range(nband):
            Y[b, i] = ds.GetRasterBand(b + 1).ReadAsArray(px, py, 1, 1)

    return Y
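
For example, to pull the full time series for a single pixel from a list of
stacked images (the filenames and pixel location below are placeholders; the
pixel must fall inside the image extent):

    from yatsm.io.readers import read_pixel_timeseries

    files = ['stack_2000.gtif', 'stack_2001.gtif']  # placeholder filenames
    Y = read_pixel_timeseries(files, px=250, py=100)
    # Y has shape (nband, len(files)), e.g. Y[3, :] is band 4 through time
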
def read_line(line, images, image_IDs, dataset_config,
              ncol, nband, dtype,
              read_cache=False, write_cache=False, validate_cache=False):
    """ Reads in dataset from cache or images if required

    Args:
        line (int): line to read in from images
        images (list): list of image filenames to read from
        image_IDs (iterable): list of image identifying strings
        dataset_config (dict): dictionary of dataset configuration options
        ncol (int): number of columns
        nband (int): number of bands
        dtype (type): NumPy datatype
        read_cache (bool, optional): try to read from cache directory
            (default: False)
        write_cache (bool, optional): try to write to cache directory
            (default: False)
        validate_cache (bool, optional): validate that cache data come from
            same images specified in `images` (default: False)

    Returns:
        np.ndarray: 3D array of image data (nband, n_image, n_cols)

    """
    start_time = time.time()

    read_from_disk = True
    cache_filename = cache.get_line_cache_name(
        dataset_config, len(images), line, nband)

    Y_shape = (nband, len(images), ncol)

    if read_cache:
        Y = cache.read_cache_file(cache_filename,
                                  image_IDs if validate_cache else None)
        if Y is not None and Y.shape == Y_shape:
            logger.debug('Read in Y from cache file')
            read_from_disk = False
        elif Y is not None and Y.shape != Y_shape:
            logger.warning('Data from cache file does not meet size requested '
                           '({y} versus {r})'.format(y=Y.shape, r=Y_shape))

    if read_from_disk:
        # Read in Y
        if dataset_config['use_bip_reader']:
            # Use BIP reader
            logger.debug('Reading in data from disk using BIP reader')
            Y = bip_reader.read_row(images, line)
        else:
            # Read in data just using GDAL
            logger.debug('Reading in data from disk using GDAL')
            Y = gdal_reader.read_row(images, line)

    logger.debug('Took {s}s to read in the data'.format(
        s=round(time.time() - start_time, 2)))

    if write_cache and read_from_disk:
        logger.debug('Writing Y data to cache file {f}'.format(
            f=cache_filename))
        cache.write_cache_file(cache_filename, Y, image_IDs)

    return Y
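
A hedged sketch of reading one line with caching disabled. The filenames and
image IDs are placeholders, and `dataset_config` must also contain whatever
cache-related keys `cache.get_line_cache_name` expects, since that function is
called even when caching is turned off; only `use_bip_reader` is shown in the
source above:

    from yatsm.io.readers import get_image_attribute, read_line

    images = ['stack_2000.gtif', 'stack_2001.gtif']  # placeholder filenames
    image_IDs = ['2000', '2001']                     # placeholder identifiers
    dataset_config = {'use_bip_reader': False}       # plus cache-related keys

    nrow, ncol, nband, dtype = get_image_attribute(images[0])
    Y = read_line(0, images, image_IDs, dataset_config,
                  ncol, nband, dtype,
                  read_cache=False, write_cache=False)
    # Y.shape == (nband, len(images), ncol)
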