def create_tasks(args):

    """Yield one statistics task per matching cell for the requested seasons, bands, epochs and statistics."""

    x_list = range(args.x_min, args.x_max + 1)
    y_list = range(args.y_min, args.y_max + 1)

    dataset_types = [args.dataset_type]

    if args.mask_pqa_apply:
        dataset_types.append(DatasetType.PQ25)

    from itertools import product

    if args.file_per_statistic:

        # One Arg25EpochStatisticsTask per (season, band, statistic) combination
        for (season, band, statistic) in product(args.get_seasons(), args.bands, args.statistics):

            acq_min_extended, acq_max_extended, criteria = build_season_date_criteria(
                args.acq_min, args.acq_max, season, seasons=SEASONS, extend=True)

            for cell in list_cells_as_list(x=x_list, y=y_list, satellites=args.satellites,
                                           acq_min=acq_min_extended, acq_max=acq_max_extended,
                                           dataset_types=dataset_types, include=criteria):

                yield Arg25EpochStatisticsTask(x=cell.x, y=cell.y,
                                               acq_min=acq_min_extended, acq_max=acq_max_extended,
                                               season=season, epochs=list(args.get_epochs()),
                                               satellites=args.satellites,
                                               dataset_type=args.dataset_type,
                                               band=band, bands=args.bands,
                                               mask_pqa_apply=args.mask_pqa_apply,
                                               tidal_workflow=args.tidal_workflow,
                                               tidal_ifile=args.tidal_ifile,
                                               mask_pqa_mask=args.mask_pqa_mask,
                                               x_chunk_size=args.x_chunk_size,
                                               y_chunk_size=args.y_chunk_size,
                                               statistic=statistic,
                                               statistics=args.statistics,
                                               interpolation=args.interpolation,
                                               output_directory=args.output_directory)

        return

    # Otherwise, one Arg25BandStatisticsTask per (epoch, season) combination
    for (acq_min, acq_max), season in product(args.get_epochs(), args.get_seasons()):

        _log.debug("acq_min=[%s] acq_max=[%s] season=[%s]", acq_min, acq_max, season.name)

        acq_min_extended, acq_max_extended, criteria = build_season_date_criteria(
            acq_min, acq_max, season, seasons=SEASONS, extend=True)

        _log.debug("\tacq_min_extended=[%s], acq_max_extended=[%s], criteria=[%s]",
                   acq_min_extended, acq_max_extended, criteria)

        for cell in list_cells_as_list(x=x_list, y=y_list, satellites=args.satellites,
                                       acq_min=acq_min_extended, acq_max=acq_max_extended,
                                       dataset_types=dataset_types, include=criteria):

            _log.debug("\t%3d %4d", cell.x, cell.y)

            # yield args.create_task(x=cell.x, y=cell.y, acq_min=acq_min, acq_max=acq_max, season=season)

            _log.debug("Creating task for %s %s %s %s %s", cell.x, cell.y, acq_min, acq_max, season)

            yield Arg25BandStatisticsTask(x=cell.x, y=cell.y,
                                          acq_min=acq_min_extended, acq_max=acq_max_extended,
                                          season=season, satellites=args.satellites,
                                          dataset_type=args.dataset_type, bands=args.bands,
                                          mask_pqa_apply=args.mask_pqa_apply,
                                          mask_pqa_mask=args.mask_pqa_mask,
                                          x_chunk_size=args.x_chunk_size,
                                          y_chunk_size=args.y_chunk_size,
                                          statistics=args.statistics,
                                          interpolation=args.interpolation,
                                          output_directory=args.output_directory)
def create_tasks(args):

    x_list = range(args.x_min, args.x_max + 1)
    y_list = range(args.y_min, args.y_max + 1)

    dataset_types = [args.dataset_type]

    if args.mask_pqa_apply:
        dataset_types.append(DatasetType.PQ25)

    from itertools import product

    if args.file_per_statistic:

        for (season, band, statistic) in product(args.get_seasons(), args.bands, args.statistics):

            acq_min_extended, acq_max_extended, criteria = build_season_date_criteria(
                args.acq_min, args.acq_max, season, seasons=SEASONS, extend=True)

            for cell in list_cells_as_list(x=x_list, y=y_list, satellites=args.satellites,
                                           acq_min=acq_min_extended, acq_max=acq_max_extended,
                                           dataset_types=dataset_types, include=criteria):

                yield Arg25EpochStatisticsTask(x=cell.x, y=cell.y,
                                               acq_min=acq_min_extended, acq_max=acq_max_extended,
                                               season=season, epochs=list(args.get_epochs()),
                                               satellites=args.satellites,
                                               dataset_type=args.dataset_type,
                                               band=band, bands=args.bands,
                                               mask_pqa_apply=args.mask_pqa_apply,
                                               mask_pqa_mask=args.mask_pqa_mask,
                                               x_chunk_size=args.x_chunk_size,
                                               y_chunk_size=args.y_chunk_size,
                                               statistic=statistic,
                                               statistics=args.statistics,
                                               interpolation=args.interpolation,
                                               output_directory=args.output_directory)

        return

    for (acq_min, acq_max), season in product(args.get_epochs(), args.get_seasons()):

        _log.debug("acq_min=[%s] acq_max=[%s] season=[%s]", acq_min, acq_max, season.name)

        acq_min_extended, acq_max_extended, criteria = build_season_date_criteria(
            acq_min, acq_max, season, seasons=SEASONS, extend=True)

        _log.debug("\tacq_min_extended=[%s], acq_max_extended=[%s], criteria=[%s]",
                   acq_min_extended, acq_max_extended, criteria)

        for cell in list_cells_as_list(x=x_list, y=y_list, satellites=args.satellites,
                                       acq_min=acq_min_extended, acq_max=acq_max_extended,
                                       dataset_types=dataset_types, include=criteria):

            _log.debug("\t%3d %4d", cell.x, cell.y)

            # yield args.create_task(x=cell.x, y=cell.y, acq_min=acq_min, acq_max=acq_max, season=season)

            _log.debug("Creating task for %s %s %s %s %s", cell.x, cell.y, acq_min, acq_max, season)

            yield Arg25BandStatisticsTask(x=cell.x, y=cell.y,
                                          acq_min=acq_min_extended, acq_max=acq_max_extended,
                                          season=season, satellites=args.satellites,
                                          dataset_type=args.dataset_type, bands=args.bands,
                                          mask_pqa_apply=args.mask_pqa_apply,
                                          mask_pqa_mask=args.mask_pqa_mask,
                                          x_chunk_size=args.x_chunk_size,
                                          y_chunk_size=args.y_chunk_size,
                                          statistics=args.statistics,
                                          interpolation=args.interpolation,
                                          output_directory=args.output_directory)
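# --- Illustrative sketch (not part of the original module) ---
# A minimal example of how create_tasks() above might be driven, assuming the
# surrounding workflow uses Luigi (as Arg25BandStatisticsTask and the requires()
# methods below suggest). The Arg25StatisticsWorkflow class and parse_arguments()
# helper are hypothetical names, not part of the original code.

import luigi


class Arg25StatisticsWorkflow(luigi.WrapperTask):

    def requires(self):
        args = parse_arguments()          # hypothetical: the parsed CLI arguments object
        return list(create_tasks(args))   # create_tasks() is a generator of Luigi tasks


if __name__ == "__main__":
    luigi.run()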
def requires(self):

    dataset_types = [self.dataset_type]

    if self.mask_pqa_apply:
        dataset_types.append(DatasetType.PQ25)

    for (acq_min, acq_max) in self.epochs:

        _log.debug("acq_min=[%s] acq_max=[%s] season=[%s]", acq_min, acq_max, self.season.name)

        acq_min_extended, acq_max_extended, criteria = build_season_date_criteria(
            acq_min, acq_max, self.season, seasons=SEASONS, extend=True)

        _log.debug("\tacq_min_extended=[%s], acq_max_extended=[%s], criteria=[%s]",
                   acq_min_extended, acq_max_extended, criteria)

        for cell in list_cells_as_list(x=[self.x], y=[self.y], satellites=self.satellites,
                                       acq_min=acq_min_extended, acq_max=acq_max_extended,
                                       dataset_types=dataset_types, include=criteria):

            _log.debug("\t%3d %4d", cell.x, cell.y)

            # yield args.create_task(x=cell.x, y=cell.y, acq_min=acq_min, acq_max=acq_max, season=season)

            _log.debug("Creating task for %s %s %s %s %s", cell.x, cell.y, acq_min, acq_max, self.season)

            yield Arg25BandStatisticsTask(x=cell.x, y=cell.y,
                                          acq_min=acq_min_extended, acq_max=acq_max_extended,
                                          season=self.season, satellites=self.satellites,
                                          dataset_type=self.dataset_type, bands=self.bands,
                                          mask_pqa_apply=self.mask_pqa_apply,
                                          tidal_workflow=self.tidal_workflow,
                                          mask_pqa_mask=self.mask_pqa_mask,
                                          x_chunk_size=self.x_chunk_size,
                                          y_chunk_size=self.y_chunk_size,
                                          statistics=self.statistics,
                                          interpolation=self.interpolation,
                                          output_directory=self.output_directory)
def requires(self):

    dataset_types = [self.dataset_type]

    if self.mask_pqa_apply:
        dataset_types.append(DatasetType.PQ25)

    for (acq_min, acq_max) in self.epochs:

        _log.debug("acq_min=[%s] acq_max=[%s] season=[%s]", acq_min, acq_max, self.season.name)

        acq_min_extended, acq_max_extended, criteria = build_season_date_criteria(
            acq_min, acq_max, self.season, seasons=SEASONS, extend=True)

        _log.debug("\tacq_min_extended=[%s], acq_max_extended=[%s], criteria=[%s]",
                   acq_min_extended, acq_max_extended, criteria)

        for cell in list_cells_as_list(x=[self.x], y=[self.y], satellites=self.satellites,
                                       acq_min=acq_min_extended, acq_max=acq_max_extended,
                                       dataset_types=dataset_types, include=criteria):

            _log.debug("\t%3d %4d", cell.x, cell.y)

            # yield args.create_task(x=cell.x, y=cell.y, acq_min=acq_min, acq_max=acq_max, season=season)

            _log.debug("Creating task for %s %s %s %s %s", cell.x, cell.y, acq_min, acq_max, self.season)

            yield Arg25BandStatisticsTask(x=cell.x, y=cell.y,
                                          acq_min=acq_min_extended, acq_max=acq_max_extended,
                                          season=self.season, satellites=self.satellites,
                                          dataset_type=self.dataset_type, bands=self.bands,
                                          mask_pqa_apply=self.mask_pqa_apply,
                                          mask_pqa_mask=self.mask_pqa_mask,
                                          x_chunk_size=self.x_chunk_size,
                                          y_chunk_size=self.y_chunk_size,
                                          statistics=self.statistics,
                                          interpolation=self.interpolation,
                                          output_directory=self.output_directory)
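# --- Illustrative sketch (not part of the original module) ---
# The requires() implementations above build their Arg25BandStatisticsTask
# dependencies from the task's own fields (self.x, self.epochs, self.season, ...).
# In a Luigi task those fields come from declared parameters, roughly as outlined
# below; the class name and parameter types are assumptions, not the real definition.

import luigi


class EpochStatisticsTaskOutline(luigi.Task):

    x = luigi.IntParameter()
    y = luigi.IntParameter()
    season = luigi.Parameter()
    epochs = luigi.Parameter()              # list of (acq_min, acq_max) tuples
    satellites = luigi.Parameter()
    dataset_type = luigi.Parameter()
    bands = luigi.Parameter()
    mask_pqa_apply = luigi.BoolParameter()
    mask_pqa_mask = luigi.Parameter()
    statistics = luigi.Parameter()
    # ...plus x_chunk_size, y_chunk_size, interpolation, output_directory, etc.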
def test_list_cells_120_020_2005_ls578(config=None):

    cells = list_cells_as_list(x=[TEST_CELL_X], y=[TEST_CELL_Y],
                               acq_min=parse_date_min(TEST_YEAR_STR),
                               acq_max=parse_date_max(TEST_YEAR_STR),
                               satellites=[Satellite.LS5, Satellite.LS7, Satellite.LS8],
                               dataset_types=[DatasetType.ARG25],
                               config=config)

    assert(cells and len(list(cells)) > 0)

    for cell in cells:
        _log.info("Found cell xy = %s", cell.xy)
        assert(cell.x == TEST_CELL_X and cell.y == TEST_CELL_Y and cell.xy == (TEST_CELL_X, TEST_CELL_Y))
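# --- Illustrative sketch (not part of the original module) ---
# Plausible module-level constants for the test above, inferred only from the test
# name (cell 120/-020, year 2005, Landsat 5/7/8). The concrete values, and the
# assumption that parse_date_min("2005") / parse_date_max("2005") expand to
# 1 January / 31 December 2005, are guesses rather than taken from the original source.

TEST_CELL_X = 120
TEST_CELL_Y = -20

TEST_YEAR_STR = "2005"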
def go(self):

    import numpy

    from datacube.api.query import list_cells_as_list, list_tiles_as_list
    from datacube.config import Config

    x_min, x_max, y_max, y_min = self.extract_bounds_from_vector()
    _log.debug("The bounds are [%s]", (x_min, x_max, y_min, y_max))

    cells_vector = self.extract_cells_from_vector()
    _log.debug("Intersecting cells_vector are [%d] [%s]", len(cells_vector), cells_vector)

    config = Config()
    _log.debug(config.to_str())

    x_list = range(x_min, x_max + 1)
    y_list = range(y_min, y_max + 1)
    _log.debug("x = [%s] y=[%s]", x_list, y_list)

    cells_db = list()

    for cell in list_cells_as_list(x=x_list, y=y_list, acq_min=self.acq_min, acq_max=self.acq_max,
                                   satellites=[satellite for satellite in self.satellites],
                                   dataset_types=[self.dataset_type]):
        cells_db.append((cell.x, cell.y))

    _log.debug("Cells from DB are [%d] [%s]", len(cells_db), cells_db)

    cells = intersection(cells_vector, cells_db)
    _log.debug("Combined cells are [%d] [%s]", len(cells), cells)

    for (x, y) in cells:
        _log.info("Processing cell [%3d/%4d]", x, y)

        tiles = list_tiles_as_list(x=x_list, y=y_list, acq_min=self.acq_min, acq_max=self.acq_max,
                                   satellites=[satellite for satellite in self.satellites],
                                   dataset_types=[self.dataset_type])
        _log.info("There are [%d] tiles", len(tiles))

        if self.list_only:
            for tile in tiles:
                _log.info("Would process [%s]", tile.datasets[self.dataset_type].path)
            continue

        # Calculate the mask for the cell
        mask_aoi = self.get_mask_aoi_cell(x, y)

        pixel_count = 4000 * 4000
        pixel_count_aoi = (mask_aoi == False).sum()

        _log.debug("mask_aoi is [%s]\n[%s]", numpy.shape(mask_aoi), mask_aoi)

        metadata = None

        with self.get_output_file() as csv_file:

            csv_writer = csv.writer(csv_file)

            import operator

            header = reduce(operator.add, [["DATE", "INSTRUMENT", "# PIXELS", "# PIXELS IN AOI"]] + [
                ["%s - # DATA PIXELS" % band_name,
                 "%s - # DATA PIXELS AFTER PQA" % band_name,
                 "%s - # DATA PIXELS AFTER PQA WOFS" % band_name,
                 "%s - # DATA PIXELS AFTER PQA WOFS AOI" % band_name,
                 "%s - MIN" % band_name,
                 "%s - MAX" % band_name,
                 "%s - MEAN" % band_name] for band_name in self.bands])

            csv_writer.writerow(header)

            for tile in tiles:

                _log.info("Processing tile [%s]", tile.datasets[self.dataset_type].path)

                if self.list_only:
                    continue

                if not metadata:
                    metadata = get_dataset_metadata(tile.datasets[self.dataset_type])

                # Apply PQA if specified
                pqa = None
                mask_pqa = None

                if self.mask_pqa_apply and DatasetType.PQ25 in tile.datasets:
                    pqa = tile.datasets[DatasetType.PQ25]
                    mask_pqa = get_mask_pqa(pqa, self.mask_pqa_mask)

                _log.debug("mask_pqa is [%s]\n[%s]", numpy.shape(mask_pqa), mask_pqa)

                # Apply WOFS if specified
                wofs = None
                mask_wofs = None

                if self.mask_wofs_apply and DatasetType.WATER in tile.datasets:
                    wofs = tile.datasets[DatasetType.WATER]
                    mask_wofs = get_mask_wofs(wofs, self.mask_wofs_mask)

                _log.debug("mask_wofs is [%s]\n[%s]", numpy.shape(mask_wofs), mask_wofs)

                dataset = tile.datasets[self.dataset_type]

                bands = []
                dataset_band_names = [b.name for b in dataset.bands]

                for b in self.bands:
                    if b in dataset_band_names:
                        bands.append(dataset.bands[b])

                data = get_dataset_data(tile.datasets[self.dataset_type], bands=bands)
                _log.debug("data is [%s]\n[%s]", numpy.shape(data), data)

                pixel_count_data = dict()
                pixel_count_data_pqa = dict()
                pixel_count_data_pqa_wofs = dict()
                pixel_count_data_pqa_wofs_aoi = dict()
                mmin = dict()
                mmax = dict()
                mmean = dict()

                for band_name in self.bands:

                    # Add "zeroed" entries for non-present bands - should only be if outputs for those bands have been explicitly requested
                    if band_name not in dataset_band_names:
                        pixel_count_data[band_name] = 0
                        pixel_count_data_pqa[band_name] = 0
                        pixel_count_data_pqa_wofs[band_name] = 0
                        pixel_count_data_pqa_wofs_aoi[band_name] = 0
                        mmin[band_name] = numpy.ma.masked
                        mmax[band_name] = numpy.ma.masked
                        mmean[band_name] = numpy.ma.masked
                        continue

                    band = dataset.bands[band_name]

                    data[band] = numpy.ma.masked_equal(data[band], NDV)
                    _log.debug("masked data is [%s] [%d]\n[%s]", numpy.shape(data), numpy.ma.count(data), data)

                    pixel_count_data[band_name] = numpy.ma.count(data[band])

                    if pqa:
                        data[band].mask = numpy.ma.mask_or(data[band].mask, mask_pqa)
                        _log.debug("PQA masked data is [%s] [%d]\n[%s]", numpy.shape(data[band]), numpy.ma.count(data[band]), data[band])

                    pixel_count_data_pqa[band_name] = numpy.ma.count(data[band])

                    if wofs:
                        data[band].mask = numpy.ma.mask_or(data[band].mask, mask_wofs)
                        _log.debug("WOFS masked data is [%s] [%d]\n[%s]", numpy.shape(data[band]), numpy.ma.count(data[band]), data[band])

                    pixel_count_data_pqa_wofs[band_name] = numpy.ma.count(data[band])

                    data[band].mask = numpy.ma.mask_or(data[band].mask, mask_aoi)
                    _log.debug("AOI masked data is [%s] [%d]\n[%s]", numpy.shape(data[band]), numpy.ma.count(data[band]), data[band])

                    pixel_count_data_pqa_wofs_aoi[band_name] = numpy.ma.count(data[band])

                    mmin[band_name] = numpy.ma.min(data[band])
                    mmax[band_name] = numpy.ma.max(data[band])
                    mmean[band_name] = numpy.ma.mean(data[band])

                    # Convert the mean to an int...taking into account masking....
                    if not numpy.ma.is_masked(mmean[band_name]):
                        mmean[band_name] = mmean[band_name].astype(numpy.int16)

                pixel_count_data_pqa_wofs_aoi_all_bands = reduce(operator.add, pixel_count_data_pqa_wofs_aoi.itervalues())

                if pixel_count_data_pqa_wofs_aoi_all_bands == 0 and not self.output_no_data:
                    _log.info("Skipping dataset with no non-masked data values in ANY band")
                    continue

                row = reduce(
                    operator.add,
                    [[tile.end_datetime,
                      self.decode_satellite_as_instrument(tile.datasets[self.dataset_type].satellite),
                      pixel_count, pixel_count_aoi]] +
                    [[pixel_count_data[band_name], pixel_count_data_pqa[band_name],
                      pixel_count_data_pqa_wofs[band_name], pixel_count_data_pqa_wofs_aoi[band_name],
                      mmin[band_name], mmax[band_name], mmean[band_name]] for band_name in self.bands])

                csv_writer.writerow(row)
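# --- Illustrative sketch (not part of the original module) ---
# intersection() is called by the go() method above (and again below) but is not
# defined in this excerpt. A minimal helper consistent with how it is used (two
# collections of (x, y) cell tuples in, the common cells out, preserving the order
# of the first argument) might look like this; it is an assumption, not the
# original implementation.

def intersection(a, b):
    # keep the cells of `a` that also occur in `b`
    b_set = set(b)
    return [cell for cell in a if cell in b_set]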
def go(self):

    import numpy

    from datacube.api.query import list_cells_as_list, list_tiles_as_list
    from datacube.config import Config

    # Verify that all the requested satellites have the same band combinations
    dataset_bands = get_bands(self.dataset_type, self.satellites[0])
    _log.info("dataset bands is [%s]", " ".join([b.name for b in dataset_bands]))

    for satellite in self.satellites:
        if dataset_bands != get_bands(self.dataset_type, satellite):
            _log.error("Satellites [%s] have differing bands",
                       " ".join([satellite.name for satellite in self.satellites]))
            raise Exception("Satellites with different band combinations selected")

    bands = []
    dataset_bands_list = list(dataset_bands)

    if not self.bands:
        bands = dataset_bands_list
    else:
        for b in self.bands:
            bands.append(dataset_bands_list[b - 1])

    _log.info("Using bands [%s]", " ".join(band.name for band in bands))

    x_min, x_max, y_max, y_min = self.extract_bounds_from_vector()
    _log.debug("The bounds are [%s]", (x_min, x_max, y_min, y_max))

    cells_vector = self.extract_cells_from_vector()
    _log.debug("Intersecting cells_vector are [%d] [%s]", len(cells_vector), cells_vector)

    config = Config(os.path.expanduser("~/.datacube/config"))
    _log.debug(config.to_str())

    x_list = range(x_min, x_max + 1)
    y_list = range(y_min, y_max + 1)
    _log.debug("x = [%s] y=[%s]", x_list, y_list)

    cells_db = list()

    for cell in list_cells_as_list(x=x_list, y=y_list, acq_min=self.acq_min, acq_max=self.acq_max,
                                   satellites=[satellite for satellite in self.satellites],
                                   dataset_types=[self.dataset_type]):
        cells_db.append((cell.x, cell.y))

    _log.debug("Cells from DB are [%d] [%s]", len(cells_db), cells_db)

    cells = intersection(cells_vector, cells_db)
    _log.debug("Combined cells are [%d] [%s]", len(cells), cells)

    for (x, y) in cells:
        _log.info("Processing cell [%3d/%4d]", x, y)

        tiles = list_tiles_as_list(x=x_list, y=y_list, acq_min=self.acq_min, acq_max=self.acq_max,
                                   satellites=[satellite for satellite in self.satellites],
                                   dataset_types=[self.dataset_type])
        _log.info("There are [%d] tiles", len(tiles))

        if self.list_only:
            for tile in tiles:
                _log.info("Would process [%s]", tile.datasets[self.dataset_type].path)
            continue

        # Calculate the mask for the cell
        mask_aoi = self.get_mask_aoi_cell(x, y)

        pixel_count = 4000 * 4000
        pixel_count_aoi = (mask_aoi == False).sum()

        _log.debug("mask_aoi is [%s]\n[%s]", numpy.shape(mask_aoi), mask_aoi)

        metadata = None

        with self.get_output_file() as csv_file:

            csv_writer = csv.writer(csv_file)

            import operator

            header = reduce(operator.add, [["DATE", "INSTRUMENT", "# PIXELS", "# PIXELS IN AOI"]] + [
                ["%s - # DATA PIXELS" % b.name,
                 "%s - # DATA PIXELS AFTER PQA" % b.name,
                 "%s - # DATA PIXELS AFTER PQA WOFS" % b.name,
                 "%s - # DATA PIXELS AFTER PQA WOFS AOI" % b.name,
                 "%s - MIN" % b.name,
                 "%s - MAX" % b.name,
                 "%s - MEAN" % b.name] for b in bands])

            csv_writer.writerow(header)

            for tile in tiles:

                _log.info("Processing tile [%s]", tile.datasets[self.dataset_type].path)

                if self.list_only:
                    continue

                if not metadata:
                    metadata = get_dataset_metadata(tile.datasets[self.dataset_type])

                # Apply PQA if specified
                pqa = None
                mask_pqa = None

                if self.mask_pqa_apply and DatasetType.PQ25 in tile.datasets:
                    pqa = tile.datasets[DatasetType.PQ25]
                    mask_pqa = get_mask_pqa(pqa, self.mask_pqa_mask)

                _log.debug("mask_pqa is [%s]\n[%s]", numpy.shape(mask_pqa), mask_pqa)

                # Apply WOFS if specified
                wofs = None
                mask_wofs = None

                if self.mask_wofs_apply and DatasetType.WATER in tile.datasets:
                    wofs = tile.datasets[DatasetType.WATER]
                    mask_wofs = get_mask_wofs(wofs, self.mask_wofs_mask)

                _log.debug("mask_wofs is [%s]\n[%s]", numpy.shape(mask_wofs), mask_wofs)

                data = get_dataset_data(tile.datasets[self.dataset_type], bands=bands)
                _log.debug("data is [%s]\n[%s]", numpy.shape(data), data)

                pixel_count_data = dict()
                pixel_count_data_pqa = dict()
                pixel_count_data_pqa_wofs = dict()
                pixel_count_data_pqa_wofs_aoi = dict()
                mmin = dict()
                mmax = dict()
                mmean = dict()

                for band in bands:

                    data[band] = numpy.ma.masked_equal(data[band], NDV)
                    _log.debug("masked data is [%s] [%d]\n[%s]", numpy.shape(data), numpy.ma.count(data), data)

                    pixel_count_data[band] = numpy.ma.count(data[band])

                    if pqa:
                        data[band].mask = numpy.ma.mask_or(data[band].mask, mask_pqa)
                        _log.debug("PQA masked data is [%s] [%d]\n[%s]", numpy.shape(data[band]), numpy.ma.count(data[band]), data[band])

                    pixel_count_data_pqa[band] = numpy.ma.count(data[band])

                    if wofs:
                        data[band].mask = numpy.ma.mask_or(data[band].mask, mask_wofs)
                        _log.debug("WOFS masked data is [%s] [%d]\n[%s]", numpy.shape(data[band]), numpy.ma.count(data[band]), data[band])

                    pixel_count_data_pqa_wofs[band] = numpy.ma.count(data[band])

                    data[band].mask = numpy.ma.mask_or(data[band].mask, mask_aoi)
                    _log.debug("AOI masked data is [%s] [%d]\n[%s]", numpy.shape(data[band]), numpy.ma.count(data[band]), data[band])

                    pixel_count_data_pqa_wofs_aoi[band] = numpy.ma.count(data[band])

                    mmin[band] = numpy.ma.min(data[band])
                    mmax[band] = numpy.ma.max(data[band])
                    mmean[band] = numpy.ma.mean(data[band])

                    # Convert the mean to an int...which is actually trickier than you would expect due to masking....
                    if numpy.ma.count(mmean[band]) != 0:
                        mmean[band] = mmean[band].astype(numpy.int16)

                # Should we output if no data values found?
                pixel_count_data_pqa_wofs_aoi_all_bands = reduce(operator.add, pixel_count_data_pqa_wofs_aoi.itervalues())

                if pixel_count_data_pqa_wofs_aoi_all_bands == 0 and not self.output_no_data:
                    _log.info("Skipping dataset with no non-masked data values in ANY band")
                    continue

                row = reduce(
                    operator.add,
                    [[tile.end_datetime,
                      self.decode_satellite_as_instrument(tile.datasets[self.dataset_type].satellite),
                      pixel_count, pixel_count_aoi]] +
                    [[pixel_count_data[band], pixel_count_data_pqa[band],
                      pixel_count_data_pqa_wofs[band], pixel_count_data_pqa_wofs_aoi[band],
                      mmin[band], mmax[band], mmean[band]] for band in bands])

                csv_writer.writerow(row)