def test_fred(x=2, y=2, z=50, ndv=-999): print "Testing..." data = numpy.load("/tmp/random_{x}_{y}_{z}.npy".format(x=x, y=y, z=z)) # print numpy.shape(data), data # Calculate the percentiles stripping out no data values p = numpy.empty((y, x), dtype=numpy.int16) from itertools import product for y, x in product(range(0, y), range(0, x)): d = data[:, x, y] # print "data", x, y, numpy.shape(d), d # print "stripped data", numpy.shape(d[d != ndv]), d[d != ndv] p[x, y] = numpy.percentile(d[d != ndv], 75, axis=0, interpolation="nearest") # print "stripped percentiles", p[x, y] print "percentiles\n", p from datacube.api.utils import calculate_stack_statistic_percentile print "calculated percentiles\n", calculate_stack_statistic_percentile(data, 75) print data[:, 0, 0] print numpy.apply_along_axis(do_percentile_1d, axis=0, arr=data)
def run(self):
    """Calculate the requested statistics for this spatial chunk and save
    each result as a .npy file (named via get_statistic_filename()).

    For tidal workflows the tile list is first filtered down to tiles
    whose acquisition end date appears in the filter file.
    """
    _log.info("Calculating statistics for chunk")

    ndv = get_dataset_type_ndv(self.dataset_type)
    data_type = get_dataset_type_data_type(self.dataset_type)

    tiles = self.get_tiles()

    filtile = tiles

    if self.tidal_workflow:
        # Keep only tiles whose end date appears in the filter file
        # (presumably a collection of "YYYY-MM-DD" strings -- TODO
        # confirm load_filterfile()).
        lines = self.load_filterfile()

        _log.info("\tlength of original tiles is %d", len(tiles))

        filtile = [tile for tile in tiles
                   if str(tile.end_datetime.strftime("%Y-%m-%d")) in lines]

        _log.info("\tlength of new filtered tiles is %d", len(filtile))

    stack = get_dataset_data_stack(filtile, self.dataset_type, self.band.name, ndv=ndv,
                                   x=self.x_offset, y=self.y_offset,
                                   x_size=self.x_chunk_size, y_size=self.y_chunk_size,
                                   mask_pqa_apply=self.mask_pqa_apply,
                                   mask_pqa_mask=self.mask_pqa_mask)

    if len(stack) == 0:
        return

    # TODO get statistics to be generated from command line argument

    if Statistic.COUNT in self.statistics:
        log_mem("Before COUNT")
        # COUNT
        stack_stat = calculate_stack_statistic_count(stack=stack, ndv=ndv)
        numpy.save(self.get_statistic_filename(Statistic.COUNT), stack_stat)
        del stack_stat

    if Statistic.MIN in self.statistics:
        log_mem("Before MIN")
        # MIN
        stack_stat = calculate_stack_statistic_min(stack=stack, ndv=ndv, dtype=data_type)
        numpy.save(self.get_statistic_filename(Statistic.MIN), stack_stat)
        del stack_stat

    if Statistic.MAX in self.statistics:
        log_mem("Before MAX")
        # MAX
        stack_stat = calculate_stack_statistic_max(stack=stack, ndv=ndv, dtype=data_type)
        numpy.save(self.get_statistic_filename(Statistic.MAX), stack_stat)
        del stack_stat

    if Statistic.MEAN in self.statistics:
        log_mem("Before MEAN")
        # MEAN
        stack_stat = calculate_stack_statistic_mean(stack=stack, ndv=ndv, dtype=data_type)
        numpy.save(self.get_statistic_filename(Statistic.MEAN), stack_stat)
        del stack_stat

    if Statistic.MEDIAN in self.statistics:
        log_mem("Before MEDIAN")
        # MEDIAN (the original comment mislabelled this branch as MEAN)
        stack_stat = calculate_stack_statistic_median(stack=stack, ndv=ndv, dtype=data_type)
        numpy.save(self.get_statistic_filename(Statistic.MEDIAN), stack_stat)
        del stack_stat

    if Statistic.VARIANCE in self.statistics:
        log_mem("Before VARIANCE")
        # VARIANCE
        stack_stat = calculate_stack_statistic_variance(stack=stack, ndv=ndv, dtype=data_type)
        numpy.save(self.get_statistic_filename(Statistic.VARIANCE), stack_stat)
        del stack_stat

    if Statistic.STANDARD_DEVIATION in self.statistics:
        log_mem("Before STANDARD_DEVIATION")
        # STANDARD_DEVIATION
        stack_stat = calculate_stack_statistic_standard_deviation(stack=stack, ndv=ndv, dtype=data_type)
        numpy.save(self.get_statistic_filename(Statistic.STANDARD_DEVIATION), stack_stat)
        del stack_stat

    for percentile in PERCENTILE:
        if percentile in self.statistics:
            log_mem("Before {p}".format(p=percentile.name))
            stack_stat = calculate_stack_statistic_percentile(stack=stack,
                                                              percentile=PERCENTILE[percentile],
                                                              ndv=ndv,
                                                              interpolation=self.interpolation)
            numpy.save(self.get_statistic_filename(percentile), stack_stat)
            del stack_stat

    if Statistic.COUNT_OBSERVED in self.statistics:
        log_mem("Before OBSERVED COUNT")
        # COUNT OBSERVED - note the copy=False is modifying the array so this is done last
        stack_stat = calculate_stack_statistic_count_observed(stack=stack, ndv=ndv)
        numpy.save(self.get_statistic_filename(Statistic.COUNT_OBSERVED), stack_stat)
        del stack_stat
def run(self):
    """Calculate the requested statistics for this spatial chunk and save
    each result as a .npy file (named via get_statistic_filename()).

    For tidal workflows the tile list is first filtered down to tiles
    whose acquisition end date appears in the filter file.
    """
    _log.info("Calculating statistics for chunk")

    ndv = get_dataset_type_ndv(self.dataset_type)
    data_type = get_dataset_type_data_type(self.dataset_type)

    tiles = self.get_tiles()

    filtile = tiles

    if self.tidal_workflow:
        # Keep only tiles whose end date appears in the filter file
        # (presumably a collection of "YYYY-MM-DD" strings -- TODO
        # confirm load_filterfile()).
        lines = self.load_filterfile()

        _log.info("\tlength of original tiles is %d", len(tiles))

        filtile = []
        for tile in tiles:
            tdate = str(tile.end_datetime.strftime("%Y-%m-%d"))
            if tdate in lines:
                filtile.append(tile)

        _log.info("\tlength of new filtered tiles is %d", len(filtile))

    stack = get_dataset_data_stack(filtile, self.dataset_type, self.band.name, ndv=ndv,
                                   x=self.x_offset, y=self.y_offset,
                                   x_size=self.x_chunk_size, y_size=self.y_chunk_size,
                                   mask_pqa_apply=self.mask_pqa_apply,
                                   mask_pqa_mask=self.mask_pqa_mask)

    if len(stack) == 0:
        return

    # TODO get statistics to be generated from command line argument

    if Statistic.COUNT in self.statistics:
        log_mem("Before COUNT")
        # COUNT
        stack_stat = calculate_stack_statistic_count(stack=stack, ndv=ndv)
        numpy.save(self.get_statistic_filename(Statistic.COUNT), stack_stat)
        del stack_stat

    if Statistic.MIN in self.statistics:
        log_mem("Before MIN")
        # MIN
        stack_stat = calculate_stack_statistic_min(stack=stack, ndv=ndv, dtype=data_type)
        numpy.save(self.get_statistic_filename(Statistic.MIN), stack_stat)
        del stack_stat

    if Statistic.MAX in self.statistics:
        log_mem("Before MAX")
        # MAX
        stack_stat = calculate_stack_statistic_max(stack=stack, ndv=ndv, dtype=data_type)
        numpy.save(self.get_statistic_filename(Statistic.MAX), stack_stat)
        del stack_stat

    if Statistic.MEAN in self.statistics:
        log_mem("Before MEAN")
        # MEAN
        stack_stat = calculate_stack_statistic_mean(stack=stack, ndv=ndv, dtype=data_type)
        numpy.save(self.get_statistic_filename(Statistic.MEAN), stack_stat)
        del stack_stat

    if Statistic.MEDIAN in self.statistics:
        log_mem("Before MEDIAN")
        # MEDIAN (the original comment mislabelled this branch as MEAN)
        stack_stat = calculate_stack_statistic_median(stack=stack, ndv=ndv, dtype=data_type)
        numpy.save(self.get_statistic_filename(Statistic.MEDIAN), stack_stat)
        del stack_stat

    if Statistic.VARIANCE in self.statistics:
        log_mem("Before VARIANCE")
        # VARIANCE
        stack_stat = calculate_stack_statistic_variance(stack=stack, ndv=ndv, dtype=data_type)
        numpy.save(self.get_statistic_filename(Statistic.VARIANCE), stack_stat)
        del stack_stat

    if Statistic.STANDARD_DEVIATION in self.statistics:
        log_mem("Before STANDARD_DEVIATION")
        # STANDARD_DEVIATION
        stack_stat = calculate_stack_statistic_standard_deviation(stack=stack, ndv=ndv, dtype=data_type)
        numpy.save(self.get_statistic_filename(Statistic.STANDARD_DEVIATION), stack_stat)
        del stack_stat

    for percentile in PERCENTILE:
        if percentile in self.statistics:
            log_mem("Before {p}".format(p=percentile.name))
            stack_stat = calculate_stack_statistic_percentile(stack=stack,
                                                              percentile=PERCENTILE[percentile],
                                                              ndv=ndv,
                                                              interpolation=self.interpolation)
            numpy.save(self.get_statistic_filename(percentile), stack_stat)
            del stack_stat

    if Statistic.COUNT_OBSERVED in self.statistics:
        log_mem("Before OBSERVED COUNT")
        # COUNT OBSERVED - note the copy=False is modifying the array so this is done last
        stack_stat = calculate_stack_statistic_count_observed(stack=stack, ndv=ndv)
        numpy.save(self.get_statistic_filename(Statistic.COUNT_OBSERVED), stack_stat)
        del stack_stat
def existing_nan_percentile(data): print "existing nan percentile..." from datacube.api.utils import calculate_stack_statistic_percentile print "calculated percentiles\n", calculate_stack_statistic_percentile(data, [25, 50, 75])
def run(self):
    """Calculate seasonal statistics for this chunk and save each result
    to its own .npy file.

    Builds the acquisition date range/criteria for the configured season,
    lists the matching tiles, stacks the band data for the chunk window,
    then writes one file per requested statistic.
    """
    _log.info("Calculating statistics for chunk")

    ndv = get_dataset_type_ndv(self.dataset_type)

    # Expand the nominal acquisition range to cover the configured season.
    acq_min, acq_max, criteria = build_season_date_criteria(self.acq_min, self.acq_max, self.season,
                                                           seasons=SEASONS, extend=True)

    _log.info("\tcriteria is %s", criteria)

    dataset_types = [self.dataset_type]

    # PQ tiles are needed alongside the data tiles when PQA masking is on.
    if self.mask_pqa_apply:
        dataset_types.append(DatasetType.PQ25)

    tiles = list_tiles_as_generator(x=[self.x], y=[self.y], satellites=self.satellites,
                                    acq_min=acq_min, acq_max=acq_max,
                                    dataset_types=dataset_types, include=criteria)

    stack = get_dataset_data_stack(tiles, self.dataset_type, self.band.name, ndv=ndv,
                                   x=self.x_offset, y=self.y_offset,
                                   x_size=self.x_chunk_size, y_size=self.y_chunk_size,
                                   mask_pqa_apply=self.mask_pqa_apply, mask_pqa_mask=self.mask_pqa_mask)

    # Nothing acquired for this chunk -- nothing to compute or save.
    if len(stack) == 0:
        return

    # TODO get statistics to be generated from command line argument

    # NOTE(review): unlike the other run() variants in this file, the
    # MIN/MAX/MEAN calls below pass no dtype= argument -- confirm whether
    # the default output dtype is intended here.

    if Statistic.COUNT in self.statistics:
        log_mem("Before COUNT")

        # COUNT
        print "COUNT"
        stack_stat = calculate_stack_statistic_count(stack=stack, ndv=ndv)
        numpy.save(self.get_statistic_filename(Statistic.COUNT), stack_stat)
        del stack_stat

    if Statistic.MIN in self.statistics:
        log_mem("Before MIN")

        # MIN
        print "MIN"
        stack_stat = calculate_stack_statistic_min(stack=stack, ndv=ndv)
        numpy.save(self.get_statistic_filename(Statistic.MIN), stack_stat)
        del stack_stat

    if Statistic.MAX in self.statistics:
        log_mem("Before MAX")

        # MAX
        print "MAX"
        stack_stat = calculate_stack_statistic_max(stack=stack, ndv=ndv)
        numpy.save(self.get_statistic_filename(Statistic.MAX), stack_stat)
        del stack_stat

    if Statistic.MEAN in self.statistics:
        log_mem("Before MEAN")

        # MEAN
        print "MEAN"
        stack_stat = calculate_stack_statistic_mean(stack=stack, ndv=ndv)
        numpy.save(self.get_statistic_filename(Statistic.MEAN), stack_stat)
        del stack_stat

    # Hard-coded subset of percentiles (the other variants iterate all of
    # PERCENTILE); PERCENTILE maps the Statistic member to its numeric value.
    for percentile in [Statistic.PERCENTILE_25, Statistic.PERCENTILE_50, Statistic.PERCENTILE_75,
                       Statistic.PERCENTILE_90, Statistic.PERCENTILE_95]:
        if percentile in self.statistics:
            log_mem("Before {p}".format(p=percentile.name))
            print "Before {p}".format(p=percentile.name)

            stack_stat = calculate_stack_statistic_percentile(stack=stack,
                                                              percentile=PERCENTILE[percentile], ndv=ndv)
            numpy.save(self.get_statistic_filename(percentile), stack_stat)
            del stack_stat

    if Statistic.COUNT_OBSERVED in self.statistics:
        log_mem("Before OBSERVED COUNT")

        # COUNT OBSERVED - note the copy=False is modifying the array so this is done last
        print "COUNT OBSERVED"
        stack_stat = calculate_stack_statistic_count_observed(stack=stack, ndv=ndv)
        numpy.save(self.get_statistic_filename(Statistic.COUNT_OBSERVED), stack_stat)
        del stack_stat

    log_mem("DONE")
def run(self):
    """Calculate each requested statistic for this chunk and persist it
    as a .npy file named via get_statistic_filename()."""
    _log.info("Calculating statistics for chunk")

    ndv = get_dataset_type_ndv(self.dataset_type)
    data_type = get_dataset_type_data_type(self.dataset_type)

    stack = get_dataset_data_stack(self.get_tiles(), self.dataset_type, self.band.name, ndv=ndv,
                                   x=self.x_offset, y=self.y_offset,
                                   x_size=self.x_chunk_size, y_size=self.y_chunk_size,
                                   mask_pqa_apply=self.mask_pqa_apply,
                                   mask_pqa_mask=self.mask_pqa_mask)

    # Nothing acquired for this chunk -- nothing to do.
    if len(stack) == 0:
        return

    # TODO get statistics to be generated from command line argument

    if Statistic.COUNT in self.statistics:
        log_mem("Before COUNT")

        # COUNT
        result = calculate_stack_statistic_count(stack=stack, ndv=ndv)
        numpy.save(self.get_statistic_filename(Statistic.COUNT), result)
        del result

    # The dtype-aware statistics all share the same calling convention,
    # so drive them from a table; the order matches the original code.
    dtype_statistics = [
        (Statistic.MIN, calculate_stack_statistic_min),
        (Statistic.MAX, calculate_stack_statistic_max),
        (Statistic.MEAN, calculate_stack_statistic_mean),
        (Statistic.VARIANCE, calculate_stack_statistic_variance),
        (Statistic.STANDARD_DEVIATION, calculate_stack_statistic_standard_deviation),
    ]

    for statistic, calculate in dtype_statistics:
        if statistic in self.statistics:
            log_mem("Before {name}".format(name=statistic.name))

            result = calculate(stack=stack, ndv=ndv, dtype=data_type)
            numpy.save(self.get_statistic_filename(statistic), result)
            del result

    for pct in PERCENTILE:
        if pct in self.statistics:
            log_mem("Before {p}".format(p=pct.name))

            result = calculate_stack_statistic_percentile(stack=stack,
                                                          percentile=PERCENTILE[pct], ndv=ndv,
                                                          interpolation=self.interpolation)
            numpy.save(self.get_statistic_filename(pct), result)
            del result

    if Statistic.COUNT_OBSERVED in self.statistics:
        log_mem("Before OBSERVED COUNT")

        # COUNT OBSERVED - note the copy=False is modifying the array so this is done last
        result = calculate_stack_statistic_count_observed(stack=stack, ndv=ndv)
        numpy.save(self.get_statistic_filename(Statistic.COUNT_OBSERVED), result)
        del result

    log_mem("DONE")