def generate_animation(index, combined_data):
    """Render the PNG animation frames belonging to one time chunk.

    For every scene slot of time chunk ``index`` this looks for an
    ``animation_<n>.nc`` file in the task's temp directory and, when it
    exists, writes an ``animation_<n>.png`` next to it.

    NOTE(review): ``task`` is not defined inside this function; it is read
    from the enclosing/module scope and must be bound before calling.

    Args:
        index: time chunk index; frame numbers start at
            ``time chunk size * index``.
        combined_data: intermediate product handed to the task's processing
            method when the animation type is "cumulative".
    """
    configured_size = task.get_chunk_size()['time']
    # A time chunk size of None is treated as a single scene per chunk.
    frames_per_chunk = configured_size if configured_size is not None else 1
    base_index = frames_per_chunk * index

    # A distinct loop name keeps the ``index`` argument intact.
    for offset in range(frames_per_chunk):
        frame_number = base_index + offset
        nc_path = os.path.join(task.get_temp_path(),
                               "animation_{}.nc".format(frame_number))
        if not os.path.exists(nc_path):
            continue

        animated_data = xr.open_dataset(nc_path)
        if task.animated_product.animation_id == "cumulative":
            # Give the frame a time dimension/index so the processing method
            # can combine it with the running product.
            animated_data = xr.concat([animated_data], 'time')
            animated_data['time'] = [0]
            clear_mask = task.satellite.get_clean_mask_func()(animated_data)
            animated_data = task.get_processing_method()(
                animated_data,
                clean_mask=clear_mask,
                intermediate_product=combined_data,
                no_data=task.satellite.no_data_value,
                reverse_time=task.get_reverse_time())

        png_path = os.path.join(task.get_temp_path(),
                                "animation_{}.png".format(frame_number))
        write_png_from_xr(
            png_path,
            animated_data,
            bands=[task.query_type.red, task.query_type.green, task.query_type.blue],
            scale=task.satellite.get_scale(),
            no_data=task.satellite.no_data_value)
def perform_task_chunking(self, parameters, task_id=None):
    """Chunk parameter sets into more manageable sizes.

    Uses functions provided by the task model to create a group of
    parameter sets that make up the arg.

    Args:
        self: passed through to ``check_cancel_task`` together with the task.
        parameters: parameter stream containing all kwargs to load data.
        task_id: primary key of the SpectralIndicesTask being processed.

    Returns:
        A dict with the keys ``parameters``, ``geographic_chunks`` and
        ``time_chunks``; None when ``parameters`` is None or when
        ``check_cancel_task`` reports the task as cancelled.
    """
    if parameters is None:
        return None

    task = SpectralIndicesTask.objects.get(pk=task_id)
    if check_cancel_task(self, task):
        return None

    dc = DataAccessApi(config=task.config_path)
    try:
        dates = dc.list_acquisition_dates(**parameters)
        task_chunk_sizing = task.get_chunk_size()

        geographic_chunks = create_geographic_chunks(
            longitude=parameters['longitude'],
            latitude=parameters['latitude'],
            geographic_chunk_size=task_chunk_sizing['geographic'])

        time_chunks = create_time_chunks(
            dates,
            _reversed=task.get_reverse_time(),
            time_chunk_size=task_chunk_sizing['time'])
        logger.info("Time chunks: {}, Geo chunks: {}".format(
            len(time_chunks), len(geographic_chunks)))
    finally:
        # Release the data access handle even when listing dates or chunking
        # raises; previously it was only closed on the success path.
        dc.close()

    if check_cancel_task(self, task):
        return None

    task.update_status("WAIT", "Chunked parameter set.")
    return {
        'parameters': parameters,
        'geographic_chunks': geographic_chunks,
        'time_chunks': time_chunks
    }
def recombine_time_chunks(chunks, task_id=None):
    """Recombine processed SLIP chunks over the time index.

    Opens every time-chunked dataset of one geographic chunk, mosaics them
    together and merges their ``slip`` variables, then writes the result to
    ``recombined_time_<geo_chunk_id>.nc`` in the task's temp directory.

    Args:
        chunks: list of the return from the processing_task function -
            path, metadata, and {chunk ids}. A single tuple is also accepted;
            None entries are ignored.
        task_id: primary key of the SlipTask being processed.

    Returns:
        path to the output product, metadata dict, and a dict containing the
        geo/time ids - or None when there are no usable chunks.
    """
    logger.info("RECOMBINE_TIME")

    if not isinstance(chunks, list):
        chunks = [chunks]
    usable_chunks = [entry for entry in chunks if entry is not None]
    if not usable_chunks:
        return None

    # Ordered by the first tuple element (the chunk's output path).
    total_chunks = sorted(usable_chunks, key=lambda entry: entry[0])
    task = SlipTask.objects.get(pk=task_id)

    first_ids = total_chunks[0][2]
    geo_chunk_id = first_ids['geo_chunk_id']
    time_chunk_id = first_ids['time_chunk_id']

    metadata = {}
    combined_data = None
    combined_slip = None
    # Chunks are walked in reverse sorted order.
    for chunk in reversed(total_chunks):
        metadata.update(chunk[1])
        data = xr.open_dataset(chunk[0], autoclose=True)

        if combined_data is None:
            combined_data = data.drop('slip')
            # The slip layer interacts with the mosaicking below, so it is
            # given a time dimension here.
            combined_slip = xr.concat([data.slip.copy(deep=True)], 'time')
            continue

        # Give time an index to keep mosaicking from breaking.
        data = xr.concat([data], 'time')
        data['time'] = [0]
        clear_mask = task.satellite.get_clean_mask_func()(data)

        # Pixels already flagged as slip are masked out, so only pixels whose
        # slip value is still zero are taken from this chunk.
        already_flagged = xr.concat([combined_slip], 'time').values == 1
        clear_mask[already_flagged] = False

        combined_data = create_mosaic(
            data.drop('slip'),
            clean_mask=clear_mask,
            intermediate_product=combined_data,
            no_data=task.satellite.no_data_value,
            reverse_time=task.get_reverse_time())

        still_zero = combined_slip.values == 0
        combined_slip.values[still_zero] = data.slip.values[still_zero]

    # Drop the time dimension that was added to combined_slip above.
    combined_data['slip'] = combined_slip.isel(time=0, drop=True)

    path = os.path.join(task.get_temp_path(),
                        "recombined_time_{}.nc".format(geo_chunk_id))
    combined_data.to_netcdf(path)
    logger.info("Done combining time chunks for geo: " + str(geo_chunk_id))

    chunk_ids = {'geo_chunk_id': geo_chunk_id, 'time_chunk_id': time_chunk_id}
    return path, metadata, chunk_ids
def recombine_time_chunks(chunks, task_id=None):
    """Recombine processed chunks over the time index.

    Opens the time-chunked datasets of one geographic chunk and folds them
    together with the task's processing method - the same function that was
    used to process them. The first dataset seeds the result; every later
    one is combined with the running product.

    Args:
        chunks: list of the return from the processing_task function -
            path, metadata, and {chunk ids}. A single tuple is also accepted;
            None entries are ignored.
        task_id: primary key of the FractionalCoverTask being processed.

    Returns:
        path to the output product, metadata dict, and a dict containing the
        geo/time ids - or None when there is nothing to combine.
    """
    logger.info("RECOMBINE_TIME")

    if not isinstance(chunks, list):
        chunks = [chunks]
    usable_chunks = [entry for entry in chunks if entry is not None]
    if not usable_chunks:
        return None

    # Ordered by the first tuple element (the chunk's output path).
    total_chunks = sorted(usable_chunks, key=lambda entry: entry[0])
    task = FractionalCoverTask.objects.get(pk=task_id)

    first_ids = total_chunks[0][2]
    geo_chunk_id = first_ids['geo_chunk_id']
    time_chunk_id = first_ids['time_chunk_id']

    metadata = {}
    combined_data = None
    for chunk in total_chunks:
        metadata.update(chunk[1])
        data = xr.open_dataset(chunk[0], autoclose=True)

        if combined_data is None:
            combined_data = data
        else:
            # Give time an index to keep mosaicking from breaking.
            data = xr.concat([data], 'time')
            data['time'] = [0]
            clear_mask = task.satellite.get_clean_mask_func()(data)
            combined_data = task.get_processing_method()(
                data,
                clean_mask=clear_mask,
                intermediate_product=combined_data,
                no_data=task.satellite.no_data_value,
                reverse_time=task.get_reverse_time())

    if combined_data is None:
        return None

    path = os.path.join(task.get_temp_path(),
                        "recombined_time_{}.nc".format(geo_chunk_id))
    combined_data.to_netcdf(path)
    logger.info("Done combining time chunks for geo: " + str(geo_chunk_id))

    chunk_ids = {'geo_chunk_id': geo_chunk_id, 'time_chunk_id': time_chunk_id}
    return path, metadata, chunk_ids
def create_output_products(data, task_id=None):
    """Create the final output products for this algorithm.

    Opens the final dataset and metadata, registers result paths and
    metadata on the task model, writes the dataset as NetCDF, GeoTIFF and
    PNG, optionally assembles the animation GIF and the clean pixel plot,
    marks the task complete and removes the task's temp directory.

    Args:
        data: tuple in the format of processing_task function - path,
            metadata, and {chunk ids}
        task_id: primary key of the AppNameTask being finished.

    Returns:
        True once all products have been written.
    """
    logger.info("CREATE_OUTPUT")
    dataset_path, full_metadata = data[0], data[1]
    dataset = xr.open_dataset(dataset_path, autoclose=True)
    task = AppNameTask.objects.get(pk=task_id)

    # TODO: Add any paths that you've added in your models.py Result model
    # and remove the ones that aren't there.
    task.result_path = os.path.join(task.get_result_path(), "png_mosaic.png")
    task.result_filled_path = os.path.join(task.get_result_path(), "filled_png_mosaic.png")
    task.data_path = os.path.join(task.get_result_path(), "data_tif.tif")
    task.data_netcdf_path = os.path.join(task.get_result_path(), "data_netcdf.nc")
    if task.animated_product.animation_id != 'none':
        task.animation_path = os.path.join(task.get_result_path(), "animation.gif")
    else:
        task.animation_path = ""
    task.final_metadata_from_dataset(dataset)
    task.metadata_from_dict(full_metadata)

    # TODO: Set the bands that should be written to the final products.
    # The last band is whichever of cf_mask / pixel_qa the dataset carries.
    qa_band = 'cf_mask' if 'cf_mask' in dataset else 'pixel_qa'
    bands = ['blue', 'green', 'red', 'nir', 'swir1', 'swir2', qa_band]
    # TODO: If you're creating pngs, specify the RGB bands
    png_bands = [task.query_type.red, task.query_type.green, task.query_type.blue]

    dataset.to_netcdf(task.data_netcdf_path)
    write_geotiff_from_xr(task.data_path, dataset.astype('int32'), bands=bands)
    write_png_from_xr(
        task.result_path,
        dataset,
        bands=png_bands,
        png_filled_path=task.result_filled_path,
        fill_color=task.query_type.fill,
        scale=(0, 4096))

    # TODO: if there is no animation, remove this. Otherwise, open each time
    # iteration slice and write to disk.
    if task.animated_product.animation_id != "none":
        with imageio.get_writer(task.animation_path, mode='I', duration=1.0) as writer:
            frame_indices = range(len(full_metadata))
            if task.animated_product.animation_id == "scene" and task.get_reverse_time():
                frame_indices = reversed(frame_indices)
            for frame_index in frame_indices:
                frame_path = os.path.join(task.get_temp_path(),
                                          "animation_{}.png".format(frame_index))
                # Frames that were never written are skipped.
                if os.path.exists(frame_path):
                    writer.append_data(imageio.imread(frame_path))

    # TODO: if you're capturing more tabular metadata, plot it here by
    # converting these to lists. An example of this is the current water
    # detection app.
    dates = [
        datetime.strptime(acquisition, "%m/%d/%Y")
        for acquisition in task._get_field_as_list('acquisition_list')
    ]
    if len(dates) > 1:
        task.plot_path = os.path.join(task.get_result_path(), "plot_path.png")
        create_2d_plot(
            task.plot_path,
            dates=dates,
            datasets=task._get_field_as_list('clean_pixel_percentages_per_acquisition'),
            data_labels="Clean Pixel Percentage (%)",
            titles="Clean Pixel Percentage Per Acquisition")

    logger.info("All products created.")
    # task.update_bounds_from_dataset(dataset)
    task.complete = True
    task.execution_end = datetime.now()
    task.update_status(
        "OK", "All products have been generated. Your result will be loaded on the map.")
    shutil.rmtree(task.get_temp_path())
    return True
def processing_task(self,
                    task_id=None,
                    geo_chunk_id=None,
                    time_chunk_id=None,
                    geographic_chunk=None,
                    time_chunk=None,
                    **parameters):
    """Process a parameter set and save the results to disk.

    Uses the geographic and time chunk id to identify output products.
    **parameters is updated with time and geographic ranges then used to
    load data. The task model holds the iterative property that signifies
    whether the algorithm is iterative or if all data needs to be loaded at
    once.

    Args:
        self: passed through to ``check_cancel_task`` together with the task.
        task_id, geo_chunk_id, time_chunk_id: identification for the main
            task and what chunk this is processing
        geographic_chunk: range of latitude and longitude to load - dict
            with keys latitude, longitude
        time_chunk: list of acquisition dates
        parameters: all required kwargs to load data.

    Returns:
        path to the output product, metadata dict, and a dict containing the
        geo/time ids. None when the task is cancelled, the temp directory is
        missing, or no scene produced any data.
    """
    chunk_id = "_".join([str(geo_chunk_id), str(time_chunk_id)])
    task = SpectralIndicesTask.objects.get(pk=task_id)
    if check_cancel_task(self, task):
        return None

    logger.info("Starting chunk: " + chunk_id)
    if not os.path.exists(task.get_temp_path()):
        return None

    metadata = {}

    # Iterative algorithms load one acquisition at a time; otherwise a single
    # range spanning the whole time chunk is loaded.
    if task.get_iterative():
        times = [_get_datetime_range_containing(acquisition) for acquisition in time_chunk]
    else:
        times = [_get_datetime_range_containing(time_chunk[0], time_chunk[-1])]

    dc = DataAccessApi(config=task.config_path)
    try:
        updated_params = parameters
        updated_params.update(geographic_chunk)
        iteration_data = None
        for time_range in times:
            updated_params.update({'time': time_range})
            data = dc.get_dataset_by_extent(**updated_params)
            if check_cancel_task(self, task):
                return None

            if data is None:
                logger.info("Empty chunk.")
                continue
            if 'time' not in data:
                logger.info("Invalid chunk.")
                continue

            clear_mask = task.satellite.get_clean_mask_func()(data)
            add_timestamp_data_to_xr(data)

            metadata = task.metadata_from_dataset(metadata, data, clear_mask, updated_params)

            iteration_data = task.get_processing_method()(
                data,
                clean_mask=clear_mask,
                intermediate_product=iteration_data,
                no_data=task.satellite.no_data_value,
                reverse_time=task.get_reverse_time())

            if check_cancel_task(self, task):
                return None

            task.scenes_processed = F('scenes_processed') + 1
            task.save(update_fields=['scenes_processed'])

        if iteration_data is None:
            return None

        path = os.path.join(task.get_temp_path(), chunk_id + ".nc")
        export_xarray_to_netcdf(iteration_data, path)
    finally:
        # Every exit path (cancellation, empty result, exception, success)
        # releases the data access handle; previously only success did.
        dc.close()

    logger.info("Done with chunk: " + chunk_id)
    return path, metadata, {'geo_chunk_id': geo_chunk_id, 'time_chunk_id': time_chunk_id}
def create_output_products(data, task_id=None):
    """Create the final output products for this algorithm.

    Opens the final dataset and metadata, registers result paths and
    metadata on the task model, writes the dataset as NetCDF, GeoTIFF and
    PNG, optionally assembles the animation GIF and the clean pixel plot,
    marks the task complete and removes the task's temp directory.

    Args:
        data: tuple in the format of processing_task function - path,
            metadata, and {chunk ids}
        task_id: primary key of the CustomMosaicToolTask being finished.

    Returns:
        True once all products have been written.
    """
    logger.info("CREATE_OUTPUT")
    dataset_path, full_metadata = data[0], data[1]
    dataset = xr.open_dataset(dataset_path, autoclose=True)
    task = CustomMosaicToolTask.objects.get(pk=task_id)

    task.result_path = os.path.join(task.get_result_path(), "png_mosaic.png")
    task.result_filled_path = os.path.join(task.get_result_path(), "filled_png_mosaic.png")
    task.data_path = os.path.join(task.get_result_path(), "data_tif.tif")
    task.data_netcdf_path = os.path.join(task.get_result_path(), "data_netcdf.nc")
    if task.animated_product.animation_id != 'none':
        task.animation_path = os.path.join(task.get_result_path(), "animation.gif")
    else:
        task.animation_path = ""
    task.final_metadata_from_dataset(dataset)
    task.metadata_from_dict(full_metadata)

    bands = task.satellite.get_measurements()
    png_bands = [task.query_type.red, task.query_type.green, task.query_type.blue]

    dataset.to_netcdf(task.data_netcdf_path)
    write_geotiff_from_xr(
        task.data_path,
        dataset.astype('int32'),
        bands=bands,
        no_data=task.satellite.no_data_value)
    write_png_from_xr(
        task.result_path,
        dataset,
        bands=png_bands,
        png_filled_path=task.result_filled_path,
        fill_color=task.query_type.fill,
        scale=task.satellite.get_scale(),
        low_res=True,
        no_data=task.satellite.no_data_value)

    if task.animated_product.animation_id != "none":
        with imageio.get_writer(task.animation_path, mode='I', duration=1.0) as writer:
            frame_indices = range(len(full_metadata))
            if task.animated_product.animation_id == "scene" and task.get_reverse_time():
                frame_indices = reversed(frame_indices)
            for frame_index in frame_indices:
                frame_path = os.path.join(task.get_temp_path(),
                                          "animation_{}.png".format(frame_index))
                # Frames that were never written are skipped.
                if os.path.exists(frame_path):
                    writer.append_data(imageio.imread(frame_path))

    dates = [
        datetime.strptime(acquisition, "%m/%d/%Y")
        for acquisition in task._get_field_as_list('acquisition_list')
    ]
    if len(dates) > 1:
        task.plot_path = os.path.join(task.get_result_path(), "plot_path.png")
        create_2d_plot(
            task.plot_path,
            dates=dates,
            datasets=task._get_field_as_list('clean_pixel_percentages_per_acquisition'),
            data_labels="Clean Pixel Percentage (%)",
            titles="Clean Pixel Percentage Per Acquisition")

    logger.info("All products created.")
    # task.update_bounds_from_dataset(dataset)
    task.complete = True
    task.execution_end = datetime.now()
    task.update_status(
        "OK", "All products have been generated. Your result will be loaded on the map.")
    shutil.rmtree(task.get_temp_path())
    return True
def processing_task(task_id=None,
                    geo_chunk_id=None,
                    time_chunk_id=None,
                    geographic_chunk=None,
                    time_chunk=None,
                    **parameters):
    """Process a parameter set and save the results to disk.

    Uses the geographic and time chunk id to identify output products.
    **parameters is updated with time and geographic ranges then used to
    load data. The task model holds the iterative property that signifies
    whether the algorithm is iterative or if all data needs to be loaded at
    once. When the task has an animated product, one
    ``animation_<geo_chunk_id>_<n>.nc`` file is also written per scene.

    Args:
        task_id, geo_chunk_id, time_chunk_id: identification for the main
            task and what chunk this is processing
        geographic_chunk: range of latitude and longitude to load - dict
            with keys latitude, longitude
        time_chunk: list of acquisition dates
        parameters: all required kwargs to load data.

    Returns:
        path to the output product, metadata dict, and a dict containing the
        geo/time ids. None when the temp directory is missing or no scene
        produced any data.
    """
    chunk_id = "_".join([str(geo_chunk_id), str(time_chunk_id)])
    task = CustomMosaicToolTask.objects.get(pk=task_id)

    logger.info("Starting chunk: " + chunk_id)
    if not os.path.exists(task.get_temp_path()):
        return None

    metadata = {}

    def _get_datetime_range_containing(*time_ranges):
        # Widen the range by a microsecond on each side so both end points
        # fall strictly inside it.
        return (min(time_ranges) - timedelta(microseconds=1),
                max(time_ranges) + timedelta(microseconds=1))

    # Iterative algorithms load one acquisition at a time; otherwise a single
    # range spanning the whole time chunk is loaded.
    if task.get_iterative():
        times = [_get_datetime_range_containing(acquisition) for acquisition in time_chunk]
    else:
        times = [_get_datetime_range_containing(time_chunk[0], time_chunk[-1])]

    dc = DataAccessApi(config=task.config_path)
    try:
        updated_params = parameters
        updated_params.update(geographic_chunk)
        iteration_data = None

        configured_size = task.get_chunk_size()['time']
        # A time chunk size of None is treated as one scene per chunk.
        base_index = (configured_size if configured_size is not None else 1) * time_chunk_id

        for time_index, time_range in enumerate(times):
            updated_params.update({'time': time_range})
            data = dc.get_stacked_datasets_by_extent(**updated_params)
            if data is None or 'time' not in data:
                logger.info("Invalid chunk.")
                continue

            clear_mask = task.satellite.get_clean_mask_func()(data)
            add_timestamp_data_to_xr(data)

            metadata = task.metadata_from_dataset(metadata, data, clear_mask, updated_params)

            iteration_data = task.get_processing_method()(
                data,
                clean_mask=clear_mask,
                intermediate_product=iteration_data,
                no_data=task.satellite.no_data_value,
                reverse_time=task.get_reverse_time())

            if task.animated_product.animation_id != "none":
                animation_path = os.path.join(
                    task.get_temp_path(),
                    "animation_{}_{}.nc".format(str(geo_chunk_id), str(base_index + time_index)))
                if task.animated_product.animation_id == "scene":
                    # need to clear out all the metadata before writing
                    clear_attrs(data)
                    # can't reindex on time, so the time coordinate is dropped
                    data.isel(time=0).drop('time').to_netcdf(animation_path)
                elif task.animated_product.animation_id == "cumulative":
                    iteration_data.to_netcdf(animation_path)

            task.scenes_processed = F('scenes_processed') + 1
            # Save only the counter so this worker's stale copy of the other
            # columns is not written back over concurrent updates.
            task.save(update_fields=['scenes_processed'])

        if iteration_data is None:
            return None

        path = os.path.join(task.get_temp_path(), chunk_id + ".nc")
        iteration_data.to_netcdf(path)
    finally:
        # Release the data access handle on every exit path; previously it
        # stayed open when there was no result or an exception was raised.
        dc.close()

    logger.info("Done with chunk: " + chunk_id)
    return path, metadata, {'geo_chunk_id': geo_chunk_id, 'time_chunk_id': time_chunk_id}
def recombine_time_chunks(chunks, task_id=None):
    """Recombine processed chunks over the time index.

    Opens the time-chunked datasets of one geographic chunk and folds them
    together with the task's processing method - the same function that was
    used to process them. When the task has an animated product, the PNG
    frames for each time chunk are rendered along the way.

    Args:
        chunks: list of the return from the processing_task function -
            path, metadata, and {chunk ids}. A single tuple is also accepted;
            None entries are ignored.
        task_id: primary key of the AppNameTask being processed.

    Returns:
        path to the output product, metadata dict, and a dict containing the
        geo/time ids - or None when there are no usable chunks.
    """
    logger.info("RECOMBINE_TIME")

    chunks = chunks if isinstance(chunks, list) else [chunks]
    chunks = [chunk for chunk in chunks if chunk is not None]
    if not chunks:
        return None

    # ``chunks`` is always a list here, so no second isinstance check is
    # needed. Ordered by the first tuple element (the chunk's output path).
    total_chunks = sorted(chunks, key=lambda x: x[0])
    task = AppNameTask.objects.get(pk=task_id)
    geo_chunk_id = total_chunks[0][2]['geo_chunk_id']
    time_chunk_id = total_chunks[0][2]['time_chunk_id']

    metadata = {}

    # TODO: If there is no animation, remove this block. Otherwise, compute
    # the data needed to create each frame.
    def generate_animation(index, combined_data):
        """Write the PNG frames for time chunk ``index``."""
        configured_size = task.get_chunk_size()['time']
        # A time chunk size of None is treated as one scene per chunk.
        frames_per_chunk = configured_size if configured_size is not None else 1
        base_index = frames_per_chunk * index

        # A distinct loop name keeps the ``index`` argument intact.
        for offset in range(frames_per_chunk):
            frame_number = base_index + offset
            nc_path = os.path.join(task.get_temp_path(),
                                   "animation_{}.nc".format(frame_number))
            if not os.path.exists(nc_path):
                continue

            animated_data = xr.open_dataset(nc_path)
            if task.animated_product.animation_id == "cumulative":
                animated_data = xr.concat([animated_data], 'time')
                animated_data['time'] = [0]
                clear_mask = task.satellite.get_clean_mask_func()(animated_data)
                animated_data = task.get_processing_method()(
                    animated_data,
                    clean_mask=clear_mask,
                    intermediate_product=combined_data,
                    no_data=task.satellite.no_data_value,
                    reverse_time=task.get_reverse_time())

            png_path = os.path.join(task.get_temp_path(),
                                    "animation_{}.png".format(frame_number))
            write_png_from_xr(
                png_path,
                animated_data,
                bands=[task.query_type.red, task.query_type.green, task.query_type.blue],
                scale=task.satellite.get_scale(),
                no_data=task.satellite.no_data_value)

    combined_data = None
    for index, chunk in enumerate(total_chunks):
        metadata.update(chunk[1])
        data = xr.open_dataset(chunk[0])
        if combined_data is None:
            # TODO: If there is no animation, remove this.
            if task.animated_product.animation_id != "none":
                # No running product exists yet, so None is passed through.
                generate_animation(index, combined_data)
            combined_data = data
            continue

        # Give time an index to keep mosaicking from breaking.
        data = xr.concat([data], 'time')
        data['time'] = [0]
        clear_mask = task.satellite.get_clean_mask_func()(data)
        combined_data = task.get_processing_method()(
            data,
            clean_mask=clear_mask,
            intermediate_product=combined_data,
            no_data=task.satellite.no_data_value,
            reverse_time=task.get_reverse_time())

        # if we're animating, combine it all and save to disk.
        # TODO: If there is no animation, remove this.
        if task.animated_product.animation_id != "none":
            generate_animation(index, combined_data)

    path = os.path.join(task.get_temp_path(), "recombined_time_{}.nc".format(geo_chunk_id))
    export_xarray_to_netcdf(combined_data, path)
    logger.info("Done combining time chunks for geo: " + str(geo_chunk_id))
    return path, metadata, {'geo_chunk_id': geo_chunk_id, 'time_chunk_id': time_chunk_id}
def processing_task(task_id=None,
                    geo_chunk_id=None,
                    time_chunk_id=None,
                    geographic_chunk=None,
                    time_chunk=None,
                    **parameters):
    """Process a parameter set and save the results to disk.

    Uses the geographic and time chunk id to identify output products.
    **parameters is combined with the time and geographic ranges and then
    used to load data. Computes a single SLIP baseline comparison - returns
    a slip mask and mosaic.

    Args:
        task_id, geo_chunk_id, time_chunk_id: identification for the main
            task and what chunk this is processing
        geographic_chunk: range of latitude and longitude to load - dict
            with keys latitude, longitude
        time_chunk: list of acquisition dates
        parameters: all required kwargs to load data. Must include
            ``measurements``, which is removed for the DEM query.

    Returns:
        path to the output product, metadata dict, and a dict containing the
        geo/time ids. None when the temp directory is missing or either the
        scene data or the DEM data is unavailable.
    """
    chunk_id = "_".join([str(geo_chunk_id), str(time_chunk_id)])
    task = SlipTask.objects.get(pk=task_id)

    logger.info("Starting chunk: " + chunk_id)
    if not os.path.exists(task.get_temp_path()):
        return None

    metadata = {}

    def _get_datetime_range_containing(*time_ranges):
        # Widen the range by a microsecond on each side so both end points
        # fall strictly inside it.
        return (min(time_ranges) - timedelta(microseconds=1),
                max(time_ranges) + timedelta(microseconds=1))

    time_range = _get_datetime_range_containing(time_chunk[0], time_chunk[-1])

    dc = DataAccessApi(config=task.config_path)
    try:
        # Copy so the caller's kwargs are not mutated.
        updated_params = {**parameters}
        updated_params.update(geographic_chunk)
        updated_params.update({'time': time_range})
        data = dc.get_dataset_by_extent(**updated_params)

        # grab dem data as well
        dem_parameters = {**updated_params}
        dem_parameters.update({
            'product': 'terra_aster_gdm_' + task.area_id,
            'platform': 'TERRA'
        })
        dem_parameters.pop('time')
        dem_parameters.pop('measurements')
        dem_data = dc.get_dataset_by_extent(**dem_parameters)

        # The loader may hand back None; treat that like a dataset without a
        # time dimension instead of failing on the membership test.
        if data is None or dem_data is None:
            return None
        if 'time' not in data or 'time' not in dem_data:
            return None

        # target data is most recent, with the baseline being everything else.
        target_data = xr.concat([data.isel(time=-1)], 'time')
        baseline_data = data.isel(time=slice(None, -1))

        target_clear_mask = task.satellite.get_clean_mask_func()(target_data)
        baseline_clear_mask = task.satellite.get_clean_mask_func()(baseline_data)
        combined_baseline = task.get_processing_method()(
            baseline_data,
            clean_mask=baseline_clear_mask,
            no_data=task.satellite.no_data_value,
            reverse_time=task.get_reverse_time())

        target_data = create_mosaic(
            target_data,
            clean_mask=target_clear_mask,
            no_data=task.satellite.no_data_value,
            reverse_time=task.get_reverse_time())

        slip_data = compute_slip(
            combined_baseline, target_data, dem_data, no_data=task.satellite.no_data_value)
        target_data['slip'] = slip_data

        # The acquisition time recorded is the last (target) scene's.
        metadata = task.metadata_from_dataset(
            metadata,
            target_data,
            target_clear_mask,
            updated_params,
            time=data.time.values.astype('M8[ms]').tolist()[-1])

        task.scenes_processed = F('scenes_processed') + 1
        # Save only the counter so this worker's stale copy of the other
        # columns is not written back over concurrent updates.
        task.save(update_fields=['scenes_processed'])

        path = os.path.join(task.get_temp_path(), chunk_id + ".nc")
        clear_attrs(target_data)
        target_data.to_netcdf(path)
    finally:
        # Release the data access handle on every exit path; previously it
        # stayed open on the early return and on exceptions.
        dc.close()

    logger.info("Done with chunk: " + chunk_id)
    return path, metadata, {'geo_chunk_id': geo_chunk_id, 'time_chunk_id': time_chunk_id}