def processing_task(task_id=None,
                    geo_chunk_id=None,
                    time_chunk_id=None,
                    geographic_chunk=None,
                    time_chunk=None,
                    **parameters):
    """Process a parameter set and save the results to disk.

    Uses the geographic and time chunk ids to identify output products.
    **parameters is updated with time and geographic ranges, then used to load data.
    The task model holds the iterative property that signifies whether the
    algorithm is iterative or whether all data needs to be loaded at once.

    Args:
        task_id, geo_chunk_id, time_chunk_id: identification for the main task and what chunk this is processing
        geographic_chunk: range of latitude and longitude to load - dict with keys latitude, longitude
        time_chunk: list of acquisition dates
        parameters: all required kwargs to load data.

    Returns:
        path to the output product, metadata dict, and a dict containing the geo/time ids
    """
    chunk_id = "_".join([str(geo_chunk_id), str(time_chunk_id)])
    task = SpectralIndicesTask.objects.get(pk=task_id)

    logger.info("Starting chunk: " + chunk_id)
    if not os.path.exists(task.get_temp_path()):
        return None

    iteration_data = None
    metadata = {}

    def _get_datetime_range_containing(*time_ranges):
        return (min(time_ranges) - timedelta(microseconds=1), max(time_ranges) + timedelta(microseconds=1))

    times = list(
        map(_get_datetime_range_containing, time_chunk)
        if task.get_iterative() else [_get_datetime_range_containing(time_chunk[0], time_chunk[-1])])
    dc = DataAccessApi(config=task.config_path)
    updated_params = parameters
    updated_params.update(geographic_chunk)
    base_index = (task.get_chunk_size()['time'] if task.get_chunk_size()['time'] is not None else 1) * time_chunk_id
    for time_index, time in enumerate(times):
        updated_params.update({'time': time})
        data = dc.get_dataset_by_extent(**updated_params)
        if data is None or 'time' not in data:
            logger.info("Invalid chunk.")
            continue

        clear_mask = create_cfmask_clean_mask(data.cf_mask) if 'cf_mask' in data else create_bit_mask(
            data.pixel_qa, [1, 2])
        add_timestamp_data_to_xr(data)

        metadata = task.metadata_from_dataset(metadata, data, clear_mask, updated_params)

        iteration_data = task.get_processing_method()(data, clean_mask=clear_mask, intermediate_product=iteration_data)

        task.scenes_processed = F('scenes_processed') + 1
        task.save()

    if iteration_data is None:
        return None

    path = os.path.join(task.get_temp_path(), chunk_id + ".nc")
    iteration_data.to_netcdf(path)
    dc.close()
    logger.info("Done with chunk: " + chunk_id)
    return path, metadata, {'geo_chunk_id': geo_chunk_id, 'time_chunk_id': time_chunk_id}
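# --- Hedged sketch (illustration only, not part of the app code) -------------
# The helper above pads the min/max of its arguments by one microsecond so that
# boundary acquisitions fall strictly inside the range handed to the data
# access API. Iterative algorithms get one range per acquisition; non-iterative
# ones get a single range spanning the whole chunk.
def _demo_time_ranges():
    from datetime import datetime, timedelta

    def _get_datetime_range_containing(*time_ranges):
        return (min(time_ranges) - timedelta(microseconds=1), max(time_ranges) + timedelta(microseconds=1))

    time_chunk = [datetime(2018, 1, 1), datetime(2018, 1, 17), datetime(2018, 2, 2)]
    iterative_times = list(map(_get_datetime_range_containing, time_chunk))  # three one-scene ranges
    single_range = _get_datetime_range_containing(time_chunk[0], time_chunk[-1])  # one spanning range
    assert len(iterative_times) == 3
    assert single_range[0] < time_chunk[0] < time_chunk[-1] < single_range[1]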
def processing_task(self,
                    task_id=None,
                    geo_chunk_id=None,
                    time_chunk_id=None,
                    geographic_chunk=None,
                    time_chunk=None,
                    **parameters):
    """Process a parameter set and save the results to disk.

    Uses the geographic and time chunk ids to identify output products.
    **parameters is updated with time and geographic ranges, then used to load data.
    The task model holds the iterative property that signifies whether the
    algorithm is iterative or whether all data needs to be loaded at once.

    Args:
        task_id, geo_chunk_id, time_chunk_id: identification for the main task and what chunk this is processing
        geographic_chunk: range of latitude and longitude to load - dict with keys latitude, longitude
        time_chunk: list of acquisition dates
        parameters: all required kwargs to load data.

    Returns:
        path to the output product, metadata dict, and a dict containing the geo/time ids
    """
    chunk_id = "_".join([str(geo_chunk_id), str(time_chunk_id)])
    task = NdviAnomalyTask.objects.get(pk=task_id)
    if check_cancel_task(self, task):
        return

    logger.info("Starting chunk: " + chunk_id)
    if not os.path.exists(task.get_temp_path()):
        return None

    metadata = {}

    def _get_datetime_range_containing(*time_ranges):
        return (min(time_ranges) - timedelta(microseconds=1), max(time_ranges) + timedelta(microseconds=1))

    base_scene_time_range = parameters['time']

    dc = DataAccessApi(config=task.config_path)
    updated_params = parameters
    updated_params.update(geographic_chunk)

    # Generate the baseline data - one time slice at a time.
    full_dataset = []
    for time_index, time in enumerate(time_chunk):
        updated_params.update({'time': _get_datetime_range_containing(time)})
        data = dc.get_dataset_by_extent(**updated_params)
        if check_cancel_task(self, task):
            return
        if data is None or 'time' not in data:
            logger.info("Invalid chunk.")
            continue
        full_dataset.append(data.copy(deep=True))

    # Load the selected scene and mosaic it, in case two scenes were returned
    # (handles scene boundaries/overlapping data).
    updated_params.update({'time': base_scene_time_range})
    selected_scene = dc.get_dataset_by_extent(**updated_params)
    if check_cancel_task(self, task):
        return

    if len(full_dataset) == 0 or 'time' not in selected_scene:
        return None

    # Concat individual slices over time, then compute metadata + mosaic.
    baseline_data = xr.concat(full_dataset, 'time')
    baseline_clear_mask = task.satellite.get_clean_mask_func()(baseline_data)
    metadata = task.metadata_from_dataset(metadata, baseline_data, baseline_clear_mask, parameters)

    selected_scene_clear_mask = task.satellite.get_clean_mask_func()(selected_scene)
    metadata = task.metadata_from_dataset(metadata, selected_scene, selected_scene_clear_mask, parameters)

    selected_scene = task.get_processing_method()(
        selected_scene,
        clean_mask=selected_scene_clear_mask,
        intermediate_product=None,
        no_data=task.satellite.no_data_value)

    # We need to regenerate the clear mask using the mosaic now.
    selected_scene_clear_mask = task.satellite.get_clean_mask_func()(selected_scene)
    if check_cancel_task(self, task):
        return

    ndvi_products = compute_ndvi_anomaly(
        baseline_data,
        selected_scene,
        baseline_clear_mask=baseline_clear_mask,
        selected_scene_clear_mask=selected_scene_clear_mask,
        no_data=task.satellite.no_data_value)

    full_product = xr.merge([ndvi_products, selected_scene])

    task.scenes_processed = F('scenes_processed') + 1
    task.save(update_fields=['scenes_processed'])

    path = os.path.join(task.get_temp_path(), chunk_id + ".nc")
    full_product.to_netcdf(path)
    dc.close()
    logger.info("Done with chunk: " + chunk_id)
    return path, metadata, {'geo_chunk_id': geo_chunk_id, 'time_chunk_id': time_chunk_id}
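# --- Hedged sketch (illustration only, not part of the app code) -------------
# The baseline above is assembled by loading one acquisition at a time and
# concatenating the slices along 'time'; a minimal xarray equivalent:
def _demo_concat_baseline():
    import numpy as np
    import xarray as xr

    slices = [
        xr.Dataset({'red': (('time', 'y', 'x'), np.random.rand(1, 2, 2))},
                   coords={'time': [np.datetime64('2018-01-01') + np.timedelta64(i, 'D')]}) for i in range(3)
    ]
    baseline = xr.concat(slices, 'time')  # a single Dataset with a 3-element time dimension
    assert baseline.sizes['time'] == 3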
def processing_task(self,
                    task_id=None,
                    geo_chunk_id=None,
                    time_chunk_id=None,
                    geographic_chunk=None,
                    time_chunk=None,
                    **parameters):
    """Process a parameter set and save the results to disk.

    Uses the geographic and time chunk ids to identify output products.
    **parameters is updated with time and geographic ranges, then used to load data.
    The task model holds the iterative property that signifies whether the
    algorithm is iterative or whether all data needs to be loaded at once.

    Args:
        task_id, geo_chunk_id, time_chunk_id: identification for the main task and what chunk this is processing
        geographic_chunk: range of latitude and longitude to load - dict with keys latitude, longitude
        time_chunk: list of acquisition dates
        parameters: all required kwargs to load data.

    Returns:
        path to the output product, metadata dict, and a dict containing the geo/time ids
    """
    chunk_id = "_".join([str(geo_chunk_id), str(time_chunk_id)])
    task = CloudCoverageTask.objects.get(pk=task_id)
    if check_cancel_task(self, task):
        return

    logger.info("Starting chunk: " + chunk_id)
    if not os.path.exists(task.get_temp_path()):
        return None

    iteration_data = None
    cloud_cover = None
    metadata = {}

    def _get_datetime_range_containing(*time_ranges):
        return (min(time_ranges) - timedelta(microseconds=1), max(time_ranges) + timedelta(microseconds=1))

    times = list(
        map(_get_datetime_range_containing, time_chunk)
        if task.get_iterative() else [_get_datetime_range_containing(time_chunk[0], time_chunk[-1])])
    dc = DataAccessApi(config=task.config_path)
    updated_params = parameters
    updated_params.update(geographic_chunk)
    base_index = (task.get_chunk_size()['time'] if task.get_chunk_size()['time'] is not None else 1) * time_chunk_id
    for time_index, time in enumerate(times):
        updated_params.update({'time': time})
        data = dc.get_dataset_by_extent(**updated_params)
        if check_cancel_task(self, task):
            return
        if data is None or 'time' not in data:
            logger.info("Invalid chunk.")
            continue

        clear_mask = task.satellite.get_clean_mask_func()(data)
        metadata = task.metadata_from_dataset(metadata, data, clear_mask, updated_params)

        if check_cancel_task(self, task):
            return

        mosaic, cloud_coverage = task.get_processing_method()
        iteration_data = mosaic(
            data,
            clean_mask=clear_mask,
            intermediate_product=iteration_data,
            no_data=task.satellite.no_data_value,
            reverse_time=task.get_reverse_time())
        cloud_cover = cloud_coverage(
            data,
            clean_mask=clear_mask,
            intermediate_product=cloud_cover,
            no_data=task.satellite.no_data_value)

        if check_cancel_task(self, task):
            return

        task.scenes_processed = F('scenes_processed') + 1
        task.save(update_fields=['scenes_processed'])

    if iteration_data is None:
        return None

    full_product = xr.merge([iteration_data, cloud_cover])
    path = os.path.join(task.get_temp_path(), chunk_id + ".nc")
    export_xarray_to_netcdf(full_product, path)
    dc.close()
    logger.info("Done with chunk: " + chunk_id)
    return path, metadata, {'geo_chunk_id': geo_chunk_id, 'time_chunk_id': time_chunk_id}
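# --- Note (illustration only) -------------------------------------------------
# task.scenes_processed = F('scenes_processed') + 1 pushes the increment into
# SQL, so parallel chunk workers updating the same task row do not overwrite
# one another's counts; update_fields=['scenes_processed'] limits the UPDATE to
# that single column. Roughly equivalent SQL:
#     UPDATE <task_table> SET scenes_processed = scenes_processed + 1 WHERE id = <task_id>;
# After save(), the in-memory attribute holds an expression rather than an int;
# call task.refresh_from_db() before reading the updated value.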
def processing_task(task_id=None,
                    geo_chunk_id=None,
                    time_chunk_id=None,
                    geographic_chunk=None,
                    time_chunk=None,
                    **parameters):
    """Process a parameter set and save the results to disk.

    Uses the geographic and time chunk ids to identify output products.
    **parameters is updated with time and geographic ranges, then used to load data.
    The task model holds the iterative property that signifies whether the
    algorithm is iterative or whether all data needs to be loaded at once.

    Args:
        task_id, geo_chunk_id, time_chunk_id: identification for the main task and what chunk this is processing
        geographic_chunk: range of latitude and longitude to load - dict with keys latitude, longitude
        time_chunk: list of acquisition dates
        parameters: all required kwargs to load data.

    Returns:
        path to the output product, metadata dict, and a dict containing the geo/time ids
    """
    chunk_id = "_".join([str(geo_chunk_id), str(time_chunk_id)])
    task = CustomMosaicToolTask.objects.get(pk=task_id)

    logger.info("Starting chunk: " + chunk_id)
    if not os.path.exists(task.get_temp_path()):
        return None

    iteration_data = None
    metadata = {}

    def _get_datetime_range_containing(*time_ranges):
        return (min(time_ranges) - timedelta(microseconds=1), max(time_ranges) + timedelta(microseconds=1))

    times = list(
        map(_get_datetime_range_containing, time_chunk)
        if task.get_iterative() else [_get_datetime_range_containing(time_chunk[0], time_chunk[-1])])
    dc = DataAccessApi(config=task.config_path)
    updated_params = parameters
    updated_params.update(geographic_chunk)
    base_index = (task.get_chunk_size()['time'] if task.get_chunk_size()['time'] is not None else 1) * time_chunk_id
    for time_index, time in enumerate(times):
        updated_params.update({'time': time})
        data = dc.get_stacked_datasets_by_extent(**updated_params)
        if data is None or 'time' not in data:
            logger.info("Invalid chunk.")
            continue

        clear_mask = task.satellite.get_clean_mask_func()(data)
        add_timestamp_data_to_xr(data)

        metadata = task.metadata_from_dataset(metadata, data, clear_mask, updated_params)

        iteration_data = task.get_processing_method()(
            data,
            clean_mask=clear_mask,
            intermediate_product=iteration_data,
            no_data=task.satellite.no_data_value,
            reverse_time=task.get_reverse_time())

        if task.animated_product.animation_id != "none":
            path = os.path.join(task.get_temp_path(),
                                "animation_{}_{}.nc".format(str(geo_chunk_id), str(base_index + time_index)))
            if task.animated_product.animation_id == "scene":
                # Need to clear out all the metadata.
                clear_attrs(data)
                # Can't reindex on time - select the single scene and drop the dimension instead.
                data.isel(time=0).drop('time').to_netcdf(path)
            elif task.animated_product.animation_id == "cumulative":
                iteration_data.to_netcdf(path)

        task.scenes_processed = F('scenes_processed') + 1
        task.save()

    path = os.path.join(task.get_temp_path(), chunk_id + ".nc")

    if iteration_data is None:
        return None
    iteration_data.to_netcdf(path)
    dc.close()
    logger.info("Done with chunk: " + chunk_id)
    return path, metadata, {'geo_chunk_id': geo_chunk_id, 'time_chunk_id': time_chunk_id}
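# --- Hedged sketch (illustration only, not part of the app code) -------------
# For "scene" animation frames the code selects one acquisition and drops the
# time coordinate entirely, so each frame serializes as a plain 2D product:
def _demo_scene_frame():
    import numpy as np
    import xarray as xr

    data = xr.Dataset({'red': (('time', 'y', 'x'), np.random.rand(2, 2, 2))}, coords={'time': [0, 1]})
    frame = data.isel(time=0).drop('time')  # 2D Dataset: no time dimension or coordinate left
    assert 'time' not in frame.dims and 'time' not in frame.coords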
def processing_task(task_id=None,
                    geo_chunk_id=None,
                    time_chunk_id=None,
                    geographic_chunk=None,
                    time_chunk=None,
                    **parameters):
    """Process a parameter set and save the results to disk.

    Uses the geographic and time chunk ids to identify output products.
    **parameters is updated with time and geographic ranges, then used to load data.
    The task model holds the iterative property that signifies whether the
    algorithm is iterative or whether all data needs to be loaded at once.

    Args:
        task_id, geo_chunk_id, time_chunk_id: identification for the main task and what chunk this is processing
        geographic_chunk: range of latitude and longitude to load - dict with keys latitude, longitude
        time_chunk: list of acquisition dates
        parameters: all required kwargs to load data.

    Returns:
        path to the output product, metadata dict, and a dict containing the geo/time ids
    """
    chunk_id = "_".join([str(geo_chunk_id), str(time_chunk_id)])
    task = TsmTask.objects.get(pk=task_id)

    logger.info("Starting chunk: " + chunk_id)
    if not os.path.exists(task.get_temp_path()):
        return None

    metadata = {}

    def _get_datetime_range_containing(*time_ranges):
        return (min(time_ranges) - timedelta(microseconds=1), max(time_ranges) + timedelta(microseconds=1))

    times = list(
        map(_get_datetime_range_containing, time_chunk)
        if task.get_iterative() else [_get_datetime_range_containing(time_chunk[0], time_chunk[-1])])
    dc = DataAccessApi(config=task.config_path)
    updated_params = parameters
    updated_params.update(geographic_chunk)
    water_analysis = None
    tsm_analysis = None
    combined_data = None
    base_index = (task.get_chunk_size()['time'] if task.get_chunk_size()['time'] is not None else 1) * time_chunk_id
    for time_index, time in enumerate(times):
        updated_params.update({'time': time})
        data = dc.get_stacked_datasets_by_extent(**updated_params)
        if data is None or 'time' not in data:
            logger.info("Invalid chunk.")
            continue

        clear_mask = task.satellite.get_clean_mask_func()(data)

        wofs_data = task.get_processing_method()(
            data, clean_mask=clear_mask, enforce_float64=True, no_data=task.satellite.no_data_value)
        water_analysis = perform_timeseries_analysis(
            wofs_data, 'wofs', intermediate_product=water_analysis, no_data=task.satellite.no_data_value)

        # Restrict the clean mask to likely water pixels before computing TSM.
        clear_mask[(data.swir2.values > 100) | (wofs_data.wofs.values == 0)] = False
        tsm_data = tsm(data, clean_mask=clear_mask, no_data=task.satellite.no_data_value)
        tsm_analysis = perform_timeseries_analysis(
            tsm_data, 'tsm', intermediate_product=tsm_analysis, no_data=task.satellite.no_data_value)

        combined_data = tsm_analysis
        combined_data['wofs'] = water_analysis.total_data
        combined_data['wofs_total_clean'] = water_analysis.total_clean

        metadata = task.metadata_from_dataset(metadata, tsm_data, clear_mask, updated_params)

        if task.animated_product.animation_id != "none":
            path = os.path.join(task.get_temp_path(),
                                "animation_{}_{}.nc".format(str(geo_chunk_id), str(base_index + time_index)))
            animated_data = tsm_data.isel(
                time=0, drop=True) if task.animated_product.animation_id == "scene" else combined_data
            animated_data.to_netcdf(path)

        task.scenes_processed = F('scenes_processed') + 1
        task.save()

    if combined_data is None:
        return None

    path = os.path.join(task.get_temp_path(), chunk_id + ".nc")
    combined_data.to_netcdf(path)
    dc.close()
    logger.info("Done with chunk: " + chunk_id)
    return path, metadata, {'geo_chunk_id': geo_chunk_id, 'time_chunk_id': time_chunk_id}
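# --- Hedged sketch (illustration only, not part of the app code) -------------
# Before computing TSM, the clean mask above is narrowed to likely water:
# pixels with swir2 > 100 or a wofs classification of 0 are knocked out.
# The numpy mechanics of that in-place boolean update:
def _demo_restrict_mask_to_water():
    import numpy as np

    clear_mask = np.array([True, True, True, True])
    swir2 = np.array([50, 150, 60, 70])  # reflectance; > 100 suggests not open water
    wofs = np.array([1, 1, 0, 1])        # 0 means classified as non-water
    clear_mask[(swir2 > 100) | (wofs == 0)] = False
    assert clear_mask.tolist() == [True, False, False, True]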
def processing_task(task_id=None,
                    geo_chunk_id=None,
                    time_chunk_id=None,
                    geographic_chunk=None,
                    time_chunk=None,
                    **parameters):
    """Process a parameter set and save the results to disk.

    Uses the geographic and time chunk ids to identify output products.
    **parameters is updated with time and geographic ranges, then used to load data.
    The task model holds the iterative property that signifies whether the
    algorithm is iterative or whether all data needs to be loaded at once.
    Computes a single SLIP baseline comparison - returns a slip mask and mosaic.

    Args:
        task_id, geo_chunk_id, time_chunk_id: identification for the main task and what chunk this is processing
        geographic_chunk: range of latitude and longitude to load - dict with keys latitude, longitude
        time_chunk: list of acquisition dates
        parameters: all required kwargs to load data.

    Returns:
        path to the output product, metadata dict, and a dict containing the geo/time ids
    """
    chunk_id = "_".join([str(geo_chunk_id), str(time_chunk_id)])
    task = SlipTask.objects.get(pk=task_id)

    logger.info("Starting chunk: " + chunk_id)
    if not os.path.exists(task.get_temp_path()):
        return None

    metadata = {}

    def _get_datetime_range_containing(*time_ranges):
        return (min(time_ranges) - timedelta(microseconds=1), max(time_ranges) + timedelta(microseconds=1))

    time_range = _get_datetime_range_containing(time_chunk[0], time_chunk[-1])

    dc = DataAccessApi(config=task.config_path)
    updated_params = {**parameters}
    updated_params.update(geographic_chunk)
    updated_params.update({'time': time_range})
    data = dc.get_dataset_by_extent(**updated_params)

    # Grab DEM data as well.
    dem_parameters = {**updated_params}
    dem_parameters.update({'product': 'terra_aster_gdm_' + task.area_id, 'platform': 'TERRA'})
    dem_parameters.pop('time')
    dem_parameters.pop('measurements')
    dem_data = dc.get_dataset_by_extent(**dem_parameters)

    if 'time' not in data or 'time' not in dem_data:
        return None

    # The target data is the most recent acquisition; the baseline is everything else.
    target_data = xr.concat([data.isel(time=-1)], 'time')
    baseline_data = data.isel(time=slice(None, -1))

    target_clear_mask = task.satellite.get_clean_mask_func()(target_data)
    baseline_clear_mask = task.satellite.get_clean_mask_func()(baseline_data)

    combined_baseline = task.get_processing_method()(
        baseline_data,
        clean_mask=baseline_clear_mask,
        no_data=task.satellite.no_data_value,
        reverse_time=task.get_reverse_time())

    target_data = create_mosaic(
        target_data,
        clean_mask=target_clear_mask,
        no_data=task.satellite.no_data_value,
        reverse_time=task.get_reverse_time())

    slip_data = compute_slip(combined_baseline, target_data, dem_data, no_data=task.satellite.no_data_value)
    target_data['slip'] = slip_data

    metadata = task.metadata_from_dataset(
        metadata, target_data, target_clear_mask, updated_params, time=data.time.values.astype('M8[ms]').tolist()[-1])

    task.scenes_processed = F('scenes_processed') + 1
    task.save()

    path = os.path.join(task.get_temp_path(), chunk_id + ".nc")
    clear_attrs(target_data)
    target_data.to_netcdf(path)
    dc.close()
    logger.info("Done with chunk: " + chunk_id)
    return path, metadata, {'geo_chunk_id': geo_chunk_id, 'time_chunk_id': time_chunk_id}
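# --- Hedged sketch (illustration only, not part of the app code) -------------
# The target/baseline split above keeps the most recent acquisition as a
# one-element time series (so downstream mosaic code still sees a time axis)
# and uses every earlier acquisition as the baseline:
def _demo_target_baseline_split():
    import numpy as np
    import xarray as xr

    data = xr.Dataset({'red': (('time', 'y', 'x'), np.random.rand(5, 2, 2))}, coords={'time': np.arange(5)})
    target = xr.concat([data.isel(time=-1)], 'time')  # last scene, time dimension preserved
    baseline = data.isel(time=slice(None, -1))        # all scenes except the last
    assert target.sizes['time'] == 1 and baseline.sizes['time'] == 4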
def processing_task(task_id=None,
                    geo_chunk_id=None,
                    time_chunk_id=None,
                    geographic_chunk=None,
                    time_chunk=None,
                    **parameters):
    """Process a parameter set and save the results to disk.

    Uses the geographic and time chunk ids to identify output products.
    **parameters is updated with time and geographic ranges, then used to load data.
    The task model holds the iterative property that signifies whether the
    algorithm is iterative or whether all data needs to be loaded at once.

    Args:
        task_id, geo_chunk_id, time_chunk_id: identification for the main task and what chunk this is processing
        geographic_chunk: range of latitude and longitude to load - dict with keys latitude, longitude
        time_chunk: list of acquisition dates
        parameters: all required kwargs to load data.

    Returns:
        path to the output product, metadata dict, and a dict containing the geo/time ids
    """
    chunk_id = "_".join([str(geo_chunk_id), str(time_chunk_id)])
    task = CoastalChangeTask.objects.get(pk=task_id)

    logger.info("Starting chunk: " + chunk_id)
    if not os.path.exists(task.get_temp_path()):
        return None

    def _get_datetime_range_containing(*time_ranges):
        return (min(time_ranges) - timedelta(microseconds=1), max(time_ranges) + timedelta(microseconds=1))

    starting_year = _get_datetime_range_containing(*time_chunk[0])
    comparison_year = _get_datetime_range_containing(*time_chunk[1])

    dc = DataAccessApi(config=task.config_path)
    updated_params = parameters
    updated_params.update(geographic_chunk)

    def _compute_mosaic(time):
        updated_params.update({'time': time})
        data = dc.get_dataset_by_extent(**updated_params)
        if data is None or 'time' not in data:
            logger.info("Invalid chunk.")
            return None, None
        clear_mask = task.satellite.get_clean_mask_func()(data)
        metadata = task.metadata_from_dataset({}, data, clear_mask, updated_params)
        return task.get_processing_method()(data, clean_mask=clear_mask, no_data=task.satellite.no_data_value), metadata

    old_mosaic, old_metadata = _compute_mosaic(starting_year)
    new_mosaic, new_metadata = _compute_mosaic(comparison_year)

    if old_mosaic is None or new_mosaic is None:
        return None

    metadata = {**old_metadata, **new_metadata}

    output_product = compute_coastal_change(old_mosaic, new_mosaic, no_data=task.satellite.no_data_value)

    task.scenes_processed = F('scenes_processed') + 1
    task.save()

    path = os.path.join(task.get_temp_path(), chunk_id + ".nc")
    output_product.to_netcdf(path)
    dc.close()
    logger.info("Done with chunk: " + chunk_id)
    return path, metadata, {'geo_chunk_id': geo_chunk_id, 'time_chunk_id': time_chunk_id}
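# --- Note (illustration only) -------------------------------------------------
# metadata = {**old_metadata, **new_metadata} is a shallow dict merge in which
# the comparison year's entries win on key collisions, e.g.:
#     {**{'scenes': 3, 'year': 2000}, **{'scenes': 5}}  ->  {'scenes': 5, 'year': 2000}
# (these keys are hypothetical; metadata_from_dataset defines the real ones).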
def processing_task(self,
                    task_id=None,
                    geo_chunk_id=None,
                    time_chunk_id=None,
                    geographic_chunk=None,
                    time_chunk=None,
                    **parameters):
    """Process a parameter set and save the results to disk.

    Uses the geographic and time chunk ids to identify output products.
    **parameters is updated with time and geographic ranges, then used to load data.
    The task model holds the iterative property that signifies whether the
    algorithm is iterative or whether all data needs to be loaded at once.

    Args:
        task_id, geo_chunk_id, time_chunk_id: identification for the main task and what chunk this is processing
        geographic_chunk: range of latitude and longitude to load - dict with keys latitude, longitude
        time_chunk: list of acquisition dates
        parameters: all required kwargs to load data.

    Returns:
        path to the output product, metadata dict, and a dict containing the geo/time ids
    """
    chunk_id = "_".join([str(geo_chunk_id), str(time_chunk_id)])
    task = TsmTask.objects.get(pk=task_id)
    if check_cancel_task(self, task):
        return

    logger.info("Starting chunk: " + chunk_id)
    if not os.path.exists(task.get_temp_path()):
        return None

    metadata = {}

    # _get_datetime_range_containing is expected at module scope in this version.
    times = list(
        map(_get_datetime_range_containing, time_chunk)
        if task.get_iterative() else [_get_datetime_range_containing(time_chunk[0], time_chunk[-1])])
    dc = DataAccessApi(config=task.config_path)
    updated_params = parameters
    updated_params.update(geographic_chunk)
    water_analysis = None
    tsm_analysis = None
    combined_data = None
    base_index = (task.get_chunk_size()['time'] if task.get_chunk_size()['time'] is not None else 1) * time_chunk_id
    for time_index, time in enumerate(times):
        updated_params.update({'time': time})
        data = dc.get_stacked_datasets_by_extent(**updated_params)
        if check_cancel_task(self, task):
            return

        if data is None:
            logger.info("Empty chunk.")
            continue
        if 'time' not in data:
            logger.info("Invalid chunk.")
            continue

        clear_mask = task.satellite.get_clean_mask_func()(data)

        # Ensure data variables have the range of Landsat 7 Collection 1 Level 2
        # since the color scales are tailored for that dataset.
        platform = task.satellite.platform
        collection = task.satellite.collection
        level = task.satellite.level
        if (platform, collection) != ('LANDSAT_7', 'c1'):
            data = \
                convert_range(data, from_platform=platform,
                              from_collection=collection, from_level=level,
                              to_platform='LANDSAT_7', to_collection='c1', to_level='l2')

        wofs_data = task.get_processing_method()(data, clean_mask=clear_mask, no_data=task.satellite.no_data_value)
        water_analysis = perform_timeseries_analysis(
            wofs_data, 'wofs', intermediate_product=water_analysis, no_data=task.satellite.no_data_value)

        # clear_mask.data[(data.swir2.values > 100) | (wofs_data.wofs.values == 0)] = False
        tsm_data = tsm(data, clean_mask=clear_mask, no_data=task.satellite.no_data_value)
        tsm_analysis = perform_timeseries_analysis(
            tsm_data, 'tsm', intermediate_product=tsm_analysis, no_data=task.satellite.no_data_value)

        if check_cancel_task(self, task):
            return

        combined_data = tsm_analysis
        combined_data['wofs'] = water_analysis.total_data
        combined_data['wofs_total_clean'] = water_analysis.total_clean

        metadata = task.metadata_from_dataset(metadata, tsm_data, clear_mask, updated_params)

        if task.animated_product.animation_id != "none":
            path = os.path.join(task.get_temp_path(),
                                "animation_{}_{}.nc".format(str(geo_chunk_id), str(base_index + time_index)))
            animated_data = tsm_data.isel(
                time=0, drop=True) if task.animated_product.animation_id == "scene" else combined_data
            export_xarray_to_netcdf(animated_data, path)

        task.scenes_processed = F('scenes_processed') + 1
        task.save(update_fields=['scenes_processed'])

    if combined_data is None:
        return None

    path = os.path.join(task.get_temp_path(), chunk_id + ".nc")
    export_xarray_to_netcdf(combined_data, path)
    dc.close()
    logger.info("Done with chunk: " + chunk_id)
    return path, metadata, {'geo_chunk_id': geo_chunk_id, 'time_chunk_id': time_chunk_id}
def processing_task(self,
                    task_id=None,
                    geo_chunk_id=None,
                    geographic_chunk=None,
                    num_scn_per_chk=None,
                    **parameters):
    """Process a parameter set and save the results to disk.

    Uses the geographic chunk id to identify output products.
    **parameters is updated with time and geographic ranges, then used to load data.

    Args:
        task_id, geo_chunk_id: identification for the main task and what chunk this is processing
        geographic_chunk: range of latitude and longitude to load - dict with keys latitude, longitude
        num_scn_per_chk: a dictionary of the number of scenes per chunk for the baseline
                         and analysis extents. Used to determine task progress.
        parameters: all required kwargs to load data.

    Returns:
        paths to the difference composite, out-of-range, and no-data products,
        the metadata dict, and a dict containing the geo chunk id
    """
    chunk_id = str(geo_chunk_id)
    task = SpectralAnomalyTask.objects.get(pk=task_id)
    if check_cancel_task(self, task):
        return

    if not os.path.exists(task.get_temp_path()):
        return None

    metadata = {}

    # For both the baseline and analysis time ranges for this geographic chunk,
    # load, calculate the spectral index, composite, and filter the data
    # according to user-supplied parameters - recording where the data was out
    # of the filter's range so we can create the output product (an image).
    dc = DataAccessApi(config=task.config_path)
    updated_params = parameters
    updated_params.update(geographic_chunk)
    spectral_index = task.query_type.result_id
    composites = {}
    composites_out_of_range = {}
    no_data_value = task.satellite.no_data_value
    for composite_name in ['baseline', 'analysis']:
        if check_cancel_task(self, task):
            return

        # Use the corresponding time range for the baseline and analysis data.
        updated_params['time'] = \
            updated_params['baseline_time' if composite_name == 'baseline' else 'analysis_time']
        time_column_data = dc.get_dataset_by_extent(**updated_params)
        # If this geographic chunk is outside the data extents, return None.
        if len(time_column_data.dims) == 0:
            return None

        # Obtain the clean mask for the satellite.
        time_column_clean_mask = task.satellite.get_clean_mask_func()(time_column_data)
        measurements_list = task.satellite.measurements.replace(" ", "").split(",")
        # Obtain the mask for valid Landsat values.
        time_column_invalid_mask = landsat_clean_mask_invalid(
            time_column_data,
            platform=task.satellite.platform,
            collection=task.satellite.collection,
            level=task.satellite.level).values
        # Also exclude data points with the no_data value.
        no_data_mask = time_column_data[measurements_list[0]].values != no_data_value
        # Combine the clean masks.
        time_column_clean_mask = time_column_clean_mask | time_column_invalid_mask | no_data_mask

        # Obtain the composite.
        composite = task.get_processing_method()(
            time_column_data, clean_mask=time_column_clean_mask, no_data=task.satellite.no_data_value)
        # Obtain the mask for valid Landsat values.
        composite_invalid_mask = landsat_clean_mask_invalid(
            composite,
            platform=task.satellite.platform,
            collection=task.satellite.collection,
            level=task.satellite.level).values
        # Also exclude data points with the no_data value via the compositing mask.
        composite_no_data_mask = composite[measurements_list[0]].values != no_data_value
        composite_clean_mask = composite_invalid_mask | composite_no_data_mask

        # Compute the spectral index for the composite.
        spec_ind_params = dict()
        if spectral_index == 'fractional_cover':
            spec_ind_params = dict(clean_mask=composite_clean_mask, no_data=no_data_value)
        spec_ind_result = spectral_indices_function_map[spectral_index](composite, **spec_ind_params)
        if spectral_index in ['ndvi', 'ndbi', 'ndwi', 'evi']:
            composite[spectral_index] = spec_ind_result
        else:  # Fractional Cover
            composite = xr.merge([composite, spec_ind_result])
            # Fractional Cover is supposed to have a range of [0, 100], with its
            # bands - 'bs', 'pv', and 'npv' - summing to 100. However, the
            # function we use can have the sum of those bands as high as 106.
            # frac_cov_min, frac_cov_max = spectral_indices_range_map[spectral_index]
            frac_cov_min, frac_cov_max = 0, 106
            for band in ['bs', 'pv', 'npv']:
                composite[band].values = \
                    np.interp(composite[band].values, (frac_cov_min, frac_cov_max),
                              spectral_indices_range_map[spectral_index])
        composites[composite_name] = composite

        # Determine where the composite is out of range.
        # We rename the resulting xarray.DataArray because calling to_netcdf()
        # on it at the end of this function will save it as a Dataset
        # with one data variable with the same name as the DataArray.
        if spectral_index in ['ndvi', 'ndbi', 'ndwi', 'evi']:
            composites_out_of_range[composite_name] = \
                xr_or(composite[spectral_index] < task.composite_threshold_min,
                      task.composite_threshold_max < composite[spectral_index]).rename(spectral_index)
        else:  # Fractional Cover
            # For fractional cover, a composite pixel is out of range if any of
            # its fractional cover bands are out of range.
            composites_out_of_range[composite_name] = xr_or(
                xr_or(
                    xr_or(composite['bs'] < task.composite_threshold_min,
                          task.composite_threshold_max < composite['bs']),
                    xr_or(composite['pv'] < task.composite_threshold_min,
                          task.composite_threshold_max < composite['pv'])),
                xr_or(composite['npv'] < task.composite_threshold_min,
                      task.composite_threshold_max < composite['npv'])).rename(spectral_index)

        # Update the metadata with the current data (baseline or analysis).
        metadata = task.metadata_from_dataset(metadata, time_column_data, time_column_clean_mask, parameters)
        # Record task progress (baseline or analysis composite data obtained).
        task.scenes_processed = F('scenes_processed') + num_scn_per_chk[composite_name]
        task.save(update_fields=['scenes_processed'])
    dc.close()

    if check_cancel_task(self, task):
        return

    # Create a difference composite.
    diff_composite = composites['analysis'] - composites['baseline']

    # Find where either the baseline or analysis composite was out of range for a pixel.
    composite_out_of_range = xr_or(*composites_out_of_range.values())

    # Find where either the baseline or analysis composite was no_data.
    if spectral_index in ['ndvi', 'ndbi', 'ndwi', 'evi']:
        composite_no_data = xr_or(composites['baseline'][measurements_list[0]] == no_data_value,
                                  composites['analysis'][measurements_list[0]] == no_data_value)
        if spectral_index == 'evi':  # EVI returns no_data for values outside [-1, 1].
            composite_no_data = xr_or(
                composite_no_data,
                xr_or(composites['baseline'][spectral_index] == no_data_value,
                      composites['analysis'][spectral_index] == no_data_value))
    else:  # Fractional Cover
        composite_no_data = xr_or(
            xr_or(
                xr_or(composites['baseline']['bs'] == no_data_value,
                      composites['baseline']['pv'] == no_data_value),
                composites['baseline']['npv'] == no_data_value),
            xr_or(
                xr_or(composites['analysis']['bs'] == no_data_value,
                      composites['analysis']['pv'] == no_data_value),
                composites['analysis']['npv'] == no_data_value))
    composite_no_data = composite_no_data.rename(spectral_index)

    # Drop unneeded data variables.
    diff_composite = diff_composite.drop(measurements_list)

    if check_cancel_task(self, task):
        return

    composite_path = os.path.join(task.get_temp_path(), chunk_id + ".nc")
    export_xarray_to_netcdf(diff_composite, composite_path)
    composite_out_of_range_path = os.path.join(task.get_temp_path(), chunk_id + "_out_of_range.nc")
    logger.info("composite_out_of_range: " + str(composite_out_of_range))
    export_xarray_to_netcdf(composite_out_of_range, composite_out_of_range_path)
    composite_no_data_path = os.path.join(task.get_temp_path(), chunk_id + "_no_data.nc")
    export_xarray_to_netcdf(composite_no_data, composite_no_data_path)
    return composite_path, composite_out_of_range_path, composite_no_data_path, \
           metadata, {'geo_chunk_id': geo_chunk_id}
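# --- Hedged sketch (illustration only, not part of the app code) -------------
# The fractional-cover rescaling above linearly maps band values from the
# function's observed output range (band sums can reach 106) onto the nominal
# range. Assuming spectral_indices_range_map['fractional_cover'] is (0, 100),
# the np.interp mechanics are:
def _demo_frac_cover_rescale():
    import numpy as np

    band = np.array([0.0, 53.0, 106.0])
    rescaled = np.interp(band, (0, 106), (0, 100))  # endpoints map to endpoints, midpoint to 50
    assert np.allclose(rescaled, [0.0, 50.0, 100.0])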