def start_chunk_processing(self, chunk_details, task_id=None):
    """Build and launch the asynchronous processing pipeline for a FractionalCoverTask.

    For every geographic chunk, a group of per-time-chunk processing tasks is
    run, recombined over the time index, and passed through band math. The
    per-geography results are then recombined, output products are created,
    and the task is cleaned up. Returns True once the pipeline is dispatched,
    or None when chunk_details is missing.
    """
    if chunk_details is None:
        return None

    params = chunk_details.get('parameters')
    geo_chunks = chunk_details.get('geographic_chunks')
    t_chunks = chunk_details.get('time_chunks')

    task = FractionalCoverTask.objects.get(pk=task_id)

    # Estimate the total work: scenes to process, plus intermediate chunks
    # that still need to be combined.
    num_scenes = len(geo_chunks) * sum(len(tc) for tc in t_chunks)
    # Scenes handled per (time, geo) chunk by recombine_time_chunks() and
    # process_band_math(): num_scn_per_chk * len(t_chunks) * len(geo_chunks).
    num_scn_per_chk = round(num_scenes / (len(t_chunks) * len(geo_chunks)))
    # Scenes handled per geographic chunk by recombine_geographic_chunks()
    # and create_output_products(): num_scn_per_chk_geo * len(geo_chunks).
    num_scn_per_chk_geo = round(num_scenes / len(geo_chunks))

    # Progress is tracked in processing_task(), recombine_time_chunks(), and
    # process_band_math(); the band-math scenes count double because that
    # step takes so long, hence 1 + 1 + 2 = 4.
    task.total_scenes = 4 * num_scenes
    task.scenes_processed = 0
    task.save(update_fields=['total_scenes', 'scenes_processed'])

    if check_cancel_task(self, task):
        return

    task.update_status("WAIT", "Starting processing.")
    logger.info("START_CHUNK_PROCESSING")

    # One chain per geographic chunk: process every time chunk, recombine
    # over time, then run band math.
    per_geo_chains = [
        group([
            processing_task.s(
                task_id=task_id,
                geo_chunk_id=gi,
                time_chunk_id=ti,
                geographic_chunk=geo_chunk,
                time_chunk=t_chunk,
                **params) for ti, t_chunk in enumerate(t_chunks)
        ])
        | recombine_time_chunks.s(task_id=task_id, num_scn_per_chk=num_scn_per_chk)
        | process_band_math.s(task_id=task_id, num_scn_per_chk=2 * num_scn_per_chk_geo)
        for gi, geo_chunk in enumerate(geo_chunks)
    ]
    processing_pipeline = (
        group(per_geo_chains)
        | recombine_geographic_chunks.s(task_id=task_id)
        | create_output_products.s(task_id=task_id)
        | task_clean_up.si(task_id=task_id, task_model='FractionalCoverTask')
    ).apply_async()
    return True
def start_chunk_processing(self, chunk_details, task_id=None):
    """Build and launch the asynchronous processing pipeline for a SpectralIndicesTask.

    A group of per-time-chunk processing tasks runs for each geographic
    chunk; each group is recombined over the time index and run through band
    math, then the geographic results are recombined, output products are
    created, and the task is cleaned up. Returns True once the pipeline is
    dispatched, or None when chunk_details is missing.
    """
    if chunk_details is None:
        return None

    params = chunk_details.get('parameters')
    geo_chunks = chunk_details.get('geographic_chunks')
    t_chunks = chunk_details.get('time_chunks')

    task = SpectralIndicesTask.objects.get(pk=task_id)

    # Progress is tracked in processing_task() only, so the total is simply
    # the number of scenes across every (geo, time) chunk pair.
    task.total_scenes = len(geo_chunks) * sum(len(tc) for tc in t_chunks)
    task.scenes_processed = 0
    task.save(update_fields=['total_scenes', 'scenes_processed'])

    if check_cancel_task(self, task):
        return

    task.update_status("WAIT", "Starting processing.")
    logger.info("START_CHUNK_PROCESSING")

    # One chain per geographic chunk: process each time chunk, recombine
    # over time, then run band math.
    per_geo_chains = [
        group([
            processing_task.s(
                task_id=task_id,
                geo_chunk_id=gi,
                time_chunk_id=ti,
                geographic_chunk=geo_chunk,
                time_chunk=t_chunk,
                **params) for ti, t_chunk in enumerate(t_chunks)
        ])
        | recombine_time_chunks.s(task_id=task_id)
        | process_band_math.s(task_id=task_id)
        for gi, geo_chunk in enumerate(geo_chunks)
    ]
    processing_pipeline = (
        group(per_geo_chains)
        | recombine_geographic_chunks.s(task_id=task_id)
        | create_output_products.s(task_id=task_id)
        | task_clean_up.si(task_id=task_id, task_model='SpectralIndicesTask')
    ).apply_async()
    return True
def start_chunk_processing(self, chunk_details, task_id=None):
    """Create a fully asynchronous processing pipeline from parameters and a list of chunks.

    Coastal change reverses the usual chunk ordering: geographic chunks are
    grouped *within* each time chunk, recombined over geography first, and
    recombined over the time index last. The pipeline then creates output
    products and cleans the task up.

    Returns True once the pipeline is dispatched, or None when chunk_details
    is missing.
    """
    if chunk_details is None:
        return None

    parameters = chunk_details.get('parameters')
    geographic_chunks = chunk_details.get('geographic_chunks')
    time_chunks = chunk_details.get('time_chunks')

    task = CoastalChangeTask.objects.get(pk=task_id)

    # This calculation does not account for time chunking because this app
    # does not support time chunking. Each time chunk holds a pair of
    # acquisition lists (first year, last year).
    num_times_fst_lst_yrs = len(time_chunks[0][0]) + len(time_chunks[0][1])
    task.total_scenes = len(geographic_chunks) * len(time_chunks) * num_times_fst_lst_yrs
    task.scenes_processed = 0
    # Persist only the progress fields (consistent with the other apps'
    # start_chunk_processing) so unrelated fields are not rewritten.
    task.save(update_fields=['total_scenes', 'scenes_processed'])

    if check_cancel_task(self, task):
        return

    task.update_status("WAIT", "Starting processing.")
    logger.info("START_CHUNK_PROCESSING")

    processing_pipeline = (group([
        group([
            processing_task.s(
                task_id=task_id,
                geo_chunk_id=geo_index,
                time_chunk_id=time_index,
                geographic_chunk=geographic_chunk,
                time_chunk=time_chunk,
                **parameters) for geo_index, geographic_chunk in enumerate(geographic_chunks)
        ]) | recombine_geographic_chunks.s(task_id=task_id)
        for time_index, time_chunk in enumerate(time_chunks)
    ]) | recombine_time_chunks.s(task_id=task_id)
       | create_output_products.s(task_id=task_id)
       | task_clean_up.si(task_id=task_id, task_model='CoastalChangeTask')).apply_async()
    return True
def start_chunk_processing(self, chunk_details, task_id=None):
    """Create a fully asynchronous processing pipeline from parameters and a list of chunks.

    NDVI anomaly requires exactly one time chunk; per-geographic-chunk
    processing tasks are grouped, recombined over geography, turned into
    output products, and the task is cleaned up.

    Returns True once the pipeline is dispatched, or None when chunk_details
    is missing.

    Raises:
        AssertionError: if more than one time chunk is supplied.
    """
    if chunk_details is None:
        return None

    parameters = chunk_details.get('parameters')
    geographic_chunks = chunk_details.get('geographic_chunks')
    time_chunks = chunk_details.get('time_chunks')

    # Explicit raise (same exception type as the original `assert`) so the
    # check is not stripped under `python -O`.
    if len(time_chunks) != 1:
        raise AssertionError("There should only be one time chunk for NDVI anomaly operations.")

    task = NdviAnomalyTask.objects.get(pk=task_id)

    # Hoist the chunk-size lookup; fall back to the actual chunk length when
    # no time chunk size is configured.
    time_chunk_size = task.get_chunk_size()['time']
    task.total_scenes = len(geographic_chunks) * len(time_chunks) * (
        time_chunk_size if time_chunk_size is not None else len(time_chunks[0]))
    task.scenes_processed = 0
    # BUG FIX: the progress counters were assigned but never persisted, so
    # the database kept stale values. Save them as the sibling apps do.
    task.save(update_fields=['total_scenes', 'scenes_processed'])

    if check_cancel_task(self, task):
        return

    task.update_status("WAIT", "Starting processing.")
    logger.info("START_CHUNK_PROCESSING")

    processing_pipeline = (group([
        group([
            processing_task.s(
                task_id=task_id,
                geo_chunk_id=geo_index,
                time_chunk_id=time_index,
                geographic_chunk=geographic_chunk,
                time_chunk=time_chunk,
                **parameters) for time_index, time_chunk in enumerate(time_chunks)
        ]) for geo_index, geographic_chunk in enumerate(geographic_chunks)
    ]) | recombine_geographic_chunks.s(task_id=task_id)
       | create_output_products.s(task_id=task_id)
       | task_clean_up.si(task_id=task_id, task_model='NdviAnomalyTask')).apply_async()
    return True
def start_chunk_processing(self, chunk_details, task_id=None):
    """Create a fully asynchronous processing pipeline from parameters and a list of chunks.

    For a SpectralAnomalyTask there is no time chunking: one processing task
    is dispatched per geographic chunk, results are recombined over
    geography, output products are created, and the task is cleaned up.
    Before dispatching, the datacube is queried (measurement-free loads) to
    estimate the number of scenes in the baseline and analysis extents for
    progress tracking.

    Returns True once the pipeline is dispatched, or None when chunk_details
    is missing.
    """
    if chunk_details is None:
        return None

    parameters = chunk_details.get('parameters')
    geographic_chunks = chunk_details.get('geographic_chunks')

    task = SpectralAnomalyTask.objects.get(pk=task_id)
    api = DataAccessApi(config=task.config_path)

    # Get an estimate of the amount of work to be done: the number of scenes
    # to process, also considering intermediate chunks to be combined.
    # Determine the number of scenes for the baseline and analysis extents.
    num_scenes = {}
    # NOTE: params_temp is deliberately reused and mutated across both loop
    # iterations below — each geographic chunk's keys and the per-composite
    # 'time' value overwrite the previous iteration's entries.
    params_temp = parameters.copy()
    for composite_name in ['baseline', 'analysis']:
        num_scenes[composite_name] = 0
        for geographic_chunk in geographic_chunks:
            params_temp.update(geographic_chunk)
            # Load no measurements — only the time coordinate is needed.
            params_temp['measurements'] = []
            # Use the corresponding time range for the baseline and analysis data.
            params_temp['time'] = \
                params_temp['baseline_time' if composite_name == 'baseline' else 'analysis_time']
            # Strip the app-specific keys that Datacube.load() does not accept.
            params_temp_clean = params_temp.copy()
            del params_temp_clean['baseline_time'], params_temp_clean['analysis_time'], \
                params_temp_clean['composite_range'], params_temp_clean['change_range']
            data = api.dc.load(**params_temp_clean)
            # No 'time' coordinate means no data matched this query extent.
            if 'time' in data.coords:
                num_scenes[composite_name] += len(data.time)
    # The number of scenes per geographic chunk for baseline and analysis extents.
    num_scn_per_chk_geo = {
        k: round(v / len(geographic_chunks))
        for k, v in num_scenes.items()
    }

    # Scene processing progress is tracked in processing_task().
    task.total_scenes = sum(num_scenes.values())
    task.scenes_processed = 0
    task.save(update_fields=['total_scenes', 'scenes_processed'])

    if check_cancel_task(self, task):
        return

    task.update_status("WAIT", "Starting processing.")

    # Group of per-geographic-chunk tasks, recombined over geography, then
    # output products and clean-up.
    processing_pipeline = (group([
        processing_task.s(
            task_id=task_id,
            geo_chunk_id=geo_index,
            geographic_chunk=geographic_chunk,
            num_scn_per_chk=num_scn_per_chk_geo,
            **parameters) for geo_index, geographic_chunk in enumerate(geographic_chunks)
    ]) | recombine_geographic_chunks.s(task_id=task_id) | create_output_products.s(task_id=task_id) \
       | task_clean_up.si(task_id=task_id, task_model='SpectralAnomalyTask')).apply_async()
    return True