def recombine_geographic_chunks(chunks, task_id=None):
    """Recombine processed data over the geographic indices.

    For each geographic chunk process spawned by the main task, open the
    resulting dataset and combine it into a single dataset. Combine metadata
    as well, writing the recombined product to disk. If the task requests an
    animated product, also recombine each per-time-slice animation frame
    across the geographic chunks.

    Args:
        chunks: list of the return from the processing_task function -
            (path, metadata, {chunk ids}), or a single such tuple.
        task_id: primary key of the TsmTask this recombination belongs to.

    Returns:
        path to the output product, metadata dict, and a dict containing the
        geo/time ids
    """
    logger.info("RECOMBINE_GEO")
    # Normalize to a list and drop chunks whose processing step yielded nothing.
    total_chunks = [chunks] if not isinstance(chunks, list) else chunks
    total_chunks = [chunk for chunk in total_chunks if chunk is not None]
    geo_chunk_id = total_chunks[0][2]['geo_chunk_id']
    time_chunk_id = total_chunks[0][2]['time_chunk_id']

    metadata = {}
    task = TsmTask.objects.get(pk=task_id)

    # Merge metadata while collecting each chunk's dataset for recombination.
    chunk_data = []
    for chunk in total_chunks:
        metadata = task.combine_metadata(metadata, chunk[1])
        chunk_data.append(xr.open_dataset(chunk[0], autoclose=True))

    combined_data = combine_geographic_chunks(chunk_data)

    if task.animated_product.animation_id != "none":
        # Number of time slices covered by one time chunk; hoisted out of the
        # loops since it is invariant (defaults to 1 when the size is unset).
        time_chunk_size = task.get_chunk_size()['time']
        num_time_slices = time_chunk_size if time_chunk_size is not None else 1
        base_index = num_time_slices * time_chunk_id
        for index in range(num_time_slices):
            animated_data = []
            for chunk in total_chunks:
                geo_chunk_index = chunk[2]['geo_chunk_id']
                # if we're animating, combine it all and save to disk.
                path = os.path.join(
                    task.get_temp_path(),
                    "animation_{}_{}.nc".format(str(geo_chunk_index), str(base_index + index)))
                if os.path.exists(path):
                    animated_data.append(xr.open_dataset(path, autoclose=True))
            path = os.path.join(task.get_temp_path(), "animation_{}.nc".format(base_index + index))
            # Some frames may be missing (e.g. no data for that slice); only
            # write a combined frame when at least one chunk produced one.
            if len(animated_data) > 0:
                combine_geographic_chunks(animated_data).to_netcdf(path)

    path = os.path.join(task.get_temp_path(), "recombined_geo_{}.nc".format(time_chunk_id))
    combined_data.to_netcdf(path)
    logger.info("Done combining geographic chunks for time: " + str(time_chunk_id))

    return path, metadata, {
        'geo_chunk_id': geo_chunk_id,
        'time_chunk_id': time_chunk_id
    }
def recombine_geographic_chunks(chunks, task_id=None):
    """Recombine processed data over the geographic indices.

    For each geographic chunk process spawned by the main task, open the
    resulting dataset and combine it into a single dataset. Combine metadata
    as well, writing the recombined product to disk.

    Args:
        chunks: list of the return from the processing_task function -
            (path, metadata, {chunk ids}), or a single such tuple.
        task_id: primary key of the SlipTask this recombination belongs to.

    Returns:
        path to the output product, metadata dict, and a dict containing the
        geo/time ids
    """
    logger.info("RECOMBINE_GEO")
    # Normalize to a list and drop chunks whose processing step yielded nothing.
    total_chunks = [chunks] if not isinstance(chunks, list) else chunks
    total_chunks = [chunk for chunk in total_chunks if chunk is not None]
    geo_chunk_id = total_chunks[0][2]['geo_chunk_id']
    time_chunk_id = total_chunks[0][2]['time_chunk_id']

    metadata = {}
    task = SlipTask.objects.get(pk=task_id)

    # Merge metadata while collecting each chunk's dataset for recombination.
    chunk_data = []
    for chunk in total_chunks:
        metadata = task.combine_metadata(metadata, chunk[1])
        chunk_data.append(xr.open_dataset(chunk[0], autoclose=True))

    combined_data = combine_geographic_chunks(chunk_data)

    path = os.path.join(task.get_temp_path(), "recombined_geo_{}.nc".format(time_chunk_id))
    combined_data.to_netcdf(path)
    logger.info("Done combining geographic chunks for time: " + str(time_chunk_id))

    return path, metadata, {'geo_chunk_id': geo_chunk_id, 'time_chunk_id': time_chunk_id}