Пример #1
0
def recombine_time_chunks(chunks, task_id=None):
    """Recombine processed chunks over the time index.

    Open time chunked processed datasets and recombine them using the same function
    that was used to process them. This assumes an iterative algorithm - if it is not, then it will
    simply return the data again.

    Args:
        chunks: list of the return from the processing_task function - path, metadata, and {chunk ids}

    Returns:
        path to the output product, metadata dict, and a dict containing the geo/time ids

    """
    logger.info("RECOMBINE_TIME")
    #sorting based on time id - earlier processed first as they're incremented e.g. 0, 1, 2..
    chunks = chunks if isinstance(chunks, list) else [chunks]
    chunks = [chunk for chunk in chunks if chunk is not None]
    if len(chunks) == 0:
        return None

    total_chunks = sorted(chunks, key=lambda x: x[0])
    task = SlipTask.objects.get(pk=task_id)
    geo_chunk_id = total_chunks[0][2]['geo_chunk_id']
    time_chunk_id = total_chunks[0][2]['time_chunk_id']
    metadata = {}

    combined_data = None
    combined_slip = None
    for index, chunk in enumerate(reversed(total_chunks)):
        metadata.update(chunk[1])
        data = xr.open_dataset(chunk[0], autoclose=True)
        if combined_data is None:
            combined_data = data.drop('slip')
            # since this is going to interact with data/mosaicking, it needs a time dim
            combined_slip = xr.concat([data.slip.copy(deep=True)], 'time')
            continue
        #give time an indice to keep mosaicking from breaking.
        data = xr.concat([data], 'time')
        data['time'] = [0]
        clear_mask = task.satellite.get_clean_mask_func()(data)
        # modify clean mask so that only slip pixels that are still zero will be used. This will show all the pixels that caused the flag.
        clear_mask[xr.concat([combined_slip], 'time').values == 1] = False
        combined_data = create_mosaic(
            data.drop('slip'),
            clean_mask=clear_mask,
            intermediate_product=combined_data,
            no_data=task.satellite.no_data_value,
            reverse_time=task.get_reverse_time())
        combined_slip.values[combined_slip.values == 0] = data.slip.values[combined_slip.values == 0]

    # Since we added a time dim to combined_slip, we need to remove it here.
    combined_data['slip'] = combined_slip.isel(time=0, drop=True)
    path = os.path.join(task.get_temp_path(), "recombined_time_{}.nc".format(geo_chunk_id))
    combined_data.to_netcdf(path)
    logger.info("Done combining time chunks for geo: " + str(geo_chunk_id))
    return path, metadata, {'geo_chunk_id': geo_chunk_id, 'time_chunk_id': time_chunk_id}
Пример #2
0
def mosaic(dataset):
    # The mask here is based on pixel_qa. It comes bundled in with most Landsat Products.
    if sys.argv[6] == 'LANDSAT_7':
        # The mask here is based on pixel_qa. It comes bundled in with most Landsat Products.
        clear_xarray = ls7_unpack_qa(
            dataset.pixel_qa, "clear")  # Boolean Xarray indicating landcover
        water_xarray = ls7_unpack_qa(
            dataset.pixel_qa, "water")  # Boolean Xarray indicating watercover
    elif sys.argv[6] == 'LANDSAT_8':
        clear_xarray = ls8_unpack_qa(
            dataset.pixel_qa, "clear")  # Boolean Xarray indicating landcover
        water_xarray = ls8_unpack_qa(
            dataset.pixel_qa, "water")  # Boolean Xarray indicating watercover
    elif sys.argv[6] == 'LANDSAT_5':
        clear_xarray = ls5_unpack_qa(
            dataset.pixel_qa, "clear")  # Boolean Xarray indicating landcover
        water_xarray = ls5_unpack_qa(
            dataset.pixel_qa, "water")  # Boolean Xarray indicating watercover

    cloud_free_boolean_mask = np.logical_or(clear_xarray, water_xarray)

    return create_mosaic(dataset, clean_mask=cloud_free_boolean_mask)
Пример #3
0
def processing_task(task_id=None,
                    geo_chunk_id=None,
                    time_chunk_id=None,
                    geographic_chunk=None,
                    time_chunk=None,
                    **parameters):
    """Process a parameter set and save the results to disk.

    Uses the geographic and time chunk id to identify output products.
    **params is updated with time and geographic ranges then used to load data.
    the task model holds the iterative property that signifies whether the algorithm
    is iterative or if all data needs to be loaded at once.

    Computes a single SLIP baseline comparison - returns a slip mask and mosaic.

    Args:
        task_id, geo_chunk_id, time_chunk_id: identification for the main task and what chunk this is processing
        geographic_chunk: range of latitude and longitude to load - dict with keys latitude, longitude
        time_chunk: list of acquisition dates
        parameters: all required kwargs to load data.

    Returns:
        path to the output product, metadata dict, and a dict containing the geo/time ids
    """

    chunk_id = "_".join([str(geo_chunk_id), str(time_chunk_id)])
    task = SlipTask.objects.get(pk=task_id)

    logger.info("Starting chunk: " + chunk_id)
    if not os.path.exists(task.get_temp_path()):
        return None

    metadata = {}

    def _get_datetime_range_containing(*time_ranges):
        return (min(time_ranges) - timedelta(microseconds=1), max(time_ranges) + timedelta(microseconds=1))

    time_range = _get_datetime_range_containing(time_chunk[0], time_chunk[-1])

    dc = DataAccessApi(config=task.config_path)
    updated_params = {**parameters}
    updated_params.update(geographic_chunk)
    updated_params.update({'time': time_range})
    data = dc.get_dataset_by_extent(**updated_params)

    #grab dem data as well
    dem_parameters = {**updated_params}
    dem_parameters.update({'product': 'terra_aster_gdm_' + task.area_id, 'platform': 'TERRA'})
    dem_parameters.pop('time')
    dem_parameters.pop('measurements')
    dem_data = dc.get_dataset_by_extent(**dem_parameters)

    if 'time' not in data or 'time' not in dem_data:
        return None

    #target data is most recent, with the baseline being everything else.
    target_data = xr.concat([data.isel(time=-1)], 'time')
    baseline_data = data.isel(time=slice(None, -1))

    target_clear_mask = task.satellite.get_clean_mask_func()(target_data)
    baseline_clear_mask = task.satellite.get_clean_mask_func()(baseline_data)
    combined_baseline = task.get_processing_method()(baseline_data,
                                                     clean_mask=baseline_clear_mask,
                                                     no_data=task.satellite.no_data_value,
                                                     reverse_time=task.get_reverse_time())

    target_data = create_mosaic(
        target_data,
        clean_mask=target_clear_mask,
        no_data=task.satellite.no_data_value,
        reverse_time=task.get_reverse_time())

    slip_data = compute_slip(combined_baseline, target_data, dem_data, no_data=task.satellite.no_data_value)
    target_data['slip'] = slip_data

    metadata = task.metadata_from_dataset(
        metadata, target_data, target_clear_mask, updated_params, time=data.time.values.astype('M8[ms]').tolist()[-1])

    task.scenes_processed = F('scenes_processed') + 1
    task.save()

    path = os.path.join(task.get_temp_path(), chunk_id + ".nc")
    clear_attrs(target_data)
    target_data.to_netcdf(path)
    dc.close()
    logger.info("Done with chunk: " + chunk_id)
    return path, metadata, {'geo_chunk_id': geo_chunk_id, 'time_chunk_id': time_chunk_id}