Exemplo n.º 1
0
def load_process_save_chunk(output_files: OutputDriver,
                            chunk: Tuple[slice, slice, slice], task: StatsTask,
                            timer: MultiTimer):
    """Load one spatial chunk, compute every output product over it, and
    write each result out through ``output_files``.

    An ``EmptyChunkException`` raised while loading is logged at debug level
    and swallowed: a fully-masked (empty) chunk is not a task failure.
    """
    try:
        with timer.time('loading_data'):
            geom = geometry_for_task(task)
            data = load_data(chunk, task.sources, geom=geom)

        products = list(task.output_products.items())
        n_products = len(products)
        for position, (prod_name, stat) in enumerate(products, start=1):
            _LOG.debug(
                "Computing %s in tile %s %s; %s", prod_name, task.spatial_id,
                "({})".format(", ".join(prettier_slice(c) for c in chunk)),
                timer)

            measurements = stat.data_measurements

            with timer.time(prod_name):
                result = stat.compute(data)

                if position == n_products:  # make sure input data is released early
                    del data

                # restore nodata values back
                result = cast_back(result, measurements)

            # For each of the data variables, shove this chunk into the output results
            with timer.time('writing_data'):
                output_files.write_chunk(prod_name, chunk, result)

    except EmptyChunkException:
        _LOG.debug(
            'Error: No data returned while loading %s for %s. May have all been masked',
            chunk, task)
Exemplo n.º 2
0
def main(index, stats_config_file, qsub, runner, save_tasks, load_tasks,
         tile_index, tile_index_file, output_location, year, task_slice,
         batch):
    """CLI entry point: configure, generate/load tasks, and run them.

    Handles three modes: batch qsub submission (one child job per task
    slice), single qsub submission, or direct local execution.

    Returns 0 on success or a non-zero qsub exit code; exits the process
    with status 1 on any unexpected exception. Raises
    :class:`click.ClickException` when some tasks failed, so click can
    report the failure count to the user.
    """
    try:
        _log_setup()

        if qsub is not None and batch is not None:
            # Batch mode: submit `batch` child jobs, each owning a disjoint
            # slice of the task list via '--task-slice i::batch'.
            for i in range(batch):
                child = qsub.clone()
                child.reset_internal_args()
                child.add_internal_args('--task-slice',
                                        '{}::{}'.format(i, batch))
                click.echo(repr(child))
                exit_code, _ = child(auto=True, auto_clean=[('--batch', 1)])
                if exit_code != 0:
                    return exit_code
            return 0

        elif qsub is not None:
            # TODO: verify config before calling qsub submit
            click.echo(repr(qsub))
            exit_code, _ = qsub(auto=True)
            return exit_code

        timer = MultiTimer().start('main')

        config = normalize_config(read_config(stats_config_file), tile_index,
                                  tile_index_file, year, output_location)

        app = StatsApp(config, index)
        app.log_config()

        if save_tasks is not None:
            # Only persisting tasks for later --load-tasks runs; nothing
            # executes, so there is nothing that can fail.
            app.save_tasks_to_file(save_tasks, index)
            failed = 0
        else:
            if load_tasks is not None:
                tasks = unpickle_stream(load_tasks)
            else:
                tasks = app.generate_tasks(index)

            successful, failed = app.run_tasks(tasks, runner, task_slice)

        timer.pause('main')
        _LOG.info('Stats processing completed in %s seconds.',
                  timer.run_times['main'])

        if failed > 0:
            raise click.ClickException(
                '%s of %s tasks not completed successfully.' %
                (failed, successful + failed))

    except click.ClickException:
        # Deliberate, user-facing failure raised just above: let click
        # render it and set the exit code instead of swallowing it in the
        # generic handler below.
        raise
    except Exception as e:
        # Unexpected failure: log the full traceback (not just the message)
        # before exiting with a non-zero status.
        _LOG.exception(e)
        sys.exit(1)

    return 0
Exemplo n.º 3
0
def execute_task(task: StatsTask, output_driver, chunking) -> StatsTask:
    """
    Load data, run the statistical operations and write results out to the filesystem.

    :param datacube_stats.models.StatsTask task:
    :type output_driver: OutputDriver
    :param chunking: dict of dimension sizes to chunk the computation by
    :return: the completed task
    :raises StatsProcessingException: on any unexpected processing error
    """
    timer = MultiTimer().start('total')

    # Iterative stats consume datasets one at a time; plain stats load the
    # whole chunk up front.
    process_chunk = load_process_save_chunk_iteratively if task.is_iterative else load_process_save_chunk

    try:
        with output_driver(task=task) as output_files:
            # currently for polygons process will load entirely
            if len(chunking) == 0:
                chunking = {
                    'x': task.sample_tile.shape[2],
                    'y': task.sample_tile.shape[1]
                }
            for sub_tile_slice in tile_iter(task.sample_tile, chunking):
                process_chunk(output_files, sub_tile_slice, task, timer)
    except OutputFileAlreadyExists as e:
        # Not fatal: the output is already there, just record and move on.
        _LOG.warning(str(e))
    except OutputDriverResult:
        # was run interactively
        # re-raise result to be caught again by StatsApp.execute_task
        # (bare `raise` preserves the original traceback)
        raise
    except Exception as e:
        _LOG.error("Error processing task: %s", task)
        # Chain the original exception so the root cause is not lost.
        raise StatsProcessingException("Error processing task: %s" % task) from e

    timer.pause('total')
    _LOG.debug('Completed %s %s task with %s data sources; %s',
               task.spatial_id,
               [d.strftime('%Y-%m-%d') for d in task.time_period],
               task.data_sources_length(), timer)
    return task
Exemplo n.º 4
0
def load_process_save_chunk_iteratively(output_files: OutputDriver,
                                        chunk: Tuple[slice, slice, slice],
                                        task: StatsTask, timer: MultiTimer):
    """Stream datasets for ``chunk`` through each product's iterative
    processor, then finalise every product and write its variables out.
    """
    # One (processor, product-name, stat) triple per output product.
    procs = [(stat.make_iterative_proc(), name, stat)
             for name, stat in task.output_products.items()]

    def feed(dataset):
        # Push one incoming dataset into every product's processor,
        # timing each product separately.
        for proc, name, _ in procs:
            with timer.time(name):
                proc(dataset)

    def write_out(name, dataset):
        # Emit each data variable of a finished product for this chunk.
        for var_name, var in dataset.data_vars.items():
            output_files.write_data(name, var_name, chunk, var.values)

    geom = geometry_for_task(task)
    for ds in load_data_lazy(chunk, task.sources, geom=geom, timer=timer):
        feed(ds)

    with timer.time('writing_data'):
        for proc, name, stat in procs:
            # proc() with no argument finalises and returns the result.
            write_out(name, cast_back(proc(), stat.data_measurements))