Example #1
def my_parse(ctx, param, value):
    parsed_expressions = parse_expressions(*list(value))
    # ctx.ensure_object(dict)
    # try:
    #     parsed_expressions['crs'] = ctx.obj['crs']
    # except KeyError:
    #     pass
    return parsed_expressions
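These parser callbacks are meant to be attached through Click's callback hook. A minimal sketch of how my_parse might be wired to a variadic argument; the command name and import path are assumptions, not from the source:

import click
from datacube.ui.expression import parse_expressions  # assumed import path

@click.command()
@click.argument('expressions', nargs=-1, callback=my_parse)  # hypothetical command
def search(expressions):
    # 'expressions' now holds the dict returned by parse_expressions
    click.echo(expressions)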
Example #2
def validate_time_range(context, param, value):
    """
    Click callback to validate a date string
    """
    try:
        parse_expressions(value)
        return value
    except SyntaxError:
        raise click.BadParameter(
            'Date range must be in one of the following formats as a string:'
            '\n\t1996-01 < time < 1996-12'
            '\n\t1996 < time < 1997'
            '\n\t1996-01-01 < time < 1996-12-31'
            '\n\ttime in 1996'
            '\n\ttime in 1996-12'
            '\n\ttime in 1996-12-31'
            '\n\ttime=1996'
            '\n\ttime=1996-12'
            '\n\ttime=1996-12-31')
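As a quick sanity check, the validator can be exercised directly outside of Click; ctx and param are unused by the callback, so placeholders suffice. The sample value below is illustrative:

import click

try:
    print(validate_time_range(None, None, 'time in 1996-12'))  # accepted value is echoed back
except click.BadParameter as err:
    print('rejected:', err.message)                            # raised when parsing fails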
Example #3
def generate_work_list(product_name, output_dir, s3_list, time_range, config):
    """
    Compares datacube file URIs against the S3 bucket (file names within a text file) and writes the list of datasets
    for conversion into the task file.

    Uses a configuration file to define the file naming schema.

    Task file format:
        A two column, comma separated CSV file.
        <Absolute path to NetCDF File>,<Directory and file name prefix of outputs>

        eg.
        /g/data/fk4/data/foo/bar.nc,prefix/data/foo/bar

    """
    with open(config) as config_file:
        config = yaml.safe_load(config_file)['products'][product_name]

    if s3_list is None:
        # Use text file generated by save_s3_inventory function
        s3_list = Path(output_dir) / (product_name + S3_LIST_EXT)
    else:
        s3_list = Path(s3_list)

    if s3_list.exists():
        existing_s3_keys = _load_s3_inventory(s3_list)
    else:
        existing_s3_keys = {}

    # Mapping from Expected Output YAML Location -> Input NetCDF File
    dc_workgen_list = dict()

    output_bands = expected_bands(product_name)

    for source_uri, new_basename in get_dataset_values(product_name,
                                                       config,
                                                       parse_expressions(time_range)):
        output_yaml = new_basename + '.yaml'
        expected_outputs = [f'{new_basename}_{band}.tif' for band in output_bands] + [output_yaml]
        if not all(output in existing_s3_keys for output in expected_outputs):
            dc_workgen_list[new_basename] = source_uri.split('file://')[1]

    out_file = Path(output_dir) / (product_name + TASK_FILE_EXT)

    with open(out_file, 'w', newline='') as fp:
        csv_writer = csv.writer(fp, quoting=csv.QUOTE_MINIMAL)
        LOG.info(f'Found {len(dc_workgen_list)} datasets needing conversion, writing to {out_file}')
        for s3_basename, input_file in dc_workgen_list.items():
            LOG.debug(f"File does not exists in S3, add to processing list: {input_file}")
            # Write Input_file, Output Basename
            csv_writer.writerow((input_file, splitext(s3_basename)[0]))

    if not dc_workgen_list:
        LOG.info(f"No tasks found, everything is up to date.")
Example #4
def generate_work_list(product_name, output_dir, pickle_file, time_range,
                       config):
    """
    Compares datacube file URIs against the S3 bucket (file names within a pickle file) and writes the list of datasets
    for COG conversion into the task file.
    Uses a configuration file to define the file naming schema.

    Before using this command, execute the following:
      $ module use /g/data/v10/public/modules/modulefiles/
      $ module load dea
    """
    with open(config) as config_file:
        config = yaml.safe_load(config_file)

    if pickle_file is None:
        # Use pickle file generated by save_s3_inventory function
        pickle_file = Path(output_dir) / (product_name + PICKLE_FILE_EXT)

    with open(pickle_file, "rb") as pickle_in_fl:
        s3_file_list = pickle.load(pickle_in_fl)

    dc_workgen_list = dict()

    for uri, dest_dir, dc_yamlfile_path in get_dataset_values(
            product_name, config['products'][product_name],
            parse_expressions(time_range)):
        if uri:
            dc_workgen_list[dc_yamlfile_path] = (uri.split('file://')[1],
                                                 dest_dir)

    work_list = set(dc_workgen_list.keys()) - set(s3_file_list)
    out_file = Path(output_dir) / (product_name + TASK_FILE_EXT)

    with open(out_file, 'w', newline='') as fp:
        csv_writer = csv.writer(fp, quoting=csv.QUOTE_MINIMAL)
        for s3_filepath in work_list:
            # Each dict value holds the source URI and the S3 output directory path template
            input_file, dest_dir = dc_workgen_list.get(s3_filepath,
                                                       (None, None))
            if input_file:
                LOG.info(
                    f"File does not exists in S3, add to processing list: {input_file}"
                )
                csv_writer.writerow((input_file, dest_dir))

    if not work_list:
        LOG.info(f"No tasks found")
Example #5
def my_parse(ctx, param, value):
    return parse_expressions(*list(value))
Example #6
def expression_parser(ctx, param, value):
    """ Parse query expressions like ``datacube-core``. """
    return parse_expressions(*list(value))
Example #7
def units(ctx, index, expression):
    ctx.obj['write_results'](
        index.storage.get_fields().keys(),
        index.storage.search_summaries(**parse_expressions(*expression)))
Example #8
def datasets(ctx, index, expression):
    ctx.obj['write_results'](
        index.datasets.get_field_names(),
        index.datasets.search_summaries(**parse_expressions(*expression)))
Example #9
def get_start_end_dates(expressions):
    parsed = parse_expressions(*expressions)
    time_range = parsed['time']
    return time_range.begin, time_range.end
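A short usage sketch for get_start_end_dates; the expression string is illustrative, and the returned endpoints come from the parsed time Range:

# Hypothetical call with a single time expression
begin, end = get_start_end_dates(['1996-01 < time < 1996-12'])
print(begin, end)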
Example #10
def product_counts(index, period, expression):
    for product, series in index.datasets.count_by_product_through_time(period, **parse_expressions(*expression)):
        click.echo(product.name)
        for timerange, count in series:
            click.echo('    {}: {}'.format(timerange[0].strftime("%Y-%m-%d"), count))
Example #11
def units(ctx, index, expression):
    ctx.obj['write_results'](
        index.storage.get_fields().keys(),
        index.storage.search_summaries(**parse_expressions(*expression))
    )
Example #12
def datasets(ctx, index, expression):
    ctx.obj['write_results'](
        index.datasets.get_field_names(),
        index.datasets.search_summaries(**parse_expressions(*expression))
    )