Example #1
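A dataset-indexing helper: it streams metadata documents from the given paths with ui_path_doc_stream, resolves them into datasets, and passes them to index_datasets, honouring the auto_add_lineage and dry_run flags from the enclosing command.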
def run_it(dataset_paths):
    doc_stream = ui_path_doc_stream(dataset_paths, logger=_LOG, uri=True)
    dss = dataset_stream(doc_stream, ds_resolve)
    index_datasets(dss,
                   index,
                   auto_add_lineage=auto_add_lineage,
                   dry_run=dry_run)
Example #2
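A CLI command that converts STAC items into ODC dataset documents: it filters the STAC collection, writes the converted documents to YAML files, then streams those files back through ui_path_doc_stream, resolves them with Doc2Dataset, and adds each dataset to the Data Cube index.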
def item2dataset_cli(stac_collection, dc_product, url, outdir, max_items,
                     engine_file, datacube_config, verbose, access_token,
                     advanced_filter):
    _filter = {"collections": [stac_collection]}
    if advanced_filter:
        _filter = {**_filter, **prepare_advanced_filter(advanced_filter)}

    stac_service = stac.STAC(url, False, access_token=access_token)
    dc_index = datacube_index(datacube_config)

    features = create_feature_collection_from_stac_elements(
        stac_service, int(max_items), _filter)
    odc_datasets = stac2odc.item.item2dataset(engine_file,
                                              dc_product,
                                              features,
                                              dc_index,
                                              verbose=verbose)
    odc_datasets_definition_files = write_odc_element_in_yaml_file(
        odc_datasets, outdir)

    # add dataset definitions to the datacube index
    # code adapted from: https://github.com/opendatacube/datacube-core/blob/develop/datacube/scripts/dataset.py
    ds_resolve = Doc2Dataset(dc_index, [dc_product])
    doc_stream = remap_uri_from_doc(
        ui_path_doc_stream(odc_datasets_definition_files, uri=True))
    datasets_on_stream = dataset_stream(doc_stream, ds_resolve)

    logger_message("Adding datasets", logger.info, True)
    for dataset in datasets_on_stream:
        try:
            dc_index.datasets.add(dataset, with_lineage=True)
        except (ValueError, MissingRecordError):
            logger_message(f"Error to add dataset ({dataset.local_uri})",
                           logger.warning, True)
Example #3
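A generator that pairs ui_path_doc_stream with a resolver: each (uri, doc) pair is resolved to a dataset, documents that fail to resolve or fail check_dataset_consistent are logged and skipped, and valid datasets are yielded.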
    def load_datasets(path, ds_resolve):
        for uri, ds in ui_path_doc_stream(path):

            dataset, err = ds_resolve(ds, uri)

            if dataset is None:
                _LOG.error('dataset is empty', error=str(err))
                continue

            is_consistent, reason = check_dataset_consistent(dataset)
            if not is_consistent:
                _LOG.error("dataset inconsistency", dataset=dataset.id, reason=str(reason))
                continue

            yield dataset
Example #4
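A test that feeds ui_path_doc_stream both a local file path and an HTTP URL (served by pytest's httpserver fixture) and checks that each input yields a parsed document paired with the path it came from.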
def test_ui_path_doc_stream(httpserver):
    filename = 'dataset_metadata.yaml'
    file_content = ''
    out_dir = write_files({filename: file_content})

    httpserver.expect_request(filename).respond_with_data(file_content)

    input_paths = [
        Path(out_dir) / 'dataset_metadata.yaml',
        httpserver.url_for(filename)
    ]

    for input_path, (doc,
                     resolved_path) in zip(input_paths,
                                           ui_path_doc_stream(input_paths)):
        assert doc == {}
        assert input_path == resolved_path
Example #5
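An update command: documents streamed from dataset_paths are matched against existing indexed datasets, updated subject to the keys_that_can_change rules, and their old locations archived, forgotten, or kept according to location_policy, with a dry-run mode that only reports what would change.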
def update_cmd(index, keys_that_can_change, dry_run, location_policy, dataset_paths):
    def loc_action(action, new_ds, existing_ds, action_name):
        if len(existing_ds.uris) == 0:
            return None

        if len(existing_ds.uris) > 1:
            _LOG.warning("Refusing to %s old location, there are several", action_name)
            return None

        new_uri, = new_ds.uris
        old_uri, = existing_ds.uris

        if new_uri == old_uri:
            return None

        if dry_run:
            echo('Will {} old location {}, and add new one {}'.format(action_name, old_uri, new_uri))
            return True

        return action(existing_ds.id, old_uri)

    def loc_archive(new_ds, existing_ds):
        return loc_action(index.datasets.archive_location, new_ds, existing_ds, 'archive')

    def loc_forget(new_ds, existing_ds):
        return loc_action(index.datasets.remove_location, new_ds, existing_ds, 'forget')

    def loc_keep(new_ds, existing_ds):
        return None

    update_loc = dict(archive=loc_archive,
                      forget=loc_forget,
                      keep=loc_keep)[location_policy]

    updates_allowed = parse_update_rules(keys_that_can_change)

    success, fail = 0, 0

    for dataset, existing_ds in load_datasets_for_update(
            ui_path_doc_stream(dataset_paths, logger=_LOG, uri=True), index):
        _LOG.info('Matched %s', dataset)

        if location_policy != 'keep':
            if len(existing_ds.uris) > 1:
                # TODO:
                pass

        if not dry_run:
            try:
                index.datasets.update(dataset, updates_allowed=updates_allowed)
                update_loc(dataset, existing_ds)
                success += 1
                echo('Updated %s' % dataset.id)
            except ValueError as e:
                fail += 1
                echo('Failed to update %s: %s' % (dataset.id, e))
        else:
            if update_dry_run(index, updates_allowed, dataset):
                update_loc(dataset, existing_ds)
                success += 1
            else:
                fail += 1
    echo('%d successful, %d failed' % (success, fail))