Example #1
0
def test_traverse_datasets():
    """
      A -> B
      |    |
      |    v
      +--> C -> D
      |
      +--> E
    """

    def node(name, **kwargs):
        # Minimal stand-in for a dataset: an id plus named source links.
        return SimpleNamespace(id=name, sources=kwargs)

    A, *_ = make_graph_abcde(node)

    def visitor(node, name=None, depth=0, out=None):
        # Record each visit as "<id>:<edge-name>:<depth>".
        out.append('{}:{}:{:d}'.format(node.id, name or '..', depth))

    # An unknown traversal mode must be rejected.
    with pytest.raises(ValueError):
        traverse_datasets(A, visitor, mode='not-a-real-mode')

    expect_preorder = '\n'.join([
        'A:..:0',
        'B:ab:1',
        'C:bc:2',
        'D:cd:3',
        'C:ac:1',
        'D:cd:2',
        'E:ae:1',
    ])

    expect_postorder = '\n'.join([
        'D:cd:3',
        'C:bc:2',
        'B:ab:1',
        'D:cd:2',
        'C:ac:1',
        'E:ae:1',
        'A:..:0',
    ])

    cases = [('pre-order', expect_preorder),
             ('post-order', expect_postorder)]
    for mode, expect in cases:
        visited = []
        traverse_datasets(A, visitor, mode=mode, out=visited)
        assert '\n'.join(visited) == expect

    fv = flatten_datasets(A)

    # C is reachable twice (via B and directly), A and E exactly once.
    assert len(fv['A']) == 1
    assert len(fv['C']) == 2
    assert len(fv['E']) == 1
    assert set(fv.keys()) == set('ABCDE')

    # A node with sources=None is treated as a leaf.
    leaf = SimpleNamespace(id='N', sources=None)
    visited = []
    traverse_datasets(leaf, visitor, out=visited)
    assert visited == ["N:..:0"]
Example #2
0
File: hl.py  Project: zs856/datacube-core
    def resolve(main_ds, uri):
        """Resolve a dataset document (including its lineage) against the index.

        Returns ``(dataset, None)`` on success, or ``(None, error)`` where
        ``error`` is an exception or a human-readable message string.
        """
        try:
            main_ds = SimpleDocNav(dedup_lineage(main_ds))
        except InvalidDocException as e:
            return None, e

        main_uuid = main_ds.id

        # One representative document per uuid found in the lineage tree.
        uuid_map = toolz.valmap(toolz.first, flatten_datasets(main_ds))
        all_uuid = list(uuid_map)
        db_datasets = {str(ds.id): ds for ds in index.datasets.bulk_get(all_uuid)}

        lineage_uuids = {u for u in all_uuid if u != main_uuid}
        missing_lineage = lineage_uuids - set(db_datasets)

        if missing_lineage and fail_on_missing_lineage:
            return None, "Following lineage datasets are missing from DB: %s" % (','.join(missing_lineage))

        # EO3 documents skip the per-source consistency check.
        if verify_lineage and not is_doc_eo3(main_ds.doc):
            bad_lineage = []

            for uuid in lineage_uuids:
                if uuid not in db_datasets:
                    continue
                ok, err = check_consistent(jsonify_document(uuid_map[uuid].doc_without_lineage_sources),
                                           db_datasets[uuid].metadata_doc)
                if not ok:
                    bad_lineage.append((uuid, err))

            if bad_lineage:
                report = '\n'.join('Inconsistent lineage dataset {}:\n> {}'.format(uuid, err)
                                   for uuid, err in bad_lineage)
                return None, report

        def resolve_ds(ds, sources, cache=None):
            # Re-use a previously constructed Dataset for repeated nodes.
            cached = cache.get(ds.id)
            if cached is not None:
                return cached

            db_ds = db_datasets.get(ds.id)
            product = db_ds.type if db_ds else match_product(ds.doc)

            # Only the top-level dataset carries the supplied uri.
            resolved = Dataset(product,
                               ds.doc,
                               uris=[uri] if ds.id == main_uuid else [],
                               sources=sources)
            cache[ds.id] = resolved
            return resolved

        try:
            return remap_lineage_doc(main_ds, resolve_ds, cache={}), None
        except BadMatch as e:
            return None, e
Example #3
0
def test_simple_doc_nav():
    """
      A -> B
      |    |
      |    v
      +--> C -> D
      |
      +--> E
    """
    def node(name, **kwargs):
        # Raw document shape expected by SimpleDocNav.
        return dict(id=name, lineage=dict(source_datasets=kwargs))

    A, _, C, _, _ = make_graph_abcde(node)
    rdr = SimpleDocNav(A)

    assert rdr.doc == A
    assert rdr.doc_without_lineage_sources == node('A')
    assert isinstance(rdr.sources['ae'], SimpleDocNav)
    assert rdr.sources['ab'].sources['bc'].doc == C
    # Cached properties must hand back the same object every access.
    assert rdr.doc_without_lineage_sources is rdr.doc_without_lineage_sources
    assert rdr.sources is rdr.sources
    assert isinstance(rdr.sources_path, tuple)

    def visitor(node, name=None, depth=0, out=None):
        # Record each visit as "<id>:<edge-name>:<depth>".
        out.append('{}:{}:{:d}'.format(node.id, name or '..', depth))

    expect_preorder = '\n'.join([
        'A:..:0',
        'B:ab:1',
        'C:bc:2',
        'D:cd:3',
        'C:ac:1',
        'D:cd:2',
        'E:ae:1',
    ])

    expect_postorder = '\n'.join([
        'D:cd:3',
        'C:bc:2',
        'B:ab:1',
        'D:cd:2',
        'C:ac:1',
        'E:ae:1',
        'A:..:0',
    ])

    cases = [('pre-order', expect_preorder),
             ('post-order', expect_postorder)]
    for mode, expect in cases:
        visited = []
        traverse_datasets(rdr, visitor, mode=mode, out=visited)
        assert '\n'.join(visited) == expect

    fv = flatten_datasets(rdr)

    assert len(fv['A']) == 1
    assert len(fv['C']) == 2
    assert len(fv['E']) == 1
    assert set(fv.keys()) == set('ABCDE')

    # Same again, but also asking for the per-depth grouping.
    fv, dg = flatten_datasets(rdr, with_depth_grouping=True)

    assert len(fv['A']) == 1
    assert len(fv['C']) == 2
    assert len(fv['E']) == 1
    assert set(fv.keys()) == set('ABCDE')
    assert isinstance(dg, list)
    assert len(dg) == 4
    assert [len(level) for level in dg] == [1, 3, 2, 1]

    def ids_of(datasets):
        return {d.id for d in datasets}

    assert [ids_of(level) for level in dg] == [set(s)
                                               for s in ('A', 'BCE', 'CD', 'D')]
Example #4
0
    def add(self, dataset, with_lineage=None, **kwargs):
        """
        Add ``dataset`` to the index. No-op if it is already present.

        :param Dataset dataset: dataset to add
        :param bool with_lineage: True -- attempt adding lineage if it's missing, False don't
        :rtype: Dataset
        """
        def insert_bunch(datasets, main_ds, transaction):
            edges = []

            # First insert every new dataset, remembering the lineage edges
            # of those that were actually new.
            for ds in datasets:
                if transaction.insert_dataset(ds.metadata_doc_without_lineage(),
                                              ds.id,
                                              ds.type.id):
                    edges.extend((name, ds.id, src.id)
                                 for name, src in ds.sources.items())

            # Then record the lineage-graph edges.
            for edge in edges:
                transaction.insert_dataset_source(*edge)

            # Finally update the location, for the top-level dataset only.
            if main_ds.uris is not None:
                self._ensure_new_locations(main_ds, transaction=transaction)

        if with_lineage is None:
            # Honour the deprecated sources_policy keyword if supplied.
            policy = kwargs.pop('sources_policy', None)
            if policy is None:
                with_lineage = True
            else:
                _LOG.debug('Use of sources_policy is deprecated')
                with_lineage = (policy != "skip")
                if policy == 'verify':
                    _LOG.debug('Verify is no longer done inside add')

        _LOG.info('Indexing %s', dataset.id)

        if with_lineage:
            ds_by_uuid = flatten_datasets(dataset)
            all_uuids = list(ds_by_uuid)

            # Map uuid -> "already in the database?"
            present = dict(zip(all_uuids, self.bulk_has(all_uuids)))

            if present[dataset.id]:
                _LOG.warning('Dataset %s is already in the database',
                             dataset.id)
                return dataset

            # One representative per uuid, keeping only those not yet stored.
            representatives = (group[0] for group in ds_by_uuid.values())
            to_insert = [ds for ds in representatives if not present[ds.id]]
        else:
            if self.has(dataset.id):
                _LOG.warning('Dataset %s is already in the database',
                             dataset.id)
                return dataset

            to_insert = [dataset]

        with self._db.begin() as transaction:
            insert_bunch(to_insert, dataset, transaction)

        return dataset