def test_dataset_add_ambgious_products(dataset_add_configs, index_empty, clirunner):
    """Check ``dataset add`` when a dataset matches more than one product.

    Two dataset documents carrying both ``flag_a`` and ``flag_b`` are written
    next to a product file defining product ``A`` (keyed on ``flag_a``) and
    product ``B`` (keyed on ``flag_b``).  A plain ``dataset add`` is expected
    to report an auto-match failure and leave the index untouched; adding with
    ``--product A`` or with ``--exclude-product B`` is expected to index the
    dataset.
    """
    cfg = dataset_add_configs
    index = index_empty

    # Same metadata for both documents; only the dataset_maker seed differs.
    shared_fields = dict(product_type='eo', flag_a='a', flag_b='b')
    datasets = [SimpleDocNav(dataset_maker(seed)('A', **shared_fields))
                for seed in (1, 2)]

    products_yaml = '''
name: A
description: test product A
metadata_type: minimal
metadata:
    product_type: eo
    flag_a: a

---
name: B
description: test product B
metadata_type: minimal
metadata:
    product_type: eo
    flag_b: b
    '''

    prefix = write_files({
        'products.yml': products_yaml,
        'dataset1.yml': yaml.safe_dump(datasets[0].doc),
        'dataset2.yml': yaml.safe_dump(datasets[1].doc),
    })

    clirunner(['metadata', 'add', cfg.metadata])
    clirunner(['product', 'add', str(prefix / 'products.yml')])

    known_products = list(index.products.get_all())
    assert len(known_products) == 2

    # Without a hint, neither dataset may be added.
    for number, doc_nav in enumerate(datasets, start=1):
        dataset_path = str(prefix / ('dataset%d.yml' % number))
        result = clirunner(['dataset', 'add', dataset_path])
        assert 'ERROR Auto match failed' in result.output
        assert 'matches several products' in result.output
        assert index.datasets.has(doc_nav.id) is False

    # check that forcing product works
    first = datasets[0]
    clirunner(['dataset', 'add', '--product', 'A', str(prefix / 'dataset1.yml')])
    assert index.datasets.has(first.id) is True

    # check that forcing via exclude works
    second = datasets[1]
    clirunner(['dataset', 'add', '--exclude-product', 'B', str(prefix / 'dataset2.yml')])
    assert index.datasets.has(second.id) is True
# Example #2
# 0
def test_dataset_maker():
    """Exercise ``dataset_maker``: repeatability, ids, timestamps and sources."""
    make = dataset_maker(0)

    # Calling the same maker twice with the same label must give equal documents.
    first, second = make('aa'), make('aa')
    assert first == second

    nav_a = SimpleDocNav(make('A'))
    nav_b = SimpleDocNav(make('B'))

    # Same maker, different labels: distinct ids but a shared creation time.
    assert nav_a.id != nav_b.id
    assert nav_a.doc['creation_dt'] == nav_b.doc['creation_dt']
    assert isinstance(nav_a.id, str)
    assert nav_a.sources == {}

    # Different maker seeds, same label: both id and creation time differ.
    doc_seed0, doc_seed1 = (dataset_maker(seed)('A', product_type='eo')
                            for seed in (0, 1))
    assert doc_seed0['id'] != doc_seed1['id']
    assert doc_seed0['creation_dt'] != doc_seed1['creation_dt']
    assert doc_seed0['product_type'] == 'eo'

    # Source documents are expected to be embedded by reference, not copied.
    derived = SimpleDocNav(make('C', sources={'a': nav_a.doc, 'b': nav_b.doc}))
    for key, parent in (('a', nav_a), ('b', nav_b)):
        assert derived.sources[key].doc is parent.doc
# Example #3
# 0
def test_dataset_add_ambgious_products(dataset_add_configs, index_empty,
                                       clirunner):
    """Check ``dataset add`` for a dataset that matches two products.

    A single dataset document carrying both ``flag_a`` and ``flag_b`` is
    written next to a product file defining product ``A`` (keyed on
    ``flag_a``) and product ``B`` (keyed on ``flag_b``).  A plain
    ``dataset add`` is expected to report an auto-match failure and not index
    the dataset; repeating the command with ``--product A`` is expected to
    index it.
    """
    cfg = dataset_add_configs
    index = index_empty
    make = dataset_maker(0)

    doc_nav = SimpleDocNav(
        make('A', product_type='eo', flag_a='a', flag_b='b'))

    products_yaml = '''
name: A
description: test product A
metadata_type: minimal
metadata:
    product_type: eo
    flag_a: a

---
name: B
description: test product B
metadata_type: minimal
metadata:
    product_type: eo
    flag_b: b
    '''

    prefix = write_files({
        'products.yml': products_yaml,
        'dataset.yml': yaml.safe_dump(doc_nav.doc),
    })
    dataset_path = str(prefix / 'dataset.yml')

    clirunner(['metadata_type', 'add', cfg.metadata])
    clirunner(['product', 'add', str(prefix / 'products.yml')])

    known_products = list(index.products.get_all())
    assert len(known_products) == 2

    # Without a hint the add must be refused.
    result = clirunner(['dataset', 'add', dataset_path])
    assert 'ERROR Auto match failed' in result.output
    assert 'matches several products' in result.output
    assert index.datasets.has(doc_nav.id) is False

    # check that forcing product works
    clirunner(['dataset', 'add', '--product', 'A', dataset_path])
    assert index.datasets.has(doc_nav.id) is True
def test_dataset_add_with_nans(dataset_add_configs, index_empty, clirunner):
    """Check that NaN and infinite metadata values survive ``dataset add``.

    Dataset ``C`` carries ``nan``, ``inf`` and ``-inf`` fields and is only
    present as a lineage source of ``A`` and ``B``.  After adding ``A`` and
    ``B`` with ``--auto-add-lineage``, all three datasets are expected to be
    indexed and the stored document for ``C`` is expected to hold the strings
    ``'NaN'``, ``'Infinity'`` and ``'-Infinity'``.
    """
    cfg = dataset_add_configs
    index = index_empty

    clirunner(['metadata', 'add', cfg.metadata])
    clirunner(['product', 'add', cfg.products])

    make = dataset_maker(0)

    doc_c = make(
        'C',
        product_type='C',
        val_is_nan=math.nan,
        val_is_inf=math.inf,
        val_is_neginf=-math.inf,
    )
    doc_b = make('B', sources={'bc': doc_c}, product_type='B')
    doc_a = make('A', sources={'ac': doc_c}, product_type='A')

    # Only A and B are written out; C reaches the index through their lineage.
    prefix = write_files({
        'dataset.yml': yaml.safe_dump_all([doc_a, doc_b]),
    })

    result = clirunner([
        'dataset', 'add',
        '--auto-add-lineage', '--verify-lineage',
        str(prefix / 'dataset.yml'),
    ])
    assert "ERROR" not in result.output

    nav_a, nav_b, nav_c = map(SimpleDocNav, (doc_a, doc_b, doc_c))

    present = index.datasets.bulk_has([nav_a.id, nav_b.id, nav_c.id])
    assert present == [True, True, True]

    stored_c = index.datasets.get(nav_c.id).metadata_doc

    expected_text = {
        'val_is_nan': 'NaN',
        'val_is_inf': 'Infinity',
        'val_is_neginf': '-Infinity',
    }
    for field, text in expected_text.items():
        assert stored_c[field] == text
def test_dataset_add_inconsistent_measurements(dataset_add_configs,
                                               index_empty, clirunner):
    """Check which datasets are accepted based on their ``measurements``.

    The product declares measurements ``red`` and ``green``.  Five datasets
    are submitted in one file: measurements not set, empty, a subset, the full
    set and a super-set.  Only the full set and the super-set are expected to
    appear in the ``dataset search`` output afterwards.
    """
    cfg = dataset_add_configs
    index = index_empty
    make = dataset_maker(0)

    def eo_dataset(label, **extra):
        # All documents share product_type 'eo'; only the measurements vary.
        return SimpleDocNav(make(label, product_type='eo', **extra))

    # not set, empty, subset, full set, super-set
    not_set = eo_dataset('A')
    empty = eo_dataset('B', measurements={})
    subset = eo_dataset('C', measurements={'red': {}})
    full_set = eo_dataset('D', measurements={'red': {}, 'green': {}})
    super_set = eo_dataset(
        'E', measurements={'red': {}, 'green': {}, 'extra': {}})

    all_datasets = (not_set, empty, subset, full_set, super_set)
    docs = [nav.doc for nav in all_datasets]

    product_yaml = '''
name: eo
description: test product
metadata_type: with_measurements
metadata:
    product_type: eo

measurements:
    - name: red
      dtype: int16
      nodata: -999
      units: '1'

    - name: green
      dtype: int16
      nodata: -999
      units: '1'
    '''

    prefix = write_files({
        'products.yml': product_yaml,
        'dataset.yml': yaml.safe_dump_all(docs),
    })

    clirunner(['metadata', 'add', cfg.metadata])
    clirunner(['product', 'add', str(prefix / 'products.yml')])

    known_products = list(index.products.get_all())
    assert len(known_products) == 1

    add_result = clirunner(['dataset', 'add', str(prefix / 'dataset.yml')])
    print(add_result.output)

    listing = clirunner(['dataset', 'search', '-f', 'csv']).output

    # Datasets lacking any of the product's measurements must be missing.
    for rejected in (not_set, empty, subset):
        assert rejected.id not in listing

    # Datasets covering every product measurement must be listed.
    for accepted in (full_set, super_set):
        assert accepted.id in listing