示例#1
0
def test_Dataset_validate(tmpdir, mocker):
    """Check ``validate`` on a freshly written dataset and on broken metadata.

    A ``StructureDataset`` written with an empty ``ValueTable`` is expected to
    validate. The test then expects ``validate()`` to raise ``ValueError``
    after each of these changes: emptying the ``ValueTable`` column list,
    emptying the table list, setting an unknown ``dc:conformsTo`` URL on a
    table, and setting an unknown ``propertyUrl`` on a column. With a mock
    ``log`` passed in, the emptied column list yields a falsy return value
    instead of an exception.
    """
    target_dir = str(tmpdir / 'new')
    unknown_url = 'http://cldf.clld.org/404'

    def expect_invalid(dataset):
        # Called without ``log``; the test expects a ValueError here.
        with pytest.raises(ValueError):
            dataset.validate()

    # Baseline: an empty but well-formed dataset.
    dataset = StructureDataset.in_dir(target_dir)
    dataset.write(ValueTable=[])
    assert dataset.validate()

    # Drop every column from the ValueTable schema.
    value_table = dataset['ValueTable']
    value_table.tableSchema.columns = []
    expect_invalid(dataset)
    assert not dataset.validate(log=mocker.Mock())

    # Drop every table from the table group.
    dataset.tablegroup.tables = []
    expect_invalid(dataset)

    # A component whose dc:conformsTo points at an unknown URL.
    dataset = StructureDataset.in_dir(target_dir)
    dataset.add_component('LanguageTable')
    dataset.write(ValueTable=[])
    language_table = dataset['LanguageTable']
    language_table.common_props['dc:conformsTo'] = unknown_url
    expect_invalid(dataset)

    # A column whose propertyUrl points at an unknown URL.
    dataset = StructureDataset.in_dir(target_dir)
    source_column = dataset['ValueTable'].get_column('Source')
    source_column.propertyUrl = URITemplate(unknown_url)
    dataset.write(ValueTable=[])
    expect_invalid(dataset)
示例#2
0
def test_Dataset_write(tmpdir):
    """Write a ``StructureDataset`` several times and validate in between.

    Steps, in order:

    * writing an empty ``ValueTable`` creates ``values.csv``;
    * after writing one row, the metadata re-read from disk reports
      ``dc:extent == 1`` and ``stats()`` reports the same count;
    * ``stats()`` reports whatever ``dc:extent`` is set to, and validation is
      then expected to raise ``ValueError``;
    * once the ``key`` source is added and the row is rewritten (passed as a
      generator), validation passes again;
    * an ``ExampleTable`` row using the first set of column names is expected
      to fail validation, while the rewritten row passes.
    """
    def value_row(sources):
        # Build a fresh dict per call so no row object is shared across writes.
        return {
            'ID': '1',
            'Language_ID': 'abcd1234',
            'Parameter_ID': 'f1',
            'Value': 'yes',
            'Source': sources,
        }

    def row_count(dataset, component):
        # Each stats entry is indexed as (…, component name, row count).
        counts = {entry[1]: entry[2] for entry in dataset.stats()}
        return counts[component]

    dataset = StructureDataset.from_metadata(str(tmpdir))
    dataset.write(ValueTable=[])
    assert (tmpdir / 'values.csv').exists()
    dataset.validate()

    dataset.add_sources("@misc{ky,\ntitle={the title}\n}")
    dataset.write(ValueTable=[value_row(['key[1-20]', 'ky'])])

    metadata_path = str(tmpdir.join('StructureDataset-metadata.json'))
    reloaded = StructureDataset.from_metadata(metadata_path)
    assert reloaded['ValueTable'].common_props['dc:extent'] == 1
    assert row_count(dataset, 'ValueTable') == 1

    dataset['ValueTable'].common_props['dc:extent'] = 3
    assert row_count(dataset, 'ValueTable') == 3
    with pytest.raises(ValueError):
        dataset.validate()

    dataset.sources.add("@misc{key,\ntitle={the title}\n}")
    # Rows are deliberately handed over as a generator rather than a list.
    dataset.write(ValueTable=(value_row(['key[1-20]']) for _ in range(1)))
    dataset.validate()

    dataset.add_component('ExampleTable')
    rejected_example = {
        'ID': '1',
        'Language_ID': 'abcd1234',
        'Primary': 'si',
        'Translation': 'yes',
        'Analyzed': ['morph1', 'morph2', 'morph3'],
        'Gloss': ['gl1', 'gl2'],
    }
    dataset.write(
        ValueTable=[value_row(['key[1-20]'])],
        ExampleTable=[rejected_example],
    )
    with pytest.raises(ValueError):
        dataset.validate()

    # Same example with different column names and a three-item gloss list.
    accepted_example = {
        'ID': '1',
        'Language_ID': 'abcd1234',
        'Primary_Text': 'si',
        'Translated_Text': 'yes',
        'Analyzed_Word': ['morph1', 'morph2', 'morph3'],
        'Gloss': ['gl1', 'gl2', 'gl3'],
    }
    dataset['ExampleTable'].write([accepted_example])
    dataset.validate()
示例#3
0
def test_Dataset_validate(tmpdir, mocker):
    """Check ``validate`` against missing files, broken schemas and bad URLs.

    Covered situations, in order: the data file deleted from disk (falsy
    result plus a ``log.warn`` call), an intact empty dataset, an emptied
    column list, an emptied table list, a value row written alongside an
    empty ``LanguageTable``, an unknown ``dc:conformsTo`` URL and an unknown
    column ``propertyUrl``.
    """
    target_dir = str(tmpdir / 'new')
    unknown_url = 'http://cldf.clld.org/404'

    def expect_invalid(dataset):
        # Called without ``log``; the test expects a ValueError here.
        with pytest.raises(ValueError):
            dataset.validate()

    dataset = StructureDataset.in_dir(target_dir)
    dataset.write(ValueTable=[])

    # Delete the freshly written data file and validate with a mock logger.
    values_csv = tmpdir / 'new' / 'values.csv'
    assert values_csv.check()
    Path(str(values_csv)).unlink()
    logger = mocker.Mock()
    assert not dataset.validate(log=logger)
    assert logger.warn.called

    # Writing again restores the file, so validation passes.
    dataset.write(ValueTable=[])
    assert dataset.validate()

    value_table = dataset['ValueTable']
    value_table.tableSchema.columns = []
    expect_invalid(dataset)
    assert not dataset.validate(log=mocker.Mock())

    dataset.tablegroup.tables = []
    expect_invalid(dataset)

    dataset = StructureDataset.in_dir(target_dir)
    dataset.add_component('LanguageTable')
    dataset.write(ValueTable=[], LanguageTable=[])
    assert dataset.validate()

    # test violation of referential integrity:
    dangling_value = {
        'ID': '1',
        'Value': '1',
        'Language_ID': 'lid',
        'Parameter_ID': 'pid',
    }
    dataset.write(ValueTable=[dangling_value], LanguageTable=[])
    assert not dataset.validate(log=mocker.Mock())

    # test an invalid CLDF URL:
    language_table = dataset['LanguageTable']
    language_table.common_props['dc:conformsTo'] = unknown_url
    expect_invalid(dataset)

    dataset = StructureDataset.in_dir(target_dir)
    source_column = dataset['ValueTable'].get_column('Source')
    source_column.propertyUrl = URITemplate(unknown_url)
    dataset.write(ValueTable=[])
    expect_invalid(dataset)
示例#4
0
def test_newcol(tmpdir, db):
    """Load a dataset with a renamed ID column and extra columns into ``db``.

    The ``ValueTable`` ID column is renamed to ``idx`` and three columns
    (``col1``, ``col2``, ``col3``) are appended to the schema. Loading is
    expected to raise ``IntegrityError`` while the cited source is missing
    and to succeed once it has been added; afterwards the source title and
    the ``col1``/``col2`` values are read back through SQL.
    """
    dataset = StructureDataset.in_dir(str(tmpdir / 'd'))

    # Rename the ID column of the ValueTable. Only the column name is changed
    # here, so the propertyUrl remains the same.
    dataset['ValueTable', 'ID'].name = 'idx'
    extra_columns = [
        Column(name='col1', datatype='anyURI'),
        Column(name='col2', datatype='integer'),
        Column(name='col3'),
    ]
    dataset['ValueTable'].tableSchema.columns.extend(extra_columns)

    value_row = {
        'idx': '1',
        'Language_ID': 'l',
        'Parameter_ID': 'p',
        'Value': 'v',
        'Source': ['meier2015'],
        'col2': 5,
        'col1': anyURI().to_python('http://example.org'),
    }
    dataset.write(ValueTable=[value_row])

    db.create()
    # A missing source is referenced: ``meier2015`` has not been added yet.
    with pytest.raises(IntegrityError):
        db.load(dataset)
    dataset.add_sources("@misc{meier2015,\ntitle={title}\n}")
    db.load(dataset)

    title_query = """\
select
  s.title
from
  SourceTable as s, ValueSource as vs, ValueTable as v
where
  s.ID = vs.Source_ID and vs.Value_ID = v.id and v.id = 1"""
    assert db.fetchone(title_query)[0] == 'title'

    col1_value = db.fetchone("select col1 from valuetable")[0]
    assert col1_value == 'http://example.org'
    col2_value = db.fetchone("select col2 from valuetable")[0]
    assert col2_value == 5
示例#5
0
def test_Dataset_validate_missing_table(tmpdir, mocker):
    """Validating a dataset with an empty table list must call ``log.warn``.

    The table list of the table group is emptied before writing; the test
    only checks that the mock logger's ``warn`` method was invoked, not what
    ``validate`` returns.
    """
    logger = mocker.Mock()
    dataset = StructureDataset.from_metadata(str(tmpdir))
    dataset.tablegroup.tables = []
    dataset.write()
    dataset.validate(log=logger)
    assert logger.warn.called
示例#6
0
def test_Dataset_remove_columns(tmpdir):
    """Validate foreign keys after removing a column from ``languages.csv``.

    A foreign key from ``ValueTable.Value`` to ``LanguageTable.Name`` is added
    and the ``ID`` column is removed from ``languages.csv``. With the value
    ``'1'`` written, validation is expected to raise ``ValueError`` when the
    only language is named ``'x'`` and to pass when it is named ``'1'``.
    """
    dataset = StructureDataset.in_dir(str(tmpdir / 'new'))
    dataset.add_component('LanguageTable')
    dataset.add_foreign_key('ValueTable', 'Value', 'LanguageTable', 'Name')
    dataset.remove_columns('languages.csv', 'ID')

    def write_with_language(name):
        # One fixed value row plus a single language carrying ``name``.
        value_row = {
            'ID': '1',
            'Language_ID': '1',
            'Parameter_ID': '1',
            'Value': '1',
        }
        language_row = {'Name': name}
        dataset.write(ValueTable=[value_row], LanguageTable=[language_row])

    # No language is named like the value, so validation is expected to fail.
    write_with_language('x')
    with pytest.raises(ValueError):
        dataset.validate()

    # The language name matches the value.
    write_with_language('1')
    assert dataset.validate()
示例#7
0
def test_Dataset_validate_missing_table(tmpdir, mocker):
    """Expect a ``log.warn`` call when validating a dataset without tables."""
    dataset = StructureDataset.from_metadata(str(tmpdir))
    # Strip all tables from the table group before writing the metadata.
    dataset.tablegroup.tables = []
    dataset.write()

    mock_log = mocker.Mock()
    dataset.validate(log=mock_log)
    # Only the warning is checked; the return value of validate() is ignored.
    assert mock_log.warn.called
示例#8
0
def test_Dataset_auto_foreign_keys(tmpdir):
    """Write and validate components that use custom column names.

    A ``LanguageTable`` with primary key ``lid`` and a ``ValueTable`` with
    primary key ``vid`` are described by plain dicts; the value table's
    ``language_lid`` column carries the ``languageReference`` property. One
    row per table is written and ``validate()`` is called without checking
    its return value.

    NOTE(review): going by the test name, this presumably relies on foreign
    keys being derived from the reference properties; confirm against the
    dataset implementation.
    """
    dataset = StructureDataset.in_dir(str(tmpdir), empty_tables=True)

    language_table = {
        'url': 'languages.csv',
        'dc:conformsTo': 'http://cldf.clld.org/v1.0/terms.rdf#LanguageTable',
        'tableSchema': {'primaryKey': 'lid'},
    }
    language_id = {
        'name': 'lid',
        'propertyUrl': 'http://cldf.clld.org/v1.0/terms.rdf#id',
    }
    dataset.add_component(language_table, language_id)

    value_table = {
        'url': 'values.csv',
        'dc:conformsTo': 'http://cldf.clld.org/v1.0/terms.rdf#ValueTable',
        'tableSchema': {'primaryKey': 'vid'},
    }
    value_columns = [
        {
            'name': 'vid',
            'propertyUrl': 'http://cldf.clld.org/v1.0/terms.rdf#id',
        },
        {
            'name': 'feature',
            'propertyUrl':
                'http://cldf.clld.org/v1.0/terms.rdf#parameterReference',
        },
        {
            'name': 'language_lid',
            'propertyUrl':
                'http://cldf.clld.org/v1.0/terms.rdf#languageReference',
        },
        {
            'name': 'value',
            'propertyUrl': 'http://cldf.clld.org/v1.0/terms.rdf#value',
        },
    ]
    dataset.add_component(value_table, *value_columns)

    language_row = {'lid': 'spam'}
    value_row = {
        'vid': '1',
        'feature': 'bing',
        'language_lid': 'spam',
        'value': 'eggs',
    }
    dataset.write(LanguageTable=[language_row], ValueTable=[value_row])
    dataset.validate()
示例#9
0
def test_Dataset_remove_table(tmpdir):
    """Check which references are still validated after ``remove_table``.

    With ``LanguageTable`` and ``ParameterTable`` present and matching rows
    written, the dataset validates. After removing ``LanguageTable``, a
    parameter ID that does not match the value row is still expected to raise
    ``ValueError``, while the value row's ``Language_ID`` no longer prevents
    validation from passing.
    """
    dataset = StructureDataset.in_dir(str(tmpdir / 'new'))
    dataset.add_component('LanguageTable')
    dataset.add_component('ParameterTable')

    def value_row():
        # Fresh dict per write; Parameter_ID and Value are integers on purpose.
        return {
            'ID': '1',
            'Language_ID': '1',
            'Parameter_ID': 1,
            'Value': 1,
        }

    def parameter_row(identifier):
        return {
            'ID': identifier,
            'Name': 'l',
        }

    language_row = {
        'ID': '1',
        'Name': 'l',
    }
    dataset.write(
        ValueTable=[value_row()],
        LanguageTable=[language_row],
        ParameterTable=[parameter_row('1')],
    )
    assert dataset.validate()

    dataset.remove_table('LanguageTable')

    # Make sure other foreign key constraints are still enforced:
    dataset.write(
        ValueTable=[value_row()],
        ParameterTable=[parameter_row('x')],
    )
    with pytest.raises(ValueError):
        dataset.validate()

    # But foreign keys into the removed table are not:
    dataset.write(
        ValueTable=[value_row()],
        ParameterTable=[parameter_row('1')],
    )
    assert dataset.validate()
示例#10
0
def test_Dataset_validate(tmpdir, mocker):
    """Run ``validate`` through a sequence of valid and invalid dataset states.

    The sequence is: data file removed from disk (falsy result and a
    ``log.warn`` call), data file rewritten (valid), column list emptied,
    table list emptied, a value row written while ``LanguageTable`` is empty,
    an unknown ``dc:conformsTo`` URL, and an unknown column ``propertyUrl``.
    """
    dataset_dir = str(tmpdir / 'new')
    bad_url = 'http://cldf.clld.org/404'

    def assert_raises_on_validate(dataset):
        # Called without ``log``; the test expects a ValueError here.
        with pytest.raises(ValueError):
            dataset.validate()

    dataset = StructureDataset.in_dir(dataset_dir)
    dataset.write(ValueTable=[])

    # Remove the data file that was just written.
    values_file = tmpdir / 'new' / 'values.csv'
    assert values_file.check()
    remove(str(values_file))
    mock_log = mocker.Mock()
    assert not dataset.validate(log=mock_log)
    assert mock_log.warn.called

    dataset.write(ValueTable=[])
    assert dataset.validate()

    dataset['ValueTable'].tableSchema.columns = []
    assert_raises_on_validate(dataset)
    assert not dataset.validate(log=mocker.Mock())

    dataset.tablegroup.tables = []
    assert_raises_on_validate(dataset)

    dataset = StructureDataset.in_dir(dataset_dir)
    dataset.add_component('LanguageTable')
    dataset.write(ValueTable=[], LanguageTable=[])
    assert dataset.validate()

    # test violation of referential integrity:
    orphan_row = {
        'ID': '1',
        'Value': '1',
        'Language_ID': 'lid',
        'Parameter_ID': 'pid',
    }
    dataset.write(ValueTable=[orphan_row], LanguageTable=[])
    assert not dataset.validate(log=mocker.Mock())

    # test an invalid CLDF URL:
    language_props = dataset['LanguageTable'].common_props
    language_props['dc:conformsTo'] = bad_url
    assert_raises_on_validate(dataset)

    dataset = StructureDataset.in_dir(dataset_dir)
    source_column = dataset['ValueTable'].get_column('Source')
    source_column.propertyUrl = URITemplate(bad_url)
    dataset.write(ValueTable=[])
    assert_raises_on_validate(dataset)
示例#11
0
def test_Dataset_validate_custom_validator(tmpdir):
    """Pass a custom validator to ``validate`` and expect its error to surface.

    The dataset validates without extra validators. When a validator
    registered for ``('ValueTable', 'Value')`` raises ``ValueError`` for the
    value ``'x'``, ``validate`` is expected to raise ``ValueError`` as well.
    """
    dataset = StructureDataset.in_dir(str(tmpdir / 'new'))
    value_row = {
        'ID': '1',
        'Value': 'x',
        'Language_ID': 'l',
        'Parameter_ID': 'p',
    }
    dataset.write(ValueTable=[value_row])
    assert dataset.validate()

    def reject_x(tg, t, c, r):
        # Only ``c.name`` and ``r`` are used: ``r`` is indexed by column name.
        # NOTE(review): ``tg`` and ``t`` are presumably the table group/dataset
        # and the table; confirm against the validator call site.
        cell = r[c.name]
        if cell == 'x':
            raise ValueError()

    custom_validators = [('ValueTable', 'Value', reject_x)]
    with pytest.raises(ValueError):
        dataset.validate(validators=custom_validators)
示例#12
0
def test_modules(tmpdir):
    """Check ``primary_table`` for generic datasets and for module classes.

    A generic ``Dataset`` reports ``None`` when it has no table or only a
    table without ``dc:conformsTo``, and ``'ValueTable'`` when its table
    conforms to the ValueTable term. Each of ``Wordlist``, ``Dictionary`` and
    ``StructureDataset`` created via ``in_dir`` reports a truthy
    ``primary_table``.
    """
    # No tables at all.
    no_tables = Dataset(_make_tg(tmpdir))
    assert no_tables.primary_table is None

    # A table that does not declare which component it conforms to.
    plain_table = {"url": "data.csv"}
    untyped = Dataset(_make_tg(tmpdir, plain_table))
    assert untyped.primary_table is None

    # A table explicitly typed as a ValueTable.
    typed_table = {
        "url": "data.csv",
        "dc:conformsTo": "http://cldf.clld.org/v1.0/terms.rdf#ValueTable",
    }
    typed = Dataset(_make_tg(tmpdir, typed_table))
    assert typed.primary_table == 'ValueTable'

    for module_cls in (Wordlist, Dictionary, StructureDataset):
        assert module_cls.in_dir(str(tmpdir)).primary_table
示例#13
0
def test_Dataset_write(tmpdir):
    """Round-trip writes of a ``StructureDataset`` with validation checks.

    The test writes an empty table, then a row citing two sources, checks the
    row count via the re-read metadata and via ``stats()``, overrides
    ``dc:extent`` and expects validation to raise ``ValueError``, rewrites
    the row from a generator once the ``key`` source exists, and finally
    writes two variants of an ``ExampleTable`` row, the first of which is
    expected to fail validation.
    """
    # Fields shared by every value row; ``Source`` is added per write.
    base_row = {
        'ID': '1',
        'Language_ID': 'abcd1234',
        'Parameter_ID': 'f1',
        'Value': 'yes',
    }

    ds = StructureDataset.from_metadata(str(tmpdir))
    ds.write(ValueTable=[])
    values_csv = tmpdir / 'values.csv'
    assert values_csv.exists()
    ds.validate()

    ds.add_sources("@misc{ky,\ntitle={the title}\n}")
    ds.write(ValueTable=[dict(base_row, Source=['key[1-20]', 'ky'])])

    metadata_file = tmpdir.join('StructureDataset-metadata.json')
    ds_from_disk = StructureDataset.from_metadata(str(metadata_file))
    assert ds_from_disk['ValueTable'].common_props['dc:extent'] == 1
    counts = {s[1]: s[2] for s in ds.stats()}
    assert counts['ValueTable'] == 1

    # stats() is expected to report the overridden extent.
    ds['ValueTable'].common_props['dc:extent'] = 3
    counts = {s[1]: s[2] for s in ds.stats()}
    assert counts['ValueTable'] == 3
    with pytest.raises(ValueError):
        ds.validate()

    ds.sources.add("@misc{key,\ntitle={the title}\n}")
    # A generator, not a list, is passed as the row iterable here.
    row_stream = (dict(base_row, Source=['key[1-20]']) for _ in range(1))
    ds.write(ValueTable=row_stream)
    ds.validate()

    ds.add_component('ExampleTable')
    first_example = {
        'ID': '1',
        'Language_ID': 'abcd1234',
        'Primary': 'si',
        'Translation': 'yes',
        'Analyzed': ['morph1', 'morph2', 'morph3'],
        'Gloss': ['gl1', 'gl2'],
    }
    ds.write(
        ValueTable=[dict(base_row, Source=['key[1-20]'])],
        ExampleTable=[first_example],
    )
    with pytest.raises(ValueError):
        ds.validate()

    # Rewritten with different column names and one gloss per analyzed word.
    second_example = {
        'ID': '1',
        'Language_ID': 'abcd1234',
        'Primary_Text': 'si',
        'Translated_Text': 'yes',
        'Analyzed_Word': ['morph1', 'morph2', 'morph3'],
        'Gloss': ['gl1', 'gl2', 'gl3'],
    }
    ds['ExampleTable'].write([second_example])
    ds.validate()