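# write() serializes rows to CSV and records the row count as dc:extent in the
# metadata; reading the dataset back from the written metadata file must report
# the same table stats. The steps below also exercise invalid states (a source
# key that has not been added yet, ExampleTable rows with wrong column names).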
def test_Dataset_write(tmpdir):
    ds = StructureDataset.from_metadata(str(tmpdir))
    ds.write(ValueTable=[])
    assert (tmpdir / 'values.csv').exists()
    ds.validate()

    ds.add_sources("@misc{ky,\ntitle={the title}\n}")
    ds.write(ValueTable=[{
        'ID': '1',
        'Language_ID': 'abcd1234',
        'Parameter_ID': 'f1',
        'Value': 'yes',
        'Source': ['key[1-20]', 'ky'],
    }])
    ds2 = StructureDataset.from_metadata(
        str(tmpdir.join('StructureDataset-metadata.json')))
    assert ds2['ValueTable'].common_props['dc:extent'] == 1
    assert {s[1]: s[2] for s in ds.stats()}['ValueTable'] == 1
    ds['ValueTable'].common_props['dc:extent'] = 3
    assert {s[1]: s[2] for s in ds.stats()}['ValueTable'] == 3
    with pytest.raises(ValueError):
        ds.validate()

    # Add the still missing source and rewrite - now the dataset is valid again:
    ds.sources.add("@misc{key,\ntitle={the title}\n}")
    ds.write(ValueTable=({
        'ID': '1',
        'Language_ID': 'abcd1234',
        'Parameter_ID': 'f1',
        'Value': 'yes',
        'Source': ['key[1-20]'],
    } for _ in range(1)))
    ds.validate()

    ds.add_component('ExampleTable')
    # Wrong column names for ExampleTable - validation must fail:
    ds.write(
        ValueTable=[{
            'ID': '1',
            'Language_ID': 'abcd1234',
            'Parameter_ID': 'f1',
            'Value': 'yes',
            'Source': ['key[1-20]'],
        }],
        ExampleTable=[{
            'ID': '1',
            'Language_ID': 'abcd1234',
            'Primary': 'si',
            'Translation': 'yes',
            'Analyzed': ['morph1', 'morph2', 'morph3'],
            'Gloss': ['gl1', 'gl2'],
        }])
    with pytest.raises(ValueError):
        ds.validate()

    # With the proper column names the rewritten ExampleTable validates:
    ds['ExampleTable'].write([{
        'ID': '1',
        'Language_ID': 'abcd1234',
        'Primary_Text': 'si',
        'Translated_Text': 'yes',
        'Analyzed_Word': ['morph1', 'morph2', 'morph3'],
        'Gloss': ['gl1', 'gl2', 'gl3'],
    }])
    ds.validate()


def test_Dataset_validate(tmpdir, mocker):
    ds = StructureDataset.in_dir(str(tmpdir / 'new'))
    ds.write(ValueTable=[])
    values = tmpdir / 'new' / 'values.csv'
    assert values.check()
    Path(str(values)).unlink()
    log = mocker.Mock()
    assert not ds.validate(log=log)
    assert log.warn.called

    ds.write(ValueTable=[])
    assert ds.validate()

    ds['ValueTable'].tableSchema.columns = []
    with pytest.raises(ValueError):
        ds.validate()
    assert not ds.validate(log=mocker.Mock())

    ds.tablegroup.tables = []
    with pytest.raises(ValueError):
        ds.validate()

    ds = StructureDataset.in_dir(str(tmpdir / 'new'))
    ds.add_component('LanguageTable')
    ds.write(ValueTable=[], LanguageTable=[])
    assert ds.validate()

    # test violation of referential integrity:
    ds.write(
        ValueTable=[{
            'ID': '1',
            'Value': '1',
            'Language_ID': 'lid',
            'Parameter_ID': 'pid',
        }],
        LanguageTable=[])
    assert not ds.validate(log=mocker.Mock())

    # test an invalid CLDF URL:
    ds['LanguageTable'].common_props['dc:conformsTo'] = 'http://cldf.clld.org/404'
    with pytest.raises(ValueError):
        ds.validate()

    ds = StructureDataset.in_dir(str(tmpdir / 'new'))
    ds['ValueTable'].get_column('Source').propertyUrl = URITemplate(
        'http://cldf.clld.org/404')
    ds.write(ValueTable=[])
    with pytest.raises(ValueError):
        ds.validate()


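# Renamed (idx) and custom, non-CLDF columns must survive loading the dataset
# into the SQLite database, including typed values such as integers and URIs.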
def test_newcol(tmpdir, db):
    ds = StructureDataset.in_dir(str(tmpdir / 'd'))
    # We rename the ID column of the ValueTable. Note that the propertyUrl
    # remains the same:
    ds['ValueTable', 'ID'].name = 'idx'
    ds['ValueTable'].tableSchema.columns.extend([
        Column(name='col1', datatype='anyURI'),
        Column(name='col2', datatype='integer'),
        Column(name='col3'),
    ])
    ds.write(ValueTable=[{
        'idx': '1',
        'Language_ID': 'l',
        'Parameter_ID': 'p',
        'Value': 'v',
        'Source': ['meier2015'],
        'col2': 5,
        'col1': anyURI().to_python('http://example.org'),
    }])
    db.create()
    with pytest.raises(IntegrityError):
        # A missing source is referenced!
        db.load(ds)
    ds.add_sources("@misc{meier2015,\ntitle={title}\n}")
    db.load(ds)
    assert db.fetchone("""\
        select s.title
        from SourceTable as s, ValueSource as vs, ValueTable as v
        where s.ID = vs.Source_ID and vs.Value_ID = v.id and v.id = 1""")[0] == 'title'
    assert db.fetchone("select col1 from valuetable")[0] == 'http://example.org'
    assert db.fetchone("select col2 from valuetable")[0] == 5


def test_Dataset_validate_missing_table(tmpdir, mocker):
    ds = StructureDataset.from_metadata(str(tmpdir))
    ds.tablegroup.tables = []
    ds.write()
    log = mocker.Mock()
    ds.validate(log=log)
    assert log.warn.called


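# With LanguageTable's ID column removed, the custom foreign key from
# ValueTable.Value to LanguageTable.Name is what gets checked: validation fails
# for Name='x' and passes once a row with Name='1' matches Value='1'.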
def test_Dataset_remove_columns(tmpdir):
    ds = StructureDataset.in_dir(str(tmpdir / 'new'))
    ds.add_component('LanguageTable')
    ds.add_foreign_key('ValueTable', 'Value', 'LanguageTable', 'Name')
    ds.remove_columns('languages.csv', 'ID')
    ds.write(
        ValueTable=[{'ID': '1', 'Language_ID': '1', 'Parameter_ID': '1', 'Value': '1'}],
        LanguageTable=[{'Name': 'x'}])
    with pytest.raises(ValueError):
        ds.validate()
    ds.write(
        ValueTable=[{'ID': '1', 'Language_ID': '1', 'Parameter_ID': '1', 'Value': '1'}],
        LanguageTable=[{'Name': '1'}])
    assert ds.validate()


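# Components specified as plain dicts with non-default column names: since the
# columns carry the canonical propertyUrls, the languageReference column is
# expected to be linked to LanguageTable's primary key automatically.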
def test_Dataset_auto_foreign_keys(tmpdir):
    ds = StructureDataset.in_dir(str(tmpdir), empty_tables=True)
    ds.add_component(
        {
            'url': 'languages.csv',
            'dc:conformsTo': 'http://cldf.clld.org/v1.0/terms.rdf#LanguageTable',
            'tableSchema': {'primaryKey': 'lid'}},
        {'name': 'lid', 'propertyUrl': 'http://cldf.clld.org/v1.0/terms.rdf#id'})
    ds.add_component(
        {
            'url': 'values.csv',
            'dc:conformsTo': 'http://cldf.clld.org/v1.0/terms.rdf#ValueTable',
            'tableSchema': {'primaryKey': 'vid'}},
        {'name': 'vid', 'propertyUrl': 'http://cldf.clld.org/v1.0/terms.rdf#id'},
        {
            'name': 'feature',
            'propertyUrl': 'http://cldf.clld.org/v1.0/terms.rdf#parameterReference'},
        {
            'name': 'language_lid',
            'propertyUrl': 'http://cldf.clld.org/v1.0/terms.rdf#languageReference'},
        {'name': 'value', 'propertyUrl': 'http://cldf.clld.org/v1.0/terms.rdf#value'})
    ds.write(
        LanguageTable=[{'lid': 'spam'}],
        ValueTable=[
            {'vid': '1', 'feature': 'bing', 'language_lid': 'spam', 'value': 'eggs'}])
    ds.validate()


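# Removing a component drops the foreign key constraints pointing into it, while
# constraints between the remaining tables stay in effect.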
def test_Dataset_remove_table(tmpdir):
    ds = StructureDataset.in_dir(str(tmpdir / 'new'))
    ds.add_component('LanguageTable')
    ds.add_component('ParameterTable')
    ds.write(
        ValueTable=[{'ID': '1', 'Language_ID': '1', 'Parameter_ID': 1, 'Value': 1}],
        LanguageTable=[{'ID': '1', 'Name': 'l'}],
        ParameterTable=[{'ID': '1', 'Name': 'l'}],
    )
    assert ds.validate()

    ds.remove_table('LanguageTable')

    # Make sure other foreign key constraints are still enforced:
    ds.write(
        ValueTable=[{'ID': '1', 'Language_ID': '1', 'Parameter_ID': 1, 'Value': 1}],
        ParameterTable=[{'ID': 'x', 'Name': 'l'}],
    )
    with pytest.raises(ValueError):
        ds.validate()

    # But foreign keys into the removed table are not:
    ds.write(
        ValueTable=[{'ID': '1', 'Language_ID': '1', 'Parameter_ID': 1, 'Value': 1}],
        ParameterTable=[{'ID': '1', 'Name': 'l'}],
    )
    assert ds.validate()


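# Custom validators are passed as (table, column, function) triples; the function
# gets the tablegroup, table, column and row (tg, t, c, r below) and raises to
# signal invalid data.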
def test_Dataset_validate_custom_validator(tmpdir):
    ds = StructureDataset.in_dir(str(tmpdir / 'new'))
    ds.write(ValueTable=[
        {'ID': '1', 'Value': 'x', 'Language_ID': 'l', 'Parameter_ID': 'p'}])
    assert ds.validate()

    def v(tg, t, c, r):
        if r[c.name] == 'x':
            raise ValueError()

    with pytest.raises(ValueError):
        ds.validate(validators=[('ValueTable', 'Value', v)])


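# primary_table is derived from a table's dc:conformsTo URL in the metadata;
# without a conforming table it is None, while the module classes (Wordlist,
# Dictionary, StructureDataset) always provide one.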
def test_modules(tmpdir):
    ds = Dataset(_make_tg(tmpdir))
    assert ds.primary_table is None
    ds = Dataset(_make_tg(tmpdir, {"url": "data.csv"}))
    assert ds.primary_table is None
    ds = Dataset(_make_tg(tmpdir, {
        "url": "data.csv",
        "dc:conformsTo": "http://cldf.clld.org/v1.0/terms.rdf#ValueTable"}))
    assert ds.primary_table == 'ValueTable'
    assert Wordlist.in_dir(str(tmpdir)).primary_table
    assert Dictionary.in_dir(str(tmpdir)).primary_table
    assert StructureDataset.in_dir(str(tmpdir)).primary_table