def test_mosdef_only(testresourcepath, expected_modout1):
    '''
    Fingerprint-only pipeline over INPUT_GRAPH_1: keep only the MusicAlbum
    whose byArtist points at DOC_NS('md'), plus all Person resources.
    '''
    # FIX: the original created a model and parsed INPUT_GRAPH_1 twice in a
    # row; the first model was immediately discarded (dead code). Parse once.
    modin = newmodel()
    literate.parse(INPUT_GRAPH_1, modin)

    FINGERPRINT_RULES = {
        # Copy the album only if one of its byArtist targets is DOC_NS('md')
        SCH_NS('MusicAlbum'): (
            if_(contains(follow(SCH_NS('byArtist')), DOC_NS('md')),
                materialize(COPY()))
        ),
        # Every Person is copied through unconditionally
        SCH_NS('Person'): (materialize(COPY())),
    }

    # No transform or labelize rules — fingerprinting only
    ppl = generic_pipeline(FINGERPRINT_RULES, {}, {})
    modout = ppl.run(input_model=modin)

    # Use -s to see this
    print('=' * 10, 'test_mosdef_only', '=' * 10)
    literate.write(modout)
    # import pprint; pprint.pprint(list(iter(modout)))

    assert len(modout) == 17
    assert len(
        list(util.all_origins(modout, only_types={SCH_NS('MusicAlbum')}))) == 1
    assert len(list(util.all_origins(modout, only_types={SCH_NS('Person')}))) == 3
def test_basics_1_new_entity_hook(testresourcepath, expected_modout1):
    '''
    Variant of test_basics_1 exercising the '@new-entity-hook' context extra:
    every materialized resource gets an extra materializedBy triple.
    '''
    # FIX: renamed from test_basics_1 — this module defined test_basics_1
    # twice, and the later definition shadowed this one, so pytest silently
    # never collected or ran it.
    modin = newmodel()
    modin_fpath = 'schemaorg/catcherintherye-ugly.md'
    literate.parse(
        open(os.path.join(testresourcepath, modin_fpath)).read(), modin)

    FINGERPRINT_RULES = {
        SCH_NS('Book'): (materialize(
            BF_NS('Instance'),
            fprint=[
                (BF_NS('isbn'), follow(SCH_NS('isbn'))),
            ],
        ))
    }

    TRANSFORM_RULES = {
        SCH_NS('name'): link(rel=BF_NS('name')),
        SCH_NS('author'): materialize(
            BF_NS('Person'),
            BF_NS('creator'),
            vars={
                'birthDate': follow(SCH_NS('authorBirthDate'),
                                    origin=var('input-resource'))
            },
            fprint=[
                (BF_NS('name'), target()),
                (BF_NS('birthDate'), var('birthDate')),
            ],
            links=[
                (BF_NS('name'), target()),
                (BF_NS('birthDate'), var('birthDate')),
            ]),
    }

    modout = newmodel()

    def new_entity_hook(eid):
        # Add a triple to each materialized resource
        modout.add(eid, 'http://example.org/materializedBy', 'py.test')
        return

    ctxextras = {'@new-entity-hook': new_entity_hook}
    root_ctx = DUMMY_CONTEXT.copy(output_model=modout, extras=ctxextras)
    ppl = generic_pipeline(FINGERPRINT_RULES, TRANSFORM_RULES, LABELIZE_RULES,
                           root_ctx=root_ctx)
    ppl.run(input_model=modin, output_model=modout)

    # Use -s to see this
    print('=' * 10, 'test_basics_1_new_entity_hook', '=' * 10)
    literate.write(modout)

    # Two resources are materialized (Instance + Person), so the hook fires twice
    assert len(
        list(modout.match(None, 'http://example.org/materializedBy',
                          None))) == 2
def Xtest_versa_syntax1():
    '''Disabled (X-prefixed) smoke test: parse VERSA_LITERATE1 and log each link.'''
    # logging.debug(recs)
    model = newmodel()
    model.create_space()
    # from_markdown(VERSA_LITERATE1, m, encoding='utf-8')
    literate.parse(VERSA_LITERATE1, model)
    logging.debug('VERSA LITERATE EXAMPLE 1')
    for lnk in model.match():
        logging.debug('Result: {0}'.format(repr(lnk)))
def test_versa_syntax1(testresourcepath):
    '''
    Parse doc1.md and its abbreviated form doc1.abbr.md with the same config;
    both must produce exactly the same set of links.
    '''
    config = {
        'autotype-h1': 'http://example.org/r1',
        'autotype-h2': 'http://example.org/r2',
        'interpretations': {
            VERSA_BASEIRI + 'refines': VERSA_BASEIRI + 'resourceset',
            VERSA_BASEIRI + 'properties': VERSA_BASEIRI + 'resourceset',
            VERSA_BASEIRI + 'synonyms': VERSA_BASEIRI + 'resourceset'
        }
    }

    # Parse each equivalent source document into its own model
    equiv_results = []
    for fname in ('doc1.md', 'doc1.abbr.md'):
        model = newmodel(baseiri='http://example.org/')
        # from_markdown(VERSA_LITERATE1, m, encoding='utf-8')
        doc = open(os.path.join(testresourcepath, fname)).read()
        literate.parse(doc, model, config=config)
        equiv_results.append(list(model.match()))

    # logging.debug('VERSA LITERATE EXAMPLE 1')
    # The six links both documents are expected to yield
    expected_links = [
        (I('http://uche.ogbuji.net/ndewo/'),
         I('http://bibfra.me/purl/versa/type'),
         'http://example.org/r1', {}),
        (I('http://uche.ogbuji.net/ndewo/'),
         I('http://www.w3.org/TR/html5/title'),
         'Ndewo, Colorado', {'@lang': None}),
        (I('http://uche.ogbuji.net/ndewo/'),
         I('http://www.w3.org/TR/html5/link-type/author'),
         I('http://uche.ogbuji.net/'),
         {I('http://www.w3.org/TR/html5/link/description'): 'Uche Ogbuji'}),
        (I('http://uche.ogbuji.net/ndewo/'),
         I('http://www.w3.org/TR/html5/link-type/see-also'),
         I('http://www.goodreads.com/book/show/18714145-ndewo-colorado'),
         {I('http://www.w3.org/TR/html5/link/label'): 'Goodreads'}),
        (I('http://uche.ogbuji.net/'),
         I('http://bibfra.me/purl/versa/type'),
         'http://example.org/r1', {}),
        (I('http://uche.ogbuji.net/'),
         I('http://www.w3.org/TR/html5/link-type/see-also'),
         I('http://uche.ogbuji.net/ndewo/'), {}),
    ]

    for results in equiv_results:
        # import pprint; pprint.pprint(results)
        assert len(results) == 6
        for lnk in expected_links:
            assert lnk in results
def test_basics_1(testresourcepath, expected_modout1):
    '''
    End-to-end pipeline test: parse a schema.org Book record from Versa
    literate markdown, fingerprint it into a BIBFRAME Instance, transform
    name/author links, then check resulting model size and contents.
    '''
    # Parse the input markdown resource into a fresh model
    modin = newmodel()
    modin_fpath = 'schemaorg/catcherintherye-ugly.md'
    literate.parse(open(os.path.join(testresourcepath, modin_fpath)).read(),
                   modin)

    # A schema.org Book becomes a bf:Instance, fingerprinted by its ISBN
    FINGERPRINT_RULES = {
        SCH_NS('Book'): (
            materialize(BF_NS('Instance'),
                fprint=[
                    (BF_NS('isbn'), follow(SCH_NS('isbn'))),
                ],
            )
        )
    }

    TRANSFORM_RULES = {
        # Simple relationship rename: schema.org name -> bf:name
        SCH_NS('name'): link(rel=BF_NS('name')),
        # author materializes a bf:Person linked via bf:creator;
        # birthDate is pulled from the input resource via a pipeline var
        SCH_NS('author'): materialize(BF_NS('Person'),
            BF_NS('creator'),
            vars={
                'birthDate': follow(SCH_NS('authorBirthDate'),
                    origin=var('input-resource'))
            },
            fprint=[
                (BF_NS('name'), target()),
                (BF_NS('birthDate'), var('birthDate')),
            ],
            links=[
                (BF_NS('name'), target()),
                (BF_NS('birthDate'), var('birthDate')),
            ]
        ),
    }

    ppl = generic_pipeline(FINGERPRINT_RULES, TRANSFORM_RULES, LABELIZE_RULES)

    modout = ppl.run(input_model=modin)
    # Use -s to see this
    print('='*10, 'test_basics_1', '='*10)
    literate.write(modout)

    # Expect 8 output links: one Instance, one Person, plus their properties
    assert len(modout) == 8
    assert len(list(util.all_origins(modout, only_types={BF_NS('Instance')}))) == 1
    assert len(list(util.all_origins(modout, only_types={BF_NS('Person')}))) == 1
    assert len(list(modout.match(None, BF_NS('birthDate'), '1919-01-01'))) == 1
def test_basics_2(testresourcepath):
    '''
    Pipeline test with nested materialization and type-scoped transform rules:
    a Book yields an Instance plus a linked Work, and transform rules are keyed
    by (relationship, output-resource-type) tuples (WT = Work, IT = Instance).
    '''
    modin = newmodel()
    modin_fpath = 'schemaorg/catcherintherye-ugly.md'
    literate.parse(open(os.path.join(testresourcepath, modin_fpath)).read(),
                   modin)

    FINGERPRINT_RULES = {
        SCH_NS('Book'): (
            # Output type comes from the 'itype' var (bf:Instance, see vars below)
            materialize(var('itype'),
                fprint=[
                    (BF_NS('isbn'), follow(SCH_NS('isbn'))),
                ],
                links=[
                    # Nested materialize: each Instance links to a bf:Work
                    (BF_NS('instantiates'),
                        materialize(BF_NS('Work'),
                            fprint=[
                                (BF_NS('name'), follow(SCH_NS('title'))),
                                (BF_NS('creator'), follow(SCH_NS('author'))),
                                (BF_NS('language'), var('lang')),
                            ],
                            # '@stem' refers back to the enclosing (Instance) resource
                            links=[('http://instantiated-by', var('@stem'))],
                            attach=False  # Can remove when we have smart sessions to avoid duplicate instantiates links
                        ),
                    )
                ],
                # Not really necessary; just testing vars in this scenario
                vars={
                    'lang': follow(SCH_NS('inLanguage')),
                    'itype': BF_NS('Instance')
                }
            )
        )
    }

    TRANSFORM_RULES = {
        # Rule for output resource type of Work or Instance
        (SCH_NS('name'), WT, IT): link(rel=BF_NS('name')),

        # Rule only for output resource type of Work
        (SCH_NS('author'), WT): materialize(BF_NS('Person'),
            BF_NS('creator'),
            vars={
                'birthDate': follow(SCH_NS('authorBirthDate'),
                    origin=var('input-resource'))
            },
            fprint=[
                # Supplementary type
                (VTYPE_REL, SCH_NS('Novelist')),
                (BF_NS('name'), target()),
                (BF_NS('birthDate'), var('birthDate')),
            ],
            links=[
                # Supplementary type
                (VTYPE_REL, SCH_NS('Novelist')),
                (BF_NS('name'), target()),
                (BF_NS('birthDate'), var('birthDate')),
            ],
            # Keep the fingerprint links in the output as well
            preserve_fprint=True,
        ),
    }

    ppl = generic_pipeline(FINGERPRINT_RULES, TRANSFORM_RULES, LABELIZE_RULES)

    modout = ppl.run(input_model=modin)
    # Use -s to see this
    print('='*10, 'test_basics_2', '='*10)
    literate.write(modout)
    #import pprint; pprint.pprint(list(iter(modout)))

    # One each of Instance, Work and Person, 15 links total
    assert len(modout) == 15
    assert len(list(util.all_origins(modout, only_types={BF_NS('Instance')}))) == 1
    assert len(list(util.all_origins(modout, only_types={BF_NS('Work')}))) == 1
    assert len(list(util.all_origins(modout, only_types={BF_NS('Person')}))) == 1
    assert len(list(modout.match(None, BF_NS('birthDate'), '1919-01-01'))) == 1
def test_basics_4(testresourcepath):
    '''
    Convert from schema.org to [MusicBrainz scheme](https://musicbrainz.org/doc/MusicBrainz_Database/Schema)
    '''
    import sys # Uncomment to debug
    MB_NS = I('https://musicbrainz.org/doc/MusicBrainz_Database/Schema/')
    R_TYP = MB_NS('Release')
    RG_TYP = MB_NS('ReleaseGroup')
    A_TYP = MB_NS('Artist')
    DOC_NS = I('http://example.org/records/')

    modin = newmodel()
    modin_fpath = 'schemaorg/blackstar.md'
    literate.parse(open(os.path.join(testresourcepath, modin_fpath)).read(),
                   modin)

    # Hand-add a comment property to the Mos Def resource to test that this value doesn't bleed e.g. to Kweli's output
    modin.add(DOC_NS('md'), SCH_NS('comment'), 'test')

    FINGERPRINT_RULES = {
        SCH_NS('MusicAlbum'): (
            materialize(MB_NS('ReleaseGroup'),
                fprint=[
                    (MB_NS('title'), follow(SCH_NS('name'))),
                    (MB_NS('artist'), follow(SCH_NS('byArtist'),
                                             SCH_NS('name'))),
                ],
                links=[
                    # Each ReleaseGroup contains a Release, keyed by catalogue number
                    (MB_NS('contains'),
                        materialize(MB_NS('Release'),
                            fprint=[
                                (MB_NS('catalogue-number'), var('catnum')),
                            ],
                            links=[
                                (MB_NS('catalogue-number'), var('catnum')),
                            ]
                        ))
                ],
                vars={'catnum': follow(SCH_NS('catalogNumber'))},
                # debug=sys.stderr,  # Uncomment to debug
            )
        ),
        SCH_NS('Person'): (
            materialize(MB_NS('Artist'),
                fprint=[
                    (MB_NS('name'), var('aname')),
                ],
                links=[
                    (MB_NS('name'), var('aname')),
                    (MB_NS('remark'), var('comment')),
                ],
                vars={'aname': follow(SCH_NS('name')),
                      'comment': follow(SCH_NS('comment'))},
            )
        )
    }

    TRANSFORM_RULES = {
        (SCH_NS('name'), R_TYP, RG_TYP): link(rel=MB_NS('title')),
        (SCH_NS('byArtist'), R_TYP): link(rel=MB_NS('by'),
                                          target=lookup('@resource')),
    }

    # Intentionally shadows the global LABELIZE_RULES
    LABELIZE_RULES = {
        MB_NS('ReleaseGroup'): follow(MB_NS('title')),
        MB_NS('Release'): follow(MB_NS('title')),
        MB_NS('Artist'): follow(MB_NS('name'))
    }

    ppl = generic_pipeline(FINGERPRINT_RULES, TRANSFORM_RULES, LABELIZE_RULES)

    modout = ppl.run(input_model=modin)
    # Use -s to see this
    print('='*10, 'test_basics_4', '='*10)
    literate.write(modout)
    # import pprint; pprint.pprint(list(iter(modout)))

    assert len(modout) == 16
    assert len(list(util.all_origins(modout, only_types={MB_NS('ReleaseGroup')}))) == 1
    # FIX: the ReleaseGroup assertion above was duplicated verbatim here.
    # TODO(review): the duplicate was probably meant to check MB_NS('Release')
    # — confirm against the fingerprint rules and add that assertion if so.
    assert len(list(util.all_origins(modout, only_types={MB_NS('Artist')}))) == 2
    # assert len(list(modout.match(None, BF_NS('birthDate'), '1919-01-01'))) == 1

    # DOC_NS('md') -> I('i5GvPVm7ClA') in the transform
    assert [ l[0] for l in modout.match(None, MB_NS('remark'), 'test')] == [I('i5GvPVm7ClA')]
def labelize(self): ''' Executes a utility rule to create labels in output model for new resources ''' # XXX Check if there's already a label? # Apply a common transform strategy using rules defined above def missed_label(origin, type): ''' Callback to handle cases where a transform wasn't found to match a link (by relationship) in the input model ''' warnings.warn(f'No label generated for: {origin}') labels = self.labelize_helper(LABELIZE_RULES, handle_misses=missed_label) return True if __name__ == '__main__': for rec in INPUT_RECORDS: ppl = dc_schema_pipeline() input_model = newmodel() literate.parse(rec, input_model) output_model = ppl.run(input_model=input_model) print('Resulting record Fingerprints:', ppl.fingerprints) print('Low level JSON dump of output data model: ') util.jsondump(output_model, sys.stdout) print('Versa literate form of output: ') literate.write(output_model, out=sys.stdout) # from versa.serial import mermaid # print('Mermaid diagram form of output: ') # mermaid.write(output_model, out=sys.stdout)