def test_mosdef_only(testresourcepath, expected_modout1):
    '''
    Fingerprint-only pipeline: copy the MusicAlbum resource only if its
    byArtist chain reaches the Mos Def record, and copy every Person.
    '''
    # NOTE(review): the original set up and parsed `modin` twice in a row;
    # the first model was immediately discarded, so the duplicate is removed.
    modin = newmodel()
    literate.parse(INPUT_GRAPH_1, modin)

    FINGERPRINT_RULES = {
        # Only materialize the album whose byArtist points at DOC_NS('md')
        SCH_NS('MusicAlbum'): (
            if_(contains(follow(SCH_NS('byArtist')), DOC_NS('md')),
                materialize(COPY()))
        ),

        SCH_NS('Person'): (
            materialize(COPY())
        ),
    }

    # No transform or labelize rules: fingerprinting only
    ppl = generic_pipeline(FINGERPRINT_RULES, {}, {})

    modout = ppl.run(input_model=modin)
    # Use -s to see this
    print('=' * 10, 'test_mosdef_only', '=' * 10)
    literate.write(modout)
    # import pprint; pprint.pprint(list(iter(modout)))

    assert len(modout) == 17
    assert len(list(util.all_origins(modout, only_types={SCH_NS('MusicAlbum')}))) == 1
    assert len(list(util.all_origins(modout, only_types={SCH_NS('Person')}))) == 3
def main(source):
    'Transform CSV SOURCE file to BF Lite in Versa'
    ppl = csv_bibframe_pipeline()

    # Accumulate every non-empty per-row model into a single input model
    input_model = newmodel()
    with open(source) as csvfp:
        for row_model in csv.parse_iter(csvfp, VLITERATE_TEMPLATE):
            if row_model:
                input_model.update(row_model)

    # Debug print of input model
    # literate.write([input_model], out=sys.stdout)
    output_model = ppl.run(input_model=input_model)

    print('Low level JSON dump of output data model: ')
    util.jsondump(output_model, sys.stdout)
    print('\n')  # 2 CRs
    print('Versa literate form of output: ')
    literate.write(output_model, out=sys.stdout)

    print('Diagram from extracted a sample: ')
    # Flatten the fingerprinted resource lists into one candidate pool
    out_resources = []
    for vs in ppl.fingerprints.values():
        out_resources.extend(vs)
    ITYPE = BF_NS('Instance')
    # Keep only the resources typed as BF Instances
    instances = [r for r in out_resources
                 if ITYPE in util.resourcetypes(output_model, r)]
    # Zoom in on one randomly chosen instance, two hops deep
    zoomed, _ = util.zoom_in(output_model, random.choice(instances), depth=2)
    mermaid.write(zoomed)
def test_basics_1(testresourcepath, expected_modout1):
    '''
    Schema.org Book -> BF Instance/Person pipeline, with an
    @new-entity-hook that tags every materialized resource.
    '''
    modin = newmodel()
    modin_fpath = 'schemaorg/catcherintherye-ugly.md'
    literate.parse(
        open(os.path.join(testresourcepath, modin_fpath)).read(), modin)

    FINGERPRINT_RULES = {
        SCH_NS('Book'): (materialize(
            BF_NS('Instance'),
            fprint=[
                (BF_NS('isbn'), follow(SCH_NS('isbn'))),
            ],
        ))
    }

    TRANSFORM_RULES = {
        SCH_NS('name'): link(rel=BF_NS('name')),

        SCH_NS('author'):
        materialize(BF_NS('Person'),
                    BF_NS('creator'),
                    vars={
                        'birthDate': follow(SCH_NS('authorBirthDate'),
                                            origin=var('input-resource'))
                    },
                    fprint=[
                        (BF_NS('name'), target()),
                        (BF_NS('birthDate'), var('birthDate')),
                    ],
                    links=[
                        (BF_NS('name'), target()),
                        (BF_NS('birthDate'), var('birthDate')),
                    ]),
    }

    modout = newmodel()

    def new_entity_hook(eid):
        # Add a triple to each materialized resource
        modout.add(eid, 'http://example.org/materializedBy', 'py.test')
        return

    # Thread the hook through the pipeline context extras
    ctxextras = {'@new-entity-hook': new_entity_hook}
    root_ctx = DUMMY_CONTEXT.copy(output_model=modout, extras=ctxextras)
    ppl = generic_pipeline(FINGERPRINT_RULES, TRANSFORM_RULES,
                           LABELIZE_RULES, root_ctx=root_ctx)

    ppl.run(input_model=modin, output_model=modout)
    # Use -s to see this
    print('=' * 10, 'test_basics_1', '=' * 10)
    literate.write(modout)

    # Two resources are materialized (Instance + Person), so two hook triples
    assert len(
        list(modout.match(None, 'http://example.org/materializedBy',
                          None))) == 2
def main(source):
    'Transform CSV SOURCE file to Schema.org in Versa'
    ppl = csv_schema_pipeline()

    # Parse the whole CSV straight into one input model
    input_model = newmodel()
    with open(source) as csvfp:
        csv.parse(csvfp, VLITERATE_TEMPLATE, input_model)

    # Debug print of input model
    # literate.write([input_model], out=sys.stdout)
    output_model = ppl.run(input_model=input_model)

    print('Resulting record Fingerprints:', ppl.fingerprints)
    print('Low level JSON dump of output data model: ')
    util.jsondump(output_model, sys.stdout)
    print('Versa literate form of output: ')
    literate.write(output_model, out=sys.stdout)
def test_basics_1(testresourcepath, expected_modout1):
    '''
    Basic Schema.org Book -> BF Instance/Person conversion,
    checking output size, resource counts and the birthDate link.
    '''
    modin = newmodel()
    modin_fpath = 'schemaorg/catcherintherye-ugly.md'
    literate.parse(
        open(os.path.join(testresourcepath, modin_fpath)).read(), modin)

    FINGERPRINT_RULES = {
        SCH_NS('Book'): (
            materialize(BF_NS('Instance'),
                        fprint=[
                            (BF_NS('isbn'), follow(SCH_NS('isbn'))),
                        ],
                        )
        )
    }

    TRANSFORM_RULES = {
        SCH_NS('name'): link(rel=BF_NS('name')),

        SCH_NS('author'):
        materialize(BF_NS('Person'),
                    BF_NS('creator'),
                    vars={
                        'birthDate': follow(SCH_NS('authorBirthDate'),
                                            origin=var('input-resource'))
                    },
                    fprint=[
                        (BF_NS('name'), target()),
                        (BF_NS('birthDate'), var('birthDate')),
                    ],
                    links=[
                        (BF_NS('name'), target()),
                        (BF_NS('birthDate'), var('birthDate')),
                    ]),
    }

    ppl = generic_pipeline(FINGERPRINT_RULES, TRANSFORM_RULES, LABELIZE_RULES)

    modout = ppl.run(input_model=modin)
    # Use -s to see this
    print('=' * 10, 'test_basics_1', '=' * 10)
    literate.write(modout)

    assert len(modout) == 8
    assert len(list(util.all_origins(modout, only_types={BF_NS('Instance')}))) == 1
    assert len(list(util.all_origins(modout, only_types={BF_NS('Person')}))) == 1
    assert len(list(modout.match(None, BF_NS('birthDate'), '1919-01-01'))) == 1
def test_basics_2(testresourcepath):
    '''
    Nested materialization: the Book fingerprint also materializes a
    linked Work, and transform rules are keyed by output resource type.
    '''
    modin = newmodel()
    modin_fpath = 'schemaorg/catcherintherye-ugly.md'
    literate.parse(
        open(os.path.join(testresourcepath, modin_fpath)).read(), modin)

    FINGERPRINT_RULES = {
        SCH_NS('Book'): (
            materialize(var('itype'),
                        fprint=[
                            (BF_NS('isbn'), follow(SCH_NS('isbn'))),
                        ],
                        links=[
                            (BF_NS('instantiates'),
                             materialize(BF_NS('Work'),
                                         fprint=[
                                             (BF_NS('name'), follow(SCH_NS('title'))),
                                             (BF_NS('creator'), follow(SCH_NS('author'))),
                                             (BF_NS('language'), var('lang')),
                                         ],
                                         links=[('http://instantiated-by', var('@stem'))],
                                         attach=False  # Can remove when we have smart sessions to avoid duplicate instantiates links
                                         ),
                             )
                        ],
                        # Not really necessary; just testing vars in this scenario
                        vars={
                            'lang': follow(SCH_NS('inLanguage')),
                            'itype': BF_NS('Instance')
                        }
                        )
        )
    }

    TRANSFORM_RULES = {
        # Rule for output resource type of Work or Instance
        (SCH_NS('name'), WT, IT): link(rel=BF_NS('name')),

        # Rule only for output resource type of Work
        (SCH_NS('author'), WT):
        materialize(BF_NS('Person'),
                    BF_NS('creator'),
                    vars={
                        'birthDate': follow(SCH_NS('authorBirthDate'),
                                            origin=var('input-resource'))
                    },
                    fprint=[
                        # Supplementary type
                        (VTYPE_REL, SCH_NS('Novelist')),
                        (BF_NS('name'), target()),
                        (BF_NS('birthDate'), var('birthDate')),
                    ],
                    links=[
                        # Supplementary type
                        (VTYPE_REL, SCH_NS('Novelist')),
                        (BF_NS('name'), target()),
                        (BF_NS('birthDate'), var('birthDate')),
                    ],
                    preserve_fprint=True,
                    ),
    }

    ppl = generic_pipeline(FINGERPRINT_RULES, TRANSFORM_RULES, LABELIZE_RULES)

    modout = ppl.run(input_model=modin)
    # Use -s to see this
    print('=' * 10, 'test_basics_2', '=' * 10)
    literate.write(modout)
    # import pprint; pprint.pprint(list(iter(modout)))

    assert len(modout) == 15
    assert len(list(util.all_origins(modout, only_types={BF_NS('Instance')}))) == 1
    assert len(list(util.all_origins(modout, only_types={BF_NS('Work')}))) == 1
    assert len(list(util.all_origins(modout, only_types={BF_NS('Person')}))) == 1
    assert len(list(modout.match(None, BF_NS('birthDate'), '1919-01-01'))) == 1
def test_basics_4(testresourcepath):
    '''
    Convert from schema.org to [MusicBrainz scheme](https://musicbrainz.org/doc/MusicBrainz_Database/Schema)
    '''
    import sys  # Uncomment to debug
    MB_NS = I('https://musicbrainz.org/doc/MusicBrainz_Database/Schema/')
    R_TYP = MB_NS('Release')
    RG_TYP = MB_NS('ReleaseGroup')
    A_TYP = MB_NS('Artist')
    DOC_NS = I('http://example.org/records/')

    modin = newmodel()
    modin_fpath = 'schemaorg/blackstar.md'
    literate.parse(
        open(os.path.join(testresourcepath, modin_fpath)).read(), modin)

    # Hand-add a comment property to the Mos Def resource to test that this value doesn't bleed e.g. to Kweli's output
    modin.add(DOC_NS('md'), SCH_NS('comment'), 'test')

    FINGERPRINT_RULES = {
        SCH_NS('MusicAlbum'): (
            materialize(MB_NS('ReleaseGroup'),
                        fprint=[
                            (MB_NS('title'), follow(SCH_NS('name'))),
                            (MB_NS('artist'), follow(SCH_NS('byArtist'), SCH_NS('name'))),
                        ],
                        links=[
                            (MB_NS('contains'),
                             materialize(MB_NS('Release'),
                                         fprint=[
                                             (MB_NS('catalogue-number'), var('catnum')),
                                         ],
                                         links=[
                                             (MB_NS('catalogue-number'), var('catnum')),
                                         ]
                                         ))
                        ],
                        vars={'catnum': follow(SCH_NS('catalogNumber'))},
                        # debug=sys.stderr,  # Uncomment to debug
                        )
        ),

        SCH_NS('Person'): (
            materialize(MB_NS('Artist'),
                        fprint=[
                            (MB_NS('name'), var('aname')),
                        ],
                        links=[
                            (MB_NS('name'), var('aname')),
                            (MB_NS('remark'), var('comment')),
                        ],
                        vars={'aname': follow(SCH_NS('name')),
                              'comment': follow(SCH_NS('comment'))},
                        )
        )
    }

    TRANSFORM_RULES = {
        (SCH_NS('name'), R_TYP, RG_TYP): link(rel=MB_NS('title')),
        (SCH_NS('byArtist'), R_TYP): link(rel=MB_NS('by'), target=lookup('@resource')),
    }

    # Intentionally shadows the global LABELIZE_RULES
    LABELIZE_RULES = {
        MB_NS('ReleaseGroup'): follow(MB_NS('title')),
        MB_NS('Release'): follow(MB_NS('title')),
        MB_NS('Artist'): follow(MB_NS('name'))
    }

    ppl = generic_pipeline(FINGERPRINT_RULES, TRANSFORM_RULES, LABELIZE_RULES)

    modout = ppl.run(input_model=modin)
    # Use -s to see this
    print('=' * 10, 'test_basics_4', '=' * 10)
    literate.write(modout)
    # import pprint; pprint.pprint(list(iter(modout)))

    assert len(modout) == 16
    assert len(list(util.all_origins(modout, only_types={MB_NS('ReleaseGroup')}))) == 1
    # FIX(review): this assertion duplicated the ReleaseGroup check above;
    # the fingerprint rules also materialize exactly one Release, which is
    # what the duplicate was evidently meant to verify.
    assert len(list(util.all_origins(modout, only_types={MB_NS('Release')}))) == 1
    assert len(list(util.all_origins(modout, only_types={MB_NS('Artist')}))) == 2
    # assert len(list(modout.match(None, BF_NS('birthDate'), '1919-01-01'))) == 1

    # DOC_NS('md') -> I('i5GvPVm7ClA') in the transform
    assert [l[0] for l in modout.match(None, MB_NS('remark'), 'test')] == [I('i5GvPVm7ClA')]
def labelize(self): ''' Executes a utility rule to create labels in output model for new resources ''' # XXX Check if there's already a label? # Apply a common transform strategy using rules defined above def missed_label(origin, type): ''' Callback to handle cases where a transform wasn't found to match a link (by relationship) in the input model ''' warnings.warn(f'No label generated for: {origin}') labels = self.labelize_helper(LABELIZE_RULES, handle_misses=missed_label) return True if __name__ == '__main__': for rec in INPUT_RECORDS: ppl = dc_schema_pipeline() input_model = newmodel() literate.parse(rec, input_model) output_model = ppl.run(input_model=input_model) print('Resulting record Fingerprints:', ppl.fingerprints) print('Low level JSON dump of output data model: ') util.jsondump(output_model, sys.stdout) print('Versa literate form of output: ') literate.write(output_model, out=sys.stdout) # from versa.serial import mermaid # print('Mermaid diagram form of output: ') # mermaid.write(output_model, out=sys.stdout)