示例#1
0
 def test_dump_dictionary_to_rdf(self):
     """Dump the materials provider to RDF/XML and check key statements.

     The serialised document must start with an XML declaration and contain
     both the ``nl-BE`` ``skos:definition`` of "Bont" and the integer-typed
     ``dcterms:identifier`` with value 9.
     """
     graph_dump = utils.rdf_dumper(self._get_materials_provider())
     xml = graph_dump.serialize(format='xml', encoding="UTF-8")
     # The serialiser may hand back bytes; normalise to text before comparing.
     if isinstance(xml, bytes):
         xml = xml.decode("UTF-8")
     # assertEqual: the assertEquals alias is deprecated and gone in Python 3.12.
     self.assertEqual("<?xml", xml[:5])
     bont_skos_definition = '<skos:definition xml:lang="nl-BE">Bont is een gelooide dierlijke huid, dicht bezet met haren. Het wordt voornamelijk gebruikt voor het maken van kleding.</skos:definition>'
     dcterms_id_skos_definition = '<dcterms:identifier rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">9</dcterms:identifier>'
     self.assertIn(bont_skos_definition, xml)
     self.assertIn(dcterms_id_skos_definition, xml)
 def test_dump_dictionary_to_rdf(self):
     """Check the RDF/XML dump of the materials provider.

     Asserts that the output begins with ``<?xml`` and that it includes the
     ``nl-BE`` definition of "Bont" as well as the integer ``dcterms:identifier``
     9.
     """
     graph_dump = utils.rdf_dumper(self._get_materials_provider())
     xml = graph_dump.serialize(format='xml', encoding="UTF-8")
     # Work on text regardless of whether serialize() returned bytes or str.
     if isinstance(xml, bytes):
         xml = xml.decode("UTF-8")
     # assertEquals is a deprecated alias (removed in Python 3.12).
     self.assertEqual("<?xml", xml[:5])
     bont_skos_definition = '<skos:definition xml:lang="nl-BE">Bont is een gelooide dierlijke huid, dicht bezet met haren. Het wordt voornamelijk gebruikt voor het maken van kleding.</skos:definition>'
     dcterms_id_skos_definition = '<dcterms:identifier rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">9</dcterms:identifier>'
     self.assertIn(bont_skos_definition, xml)
     self.assertIn(dcterms_id_skos_definition, xml)
示例#3
0
 def test_dump_one_id_to_rdf_and_reload(self):
     """Round-trip a single-id dump through an ``RDFProvider``.

     Id 1 is dumped from the tree provider, the resulting graph is loaded
     into a new ``RDFProvider`` and id 1 is dumped again from that provider.
     Both single-id dumps must have the same length, and the full dump of
     the reloaded provider must be longer than the single-id dump.
     """
     graph_dump1 = utils.rdf_c_dumper(self.tree_provider, 1)
     provider = RDFProvider(
         {
             'id': 'Number1',
             'dataset': {
                 'uri': 'http://id.trees.org/dataset'
             }
         }, graph_dump1)
     graph_dump2 = utils.rdf_c_dumper(provider, 1)
     graph_full_dump2 = utils.rdf_dumper(provider)
     # unittest assertions are not stripped by ``python -O`` and report both
     # operands on failure, unlike a bare ``assert``.
     self.assertEqual(len(graph_dump1), len(graph_dump2))
     self.assertGreater(len(graph_full_dump2), len(graph_dump2))
 def test_dump_one_id_to_rdf_and_reload(self):
     """Dump id 1, reload the graph in an ``RDFProvider`` and dump again.

     The two single-id dumps must contain the same number of entries, while
     the complete dump of the reloaded provider must contain more.
     """
     graph_dump1 = utils.rdf_c_dumper(self.tree_provider, 1)
     provider = RDFProvider(
         {
             'id': 'Number1',
             'dataset': {
                 'uri': 'http://id.trees.org/dataset'
             }
         },
         graph_dump1
     )
     graph_dump2 = utils.rdf_c_dumper(provider, 1)
     graph_full_dump2 = utils.rdf_dumper(provider)
     # Use TestCase assertions: they still run under ``python -O`` and give
     # a readable failure message.
     self.assertEqual(len(graph_dump1), len(graph_dump2))
     self.assertGreater(len(graph_full_dump2), len(graph_dump2))
 def test_dump_tree_to_rdf_2(self):
     """Dump ``tree_provider2`` to RDF/XML and check for the XML declaration."""
     graph_dump = utils.rdf_dumper(self.tree_provider2)
     xml = graph_dump.serialize(format='xml', encoding="UTF-8")
     # The serialiser may hand back bytes; normalise to text before comparing.
     if isinstance(xml, bytes):
         xml = xml.decode("UTF-8")
     # assertEqual: the assertEquals alias is deprecated and gone in Python 3.12.
     self.assertEqual("<?xml", xml[:5])
 def test_dump_rdf_compare_type(self):
     """Check that ``rdf_dumper`` returns an object whose type is exactly ``Graph``."""
     graph_dump = utils.rdf_dumper(self.rdf_products_provider)
     # assertEqual: the assertEquals alias is deprecated and gone in Python 3.12.
     self.assertEqual(type(graph_dump), Graph)
示例#7
0
def main():
    """Dump every non-external conceptscheme to Turtle and RDF/XML files.

    The script takes the path of a configuration file as its first positional
    argument. For each provider in the SKOS registry whose ``subject``
    metadata does not contain ``'external'`` it writes ``<id>-full.ttl`` and
    ``<id>-full.rdf`` to the dump location, optionally converts the dumps to
    HDT with the ``rdf2hdt`` command, and finally stores triple counts per
    conceptscheme through ``CountsManager``.

    Options:
        -l / --location: directory to dump to; falls back to the
            ``atramhasis.dump_location`` setting and then to the directory
            holding the configuration file.
        -r / --rdf2hdt: path of the ``rdf2hdt`` command; falls back to the
            ``atramhasis.rdf2hdt`` setting. HDT conversion is skipped when
            neither is set.

    :returns: ``2`` when no positional argument was given, otherwise ``None``.
    :raises SkosRegistryNotFoundException: when the bootstrapped request
        carries no SKOS registry.
    """
    description = """\
    Dump all conceptschemes to files. Will serialise as Turtle and RDF/XML format.
    """
    usage = "usage: %prog config_uri"
    parser = optparse.OptionParser(
        usage=usage,
        description=textwrap.dedent(description)
    )
    parser.add_option(
        '-l', '--location', dest='dump_location', type='string',
        help='Specify where to dump the conceptschemes. If not specified, this \
        is set to the atramhasis.dump_location from your ini file.'
    )
    parser.add_option(
        '-r', '--rdf2hdt', dest='rdf2hdt', type='string', default=False,
        help='Specify where the rdf2hdt command can be found. If not specified, this \
        is set to atramhasis.rdf2hdt from your ini file.'
    )

    options, args = parser.parse_args(sys.argv[1:])

    if not args:
        print('You must provide at least one argument.')
        return 2

    config_uri = args[0]

    env = bootstrap(config_uri)
    # From here on the environment must be closed again, also when a step
    # below raises.
    try:
        setup_logging(config_uri)

        dump_location = options.dump_location
        if dump_location is None:
            dump_location = env['registry'].settings.get(
                'atramhasis.dump_location',
                os.path.abspath(os.path.dirname(config_uri))
            )

        rdf2hdt = options.rdf2hdt
        if not rdf2hdt:
            rdf2hdt = env['registry'].settings.get(
                'atramhasis.rdf2hdt',
                False
            )

        request = env['request']

        if hasattr(request, 'skos_registry') and request.skos_registry is not None:
            skos_registry = request.skos_registry
        else:
            raise SkosRegistryNotFoundException()   # pragma: no cover

        counts = []

        files = []

        for p in skos_registry.get_providers():
            # Providers tagged as external are not dumped.
            if 'external' in p.get_metadata()['subject']:
                continue
            start_time = time.time()
            pid = p.get_metadata()['id']
            filename = os.path.join(dump_location, '%s-full' % pid)
            filename_ttl = '%s.ttl' % filename
            filename_rdf = '%s.rdf' % filename
            files.append(filename_ttl)
            print('Generating dump for %s' % pid)
            graph = utils.rdf_dumper(p)
            triples = len(graph)
            print('Number of triples in Graph: %d' % triples)
            csuri = URIRef(p.concept_scheme.uri)
            cs_triples = len(list(graph.predicate_objects(csuri)))
            print('Number of triples in Conceptscheme: %d' % cs_triples)
            count_concepts = len(list(graph.subjects(RDF.type, SKOS.Concept)))
            count_collections = len(list(graph.subjects(RDF.type, SKOS.Collection)))
            # A scheme without concepts or collections has nothing to average
            # over; report 0 instead of failing with ZeroDivisionError.
            count_items = count_concepts + count_collections
            if count_items:
                avg_concept_triples = (triples - cs_triples) / count_items
            else:
                avg_concept_triples = 0
            print('Average number of triples per concept: %d' % avg_concept_triples)
            counts.append({
                'conceptscheme_id': pid,
                'triples': triples,
                'conceptscheme_triples': cs_triples,
                'avg_concept_triples': avg_concept_triples
            })
            print('Dumping %s to Turtle: %s' % (pid, filename_ttl))
            graph.serialize(destination=filename_ttl, format='turtle')
            print('Dumping %s to RDFxml: %s' % (pid, filename_rdf))
            graph.serialize(destination=filename_rdf, format='pretty-xml')
            print("--- %s seconds ---" % (time.time() - start_time))

        print('All files dumped to %s' % dump_location)

        if rdf2hdt:
            from subprocess import check_call, CalledProcessError
            for f in files:
                print('Converting %s to hdt' % f)
                hdtf = f.replace('.ttl', '.hdt')
                try:
                    check_call([rdf2hdt, '-f', 'turtle', f, hdtf])
                except CalledProcessError:
                    # Turtle failed, let's try rdfxml
                    rdff = f.replace('.ttl', '.rdf')
                    check_call([rdf2hdt, '-f', 'rdfxml', rdff, hdtf])
            print('All hdt files dumped to %s' % dump_location)

        with transaction.manager:
            dbsession = request.registry.dbmaker()
            manager = CountsManager(dbsession)
            for c in counts:
                cs_count = ConceptschemeCounts(
                    conceptscheme_id=c['conceptscheme_id'],
                    triples=c['triples'],
                    conceptscheme_triples=c['conceptscheme_triples'],
                    avg_concept_triples=c['avg_concept_triples']
                )
                manager.save(cs_count)
    finally:
        env['closer']()
示例#8
0
文件: rdf.py 项目: JDeVos/atramhasis
 def rdf_export_turtle(self):
     """Return the RDF dump of ``self.provider`` as a Turtle download.

     :returns: a ``Response`` with content type ``text/turtle`` whose
         content disposition is an attachment named ``skos.ttl``.
     """
     graph = utils.rdf_dumper(self.provider)
     response = Response(content_type='text/turtle')
     # Ask for an explicit encoding so serialize() yields bytes: recent rdflib
     # releases return text otherwise, and ``body`` only accepts bytes
     # (assumes a WebOb/Pyramid Response - confirm against the file's imports).
     response.body = graph.serialize(format='turtle', encoding='utf-8')
     response.content_disposition = 'attachment; filename="skos.ttl"'
     return response
示例#9
0
文件: rdf.py 项目: JDeVos/atramhasis
 def rdf_export(self):
     """Return the RDF dump of ``self.provider`` as an RDF/XML download.

     :returns: a ``Response`` with content type ``application/rdf+xml`` whose
         content disposition is an attachment named ``skos.xml``.
     """
     graph = utils.rdf_dumper(self.provider)
     response = Response(content_type='application/rdf+xml')
     # Ask for an explicit encoding so serialize() yields bytes: recent rdflib
     # releases return text otherwise, and ``body`` only accepts bytes
     # (assumes a WebOb/Pyramid Response - confirm against the file's imports).
     response.body = graph.serialize(format='xml', encoding='utf-8')
     response.content_disposition = 'attachment; filename="skos.xml"'
     return response
示例#10
0
from skosprovider.skos import ConceptScheme, Label, Note, Source

from skosprovider_rdf.utils import rdf_dumper

# Build a SKOS provider from the bundled data/menu.csv, dump it to an RDF
# graph and print that graph as N3.
# The file is opened in a ``with`` block so the handle is always closed; the
# whole pipeline stays inside it because the provider is fed from the reader.
with open(os.path.join(os.path.dirname(__file__), 'data', 'menu.csv'), "r") as ifile:

    reader = csv.reader(ifile)

    csvprovider = SimpleCsvProvider(
        {'id': 'MENU'},
        reader,
        uri_generator=UriPatternGenerator('http://id.python.org/menu/%s'),
        concept_scheme=ConceptScheme(
            uri='http://id.python.org/menu',
            labels=[
                Label(type='prefLabel', language='en', label='A pythonesque menu.')
            ],
            notes=[
                Note(
                    type='changeNote',
                    language='en',
                    note=
                    "<strong>We didn't need no change notes when I was younger.</strong>",
                    markup='HTML')
            ],
            sources=[Source("Monthy Python's Flying Circus, 1970. Spam.")]))

    graph = rdf_dumper(csvprovider)

    # Call syntax works on Python 2 and 3; the print statement is a syntax
    # error on Python 3.
    print(graph.serialize(format='n3'))
示例#11
0
)

# ``ifile`` is opened by the statement that ends just above this excerpt.
reader = csv.reader(ifile)

# Provider for the "MENU" vocabulary, fed row by row from the CSV reader.
csvprovider = SimpleCsvProvider(
    {'id': 'MENU'},
    reader,
    uri_generator=UriPatternGenerator('http://id.python.org/menu/%s'),
    concept_scheme=ConceptScheme(
        uri='http://id.python.org/menu',
        labels=[
            Label(type='prefLabel', language='en', label='A pythonesque menu.')
        ],
        notes=[
            Note(
                type='changeNote',
                language='en',
                note="<strong>We didn't need no change notes when I was younger.</strong>",
                markup='HTML'
            )
        ],
        sources=[
            Source("Monthy Python's Flying Circus, 1970. Spam.")
        ]
    )
)

graph = rdf_dumper(csvprovider)

# Call syntax works on Python 2 and 3; the print statement is a syntax error
# on Python 3.
print(graph.serialize(format='n3'))
示例#12
0
 def rdf_full_export_turtle(self):
     """Return the RDF dump of ``self.provider`` as a Turtle download.

     :returns: a ``Response`` with content type ``text/turtle`` whose
         content disposition is an attachment named ``<scheme_id>-full.ttl``.
     """
     graph = utils.rdf_dumper(self.provider)
     response = Response(content_type='text/turtle')
     # Ask for an explicit encoding so serialize() yields bytes: recent rdflib
     # releases return text otherwise, and ``body`` only accepts bytes
     # (assumes a WebOb/Pyramid Response - confirm against the file's imports).
     response.body = graph.serialize(format='turtle', encoding='utf-8')
     response.content_disposition = 'attachment; filename="%s-full.ttl"' % (str(self.scheme_id),)
     return response
示例#13
0
 def rdf_full_export(self):
     """Return the RDF dump of ``self.provider`` as an RDF/XML download.

     :returns: a ``Response`` with content type ``application/rdf+xml`` whose
         content disposition is an attachment named ``<scheme_id>-full.rdf``.
     """
     graph = utils.rdf_dumper(self.provider)
     response = Response(content_type='application/rdf+xml')
     # Ask for an explicit encoding so serialize() yields bytes: recent rdflib
     # releases return text otherwise, and ``body`` only accepts bytes
     # (assumes a WebOb/Pyramid Response - confirm against the file's imports).
     response.body = graph.serialize(format='xml', encoding='utf-8')
     response.content_disposition = 'attachment; filename="%s-full.rdf"' % (str(self.scheme_id),)
     return response
示例#14
0
def main():
    """Dump all non-external conceptschemes as Turtle and RDF/XML files.

    Expects the path of a configuration file as first positional argument.
    Every provider of the SKOS registry whose ``subject`` metadata lacks
    ``'external'`` is dumped to ``<id>-full.ttl`` and ``<id>-full.rdf`` in the
    dump location. When an ``rdf2hdt`` command is configured, each dump is
    also converted to HDT. Triple counts per conceptscheme are saved through
    ``CountsManager`` at the end.

    Options:
        -l / --location: dump directory; defaults to the
            ``atramhasis.dump_location`` setting, then to the directory of
            the configuration file.
        -r / --rdf2hdt: path of the ``rdf2hdt`` command; defaults to the
            ``atramhasis.rdf2hdt`` setting. Without either, no HDT files are
            produced.

    :returns: ``2`` when no positional argument was given, otherwise ``None``.
    :raises SkosRegistryNotFoundException: when the bootstrapped request
        carries no SKOS registry.
    """
    description = """\
    Dump all conceptschemes to files. Will serialise as Turtle and RDF/XML format.
    """
    usage = "usage: %prog config_uri"
    parser = optparse.OptionParser(usage=usage,
                                   description=textwrap.dedent(description))
    parser.add_option(
        '-l',
        '--location',
        dest='dump_location',
        type='string',
        help='Specify where to dump the conceptschemes. If not specified, this \
        is set to the atramhasis.dump_location from your ini file.')
    parser.add_option(
        '-r',
        '--rdf2hdt',
        dest='rdf2hdt',
        type='string',
        default=False,
        help=
        'Specify where the rdf2hdt command can be found. If not specified, this \
        is set to atramhasis.rdf2hdt from your ini file.')

    options, args = parser.parse_args(sys.argv[1:])

    if not args:
        print('You must provide at least one argument.')
        return 2

    config_uri = args[0]

    env = bootstrap(config_uri)
    # Guarantee the environment is closed, even when dumping fails halfway.
    try:
        setup_logging(config_uri)

        dump_location = options.dump_location
        if dump_location is None:
            dump_location = env['registry'].settings.get(
                'atramhasis.dump_location',
                os.path.abspath(os.path.dirname(config_uri)))

        rdf2hdt = options.rdf2hdt
        if not rdf2hdt:
            rdf2hdt = env['registry'].settings.get('atramhasis.rdf2hdt', False)

        request = env['request']

        if hasattr(request, 'skos_registry') and request.skos_registry is not None:
            skos_registry = request.skos_registry
        else:
            raise SkosRegistryNotFoundException()  # pragma: no cover

        counts = []

        files = []

        for p in skos_registry.get_providers():
            # Skip providers that are tagged as external.
            if 'external' in p.get_metadata()['subject']:
                continue
            start_time = time.time()
            pid = p.get_metadata()['id']
            filename = os.path.join(dump_location, '%s-full' % pid)
            filename_ttl = '%s.ttl' % filename
            filename_rdf = '%s.rdf' % filename
            files.append(filename_ttl)
            print('Generating dump for %s' % pid)
            graph = utils.rdf_dumper(p)
            triples = len(graph)
            print('Number of triples in Graph: %d' % triples)
            csuri = URIRef(p.concept_scheme.uri)
            cs_triples = len(list(graph.predicate_objects(csuri)))
            print('Number of triples in Conceptscheme: %d' % cs_triples)
            count_concepts = len(list(graph.subjects(RDF.type, SKOS.Concept)))
            count_collections = len(list(graph.subjects(RDF.type,
                                                        SKOS.Collection)))
            # Guard against ZeroDivisionError for a scheme that holds neither
            # concepts nor collections: its average is reported as 0.
            count_items = count_concepts + count_collections
            if count_items:
                avg_concept_triples = (triples - cs_triples) / count_items
            else:
                avg_concept_triples = 0
            print('Average number of triples per concept: %d' %
                  avg_concept_triples)
            counts.append({
                'conceptscheme_id': pid,
                'triples': triples,
                'conceptscheme_triples': cs_triples,
                'avg_concept_triples': avg_concept_triples
            })
            print('Dumping %s to Turtle: %s' % (pid, filename_ttl))
            graph.serialize(destination=filename_ttl, format='turtle')
            print('Dumping %s to RDFxml: %s' % (pid, filename_rdf))
            graph.serialize(destination=filename_rdf, format='pretty-xml')
            print("--- %s seconds ---" % (time.time() - start_time))

        print('All files dumped to %s' % dump_location)

        if rdf2hdt:
            from subprocess import check_call, CalledProcessError
            for f in files:
                print('Converting %s to hdt' % f)
                hdtf = f.replace('.ttl', '.hdt')
                try:
                    check_call([rdf2hdt, '-f', 'turtle', f, hdtf])
                except CalledProcessError:
                    # Turtle failed, let's try rdfxml
                    rdff = f.replace('.ttl', '.rdf')
                    check_call([rdf2hdt, '-f', 'rdfxml', rdff, hdtf])
            print('All hdt files dumped to %s' % dump_location)

        with transaction.manager:
            dbsession = request.registry.dbmaker()
            manager = CountsManager(dbsession)
            for c in counts:
                cs_count = ConceptschemeCounts(
                    conceptscheme_id=c['conceptscheme_id'],
                    triples=c['triples'],
                    conceptscheme_triples=c['conceptscheme_triples'],
                    avg_concept_triples=c['avg_concept_triples'])
                manager.save(cs_count)
    finally:
        env['closer']()
示例#15
0
 def test_dump_tree_to_rdf_2(self):
     """Serialise the dump of ``tree_provider2`` as RDF/XML and check its prolog."""
     graph_dump = utils.rdf_dumper(self.tree_provider2)
     xml = graph_dump.serialize(format='xml', encoding="UTF-8")
     # Work on text regardless of whether serialize() returned bytes or str.
     if isinstance(xml, bytes):
         xml = xml.decode("UTF-8")
     # assertEquals is a deprecated alias (removed in Python 3.12).
     self.assertEqual("<?xml", xml[:5])
示例#16
0
 def test_dump_rdf_compare_type(self):
     """Verify that dumping ``rdf_products_provider`` yields exactly a ``Graph``."""
     graph_dump = utils.rdf_dumper(self.rdf_products_provider)
     # assertEquals is a deprecated alias (removed in Python 3.12).
     self.assertEqual(type(graph_dump), Graph)