def test_dump_dictionary_to_rdf(self):
    """Dump the materials provider to RDF/XML and check expected fragments.

    The serialised document must start with an XML declaration and contain
    both the Dutch ``skos:definition`` and the integer ``dcterms:identifier``
    fragments spelled out below.
    """
    graph_dump = utils.rdf_dumper(self._get_materials_provider())
    xml = graph_dump.serialize(format='xml', encoding="UTF-8")
    # serialize() may hand back bytes; normalise to text so the string
    # comparisons below work either way.
    if isinstance(xml, bytes):
        xml = xml.decode("UTF-8")
    print(xml)
    # assertEqual replaces the deprecated assertEquals alias, which no
    # longer exists in Python 3.12+.
    self.assertEqual("<?xml", xml[:5])
    bont_skos_definition = (
        '<skos:definition xml:lang="nl-BE">Bont is een gelooide dierlijke huid, '
        'dicht bezet met haren. Het wordt voornamelijk gebruikt voor het maken '
        'van kleding.</skos:definition>'
    )
    dcterms_id_skos_definition = (
        '<dcterms:identifier '
        'rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">'
        '9</dcterms:identifier>'
    )
    self.assertIn(bont_skos_definition, xml)
    self.assertIn(dcterms_id_skos_definition, xml)
def test_dump_dictionary_to_rdf(self):
    """Dump the materials provider to RDF/XML and check expected fragments.

    The serialised document must start with an XML declaration and contain
    both the Dutch ``skos:definition`` and the integer ``dcterms:identifier``
    fragments spelled out below.
    """
    graph_dump = utils.rdf_dumper(self._get_materials_provider())
    xml = graph_dump.serialize(format='xml', encoding="UTF-8")
    # serialize() may hand back bytes; normalise to text so the string
    # comparisons below work either way.
    if isinstance(xml, bytes):
        xml = xml.decode("UTF-8")
    print(xml)
    # assertEqual replaces the deprecated assertEquals alias, which no
    # longer exists in Python 3.12+.
    self.assertEqual("<?xml", xml[:5])
    bont_skos_definition = (
        '<skos:definition xml:lang="nl-BE">Bont is een gelooide dierlijke huid, '
        'dicht bezet met haren. Het wordt voornamelijk gebruikt voor het maken '
        'van kleding.</skos:definition>'
    )
    dcterms_id_skos_definition = (
        '<dcterms:identifier '
        'rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">'
        '9</dcterms:identifier>'
    )
    self.assertIn(bont_skos_definition, xml)
    self.assertIn(dcterms_id_skos_definition, xml)
def test_dump_one_id_to_rdf_and_reload(self):
    """Round-trip a single-id dump through an ``RDFProvider``.

    The graph dumped for id 1 from the tree provider is loaded into a new
    ``RDFProvider``; dumping id 1 from that provider must give a graph of
    the same length, and the full dump of that provider must be longer
    than the single-id dump.
    """
    first_dump = utils.rdf_c_dumper(self.tree_provider, 1)
    reloaded_metadata = {
        'id': 'Number1',
        'dataset': {'uri': 'http://id.trees.org/dataset'},
    }
    reloaded_provider = RDFProvider(reloaded_metadata, first_dump)
    second_dump = utils.rdf_c_dumper(reloaded_provider, 1)
    complete_dump = utils.rdf_dumper(reloaded_provider)
    assert len(first_dump) == len(second_dump)
    assert len(complete_dump) > len(second_dump)
def test_dump_one_id_to_rdf_and_reload(self):
    """Round-trip a single-id dump through an ``RDFProvider``.

    The graph dumped for id 1 from the tree provider is loaded into a new
    ``RDFProvider``; dumping id 1 from that provider must give a graph of
    the same length, and the full dump of that provider must be longer
    than the single-id dump.
    """
    first_dump = utils.rdf_c_dumper(self.tree_provider, 1)
    reloaded_metadata = {
        'id': 'Number1',
        'dataset': {'uri': 'http://id.trees.org/dataset'},
    }
    reloaded_provider = RDFProvider(reloaded_metadata, first_dump)
    second_dump = utils.rdf_c_dumper(reloaded_provider, 1)
    complete_dump = utils.rdf_dumper(reloaded_provider)
    assert len(first_dump) == len(second_dump)
    assert len(complete_dump) > len(second_dump)
def test_dump_tree_to_rdf_2(self):
    """Dump the second tree provider and check the output is an XML document."""
    graph_dump = utils.rdf_dumper(self.tree_provider2)
    xml = graph_dump.serialize(format='xml', encoding="UTF-8")
    # serialize() may hand back bytes; normalise to text before slicing.
    if isinstance(xml, bytes):
        xml = xml.decode("UTF-8")
    # assertEqual replaces the deprecated assertEquals alias, which no
    # longer exists in Python 3.12+.
    self.assertEqual("<?xml", xml[:5])
def test_dump_rdf_compare_type(self):
    """Check that ``rdf_dumper`` returns exactly a ``Graph`` instance."""
    graph_dump = utils.rdf_dumper(self.rdf_products_provider)
    # assertEqual replaces the deprecated assertEquals alias, which no
    # longer exists in Python 3.12+. The exact-type comparison is kept.
    self.assertEqual(type(graph_dump), Graph)
def main():
    """Dump every non-external conceptscheme to Turtle and RDF/XML files.

    Reads the config URI from the first command line argument, bootstraps
    the application, and for each provider in the SKOS registry whose
    metadata ``subject`` does not contain ``'external'``:

    * writes ``<id>-full.ttl`` and ``<id>-full.rdf`` to the dump location,
    * records triple counts, which are saved through ``CountsManager``.

    When an ``rdf2hdt`` command is configured, each Turtle file is also
    converted to ``.hdt``; if that conversion fails, the RDF/XML file is
    used as input instead.

    :returns: ``2`` when no config URI was given, otherwise ``None``.
    :raises SkosRegistryNotFoundException: when the request has no SKOS
        registry.
    """
    description = """\
    Dump all conceptschemes to files. Will serialise as Turtle and RDF/XML
    format.
    """
    usage = "usage: %prog config_uri"
    parser = optparse.OptionParser(
        usage=usage,
        description=textwrap.dedent(description)
    )
    # Adjacent string literals are used for the help texts so that no stray
    # backslash or source indentation ends up in the displayed help.
    parser.add_option(
        '-l', '--location',
        dest='dump_location',
        type='string',
        help='Specify where to dump the conceptschemes. If not specified, '
             'this is set to the atramhasis.dump_location from your ini file.'
    )
    parser.add_option(
        '-r', '--rdf2hdt',
        dest='rdf2hdt',
        type='string',
        default=False,
        help='Specify where the rdf2hdt command can be found. If not '
             'specified, this is set to atramhasis.rdf2hdt from your ini file.'
    )
    options, args = parser.parse_args(sys.argv[1:])
    if not args:
        print('You must provide at least one argument.')
        return 2

    config_uri = args[0]
    env = bootstrap(config_uri)
    # Everything after bootstrap runs under try/finally so the environment
    # closer is called even when dumping or saving the counts fails.
    try:
        setup_logging(config_uri)

        dump_location = options.dump_location
        if dump_location is None:
            dump_location = env['registry'].settings.get(
                'atramhasis.dump_location',
                os.path.abspath(os.path.dirname(config_uri))
            )

        rdf2hdt = options.rdf2hdt
        if not rdf2hdt:
            rdf2hdt = env['registry'].settings.get('atramhasis.rdf2hdt', False)

        request = env['request']
        skos_registry = getattr(request, 'skos_registry', None)
        if skos_registry is None:
            raise SkosRegistryNotFoundException()  # pragma: no cover

        counts = []
        files = []
        for p in skos_registry.get_providers():
            # Providers flagged as external are not dumped.
            if 'external' in p.get_metadata().get('subject', []):
                continue
            start_time = time.time()
            pid = p.get_metadata()['id']
            filename = os.path.join(dump_location, '%s-full' % pid)
            filename_ttl = '%s.ttl' % filename
            filename_rdf = '%s.rdf' % filename
            files.append(filename_ttl)
            print('Generating dump for %s' % pid)
            graph = utils.rdf_dumper(p)
            triples = len(graph)
            print('Number of triples in Graph: %d' % triples)
            csuri = URIRef(p.concept_scheme.uri)
            cs_triples = len(list(graph.predicate_objects(csuri)))
            print('Number of triples in Conceptscheme: %d' % cs_triples)
            count_concepts = len(list(graph.subjects(RDF.type, SKOS.Concept)))
            count_collections = len(
                list(graph.subjects(RDF.type, SKOS.Collection))
            )
            item_count = count_concepts + count_collections
            # An empty conceptscheme has no concepts or collections; report
            # an average of 0 instead of dividing by zero.
            if item_count:
                avg_concept_triples = (triples - cs_triples) / item_count
            else:
                avg_concept_triples = 0
            print(
                'Average number of triples per concept: %d'
                % avg_concept_triples
            )
            counts.append({
                'conceptscheme_id': pid,
                'triples': triples,
                'conceptscheme_triples': cs_triples,
                'avg_concept_triples': avg_concept_triples
            })
            print('Dumping %s to Turtle: %s' % (pid, filename_ttl))
            graph.serialize(destination=filename_ttl, format='turtle')
            print('Dumping %s to RDFxml: %s' % (pid, filename_rdf))
            graph.serialize(destination=filename_rdf, format='pretty-xml')
            print("--- %s seconds ---" % (time.time() - start_time))

        print('All files dumped to %s' % dump_location)

        if rdf2hdt:
            from subprocess import check_call, CalledProcessError
            for f in files:
                print('Converting %s to hdt' % f)
                hdtf = f.replace('.ttl', '.hdt')
                try:
                    check_call([rdf2hdt, '-f', 'turtle', f, hdtf])
                except CalledProcessError:
                    # Turtle failed, let's try rdfxml
                    rdff = f.replace('.ttl', '.rdf')
                    check_call([rdf2hdt, '-f', 'rdfxml', rdff, hdtf])
            print('All hdt files dumped to %s' % dump_location)

        with transaction.manager:
            dbsession = request.registry.dbmaker()
            manager = CountsManager(dbsession)
            for c in counts:
                cs_count = ConceptschemeCounts(
                    conceptscheme_id=c['conceptscheme_id'],
                    triples=c['triples'],
                    conceptscheme_triples=c['conceptscheme_triples'],
                    avg_concept_triples=c['avg_concept_triples']
                )
                manager.save(cs_count)
    finally:
        env['closer']()
def rdf_export_turtle(self):
    """Return the provider's RDF dump as a downloadable Turtle file.

    :returns: a ``Response`` with content type ``text/turtle``, sent as
        the attachment ``skos.ttl``.
    """
    graph = utils.rdf_dumper(self.provider)
    response = Response(content_type='text/turtle')
    # Ask for an explicit encoding so serialize() yields bytes: newer
    # rdflib releases return text otherwise, which cannot be assigned to
    # ``response.body``.
    response.body = graph.serialize(format='turtle', encoding='utf-8')
    response.content_disposition = 'attachment; filename="skos.ttl"'
    return response
def rdf_export(self):
    """Return the provider's RDF dump as a downloadable RDF/XML file.

    :returns: a ``Response`` with content type ``application/rdf+xml``,
        sent as the attachment ``skos.xml``.
    """
    graph = utils.rdf_dumper(self.provider)
    response = Response(content_type='application/rdf+xml')
    # Ask for an explicit encoding so serialize() yields bytes: newer
    # rdflib releases return text otherwise, which cannot be assigned to
    # ``response.body``.
    response.body = graph.serialize(format='xml', encoding='utf-8')
    response.content_disposition = 'attachment; filename="skos.xml"'
    return response
from skosprovider.skos import ConceptScheme, Label, Note, Source
from skosprovider_rdf.utils import rdf_dumper

# Example: load a CSV vocabulary into a provider, dump it to an RDF graph
# and print the graph as N3.
menu_path = os.path.join(os.path.dirname(__file__), 'data', 'menu.csv')

# The file is opened in a ``with`` block so the handle is always closed.
# The provider and the graph are built inside it because the csv reader
# pulls rows from the open file lazily.
with open(menu_path, "r") as ifile:
    reader = csv.reader(ifile)
    csvprovider = SimpleCsvProvider(
        {'id': 'MENU'},
        reader,
        uri_generator=UriPatternGenerator('http://id.python.org/menu/%s'),
        concept_scheme=ConceptScheme(
            uri='http://id.python.org/menu',
            labels=[
                Label(
                    type='prefLabel',
                    language='en',
                    label='A pythonesque menu.'
                )
            ],
            notes=[
                Note(
                    type='changeNote',
                    language='en',
                    note="<strong>We didn't need no change notes when I was younger.</strong>",
                    markup='HTML'
                )
            ],
            sources=[
                Source("Monthy Python's Flying Circus, 1970. Spam.")
            ]
        )
    )
    graph = rdf_dumper(csvprovider)

# serialize() may return bytes; decode so the printed output is plain text.
n3 = graph.serialize(format='n3')
if isinstance(n3, bytes):
    n3 = n3.decode('utf-8')
# print() as a function works where the old print statement is a syntax error.
print(n3)
) reader = csv.reader(ifile) csvprovider = SimpleCsvProvider( {'id': 'MENU'}, reader, uri_generator=UriPatternGenerator('http://id.python.org/menu/%s'), concept_scheme=ConceptScheme( uri='http://id.python.org/menu', labels=[ Label(type='prefLabel', language='en', label='A pythonesque menu.') ], notes=[ Note( type='changeNote', language='en', note="<strong>We didn't need no change notes when I was younger.</strong>", markup='HTML' ) ], sources=[ Source("Monthy Python's Flying Circus, 1970. Spam.") ] ) ) graph = rdf_dumper(csvprovider) print graph.serialize(format='n3')
def rdf_full_export_turtle(self):
    """Return the provider's full RDF dump as a downloadable Turtle file.

    :returns: a ``Response`` with content type ``text/turtle``, sent as
        the attachment ``<scheme_id>-full.ttl``.
    """
    graph = utils.rdf_dumper(self.provider)
    response = Response(content_type='text/turtle')
    # Ask for an explicit encoding so serialize() yields bytes: newer
    # rdflib releases return text otherwise, which cannot be assigned to
    # ``response.body``.
    response.body = graph.serialize(format='turtle', encoding='utf-8')
    response.content_disposition = (
        'attachment; filename="%s-full.ttl"' % (str(self.scheme_id),)
    )
    return response
def rdf_full_export(self):
    """Return the provider's full RDF dump as a downloadable RDF/XML file.

    :returns: a ``Response`` with content type ``application/rdf+xml``,
        sent as the attachment ``<scheme_id>-full.rdf``.
    """
    graph = utils.rdf_dumper(self.provider)
    response = Response(content_type='application/rdf+xml')
    # Ask for an explicit encoding so serialize() yields bytes: newer
    # rdflib releases return text otherwise, which cannot be assigned to
    # ``response.body``.
    response.body = graph.serialize(format='xml', encoding='utf-8')
    response.content_disposition = (
        'attachment; filename="%s-full.rdf"' % (str(self.scheme_id),)
    )
    return response
def main():
    """Dump every non-external conceptscheme to Turtle and RDF/XML files.

    Reads the config URI from the first command line argument, bootstraps
    the application, and for each provider in the SKOS registry whose
    metadata ``subject`` does not contain ``'external'``:

    * writes ``<id>-full.ttl`` and ``<id>-full.rdf`` to the dump location,
    * records triple counts, which are saved through ``CountsManager``.

    When an ``rdf2hdt`` command is configured, each Turtle file is also
    converted to ``.hdt``; if that conversion fails, the RDF/XML file is
    used as input instead.

    :returns: ``2`` when no config URI was given, otherwise ``None``.
    :raises SkosRegistryNotFoundException: when the request has no SKOS
        registry.
    """
    description = """\
    Dump all conceptschemes to files. Will serialise as Turtle and RDF/XML
    format.
    """
    usage = "usage: %prog config_uri"
    parser = optparse.OptionParser(
        usage=usage,
        description=textwrap.dedent(description)
    )
    # Adjacent string literals are used for the help texts so that no stray
    # backslash or source indentation ends up in the displayed help.
    parser.add_option(
        '-l', '--location',
        dest='dump_location',
        type='string',
        help='Specify where to dump the conceptschemes. If not specified, '
             'this is set to the atramhasis.dump_location from your ini file.'
    )
    parser.add_option(
        '-r', '--rdf2hdt',
        dest='rdf2hdt',
        type='string',
        default=False,
        help='Specify where the rdf2hdt command can be found. If not '
             'specified, this is set to atramhasis.rdf2hdt from your ini file.'
    )
    options, args = parser.parse_args(sys.argv[1:])
    if not args:
        print('You must provide at least one argument.')
        return 2

    config_uri = args[0]
    env = bootstrap(config_uri)
    # Everything after bootstrap runs under try/finally so the environment
    # closer is called even when dumping or saving the counts fails.
    try:
        setup_logging(config_uri)

        dump_location = options.dump_location
        if dump_location is None:
            dump_location = env['registry'].settings.get(
                'atramhasis.dump_location',
                os.path.abspath(os.path.dirname(config_uri))
            )

        rdf2hdt = options.rdf2hdt
        if not rdf2hdt:
            rdf2hdt = env['registry'].settings.get('atramhasis.rdf2hdt', False)

        request = env['request']
        skos_registry = getattr(request, 'skos_registry', None)
        if skos_registry is None:
            raise SkosRegistryNotFoundException()  # pragma: no cover

        counts = []
        files = []
        for p in skos_registry.get_providers():
            # Providers flagged as external are not dumped.
            if 'external' in p.get_metadata().get('subject', []):
                continue
            start_time = time.time()
            pid = p.get_metadata()['id']
            filename = os.path.join(dump_location, '%s-full' % pid)
            filename_ttl = '%s.ttl' % filename
            filename_rdf = '%s.rdf' % filename
            files.append(filename_ttl)
            print('Generating dump for %s' % pid)
            graph = utils.rdf_dumper(p)
            triples = len(graph)
            print('Number of triples in Graph: %d' % triples)
            csuri = URIRef(p.concept_scheme.uri)
            cs_triples = len(list(graph.predicate_objects(csuri)))
            print('Number of triples in Conceptscheme: %d' % cs_triples)
            count_concepts = len(list(graph.subjects(RDF.type, SKOS.Concept)))
            count_collections = len(
                list(graph.subjects(RDF.type, SKOS.Collection))
            )
            item_count = count_concepts + count_collections
            # An empty conceptscheme has no concepts or collections; report
            # an average of 0 instead of dividing by zero.
            if item_count:
                avg_concept_triples = (triples - cs_triples) / item_count
            else:
                avg_concept_triples = 0
            print(
                'Average number of triples per concept: %d'
                % avg_concept_triples
            )
            counts.append({
                'conceptscheme_id': pid,
                'triples': triples,
                'conceptscheme_triples': cs_triples,
                'avg_concept_triples': avg_concept_triples
            })
            print('Dumping %s to Turtle: %s' % (pid, filename_ttl))
            graph.serialize(destination=filename_ttl, format='turtle')
            print('Dumping %s to RDFxml: %s' % (pid, filename_rdf))
            graph.serialize(destination=filename_rdf, format='pretty-xml')
            print("--- %s seconds ---" % (time.time() - start_time))

        print('All files dumped to %s' % dump_location)

        if rdf2hdt:
            from subprocess import check_call, CalledProcessError
            for f in files:
                print('Converting %s to hdt' % f)
                hdtf = f.replace('.ttl', '.hdt')
                try:
                    check_call([rdf2hdt, '-f', 'turtle', f, hdtf])
                except CalledProcessError:
                    # Turtle failed, let's try rdfxml
                    rdff = f.replace('.ttl', '.rdf')
                    check_call([rdf2hdt, '-f', 'rdfxml', rdff, hdtf])
            print('All hdt files dumped to %s' % dump_location)

        with transaction.manager:
            dbsession = request.registry.dbmaker()
            manager = CountsManager(dbsession)
            for c in counts:
                cs_count = ConceptschemeCounts(
                    conceptscheme_id=c['conceptscheme_id'],
                    triples=c['triples'],
                    conceptscheme_triples=c['conceptscheme_triples'],
                    avg_concept_triples=c['avg_concept_triples']
                )
                manager.save(cs_count)
    finally:
        env['closer']()