def align_graphs_from_files(ontology_path_1, ontology_path_2, param_path, format=None):
    """Align ontology graphs from the RDF files at ``ontology_path_1`` and
    ``ontology_path_2`` using parameters loaded from the file at ``param_path``.

    :param ontology_path_1: path to the first ontology RDF file
    :param ontology_path_2: path to the second ontology RDF file
    :param param_path: path to a Python file defining alignment parameters
    :param format: explicit RDF serialization format; guessed from the
        file extension when omitted
    :yields: alignment results from ``align_graphs``
    """
    ontology_1 = Graph()
    ontology_2 = Graph()
    if not format:
        # No explicit format: let rdflib guess from each file's extension.
        ontology_1.parse(ontology_path_1, format=guess_format(ontology_path_1))
        ontology_2.parse(ontology_path_2, format=guess_format(ontology_path_2))
    else:
        # BUG FIX: the original passed the path and format to the Graph()
        # constructor, which expects (store, identifier) and never loads a
        # file. The graphs must be parsed, same as the guessed-format branch.
        ontology_1.parse(ontology_path_1, format=format)
        ontology_2.parse(ontology_path_2, format=format)

    # Load alignment parameters by executing the parameter file into a dict.
    # SECURITY: exec() runs arbitrary code -- only use with trusted files.
    parameters = dict()
    with open(param_path, "rb") as param_file:  # close the file (original leaked the handle)
        exec(compile(param_file.read(), param_path, 'exec'), parameters)

    # Align graphs with parameters.
    yield from align_graphs(ontology_1, ontology_2, parameters)
def main(argv):
    """NIDM-Experiment utilities: concatenate or visualize NIDM files."""
    parser = ArgumentParser(
        description='This program contains various NIDM-Experiment utilities')
    sub = parser.add_subparsers(dest='command')
    concat = sub.add_parser(
        'concat',
        description=
        "This command will simply concatenate the supplied NIDM files into a single output"
    )
    visualize = sub.add_parser(
        'visualize',
        description=
        "This command will produce a visualization(png) of the supplied NIDM files"
    )

    # Both subcommands take the same list of input files.
    for arg in [concat, visualize]:
        arg.add_argument(
            '-nl', '--nl',
            dest="nidm_files",
            nargs="+",
            required=True,
            help="A comma separated list of NIDM files with full path")
    concat.add_argument('-o', '--o', dest='output_file', required=True,
                        help="Merged NIDM output file name + path")
    visualize.add_argument('-o', '--o', dest='output_file', required=True,
                           help="Output file name+path of dot graph")

    args = parser.parse_args()

    def merge_files(files):
        # Merge all input NIDM files into a single rdflib graph.
        merged = Graph()
        for nidm_file in files:
            tmp = Graph()
            merged = merged + tmp.parse(nidm_file, format=util.guess_format(nidm_file))
        return merged

    if args.command == 'concat':
        merge_files(args.nidm_files).serialize(args.output_file, format='turtle')
    elif args.command == 'visualize':
        graph = merge_files(args.nidm_files)
        # BUG FIX: the original called StringIO.write(...) on the class itself,
        # which raises a TypeError at runtime. read_nidm needs a file-like
        # object, so wrap the serialized turtle in a StringIO instance.
        project = read_nidm(StringIO(graph.serialize(format='turtle')))
        project.save_DotGraph(filename=args.output_file + '.png', format='png')
def run_task_oa2_1(filename_a: str, filename_b: str, filename_c: str, task: str) -> None:
    """Load the three input RDF files into one graph, run reasoning over the
    merged graph and save the result as a turtle file named after ``task``."""
    target_ns = Namespace("http://www.city.ac.uk/ds/inm713/feiphoon#")
    candidate_ns = Namespace("http://www.co-ode.org/ontologies/pizza/pizza.owl#")

    graph: rdflib.Graph = Graph()
    # Bind the prefixes used by the target and candidate ontologies plus owl.
    graph.bind(prefix="fp", namespace=target_ns)
    graph.bind(prefix="pizza", namespace=candidate_ns)
    graph.bind(prefix="owl", namespace=OWL)

    # Load each input, guessing its serialization from the file extension.
    for source in (filename_a, filename_b, filename_c):
        graph.load(source=source, format=guess_format(source))

    _perform_reasoning(graph)
    _save_graph(graph=graph, output_file=f"all_files_with_reasoning_{task}.ttl")
def convert(nidm_file_list, type):
    """
    This function will convert NIDM files to various RDF-supported formats and
    name then / put them in the same place as the input file.
    """
    # WIP: pynidm is used for jsonld/trig exports (more human readable),
    # rdflib for everything else.
    # rdflib-handled conversions: type -> (output extension, serializer name).
    rdflib_formats = {
        'turtle': ('.ttl', 'turtle'),
        'xml-rdf': ('.xml', 'pretty-xml'),
        'n3': ('.n3', 'n3'),
    }

    for nidm_file in nidm_file_list.split(','):
        base = splitext(nidm_file)[0]
        if type == 'jsonld':
            # Read the NIDM file and write a jsonld file with the same name.
            project = read_nidm(nidm_file)
            with open(base + ".json", 'w') as f:
                f.write(project.serializeJSONLD())
        elif type == 'trig':
            project = read_nidm(nidm_file)
            with open(base + ".trig", 'w') as f:
                f.write(project.serializeTrig())
        elif type in rdflib_formats:
            extension, serializer = rdflib_formats[type]
            graph = Graph()
            graph.parse(nidm_file, format=util.guess_format(nidm_file))
            graph.serialize(base + extension, format=serializer)
        else:
            print("Error, type is not supported at this time")
def main():
    """Read the Bibale, Bodley and SDBM graphs, link people across them and
    serialize each namespace-bound result next to its input file."""
    argparser = argparse.ArgumentParser(description=__doc__, fromfile_prefix_chars='@')
    argparser.add_argument("input_bibale", help="Input Bibale RDF file")
    argparser.add_argument("input_bodley", help="Input Bodley RDF file")
    argparser.add_argument("input_sdbm", help="Input SDBM RDF file")
    argparser.add_argument(
        "--loglevel", default='DEBUG', help="Logging level",
        choices=["NOTSET", "DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"])
    argparser.add_argument("--logfile", default='tasks.log', help="Logfile")
    args = argparser.parse_args()

    # Attach a file handler to the root logger.
    log = logging.getLogger()
    handler = logging.FileHandler(args.logfile)
    handler.setFormatter(logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s'))
    log.addHandler(handler)
    log.setLevel(args.loglevel)

    log.info('Reading input graphs.')

    def read_graph(path):
        # Parse one RDF file, guessing its format from the extension.
        g = Graph()
        g.parse(path, format=guess_format(path))
        return g

    bibale = read_graph(args.input_bibale)
    bodley = read_graph(args.input_bodley)
    sdbm = read_graph(args.input_sdbm)

    log.info('Linking people of three graphs')
    p = PersonLinker(sdbm, bodley, bibale)
    p.link()
    if p.links:
        bibale, bodley, sdbm = p.datasets()
        log.info('Serializing output files...')
        filename_suffix = '_people.ttl'
        for graph, source_path in ((bibale, args.input_bibale),
                                   (bodley, args.input_bodley),
                                   (sdbm, args.input_sdbm)):
            bind_namespaces(graph).serialize(
                source_path.split('.')[0] + filename_suffix, format='turtle')
    else:
        log.warning('No links found')

    log.info('Task finished.')
def test_guess_format(self) -> None:
    """guess_format maps common RDF file extensions to parser names."""
    expectations = {
        "example.trix": "trix",
        "local-file.jsonld": "json-ld",
        "local-file.json-ld": "json-ld",
        "/some/place/on/disk/example.json": "json-ld",
        "../../relative/place/on/disk/example.json": "json-ld",
    }
    for path, expected in expectations.items():
        self.assertEqual(guess_format(path), expected)
def _read(self, paths=None):
    """Parse every path (plain or gzipped RDF file) into a single Graph."""
    graph = Graph()
    for path in paths:
        assert is_readable(path)
        if is_gzip(path):
            self.logger.debug("Input recognized as gzip file")
            # Guess the RDF format from the name without the ".gz" suffix.
            with gzip.open(path, 'rb') as handle:
                graph.parse(handle, format=guess_format(path[:-3]))
        else:
            graph.parse(path, format=guess_format(path))
    return graph
def get_example():
    """Build the example alignment inputs.

    NOTE(review): relies on module-level ``ontology_path_1``,
    ``ontology_path_2`` and ``param_path`` being defined elsewhere in the
    module -- confirm before reuse.
    """
    graphs = []
    for path in (ontology_path_1, ontology_path_2):
        g = Graph()
        g.parse(path, format=guess_format(path))
        graphs.append(g)

    # SECURITY: exec() runs arbitrary code from the parameter file.
    parameters = dict()
    exec(compile(open(param_path, "rb").read(), param_path, 'exec'), parameters)

    return graphs[0], graphs[1], parameters
def load_ontologies(): """Add ontologies into twks-server""" files = Path(current_app.config['ONTOLOGY_PATH']).glob('*') for f in files: path = f.as_posix() pub = Nanopublication.parse_assertions(source=path, format=guess_format(path)) current_app.store.put_nanopublication(pub) for ontology in remote_ontologies: pub = Nanopublication.parse_assertions(source=ontology, format=guess_format(ontology)) current_app.store.put_nanopublication(pub)
def main():
    """Print the triples unique to each of the two input RDF files."""
    values = ap.parse_args()

    isomorphic_graphs = []
    for filename in (values.file1, values.file2):
        parsed: Graph = Graph().parse(filename, format=guess_format(filename))
        # Canonicalize so the diff ignores blank-node naming differences.
        isomorphic_graphs.append(to_isomorphic(parsed))

    _in_both, in_first, in_second = graph_diff(isomorphic_graphs[0], isomorphic_graphs[1])
    print(f"Only in {values.file1}")
    dump_nt_sorted(in_first)
    print(f"Only in {values.file2}")
    dump_nt_sorted(in_second)
def main():
    """Run one casualty-linking task selected on the command line and
    serialize the namespace-bound result to the output path."""
    argparser = argparse.ArgumentParser(description="Casualty linking tasks",
                                        fromfile_prefix_chars='@')
    argparser.add_argument("task", help="Linking task to perform",
                           choices=["ranks", "persons", "municipalities", "units", "occupations"])
    argparser.add_argument("input", help="Input RDF file")
    argparser.add_argument("output", help="Output file location")
    argparser.add_argument("--loglevel", default='INFO',
                           help="Logging level, default is INFO.",
                           choices=["NOTSET", "DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"])
    argparser.add_argument("--logfile", default='tasks.log', help="Logfile")
    argparser.add_argument("--endpoint", default='http://ldf.fi/warsa/sparql',
                           help="SPARQL Endpoint")
    argparser.add_argument("--munics", default='output/municipalities.ttl',
                           help="Municipalities RDF file")
    argparser.add_argument("--arpa", type=str, help="ARPA instance URL for linking")
    args = argparser.parse_args()

    # Attach a file handler to the root logger.
    log = logging.getLogger()
    handler = logging.FileHandler(args.logfile)
    handler.setFormatter(logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s'))
    log.addHandler(handler)
    log.setLevel(args.loglevel)

    input_graph = Graph()
    input_graph.parse(args.input, format=guess_format(args.input))

    # Dispatch on the requested task; argparse's `choices` guarantees that
    # exactly one of the branches runs.
    if args.task == 'ranks':
        log.info('Linking ranks')
        result = link_ranks(input_graph, args.endpoint,
                            CASUALTY_MAPPING['SOTARVO']['uri'],
                            SCHEMA_CAS.rank, SCHEMA_WARSA.DeathRecord)
    elif args.task == 'persons':
        log.info('Linking persons')
        result = link_casualties(input_graph, args.endpoint, args.munics)
    elif args.task == 'municipalities':
        log.info('Linking municipalities')
        result = link_municipalities(input_graph, args.endpoint, args.arpa)
    elif args.task == 'units':
        log.info('Linking units')
        result = link_units(input_graph, args.endpoint, args.arpa)
    elif args.task == 'occupations':
        log.info('Linking occupations')
        result = link_occupations(input_graph, args.endpoint,
                                  CASUALTY_MAPPING['AMMATTI']['uri'],
                                  BIOC.has_occupation, SCHEMA_WARSA.DeathRecord)

    # Every task serializes the same way, guessing format from the extension.
    bind_namespaces(result).serialize(args.output, format=guess_format(args.output))
def load_data(data_url: str, old_graph: Optional[PPGraph] = None) -> PPGraph:
    """Create a new PPGraph or add triples to the provided one.

    Args:
        data_url: path to an RDF file or a directory of RDF files, or the url
            of a SPARQL endpoint; an endpoint url invalidates old_graph
        old_graph: existing graph, triples are added to it

    Returns:
        Graph with triples loaded from data_url (lazy loaded in the case
        of a SPARQL endpoint)
    """
    graph = old_graph if old_graph else PPGraph(ConjunctiveGraph())

    if isfile(data_url):
        L.info('Loading triples from file `%s`', data_url)
        graph.parse(data_url, format=guess_format(data_url))
        return graph

    if isdir(data_url):
        L.info('Loading triples from files in directory `%s`', data_url)
        for extension in TRIPLE_FILE_EXTENSIONS:
            triples_files = glob(f'{data_url}/*.{extension}')
            if not triples_files:
                continue
            L.info('Found %d `.%s` files', len(triples_files), extension)
            for i, triples_file in enumerate(triples_files):
                data_format = guess_format(triples_file)
                L.debug('%d / %d (`%s`), data format: %s',
                        i, len(triples_files), triples_file, data_format)
                graph.parse(triples_file, format=data_format)
        return graph

    L.info('Using remote graph from SPARQL endpoint `%s`', data_url)
    graph = PPGraph(SPARQLStore(data_url))
    # Fail early if the endpoint is unreachable or rejects queries.
    try:
        graph.query('''SELECT DISTINCT ?s WHERE { ?s rdf:type foaf:Person } LIMIT 1''')
    except Exception as e:
        L.error("Can't load data from remote endpoint")
        raise e
    return graph
def query(context, data_dict):
    """Run a SPARQL query against a TTL resource and return rows as lists
    of Python values; on failure return a single-cell exception message."""
    ttl_resource = data_dict["TTL_Resource"]
    sparql_query = data_dict["query"]
    try:
        # Parse the remote turtle resource into a fresh graph.
        g = Graph()
        g.parse(ttl_resource["url"], format=guess_format("ttl"),
                publicID=ttl_resource["name"])

        # Convert each result row, keeping only bound (truthy) terms.
        rows = []
        for row in g.query(sparql_query):
            rows.append([term.toPython() for term in row if term])
        return rows
    except Exception as e:
        # Surface the exception text as the query result.
        return [["Exception: " + str(e)]]
def parse_args(args): """ Parse command line arguments. See [Usage](#usage) (or the source code) for details. `args` is the list of command line arguments. """ argparser = argparse.ArgumentParser(description="Link resources to an RDF graph with ARPA.", fromfile_prefix_chars="@") argparser.add_argument("input", help="Input rdf file") argparser.add_argument("output", help="Output file") argparser.add_argument("tprop", metavar="target_property", help="Target property for the matches") argparser.add_argument("arpa", help="ARPA service URL") argparser.add_argument("--fi", metavar="INPUT_FORMAT", help="Input file format (rdflib parser). Will be guessed if omitted.") argparser.add_argument("--fo", metavar="OUTPUT_FORMAT", help="Output file format (rdflib serializer). Default is turtle.", default="turtle") argparser.add_argument("-n", "--new_graph", action="store_true", help="""Add the ARPA results to a new graph instead of the original. The output file contains all the triples of the original graph by default. With this argument set the output file will contain only the results.""") argparser.add_argument("--rdf_class", metavar="CLASS", help="Process only subjects of the given type (goes through all subjects by default).") argparser.add_argument("--prop", metavar="PROPERTY", help="Property that's value is to be used in matching. Default is skos:prefLabel.") argparser.add_argument("--ignore", nargs="*", metavar="TERM", help="Terms that should be ignored even if matched") argparser.add_argument("--min_ngram", default=1, metavar="N", type=int, help="The minimum ngram length that is considered a match. Default is 1.") argparser.add_argument("--no_duplicates", nargs="*", default=False, metavar="TYPE", help="""Remove duplicate matches based on the 'label' returned by the ARPA service. Here 'duplicate' means a subject with the same label as another subject in the same result set. A list of types can be given with this argument. 
If given, prioritize matches based on it - the first given type will get the highest priority and so on. Note that the response from the service has to include a 'type' variable for this to work.""") argparser.add_argument("-r", "--retries", default=0, metavar="N", type=int, help="The amount of retries per query if a HTTP error is received. Default is 0.") argparser.add_argument("--log_level", default="INFO", choices=["NOTSET", "DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"], help="Logging level, default is INFO. The log file is arpa_linker.log.") args = argparser.parse_args(args) if not args.fi: args.fi = guess_format(args.input) if args.prop: args.prop = URIRef(args.prop) if args.rdf_class: args.rdf_class = URIRef(args.rdf_class) args.tprop = URIRef(args.tprop) if args.no_duplicates == []: args.no_duplicates = True return args
def getGraph(file_path):
    """Parse the RDF file at ``file_path`` into a new Graph, guessing the
    serialization format from the file extension."""
    graph = Graph()
    graph.parse(file_path, format=guess_format(file_path))
    return graph
def parse_and_serialize(input_files, input_format, guess, outfile,
                        output_format, ns_bindings, store_conn="", store_type=None):
    """Parse RDF inputs into a (possibly store-backed) ConjunctiveGraph and
    optionally serialize it to ``outfile``; a backing store is rolled back."""
    if store_type:
        store = plugin.get(store_type, Store)()
        store.open(store_conn)
        graph = ConjunctiveGraph(store)
    else:
        store = None
        graph = ConjunctiveGraph()

    # Register the requested namespace prefixes, keeping existing bindings.
    for prefix, uri in list(ns_bindings.items()):
        graph.namespace_manager.bind(prefix, uri, override=False)

    for source in input_files:
        fmt, kws = _format_and_kws(input_format)
        if source == '-':
            source = sys.stdin  # "-" means read from standard input
        elif not input_format and guess:
            fmt = guess_format(source) or DEFAULT_INPUT_FORMAT
        graph.parse(source, format=fmt, **kws)

    if outfile:
        fmt, kws = _format_and_kws(output_format)
        kws.setdefault('base', None)
        graph.serialize(destination=outfile, format=fmt, **kws)

    if store:
        store.rollback()
def __load_from_file(self, file, format=None):
    """
    Load the datastructure from a RDF file.
    If no format is provided, rdflib is used to guess it from the file name.
    """
    if not os.path.isfile(file):
        raise Exception("Cannot find RDF file to load: {}".format(file))
    fmt = guess_format(file) if format is None else format

    # Temporary graph used only to parse the RDF file.
    g = Graph()
    g.parse(file, format=fmt)
    for s, p, o in g.triples((None, None, None)):
        # Insert the triple into the dictionary, then register it in every
        # index permutation (all orderings of subject/predicate/object).
        triple = self._dictionary.insert_triple(
            strip_uri(s.n3()), strip_uri(p.n3()), strip_uri(o.n3()))
        position = len(self._triples)
        subj, pred, obj = triple[0], triple[1], triple[2]
        self._indexes["spo"].insert(triple, position)
        self._indexes["sop"].insert((subj, obj, pred), position)
        self._indexes["osp"].insert((obj, subj, pred), position)
        self._indexes["ops"].insert((obj, pred, subj), position)
        self._indexes["pso"].insert((pred, subj, obj), position)
        self._indexes["pos"].insert((pred, obj, subj), position)
        self._triples.append(triple)
def parse(dir, dirName, file, df, originDir):
    """Parse one RDF file and append its triples to the given DataFrame.

    :param dir: base directory
    :param dirName: sub-directory containing the file
    :param file: RDF file name (its format is guessed from the extension)
    :param df: pandas DataFrame with Subject/Predicate/Object/Domain columns
    :param originDir: unused here, kept for interface compatibility
    :return: a new DataFrame with one row per parsed triple appended
    """
    # Local import so this fix is self-contained; pandas is already the
    # type of ``df`` (the original called df.append on it).
    import pandas as pd

    root = os.path.join(dir, dirName)

    g = Graph()
    try:
        # Use a context manager so the file handle is always closed
        # (the original leaked it on parse errors).
        with open(os.path.join(root, file), "r", encoding="utf8") as fileObj:
            g.parse(file=fileObj, format=guess_format(file))
        log("Parsed " + file + "\n")
    except Exception as e:
        log("Error trying to parse " + file + "\n")
        log(str(e) + "\n")

    # Domain derives from the file name, e.g. "example_org.n3" -> "example".
    domain = file.replace("_", ".").split(".")[0]

    # BUG FIX: DataFrame.append was deprecated and removed in pandas 2.0,
    # and calling it per row is quadratic. Collect all rows first and do a
    # single concat, which is the supported, linear-time equivalent.
    records = [{'Subject': subject, 'Predicate': predicate,
                'Object': object_, 'Domain': domain}
               for subject, predicate, object_ in g]
    if records:
        df = pd.concat([df, pd.DataFrame(records)], ignore_index=True)
    return df
def __init__(self, shape):
    """Wrap an existing Graph, or parse a shape file into a new one.

    :param shape: either an rdflib Graph instance (used as-is), or an open
        file object whose ``name`` is used to guess the RDF format; the
        file is closed after parsing.
    """
    if isinstance(shape, Graph):
        # isinstance instead of an exact type() check, so Graph subclasses
        # are accepted too; also avoids allocating a throwaway Graph when
        # one was passed in (the original always built one first).
        self.g = shape
    else:
        self.g = Graph()
        self.g.parse(shape, format=guess_format(shape.name))
        shape.close()
def parse_and_serialize(input_files, input_format, guess, outfile,
                        output_format, ns_bindings,
                        store_conn=STORE_CONNECTION, store_type=STORE_TYPE):
    """Parse RDF inputs into a store-backed Graph, optionally serialize the
    result to ``outfile``, then roll the store back."""
    store = plugin.get(store_type, Store)()
    store.open(store_conn)
    graph = Graph(store)

    # Register the requested namespace prefixes, keeping existing bindings.
    for prefix, uri in ns_bindings.items():
        graph.namespace_manager.bind(prefix, uri, override=False)

    for source in input_files:
        fmt, kws = _format_and_kws(input_format)
        if source == '-':
            source = sys.stdin  # "-" means read from standard input
        elif not input_format and guess:
            fmt = guess_format(source) or DEFAULT_INPUT_FORMAT
        graph.parse(source, format=fmt, **kws)

    if outfile:
        fmt, kws = _format_and_kws(output_format)
        graph.serialize(destination=outfile, format=fmt, base=None, **kws)

    store.rollback()
def import_old_data(request):
    """Import legacy project/user graphs.

    POST requests carry one project/user graph in the request body; otherwise
    every file in ./output/ is treated as one project/user graph.
    """
    everything_graph = Graph()
    bind_namespaces(everything_graph)

    if request.method == 'POST':
        # Gather post data (must be one project/user graph at a time).
        logger.debug('!!!!!!!!!!!!!!! views.py - import_old_data')
        parse_request_into_graph(request, everything_graph)
        add_all_users(everything_graph)
        # Create each user's default project. Due to the structure of the
        # data when exported from the old system, this also adds each
        # annotation to the project as an aggregated resource.
        create_project(everything_graph)
    else:
        # Serialize from a folder, where each file is one project/user graph.
        for file_name in listdir("output/"):
            if file_name.startswith('.'):
                continue
            try:
                everything_graph.parse("output/" + file_name,
                                       format=guess_format(file_name) or 'turtle')
            except Exception as e:
                # BUG FIX: the original used a Python 2 `print` statement,
                # which is a SyntaxError on Python 3. The parenthesized call
                # form below behaves identically on both interpreters.
                print("Failed to decode file '%s' with error message '%s'" % (file_name, e.args[-1]))
            else:
                add_all_users(everything_graph)
                create_project(everything_graph)

    return HttpResponse("I finished migrating data without errors.")
def main():
    """OMIA integration check: the graph must contain at least
    EXPECTED_PAIRS model_of predicate pairs."""
    parser = argparse.ArgumentParser(
        description='OMIA integration test',
        formatter_class=argparse.RawTextHelpFormatter)
    parser.add_argument('--input', '-i', type=str, required=True,
                        help='Location of input ttl file')
    args = parser.parse_args()

    graph = ConjunctiveGraph()
    graph.parse(args.input, format=rdflib_util.guess_format(args.input))

    model_of = URIRef('http://purl.obolibrary.org/obo/RO_0003301')
    # Count the (subject, object) pairs connected by model_of.
    model_len = sum(1 for _ in graph.subject_objects(model_of))

    if model_len < EXPECTED_PAIRS:
        logger.error("Not enough model_of predicates in graph:"
                     " {} expected {} check omia log for"
                     " warnings".format(model_len, EXPECTED_PAIRS))
        exit(1)
    else:
        logger.info("PASSED")
def load(self, url):
    """Load a vocabulary graph for ``url``, preferring local/cached copies.

    Resolution order:
    1. If ``url`` is a local file, (re)parse it when its mtime is newer than
       the last recorded parse, replacing the old context in ``self.graph``.
    2. If triples already exist under the context, reuse that context.
    3. If a filesystem cache copy exists, parse it as turtle.
    4. Otherwise fetch the remote source and write the turtle cache file.
    """
    # An alternative source may be mapped for this URL.
    src = VOCAB_SOURCE_MAP.get(str(url), url)
    if os.path.isfile(url):
        context_id = create_input_source(url).getPublicId()
        last_vocab_mtime = self.mtime_map.get(url)
        vocab_mtime = os.stat(url).st_mtime
        # Re-parse only when the file changed since the last recorded parse.
        if not last_vocab_mtime or last_vocab_mtime < vocab_mtime:
            logger.debug("Parse file: '%s'", url)
            self.mtime_map[url] = vocab_mtime
            # use CG as workaround for json-ld always loading as dataset
            graph = ConjunctiveGraph()
            graph.parse(src, format=guess_format(src))
            # Replace the previous triples for this context with fresh ones.
            self.graph.remove_context(context_id)
            for s, p, o in graph:
                self.graph.add((s, p, o, context_id))
            return graph
    else:
        context_id = url
    # Reuse already-loaded triples for this context, if any.
    if any(self.graph.triples((None, None, None), context=context_id)):
        logger.debug("Using context <%s>" % context_id)
        return self.graph.get_context(context_id)
    cache_path = self.get_fs_path(url)
    if os.path.exists(cache_path):
        logger.debug("Load local copy of <%s> from '%s'", context_id, cache_path)
        return self.graph.parse(cache_path, format='turtle', publicID=context_id)
    else:
        logger.debug("Fetching <%s> to '%s'", context_id, cache_path)
        # html sources are parsed as RDFa; otherwise rdflib picks the parser.
        graph = self.graph.parse(src,
                format='rdfa' if url.endswith('html') else None)
        # Cache the fetched vocabulary as turtle for the next call.
        with open(cache_path, 'w') as f:
            graph.serialize(f, format='turtle')
        return graph
def load(self): """ Indexes the AppEnsemble-Directory for files with the AppEnsemble-Extension and ADDs them to the AppEnsemblePool. :return:None """ try: files = os.listdir(self.get_ae_folder_path()) for file in files: if file.endswith(AppEnsemble.ae_extension): identifier=file.replace(AppEnsemble.ae_extension,'') ae_tmp=AppEnsemble(identifier) self.pool[identifier]=ae_tmp filepath=os.path.join(self.get_ae_folder_path(),file) with ZipFile(filepath, "r") as ae_pkg: for name in ae_pkg.namelist(): if fnmatch.fnmatch(name, AppEnsemble.ae_filename): ae_model = ae_pkg.read(AppEnsemble.ae_filename).decode() self.parse(data=ae_model, format=util.guess_format(name)) ae_pkg.close() return None except FileNotFoundError as detail: if self._ae_folder_path != self._ae_folder_path_backup: self.log.error('AppEnsemble-Path "{}" was not found in the system! Try to use the standard path!'.format(self.get_ae_folder_path())) self.set_ae_folder_path(self._ae_folder_path_backup) else: self.log.error('AppEnsemble-Path "{}" was not found in the system!'.format(self.get_ae_folder_path())) return None
def link_casualties(input_graph, endpoint, munics):
    """Link casualty records to person instances using record-linkage fields
    over names, places, dates, rank and unit."""
    # Record-linkage field configuration, assembled in the order the dedupe
    # model expects: names, birth place, date ranges, activity, rank, unit.
    name_fields = [{'field': name, 'type': 'String'} for name in ('given', 'family')]
    date_fields = [{'field': name, 'type': 'DateTime', 'has missing': True, 'fuzzy': False}
                   for name in ('birth_begin', 'birth_end', 'death_begin', 'death_end')]
    data_fields = name_fields + [
        # Birth place is linked, can have multiple values.
        {'field': 'birth_place', 'type': 'Custom', 'comparator': intersection_comparator, 'has missing': True},
    ] + date_fields + [
        {'field': 'activity_end', 'type': 'Custom', 'comparator': activity_comparator, 'has missing': True},
        {'field': 'rank', 'type': 'Exact', 'has missing': True},
        {'field': 'rank_level', 'type': 'Price', 'has missing': True},
        {'field': 'unit', 'type': 'Custom', 'comparator': intersection_comparator, 'has missing': True},
    ]

    ranks = r.read_graph_from_sparql(endpoint, "http://ldf.fi/warsa/ranks")
    municipality_graph = Graph().parse(munics, format=guess_format(munics))

    # Seed both RNGs so the linkage results are deterministic.
    random.seed(42)
    np.random.seed(42)

    training_links = read_person_links('input/person_links.json')

    return link_persons(endpoint,
                        _generate_casualties_dict(input_graph, ranks, municipality_graph),
                        data_fields,
                        training_links,
                        sample_size=500000,
                        threshold_ratio=0.5)
def main():
    """OMIA integration checks: enough model_of pairs must exist and a known
    breed must map to its expected OMIM disease."""
    parser = argparse.ArgumentParser(
        description='OMIA integration test',
        formatter_class=argparse.RawTextHelpFormatter)
    parser.add_argument('--input', '-i', type=str, required=True,
                        help='Location of input ttl file')
    args = parser.parse_args()

    graph = ConjunctiveGraph()
    graph.parse(args.input, format=rdflib_util.guess_format(args.input))

    model_of = URIRef('http://purl.obolibrary.org/obo/RO_0003301')
    # Count the (subject, object) pairs connected by model_of.
    model_len = sum(1 for _ in graph.subject_objects(model_of))
    if model_len < EXPECTED_PAIRS:
        logger.error("Not enough model_of predicates in graph:"
                     " {} expected {} check omia log for"
                     " warnings".format(model_len, EXPECTED_PAIRS))
        exit(1)

    # Spot-check a known breed -> OMIM disease triple.
    omim_diseases = graph.objects(
        subject=URIRef('https://monarchinitiative.org/model/OMIA-breed:18'),
        predicate=model_of
    )
    if list(omim_diseases) != [URIRef('http://purl.obolibrary.org/obo/OMIM_275220')]:
        logger.error("Missing breed to omim triple for {}".format('OMIA-breed:18'))
        exit(1)

    logger.info("PASSED")
def configure_database(self):
    """
    Database configuration should be set here
    """
    self.NS = NamespaceContainer()
    # Namespaces shipped with rdflib are attached directly.
    self.NS.RDFS = rdflib.RDFS
    self.NS.RDF = rdflib.RDF
    self.NS.OWL = rdflib.OWL

    # Common namespaces, keyed by the attribute name they are bound to.
    namespace_uris = {
        'xsd': "http://www.w3.org/2001/XMLSchema#",
        'dcterms': "http://purl.org/dc/terms/",
        'prov': "http://www.w3.org/ns/prov#",
        'skos': "http://www.w3.org/2004/02/skos/core#",
        'dcat': "http://www.w3.org/ns/dcat#",
        'oa': "http://www.w3.org/ns/oa#",
        'dataset': "https://cn.dataone.org/cn/v2/object/",
        'oboe': 'http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#',
        'csvw': 'http://www.w3.org/ns/csvw#',
    }
    for attribute, uri in namespace_uris.items():
        setattr(self.NS, attribute, rdflib.Namespace(uri))
    self.NS.local = rdflib.Namespace(self.config['lod_prefix'] + '/')
    self.urn = rdflib.Namespace("urn:")

    # Load the target ontology and vectorize its classes for matching.
    self.nt_file = self.config['target_ontology']
    self.target_graph = ConjunctiveGraph()
    self.target_graph.load(self.nt_file, format=guess_format(self.nt_file))

    self.target_classes, self.idf = vectorize_ontology(self.target_graph)

    # Restrict matching to the subclass tree rooted at oboe:MeasurementType,
    # excluding the root class itself.
    self.target_subtree = set(self.target_graph.transitive_subjects(
        self.NS.RDFS.subClassOf, self.NS.oboe.MeasurementType))
    self.target_class_subtree = [
        x for x in self.target_classes
        if x.identifier in self.target_subtree
        and x.identifier != self.NS.oboe.MeasurementType
    ]
    self.targets = dict([(x.identifier, x) for x in self.target_class_subtree])
def _loadgraph(filename):
    """Parse ``filename`` into a new rdflib Graph.

    The file's bytes are read here rather than handing the path straight to
    Graph.parse, because non-ascii filenames fail deep in rdflib internals.
    """
    graph = rdflib.Graph()
    graph.parse(data=util.readfile(filename, "rb"),
                format=guess_format(filename))
    return graph
def add_file_to_graph(graph: Graph, file: str, imports_map=None, imported: List[str] = None) -> Graph:
    """Parse ``file`` into ``graph`` and recursively load its owl:imports.

    Args:
        :param graph: graph the file is parsed into
        :param file: RDF file to load
        :param imports_map: mapping from import URI to a local file path
        :param imported: local paths already loaded (duplicate/cycle guard)
    Return:
        :return: the same graph, with the file and resolvable imports loaded
    """
    imports_map = imports_map if imports_map is not None else {}
    imported = imported if imported is not None else []

    # Fall back to JSON-LD when the extension is not recognized.
    fmt = guess_format(file) or "json-ld"
    graph.parse(file, format=fmt)

    for obj in graph.objects(None, OWL.imports):
        try:
            local = imports_map[str(obj)]
        except KeyError:
            LOGGER.error("%s not in map", str(obj))
            continue
        if local not in imported:
            imported.append(local)
            add_file_to_graph(graph, local, imports_map, imported)
    return graph
def load(cls, file_or_filename, format=None):
    """
    Materialize ontology into Python class hierarchy from a given file-like
    object or a filename.

    :param file_or_filename - file-like object or local filesystem path to
        file containing ontology definition in one of the supported formats.
    :param format - the format ontology is serialized in. For the list of
        currently supported formats (based on RDFlib which is used under
        the hood) see:
        http://rdflib.readthedocs.io/en/565/plugin_parsers.html
    :returns instance of the `Ontology` object which encompasses the
        ontology namespace for all created objects and types.
    """
    graph = Graph()
    if isinstance(file_or_filename, string_types):
        # Filename: guess the serialization from the extension if needed.
        graph.parse(file_or_filename,
                    format=format if format else guess_format(file_or_filename))
    else:
        # File-like buffer: there is no name to guess the format from.
        if not format:
            raise RuntimeError(
                "Must supply format argument when not loading from a filename"
            )
        graph.parse(file_or_filename, format=format)

    builder = OntologyBuilder(graph)
    namespace = builder.build_namespace()
    return cls(namespace, graph=graph, base_uri=builder.base_uri)
def post(self, proms_report_lodging_uri, report):
    """
    POSTS an RDF-serialised Report object to a PROMS server instance

    :param proms_report_lodging_uri: the URI of the PROMS server instance's
        Report lodgement endpoint. Typically something like
        {PROMS_URI}/function/lodge-report.
    :param report: a pyproms Report class object, an rdflib Graph of a
        Report, a Report file path or a string containing RDF of a Report
        in turtle
    :return: a requests module Response class
    """
    if isinstance(report, PromsReport):
        report_str = report.serialize_graph().decode('utf-8')
    elif isinstance(report, Graph):
        report_str = report.serialize(format='turtle')
    elif isinstance(report, str):
        if os.path.exists(report):
            # Path to an RDF file: parse it and re-serialize as turtle.
            g = Graph()
            g.parse(report, format=util.guess_format(report))
            report_str = g.serialize(format='turtle')
        else:
            # assume it's an RDF string in turtle
            report_str = report
    else:
        # don't allow anything else
        raise ValueError(
            'Only PromsReport objects, rdflib Graph objects, path strings to RDF files or a string of '
            'RDF in turtle format are allowd for \'report\'')

    # POST the Report to PROMS.
    headers = {'Content-type': 'text/turtle'}
    return requests.post(proms_report_lodging_uri, data=report_str, headers=headers)
def OpenGraph(file):
    '''
    Returns a parsed RDFLib Graph object for the given file

    The file will be hashed and if a pickled copy is found in the TMP dir,
    that will be used. Otherwise the graph will be computed and then saved
    in the TMP dir as a pickle file. Results are also memory-cached via
    functools.lru_cache during a run.

    :param file: filename (or an already-parsed Graph, returned unchanged)
    :return: Graph
    '''
    # If someone passed an RDF graph rather than a file, just send it back.
    if isinstance(file, rdflib.graph.Graph):
        return file

    # Hash the file contents to key the pickle cache.
    hasher = hashlib.md5()
    with open(file, 'rb') as afile:
        for chunk in iter(lambda: afile.read(65536), b''):
            hasher.update(chunk)
    digest = hasher.hexdigest()

    pickle_file = '{}/rdf_graph.{}.pickle'.format(tempfile.gettempdir(), digest)
    if path.isfile(pickle_file):
        # NOTE(review): pickle.load trusts the cache file in the temp dir;
        # this is only safe if that directory is not writable by other
        # parties -- confirm the deployment environment.
        return pickle.load(open(pickle_file, "rb"))

    rdf_graph = Graph()
    rdf_graph.parse(file, format=util.guess_format(file))
    pickle.dump(rdf_graph, open(pickle_file, 'wb'))

    # New graph, so to be safe clear out all cached entries.
    memory.clear(warn=False)
    return rdf_graph
def convert(self, form_input, map_filename):
    """Build an RDF result graph from the submitted form, driven by the map
    graph loaded from ``map_filename``."""
    self.form_input = form_input.form

    # Load the map graph and prepare an empty result graph that shares its
    # namespace bindings.
    self.rdf_map = Graph()
    self.rdf_map.parse(map_filename, format=guess_format(map_filename))
    self.rdf_result = Graph()
    self.rdf_result.namespace_manager = self.rdf_map.namespace_manager

    # The root node's class is the non-placeholder rdf:type the map assigns
    # to the 'placeholder node_uri' subject.
    for candidate_class in self.rdf_map.objects(Literal('placeholder node_uri'),
                                                URIRef(RDF.type)):
        if 'placeholder' not in candidate_class:
            self.root_node_class = candidate_class
    if self.root_node_class is None:
        raise Exception('No root node class specified in ' + map_filename)

    # Use the provided URI, or mint a unique one for the new node.
    if not self.root_node:
        self.root_node = URIRef(self.base_uri + str(uuid.uuid4()))
    self.rdf_result.add((self.root_node, RDF.type, self.root_node_class))

    # Walk the map triples and pull matching entries out of the form.
    for subject, predicate, obj in self.rdf_map:
        if str(subject) == 'placeholder node_uri' and 'placeholder' in obj:
            self.add_entries_for_property(self.root_node, predicate, obj)

    # Also pick up any custom properties submitted in the form.
    self.add_custom_property_entries(self.root_node)
    return self.rdf_result
def fusion(ontologies, output):
    """Merge every ontology file in *ontologies* into one graph and save it
    to *output* in Turtle format.

    Progress for individual ontologies is only logged when the global
    ``mode`` is 2 or 3.
    """
    global mode

    verbose = mode == 2 or mode == 3  # per-ontology progress logging

    # Final graph creation
    gMerge = ConjunctiveGraph()

    myPrint("Beginning additions...\n\n")
    for ontology in ontologies:
        gAdd = ConjunctiveGraph()
        if verbose:
            myPrint("\tParsing ontology "+ontology+"...\n")
        gAdd.parse(ontology, format=guess_format(ontology))
        if verbose:
            myPrint("\tAdding ontology "+ontology+", "+str(len(gAdd))+" triples...\n")
        gMerge = gMerge + gAdd
        if verbose:
            myPrint("\tOntology "+ontology+" added !\n")
            myPrint("\tNew size of merged ontology : "+str(len(gMerge))+" triples\n\n")

    myPrint("Additions complete !\n")
    myPrint("Final size of merged ontology : "+str(len(gMerge))+" triples\n\n")

    # Saving the merged ontology in turtle
    myPrint("Saving the ontology in turtle format...\n")
    gMerge.serialize(output, format="turtle")
    myPrint("Saving done !\n\n")
def parse(name, link, list_):
    """Parse the RDF document at *link* and append its statements to *list_*.

    :param name: display name of the vocabulary (logging + Domain column)
    :param link: URL/path of the document; its extension selects the format
    :param list_: list that receives one dict per triple
    :return: (list_, number of statements inserted), count 0 on failure
    """
    # Create a graph to analyze the file
    g = Graph()
    try:
        # Extract the real extension: a ".txt" wrapper hides it one segment
        # earlier, and any query string ("?...") is stripped.
        format_ = link.split(".")[-1]
        if (format_ == "txt"):
            format_ = link.split(".")[-2]
        format_ = format_.split("?")[0]
        log("Parsing: " + name + format_ + "\n")
        # BUG FIX: the computed extension was previously ignored -- the
        # format was always guessed from a hard-coded "<name>.n3".
        result = g.parse(link, format=guess_format(name + "." + format_))
        log("Parsed : " + name + "\n")
    except Exception as e:
        log("Error trying to parse " + name + "\n")
        log(str(e) + "\n")
        return list_, 0

    index = 0
    # For each statement present in the graph obtained
    for subject, predicate, object_ in g:
        # Save the statement: keep only the last URI segment of predicate and
        # object; the domain is the first dotted component of the name.
        predicateTerm = predicate.replace("/", "#").split("#")[-1]
        objectTerm = object_.replace("/", "#").split("#")[-1]
        domain = name.replace("_", ".").split(".")[0]
        list_.insert(
            index, {
                "Subject": subject,
                "Predicate": predicateTerm,
                "Object": objectTerm,
                "Domain": domain
            })
        index += 1
    return list_, index
def handle(self, *args, **options):
    """Load an RDF file into the configured graph store, then mirror the
    distinct resources, predicates, classes and contexts into their
    corresponding lookup tables."""
    source_file = options['file'][0]
    public_id = options['graph'][0]

    store_graph = settings.GRAPH
    store_graph.parse(source=source_file, format=guess_format(source_file),
                      publicID=public_id)
    store_graph.commit()

    # need to copy a unique set of resources to the Resource table
    for member in TypeStatement.objects.values_list('member', flat=True):
        Resource.objects.update_or_create(subject=member)

    # and unique class names and predicates to their own tables
    for value in AssertedStatement.objects.values_list('predicate', flat=True):
        Predicate.objects.update_or_create(value=value)
    for value in LiteralStatement.objects.values_list('predicate', flat=True):
        Predicate.objects.update_or_create(value=value)
    for value in QuotedStatement.objects.values_list('predicate', flat=True):
        Predicate.objects.update_or_create(value=value)
    for value in TypeStatement.objects.values_list('klass', flat=True):
        Klass.objects.update_or_create(value=value)
    for value in TypeStatement.objects.values_list('context', flat=True):
        Context.objects.update_or_create(value=value)

    store_graph.close()
def parse(name, link, file, df):
    """Parse the RDF document at *link* and append its triples to *df*.

    :param name: display name (logging + the Domain column)
    :param link: URL/path of the RDF document to parse
    :param file: unused; kept for interface compatibility
    :param df: pandas DataFrame receiving Subject/Predicate/Object/Domain rows
    :return: the DataFrame with the new rows appended (unchanged on failure)
    """
    import pandas as pd  # local import keeps the fix self-contained

    # Create a graph to analyze the file
    g = Graph()
    try:
        log("Parsing: " + name + "\n")
        g.parse(link, format=guess_format(name.split("/")[-1]))
        log("Parsed : " + name + "\n")
    except Exception as e:
        log("Error trying to parse " + name + "\n")
        log(str(e) + "\n")
        return df

    # Collect one record per statement, then append them all at once:
    # DataFrame.append per row was O(n^2) and is removed in modern pandas.
    domain = name.replace("_", ".").split(".")[0]
    records = [{
        "Subject": subject,
        "Predicate": predicate.replace("/", "#").split("#")[-1],
        "Object": object_.replace("/", "#").split("#")[-1],
        "Domain": domain,
    } for subject, predicate, object_ in g]

    if records:
        df = pd.concat([df, pd.DataFrame(records)], ignore_index=True)
    return df
def rm_main():
    """Parse a vocabulary file and serialize it in the requested formats."""
    # Get the name of the file to serialize
    fileName = ""

    # Try to create the graph to analyze the vocabulary
    try:
        g = Graph()
        extension = fileName.split(".")[-1]
        if extension == "txt":
            # a ".txt" wrapper hides the real extension one segment earlier
            extension = fileName.split(".")[-2]
        extension = extension.split("?")[0]
        g.parse(fileName, format=guess_format(extension))
    except Exception as e:
        # In case of an error during the graph's initiation, print the error
        print(str(e) + "\n")

    # Get the formats that will be used for serialization
    strFormats = ""
    requested = strFormats.split()
    dest = fileName.split(".")[0]

    # Serialize the vocabulary in every requested format
    serializers = (
        ("n3", ".n3", "n3"),
        ("nt", ".nt", "nt"),
        ("rdf", ".rdf", "pretty-xml"),
        ("ttl", ".ttl", "turtle"),
        ("json", ".json-ld", "json-ld"),
    )
    for keyword, suffix, rdf_format in serializers:
        if keyword in requested:
            g.serialize(destination=dest + suffix, format=rdf_format)
def read_graph(location, result, g=None):
    """Load an RDF document into a named graph, recursing into owl:imports.

    :param location: URL/path of the document to fetch and parse
    :param result: rdflib resource whose identifier names the target graph
    :param g: ConjunctiveGraph accumulating all graphs (created when None)
    :return: the accumulating ConjunctiveGraph
    """
    if g is None:
        g = ConjunctiveGraph()
    # view over g's store restricted to this resource's named graph
    graph = ConjunctiveGraph(store=g.store, identifier=result.identifier)
    # only fetch if we have not already loaded this named graph
    if len(graph) == 0:
        data = get_content(location).read()
        f = guess_format(location)
        # try the guessed format first, then fall back through the known ones
        for fmt in [f] + _rdf_formats_to_guess:
            try:
                graph.parse(data=data, format=fmt)
                break
            except Exception as e:
                #print(e)
                pass
        if len(graph) == 0:
            print("Could not parse graph: ", location)
    # recurse into owl:imports only when the resource is itself an ontology
    if result[RDF.type:OWL.Ontology]:
        for ontology in graph.subjects(RDF.type, OWL.Ontology):
            imports = [
                graph.resource(x) for x in graph.objects(ontology, OWL.imports)
            ]
            for i in imports:
                read_graph(i.identifier, i, g=g)
    return g
def handle(self, *args, **options):
    """Import an RDF file into the triple store, then copy the distinct
    subjects, predicates, classes and contexts into their own tables."""
    rdf_file = options['file'][0]
    graph_uri = options['graph'][0]

    graph = settings.GRAPH
    graph.parse(source=rdf_file, format=guess_format(rdf_file),
                publicID=graph_uri)
    graph.commit()

    # need to copy a unique set of resources to the Resource table
    for member in TypeStatement.objects.values_list('member', flat=True):
        Resource.objects.update_or_create(subject=member)

    # and unique class names and predicates to their own tables;
    # predicates come from all three statement models
    for statement_model in (AssertedStatement, LiteralStatement,
                            QuotedStatement):
        for pred in statement_model.objects.values_list('predicate',
                                                        flat=True):
            Predicate.objects.update_or_create(value=pred)

    for klass in TypeStatement.objects.values_list('klass', flat=True):
        Klass.objects.update_or_create(value=klass)
    for ctx in TypeStatement.objects.values_list('context', flat=True):
        Context.objects.update_or_create(value=ctx)

    graph.close()
def import_old_data(request):
    """Migrate legacy annotation data into the current store (Python 2 view).

    POST requests carry one project/user graph in the body; otherwise every
    non-hidden file under ``output/`` is parsed as one project/user graph.

    :param request: Django request object
    :return: HttpResponse confirming the migration finished
    """
    everything_graph = Graph()
    bind_namespaces(everything_graph)

    # Either gather post data (must be one project/user graph at a time)
    if request.method == 'POST':
        logger.debug('!!!!!!!!!!!!!!! views.py - import_old_data')
        parse_request_into_graph(request, everything_graph)

        add_all_users(everything_graph)

        # Create each user's default project
        # Due to the structure of the data when exported from the old system, this also
        #  add each annotation to the project as an aggregated resource
        create_project(everything_graph)
    # or serialize from a folder, where each file is one project/user graph
    else:
        # NOTE(review): this counter is never incremented or read -- dead code?
        i = 0

        for file_name in listdir("output/"):
            # skip hidden files such as .DS_Store
            if file_name.startswith('.'):
                continue

            try:
                everything_graph.parse("output/" + file_name,
                                       format=guess_format(file_name)
                                       or 'turtle')
            except Exception as e:
                print "Failed to decode file '%s' with error message '%s'" % (
                    file_name, e.args[-1])
            else:
                add_all_users(everything_graph)
                create_project(everything_graph)

    return HttpResponse("I finished migrating data without errors.")
def file_to_rdf_provider(input_file):
    """
    Create RDF provider from the input file

    :param input_file: path to an RDF file; its extension selects the parser
    :return: RDFProvider whose id is the upper-cased file basename
    """
    input_name, input_ext = os.path.splitext(os.path.basename(input_file))
    graph = Graph()
    # BUG FIX: guess_format needs a filename; the bare extension (".ttl")
    # has no extension of its own per os.path.splitext, so it returned None.
    graph.parse(input_file, format=guess_format(input_file))
    return RDFProvider({"id": input_name.upper()}, graph)
def get_format(filename):
    """Return the rdflib parser name for *filename*, using a custom
    extension mapping (notably: ``.xml`` is treated as TriX)."""
    extension_map = {
        'xml': 'trix',
        'ttl': 'turtle',
        'nq': 'nquads',
        'nt': 'nt',
        'rdf': 'xml',
    }
    return guess_format(filename, extension_map)
def handle(self, directory, *args, **kwargs):
    """Load every RDF file in *directory* into the store, one named context
    per file (context URI = URL-unquoted filename without its extension)."""
    store = rdfstore()
    for filename in os.listdir(directory):
        full_path = os.path.join(directory, filename)
        # skip directories and hidden files
        if not os.path.isfile(full_path) or filename.startswith('.'):
            continue
        stem = filename[:filename.rfind('.')]
        context = URIRef(urllib.unquote(stem))
        graph = Graph(store, context)
        graph.parse(full_path, format=guess_format(filename))
def run(self): ontologies = self.ontologies # Definition of namespaces # Uncomment if needed NS_owl = Namespace("http://www.w3.org/2002/07/owl#") NS_rdfs = Namespace("http://www.w3.org/2000/01/rdf-schema#") NS_xsd = Namespace("http://www.w3.org/2001/XMLSchema#") NS_rdf = Namespace("http://www.w3.org/1999/02/22-rdf-syntax-ns#") NS_mcf = Namespace("http://www.mycorporisfabrica.org/ontology/mcf.owl#") g1 = ConjunctiveGraph() g2 = ConjunctiveGraph() g1.parse(ontologies[0], format=guess_format(ontologies[0])) g2.parse(ontologies[1], format=guess_format(ontologies[1])) listDiff = ConjunctiveGraph() listDiff = g1 ^ g2 global listNames, listSizes for s,p,o in g1.triples((None, None, None)): item = "" #item += "[[ "+str(s)+" ]]\t[[ "+str(p)+" ]]\t[[ "+str(o)+" ]]" item +=str(s)+" || "+str(p)+" || "+str(o) self.emit(SIGNAL('addListItem(QString)'), item) ontologySplit = ontologies[0].split('/') ontologyName=ontologySplit[len(ontologySplit)-1] listNames.append(ontologyName) listSizes.append(str(len(g1))) tab["Ontology"] = listNames tab["Size"] = listSizes self.emit(SIGNAL('update_table(PyQt_PyObject)'), tab) ontologySplit = ontologies[1].split('/') ontologyName=ontologySplit[len(ontologySplit)-1] listNames.append(ontologyName) listSizes.append(str(len(g2))) tab["Ontology"] = listNames tab["Size"] = listSizes self.emit(SIGNAL('update_table(PyQt_PyObject)'), tab)
def __init__(self, file_names):
    """Parse every file in *file_names* into ``self.ont``.

    :param file_names: iterable of RDF file paths; each file's extension
        selects the rdflib parser
    """
    logger.info("Reading the data with RdfLib ...")
    for file_name in file_names:
        # compute the extension outside the try so it is always available
        # to the error message below
        name, extension = os.path.splitext(file_name)
        try:
            self.ont.parse(file_name, format=util.guess_format(file_name))
        except Exception:
            # was a bare `except:`, which also swallowed SystemExit and
            # KeyboardInterrupt; keep the best-effort behaviour but narrower
            logger.exception("Error reading file "+file_name+". Parser for "+extension[1:]+" needed.")
def _loadgraph(filename):
    """Parse *filename* into a fresh rdflib Graph and return it.

    We read the file contents ourselves instead of handing the path to
    Graph.parse, because a non-ascii filename fails deep in rdflib
    internals. NT sources are decoded as UTF-8 text; everything else is
    read as raw bytes.
    """
    graph = rdflib.Graph()
    fmt = guess_format(filename)
    if fmt == "nt":
        payload = util.readfile(filename, "r", encoding="utf-8")
    else:
        payload = util.readfile(filename, "rb")
    graph.parse(data=payload, format=fmt)
    return graph
def main():
    """Integration check for the OMIA ingest.

    Loads the ttl file given by --input and verifies that it contains at
    least EXPECTED_PAIRS distinct <RO:is model of> subject/object pairs and
    one known breed -> OMIM disease triple; exits 1 on failure.
    """
    parser = argparse.ArgumentParser(
        description='OMIA integration test',
        formatter_class=argparse.RawTextHelpFormatter)
    parser.add_argument(
        '--input', '-i', type=str, required=True,
        help='Location of input ttl file')
    args = parser.parse_args()

    graph = ConjunctiveGraph()
    graph.parse(args.input, format=rdflib_util.guess_format(args.input))

    # "is model of": "RO:0003301"
    is_model_of = URIRef('http://purl.obolibrary.org/obo/RO_0003301')

    # count distinct (subject, object) pairs linked by "is model of"
    models = graph.subject_objects(is_model_of)
    model_len = len(set(models))

    if model_len < EXPECTED_PAIRS:
        LOG.error(
            "Not enough <RO:is model of> predicates in graph: found {}, "
            "expected {} check omia log for warnings".format(
                model_len, EXPECTED_PAIRS))
        exit(1)

    # spot-check one known breed -> disease model triple
    breed = 'https://monarchinitiative.org/model/OMIA-breed:758'
    disease = 'http://omim.org/entry/305100'
    # BUG FIX: materialize the generator once; the original consumed it in
    # the comparison and then always logged an empty list.
    omim_diseases = list(graph.objects(
        subject=URIRef(breed), predicate=is_model_of))
    if omim_diseases != [URIRef(disease)]:
        LOG.error("Missing breed to omim triple for %s", breed)
        LOG.error(omim_diseases)
        exit(1)

    LOG.info("PASSED")
def main(target, _help=_help, options="", stdin=True):
    """
    A main function for tools that read RDF from files given on commandline
    or from STDIN (if stdin parameter is true)
    """
    args, files = getopt.getopt(sys.argv[1:], "hf:o:" + options)
    dargs = dict(args)

    if "-h" in dargs:
        _help()
        sys.exit(-1)

    g = rdflib.Graph()

    # input format: explicit via -f, otherwise guessed per file
    f = dargs["-f"] if "-f" in dargs else None

    # output stream: -o <file> (utf-8) or stdout
    if "-o" in dargs:
        sys.stderr.write("Output to %s\n" % dargs["-o"])
        out = codecs.open(dargs["-o"], "w", "utf-8")
    else:
        out = sys.stdout

    start = time.time()
    if len(files) == 0 and stdin:
        sys.stderr.write("Reading from stdin as %s..." % f)
        g.load(sys.stdin, format=f)
        sys.stderr.write("[done]\n")
    else:
        size = 0
        for x in files:
            if f is None:
                f = guess_format(x)
            start1 = time.time()
            sys.stderr.write("Loading %s as %s... " % (x, f))
            g.load(x, format=f)
            sys.stderr.write("done.\t(%d triples\t%.2f seconds)\n" %
                             (len(g) - size, time.time() - start1))
            size = len(g)

    sys.stderr.write("Loaded a total of %d triples in %.2f seconds.\n" %
                     (len(g), time.time() - start))

    target(g, out, args)
def process(triple_file, action, email, password, url, named_graph):
    """Load triples from *triple_file* and apply *action* to *named_graph*
    on the VIVO instance at *url*, authenticating with *email*/*password*."""
    console("\n{}\n".format('-' * 25))
    console("VIVO url: {}".format(url))

    # Handle named graph: anything other than the default KB2 graph is
    # wrapped as an explicit URI reference
    if named_graph != KB2:
        named_graph = URIRef(named_graph)

    graph = Graph()
    graph.parse(source=triple_file, format=guess_format(triple_file))
    console("Read {} triples and will {} to <{}>".format(len(graph), action,
                                                         named_graph))

    # Do the update.
    do_update(email, password, url, graph, named_graph, action)

    # Finish
    console("\n{}\n".format('-' * 25))
def __init_graph_conf_from_configuration(self, configfileId, known_blobs):
    """Init graphs with setting from config.ttl.

    :param configfileId: blob id of the config file inside the repository
    :param known_blobs: mapping of rdf filename -> blob oid
    :raises InvalidConfigurationError: if the blob is missing or unparsable
    """
    try:
        configfile = self.repository.get(configfileId)
    except Exception as e:
        raise InvalidConfigurationError(
            "Blob for configfile with id {} not found in repository {}".format(configfileId, e))

    content = configfile.read_raw()

    try:
        # the config file is expected to be Turtle
        self.graphconf.parse(data=content, format='turtle')
    except Exception as e:
        raise InvalidConfigurationError(
            "Configfile could not be parsed {} {}".format(configfileId, e)
        )

    nsQuit = 'http://quit.aksw.org/vocab/'
    # select every declared graph with its URI, file and (optional) format
    query = 'SELECT DISTINCT ?graphuri ?filename ?format WHERE { '
    query += ' ?graph a <' + nsQuit + 'Graph> . '
    query += ' ?graph <' + nsQuit + 'graphUri> ?graphuri . '
    query += ' ?graph <' + nsQuit + 'graphFile> ?filename . '
    query += ' OPTIONAL { ?graph <' + nsQuit + 'hasFormat> ?format .} '
    query += '}'
    result = self.graphconf.query(query)

    for row in result:
        filename = str(row['filename'])
        if row['format'] is None:
            format = guess_format(filename)
        else:
            format = str(row['format'])
        # NOTE(review): these `break`s abandon ALL remaining query rows as
        # soon as one graph is non-nt or not in known_blobs; `continue`
        # (skip just this row) looks like the intent -- confirm before
        # changing.
        if format != 'nt':
            break
        if filename not in known_blobs.keys():
            break
        graphuri = URIRef(str(row['graphuri']))

        # we store which named graph is serialized in which file
        self.graphs[graphuri] = filename
        self.files[filename] = {
            'serialization': format,
            'graph': graphuri,
            'oid': known_blobs[filename]}
def read_graph(location, result, g = None):
    """Load an RDF document into a named graph, recursing into owl:imports
    (Python 2 variant).

    :param location: URL/path of the document to fetch and parse
    :param result: rdflib resource whose identifier names the target graph
    :param g: ConjunctiveGraph accumulating all graphs (created when None)
    :return: the accumulating ConjunctiveGraph
    """
    if g is None:
        g = ConjunctiveGraph()
    # view over g's store restricted to this resource's named graph
    graph = ConjunctiveGraph(store=g.store, identifier=result.identifier)
    # only fetch if we have not already loaded this named graph
    if len(graph) == 0:
        data = get_content(location).read()
        f = guess_format(location)
        # try the guessed format first, then fall back through the known ones
        for fmt in [f] + _rdf_formats_to_guess:
            try:
                graph.parse(data=data, format=fmt)
                break
            except Exception as e:
                #print e
                pass
        if len(graph) == 0:
            print "Could not parse graph: ", location
    # recurse into owl:imports only when the resource is itself an ontology
    if result[RDF.type:OWL.Ontology]:
        for ontology in graph.subjects(RDF.type, OWL.Ontology):
            imports = [graph.resource(x)
                       for x in graph.objects(ontology, OWL.imports)]
            for i in imports:
                read_graph(i.identifier, i, g = g)
    return g
def get_blobs_from_repository(self, rev):
    """Analyze all blobs of a revision.

    Returns
    -------
    A triple (dictionary, list, dictionary)
    dict: names of rdf files mapped to (format, graph-file blob oid).
    list: blob ids of config files.
    dict: names of rdf files mapped to (blob oid, format).
    """
    config_files = []
    graph_files = {}
    graph_file_blobs = {}
    rdf_file_blobs = {}

    try:
        commit = self.repository.revparse_single(rev)
    except Exception:
        # unknown revision: nothing to analyze
        return graph_files, config_files, rdf_file_blobs

    # Collect graph files, rdf files and config files
    for entry in commit.tree:
        if entry.type != 'blob':
            continue
        fmt = guess_format(entry.name)
        if fmt is None and entry.name.endswith('.graph'):
            graph_file_blobs[entry.name] = entry.id
        elif fmt == 'nt':
            rdf_file_blobs[entry.name] = (entry.id, fmt)
        elif fmt is not None and entry.name == 'config.ttl':
            config_files.append(str(entry.id))

    # collect pairs of rdf files and graph files
    for filename, (fmt, _oid) in rdf_file_blobs.items():
        graph_name = filename + '.graph'
        if graph_name in graph_file_blobs:
            graph_files[filename] = (fmt, str(graph_file_blobs[graph_name]))

    return graph_files, config_files, rdf_file_blobs
def parse_and_serialize(input_files, input_format, guess,
                        outfile, output_format, ns_bindings,
                        store_conn=STORE_CONNECTION, store_type=STORE_TYPE):
    """Parse the given inputs into a store-backed graph and, when *outfile*
    is set, serialize the result there.

    '-' among *input_files* means stdin; when *guess* is truthy and no
    explicit input format was given, the format is guessed per file name.
    The store transaction is rolled back at the end.
    """
    store = plugin.get(store_type, Store)()
    store.open(store_conn)
    graph = Graph(store)

    # bind requested prefixes without clobbering existing bindings
    for prefix, uri in ns_bindings.items():
        graph.namespace_manager.bind(prefix, uri, override=False)

    for source in input_files:
        fmt, extra_args = _format_and_kws(input_format)
        if source == '-':
            source = sys.stdin
        elif not input_format and guess:
            fmt = guess_format(source) or DEFAULT_INPUT_FORMAT
        graph.parse(source, format=fmt, **extra_args)

    if outfile:
        output_format, extra_args = _format_and_kws(output_format)
        graph.serialize(destination=outfile, format=output_format,
                        base=None, **extra_args)

    store.rollback()
"""Rewrite TERO concept URIs in the p-namespace to their YSO equivalents.

Reads every RDF file given on the command line into one graph, then copies
all triples to an output graph, mapping subjects/objects that start with
the TERO 'p' namespace into the YSO namespace.
"""
import sys
from rdflib import Graph, Namespace, RDF
from rdflib.util import guess_format

# namespaces
TERO = Namespace("http://www.yso.fi/onto/tero/")
YSO = Namespace("http://www.yso.fi/onto/yso/")
TEROYSO = Namespace("http://www.yso.fi/onto/tero/p")
TEROMETA = Namespace("http://www.yso.fi/onto/tero-meta/")
SKOS = Namespace("http://www.w3.org/2004/02/skos/core#")

# input graph: merge every file named on the command line
g = Graph()
for fn in sys.argv[1:]:
    g.parse(fn, format=guess_format(fn))

g.namespace_manager.bind('tero',TERO)
g.namespace_manager.bind('terometa',TEROMETA)

# output graph inherits all prefix bindings from the input
out = Graph()
for prefix,ns in g.namespace_manager.namespaces():
    out.namespace_manager.bind(prefix,ns)

def switch(res):
    # Map http://www.yso.fi/onto/tero/pNNN -> http://www.yso.fi/onto/yso/pNNN
    # NOTE(review): replace() substitutes the TEROYSO prefix with 'p', then
    # YSO[...] prepends the YSO namespace -- confirm local ids always start
    # with 'p'.
    if res.startswith(TEROYSO):
        return YSO[res.replace(TEROYSO, 'p')]
    return res

# copy every triple, rewriting subject and object URIs
for s,p,o in g:
    out.add((switch(s), p, switch(o)))
def run(self):
    """Worker-thread body: apply a reviewed diff to the first ontology.

    Builds the merged graph as g1 + accepted-diff-triples - rejected-triples,
    publishes the result's name/size to the UI table, saves it (Turtle for
    .ttl, RDF/XML otherwise) and writes a timestamped log of every step.
    """
    global log
    today = datetime.now()
    # one log file per run, named after the start timestamp
    log = open("./logs/fusionAdv_"+str(today.month)+"_"+str(today.day)+"_"+str(today.year)+"_"+str(today.hour)+"_"+str(today.minute)+"_"+str(today.second)+".log", "w")
    ontologies = self.ontologies
    output = self.output
    tps0 = time.clock()
    self.myPrint("Fusion process begins...\n...\n")

    # NOTE(review): these namespace handles are never used below
    owl = Namespace("http://www.w3.org/2002/07/owl#")
    rdfs = Namespace("http://www.w3.org/2000/01/rdf-schema#")
    xsd = Namespace("http://www.w3.org/2001/XMLSchema#")
    rdf = Namespace("http://www.w3.org/1999/02/22-rdf-syntax-ns#")
    mcf = Namespace("http://www.mycorporisfabrica.org/ontology/mcf.owl#")

    self.myPrint("Parsing the first graph...\n")
    g1 = ConjunctiveGraph()
    g1.parse(ontologies[0], format=guess_format(ontologies[0]))
    diff = ConjunctiveGraph()
    toDel = ConjunctiveGraph()
    self.myPrint("Parsing done !\n\n")

    # each accepted diff entry is a "subject || predicate || object" string
    self.myPrint("Parsing differences list...\n")
    listDiff = self.diff
    for item in listDiff:
        itemSplit = item.split(" || ")
        s = itemSplit[0]
        p = itemSplit[1]
        o = itemSplit[2]
        msg="Adding triple : "
        msg=msg+s+" || "+p+" || "+o+"\n"
        self.myPrint(msg)
        # NOTE(review): objects are always wrapped as URIRef; literal
        # objects would be mis-typed here -- confirm inputs are URIs only
        diff.add((URIRef(s),URIRef(p),URIRef(o)))
    self.myPrint("Parsing done !\n\n")

    # triples the reviewer rejected, to be removed from the final graph
    self.myPrint("Parsing triples to remove from the final graph...\n")
    listToDel = self.toDel
    for item in listToDel:
        itemSplit = item.split(" || ")
        s = itemSplit[0]
        p = itemSplit[1]
        o = itemSplit[2]
        msg = "Removing triple : "
        msg = msg + s+" || "+p+" || "+o+"\n"
        self.myPrint(msg)
        toDel.add((URIRef(s),URIRef(p),URIRef(o)))
    self.myPrint("Parsing done !\n\n")

    # final graph = base ontology + additions - removals
    self.myPrint("Final merge processing...\n")
    gMerge = ConjunctiveGraph()
    gMerge = g1 + diff
    gMerge = gMerge - toDel
    self.myPrint("Merge process complete !\n\n")

    global listNames, listSizes
    # append this result's basename and size to the shared UI table data
    ontologySplit = output.split('/')
    ontologyName=ontologySplit[len(ontologySplit)-1]
    listNames.append(ontologyName)
    listSizes.append(str(len(gMerge)))
    tab["Ontology"] = listNames
    tab["Size"] = listSizes
    self.emit(SIGNAL('update_table(PyQt_PyObject)'), tab)

    self.myPrint("Saving the ontology...\n")
    # Saving the merged ontology: serialization chosen from output extension
    extension = output[len(output)-4:]
    f = ""
    if(extension == ".ttl"):
        f = "turtle"
    elif(extension == ".rdf"):
        f = "xml"
    else:
        # unknown extension: default to RDF/XML and append .rdf
        f="xml"
        output = output + ".rdf"
    gMerge.serialize(output, format=f)
    self.myPrint("Saving done !\n\n")
    tps1 = time.clock()
    self.myPrint("Fusion advanced complete.\n")
    execTime = self.prettyTime(tps1-tps0)
    self.myPrint("\nFusion advanced executing time : "+execTime+"\n")
    log.close()
def rdf_inspect(file_names, verbose=1):
    """Load RDF files and classify their predicates (Python 2 code: uses
    ``Set`` and generator ``.next()``).

    Predicates are split into types (rdf:type), name properties (known
    labeling predicates), literal/media properties, and object relations;
    the classification is written out via ``lod2graph_mapping``.

    :param file_names: iterable of RDF file paths merged into one graph
    :param verbose: when truthy, log one sample triple per predicate
    """
    logger.info("Reading the data with RdfLib ...")
    memg = rdflib.Graph()
    for file_name in file_names:
        name, extension = os.path.splitext(file_name)
        memg.parse(file_name, format=util.guess_format(file_name))
    print("Graph has %s statements." % len(memg))

    # distinct predicates used anywhere in the graph
    pred_set = Set()
    for pred in memg.predicates(None, None):
        pred_set.add(pred)
    print("Graph has %s distinct predicates." % len(pred_set))

    types = []
    properties = []
    relations = []
    names = []
    for pred in pred_set:
        # well-known labeling predicates count as both names and properties
        if pred in [URIRef(u'http://www.w3.org/2004/02/skos/core#prefLabel'),
                    URIRef(u'http://www.w3.org/2000/01/rdf-schema#label'),
                    URIRef(u'http://www.geonames.org/ontology#name'),
                    URIRef(u'http://xmlns.com/foaf/0.1/name'),
                    URIRef(u'http://purl.org/dc/elements/1.1/title'),
                    URIRef(u'http://dbpedia.org/ontology/personName'),
                    URIRef(u'http://reegle.info/schema#projectTitle')]:
            names.append(pred)
            properties.append(pred)
        elif pred == URIRef(u'http://www.w3.org/1999/02/22-rdf-syntax-ns#type'):
            types.append(pred)
        else:
            # inspect one sample triple: literal or media-file objects make
            # the predicate a property, anything else a relation
            (s, p, o) = memg.triples((None, pred, None)).next()
            o_uri = ("%s" % o).lower()
            if isinstance(o, rdflib.term.Literal) \
                    or o_uri.endswith("jpg") \
                    or o_uri.endswith("png") \
                    or o_uri.endswith("pdf") \
                    or o_uri.endswith("doc") \
                    or p == URIRef(u'http://xmlns.com/foaf/0.1/homepage'):
                properties.append(p)
            else:
                relations.append(p)

    # NOTE(review): `file_out` is not defined in this function -- presumably
    # a module-level global; verify before reuse.
    lod2graph_mapping(file_out, types, properties, relations, names)

    if verbose:
        logger.info("Types of RDF relations:")
        logger.info("Properties: %d" %len(properties))
        for pred in properties:
            (s, p, o) = memg.triples((None, pred, None)).next()
            logger.info("%s \t %s %s %s" % (pred, s, p, o))
        logger.info("Relations: %d" %len(relations))
        for pred in relations:
            (s, p, o) = memg.triples((None, pred, None)).next()
            logger.info("%s \t %s %s %s" % (pred, s, p, o))
        logger.info("Types: %d" % len(types))
        for pred in types:
            (s, p, o) = memg.triples((None, pred, None)).next()
            logger.info("%s \t %s %s %s" % (pred, s, p, o))
        logger.info("Names: %d" % len(names))
        for pred in names:
            (s, p, o) = memg.triples((None, pred, None)).next()
            logger.info("%s \t %s %s %s" % (pred, s, p, o))
""" Combine the RDF files into a graph. """ import glob from rdflib import Graph from rdflib.util import guess_format from utils import ns_mgr g = Graph() g.namespace_manager = ns_mgr for item in glob.glob('data/rdf/*'): if item == 'all.ttl': continue format = guess_format(item) g.parse(item, format='turtle') print g.serialize(format='turtle')