def construct_knowledge_graph(self) -> None:
    """Builds a full knowledge graph.

    Please note that the process to build this version of the knowledge graph does not include running a
    reasoner. The full build includes the following steps: (1) Process relation/inverse relations; (2) Merge
    ontologies; (3) Process node metadata; (4) Create graph subsets; (5) Add master edge list to merged
    ontologies; (6) Decode OWL-encoded classes; (7) Output knowledge graphs and create edge lists and
    (8) Extract and write node metadata.

    Returns:
        None.
    """
    log_str = '### Starting Knowledge Graph Build: FULL ###'; print('\n' + log_str)
    logger.info('*' * 10 + 'PKT STEP: CONSTRUCTING KNOWLEDGE GRAPH' + '*' * 10 + '\n' + log_str)

    # STEP 1: PROCESS RELATION AND INVERSE RELATION DATA
    log_str = '*** Loading Relations Data ***'; print(log_str); logger.info(log_str)
    self.reverse_relation_processor()

    # STEP 2: MERGE ONTOLOGIES -- reuse a previously merged file when present, otherwise merge and re-parse
    if self.merged_ont_kg in glob.glob(self.write_location + '/*.owl'):
        log_str = '*** Loading Merged Ontologies ***'; print(log_str); logger.info(log_str)
        self.graph = Graph().parse(self.merged_ont_kg, format='xml')
    else:
        log_str = '*** Merging Ontology Data ***'; print(log_str); logger.info(log_str)
        merges_ontologies(self.ontologies, self.merged_ont_kg.split('/')[-1], self.owl_tools)
        self.graph.parse(self.merged_ont_kg, format='xml')
    stats = 'Merged Ontologies {}'.format(derives_graph_statistics(self.graph)); print(stats); logger.info(stats)

    # STEP 3: PROCESS NODE METADATA
    log_str = '*** Loading Node Metadata Data ***'; print(log_str); logger.info(log_str)
    meta = Metadata(self.kg_version, self.write_location, self.full_kg, self.node_data, self.node_dict)
    if self.node_data: meta.metadata_processor(); meta.extract_metadata(self.graph)

    # STEP 4: CREATE GRAPH SUBSETS -- split graph into logic (kept on self.graph) and annotation triples
    log_str = '*** Splitting Graph ***'; print(log_str); logger.info(log_str)
    f = self.write_location; self.graph, annotation_triples = splits_knowledge_graph(self.graph)
    s = 'Merged Ontologies - Logic Subset {}'.format(derives_graph_statistics(self.graph)); print(s); logger.info(s)
    kg_owl = '_'.join(self.full_kg.split('_')[0:-1]) + '_OWL.owl'; kg_owl_main = kg_owl[:-8] + '.owl'
    annot, logic, full = kg_owl[:-4] + '_AnnotationsOnly.nt', kg_owl[:-4] + '_LogicOnly.nt', kg_owl[:-4] + '.nt'
    appends_to_existing_file(annotation_triples, f + annot); appends_to_existing_file(self.graph, f + logic)
    del annotation_triples  # free memory before edge construction

    # STEP 5: ADD EDGE DATA TO KNOWLEDGE GRAPH DATA
    log_str = '*** Building Knowledge Graph Edges ***'; print('\n' + log_str); logger.info(log_str)
    self.ont_classes = gets_ontology_classes(self.graph); self.obj_properties = gets_object_properties(self.graph)
    try: ray.init()
    except RuntimeError: pass  # ray is already initialized
    args = {'construction': self.construct_approach, 'edge_dict': self.edge_dict, 'node_data': self.node_data,
            'rel_dict': self.relations_dict, 'inverse_dict': self.inverse_relations_dict, 'kg_owl': kg_owl,
            'ont_cls': self.ont_classes, 'obj_props': self.obj_properties, 'metadata': meta.creates_node_metadata,
            'write_loc': self.write_location}
    # balance edge types across workers by edge-list size, then fan work out to one actor per cpu
    edges = sublist_creator({k: len(v['edge_list']) for k, v in self.edge_dict.items()}, self.cpus)
    actors = [ray.remote(self.EdgeConstructor).remote(args) for _ in range(self.cpus)]  # type: ignore
    for i in range(0, len(edges)): [actors[i].creates_new_edges.remote(j) for j in edges[i]]  # type: ignore
    # FIX: wait on and fetch the SAME futures. The original waited on one set of graph_getter.remote() calls
    # and then issued a second set for ray.get, which executed every actor's graph_getter twice.
    graph_futures = [x.graph_getter.remote() for x in actors]
    _ = ray.wait(graph_futures, num_returns=len(actors))
    res = ray.get(graph_futures); g1 = [x[0] for x in res]; g2 = [x[1] for x in res]
    error_dicts = dict(ChainMap(*ray.get([x.error_dict_getter.remote() for x in actors]))); del actors
    if len(error_dicts.keys()) > 0:  # output error logs
        log_file = glob.glob(self.res_dir + '/construction*')[0] + '/subclass_map_log.json'
        logger.info('See log: {}'.format(log_file)); outputs_dictionary_data(error_dicts, log_file)

    # STEP 6: DECODE OWL SEMANTICS
    # results holds [owl graph, owl-nets graph, purified owl-nets graph]; the union below merges the
    # merged-ontology logic triples with each worker's constructed triples
    results = [set(x for y in [set(x) for x in [self.graph] + g1] for x in y), None, None]
    stats = 'Full Logic {}'.format(derives_graph_statistics(results[0])); print(stats); logger.info(stats)
    s1 = convert_to_networkx(self.write_location, kg_owl[:-4], results[0], True)
    if s1 is not None:
        log_stats = 'Full Logic Subset (OWL) {}'.format(s1); logger.info(log_stats); print(log_stats)
    # aggregates processed owl-nets output derived when constructing non-ontology edges
    if self.decode_owl is not None:
        graphs = [updates_pkt_namespace_identifiers(self.graph, self.construct_approach)] + g2
        owlnets = OwlNets(graphs, self.write_location, kg_owl_main, self.construct_approach, self.owl_tools)
        results = [results[0]] + list(owlnets.runs_owlnets(self.cpus))

    # STEP 7: WRITE OUT KNOWLEDGE GRAPH METADATA AND CREATE EDGE LISTS
    log_str = '*** Writing Knowledge Graph Edge Lists ***'; print('\n' + log_str); logger.info(log_str)
    f_prefix = ['_OWL', '_OWLNETS', '_OWLNETS_' + self.construct_approach.upper() + '_purified']
    for x in range(0, len(results)):
        graph = results[x]; p_str = 'OWL' if x == 0 else 'OWL-NETS' if x == 1 else 'Purified OWL-NETS'
        if graph is not None:
            log_str = '*** Processing {} Graph ***'.format(p_str); print('\n' + log_str); logger.info(log_str)
            triple_list_file = kg_owl[:-8] + f_prefix[x] + '_Triples_Integers.txt'
            triple_map = triple_list_file[:-5] + '_Identifier_Map.json'
            node_int_map = maps_ids_to_integers(graph, self.write_location, triple_list_file, triple_map)
            # STEP 8: EXTRACT AND WRITE NODE METADATA
            meta.full_kg = kg_owl[:-8] + f_prefix[x] + '.owl'
            if self.node_data: meta.output_metadata(node_int_map, graph)

    # deduplicate logic and annotation files, merge them, and print final stats
    deduplicates_file(f + annot); deduplicates_file(f + logic); merges_files(f + annot, f + logic, f + full)
    str1 = '\nLoading Full (Logic + Annotation) Graph'; print('\n' + str1); logger.info(str1)
    graph = Graph().parse(f + full, format='nt'); str2 = 'Deriving Stats'; print('\n' + str2); logger.info(str2)
    s = 'Full (Logic + Annotation) {}'.format(derives_graph_statistics(graph)); print('\n' + s); logger.info(s)

    return None
def construct_knowledge_graph(self) -> None:
    """Builds a post-closure knowledge graph.

    This build is recommended when one has previously performed a "partial" knowledge graph build and then
    ran a reasoner over it. This build type inputs the closed partially built knowledge graph and completes
    the build process. The post-closure build utilizes the following steps: (1) Process relation and inverse
    relation data; (2) Load closed knowledge graph; (3) Process node metadata; (4) Create graph subsets;
    (5) Decode OWL-encoded classes; (6) Output knowledge graph files and create edge lists; and (7) Extract
    and write node metadata.

    Returns:
        None.

    Raises:
        OSError: If closed knowledge graph file does not exist.
        TypeError: If the closed knowledge graph file is empty.
    """
    log_str = '### Starting Knowledge Graph Build: POST-CLOSURE ###'; print('\n' + log_str)
    logger.info('*' * 10 + 'PKT STEP: CONSTRUCTING KNOWLEDGE GRAPH' + '*' * 10 + '\n' + log_str)

    # STEP 1: PROCESS RELATION AND INVERSE RELATION DATA
    log_str = '*** Loading Relations Data ***'; print(log_str); logger.info(log_str)
    self.reverse_relation_processor()

    # STEP 2: LOAD CLOSED KNOWLEDGE GRAPH -- the reasoner's output is expected as the only *.owl file
    closed_kg = glob.glob(self.write_location + '/*.owl')
    if len(closed_kg) == 0:
        logs = 'KG file does not exist!'; logger.error('OSError: ' + logs); raise OSError(logs)
    elif os.stat(closed_kg[0]).st_size == 0:
        logs = '{} is empty'.format(closed_kg); logger.error('TypeError: ' + logs); raise TypeError(logs)
    else:
        log_str = '*** Loading Closed Knowledge Graph ***'; print(log_str); logger.info(log_str)
        os.rename(closed_kg[0], self.write_location + self.full_kg)  # rename closed kg file
        self.graph = Graph().parse(self.write_location + self.full_kg, format='xml')
    stats = 'Input {}'.format(derives_graph_statistics(self.graph)); print(stats); logger.info(stats)

    # STEP 3: PROCESS NODE METADATA
    log_str = '*** Loading Node Metadata Data ***'; print(log_str); logger.info(log_str)
    meta = Metadata(self.kg_version, self.write_location, self.full_kg, self.node_data, self.node_dict)
    if self.node_data: meta.metadata_processor(); meta.extract_metadata(self.graph)

    # STEP 4: CREATE GRAPH SUBSETS -- split graph into logic (kept on self.graph) and annotation triples
    log_str = '*** Splitting Graph ***'; print(log_str); logger.info(log_str)
    # FIX: the write location was previously bound to "_", which conventionally marks a throwaway value but
    # was then used as a live path prefix throughout; renamed to "f" to match the full-build method.
    f = self.write_location; self.graph, annotation_triples = splits_knowledge_graph(self.graph)
    stats = 'Merged Logic Subset {}'.format(derives_graph_statistics(self.graph)); print(stats); logger.info(stats)
    kg_owl = '_'.join(self.full_kg.split('_')[0:-1]) + '_OWL.owl'; kg_owl_main = kg_owl[:-8] + '.owl'
    annot, logic, full = kg_owl[:-4] + '_AnnotationsOnly.nt', kg_owl[:-4] + '_LogicOnly.nt', kg_owl[:-4] + '.nt'
    appends_to_existing_file(annotation_triples, f + annot); appends_to_existing_file(self.graph, f + logic)
    del annotation_triples  # free memory

    # STEP 5: DECODE OWL SEMANTICS
    # results holds [owl graph, owl-nets graph, purified owl-nets graph]
    results = [set(self.graph), None, None]
    stats = 'Full Logic {}'.format(derives_graph_statistics(results[0])); print(stats); logger.info(stats)
    logger.info('*** Converting Knowledge Graph to Networkx MultiDiGraph ***')
    s = convert_to_networkx(self.write_location, kg_owl[:-4], results[0], True)
    if s is not None:
        log_stats = 'Full Logic Subset (OWL) {}'.format(s); logger.info(log_stats); print(log_stats)
    if self.decode_owl:
        self.graph = updates_pkt_namespace_identifiers(self.graph, self.construct_approach)
        owlnets = OwlNets(self.graph, self.write_location, kg_owl_main, self.construct_approach, self.owl_tools)
        results = [results[0]] + list(owlnets.runs_owlnets(self.cpus))

    # STEP 6: WRITE OUT KNOWLEDGE GRAPH METADATA AND CREATE EDGE LISTS
    # (FIX: this comment previously said "STEP 7", skipping 6 and disagreeing with the docstring's step list)
    log_str = '*** Writing Knowledge Graph Edge Lists ***'; print('\n' + log_str); logger.info(log_str)
    f_prefix = ['_OWL', '_OWLNETS', '_OWLNETS_' + self.construct_approach.upper() + '_purified']
    for x in range(0, len(results)):
        graph = results[x]; p_str = 'OWL' if x == 0 else 'OWL-NETS' if x == 1 else 'Purified OWL-NETS'
        if graph is not None:
            log_str = '*** Processing {} Graph ***'.format(p_str); print(log_str); logger.info(log_str)
            triple_list_file = kg_owl[:-8] + f_prefix[x] + '_Triples_Integers.txt'
            triple_map = triple_list_file[:-5] + '_Identifier_Map.json'
            node_int_map = maps_ids_to_integers(graph, self.write_location, triple_list_file, triple_map)
            # STEP 7: EXTRACT AND WRITE NODE METADATA (was mislabeled "STEP 8")
            meta.full_kg = kg_owl[:-8] + f_prefix[x] + '.owl'
            if self.node_data: meta.output_metadata(node_int_map, graph)

    # deduplicate logic and annotation files and then merge them
    deduplicates_file(f + annot); deduplicates_file(f + logic); merges_files(f + annot, f + logic, f + full)

    return None