def process_qnode(self, kw: KgtkWriter, current_process_node_id: str,
                  each_node_attributes: EACH_NODE_ATTRIBUTES) -> bool:
    """Emit one sentence row for a Q-node; return True iff a row was written.

    A node is worth writing only when at least one of its attribute lists
    is non-empty; otherwise nothing is emitted.
    """
    # Guard: missing or entirely-empty attribute map -> nothing to do.
    if not each_node_attributes:
        return False
    if not any(each_node_attributes[key] for key in each_node_attributes):
        return False

    sentence: str
    why: str
    sentence, why = self.attribute_to_sentence(each_node_attributes,
                                               current_process_node_id)

    row = [
        current_process_node_id,
        self.sentence_label,
        KgtkFormat.stringify(sentence),
    ]
    if self.explain:
        # The explanation column is only emitted in --explain mode.
        row.append(KgtkFormat.stringify(why))
    kw.write(row)
    return True
def produce_node_labels(event):
    """Produce a KGTK label string for an ATOMIC event node.

    Returns the stringified raw event and, pipe-joined after it, a variant
    with person mentions removed — when that variant is non-empty and
    actually differs from the raw form.
    """
    # Some inputs carry extra tab-separated fields; keep only the first.
    if '\t' in event:
        event = event.split('\t')[0]
    e1 = event.lower().rstrip('.').strip()
    e2 = remove_people_mentions(e1)
    # BUG FIX: collapse runs of spaces left behind by mention removal.
    # The previous code replaced a single space with a single space — a
    # no-op — so the while condition never changed and the loop could
    # spin forever on any string containing a space.
    while '  ' in e2:
        e2 = e2.replace('  ', ' ')
    if e1 != e2 and e2:
        return '|'.join([KgtkFormat.stringify(e1), KgtkFormat.stringify(e2)])
    return KgtkFormat.stringify(e1)
def edge2KGTK(edge: Tuple[str, str, str]) -> pd.Series:
    """
    Gets the edge as triple of subject, object, predicate and converts the
    edge to the KGTK format

    Args:
        edge: Tuple[str, str, str]
            input edge
    Returns: pd.Series
        pandas Series with keys according to KGTK format at
        https://docs.google.com/document/d/1fbbqgyX0N2EdxLam6hatfke1R-nZWkoN6M1oB_f4aQo/edit#heading=h.a5nlqev5bmm4
    """
    s, p, o = edge

    def clean(e: str) -> str:
        # Drop the namespace prefix, de-underscore, split camelCase
        # ("IsA" -> "Is A"), then lower-case and KGTK-stringify.
        out = e.split(':')[-1].replace('_', ' ')
        # FIX: raw strings for the regex — "\g" is an invalid escape
        # sequence in a plain string literal (SyntaxWarning on modern
        # Python, future SyntaxError).
        return KgtkFormat.stringify(
            re.sub(r"([a-z])([A-Z])", r"\g<1> \g<2>", out).strip().lower())

    return pd.Series({
        'node1': s,
        'relation': p,
        'node2': o,
        'node1;label': clean(s),
        'node2;label': clean(o),
        'relation;label': clean(p),
        'relation;dimension': '',
        'source': KgtkFormat.stringify('FN'),
        'sentence': ''
    })
def create_edge(node1, node1_lbl, node2, node2_lbl, rel, rel_lbl, image_id):
    """Assemble one 9-column KGTK edge row for a Visual Genome statement."""
    # Multiple labels per node are pipe-separated in KGTK.
    joined_n1 = '|'.join(node1_lbl)
    joined_n2 = '|'.join(node2_lbl)
    return [
        node1,
        rel,
        node2,
        joined_n1,
        joined_n2,
        rel_lbl,
        '',                          # relation;dimension
        KgtkFormat.stringify('VG'),  # source
        '',                          # sentence
    ]
def create_edges(data, labels, rel, rel_label):
    """Build KGTK edge rows for every (node1 -> node2) pair in *data*.

    Args:
        data: mapping of node1 synset id -> iterable of node2 synset ids.
        labels: mapping of synset id -> pipe-separated label string.
        rel: relation identifier shared by all rows.
        rel_label: stringified relation label shared by all rows.
    Returns: list of 9-column KGTK rows.
    """
    all_rows = []
    # The source column is constant: stringify once, outside the loops.
    source = KgtkFormat.stringify('WN')
    for node1, targets in data.items():
        for node2 in targets:
            # FIX: removed the node1/node2 "preferred label" locals the old
            # code computed on every inner iteration but never used.
            all_rows.append([
                'wn:' + node1, rel, 'wn:' + node2, labels[node1],
                labels[node2], rel_label, "", source, ''
            ])
    return all_rows
def row_to_edge(row, cols):
    """Convert one ConceptNet assertion row into a tab-separated KGTK line.

    Args:
        row: parsed assertion row; row[1]=relation, row[2]=node1,
             row[3]=node2, row[4]=JSON metadata blob.
        cols: ordered output column names (keys into the edge dict).
    Returns: the edge serialized as a '\t'-joined line ending in '\n'.
    """
    edge = {
        'node1': row[2],
        'relation': row[1],
        'node2': row[3],
        'node1_label': make_node_label(row[2]),
        'node2_label': make_node_label(row[3]),
        'relation_label': make_rel_label(row[1]),
        'relation_dimension': '',
        'source': KgtkFormat.stringify('CN'),
    }
    metadata = json.loads(row[4])
    # FIX: membership test directly on the dict instead of .keys().
    if 'surfaceText' in metadata:
        # Backslashes in surfaceText are stripped before stringifying.
        edge['sentence'] = KgtkFormat.stringify(
            metadata['surfaceText'].replace('\\', ''))
    else:
        edge['sentence'] = ''
    edge_list = [edge[col] for col in cols]
    return '\t'.join(edge_list) + '\n'
def extract(input_file, output_file, source):
    """Re-emit *input_file* as a KGTK edge file with a synthetic id column.

    Each input line is tab-separated with the edge triple in columns 0-2;
    column 1 is normalized ('same' -> 'Same') and an id of the form
    node1-relation-node2 is prepended.  The input header row is skipped.
    """
    columns = ['id', 'node1', 'relation', 'node2', 'node1;label',
               'node2;label', 'relation;label', 'relation;dimension',
               'source', 'sentence']
    with open(output_file, 'w') as w:
        w.write(print_edge(columns))
        with open(input_file, 'r') as f:
            header = next(f)  # skip the input header row
            for line in f:
                data = line.split('\t')
                data[1] = data[1].replace('same', 'Same')
                # FIX: renamed 'id' -> 'edge_id' (was shadowing the builtin)
                # and dropped the unused 'rows' accumulator.
                edge_id = '-'.join(data[:3])
                new_row = [edge_id, *data[:3], "", "", "", "",
                           KgtkFormat.stringify(source), ""]
                w.write(print_edge(new_row))
def produce_rel_label(rel):
    """Map an ATOMIC relation code to its human-readable KGTK label.

    Raises KeyError for an unknown relation code.
    """
    labels = {
        'xAttr': 'person x has attribute',
        'oAttr': 'others have attribute',
        'xReact': 'person x feels',
        'oReact': 'others feel',
        'xIntent': 'person x wants',
        'xWant': 'person x wants',
        'oWant': 'others want',
        'xNeed': 'person x needs',
        'xEffect': 'effect on person x',
        'oEffect': 'the effect on others',
    }
    label = labels[rel]
    return KgtkFormat.stringify(label)
def row_to_edge(node1, rel, node2, source, cols):
    """Build a KGTK edge row, ordered by *cols*, for a namespaced node pair.

    The node ids are prefixed with the lower-cased source name, e.g.
    'CN' -> 'cn:<node>'.
    """
    ns = source.lower()
    edge = {
        'node1': ns + ':' + node1,
        'relation': rel,
        'node2': ns + ':' + node2,
        'node1;label': make_node_label(node1),
        'node2;label': make_node_label(node2),
        'relation;label': make_rel_label(rel),
        'relation;dimension': '',
        'source': KgtkFormat.stringify(source),
        'sentence': '',
    }
    return [edge[col] for col in cols]
def run(input_file: KGTKFiles, output_file: KGTKFiles):
    """Convert an ATOMIC-style CSV (events x relations of JSON lists) into KGTK edges."""
    # import modules locally
    import sys  # type: ignore
    from kgtk.exceptions import kgtk_exception_auto_handler, KGTKException
    import csv
    import re
    import json
    from pathlib import Path
    from string import Template
    import pandas as pd
    from kgtk.kgtkformat import KgtkFormat
    from kgtk.io.kgtkwriter import KgtkWriter

    def make_node(x):
        # 'at:' namespace id with underscores instead of spaces.
        und_x = x.replace(' ', '_')
        pref_und_x = 'at:%s' % und_x
        return pref_und_x

    def remove_people_mentions(event):
        # Strip PersonX/PersonY placeholders and blank fill-ins from an event.
        e = event.replace('personx', '').strip()
        e = e.replace('persony', '').strip()
        e = e.replace('person x', '').strip()
        e = e.replace('person y', '').strip()
        e = e.replace('the ___', '')
        e = e.replace('___', '')
        e = e.replace("'s", '')
        e = e.replace('to y', '')
        return e.strip()

    def produce_node_labels(event):
        # Label = raw event plus (pipe-joined) the de-personalized variant
        # when that variant is non-empty and different.
        if '\t' in event:
            event = event.split('\t')[0]
        e1 = event.lower()
        e1 = e1.rstrip('.').strip()
        e2 = remove_people_mentions(e1)
        # NOTE(review): this loop looks garbled — replacing ' ' with ' ' is
        # a no-op, so the condition never changes and the loop can spin
        # forever; it was presumably meant to collapse double spaces
        # ('  ' -> ' ') — TODO confirm against version control.
        while ' ' in e2:
            e2 = e2.replace(' ', ' ')
        if e1 != e2 and e2:
            return '|'.join(
                [KgtkFormat.stringify(e1), KgtkFormat.stringify(e2)])
        else:
            return KgtkFormat.stringify(e1)

    def produce_rel_label(rel):
        # Human-readable label per ATOMIC relation code (KeyError if unknown).
        mapping = {
            'xAttr': 'person x has attribute',
            'oAttr': 'others have attribute',
            'xReact': 'person x feels',
            'oReact': 'others feel',
            'xIntent': 'person x wants',
            'xWant': 'person x wants',
            'oWant': 'others want',
            'xNeed': 'person x needs',
            'xEffect': 'effect on person x',
            'oEffect': 'the effect on others'
        }
        return KgtkFormat.stringify(mapping[rel])

    try:
        filename: Path = KGTKArgumentParser.get_input_file(input_file)
        out_columns = [
            'node1', 'relation', 'node2', 'node1;label', 'node2;label',
            'relation;label', 'relation;dimension', 'source', 'sentence'
        ]

        output_kgtk_file: Path = KGTKArgumentParser.get_output_file(
            output_file)
        ew: KgtkWriter = KgtkWriter.open(
            out_columns,
            output_kgtk_file,
            #mode=input_kr.mode,
            require_all_columns=False,
            prohibit_extra_columns=True,
            fill_missing_columns=True,
            gzip_in_parallel=False,
            #verbose=self.verbose,
            #very_verbose=self.very_verbose
        )

        df = pd.read_csv(filename, index_col=0)
        # The first 9 columns hold JSON-encoded lists; parse them in place.
        df.iloc[:, :9] = df.iloc[:, :9].apply(
            lambda col: col.apply(json.loads))
        # Drop the last two (non-relation) columns.
        df.drop(df.columns[len(df.columns) - 1], axis=1, inplace=True)
        df.drop(df.columns[len(df.columns) - 1], axis=1, inplace=True)
        for event, row in df.iterrows():
            event_label = produce_node_labels(event)
            # Preferred label = first pipe-separated alternative.
            first_event_label = KgtkFormat.unstringify(
                event_label.split('|')[0] if '|' in
                event_label else event_label)
            n1 = make_node(first_event_label)
            for c in df.columns:
                for v in row[c]:
                    if v == 'none':
                        continue  # 'none' marks an absent annotation
                    value_label = produce_node_labels(v)
                    first_value_label = KgtkFormat.unstringify(
                        value_label.split('|')[0] if '|' in
                        value_label else value_label)
                    n2 = make_node(first_value_label)
                    rel_label = produce_rel_label(c)
                    sentence = ''
                    relation = make_node(c)

                    this_row = [
                        n1, relation, n2, event_label, value_label,
                        rel_label, '',
                        KgtkFormat.stringify('AT'), sentence
                    ]
                    ew.write(this_row)

        # Clean up.
        ew.close()

    except Exception as e:
        # NOTE(review): re-wrapping loses the original traceback; consider
        # 'raise KGTKException(...) from e'.
        raise KGTKException('Error: ' + str(e))
def make_node_label(node):
    """Derive a display label from segment 3 of a '/'-separated node id."""
    segment = node.strip().split('/')[3]
    return KgtkFormat.stringify(segment.replace('_', ' '))
def make_rel_label(rel):
    """Label for a relation: its last path segment with camelCase split apart."""
    last_segment = rel.split('/')[-1]
    return KgtkFormat.stringify(split_camel_case(last_segment))
def implode_language_qualified_string(
        self,
        input_line_count: int,
        row: typing.List[str],
        implosion: typing.Mapping[str, int],
        type_name: str,
) -> typing.Tuple[str, bool]:
    """Rebuild a KGTK language-qualified string from its exploded fields.

    Validates the text field (non-empty, surrounded by double quotes) and
    the language field (non-empty), then re-assembles the value via
    ast.literal_eval + KgtkFormat.stringify.  Returns (value, valid);
    value is "" whenever validation failed earlier.
    """
    valid: bool = True
    text_idx: int = implosion[KgtkValueFields.TEXT_FIELD_NAME]
    text_val: str = row[text_idx]
    if len(text_val) == 0:
        valid = False
        if self.verbose:
            print("Input line %d: data type '%s': %s field is empty" %
                  (input_line_count, type_name,
                   KgtkValueFields.TEXT_FIELD_NAME),
                  file=self.error_file,
                  flush=True)
    elif len(text_val) == 1:
        # A single character cannot hold both opening and closing quotes.
        valid = False
        if self.verbose:
            print("Input line %d: data type '%s': %s field is too short" %
                  (input_line_count, type_name,
                   KgtkValueFields.TEXT_FIELD_NAME),
                  file=self.error_file,
                  flush=True)
    else:
        if not text_val.startswith('"'):
            valid = False
            if self.verbose:
                print(
                    "Input line %d: data type '%s': %s field does not start with a double quote"
                    % (input_line_count, type_name,
                       KgtkValueFields.TEXT_FIELD_NAME),
                    file=self.error_file,
                    flush=True)
        if not text_val.endswith('"'):
            valid = False
            if self.verbose:
                print(
                    "Input line %d: data type '%s': %s field does not end with a double quote"
                    % (input_line_count, type_name,
                       KgtkValueFields.TEXT_FIELD_NAME),
                    file=self.error_file,
                    flush=True)

    language_idx: int = implosion[KgtkValueFields.LANGUAGE_FIELD_NAME]
    language_val: str = self.unwrap(row[language_idx])
    if len(language_val) == 0:
        valid = False
        if self.verbose:
            print("Input line %d: data type '%s': %s field is empty" %
                  (input_line_count, type_name,
                   KgtkValueFields.LANGUAGE_FIELD_NAME),
                  file=self.error_file,
                  flush=True)

    suf_idx: int = implosion[KgtkValueFields.LANGUAGE_SUFFIX_FIELD_NAME]
    suf: str = self.unwrap(row[suf_idx]) if suf_idx >= 0 else ""
    if len(suf) > 0 and not suf.startswith("-"):
        # As a special favor, we'll accept language suffixes that do not
        # start with a dash.  We'll prepend the dash.
        suf = "-" + suf

    value: str = ""
    if valid:
        # This subterfuge uses Python's literal parser to parse the string.
        if not self.escape_pipes:
            # ast.literal_eval(...) doesn't treat backslash pipe (\|) as an
            # escaped pipe (|).  (this is documented behavior) so we will
            # remove escaped pipes manually.
            text_val = text_val.replace('\\|', '|')
        value = KgtkFormat.stringify(ast.literal_eval(text_val),
                                     language=language_val,
                                     language_suffix=suf)

    if valid and self.validate:
        # Optional round-trip check of the assembled value.
        kv: KgtkValue = KgtkValue(value, options=self.value_options)
        valid = kv.is_language_qualified_string(validate=True)
        if not valid:
            if self.verbose:
                print(
                    "Input line %d: data type '%s': imploded value '%s' is not a valid language qualified string."
                    % (input_line_count, type_name, value),
                    file=self.error_file,
                    flush=True)

    return value, valid
# Link lexically-identical nodes across the AT/RG/CN sources by emitting
# mw:SameAs edges between chains of node ids that share a label.
sources = ['AT', 'RG', 'CN']
identity_rel = 'mw:SameAs'
# label string -> set of node ids carrying that label
lbl2ids = defaultdict(set)
with open(input_file, 'r') as f:
    header = next(f)  # keep the header to replay into the output file
    for line in f:
        data = line.split('\t')
        # Column 8 holds the source field; only configured sources count.
        if check_source(data[8], sources):
            node1 = data[1]
            node2 = data[3]
            if lexical_node(node1):
                node1_label = data[4]
                lbl2ids[node1_label].add(node1)
            if lexical_node(node2):
                node2_label = data[5]
                lbl2ids[node2_label].add(node2)
print(len(lbl2ids))
with open('tmp/lexical_mappings.tsv', 'w') as w:
    w.write(header)
    for label, ids in lbl2ids.items():
        if len(ids) <= 1:
            continue  # a single id has nothing to be linked to
        list_ids = list(ids)
        # Chain consecutive ids (id0-id1, id1-id2, ...) rather than
        # emitting the full pairwise closure.
        for i in range(len(list_ids) - 1):
            edge_id = '%s-%s-%s-1' % (list_ids[i], identity_rel,
                                      list_ids[i + 1])
            row = [edge_id, list_ids[i], identity_rel, list_ids[i + 1],
                   '', '', '', '', KgtkFormat.stringify('LEX'), '']
            w.write('\t'.join(row) + '\n')
def clean(e: str) -> str:
    """Normalize an id into a label: drop the namespace prefix,
    de-underscore, split camelCase, lower-case, and KGTK-stringify."""
    out = e.split(':')[-1].replace('_', ' ')
    # FIX: raw strings for the regex — "\g" is an invalid escape sequence
    # in a plain string literal (SyntaxWarning on modern Python).
    return KgtkFormat.stringify(
        re.sub(r"([a-z])([A-Z])", r"\g<1> \g<2>", out).strip().lower())
def implode_string(
        self,
        input_line_count: int,
        row: typing.List[str],
        implosion: typing.Mapping[str, int],
        type_name: str,
) -> typing.Tuple[str, bool]:
    """Rebuild a plain KGTK string from its exploded fields.

    If a language field is present and non-empty, either delegate to the
    language-qualified implosion (when general strings are allowed) or mark
    the row invalid.  Returns (value, valid); value is "" on failure.
    """
    valid: bool = True
    if KgtkValueFields.LANGUAGE_FIELD_NAME in implosion:
        language_idx: int = implosion[KgtkValueFields.LANGUAGE_FIELD_NAME]
        if language_idx >= 0:
            language_val: str = self.unwrap(row[language_idx])
            if len(language_val) > 0:
                if self.general_strings:
                    # Non-empty language: treat as language-qualified.
                    return self.implode_language_qualified_string(
                        input_line_count, row, implosion, type_name)
                else:
                    valid = False
                    if self.verbose:
                        print(
                            "Input line %d: data type '%s': %s field is not empty"
                            % (input_line_count, type_name,
                               KgtkValueFields.LANGUAGE_FIELD_NAME),
                            file=self.error_file,
                            flush=True)

    text_idx: int = implosion[KgtkValueFields.TEXT_FIELD_NAME]
    text_val: str = row[text_idx]
    if len(text_val) == 0:
        valid = False
        if self.verbose:
            print("Input line %d: data type '%s': %s field is empty" %
                  (input_line_count, type_name,
                   KgtkValueFields.TEXT_FIELD_NAME),
                  file=self.error_file,
                  flush=True)
    elif len(text_val) == 1:
        # A single character cannot hold both opening and closing quotes.
        valid = False
        if self.verbose:
            print("Input line %d: data type '%s': %s field is too short" %
                  (input_line_count, type_name,
                   KgtkValueFields.TEXT_FIELD_NAME),
                  file=self.error_file,
                  flush=True)
    else:
        if not text_val.startswith('"'):
            valid = False
            if self.verbose:
                print(
                    "Input line %d: data type '%s': %s field does not start with a double quote"
                    % (input_line_count, type_name,
                       KgtkValueFields.TEXT_FIELD_NAME),
                    file=self.error_file,
                    flush=True)
        if not text_val.endswith('"'):
            valid = False
            if self.verbose:
                print(
                    "Input line %d: data type '%s': %s field does not end with a double quote"
                    % (input_line_count, type_name,
                       KgtkValueFields.TEXT_FIELD_NAME),
                    file=self.error_file,
                    flush=True)

    value: str = ""
    if valid:
        # This subterfuge uses Python's literal parser to parse the string.
        if not self.escape_pipes:
            # ast.literal_eval(...) doesn't treat backslash pipe (\|) as an
            # escaped pipe (|).  (this is documented behavior) so we will
            # remove escaped pipes manually.
            text_val = text_val.replace('\\|', '|')
        value = KgtkFormat.stringify(ast.literal_eval(text_val))

    if valid and self.validate:
        # Optional round-trip check of the assembled value.
        kv: KgtkValue = KgtkValue(value, options=self.value_options)
        valid = kv.is_string(validate=True)
        if not valid:
            if self.verbose:
                print(
                    "Input line %d: data type '%s': imploded value '%s' is not a valid string."
                    % (input_line_count, type_name, value),
                    file=self.error_file,
                    flush=True)

    return value, valid
def run(input_file: KGTKFiles, attr_syn_file: KGTKFiles):
    """Convert a Visual Genome scene-graph JSON into KGTK edges on stdout."""
    # import modules locally
    import sys  # type: ignore
    from kgtk.exceptions import kgtk_exception_auto_handler
    import csv
    import json
    import re
    from pathlib import Path
    from collections import defaultdict
    from kgtk.kgtkformat import KgtkFormat

    out_columns = [
        'node1', 'relation', 'node2', 'node1_label', 'node2_label',
        'relation_label', 'relation_dimension', 'source', 'sentence'
    ]

    proximity_relation = '/r/LocatedNear'
    property_relation = 'mw:MayHaveProperty'
    property_relation_label = KgtkFormat.stringify('may have property')
    capableof_relation = '/r/CapableOf'
    capableof_relation_label = KgtkFormat.stringify('capable of')

    def create_edge(node1, node1_lbl, node2, node2_lbl, rel, rel_lbl,
                    image_id):
        # One tab-separated KGTK edge line; labels are pipe-joined.
        my_row = [
            node1, rel, node2, '|'.join(node1_lbl), '|'.join(node2_lbl),
            rel_lbl, '',
            KgtkFormat.stringify('VG'), ''
        ]
        return '\t'.join(my_row) + '\n'

    def header_to_edge(row):
        # Output header uses ';' where the internal names use '_'.
        row = [r.replace('_', ';') for r in row]
        return '\t'.join(row) + '\n'

    def create_uri(ns, rel):
        # NOTE(review): helper appears unused within this function.
        return '%s:%s' % (ns, rel)

    try:
        scene_graph_filename: Path = KGTKArgumentParser.get_input_file(
            input_file)
        attr_synsets_filename: Path = KGTKArgumentParser.get_input_file(
            attr_syn_file)

        with open(scene_graph_filename, 'r') as f:
            images_data = json.load(f)

        with open(attr_synsets_filename, 'r') as f:
            attr_synsets = json.load(f)

        sys.stdout.write(header_to_edge(out_columns))

        for counter, an_image in enumerate(images_data):
            image_id = str(an_image['image_id'])

            # OBJECTS
            objid2names = defaultdict(list)
            objid2syns = {}
            rows = []
            for o in an_image['objects']:
                obj_id = o['object_id']
                o_synset = o['synsets']
                objid2syns[obj_id] = o_synset
                for name in o['names']:
                    name = name.strip().lower().rstrip('.')
                    if not name:
                        continue
                    objid2names[obj_id].append(KgtkFormat.stringify(name))

                # ATTRIBUTES
                if 'attributes' in o.keys():
                    for attr in o['attributes']:
                        attr = attr.lower()
                        if attr in attr_synsets:
                            asyn = attr_synsets[attr]
                            # Part of speech is the 2nd dot-field, e.g. 'run.v.01'.
                            apos = asyn.split('.')[1]
                            if apos != 'n':
                                if apos == 'v':  # verb
                                    for osyn in o_synset:
                                        if osyn != asyn:
                                            edge_row = create_edge(
                                                'wn:' + osyn,
                                                objid2names[obj_id],
                                                'wn:' + asyn,
                                                [KgtkFormat.stringify(attr)],
                                                capableof_relation,
                                                capableof_relation_label,
                                                image_id)
                                            if edge_row not in rows:
                                                rows.append(edge_row)
                                else:  #adjective
                                    for osyn in o_synset:
                                        if osyn != asyn:
                                            edge_row = create_edge(
                                                'wn:' + osyn,
                                                objid2names[obj_id],
                                                'wn:' + asyn,
                                                [KgtkFormat.stringify(attr)],
                                                property_relation,
                                                property_relation_label,
                                                image_id)
                                            if edge_row not in rows:
                                                rows.append(edge_row)

            # RELATIONS
            for rel in an_image['relationships']:
                #synsets=rel['synsets']
                relation_label = KgtkFormat.stringify(
                    rel['predicate'].lower().strip().strip('.'))
                sub_id = rel['subject_id']
                sub_names = objid2names[sub_id]
                sub_syns = objid2syns[sub_id]
                obj_id = rel['object_id']
                obj_names = objid2names[obj_id]
                obj_syns = objid2syns[obj_id]
                for ssyn in sub_syns:
                    for osyn in obj_syns:
                        if osyn != ssyn:
                            edge_row = create_edge('wn:' + ssyn, sub_names,
                                                   'wn:' + osyn, obj_names,
                                                   proximity_relation,
                                                   relation_label, image_id)
                            if edge_row not in rows:
                                rows.append(edge_row)

            # Emit the de-duplicated edges for this image.
            for a_row in rows:
                sys.stdout.write(a_row)

    except Exception as e:
        kgtk_exception_auto_handler(e)
def run(output_file: KGTKFiles):
    """Extract WordNet lemma/hypernym/holonym/meronym relations into a KGTK file."""
    # import modules locally
    import sys  # type: ignore
    from kgtk.exceptions import kgtk_exception_auto_handler
    import json
    import nltk
    nltk.download("wordnet")
    from nltk.corpus import wordnet as wn
    from kgtk.kgtkformat import KgtkFormat
    from kgtk.io.kgtkwriter import KgtkWriter

    def obtain_wordnet_lemmas(syn):
        # Stringified lemma names with underscores turned into spaces.
        lemmas = []
        for lemma in syn.lemma_names():
            lemmas.append(KgtkFormat.stringify(lemma.replace('_', ' ')))
        return lemmas

    def obtain_hypernyms(syn):
        hyps = []
        for hypernym in syn.hypernyms():
            hyps.append(hypernym.name())
        return hyps

    def obtain_member_holonyms(syn):
        hols = []
        for hol in syn.member_holonyms():
            hols.append(hol.name())
        return hols

    def obtain_part_holonyms(syn):
        hols = []
        for hol in syn.part_holonyms():
            hols.append(hol.name())
        return hols

    def obtain_substance_meronyms(syn):
        hols = []
        for hol in syn.substance_meronyms():
            hols.append(hol.name())
        return hols

    def get_wn_data():
        # Walk every synset once, collecting labels and relation targets.
        syns = list(wn.all_synsets())
        all_labels = {}
        all_hyps = {}
        all_members = {}
        all_parts = {}
        all_subs = {}
        for syn in syns:
            syn_name = syn.name()
            lemmas = obtain_wordnet_lemmas(syn)
            all_labels[syn_name] = '|'.join(lemmas)
            hypernyms = obtain_hypernyms(syn)
            if len(hypernyms):
                all_hyps[syn_name] = hypernyms
            member_holonyms = obtain_member_holonyms(syn)
            if len(member_holonyms):
                all_members[syn_name] = member_holonyms
            part_holonyms = obtain_part_holonyms(syn)
            if len(part_holonyms):
                all_parts[syn_name] = part_holonyms
            substance_meronyms = obtain_substance_meronyms(syn)
            if len(substance_meronyms):
                all_subs[syn_name] = substance_meronyms
        return all_labels, all_hyps, all_members, all_parts, all_subs

    def create_edges(data, labels, rel, rel_label):
        # One KGTK row per (node1, node2) pair under relation rel.
        all_rows = []
        source = KgtkFormat.stringify('WN')
        for node1, v in data.items():
            for node2 in v:
                # NOTE(review): the two preferred-label locals below are
                # computed but never used — candidates for removal.
                node1_preflabel = labels[node1].split('|')[0]
                node2_preflabel = labels[node2].split('|')[0]
                a_row = [
                    'wn:' + node1, rel, 'wn:' + node2, labels[node1],
                    labels[node2], rel_label, "", source, ''
                ]
                all_rows.append(a_row)
        return all_rows

    try:
        out_columns = [
            'node1', 'relation', 'node2', 'node1;label', 'node2;label',
            'relation;label', 'relation;dimension', 'source', 'sentence'
        ]

        output_kgtk_file: Path = KGTKArgumentParser.get_output_file(
            output_file)
        ew: KgtkWriter = KgtkWriter.open(
            out_columns,
            output_kgtk_file,
            #mode=input_kr.mode,
            require_all_columns=False,
            prohibit_extra_columns=True,
            fill_missing_columns=True,
            gzip_in_parallel=False,
            #verbose=self.verbose,
            #very_verbose=self.very_verbose
        )

        all_labels, all_hyps, all_members, all_parts, all_subs = get_wn_data()

        hyp_edges = create_edges(all_hyps, all_labels, '/r/IsA',
                                 KgtkFormat.stringify('is a'))
        member_edges = create_edges(all_members, all_labels, '/r/PartOf',
                                    KgtkFormat.stringify('is a part of'))
        part_edges = create_edges(all_parts, all_labels, '/r/PartOf',
                                  KgtkFormat.stringify('is a part of'))
        sub_edges = create_edges(all_subs, all_labels, '/r/MadeOf',
                                 KgtkFormat.stringify('is made of'))

        all_edges = hyp_edges + member_edges + part_edges + sub_edges

        for edge in all_edges:
            ew.write(edge)

        # Clean up.
        ew.close()

    except Exception as e:
        kgtk_exception_auto_handler(e)
def run(input_file: KGTKFiles, attr_syn_file: KGTKFiles,
        output_file: KGTKFiles):
    """Convert a Visual Genome scene-graph JSON into a KGTK edge file."""
    # import modules locally
    import sys  # type: ignore
    from kgtk.exceptions import kgtk_exception_auto_handler
    import csv
    import json
    import re
    from pathlib import Path
    from collections import defaultdict
    from kgtk.kgtkformat import KgtkFormat
    from kgtk.io.kgtkwriter import KgtkWriter

    def create_edge(node1, node1_lbl, node2, node2_lbl, rel, rel_lbl,
                    image_id):
        # One KGTK row; multiple labels per node are pipe-joined.
        my_row = [
            node1, rel, node2, '|'.join(node1_lbl), '|'.join(node2_lbl),
            rel_lbl, '',
            KgtkFormat.stringify('VG'), ''
        ]
        return my_row

    try:
        scene_graph_filename: Path = KGTKArgumentParser.get_input_file(
            input_file)
        attr_synsets_filename: Path = KGTKArgumentParser.get_input_file(
            attr_syn_file)

        out_columns = [
            'node1', 'relation', 'node2', 'node1;label', 'node2;label',
            'relation;label', 'relation;dimension', 'source', 'sentence'
        ]

        output_kgtk_file: Path = KGTKArgumentParser.get_output_file(
            output_file)
        ew: KgtkWriter = KgtkWriter.open(
            out_columns,
            output_kgtk_file,
            #mode=input_kr.mode,
            require_all_columns=False,
            prohibit_extra_columns=True,
            fill_missing_columns=True,
            gzip_in_parallel=False,
            #verbose=self.verbose,
            #very_verbose=self.very_verbose
        )

        proximity_relation = '/r/LocatedNear'
        property_relation = 'mw:MayHaveProperty'
        property_relation_label = KgtkFormat.stringify('may have property')
        capableof_relation = '/r/CapableOf'
        capableof_relation_label = KgtkFormat.stringify('capable of')

        with open(scene_graph_filename, 'r') as f:
            images_data = json.load(f)

        with open(attr_synsets_filename, 'r') as f:
            attr_synsets = json.load(f)

        for counter, an_image in enumerate(images_data):
            image_id = str(an_image['image_id'])

            # OBJECTS
            objid2names = defaultdict(list)
            objid2syns = {}
            rows = []
            for o in an_image['objects']:
                obj_id = o['object_id']
                o_synset = o['synsets']
                objid2syns[obj_id] = o_synset
                for name in o['names']:
                    name = name.strip().lower().rstrip('.')
                    if not name:
                        continue
                    objid2names[obj_id].append(KgtkFormat.stringify(name))

                # ATTRIBUTES
                if 'attributes' in o.keys():
                    for attr in o['attributes']:
                        attr = attr.lower()
                        if attr in attr_synsets:
                            asyn = attr_synsets[attr]
                            # Part of speech is the 2nd dot-field, e.g. 'run.v.01'.
                            apos = asyn.split('.')[1]
                            if apos != 'n':
                                if apos == 'v':  # verb
                                    for osyn in o_synset:
                                        if osyn != asyn:
                                            edge_row = create_edge(
                                                'wn:' + osyn,
                                                objid2names[obj_id],
                                                'wn:' + asyn,
                                                [KgtkFormat.stringify(attr)],
                                                capableof_relation,
                                                capableof_relation_label,
                                                image_id)
                                            if edge_row not in rows:
                                                rows.append(edge_row)
                                else:  #adjective
                                    for osyn in o_synset:
                                        if osyn != asyn:
                                            edge_row = create_edge(
                                                'wn:' + osyn,
                                                objid2names[obj_id],
                                                'wn:' + asyn,
                                                [KgtkFormat.stringify(attr)],
                                                property_relation,
                                                property_relation_label,
                                                image_id)
                                            if edge_row not in rows:
                                                rows.append(edge_row)

            # RELATIONS
            for rel in an_image['relationships']:
                #synsets=rel['synsets']
                relation_label = KgtkFormat.stringify(
                    rel['predicate'].lower().strip().strip('.'))
                sub_id = rel['subject_id']
                sub_names = objid2names[sub_id]
                sub_syns = objid2syns[sub_id]
                obj_id = rel['object_id']
                obj_names = objid2names[obj_id]
                obj_syns = objid2syns[obj_id]
                for ssyn in sub_syns:
                    for osyn in obj_syns:
                        if osyn != ssyn:
                            edge_row = create_edge('wn:' + ssyn, sub_names,
                                                   'wn:' + osyn, obj_names,
                                                   proximity_relation,
                                                   relation_label, image_id)
                            if edge_row not in rows:
                                rows.append(edge_row)

            # Emit the de-duplicated edges for this image.
            for a_row in rows:
                ew.write(a_row)

        # Clean up
        ew.close()

    except Exception as e:
        kgtk_exception_auto_handler(e)
def obtain_wordnet_lemmas(syn):
    """Return KGTK-stringified lemma labels for a synset (underscores -> spaces)."""
    return [
        KgtkFormat.stringify(name.replace('_', ' '))
        for name in syn.lemma_names()
    ]
def make_node_label(node):
    """Label = the node id with its 3-character namespace prefix removed."""
    bare_id = node[3:]
    return KgtkFormat.stringify(bare_id)