"w") as f: for e in projection.entityToPreferredLabels: for v in projection.entityToPreferredLabels[e]: f.write("%s preferred_label %s\n" % (e, v)) for a in annotations: f.write("%s\n" % " ".join(a)) # read URI document # two parts: walks, axioms (if the axiom file exists) walk_sentences, axiom_sentences, URI_Doc = list(), list(), list() if "URI_Doc" in config["DOCUMENT"] and config["DOCUMENT"]["URI_Doc"] == "yes": print("\nGenerate URI document ...") # walker_type=config['DOCUMENT']['walker'] walks_ = get_rdf2vec_walks( onto_file=ontology_file, walker_type=config["DOCUMENT"]["walker"], walk_depth=int(config["DOCUMENT"]["walk_depth"]), classes=entities, ) print("Extracted %d walks for %d seed entities" % (len(walks_), len(entities))) walk_sentences += [list(map(str, x)) for x in walks_] axiom_file = os.path.join(config["DOCUMENT"]["cache_dir"], "axioms.txt") if os.path.exists(axiom_file): for line in open(axiom_file).readlines(): axiom_sentence = [item for item in line.strip().split()] axiom_sentences.append(axiom_sentence) print("Extracted %d axiom sentences" % len(axiom_sentences)) URI_Doc = walk_sentences + axiom_sentences
print("\n 1.Extract corpus and learning embedding ... \n") classes = [line.strip() for line in open(FLAGS.class_file).readlines()] candidate_num = len(classes) uri_label = dict() annotations = list() for line in open(FLAGS.annotation_file).readlines(): tmp = line.strip().split() if tmp[1] == 'http://www.w3.org/2000/01/rdf-schema#label': uri_label[tmp[0]] = pre_process_words(tmp[2:]) elif tmp[0] in classes: annotations.append(tmp) walk_sentences, axiom_sentences = list(), list() if FLAGS.URI_Doc.lower() == 'yes': walks_ = get_rdf2vec_walks(onto_file=FLAGS.onto_file, walker_type=FLAGS.walker, walk_depth=FLAGS.walk_depth, classes=classes) print('Extracted {} walks for {} classes!'.format(len(walks_), len(classes))) walk_sentences += [list(map(str, x)) for x in walks_] for line in open(FLAGS.axiom_file).readlines(): axiom_sentence = [item for item in line.strip().split()] axiom_sentences.append(axiom_sentence) print('Extracted %d axiom sentences' % len(axiom_sentences)) URI_Doc = walk_sentences + axiom_sentences Lit_Doc = list() if FLAGS.Lit_Doc.lower() == 'yes': for annotation in annotations: processed_words = pre_process_words(annotation[2:]) if len(processed_words) > 0: Lit_Doc.append(uri_label[annotation[0]] + processed_words) print('Extracted %d literal annotations' % len(Lit_Doc))
encoding='utf-8') as f:  # NOTE(review): the opening `with open(...,` is outside this chunk — confirm target path upstream
    # Persist each entity's preferred label(s), then the raw annotation
    # rows (space-joined), one entry per line, into the cache file `f`.
    for e in projection.entityToPreferredLabels:
        for v in projection.entityToPreferredLabels[e]:
            f.write('%s preferred_label %s\n' % (e, v))
    for a in annotations:
        f.write('%s\n' % ' '.join(a))

# read URI document
# two parts: walks, axioms (if the axiom file exists)
# URI_Doc is initialised here so it is defined even when the config switch is off.
walk_sentences, axiom_sentences, URI_Doc = list(), list(), list()
if 'URI_Doc' in config['DOCUMENT'] and config['DOCUMENT']['URI_Doc'] == 'yes':
    print('\nGenerate URI document ...')
    #walker_type=config['DOCUMENT']['walker']
    # RDF2Vec random walks over the ontology, seeded at `entities`
    # (both defined elsewhere in this file).
    walks_ = get_rdf2vec_walks(onto_file=ontology_file,
                               walker_type=config['DOCUMENT']['walker'],
                               walk_depth=int(
                                   config['DOCUMENT']['walk_depth']),
                               classes=entities)
    print('Extracted %d walks for %d seed entities' % (len(walks_), len(entities)))
    # Each walk becomes one sentence of stringified URIs.
    walk_sentences += [list(map(str, x)) for x in walks_]
    # Axiom sentences are optional: read only if the cached file exists.
    axiom_file = os.path.join(config['DOCUMENT']['cache_dir'], 'axioms.txt')
    if os.path.exists(axiom_file):
        for line in open(axiom_file).readlines():
            axiom_sentence = [item for item in line.strip().split()]
            axiom_sentences.append(axiom_sentence)
        print('Extracted %d axiom sentences' % len(axiom_sentences))
    # The URI document is the concatenation of walk and axiom sentences.
    URI_Doc = walk_sentences + axiom_sentences