def __init__(self, logger, positive_rel_filepath, negative_rel_filepath, vocab):
    """Load positive and negative evaluation relations as vocabulary indices.

    Each file is read through ``Relations(path, reverse=False)``, which is
    iterated as ``(parent, child)`` pairs.  Every node is mapped to
    ``vocab[node].index`` and the indices are stored in four parallel lists:
    ``pos_relations_parents`` / ``pos_relations_children`` and
    ``neg_relations_parents`` / ``neg_relations_children``.

    Args:
        logger: object with an ``info`` method; kept on ``self.logger``.
        positive_rel_filepath: path of the positive relations file.
        negative_rel_filepath: path of the negative relations file.
        vocab: mapping from node to an object exposing ``.index``.

    Raises:
        AssertionError: if a pair relates a node to itself.
    """
    self.logger = logger

    def fill_index_lists(rel_filepath, parent_indices, child_indices):
        # Append the vocabulary index of each endpoint to the given lists.
        for parent, child in Relations(rel_filepath, reverse=False):
            assert parent != child
            parent_idx = vocab[parent].index
            child_idx = vocab[child].index
            parent_indices.append(parent_idx)
            child_indices.append(child_idx)

    self.pos_relations_parents = []
    self.pos_relations_children = []
    fill_index_lists(positive_rel_filepath,
                     self.pos_relations_parents,
                     self.pos_relations_children)

    self.neg_relations_parents = []
    self.neg_relations_children = []
    fill_index_lists(negative_rel_filepath,
                     self.neg_relations_parents,
                     self.neg_relations_children)

    num_pos = str(len(self.pos_relations_parents))
    num_neg = str(len(self.neg_relations_parents))
    files_part = ('eval datasets file pos = ' + positive_rel_filepath
                  + ' neg = ' + negative_rel_filepath)
    counts_part = '; eval num rels pos = ' + num_pos + ' neg = ' + num_neg
    logger.info(files_part + counts_part)
def apply(self, action):
    """Apply one transition action to this parser state.

    Dispatches on ``action.name``:

    * ``"shift"``  - calls ``self.buffer.consume()``, pushes at most one node
      of the action's subgraph onto ``self.stack``, copies every relation of
      the subgraph into ``self.stack.relations`` and increments
      ``self.counter``.
    * ``"reduce"`` - pops ``self.stack``; when ``action.argv`` is not None it
      is unpacked as a 3-tuple and one relation starting at the popped node
      is added.
    * ``"larc"``   - adds a relation ``(top, child, label)`` where ``child``
      is ``self.stack.get(1)``, then calls ``self.stack.pop(1)``.
    * ``"rarc"``   - adds a relation ``(child, top, label)``; nothing is
      popped.

    Raises:
        ValueError: if ``action.name`` is none of the four names above.
        AssertionError: for ``larc``/``rarc`` when ``self.stack.top()`` or
            ``self.stack.get(1)`` is None.
    """
    # NOTE(review): the nesting below was reconstructed from a flattened
    # source line -- confirm block boundaries against the original file.
    if action.name == "shift":
        token = self.buffer.consume()
        # presumably returns the subgraph attached to this shift action;
        # it is used below through its .nodes and .relations -- TODO confirm
        sg = action.argv.get()
        if self.stage == "COLLECT":
            # Count this (word_pos, subgraph) pairing in the phrase table.
            Resources.phrasetable[token.word+"_"+token.pos][action.argv.get(None, Variables())] += 1
            # Write each ORGANIZATION word once, followed by the non-empty
            # concepts of the subgraph nodes whose isConst is False.
            if token.ne == "ORGANIZATION" and token.word not in Resources.seen_org:
                Resources.seen_org.append(token.word)
                Resources.forg.write(token.word)
                for node in sg.nodes:
                    if node.isConst == False and node.concept.strip() != "":
                        Resources.forg.write(" " + node.concept)
                Resources.forg.write("\n")
        # NOTE(review): `test` is filled but never read in this block.
        test = []
        for n in sg.nodes:
            # A node that is never the second element of a relation is
            # treated as the root; only the first such node is pushed.
            if len([r for r in sg.relations if r[1] == n]) == 0: # push only root
                self.stack.push(n)
                test.append(n)
                break
        # Mirror the subgraph relations into the stack's relation store and
        # into a temporary Relations used for the checks below.
        tmprels = Relations()
        for n1, n2, label in sg.relations:
            self.stack.relations.add(n1, n2, label)
            tmprels.add(n1, n2, label)
        self.counter += 1
        # NOTE(review): `graph` is computed but never read in this block;
        # left in place because to_string is opaque from here.
        if len(sg.nodes) == 0:
            graph = "NULL"
        elif tmprels == Relations():
            graph = "(" + sg.nodes[0].concept + ")"
        else:
            graph, _, _ = tostring.to_string(tmprels.triples(), "TOP")
    elif action.name == "reduce":
        node = self.stack.pop()
        if action.argv is not None:
            # argv is a 3-tuple here; its third element is ignored.
            s, label, _ = action.argv
            self.stack.relations.add(node, s, label)
    elif action.name == "larc":
        label = action.argv
        child = self.stack.get(1)
        top = self.stack.top()
        assert (top is not None and child is not None)
        self.stack.relations.add(top, child, label)
        # presumably removes the element at position 1 (the child) -- TODO confirm
        self.stack.pop(1)
    elif action.name == "rarc":
        label = action.argv
        child = self.stack.get(1)
        top = self.stack.top()
        assert (top is not None and child is not None)
        self.stack.relations.add(child, top, label)
    else:
        raise ValueError("action not defined")
def build_model(self):
    """Create the model for this task from its training relations.

    The training file path is taken from ``self.input()["data"]["train"]``
    and loaded with ``Relations(path, reverse=False)``.  The class returned
    by ``self.get_model_class()`` is then instantiated with the task's
    hyper-parameters plus everything in ``self.model_parameters``.

    Returns:
        The newly constructed model instance.
    """
    training_relations = Relations(
        self.input()["data"]["train"].path,
        reverse=False,
    )
    model_cls = self.get_model_class()
    return model_cls(
        train_data=training_relations,
        dim=self.dim,
        init_range=(self.init_range_min, self.init_range_max),
        lr=self.lr,
        # optimiser choice: rsgd or exp_map
        opt=self.opt,
        burn_in=self.burn_in,
        seed=self.seed,
        num_negative=self.num_negative,
        neg_sampl_strategy=self.neg_sampl_strategy,
        where_not_to_sample=self.where_not_to_sample,
        neg_edges_attach=self.neg_edges_attach,
        always_v_in_neg=self.always_v_in_neg,
        neg_sampling_power=self.neg_sampling_power,
        logger=self.logger,
        # model-specific parameters
        **self.model_parameters
    )
def initialise_app(max_relations_to_load):
    """Precompute the objects shared by all requests to this app.

    Everything is stored in ``app.registry`` rather than in globals, under
    the keys ``'db'``, ``'relations'``, ``'db_old'`` and ``'relations_old'``.

    Args:
        max_relations_to_load: value bound to the ``LIMIT %s`` placeholder
            of both edge queries.
    """
    # Current database: select the latest 'prod_' schema as search path.
    db = DatabaseConnection(path_config='db_config.yaml')
    latest_schema = db.get_latest_schema('prod_')
    db.execute('SET search_path to ' + latest_schema + ';')
    app.registry['db'] = db

    # Every row yields two directed edges whose types have opposite sign.
    edges_sql = """ SELECT eid, eid_relation, stakeholder_type_id FROM related LIMIT %s; """
    edges = []
    for record in db.query(edges_sql, [max_relations_to_load]):
        kind = record['stakeholder_type_id'] or 0
        source = record['eid']
        target = record['eid_relation']
        edges.extend([
            (source, target, +1 * kind),
            (target, source, -1 * kind),
        ])
    app.registry['relations'] = Relations(edges)

    # TEMP: the same structure built from the old database.
    db_old = DatabaseConnection(path_config='db_config_old.yaml',
                                search_path='mysql')
    app.registry['db_old'] = db_old

    # Old rows carry a length that is reused for both directions.
    old_edges_sql = """SELECT eid1, eid2, length FROM related LIMIT %s;"""
    old_edges = []
    for record in db_old.query(old_edges_sql, [max_relations_to_load]):
        first, second = record['eid1'], record['eid2']
        old_edges.extend([
            (first, second, float(record['length'])),
            (second, first, float(record['length'])),
        ])
    app.registry['relations_old'] = Relations(old_edges)
def __init__(self, embs, relations, tokens, dependencies, alignments, oracle, hooks, variables, stage, rules):
    """Set up the parser state for one sentence.

    Builds the token ``Buffer``, the ``Dependencies`` over buffer tokens and
    an empty ``Stack``.  When ``relations`` is given, a deep copy of it is
    wrapped in ``Relations`` and stored as ``self.gold``; otherwise
    ``self.gold`` is None.

    Args:
        embs: passed to ``Buffer`` and ``Stack`` and kept on ``self.embs``.
        relations: gold relations, or None.
        tokens: token objects exposing ``.word``.
        dependencies: iterable of ``(index, label, index)`` triples whose
            indices refer to positions in ``self.buffer.tokens``.
        alignments: passed to ``Buffer``.
        oracle, hooks, variables, stage, rules: stored unchanged.
    """
    self.semicol_gen_and = False
    self.hooks = hooks
    self.variables = variables
    self.buffer = Buffer(embs, tokens, alignments)
    self.embs = embs
    self.stage = stage

    # Replace the token positions in each dependency by the buffer tokens.
    dependency_triples = []
    for head_pos, dep_label, tail_pos in dependencies:
        head_token = self.buffer.tokens[head_pos]
        tail_token = self.buffer.tokens[tail_pos]
        dependency_triples.append((head_token, dep_label, tail_token))
    self.dependencies = Dependencies(dependency_triples)

    self.stack = Stack(embs)
    self.oracle = oracle
    self.rules = rules
    # Deep copy so the caller's relations are not shared with this state.
    self.gold = None if relations is None else Relations(copy.deepcopy(relations))
    self.sentence = " ".join(tok.word for tok in tokens)
    self.counter = 0
def _initialise_relations(db, max_relations_to_load):
    """Return a Relations object built from the edges in database `db`.

    Rows of ``related`` with ``eid <> eid_relation`` are fetched, limited to
    ``max_relations_to_load``.  Every row contributes two directed edges
    whose types have opposite sign; a missing type is treated as 0.
    """
    edges_sql = """ SELECT eid, eid_relation, stakeholder_type_id FROM related WHERE eid <> eid_relation LIMIT %s; """
    edges = []
    for record in db.query(edges_sql, [max_relations_to_load]):
        kind = record['stakeholder_type_id'] or 0
        source = record['eid']
        target = record['eid_relation']
        edges.extend([
            (source, target, +1 * kind),
            (target, source, -1 * kind),
        ])
    print('[OK] Received %d edges.' % (len(edges)))

    return Relations(edges)
def download_from_wikidata() -> None:
    """Download the Wikidata JSON of every entity used in the relation data.

    Command-line options: ``--datapath`` (required), ``--outpath``
    (required) and the flag ``--use``.  The entities are collected from the
    ``obj_uri`` and ``sub_uri`` fields of all available relation files, and
    each one is passed to ``download_entity`` with the target
    ``<outpath>/<entity>.json``.
    """
    cli = argparse.ArgumentParser()
    for option in ("--datapath", "--outpath"):
        cli.add_argument(option, default=None, type=str, required=True, help="")
    cli.add_argument("--use", action="store_true", help="")
    options = cli.parse_args()

    relation_data = Relations(options.datapath)
    relation_data.load_data(relation_data.get_available_filenames())

    url_template = "https://www.wikidata.org/wiki/Special:EntityData/{}.json"
    entity_ids = relation_data.get_all_entities(["obj_uri", "sub_uri"])
    for entity_id in tqdm.tqdm(entity_ids):
        entity_url = url_template.format(entity_id)
        target_file = os.path.join(options.outpath, entity_id + ".json")
        download_entity(entity_url, target_file)
def __init__(self, embs):
    """Create a stack that initially holds a single ``Node(True)``.

    Args:
        embs: stored unchanged on ``self.embs``.
    """
    self.embs = embs
    # The node list starts with one node built as Node(True).
    self.nodes = [Node(True)]
    self.relations = Relations()
def main():
    """Write per-language copies of the relation files with translated labels.

    Command-line options (all required): ``--data``, ``--entities``,
    ``--outpath`` and ``--languagemapping``.

    For every language in the language mapping, each relation file is
    rewritten to ``<outpath>/<lang>/<filename>.jsonl``.  A relation is
    written only when it has ``sub_uri``, ``obj_uri``, ``sub_label`` and
    ``obj_label``.  Its labels are replaced by the surfaces returned by
    ``get_entity_surface`` when both are available; otherwise the original
    labels are kept and ``from_english`` is set to True.  A running summary
    is logged and appended to ``<outpath>/<lang>.log`` after every file.
    """
    parser = argparse.ArgumentParser()
    for flag in ("--data", "--entities", "--outpath", "--languagemapping"):
        parser.add_argument(flag, default=None, type=str, required=True, help="")
    args = parser.parse_args()

    lang2translateid = load_languagemapping(args.languagemapping)
    required_keys = ("sub_uri", "obj_uri", "sub_label", "obj_label")

    # The log files are created directly inside outpath, before any
    # per-language directory exists, so make sure outpath itself is there.
    os.makedirs(args.outpath, exist_ok=True)

    for lang in lang2translateid:
        t = Relations(args.data)
        filenames = t.get_available_filenames()
        t.load_data(filenames)
        # Counts accumulate over all files of one language.
        count = collections.Counter()
        outdirectory = os.path.join(args.outpath, lang)
        # Context manager: the log is closed even if processing raises.
        with open(os.path.join(args.outpath, lang + ".log"), "w") as logfile:
            for filename, relations in t.data.items():
                LOG.info("Processing relation: {}".format(filename))
                os.makedirs(outdirectory, exist_ok=True)
                with open(os.path.join(outdirectory, filename + ".jsonl"), "w") as fout:
                    for relation in relations:
                        count["in_file"] += 1
                        if not all(key in relation for key in required_keys):
                            continue
                        count["available"] += 1
                        obj_uri = relation["obj_uri"]
                        sub_uri = relation["sub_uri"]
                        # Look up the surface forms in the target language.
                        obj_surface = get_entity_surface(args.entities, obj_uri, lang)
                        sub_surface = get_entity_surface(args.entities, sub_uri, lang)
                        translated = bool(obj_surface and sub_surface)
                        if translated:
                            count["converted"] += 1
                        else:
                            # Fall back to the English surface forms.
                            obj_surface = relation["obj_label"]
                            sub_surface = relation["sub_label"]
                        to_write = {
                            "sub_uri": sub_uri,
                            "obj_uri": obj_uri,
                            "obj_label": obj_surface,
                            "sub_label": sub_surface,
                            "from_english": not translated,
                        }
                        fout.write(json.dumps(to_write) + "\n")
                summary = "{}|{}|{}|(converted/available/in_file)".format(
                    count["converted"], count["available"], count["in_file"])
                LOG.info(summary)
                logfile.write("{}|{}\n".format(filename, summary))
def __init__(self, relations):
    """Store a ``Relations`` built from a deep copy of `relations`.

    Args:
        relations: gold relations; copied so the caller's object is not
            shared with ``self.gold``.
    """
    gold_copy = copy.deepcopy(relations)
    self.gold = Relations(gold_copy)