Пример #1
0
    def __init__(self, logger, positive_rel_filepath, negative_rel_filepath,
                 vocab):
        """Load positive and negative evaluation relations as vocab indices.

        Each file is read through ``Relations(path, reverse=False)`` and
        iterated as ``(parent, child)`` pairs.  Both nodes of every pair are
        looked up in ``vocab`` and their ``.index`` values are appended to the
        parallel lists ``pos_relations_parents`` / ``pos_relations_children``
        (positive file) or ``neg_relations_parents`` /
        ``neg_relations_children`` (negative file).

        Args:
            logger: Logger stored on the instance and used to report the
                number of loaded relations.
            positive_rel_filepath: Path string of the positive relations file.
            negative_rel_filepath: Path string of the negative relations file.
            vocab: Mapping from a node to an entry exposing ``.index``.

        Raises:
            AssertionError: If a relation pairs a node with itself.
        """
        self.logger = logger

        self.pos_relations_parents = []
        self.pos_relations_children = []
        self.neg_relations_parents = []
        self.neg_relations_children = []

        # Positive relations are read completely before the negative ones.
        sources = (
            (positive_rel_filepath,
             self.pos_relations_parents, self.pos_relations_children),
            (negative_rel_filepath,
             self.neg_relations_parents, self.neg_relations_children),
        )
        for filepath, parent_indices, child_indices in sources:
            for parent, child in Relations(filepath, reverse=False):
                assert parent != child
                # Resolve both nodes before storing either one, so the two
                # lists never get out of step on a failed lookup.
                parent_idx = vocab[parent].index
                child_idx = vocab[child].index
                parent_indices.append(parent_idx)
                child_indices.append(child_idx)

        message = ('eval datasets file pos = ' + positive_rel_filepath +
                   '  neg = ' + negative_rel_filepath +
                   '; eval num rels pos = ' +
                   str(len(self.pos_relations_parents)) + '  neg = ' +
                   str(len(self.neg_relations_parents)))
        logger.info(message)
Пример #2
0
    def apply(self, action):
        """Apply one transition to this state, mutating buffer and stack.

        Dispatches on ``action.name``:

        * ``"shift"``  -- consumes one token from ``self.buffer``, obtains
          ``sg`` from ``action.argv.get()`` (presumably a subgraph; it exposes
          ``.nodes`` and ``.relations``), pushes at most one of its nodes on
          the stack, copies all of its relations into
          ``self.stack.relations`` and increments ``self.counter``.  When
          ``self.stage == "COLLECT"`` it additionally updates the shared
          ``Resources`` tables/files.
        * ``"reduce"`` -- pops the stack; if ``action.argv`` is not ``None``
          it is unpacked as ``(s, label, _)`` and the relation
          ``(popped node, s, label)`` is added.
        * ``"larc"``   -- adds the relation ``(top, child, label)`` where
          ``child`` is ``self.stack.get(1)``, then calls
          ``self.stack.pop(1)``.
        * ``"rarc"``   -- adds the relation ``(child, top, label)``; the stack
          is left unchanged.

        Raises:
            ValueError: If ``action.name`` is none of the names above.
            AssertionError: For ``larc``/``rarc`` when ``self.stack.top()`` or
                ``self.stack.get(1)`` returns ``None``.
        """
        if action.name == "shift":
            token = self.buffer.consume()
            sg = action.argv.get()
            if self.stage == "COLLECT":
                # Count this (word_pos -> argv.get(None, Variables())) pairing
                # in the shared phrase table.
                Resources.phrasetable[token.word+"_"+token.pos][action.argv.get(None, Variables())] += 1
                # First time an ORGANIZATION word is seen: record it and write
                # one line to Resources.forg -- the word followed by the
                # concept of every non-constant node with a non-blank concept.
                if token.ne == "ORGANIZATION" and token.word not in Resources.seen_org:
                    Resources.seen_org.append(token.word)
                    Resources.forg.write(token.word)
                    for node in sg.nodes:
                        if node.isConst == False and node.concept.strip() != "":
                            Resources.forg.write(" " + node.concept)
                    Resources.forg.write("\n")

            # NOTE(review): `test` is only appended to; it is never read in
            # this method.
            test = []
            # Push the first node that is not at index 1 of any relation
            # (presumably index 1 is the child/target, so this is a node with
            # no incoming edge -- confirm against Relations), then stop.
            for n in sg.nodes:
                if len([r for r in sg.relations if r[1] == n]) == 0: # push only root
                    self.stack.push(n)
                    test.append(n)
                    break

            # Mirror every relation of `sg` into the stack's relation store
            # and into a scratch Relations used for the string form below.
            tmprels = Relations()
            for n1, n2, label in sg.relations:
                    self.stack.relations.add(n1, n2, label)
                    tmprels.add(n1, n2, label)
            self.counter += 1
            # NOTE(review): `graph` is computed here but not used afterwards
            # within this method.
            if len(sg.nodes) == 0:
                graph = "NULL"
            elif tmprels == Relations():
                # No relations were copied: render the first node's concept.
                graph = "(" + sg.nodes[0].concept + ")"
            else:
                graph, _, _ = tostring.to_string(tmprels.triples(), "TOP")
        elif action.name == "reduce":
            node = self.stack.pop()
            # An optional argument attaches the popped node to `s` on the way
            # out; the third element of the argument is ignored.
            if action.argv is not None:
                s, label, _ = action.argv
                self.stack.relations.add(node, s, label)

        elif action.name == "larc":
            label = action.argv
            child = self.stack.get(1)
            top = self.stack.top()
            assert (top is not None and child is not None)

            # Relation goes from the stack top to the element at get(1), which
            # is then removed with pop(1).
            self.stack.relations.add(top, child, label)
            self.stack.pop(1)

        elif action.name == "rarc":
            label = action.argv
            child = self.stack.get(1)
            top = self.stack.top()
            assert (top is not None and child is not None)

            # Opposite direction to larc, and nothing is popped.
            self.stack.relations.add(child, top, label)

        else:
            raise ValueError("action not defined")
Пример #3
0
    def build_model(self):
        """Instantiate the configured model class on the training relations.

        The training file path is taken from
        ``self.input()["data"]["train"].path`` and wrapped in
        ``Relations(..., reverse=False)``.  The class returned by
        ``self.get_model_class()`` is then called with the hyper-parameters
        stored on ``self`` plus the entries of ``self.model_parameters``.

        Returns:
            The newly constructed model instance.
        """
        train_data = Relations(self.input()["data"]["train"].path,
                               reverse=False)
        model_cls = self.get_model_class()

        return model_cls(
            # Data and optimisation settings.
            train_data=train_data,
            dim=self.dim,
            init_range=(self.init_range_min, self.init_range_max),
            lr=self.lr,
            opt=self.opt,  # rsgd or exp_map
            burn_in=self.burn_in,
            seed=self.seed,
            # Negative sampling settings.
            num_negative=self.num_negative,
            neg_sampl_strategy=self.neg_sampl_strategy,
            where_not_to_sample=self.where_not_to_sample,
            neg_edges_attach=self.neg_edges_attach,
            always_v_in_neg=self.always_v_in_neg,
            neg_sampling_power=self.neg_sampling_power,
            logger=self.logger,
            # Model-specific parameters are expanded last.
            **self.model_parameters
        )
Пример #4
0
def initialise_app(max_relations_to_load):
    """Precompute the objects shared across requests to this app.

    Everything is stored in ``app.registry`` (rather than in global
    variables) under the keys ``'db'``, ``'relations'``, ``'db_old'`` and
    ``'relations_old'``.

    Args:
        max_relations_to_load: Value bound to the ``LIMIT`` placeholder of
            both edge queries.
    """
    # Connect to the current database and select its latest 'prod_' schema.
    db = DatabaseConnection(path_config='db_config.yaml')
    latest_schema = db.get_latest_schema('prod_')
    db.execute('SET search_path to ' + latest_schema + ';')
    app.registry['db'] = db

    # Each row yields two directed edges; the reverse edge carries the
    # negated edge type.  A missing/falsy type is treated as 0.
    edges_query = """
      SELECT eid, eid_relation, stakeholder_type_id
      FROM related
      LIMIT %s;
      """
    edges = []
    for row in db.query(edges_query, [max_relations_to_load]):
        edge_type = row['stakeholder_type_id'] or 0
        source, target = row['eid'], row['eid_relation']
        edges.extend([
            (source, target, +1 * edge_type),
            (target, source, -1 * edge_type),
        ])
    app.registry['relations'] = Relations(edges)

    # TEMP: build a second Relations object from the old database, where
    # both directions of an edge share the same float length.
    db_old = DatabaseConnection(path_config='db_config_old.yaml',
                                search_path='mysql')
    app.registry['db_old'] = db_old
    old_edges_query = """SELECT eid1, eid2, length FROM related LIMIT %s;"""
    old_edges = []
    for row in db_old.query(old_edges_query, [max_relations_to_load]):
        first, second = row['eid1'], row['eid2']
        length = float(row['length'])
        old_edges.extend([
            (first, second, length),
            (second, first, length),
        ])
    app.registry['relations_old'] = Relations(old_edges)
Пример #5
0
 def __init__(self, embs, relations, tokens, dependencies, alignments, oracle, hooks, variables, stage, rules):
     """Set up the buffer, stack and bookkeeping for one sentence.

     ``dependencies`` is given as ``(i1, label, i2)`` triples whose indices
     refer to positions in the buffer's token list; they are converted to
     triples of token objects before being wrapped in ``Dependencies``.
     ``self.gold`` holds a ``Relations`` built from a deep copy of
     ``relations``, or ``None`` when ``relations`` is ``None``.
     """
     self.semicol_gen_and = False
     self.hooks = hooks
     self.variables = variables
     self.buffer = Buffer(embs, tokens, alignments)
     self.embs = embs
     self.stage = stage

     # Swap the positional indices for the buffer's token objects.
     buffer_tokens = self.buffer.tokens
     token_triples = [(buffer_tokens[src], dep_label, buffer_tokens[dst])
                      for (src, dep_label, dst) in dependencies]
     self.dependencies = Dependencies(token_triples)

     self.stack = Stack(embs)
     self.oracle = oracle
     self.rules = rules
     # Deep copy so the caller's relations are not shared with this state.
     self.gold = Relations(copy.deepcopy(relations)) if relations is not None else None
     self.sentence = " ".join(t.word for t in tokens)
     self.counter = 0
Пример #6
0
def _initialise_relations(db, max_relations_to_load):
    """Return a Relations object built from the edges in database `db`.

    Rows whose two endpoints are equal are excluded by the query.  Every
    remaining row contributes two directed edges: one in the stored direction
    with the row's edge type, and the reverse one with the negated edge type.
    A missing/falsy ``stakeholder_type_id`` is treated as 0.

    Args:
        db: Object whose ``query(sql, params)`` yields rows indexable by
            column name.
        max_relations_to_load: Value bound to the query's ``LIMIT``
            placeholder.
    """
    edges_query = """
      SELECT eid, eid_relation, stakeholder_type_id
      FROM related WHERE eid <> eid_relation
      LIMIT %s;
      """
    edges = []
    for row in db.query(edges_query, [max_relations_to_load]):
        edge_type = row['stakeholder_type_id'] or 0
        source, target = row['eid'], row['eid_relation']
        edges.extend([
            (source, target, +1 * edge_type),
            (target, source, -1 * edge_type),
        ])
    print('[OK] Received %d edges.' % (len(edges)))

    return Relations(edges)
Пример #7
0
def download_from_wikidata() -> None:
    """Download the Wikidata JSON of every entity found in the relation data.

    Command-line arguments:
        --datapath  (required) passed to ``Relations``.
        --outpath   (required) directory receiving ``<entity>.json`` files.
        --use       boolean flag; parsed but not read by this function.

    Entities are collected from the ``obj_uri`` and ``sub_uri`` fields of all
    available relation files and fetched one by one via ``download_entity``.
    """
    parser = argparse.ArgumentParser()
    for required_flag in ("--datapath", "--outpath"):
        parser.add_argument(required_flag,
                            default=None,
                            type=str,
                            required=True,
                            help="")
    parser.add_argument("--use", action="store_true", help="")
    args = parser.parse_args()

    relations = Relations(args.datapath)
    relations.load_data(relations.get_available_filenames())

    url_template = "https://www.wikidata.org/wiki/Special:EntityData/{}.json"
    all_entities = relations.get_all_entities(["obj_uri", "sub_uri"])
    for entity in tqdm.tqdm(all_entities):
        entity_url = url_template.format(entity)
        destination = os.path.join(args.outpath, entity + ".json")
        download_entity(entity_url, destination)
Пример #8
0
 def __init__(self, embs):
     """Create a stack holding a single ``Node(True)`` and no relations.

     ``embs`` is stored unchanged on the instance.
     """
     initial_node = Node(True)
     self.embs = embs
     self.nodes = [initial_node]
     self.relations = Relations()
Пример #9
0
def main():
    """Write per-language JSONL versions of every relation file.

    Command-line arguments (all required):
        --data             passed to ``Relations`` to locate relation files.
        --entities         passed to ``get_entity_surface`` for label lookup.
        --outpath          root directory for the output.
        --languagemapping  passed to ``load_languagemapping``; the keys of its
                           result are the languages to process.

    For each language ``<lang>`` this writes ``<outpath>/<lang>.log`` and one
    ``<outpath>/<lang>/<relation>.jsonl`` per relation file.  A relation is
    written only if it has ``sub_uri``, ``obj_uri``, ``sub_label`` and
    ``obj_label``.  When both entities have a surface form in the target
    language those are used as labels; otherwise the relation's own labels
    are kept and the record is flagged with ``"from_english": True``.

    The counters are created once per language and are not reset between
    relation files, so each summary line is a running total for the language.
    """
    parser = argparse.ArgumentParser()
    for required_flag in ("--data", "--entities", "--outpath",
                          "--languagemapping"):
        parser.add_argument(required_flag,
                            default=None,
                            type=str,
                            required=True,
                            help="")
    args = parser.parse_args()
    lang2translateid = load_languagemapping(args.languagemapping)

    required_keys = ("sub_uri", "obj_uri", "sub_label", "obj_label")
    for lang in lang2translateid:
        t = Relations(args.data)
        filenames = t.get_available_filenames()
        t.load_data(filenames)
        count = collections.Counter()
        outdirectory = os.path.join(args.outpath, lang)
        # Context manager so the log file is closed even if processing a
        # relation file raises part-way through.
        with open(os.path.join(args.outpath, lang + ".log"), "w") as logfile:
            for filename, relations in t.data.items():
                LOG.info("Processing relation: {}".format(filename))
                os.makedirs(outdirectory, exist_ok=True)
                with open(os.path.join(outdirectory, filename + ".jsonl"),
                          "w") as fout:
                    for relation in relations:
                        count["in_file"] += 1
                        # Skip relations missing any required field.
                        if not all(key in relation for key in required_keys):
                            continue
                        count["available"] += 1
                        obj_uri = relation["obj_uri"]
                        sub_uri = relation["sub_uri"]
                        # Look up the surface forms in the target language.
                        obj_surface = get_entity_surface(
                            args.entities, obj_uri, lang)
                        sub_surface = get_entity_surface(
                            args.entities, sub_uri, lang)
                        if obj_surface and sub_surface:
                            count["converted"] += 1
                            obj_label = obj_surface
                            sub_label = sub_surface
                            from_english = False
                        else:
                            # Fall back to the relation's own (English)
                            # surface forms.
                            obj_label = relation["obj_label"]
                            sub_label = relation["sub_label"]
                            from_english = True
                        to_write = {
                            "sub_uri": sub_uri,
                            "obj_uri": obj_uri,
                            "obj_label": obj_label,
                            "sub_label": sub_label,
                            "from_english": from_english
                        }
                        fout.write(json.dumps(to_write) + "\n")
                summary = "{}|{}|{}|(converted/available/in_file)".format(
                    count["converted"], count["available"], count["in_file"])
                LOG.info(summary)
                logfile.write("{}|{}\n".format(filename, summary))
Пример #10
0
 def __init__(self, relations):
     """Store a ``Relations`` built from a deep copy of ``relations``.

     The copy keeps ``self.gold`` independent of the object passed in.
     """
     relations_copy = copy.deepcopy(relations)
     self.gold = Relations(relations_copy)