Пример #1
0
    def load_triples(self, triples_in: Union[np.ndarray, List[np.ndarray]]):
        """
        Create the label-to-id mappings from the given triples and return the mapped triples.

        The mappings are built from all supplied triples (a list of arrays is concatenated
        along axis 0 first) and stored in ``self.entity_label_to_id`` and
        ``self.relation_label_to_id``. Afterwards ``self.num_entities`` and
        ``self.num_relations`` are set to the sizes of these mappings.

        :param triples_in: Either a single array of triples or a list of such arrays
        :return: The mapped triples for a single array; for a list of arrays, a list with the
            mapped triples of each array in input order
        """
        single_array = isinstance(triples_in, np.ndarray)

        # The mappings always have to cover every triple that was handed in.
        merged = triples_in if single_array else np.concatenate(triples_in, axis=0)
        self.entity_label_to_id, self.relation_label_to_id = create_mappings(triples=merged)

        if single_array:
            mapped_triples, _, _ = create_mapped_triples(
                triples=merged,
                entity_label_to_id=self.entity_label_to_id,
                relation_label_to_id=self.relation_label_to_id,
            )
        else:
            # Map every input array separately so callers get one result per array.
            mapped_triples = []
            for chunk in triples_in:
                mapped_chunk = create_mapped_triples(
                    chunk,
                    entity_label_to_id=self.entity_label_to_id,
                    relation_label_to_id=self.relation_label_to_id,
                )[0]
                mapped_triples.append(mapped_chunk)

        self.num_entities = len(self.entity_label_to_id)
        self.num_relations = len(self.relation_label_to_id)

        return mapped_triples
Пример #2
0
    def _handle_train_and_test(self, train_pos, test_pos) -> Tuple[np.ndarray, np.ndarray]:
        """
        Create shared mappings for the training and test triples and map both sets.

        The mappings are derived from the concatenation of both arrays and stored in
        ``self.entity_label_to_id`` and ``self.relation_label_to_id``.

        :param train_pos: Array of training triples
        :param test_pos: Array of test triples
        :return: Tuple of the mapped training triples and the mapped test triples
        """
        combined: np.ndarray = np.concatenate([train_pos, test_pos], axis=0)
        self.entity_label_to_id, self.relation_label_to_id = create_mappings(triples=combined)

        # Map the training split first, then the test split, with the shared mappings.
        mapped_splits = []
        for split in (train_pos, test_pos):
            mapped_split, _, _ = create_mapped_triples(
                triples=split,
                entity_label_to_id=self.entity_label_to_id,
                relation_label_to_id=self.relation_label_to_id,
            )
            mapped_splits.append(mapped_split)

        mapped_train, mapped_test = mapped_splits
        return mapped_train, mapped_test
Пример #3
0
    def _get_train_triples(self):
        """
        Load the training triples, derive the mappings from them and return the mapped triples.

        The file location is read from ``self.config[pkc.TRAINING_SET_PATH]``. The created
        mappings are stored in ``self.entity_label_to_id`` and ``self.relation_label_to_id``.

        :return: The mapped training triples
        """
        training_path = self.config[pkc.TRAINING_SET_PATH]
        raw_triples = load_data(training_path)

        self.entity_label_to_id, self.relation_label_to_id = create_mappings(triples=raw_triples)

        # Only the mapped triples are needed; the other two returned values are discarded.
        mapped_triples, _, _ = create_mapped_triples(
            triples=raw_triples,
            entity_label_to_id=self.entity_label_to_id,
            relation_label_to_id=self.relation_label_to_id,
        )
        return mapped_triples
Пример #4
0
    def map_triples(self, triples_in: Union[np.ndarray, List[np.ndarray]]):
        """
        Map triples with the mappings already stored on this instance.

        Unlike creating new mappings, this only reads ``self.entity_label_to_id`` and
        ``self.relation_label_to_id``.

        :param triples_in: Either a single array of triples or a list of such arrays
        :return: The mapped triples for a single array; for a list of arrays, a list with the
            mapped triples of each array in input order
        """
        if not isinstance(triples_in, np.ndarray):
            # Several arrays: map each one on its own and keep the input order.
            mapped_per_array = []
            for chunk in triples_in:
                mapped_chunk = create_mapped_triples(
                    triples=chunk,
                    entity_label_to_id=self.entity_label_to_id,
                    relation_label_to_id=self.relation_label_to_id,
                )[0]
                mapped_per_array.append(mapped_chunk)
            return mapped_per_array

        mapped_triples, _, _ = create_mapped_triples(
            triples=triples_in,
            entity_label_to_id=self.entity_label_to_id,
            relation_label_to_id=self.relation_label_to_id,
        )
        return mapped_triples
Пример #5
0
def _get_train_triples(data_path: str):
    """
    Load training triples from a file and map them with freshly created mappings.

    :param data_path: Path handed to ``load_data`` to read the training triples
    :return: Tuple of the mapped training triples, the entity label-to-id mapping and the
        relation label-to-id mapping
    """
    raw_triples = load_data(data_path)

    mappings = create_mappings(triples=raw_triples)
    entity_label_to_id, relation_label_to_id = mappings

    # Only the mapped triples are needed; the other two returned values are discarded.
    mapped_triples, _, _ = create_mapped_triples(
        triples=raw_triples,
        entity_label_to_id=entity_label_to_id,
        relation_label_to_id=relation_label_to_id,
    )

    return mapped_triples, entity_label_to_id, relation_label_to_id
Пример #6
0
def make_predictions(
    kge_model,
    entities,
    relations,
    entity_to_id,
    rel_to_id,
    device,
    blacklist_path=None,
):
    """
    Score all candidate triples with the model and return them ranked by score.

    Candidate triples are built from the Cartesian product of ``entities`` with itself for
    every relation, optionally stripped of blacklisted triples, mapped to ids, scored with
    ``kge_model.predict`` and ordered by ascending score. Triples whose subject equals
    their object are removed from the result.

    :param kge_model: Model exposing ``predict``; it is called with a ``torch.long`` tensor
        of the mapped triples located on ``device``
    :param entities: Iterable of entity labels
    :param relations: Array of relation labels; its ``size`` selects the single-relation or
        multi-relation path
    :param entity_to_id: Mapping from entity label to id
    :param rel_to_id: Mapping from relation label to id
    :param device: Torch device on which the tensor of mapped triples is created
    :param blacklist_path: Optional path passed to ``load_data``; triples loaded from it are
        excluded from the candidates
    :return: Array whose columns are subject, predicate, object and score, ordered by
        ascending score
    """
    all_entity_pairs = np.array(list(product(entities, entities)))

    if relations.size == 1:
        all_triples = create_triples(entity_pairs=all_entity_pairs,
                                     relation=relations)
    else:
        # Build the triples of every relation and join them in one step; np.append in a
        # loop would re-copy the growing array for each relation.
        all_triples = np.concatenate(
            [
                create_triples(entity_pairs=all_entity_pairs, relation=relation)
                for relation in relations
            ],
            axis=0,
        )

    if blacklist_path is not None:
        blacklisted_triples = load_data(blacklist_path)
        blacklisted_triples = pd.DataFrame(data=blacklisted_triples)
        all_triples = pd.DataFrame(data=all_triples)
        # The indicator column marks rows that only occur in the candidates ('left_only').
        merged = all_triples.merge(blacklisted_triples,
                                   indicator=True,
                                   how='outer')
        merged = merged[merged['_merge'] == 'left_only'].values
        # Drop the trailing '_merge' column. The builtin ``str`` replaces the ``np.str``
        # alias, which was removed from NumPy.
        all_triples = np.array(merged[:, :-1], dtype=str)

    mapped_triples, _, _ = create_mapped_triples(
        all_triples,
        entity_label_to_id=entity_to_id,
        relation_label_to_id=rel_to_id)

    # Keep the ids as a host-side NumPy array: the tensor below may live on a CUDA device,
    # where it cannot be handed to NumPy for the label lookup.
    mapped_ids = np.asarray(mapped_triples, dtype=np.int64)

    mapped_triples = torch.tensor(mapped_ids,
                                  dtype=torch.long,
                                  device=device)

    # Translate the mapped ids back to labels so the rows line up with the scored triples.
    id_to_entity = {value: key for key, value in entity_to_id.items()}
    id_to_relation = {value: key for key, value in rel_to_id.items()}
    subject_column = np.vectorize(id_to_entity.get)(mapped_ids[:, 0:1])
    predicate_column = np.vectorize(id_to_relation.get)(mapped_ids[:, 1:2])
    object_column = np.vectorize(id_to_entity.get)(mapped_ids[:, 2:3])

    triples_ordered_by_ids = np.concatenate(
        [subject_column, predicate_column, object_column], axis=1)

    predicted_scores = kge_model.predict(mapped_triples)

    _, sorted_indices = torch.sort(torch.tensor(predicted_scores,
                                                dtype=torch.float),
                                   descending=False)

    sorted_indices = sorted_indices.cpu().numpy()

    ranked_triples = triples_ordered_by_ids[sorted_indices, :]

    # The shape is passed positionally because the ``newshape`` keyword of np.reshape is
    # deprecated in recent NumPy releases.
    subs = np.reshape(ranked_triples[:, 0:1], -1)
    objs = np.reshape(ranked_triples[:, 2:3], -1)

    # Positions of triples whose subject differs from their object.
    indices = np.where(subs != objs)[0]

    ranked_scores = np.reshape(predicted_scores[sorted_indices], (-1, 1))
    ranked_triples = np.concatenate([ranked_triples, ranked_scores], axis=1)

    return ranked_triples[indices, :]