Пример #1
0
    def eval(
        self,
        scores: Scores,
        batch_lhs: EntityList,
        batch_rhs: EntityList,
        batch_rel: Union[int, LongTensorType],
    ) -> Stats:
        """Mask known positives among the negatives, then defer to the parent ``eval``.

        For every edge of the batch, the entries of ``lhs_neg_scores`` and
        ``rhs_neg_scores`` selected through ``self.rhs_map`` / ``self.lhs_map``
        are overwritten in place with ``-1e9`` before the parent class
        computes its statistics on the same ``scores`` object.

        Args:
            scores: 4-tuple whose last two items are the left- and
                right-hand-side negative score tensors; both are mutated.
            batch_lhs: left-hand-side entities (assumed non-featurized).
            batch_rhs: right-hand-side entities (assumed non-featurized).
            batch_rel: per-edge relation types; must be a
                ``torch.LongTensor`` (dynamic relations are assumed).

        Returns:
            Whatever the parent class's ``eval`` returns for the masked scores.
        """
        # Assume dynamic relations.
        assert isinstance(batch_rel, torch.LongTensor)

        _, _, lhs_neg_scores, rhs_neg_scores = scores
        b = batch_lhs.size(0)

        # Assume non-featurized. The conversions do not depend on the loop
        # index, so they are done once instead of once per edge.
        lhs_tensor = batch_lhs.to_tensor()
        rhs_tensor = batch_rhs.to_tensor()

        for idx in range(b):
            cur_lhs = lhs_tensor[idx].item()
            cur_rel = batch_rel[idx].item()
            cur_rhs = rhs_tensor[idx].item()

            rhs_edges_filtered = self.lhs_map[cur_lhs, cur_rel]
            lhs_edges_filtered = self.rhs_map[cur_rhs, cur_rel]
            # The edge being evaluated must itself be among the known edges.
            assert cur_lhs in lhs_edges_filtered
            assert cur_rhs in rhs_edges_filtered

            # The rank is computed as the number of non-negative margins (as
            # that means a negative with at least as good a score as a positive)
            # so to avoid counting positives we give them a negative margin.
            lhs_neg_scores[idx][lhs_edges_filtered] = -1e9
            rhs_neg_scores[idx][rhs_edges_filtered] = -1e9

        return super().eval(scores, batch_lhs, batch_rhs, batch_rel)
Пример #2
0
    def cat(cls, edge_lists: Sequence["EdgeList"]) -> "EdgeList":
        """Concatenate several edge lists into a single one.

        Weights are concatenated only when every input carries them; mixing
        weighted and unweighted inputs raises ``RuntimeError``. When all
        inputs share one and the same scalar relation type, the result keeps
        a scalar relation type; otherwise relation types are expanded to one
        entry per edge and concatenated.
        """
        cat_lhs = EntityList.cat([edges.lhs for edges in edge_lists])
        cat_rhs = EntityList.cat([edges.rhs for edges in edge_lists])

        weighted = [edges.has_weight() for edges in edge_lists]
        if not any(weighted):
            cat_weight = None
        elif all(weighted):
            per_list_weights = [
                edges.weight.expand((len(edges), )) for edges in edge_lists
            ]
            cat_weight = torch.cat(per_list_weights)
        else:
            raise RuntimeError(
                "Can't concatenate edgelists with and without weight field."
            )

        if all(edges.has_scalar_relation_type() for edges in edge_lists):
            distinct_rel_types = {
                edges.get_relation_type_as_scalar() for edges in edge_lists
            }
            if len(distinct_rel_types) == 1:
                # Every input agrees on the relation type: keep it scalar.
                rel_type = next(iter(distinct_rel_types))
                scalar_rel = torch.tensor(rel_type, dtype=torch.long)
                return cls(cat_lhs, cat_rhs, scalar_rel, cat_weight)

        per_list_rels = [edges.rel.expand((len(edges), )) for edges in edge_lists]
        cat_rel = torch.cat(per_list_rels)
        return cls(cat_lhs, cat_rhs, cat_rel, cat_weight)
 def test_from_tensor(self):
     """``from_tensor`` must match an EntityList built with an empty TensorList."""
     built = EntityList.from_tensor(torch.tensor([3, 4], dtype=torch.long))
     expected = EntityList(
         torch.tensor([3, 4], dtype=torch.long),
         TensorList.empty(num_tensors=2),
     )
     self.assertEqual(built, expected)
Пример #4
0
 def test_empty(self):
     """Grouping empty inputs by relation type yields an empty list."""
     no_rels = torch.empty((0, ), dtype=torch.long)
     groups = group_by_relation_type(
         no_rels,
         EntityList.empty(),
         EntityList.empty(),
     )
     self.assertEqual(groups, [])
Пример #5
0
    def load_chunk_of_edges(
        self,
        lhs_p: Partition,
        rhs_p: Partition,
        chunk_idx: int = 0,
        num_chunks: int = 1,
        shared: bool = False,
    ) -> EdgeList:
        """Load one chunk of the edges stored for a partition pair.

        The edges of the file returned by ``self.get_edges_file(lhs_p, rhs_p)``
        are split into ``num_chunks`` contiguous ranges of (at most)
        ``div_roundup(num_edges, num_chunks)`` edges each, and the range with
        index ``chunk_idx`` is returned.

        Args:
            lhs_p: left-hand-side partition of the edge file.
            rhs_p: right-hand-side partition of the edge file.
            chunk_idx: index of the chunk to load.
            num_chunks: number of chunks the edge file is divided into.
            shared: when true, buffers come from ``allocate_shared_tensor``
                instead of ``torch.empty`` (and the flag is forwarded to
                ``self.read_dynamic``).

        Returns:
            The edges of the requested chunk; the chunk may be empty.

        Raises:
            RuntimeError: if the file's format version attribute does not
                equal ``FORMAT_VERSION``.
            CouldNotLoadData: if opening the file fails with ENOENT.
            OSError: for any other I/O failure.
        """
        file_path = self.get_edges_file(lhs_p, rhs_p)
        try:
            with h5py.File(file_path, "r") as hf:
                if hf.attrs.get(FORMAT_VERSION_ATTR, None) != FORMAT_VERSION:
                    raise RuntimeError(
                        f"Version mismatch in edge file {file_path}")
                lhs_ds = hf["lhs"]
                rhs_ds = hf["rhs"]
                rel_ds = hf["rel"]

                num_edges = rel_ds.len()
                chunk_size = div_roundup(num_edges, num_chunks)
                # Because the chunk size is rounded up, trailing chunks can
                # start past the end of the data (e.g. 5 edges in 4 chunks:
                # chunk 3 would start at 6). Clamp both bounds so that such
                # chunks come out empty instead of having a negative size.
                begin = min(chunk_idx * chunk_size, num_edges)
                end = min((chunk_idx + 1) * chunk_size, num_edges)
                chunk_size = end - begin

                allocator = allocate_shared_tensor if shared else torch.empty
                lhs = allocator((chunk_size, ), dtype=torch.long)
                rhs = allocator((chunk_size, ), dtype=torch.long)
                rel = allocator((chunk_size, ), dtype=torch.long)

                # Needed because https://github.com/h5py/h5py/issues/870.
                if chunk_size > 0:
                    lhs_ds.read_direct(lhs.numpy(),
                                       source_sel=np.s_[begin:end])
                    rhs_ds.read_direct(rhs.numpy(),
                                       source_sel=np.s_[begin:end])
                    rel_ds.read_direct(rel.numpy(),
                                       source_sel=np.s_[begin:end])

                lhsd = self.read_dynamic(hf, "lhsd", begin, end, shared=shared)
                rhsd = self.read_dynamic(hf, "rhsd", begin, end, shared=shared)

                if "weight" in hf:
                    weight_ds = hf["weight"]
                    # NOTE(review): weights are read into a torch.long buffer;
                    # if the stored weights are floating point this would
                    # truncate them -- confirm the intended dtype.
                    weight = allocator((chunk_size, ), dtype=torch.long)
                    if chunk_size > 0:
                        weight_ds.read_direct(weight.numpy(),
                                              source_sel=np.s_[begin:end])
                else:
                    weight = None
                return EdgeList(EntityList(lhs, lhsd), EntityList(rhs, rhsd),
                                rel, weight)
        except OSError as err:
            # h5py refuses to make it easy to figure out what went wrong. The errno
            # attribute is set to None. See https://github.com/h5py/h5py/issues/493.
            if f"errno = {errno.ENOENT}" in str(err):
                raise CouldNotLoadData() from err
            # Bare raise re-raises the active exception with its traceback intact.
            raise
Пример #6
0
def append_to_file(data, appender):
    """Pack ``(lhs_offset, rhs_offset, rel_id)`` triples into an EdgeList and append it.

    ``data`` is transposed into three columns, each column becomes a long
    tensor, and the resulting EdgeList is passed to ``appender.append_edges``.
    """
    lhs_column, rhs_column, rel_column = zip(*data)
    lhs_entities = EntityList.from_tensor(
        torch.tensor(lhs_column, dtype=torch.long))
    rhs_entities = EntityList.from_tensor(
        torch.tensor(rhs_column, dtype=torch.long))
    rel_tensor = torch.tensor(rel_column, dtype=torch.long)
    appender.append_edges(EdgeList(lhs_entities, rhs_entities, rel_tensor))
Пример #7
0
 def test_get_relation_type_as_scalar(self):
     """An EdgeList built with a 0-dim ``rel`` of 3 reports 3 as its scalar type."""
     lhs = EntityList.from_tensor(torch.tensor([3, 4], dtype=torch.long))
     rhs = EntityList.from_tensor(torch.tensor([0, 2], dtype=torch.long))
     edges = EdgeList(lhs, rhs, torch.tensor(3, dtype=torch.long))
     self.assertEqual(edges.get_relation_type_as_scalar(), 3)
Пример #8
0
 def test_empty(self):
     """``EdgeList.empty()`` equals an EdgeList assembled from empty parts."""
     produced = EdgeList.empty()
     assembled = EdgeList(
         EntityList.empty(),
         EntityList.empty(),
         torch.empty((0, ), dtype=torch.long),
     )
     self.assertEqual(produced, assembled)
 def test_getitem_int(self):
     """Indexing an EntityList with a negative int gives a one-element EntityList."""
     entities = EntityList(
         torch.tensor([3, 4, 1, 0], dtype=torch.long),
         tensor_list_from_lists([[2, 1], [0], [], [3, 4, 5]]),
     )
     picked = entities[-3]
     expected = EntityList(
         torch.tensor([4], dtype=torch.long),
         tensor_list_from_lists([[0]]),
     )
     self.assertEqual(picked, expected)
Пример #10
0
def generate_edge_path_files_fast(
    edge_file_in: Path,
    edge_path_out: Path,
    edge_storage: AbstractEdgeStorage,
    entities_by_type: Dict[str, Dictionary],
    relation_types: Dictionary,
    relation_configs: List[RelationSchema],
    edgelist_reader: EdgelistReader,
) -> None:
    """Convert an edge list file into a single edge file for partition (0, 0).

    Every ``(lhs, rhs, rel)`` record yielded by ``edgelist_reader`` is mapped
    to entity offsets and a relation id; records whose relation type or
    entities are unknown are skipped and counted. All remaining edges are
    held in memory and saved with one ``edge_storage.save_edges(0, 0, ...)``
    call. If no edge survives, an empty edge list is saved.

    Args:
        edge_file_in: file handed to ``edgelist_reader.read``.
        edge_path_out: not used by this function body.
        edge_storage: storage that receives the resulting edge list.
        entities_by_type: per-entity-type dictionaries used to resolve the
            offset of each entity.
        relation_types: dictionary used to resolve relation ids.
        relation_configs: relation schemas indexed by relation id; supply
            the lhs/rhs entity type of each relation.
        edgelist_reader: reader producing ``(lhs, rhs, rel)`` records.
    """
    processed = 0
    skipped = 0

    log("Taking the fast train!")
    # One list per column, so that an input with no usable edge still yields
    # (empty) tensors instead of failing while transposing an empty list.
    lhs_offsets: List[int] = []
    rhs_offsets: List[int] = []
    rel_ids: List[int] = []
    for lhs_word, rhs_word, rel_word in edgelist_reader.read(edge_file_in):
        if rel_word is None:
            rel_id = 0
        else:
            try:
                rel_id = relation_types.get_id(rel_word)
            except KeyError:
                # Ignore edges whose relation type is not known.
                skipped += 1
                continue

        lhs_type = relation_configs[rel_id].lhs
        rhs_type = relation_configs[rel_id].rhs

        try:
            _, lhs_offset = entities_by_type[lhs_type].get_partition(lhs_word)
            _, rhs_offset = entities_by_type[rhs_type].get_partition(rhs_word)
        except KeyError:
            # Ignore edges whose entities are not known.
            skipped += 1
            continue

        lhs_offsets.append(lhs_offset)
        rhs_offsets.append(rhs_offset)
        rel_ids.append(rel_id)

        processed += 1
        if processed % 100000 == 0:
            log(f"- Processed {processed} edges so far...")

    edge_list = EdgeList(
        EntityList.from_tensor(torch.tensor(lhs_offsets, dtype=torch.long)),
        EntityList.from_tensor(torch.tensor(rhs_offsets, dtype=torch.long)),
        torch.tensor(rel_ids, dtype=torch.long),
    )
    edge_storage.save_edges(0, 0, edge_list)

    log(f"- Processed {processed} edges in total")
    if skipped > 0:
        log(
            f"- Skipped {skipped} edges because their relation type or "
            f"entities were unknown (either not given in the config or "
            f"filtered out as too rare)."
        )
Пример #11
0
 def cat(cls, edge_lists: Sequence["EdgeList"]) -> "EdgeList":
     """Concatenate several edge lists into a single one.

     When all inputs share one and the same scalar relation type, the
     result keeps a scalar relation type; otherwise relation types are
     expanded to one entry per edge and concatenated. The result is built
     through ``cls`` in both cases, so both branches return the same type.
     """
     cat_lhs = EntityList.cat([el.lhs for el in edge_lists])
     cat_rhs = EntityList.cat([el.rhs for el in edge_lists])
     if all(el.has_scalar_relation_type() for el in edge_lists):
         rel_types = {el.get_relation_type_as_scalar() for el in edge_lists}
         if len(rel_types) == 1:
             (rel_type,) = rel_types
             return cls(cat_lhs, cat_rhs, torch.tensor(rel_type, dtype=torch.long))
     cat_rel = torch.cat([el.rel.expand((len(el),)) for el in edge_lists])
     # Use ``cls`` (not a hard-coded EdgeList) to agree with the scalar branch.
     return cls(cat_lhs, cat_rhs, cat_rel)
 def test_getitem_longtensor(self):
     """Indexing an EntityList with an index tensor selects entries in that order."""
     entities = EntityList(
         torch.tensor([3, 4, 1, 0], dtype=torch.long),
         tensor_list_from_lists([[2, 1], [0], [], [3, 4, 5]]),
     )
     picked = entities[torch.tensor([2, 0])]
     expected = EntityList(
         torch.tensor([1, 3], dtype=torch.long),
         tensor_list_from_lists([[], [2, 1]]),
     )
     self.assertEqual(picked, expected)
Пример #13
0
def append_to_file(data, appender):
    """Pack ``(lhs_offset, rhs_offset, rel_id, weight)`` rows into an EdgeList and append it.

    ``data`` is transposed into four columns. The weight column is turned
    into a tensor unless its first entry is ``None``, in which case no
    weights are passed on. The EdgeList goes to ``appender.append_edges``.
    """
    lhs_column, rhs_column, rel_column, weight_column = zip(*data)
    if weight_column[0] is None:
        weight_tensor = None
    else:
        weight_tensor = torch.tensor(weight_column)
    lhs_entities = EntityList.from_tensor(
        torch.tensor(lhs_column, dtype=torch.long))
    rhs_entities = EntityList.from_tensor(
        torch.tensor(rhs_column, dtype=torch.long))
    rel_tensor = torch.tensor(rel_column, dtype=torch.long)
    appender.append_edges(
        EdgeList(lhs_entities, rhs_entities, rel_tensor, weight_tensor))
Пример #14
0
 def test_get_relation_type_as_vector(self):
     """A per-edge ``rel`` tensor is returned unchanged as the relation vector."""
     lhs = EntityList.from_tensor(torch.tensor([3, 4], dtype=torch.long))
     rhs = EntityList.from_tensor(torch.tensor([0, 2], dtype=torch.long))
     edges = EdgeList(lhs, rhs, torch.tensor([2, 0], dtype=torch.long))
     rel_vector = edges.get_relation_type_as_vector()
     expected = torch.tensor([2, 0], dtype=torch.long)
     self.assertTrue(torch.equal(rel_vector, expected))
Пример #15
0
 def test_len(self):
     """``len`` of an EdgeList holding two edges is 2."""
     lhs = EntityList.from_tensor(torch.tensor([3, 4], dtype=torch.long))
     rhs = EntityList.from_tensor(torch.tensor([0, 2], dtype=torch.long))
     edges = EdgeList(lhs, rhs, torch.tensor([2, 0], dtype=torch.long))
     self.assertEqual(len(edges), 2)
Пример #16
0
 def test_has_scalar_relation_type(self):
     """A 0-dim ``rel`` tensor counts as scalar; a 1-dim one does not."""

     def entities(values):
         # Fresh EntityList for each use, as in a hand-written literal.
         return EntityList.from_tensor(torch.tensor(values, dtype=torch.long))

     scalar_edges = EdgeList(
         entities([3, 4]),
         entities([0, 2]),
         torch.tensor(3, dtype=torch.long),
     )
     self.assertTrue(scalar_edges.has_scalar_relation_type())

     vector_edges = EdgeList(
         entities([3, 4]),
         entities([0, 2]),
         torch.tensor([2, 0], dtype=torch.long),
     )
     self.assertFalse(vector_edges.has_scalar_relation_type())
 def test_cat(self):
     """Concatenating two EntityLists joins both their tensors and tensor lists."""
     first_ids = torch.tensor([2, 3], dtype=torch.long)
     second_ids = torch.tensor([0, 1], dtype=torch.long)
     joined_ids = torch.tensor([2, 3, 0, 1], dtype=torch.long)
     first_features = tensor_list_from_lists([[3, 4], [0]])
     second_features = tensor_list_from_lists([[1, 2, 0], []])
     joined_features = tensor_list_from_lists([[3, 4], [0], [1, 2, 0], []])

     parts = [
         EntityList(first_ids, first_features),
         EntityList(second_ids, second_features),
     ]
     combined = EntityList.cat(parts)
     self.assertEqual(combined, EntityList(joined_ids, joined_features))
Пример #18
0
 def test_basic(self):
     """Edges are grouped per relation type, with groups listed as types 0, 1, 2."""

     def entities(values):
         # Shorthand for a non-featurized EntityList of long ids.
         return EntityList.from_tensor(torch.tensor(values, dtype=torch.long))

     rel = torch.tensor([1, 0, 0, 1, 2, 2, 0, 0, 2, 2], dtype=torch.long)
     lhs = entities([93, 24, 13, 31, 70, 66, 77, 38, 5, 5])
     rhs = entities([90, 75, 9, 25, 23, 31, 49, 64, 42, 50])
     groups = group_by_relation_type(rel, lhs, rhs)

     expected = [
         (entities([24, 13, 77, 38]), entities([75, 9, 49, 64]), 0),
         (entities([93, 31]), entities([90, 25]), 1),
         (entities([70, 66, 5, 5]), entities([23, 31, 42, 50]), 2),
     ]
     self.assertEqual(groups, expected)
Пример #19
0
 def test_getitem_int(self):
     """Indexing an EdgeList with an int gives one edge with a scalar relation type."""
     edges = EdgeList(
         EntityList.from_tensor(torch.tensor([3, 4, 1, 0], dtype=torch.long)),
         EntityList.from_tensor(torch.tensor([0, 2, 1, 3], dtype=torch.long)),
         torch.tensor([1, 1, 3, 0], dtype=torch.long),
     )
     picked = edges[-3]
     expected = EdgeList(
         EntityList.from_tensor(torch.tensor([4], dtype=torch.long)),
         EntityList.from_tensor(torch.tensor([2], dtype=torch.long)),
         torch.tensor(1, dtype=torch.long),
     )
     self.assertEqual(picked, expected)
Пример #20
0
 def test_getitem_longtensor(self):
     """Indexing an EdgeList with an index tensor selects edges in that order."""
     edges = EdgeList(
         EntityList.from_tensor(torch.tensor([3, 4, 1, 0], dtype=torch.long)),
         EntityList.from_tensor(torch.tensor([0, 2, 1, 3], dtype=torch.long)),
         torch.tensor([1, 1, 3, 0], dtype=torch.long),
     )
     picked = edges[torch.tensor([2, 0])]
     expected = EdgeList(
         EntityList.from_tensor(torch.tensor([1, 3], dtype=torch.long)),
         EntityList.from_tensor(torch.tensor([1, 0], dtype=torch.long)),
         torch.tensor([3, 1], dtype=torch.long),
     )
     self.assertEqual(picked, expected)
Пример #21
0
 def test_forward(self):
     """Forward output matches fixed expected rows and backward yields a non-zero grad."""
     weights = torch.tensor(
         [[1.0, 1.0, 1.0], [2.0, 2.0, 2.0], [3.0, 3.0, 3.0]], requires_grad=True
     )
     module = FeaturizedEmbedding(weight=weights)

     features = TensorList(
         torch.tensor([0, 1, 3, 6, 6]), torch.tensor([0, 2, 1, 0, 1, 0])
     )
     result = module(EntityList.from_tensor_list(features))

     expected = torch.tensor(
         [
             [1.0000, 1.0000, 1.0000],
             [2.5000, 2.5000, 2.5000],
             [1.3333, 1.3333, 1.3333],
             [0.0000, 0.0000, 0.0000],
         ]
     )
     self.assertTensorEqual(result, expected)

     result.sum().backward()
     grad_has_nonzero = (weights.grad.to_dense() != 0).any()
     self.assertTrue(grad_has_nonzero)
Пример #22
0
 def test_empty(self):
     """A SimpleEmbedding over an empty weight maps no entities to a (0, 3) tensor."""
     weights = torch.empty((0, 3))
     module = SimpleEmbedding(weight=weights)
     no_entities = EntityList.from_tensor(torch.empty((0, ), dtype=torch.long))
     output = module(no_entities)
     self.assertTensorEqual(output, torch.empty((0, 3)))
Пример #23
0
    def load_chunk_of_edges(
        self,
        lhs_p: int,
        rhs_p: int,
        chunk_idx: int = 0,
        num_chunks: int = 1,
    ) -> EdgeList:
        """Load one chunk of the edges stored for a partition pair.

        The edges of the file returned by ``self.get_edges_file(lhs_p, rhs_p)``
        are split into ``num_chunks`` contiguous ranges and the range with
        index ``chunk_idx`` is returned.

        Args:
            lhs_p: left-hand-side partition of the edge file.
            rhs_p: right-hand-side partition of the edge file.
            chunk_idx: index of the chunk to load.
            num_chunks: number of chunks the edge file is divided into.

        Returns:
            The edges of the requested chunk; the chunk may be empty.

        Raises:
            RuntimeError: if the file's format version attribute does not
                equal ``FORMAT_VERSION``.
            CouldNotLoadData: if opening the file fails with ENOENT.
            OSError: for any other I/O failure.
        """
        file_path = self.get_edges_file(lhs_p, rhs_p)
        try:
            with h5py.File(file_path, "r") as hf:
                if hf.attrs.get(FORMAT_VERSION_ATTR, None) != FORMAT_VERSION:
                    raise RuntimeError(
                        f"Version mismatch in edge file {file_path}")
                lhs_ds = hf["lhs"]
                rhs_ds = hf["rhs"]
                rel_ds = hf["rel"]

                num_edges = rel_ds.len()
                # Integer floor division: same bounds as truncating the float
                # quotient, but exact even when the product is too large to
                # be represented precisely as a float.
                begin = chunk_idx * num_edges // num_chunks
                end = (chunk_idx + 1) * num_edges // num_chunks
                chunk_size = end - begin

                lhs = torch.empty((chunk_size, ), dtype=torch.long)
                rhs = torch.empty((chunk_size, ), dtype=torch.long)
                rel = torch.empty((chunk_size, ), dtype=torch.long)

                # Needed because https://github.com/h5py/h5py/issues/870.
                if chunk_size > 0:
                    lhs_ds.read_direct(lhs.numpy(),
                                       source_sel=np.s_[begin:end])
                    rhs_ds.read_direct(rhs.numpy(),
                                       source_sel=np.s_[begin:end])
                    rel_ds.read_direct(rel.numpy(),
                                       source_sel=np.s_[begin:end])

                lhsd = self.read_dynamic(hf, "lhsd", begin, end)
                rhsd = self.read_dynamic(hf, "rhsd", begin, end)

                return EdgeList(EntityList(lhs, lhsd), EntityList(rhs, rhsd),
                                rel)
        except OSError as err:
            # h5py refuses to make it easy to figure out what went wrong. The errno
            # attribute is set to None. See https://github.com/h5py/h5py/issues/493.
            if f"errno = {errno.ENOENT}" in str(err):
                raise CouldNotLoadData() from err
            # Bare raise re-raises the active exception with its traceback intact.
            raise
 def test_to_tensor_list(self):
     """``to_tensor_list`` returns the tensor list the EntityList was built with."""
     entities = EntityList(
         torch.tensor([-1, -1], dtype=torch.long),
         tensor_list_from_lists([[3, 4], [0]]),
     )
     extracted = entities.to_tensor_list()
     self.assertEqual(extracted, tensor_list_from_lists([[3, 4], [0]]))
Пример #25
0
 def test_len(self):
     """``len`` of an EntityList holding two entities is 2."""
     entities = EntityList(
         torch.tensor([3, 4], dtype=torch.long),
         tensor_list_from_lists([[], [2, 1, 0]]),
     )
     self.assertEqual(len(entities), 2)
 def test_to_tensor(self):
     """``to_tensor`` returns the id tensor when every feature list is empty."""
     entities = EntityList(
         torch.tensor([2, 3], dtype=torch.long),
         tensor_list_from_lists([[], []]),
     )
     ids = entities.to_tensor()
     expected = torch.tensor([2, 3], dtype=torch.long)
     self.assertTrue(torch.equal(ids, expected))
Пример #27
0
 def test_empty(self):
     """A FeaturizedEmbedding fed an empty TensorList returns a (0, 3) tensor."""
     weights = torch.empty((0, 3))
     module = FeaturizedEmbedding(weight=weights)
     no_features = TensorList(
         torch.zeros((1, ), dtype=torch.long),
         torch.empty((0, ), dtype=torch.long),
     )
     output = module(EntityList.from_tensor_list(no_features))
     self.assertTensorEqual(output, torch.empty((0, 3)))
Пример #28
0
 def test_constant(self):
     """When every edge has relation type 3, a single group holds all edges."""

     def entities(values):
         # Shorthand for a non-featurized EntityList of long ids.
         return EntityList.from_tensor(torch.tensor(values, dtype=torch.long))

     groups = group_by_relation_type(
         torch.tensor([3, 3, 3, 3], dtype=torch.long),
         entities([93, 24, 13, 31]),
         entities([90, 75, 9, 25]),
     )
     expected = [
         (entities([93, 24, 13, 31]), entities([90, 75, 9, 25]), 3),
     ]
     self.assertEqual(groups, expected)
Пример #29
0
 def test_forward(self):
     """Forward output picks the indexed weight rows and backward yields a non-zero grad."""
     weights = torch.tensor(
         [[1.0, 1.0, 1.0], [2.0, 2.0, 2.0], [3.0, 3.0, 3.0]], requires_grad=True
     )
     module = SimpleEmbedding(weight=weights)

     result = module(EntityList.from_tensor(torch.tensor([2, 0, 0])))
     expected = torch.tensor(
         [[3.0, 3.0, 3.0], [1.0, 1.0, 1.0], [1.0, 1.0, 1.0]]
     )
     self.assertTensorEqual(result, expected)

     result.sum().backward()
     grad_has_nonzero = (weights.grad.to_dense() != 0).any()
     self.assertTrue(grad_has_nonzero)
 def test_equal(self):
     """An EntityList equals itself but not one differing in ids or in features."""
     reference = EntityList(
         torch.tensor([3, 4], dtype=torch.long),
         tensor_list_from_lists([[], [2, 1, 0]]),
     )
     self.assertEqual(reference, reference)

     # Same feature lists, different id tensor.
     different_ids = EntityList(
         torch.tensor([4, 2], dtype=torch.long),
         tensor_list_from_lists([[], [2, 1, 0]]),
     )
     self.assertNotEqual(reference, different_ids)

     # Same id tensor, different feature lists.
     different_features = EntityList(
         torch.tensor([3, 4], dtype=torch.long),
         tensor_list_from_lists([[3], [2, 0]]),
     )
     self.assertNotEqual(reference, different_features)