Example #1
    def _process_one_batch(
        self, model: MultiRelationEmbedder, batch_edges: EdgeList
    ) -> Stats:
        model.zero_grad()

        scores, reg = model(batch_edges)

        loss = self.calc_loss(scores, batch_edges)

        stats = Stats(
            loss=float(loss),
            reg=float(reg) if reg is not None else 0.0,
            violators_lhs=int((scores.lhs_neg > scores.lhs_pos.unsqueeze(1)).sum()),
            violators_rhs=int((scores.rhs_neg > scores.rhs_pos.unsqueeze(1)).sum()),
            count=len(batch_edges),
        )
        if reg is not None:
            (loss + reg).backward()
        else:
            loss.backward()
        self.model_optimizer.step(closure=None)
        for optimizer in self.unpartitioned_optimizers.values():
            optimizer.step(closure=None)
        for optimizer in self.partitioned_optimizers.values():
            optimizer.step(closure=None)

        return stats
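The batch processor above is normally driven by a loop that slices the full EdgeList into fixed-size batches and aggregates the per-batch Stats. Below is a minimal sketch of such a driver; it assumes EdgeList supports integer-range slicing and that Stats offers sum/average aggregation helpers, so it may not match the library's actual process_in_batches.

def process_in_batches(batch_size, model, batch_processor, edges):
    # Hypothetical driver: feed consecutive slices of the edge list to the
    # batch processor and collect the returned Stats.
    all_stats = []
    for offset in range(0, len(edges), batch_size):
        batch_edges = edges[offset:offset + batch_size]
        all_stats.append(batch_processor._process_one_batch(model, batch_edges))
    # Aggregate: sum the counts and average the per-batch metrics (assumed helpers).
    return Stats.sum(all_stats).average()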
Example #2
    def process_one_batch(
        self,
        model: MultiRelationEmbedder,
        batch_edges: EdgeList,
    ) -> Stats:
        model.zero_grad()

        scores = model(batch_edges)

        lhs_loss = self.loss_fn(scores.lhs_pos, scores.lhs_neg)
        rhs_loss = self.loss_fn(scores.rhs_pos, scores.rhs_neg)
        relation = self.relations[
            batch_edges.get_relation_type_as_scalar()
            if batch_edges.has_scalar_relation_type() else 0]
        loss = relation.weight * (lhs_loss + rhs_loss)

        stats = Stats(
            loss=float(loss),
            violators_lhs=int(
                (scores.lhs_neg > scores.lhs_pos.unsqueeze(1)).sum()),
            violators_rhs=int(
                (scores.rhs_neg > scores.rhs_pos.unsqueeze(1)).sum()),
            count=len(batch_edges))

        loss.backward()
        self.global_optimizer.step(closure=None)
        for optimizer in self.entity_optimizers.values():
            optimizer.step(closure=None)

        return stats
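Here loss_fn compares each positive score against its row of negative scores. A common choice consistent with the violator counts above is a hinge (margin ranking) loss; the sketch below is illustrative only, and the margin value and sum reduction are assumptions rather than what this codebase necessarily uses.

import torch

def margin_loss(pos_scores: torch.Tensor, neg_scores: torch.Tensor,
                margin: float = 1.0) -> torch.Tensor:
    # pos_scores: (B,), neg_scores: (B, N) -- N negatives per positive edge.
    # Penalize every negative that scores within `margin` of (or above) its positive.
    return torch.relu(margin + neg_scores - pos_scores.unsqueeze(1)).sum()

pos = torch.randn(8)
neg = torch.randn(8, 100)
loss = margin_loss(pos, neg)
# "Violators" are the negatives that outrank their positive outright:
violators = int((neg > pos.unsqueeze(1)).sum())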
Example #3
    def _process_one_batch(self, model: MultiRelationEmbedder,
                           batch_edges: EdgeList) -> Stats:
        # Tricky: this is basically like calling `model.zero_grad()` except
        # that `zero_grad` calls `p.grad.zero_()`. When we perform infrequent
        # global L2 regularization, it converts the embedding gradients to dense,
        # and then they can never convert back to sparse gradients unless we set
        # them to `None` again here.
        for p in model.parameters():
            p.grad = None

        scores, reg = model(batch_edges)

        loss = self.calc_loss(scores, batch_edges)

        stats = Stats(
            loss=float(loss),
            reg=float(reg) if reg is not None else 0.0,
            violators_lhs=int(
                (scores.lhs_neg > scores.lhs_pos.unsqueeze(1)).sum()),
            violators_rhs=int(
                (scores.rhs_neg > scores.rhs_pos.unsqueeze(1)).sum()),
            count=len(batch_edges),
        )
        if reg is not None:
            loss = loss + reg
        if model.wd > 0 and random.random() < 1. / model.wd_interval:
            # Apply the global L2 penalty on roughly one batch in wd_interval,
            # scaled up by wd_interval so the expected per-batch penalty stays
            # equal to wd * l2_norm.
            loss = loss + model.wd * model.wd_interval * model.l2_norm()
        loss.backward()
        self.model_optimizer.step(closure=None)
        for optimizer in self.unpartitioned_optimizers.values():
            optimizer.step(closure=None)
        for optimizer in self.partitioned_optimizers.values():
            optimizer.step(closure=None)

        return stats
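The manual `p.grad = None` matters because of sparse embedding gradients: once a dense gradient buffer exists (e.g. from the occasional global L2 term), `zero_grad()` only zeroes it, and every later sparse gradient gets accumulated into it and stays dense. Clearing the buffer lets the next backward pass produce a sparse gradient again, as the small standalone demo below shows; recent PyTorch versions achieve the same effect with `zero_grad(set_to_none=True)`.

import torch
from torch import nn

emb = nn.Embedding(10, 4, sparse=True)

# A global L2 penalty touches every row, so its gradient is dense.
emb.weight.pow(2).sum().backward()
print(emb.weight.grad.is_sparse)  # False

# Clear the buffer instead of zeroing it (what the snippet does by hand).
emb.weight.grad = None

# An ordinary lookup now produces a sparse gradient again.
emb(torch.tensor([1, 2, 3])).sum().backward()
print(emb.weight.grad.is_sparse)  # True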
Example #4
    def do_one_job(  # noqa
        self,
        lhs_types: Set[str],
        rhs_types: Set[str],
        lhs_part: Partition,
        rhs_part: Partition,
        lhs_subpart: SubPartition,
        rhs_subpart: SubPartition,
        next_lhs_subpart: Optional[SubPartition],
        next_rhs_subpart: Optional[SubPartition],
        model: MultiRelationEmbedder,
        trainer: Trainer,
        all_embs: Dict[Tuple[EntityName, Partition], FloatTensorType],
        subpart_slices: Dict[Tuple[EntityName, Partition, SubPartition],
                             slice],
        subbuckets: Dict[Tuple[int, int], Tuple[LongTensorType, LongTensorType,
                                                LongTensorType]],
        batch_size: int,
        lr: float,
    ) -> Stats:
        tk = TimeKeeper()

        for embeddings in all_embs.values():
            assert embeddings.is_pinned()

        occurrences: Dict[Tuple[EntityName, Partition, SubPartition],
                          Set[Side]] = defaultdict(set)
        for entity_name in lhs_types:
            occurrences[entity_name, lhs_part, lhs_subpart].add(Side.LHS)
        for entity_name in rhs_types:
            occurrences[entity_name, rhs_part, rhs_subpart].add(Side.RHS)

        if lhs_part != rhs_part:  # Bipartite
            assert all(len(v) == 1 for v in occurrences.values())

        tk.start("copy_to_device")
        for entity_name, part, subpart in occurrences.keys():
            if (entity_name, part, subpart) in self.sub_holder:
                continue
            embeddings = all_embs[entity_name, part]
            optimizer = trainer.partitioned_optimizers[entity_name, part]
            subpart_slice = subpart_slices[entity_name, part, subpart]

            # TODO have two permanent storages on GPU and move stuff in and out
            # from them
            # logger.info(f"GPU #{self.gpu_idx} allocating {(subpart_slice.stop - subpart_slice.start) * embeddings.shape[1] * 4:,} bytes")
            gpu_embeddings = torch.empty(
                (subpart_slice.stop - subpart_slice.start,
                 embeddings.shape[1]),
                dtype=torch.float32,
                device=self.my_device,
            )
            gpu_embeddings.copy_(embeddings[subpart_slice], non_blocking=True)
            gpu_embeddings = torch.nn.Parameter(gpu_embeddings)
            gpu_optimizer = RowAdagrad([gpu_embeddings], lr=lr)
            (cpu_state, ) = optimizer.state.values()
            (gpu_state, ) = gpu_optimizer.state.values()
            # logger.info(f"GPU #{self.gpu_idx} allocating {(subpart_slice.stop - subpart_slice.start) * 4:,} bytes")
            gpu_state["sum"].copy_(cpu_state["sum"][subpart_slice],
                                   non_blocking=True)

            self.sub_holder[entity_name, part, subpart] = (
                gpu_embeddings,
                gpu_optimizer,
            )
        logger.debug(
            f"Time spent copying subparts to GPU: {tk.stop('copy_to_device'):.4f} s"
        )

        for (
            (entity_name, part, subpart),
            (gpu_embeddings, gpu_optimizer),
        ) in self.sub_holder.items():
            for side in occurrences[entity_name, part, subpart]:
                model.set_embeddings(entity_name, side, gpu_embeddings)
                trainer.partitioned_optimizers[entity_name, part,
                                               subpart] = gpu_optimizer

        tk.start("translate_edges")
        num_edges = subbuckets[lhs_subpart, rhs_subpart][0].shape[0]
        edge_perm = torch.randperm(num_edges)
        edges_lhs, edges_rhs, edges_rel = subbuckets[lhs_subpart, rhs_subpart]
        # Shuffle all three edge tensors in-place with the same permutation;
        # the C++ helper parallelizes the work across CPU cores.
        _C.shuffle(edges_lhs, edge_perm, os.cpu_count())
        _C.shuffle(edges_rhs, edge_perm, os.cpu_count())
        _C.shuffle(edges_rel, edge_perm, os.cpu_count())
        assert edges_lhs.is_pinned()
        assert edges_rhs.is_pinned()
        assert edges_rel.is_pinned()
        gpu_edges = EdgeList(
            EntityList.from_tensor(edges_lhs),
            EntityList.from_tensor(edges_rhs),
            edges_rel,
        ).to(self.my_device, non_blocking=True)
        logger.debug(f"GPU #{self.gpu_idx} got {num_edges} edges")
        logger.debug(
            f"Time spent copying edges to GPU: {tk.stop('translate_edges'):.4f} s"
        )

        tk.start("processing")
        stats = process_in_batches(batch_size=batch_size,
                                   model=model,
                                   batch_processor=trainer,
                                   edges=gpu_edges)
        logger.debug(f"Time spent processing: {tk.stop('processing'):.4f} s")

        next_occurrences: Dict[Tuple[EntityName, Partition, SubPartition],
                               Set[Side]] = defaultdict(set)
        if next_lhs_subpart is not None:
            for entity_name in lhs_types:
                next_occurrences[entity_name, lhs_part,
                                 next_lhs_subpart].add(Side.LHS)
        if next_rhs_subpart is not None:
            for entity_name in rhs_types:
                next_occurrences[entity_name, rhs_part,
                                 next_rhs_subpart].add(Side.RHS)

        tk.start("copy_from_device")
        for (entity_name, part,
             subpart), (gpu_embeddings,
                        gpu_optimizer) in list(self.sub_holder.items()):
            if (entity_name, part, subpart) in next_occurrences:
                continue
            embeddings = all_embs[entity_name, part]
            optimizer = trainer.partitioned_optimizers[entity_name, part]
            subpart_slice = subpart_slices[entity_name, part, subpart]

            embeddings[subpart_slice].data.copy_(gpu_embeddings.detach(),
                                                 non_blocking=True)
            del gpu_embeddings
            (cpu_state, ) = optimizer.state.values()
            (gpu_state, ) = gpu_optimizer.state.values()
            cpu_state["sum"][subpart_slice].copy_(gpu_state["sum"],
                                                  non_blocking=True)
            del gpu_state["sum"]
            del self.sub_holder[entity_name, part, subpart]
        logger.debug(
            f"Time spent copying subparts from GPU: {tk.stop('copy_from_device'):.4f} s"
        )

        logger.debug(
            f"do_one_job: Time unaccounted for: {tk.unaccounted():.4f} s")

        return stats
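Much of do_one_job is spent moving embedding shards and optimizer state between pinned host memory and the GPU. The core pattern is the pair of asynchronous copies sketched below; the buffer sizes and names are made up for illustration, and running it requires a CUDA device.

import torch

device = torch.device("cuda:0")

# Pinned (page-locked) host buffer, so non_blocking copies can overlap compute.
cpu_embeddings = torch.empty(100_000, 128, pin_memory=True)
subpart_slice = slice(0, 25_000)

gpu_embeddings = torch.empty(
    (subpart_slice.stop - subpart_slice.start, cpu_embeddings.shape[1]),
    dtype=torch.float32,
    device=device,
)
gpu_embeddings.copy_(cpu_embeddings[subpart_slice], non_blocking=True)

# ... train on gpu_embeddings ...

# Write the updated rows back into the same slice of the pinned buffer.
cpu_embeddings[subpart_slice].copy_(gpu_embeddings, non_blocking=True)
torch.cuda.synchronize(device)  # ensure the async device-to-host copy has landed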