Example No. 1
    def _backward(self, loss):
        self.optimizer.zero_grad()
        loss.backward()

        if self.should_clip_gradients:
            clip_gradients(self.model, self.current_iteration, self.writer, self.config)

            # visualize the distributions of parameters and their gradients
            if self.current_iteration % 100 == 0 and hasattr(self, "tb_writer"):
                # `secs` is assumed to be an iterable of parameter-name substrings
                # (section prefixes) defined elsewhere in the trainer.
                data = {key: [] for key in secs}
                grad = {key: [] for key in secs}
                for name, param in self.model.named_parameters():
                    if param.data is not None and param.grad is not None and param.data.shape != torch.Size([]):
                        for sec in secs:
                            if sec in name:
                                data[sec].append(param.data.flatten())
                                grad[sec].append(param.grad.flatten())
                for sec in secs:
                    if len(data[sec]) != 0 and len(grad[sec]) != 0:
                        self.tb_writer.add_histogram(sec + "_data_dis", torch.cat(data[sec], dim=0),
                                                     global_step=self.current_iteration)
                        self.tb_writer.add_histogram(sec + "_grad_dis", torch.cat(grad[sec], dim=0),
                                                     global_step=self.current_iteration)

        self.optimizer.step()
        self.profile("Backward time")
Example No. 2
    def _backward(self, loss):
        self.optimizer.zero_grad()
        loss.backward()

        if self.should_clip_gradients:
            clip_gradients(self.model, self.current_iteration, self.writer, self.config)

        self.optimizer.step()
        self.profile("Backward time")
Example No. 3
    def _backward(self, loss):
        self.optimizer.zero_grad()

        # self.model.context_feature_embeddings_list[0][0].image_attention_model.module.transform.module.lc.weight.retain_grad()
        # self.model.image_feature_embeddings_list[0][0].image_attention_model.module.transform.module.lc.weight.retain_grad()
        loss.backward()

        # print(self.model.context_feature_embeddings_list[0][0].image_attention_model.module.transform.module.lc.weight.sum())
        # print(self.model.image_feature_embeddings_list[0][0].image_attention_model.module.transform.module.lc.weight.sum())
        # print(self.model.context_feature_embeddings_list[0][0].image_attention_model.module.transform.module.lc.weight.grad.sum())
        # print(self.model.image_feature_embeddings_list[0][0].image_attention_model.module.transform.module.lc.weight.grad.sum())

        if self.should_clip_gradients:
            clip_gradients(self.model, self.current_iteration, self.writer, self.config)

        self.optimizer.step()
        self.profile("Backward time")