Exemplo n.º 1
0
    def train_emb(self,
                  images,
                  captions,
                  lengths,
                  ids=None,
                  instance_ids=None,
                  *args):
        """One training step given images and captions.

        Args:
            images: batch of image features.
            captions: batch of padded caption token tensors.
            lengths: per-caption lengths used to build the padding mask.
            ids: unused; kept for caller compatibility.
            instance_ids: instance labels forwarded to the loss.
        """
        self.Eiters += 1
        self.logger.update('Eit', self.Eiters)
        self.logger.update('lr', self.optimizer.param_groups[0]['lr'])

        # compute the embeddings
        img_emb, cap_emb = self.forward_emb(images, captions, lengths)
        # reset gradients before the loss/backward pass
        self.optimizer.zero_grad()

        # Binary padding mask: row i holds `length` ones followed by zeros,
        # padded out to max_length + 3 columns.
        # NOTE(review): `max_length` is a free (module-level) name — confirm
        # it matches the padding used when the captions were batched.
        l_list = [int(i) for i in lengths]
        mask = Variable(
            torch.ByteTensor([
                i * [1] + (max_length + 3 - i) * [0] for i in l_list
            ])).cuda()
        loss = self.forward_loss(img_emb, cap_emb, instance_ids, mask)
        # compute gradient and do optimization
        loss.backward()

        if self.grad_clip > 0:
            clip_grad_norm(self.params, self.grad_clip)

        # Zero gradients of frozen embedding rows so those word vectors are
        # never updated.  (Fix: the original loop variable was named `mask`,
        # shadowing the padding mask built above.)
        if self.embed_mask is not None:
            for i, frozen in enumerate(self.embed_mask):
                if frozen:
                    self.txt_enc.module.embed.weight.grad.data[i].zero_()
        self.optimizer.step()
Exemplo n.º 2
0
    def train_emb(self, videos_1, videos_2, ids=None, *args):
        """One training step given two batches of video features.

        Args:
            videos_1, videos_2: paired video feature batches.
            ids: unused; kept for caller compatibility.

        Returns:
            (batch_size, loss_value) with the loss as a plain float.
        """
        self.Eiters += 1

        # zero the gradient buffers
        self.optimizer.zero_grad()

        # compute the embeddings
        videos_emb_1 = self.forward_emb(videos_1)
        videos_emb_2 = self.forward_emb(videos_2)

        loss = self.forward_loss(videos_emb_1, videos_emb_2)

        # Fix: `Tensor.data[0]` raises on 0-dim tensors in torch >= 0.4;
        # use the same version guard as other steps in this file.
        if torch.__version__ == '0.3.1':
            loss_value = loss.data[0]
        else:
            loss_value = loss.item()

        # compute gradient and do SGD step
        loss.backward()
        if self.grad_clip > 0:
            clip_grad_norm(self.params, self.grad_clip)
        self.optimizer.step()

        return videos_emb_1.size(0), loss_value
Exemplo n.º 3
0
    def train_emb(self, oimages, images, captions, lengths, ids=None, *args):
        """Run one optimization step combining retrieval and decoding losses."""
        self.Eiters += 1
        self.logger.update('Eit', self.Eiters)
        self.logger.update('lr', self.optimizer.param_groups[0]['lr'])

        # Forward pass: joint embeddings plus caption-decoder scores.
        (img_emb, cap_emb, oimg_emb, scores, decode_lengths,
         captions) = self.forward_emb(oimages, images, captions, lengths)

        self.optimizer.zero_grad()

        # Total loss = retrieval (VSE) loss + caption-decoding loss.
        loss_vse = self.forward_loss(img_emb, cap_emb)
        loss_de = self.forward_decode_loss(scores, decode_lengths, captions)
        loss = loss_vse + loss_de
        self.logger.update('La', loss.data[0], captions.size(0))

        # Backward pass, optional gradient clipping, parameter update.
        loss.backward()
        if self.grad_clip > 0:
            clip_grad_norm(self.params, self.grad_clip)
        self.optimizer.step()
Exemplo n.º 4
0
    def train_emb(self,
                  images,
                  captions,
                  bboxes,
                  depends,
                  lengths,
                  ids=None,
                  *args):
        """One training step given images and captions.

        Embeds both modalities, scores them with the similarity module,
        then backpropagates the loss and updates the parameters.
        """
        self.Eiters += 1
        self.logger.update('Eit', self.Eiters)
        self.logger.update('lr', self.optimizer.param_groups[0]['lr'])

        # Encode images and captions.
        img_emb, cap_emb, cap_lens = self.forward_emb(
            images, captions, lengths)

        # Score the embedded pairs with the model's similarity head.
        scores = self.forward_sim(img_emb, cap_emb, bboxes, depends, cap_lens)

        self.optimizer.zero_grad()
        loss = self.forward_loss(scores)

        # Backward pass, optional clipping, parameter update.
        loss.backward()
        if self.grad_clip > 0:
            clip_grad_norm(self.params, self.grad_clip)
        self.optimizer.step()
Exemplo n.º 5
0
    def train_emb(self, images, captions, lengths, ids=None, pre=False, *args):
        """One training step; `pre` switches to the pre-training optimizers.

        Returns the image and caption embeddings so callers can reuse them.
        """
        self.Eiters += 1
        print('Eiters:{}, lr:{}'.format(self.Eiters,
                                        self.optimizer.param_groups[0]['lr']))

        # compute the embeddings
        img_emb, cap_emb = self.forward_emb(images, captions, lengths)

        # Pick the optimizer pair for the current phase and reset gradients.
        optimizers = ((self.pre_optimizer, self.MI_pre_opt) if pre
                      else (self.optimizer, self.MI_opt))
        for opt in optimizers:
            opt.zero_grad()

        loss = self.forward_loss(img_emb, cap_emb)

        # retain_graph=True: the graph is kept alive beyond this backward.
        loss.backward(retain_graph=True)

        if self.grad_clip > 0:
            clip_grad_norm(self.params, self.grad_clip)
        for opt in optimizers:
            opt.step()

        return img_emb, cap_emb
Exemplo n.º 6
0
    def train_emb(self, videos, captions, lengths, *args):
        """Single training step over a batch of videos and captions.

        Returns:
            (batch_size, loss_value) with the loss as a plain float.
        """
        self.Eiters += 1
        self.logger.update('Eit', self.Eiters)
        self.logger.update('lr', self.optimizer.param_groups[0]['lr'])

        # Embed both modalities (third argument is a flag forwarded to
        # forward_emb; presumably a volatile/eval switch — confirm).
        vid_emb, cap_emb = self.forward_emb(videos, captions, False)

        self.optimizer.zero_grad()
        loss = self.forward_loss(cap_emb, vid_emb)

        # Scalar extraction differs between the pinned 0.3.1 and newer torch.
        loss_value = (loss.data[0] if torch.__version__ == '0.3.1'
                      else loss.item())

        # Backward, clip if configured, update.
        loss.backward()
        if self.grad_clip > 0:
            clip_grad_norm(self.params, self.grad_clip)
        self.optimizer.step()

        return vid_emb.size(0), loss_value
Exemplo n.º 7
0
    def _train_on_batch(self, batch: Tuple) -> Tuple:
        """
        Evaluate the batch loss, backpropagate, optionally clip gradients,
        and apply one optimizer step.

        Args:
            batch: (batch_x, batch_y, input_lengths, target_lengths)
        """
        batch_x, batch_y, input_lengths, target_lengths = batch

        # Either the custom loss callable drives the model itself, or we run
        # the standard forward pass and score its output.
        if self.custom_model_eval:
            loss, model_output = self.loss(batch, self.model)
        else:
            model_output = self.model(batch_x, input_lengths)
            loss = self.loss(model_output, batch_y)

        self.optimizer.zero_grad()
        loss.backward()

        # Optional gradient clipping.
        if self.clip_grads is not None:
            grads.clip_grad_norm(self.model.parameters(), self.clip_grads)

        # Average gradient norm, reported back to the caller.
        grad_norm = self._comp_gradients()

        self.optimizer.step()
        return loss, model_output, grad_norm
Exemplo n.º 8
0
 def step(self):
     """Clip gradients of every parameter group, then step the optimizer."""
     flat_params = [p for group in self.optimizer.param_groups
                    for p in group['params']]
     clip_grad_norm(flat_params, self.grad_clip)
     self.optimizer.step()
Exemplo n.º 9
0
    def train_emb(self,
                  video_whole,
                  video_part,
                  captions_whole,
                  captions_part,
                  lengths_whole,
                  lengths_part,
                  ids=None,
                  *args):
        """One training step over whole/part video and caption batches."""
        self.Eiters += 1
        self.logger.update('Eit', self.Eiters)
        self.logger.update('lr', self.optimizer.param_groups[0]['lr'])

        # Joint embedding of both granularities of video and caption input.
        img_emb, cap_emb = self.forward_emb(
            video_whole, video_part, captions_whole, captions_part,
            lengths_whole, lengths_part)

        self.optimizer.zero_grad()
        loss = self.forward_loss(img_emb, cap_emb)

        # Backprop, clip if configured, and step.
        loss.backward()
        if self.grad_clip > 0:
            clip_grad_norm(self.params, self.grad_clip)
        self.optimizer.step()
Exemplo n.º 10
0
    def _train_on_batch(self, batch):
        """ Evaluate the loss for `batch`, backpropagate and step once. """
        # Advance the learning-rate schedule (per batch) when one is set.
        if self.scheduler is not None:
            self.scheduler.step()

        # Evaluate the loss: a custom callable may drive the model itself;
        # otherwise run the standard supervised forward pass.
        if self._custom_model_eval:
            loss, model_output = self.loss(batch, self.model)
        else:
            batch_x, batch_y = batch
            model_output = self.model(batch_x)
            loss = self.loss(model_output, batch_y)

        self.optimizer.zero_grad()
        loss.backward()

        # Optional gradient clipping.
        if self._clip_grads is not None:
            Grads.clip_grad_norm(self.model.parameters(), self._clip_grads)

        # Average gradient norm, reported back to the caller.
        grad_norm = self._comp_gradient_norm()

        self.optimizer.step()
        return loss, model_output, grad_norm
Exemplo n.º 11
0
    def train_emb(self,
                  images,
                  captions,
                  concept_labels,
                  concept_input_embs,
                  lengths,
                  ids=None,
                  *args):
        """One training step given images, captions and concept inputs."""
        self.Eiters += 1
        self.logger.update('Eit', self.Eiters)
        self.logger.update('lr', self.optimizer.param_groups[0]['lr'])
        # param_groups[4] is logged separately — presumably the GCN branch's
        # own learning rate; confirm against the optimizer construction.
        self.logger.update('GCN_lr', self.optimizer.param_groups[4]['lr'])

        # Embeddings plus per-modality prediction scores; the w2v input dict
        # feeds the GCN attribute predictor.
        v_emb, t_emb, predict_score_v, predict_score_t = self.forward_emb(
            images, captions, concept_labels, concept_input_embs, lengths,
            self.fuse_weight)

        self.optimizer.zero_grad()
        loss = self.forward_loss(v_emb, t_emb, predict_score_v,
                                 predict_score_t, self.dataset_name)

        # Backward, optional clipping, update.
        loss.backward()
        if self.grad_clip > 0:
            clip_grad_norm(self.params, self.grad_clip)
        self.optimizer.step()
    def train_emb(self, images, captions, lengths, caption_masks,
                  images_lengths, images_masks, query_id, query, num_boxes,
                  boxes, class_labels, *args):
        """One training step given images and captions.

        Embeds both modalities (plus GCN image features and class scores),
        computes the combined loss and applies one optimizer step.
        """
        self.Eiters += 1
        self.logger.update('Eit', self.Eiters)
        self.logger.update('lr', self.optimizer.param_groups[0]['lr'])

        # compute the embeddings
        img_emb, cap_emb, cap_lens, im_masks, GCN_img_emd, class_scores = \
            self.forward_emb(images, captions, lengths, images_masks,
                             caption_masks, boxes)

        # measure accuracy and record loss
        # (fix: the original called optimizer.zero_grad() twice in a row)
        self.optimizer.zero_grad()
        loss = self.forward_loss(img_emb, cap_emb, cap_lens, im_masks,
                                 query_id, class_scores, class_labels)

        self.logger.update('Le', loss.item(), img_emb.size(0))

        # compute gradient and do SGD step
        loss.backward()
        if self.grad_clip > 0:
            clip_grad_norm(self.params, self.grad_clip)
        self.optimizer.step()
Exemplo n.º 13
0
    def train_emb(self,
                  images,
                  captions,
                  target_mask,
                  vision_mask,
                  ids=None,
                  *args):
        """One training step given images and captions (amp-scaled loss)."""
        self.Eiters += 1
        self.logger.update('Eit', self.Eiters)
        self.logger.update('lr', self.optimizer.param_groups[0]['lr'])

        # forward_emb returns the batch scores directly, or None to skip.
        scores = self.forward_emb(images, captions, target_mask, vision_mask)

        self.optimizer.zero_grad()
        if scores is None:
            # Nothing to optimize for this batch.
            return
        loss = scores.sum()
        self.logger.update('Le', loss, images.size(0))

        # Backward through apex/amp's loss scaling instead of plain
        # loss.backward().
        with amp.scale_loss(loss, self.optimizer) as scaled_loss:
            scaled_loss.backward()
        if self.grad_clip > 0:
            clip_grad_norm(self.params, self.grad_clip)
        self.optimizer.step()
Exemplo n.º 14
0
    def train_emb(self, images, captions, lengths, ids, caption_labels,
                  caption_masks, *args):
        """One training step given images and captions.

        Combines the retrieval loss on the embeddings with a captioning
        loss computed from the GCN image embedding.
        """
        self.Eiters += 1
        self.logger.update('Eit', self.Eiters)
        self.logger.update('lr', self.optimizer.param_groups[0]['lr'])

        # compute the embeddings
        img_emb, cap_emb, GCN_img_emd = self.forward_emb(
            images, captions, lengths)

        # reset gradients once before evaluating both losses
        # (fix: the original called optimizer.zero_grad() twice)
        self.optimizer.zero_grad()

        caption_loss = self.calcualte_caption_loss(GCN_img_emd, caption_labels,
                                                   caption_masks)
        retrieval_loss = self.forward_loss(img_emb, cap_emb)
        loss = retrieval_loss + caption_loss

        # NOTE(review): .data[0] only works on pre-0.4 torch; newer versions
        # need .item() for 0-dim tensors.
        self.logger.update('Le_caption', caption_loss.data[0], img_emb.size(0))
        self.logger.update('Le', loss.data[0], img_emb.size(0))

        # compute gradient and do SGD step
        loss.backward()
        if self.grad_clip > 0:
            clip_grad_norm(self.params, self.grad_clip)
        self.optimizer.step()
Exemplo n.º 15
0
 def after_train_iter(self, runner):
     """Backward the runner's loss, clip trainable gradients, then step."""
     runner.optimizer.zero_grad()
     runner.outputs['loss'].backward()
     # Only parameters that actually receive gradients are clipped.
     trainable = (p for p in runner.model.parameters() if p.requires_grad)
     clip_grad_norm(trainable,
                    max_norm=self.max_norm,
                    norm_type=self.norm_type)
     runner.optimizer.step()
Exemplo n.º 16
0
    def train_emb(self, images, captions, lengths, ids=None, *args):
        """Embed a batch, compute the ranking loss and take one SGD step."""
        # Encode both modalities.
        img_emb, cap_emb = self.forward_emb(images, captions, lengths)

        # Reset gradients, then evaluate the loss on the embeddings.
        self.optimizer.zero_grad()
        loss = self.forward_loss(img_emb, cap_emb)

        # Backward pass with optional gradient clipping.
        loss.backward()
        if self.grad_clip > 0:
            clip_grad_norm(self.params, self.grad_clip)
        self.optimizer.step()
    def forward(model, data, training=True, optimizer=None):
        """Run one pass over `data`, optionally training the captioning model.

        Args:
            model: captioning model called as model(imgs, input_captions, lengths).
            data: iterable of (imgs, (captions, lengths)) batches.
            training: when True, backpropagate and step `optimizer`.
            optimizer: optimizer used when training.

        Returns:
            Average perplexity over all batches.
        """
        # NOTE(review): `'cuda' in type` looks broken — `type` is the builtin
        # here, so this raises TypeError; presumably it should test a device
        # string captured elsewhere. Confirm against the original module.
        use_cuda = 'cuda' in type
        loss = nn.CrossEntropyLoss()
        perplexity = AverageMeter()
        batch_time = AverageMeter()
        data_time = AverageMeter()

        if training:
            model.train()
        else:
            model.eval()

        end = time.time()
        for i, (imgs, (captions, lengths)) in enumerate(data):
            data_time.update(time.time() - end)
            if use_cuda:
                imgs = imgs.cuda()
                # NOTE(review): `async=True` is a SyntaxError on Python 3.7+
                # (async is a keyword); modern torch spells this
                # non_blocking=True.
                captions = captions.cuda(async=True)
            # volatile= is pre-0.4 torch inference mode (later torch.no_grad()).
            imgs = Variable(imgs, volatile=not training)
            captions = Variable(captions, volatile=not training)
            # Feed all but the final token as input; targets are the packed
            # full captions.
            input_captions = captions[:-1]
            target_captions = pack_padded_sequence(captions, lengths)[0]

            pred, _ = model(imgs, input_captions, lengths)
            err = loss(pred, target_captions)
            # Perplexity = exp(cross-entropy). .data[0] requires pre-0.4 torch.
            perplexity.update(math.exp(err.data[0]))

            if training:
                optimizer.zero_grad()
                err.backward()
                # NOTE(review): `grad_clip` is a free name — presumably a
                # module-level constant; confirm.
                clip_grad_norm(model.rnn.parameters(), grad_clip)
                optimizer.step()

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()
            # `print_freq` and `epoch` are also free names from the module.
            if i % print_freq == 0:
                logging.info(
                    '{phase} - Epoch: [{0}][{1}/{2}]\t'
                    'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                    'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                    'Perplexity {perp.val:.4f} ({perp.avg:.4f})'.format(
                        epoch,
                        i,
                        len(data),
                        phase='TRAINING' if training else 'EVALUATING',
                        batch_time=batch_time,
                        data_time=data_time,
                        perp=perplexity))

        return perplexity.avg
Exemplo n.º 18
0
 def optimize_loop_wrapper(*args, **kwargs):
     """Zero every optimizer, evaluate `f`, backprop, clip and step each one."""
     if 'optimizers' not in kwargs:
         raise ValueError(
             "When using @optimize, must pass in list of optimizers")
     optimizers = kwargs['optimizers']
     for opt in optimizers:
         opt.zero_grad()
     loss = f(*args, **kwargs)
     loss.backward()
     for opt in optimizers:
         # Flatten this optimizer's parameter groups for clipping.
         group_params = (p for group in opt.param_groups
                         for p in group['params'])
         clip_grad_norm(group_params, max_norm=5.0, norm_type=2)
         opt.step()
     return loss.data[0]
    def train_emb(self, images, captions, lengths):
        """One training step given images and captions."""
        self.Eiters += 1

        # Forward pass through both encoders.
        img_emb, cap_emb = self.forward_emb(images, captions, lengths)

        self.optimizer.zero_grad()
        loss = self.forward_loss(img_emb, cap_emb)
        print('loss', loss.item())

        # Backprop, clip if configured, and update parameters.
        loss.backward()
        if self.grad_clip > 0:
            clip_grad_norm(self.params, self.grad_clip)
        self.optimizer.step()
Exemplo n.º 20
0
def train(batch_size, data_size, in_channels, start_epoch, num_epochs,
          learning_rate, train_loader, test_loader, model, writer, use_gpu,
          model_save_format):
    """Train `model` for `num_epochs`, logging to `writer`, saving per epoch.

    Args:
        batch_size, data_size, in_channels: unused here; kept for callers.
        start_epoch: epoch index to resume from.
        num_epochs: number of epochs to run.
        learning_rate: initial Adam learning rate.
        train_loader / test_loader: training and evaluation data loaders.
        model: network exposing forward() and loss().
        writer: summary writer with add_scalar().
        use_gpu: move batches to CUDA when available.
        model_save_format: format string for per-epoch checkpoint paths.
    """
    num_batches = len(train_loader)

    # NOTE(review): betas=(0.999, 0.999999) is far from Adam's usual
    # (0.9, 0.999) — confirm this is intentional.
    optimizer = torch.optim.Adam(model.parameters(),
                                 betas=(0.999, 0.999999),
                                 lr=learning_rate)
    scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, 0.5)
    global_step = start_epoch * num_batches
    model.train()
    for epoch in range(start_epoch, start_epoch + num_epochs):
        # Halve the learning rate at the start of every epoch.
        scheduler.step()
        print("Learning Rate:", scheduler.get_lr()[0])
        writer.add_scalar("train/learning_rate",
                          scheduler.get_lr()[0], global_step)

        # Fix: time.clock() was removed in Python 3.8; perf_counter() is the
        # documented replacement for elapsed-time measurement.
        start = time.perf_counter()
        for batch_index, (x, logo_image, y,
                          logo_index) in enumerate(train_loader):
            x = x.type(torch.FloatTensor)
            y = y.type(torch.FloatTensor)
            logo_image = logo_image.type(torch.FloatTensor)

            global_step = batch_index + epoch * num_batches

            x = Variable(x)
            y = Variable(y)
            if torch.cuda.is_available() and use_gpu:
                x = x.cuda()
                y = y.cuda()

            optimizer.zero_grad()

            output = model(x)

            loss = model.loss(output, y)
            # Skip backward when the loss contains NaNs (NaN != NaN, so the
            # equality check fails elementwise).
            if (loss == loss).all():
                loss.backward()
            else:
                print("broken")

            print("Total Gradient Norm:",
                  clip_grad.clip_grad_norm(model.parameters(), 100))
            optimizer.step()

            # NOTE(review): loss.data[0] requires pre-0.4 torch; newer
            # versions need loss.item().
            writer.add_scalar("train/loss", loss.data[0], global_step)

            print("Epoch: {}\tBatch: {}/{}\tLoss: {:10.6f}".format(
                epoch, batch_index, num_batches, loss.data[0]))
            # Allow aborting from the OpenCV window with the backtick key.
            if cv2.waitKey(1) == 96:
                raise KeyboardInterrupt("Interrupted")

        print("Epoch %d Took %f seconds" %
              (epoch, time.perf_counter() - start))
        loss, accuracy = test(test_loader, model, writer, global_step, use_gpu)
        torch.save(model, model_save_format % (epoch, loss, accuracy))
Exemplo n.º 21
0
    def train_emb(self, images, captions, lengths, ids=None, *args):
        """One training step given images and captions."""
        self.Eiters += 1
        self.logger.update('iterations', self.Eiters)
        self.logger.update('current learning rate',
                           self.optimizer.param_groups[0]['lr'])

        # Encode the batch (third return value unused here).
        img_emb, cap_emb, _ = self.forward_emb(images, captions, lengths)

        self.optimizer.zero_grad()
        # Remaining loss arguments are unused in this configuration.
        loss = self.forward_loss(img_emb, cap_emb, None, None, None)

        # Backward, clip if configured, update.
        loss.backward()
        if self.grad_clip > 0:
            clip_grad_norm(self.params, self.grad_clip)
        self.optimizer.step()
Exemplo n.º 22
0
    def finish_episode(self):
        """Compute actor-critic losses for the stored episode and update.

        Builds discounted, normalized returns from self.policy.rewards,
        forms policy and value (smooth L1) losses from
        self.policy.saved_actions, backpropagates, clips gradients and
        steps the optimizer.  Clears the episode buffers afterwards.

        Returns:
            The summed (policy + value) loss tensor.
        """
        # Fix: the original used Python 2 `print` statements alongside
        # py3-era torch APIs; converted to print() calls (same output).
        if self.verbose:
            print('Inside finish episode :')

        R = 0
        saved_actions = self.policy.saved_actions
        policy_losses = []
        value_losses = []
        rewards = []

        # Discounted returns, accumulated back-to-front.
        for r in self.policy.rewards[::-1]:
            R = r + self.gamma * R
            rewards.insert(0, R)

        rewards = torch.tensor(rewards).to(self.device)
        # Normalize returns; eps guards against division by zero.
        rewards = (rewards - rewards.mean()) / (rewards.std() + self.eps)

        if self.verbose:
            print('rewards :', rewards)

        for (log_prob, value), r in zip(saved_actions, rewards):
            # Advantage = return minus the critic's value estimate.
            advantage = r - value.item()
            policy_losses.append(-log_prob * advantage)
            value_losses.append(
                F.smooth_l1_loss(
                    value,
                    torch.tensor([r]).to(self.device).unsqueeze(0)))

        self.optimizer.zero_grad()
        loss = torch.stack(policy_losses).sum() + \
            torch.stack(value_losses).sum()
        loss.backward()
        clip_grad.clip_grad_norm(self.policy.parameters(), 100)
        self.optimizer.step()

        # Reset the episode storage in place.
        del self.policy.rewards[:]
        del self.policy.saved_actions[:]

        return loss
Exemplo n.º 23
0
    def train_embed_2(self,
                      video_feat1,
                      video_feat2,
                      captions,
                      length,
                      vids=None):
        """One training step; embeds video_feat2 against the captions.

        NOTE(review): video_feat1 is currently unused (a two-branch variant
        is commented out upstream); kept for caller compatibility.
        """
        self.batch_num += 1
        # Only the second video feature set is embedded here.
        vid_embed, cap_embed = self.forward_emb(video_feat2, captions, length)

        self.optimizer.zero_grad()
        loss = self.forward_loss(vid_embed, cap_embed)
        loss.backward()
        clip_grad_norm(self.params, self.grad_clip)
        self.optimizer.step()

        # Track the per-batch loss history.
        self.batch_loss.append(loss.cpu().data.tolist())
Exemplo n.º 24
0
    def _train_on_batch(self, batch):
        """ Evaluate the batch loss, backpropagate, clip and step once. """
        batch_x, batch_y = batch

        # Custom evaluation lets the loss callable drive the model itself.
        if self._custom_model_eval:
            loss, model_output = self.loss(batch, self.model)
        else:
            model_output = self.model(batch_x)
            loss = self.loss(model_output, batch_y)

        self.optimizer.zero_grad()
        loss.backward()

        # Clip gradients when a threshold is configured.
        if self._clip_grads is not None:
            Grads.clip_grad_norm(self.model.parameters(), self._clip_grads)

        # Average gradient norm for reporting.
        grad_norm = self._comp_gradients()

        self.optimizer.step()
        return loss, model_output, grad_norm
Exemplo n.º 25
0
    def train_emb(self, videos, captions, lengths, cap_ids, *args):
        """One training step given videos and captions.

        Returns:
            (batch_size, loss, positive score, negative score) as plain
            Python numbers.
        """
        self.Eiters += 1
        self.logger.update('Eit', self.Eiters)
        self.logger.update('lr', self.optimizer.param_groups[0]['lr'])

        # Embed both modalities.
        vid_emb, cap_emb = self.forward_emb(videos, captions, volatile=False)

        self.optimizer.zero_grad()
        loss, pos_score, neg_score = self.forward_loss(cap_emb,
                                                       vid_emb,
                                                       cap_ids=cap_ids)

        # Scalar extraction depends on the installed torch version.
        if torch.__version__ == '0.3.1':
            scalars = (loss.data[0], pos_score.data[0], neg_score.data[0])
        else:
            scalars = (loss.item(), pos_score.item(), neg_score.item())
        loss_value, pos_value, neg_value = scalars

        # Backward pass, optional clipping, parameter update.
        loss.backward()
        if self.grad_clip > 0:
            clip_grad_norm(self.params, self.grad_clip)
        self.optimizer.step()

        return vid_emb.size(0), loss_value, pos_value, neg_value
Exemplo n.º 26
0
    def train_emb(self,
                  train_with_audio,
                  images,
                  captions,
                  audios,
                  lengths,
                  ids=None,
                  *args):
        """One training step on image/caption/audio batches.

        When `train_with_audio` is true the loss is computed on the
        image/audio pair, otherwise on the image/caption pair.
        """
        self.Eiters += 1
        self.logger.update('Eit', self.Eiters)
        self.logger.update('lr', self.optimizer.param_groups[0]['lr'])

        # Embed all three modalities.
        img_emb, cap_emb, aud_emb = self.forward_emb(images,
                                                     captions,
                                                     audios,
                                                     lengths=lengths)

        self.optimizer.zero_grad()
        # Fuse the chosen pair through the attention module, then score it.
        if train_with_audio:
            img_emb, aud_emb = attention(self.embed_size, True, img_emb,
                                         aud_emb)
            paired_emb = aud_emb
        else:
            img_emb, cap_emb = attention(self.embed_size, False, img_emb,
                                         cap_emb)
            paired_emb = cap_emb
        loss = self.forward_loss(img_emb, paired_emb)

        # Backward, clip if configured, update.
        loss.backward()
        if self.grad_clip > 0:
            clip_grad_norm(self.params, self.grad_clip)
        self.optimizer.step()
Exemplo n.º 27
0
 def _clip_grad_norm(self) -> None:
     """Clip gradients of trainable parameters per `self._grad_clip` config.

     Picks between the legacy clip_grad_norm (with the max norm scaled by
     the gradient scaler) and clip_grad_norm_, then logs the resulting norm.
     """
     # Only parameters that are trainable and have received gradients.
     clip_norm_params = list(
         filter(lambda parm: parm.requires_grad and parm.grad is not None,
                self.trainer.model.parameters()))
     if len(clip_norm_params) == 0:
         return
     else:
         if hasattr(self._grad_clip, 'clip_norm_mode'):
             # Scale the max norm by the scaler's current scale so clipping
             # matches the scaled gradients.  NOTE(review): `self._scaler`
             # is presumably an AMP grad scaler — confirm.
             scale = self._scaler.get_scale() if self._user_scale else 1.0
             max_norm = self._grad_clip.max_grad_l2_norm * scale
             grad_norm = clip_grad.clip_grad_norm(clip_norm_params,
                                                  max_norm)
         else:
             # Newer API: pass the config mapping's kwargs straight through.
             grad_norm = clip_grad.clip_grad_norm_(clip_norm_params,
                                                   **self._grad_clip)
         self.trainer.log_buffer.put_scalar('grad_norm', float(grad_norm))
Exemplo n.º 28
0
    def train_forwad(self, feature, label):
        """One forward/backward pass with gradient accumulation.

        Gradients accumulate across calls; the optimizer only steps (and
        gradients are only cleared) every `self.iter_size`-th call.
        """
        self.Eiters += 1
        if torch.cuda.is_available():
            feature = feature.cuda()
            label = label.cuda()

        self.logger.update('Eit', self.Eiters)
        self.logger.update('lr', self.optimizer.param_groups[0]['lr'])

        prediction = self.forward(feature)
        loss = self.forward_loss(prediction, label)
        loss.backward()

        # Guard clause: keep accumulating until enough steps have passed.
        if self.Eiters % self.iter_size != 0:
            return
        # Average the accumulated gradients over the accumulation window.
        if self.iter_size != 1:
            for group in self.optimizer.param_groups:
                for param in group['params']:
                    param.grad /= self.iter_size
        if self.grad_clip > 0:
            total_norm = clip_grad_norm(self.params, self.grad_clip)
            if total_norm > self.grad_clip:
                print('clipping gradient: {} with coef {}'.format(
                    total_norm, self.grad_clip / total_norm))
        self.optimizer.step()
        self.optimizer.zero_grad()
Exemplo n.º 29
0
 def _step(self, closure=None):
     """Gradient clipping aware step().

     Clips the tracked parameters to `self.gclip` before delegating to the
     wrapped optimizer's step(), forwarding the optional closure.
     """
     clip_grad_norm(self.params, self.gclip)
     self.optim.step(closure)
def train(args, model_args, lrate):
    """Train a latent walk-back / diffusion model on MNIST.

    Sets up experiment directories, loads MNIST, builds the ``Net`` model
    with separate encoder / transition / decoder optimizers, then runs the
    multi-(meta-)step walk-back training loop, periodically plotting
    losses, forward-diffused images, and samples drawn back from noise.

    NOTE(review): this is Python 2 research code (print statements).  The
    ``lrate`` parameter is overwritten by ``args.lr`` and both ``args``
    and ``model_args`` are replaced by a fresh ``parse_args()`` call
    below, so the values passed in by the caller are effectively ignored
    -- confirm with the caller whether that is intended.
    """

    print("Copying the dataset to the current node's  dir...")

    # machine-specific scratch/home paths (cluster-dependent)
    tmp = '/Tmp/vermavik/'
    home = '/u/vermavik/'
    """
    tmp='/tmp/vermav1/'
    home='/u/79/vermav1/unix/'
    """

    dataset = args.dataset
    data_source_dir = home + 'data/' + dataset + '/'
    """
    if not os.path.exists(data_source_dir):
        os.makedirs(data_source_dir)
    data_target_dir = tmp+'data/CelebA/'
    copy_tree(data_source_dir, data_target_dir)
    """
    ### set up the experiment directories########

    # experiment name encodes the hyperparameters used for this run
    exp_name = experiment_name(dataset=args.dataset,
                               act=args.activation,
                               meta_steps=args.meta_steps,
                               sigma=args.sigma,
                               temperature_factor=args.temperature_factor,
                               alpha1=args.alpha1,
                               alpha2=args.alpha2,
                               alpha3=args.alpha3,
                               grad_norm_max=args.grad_max_norm,
                               epochs=args.epochs,
                               job_id=args.job_id,
                               add_name=args.add_name)

    #temp_model_dir = tmp+'experiments/HVWB/'+dataset+'/model/'+ exp_name
    #temp_result_dir = tmp+'experiments/HVWB/'+dataset+'/results/'+ exp_name
    model_dir = home + 'experiments/HVWB/' + dataset + '/model/' + exp_name
    result_dir = home + 'experiments/HVWB/' + dataset + '/results/' + exp_name

    if not os.path.exists(model_dir):
        os.makedirs(model_dir)

    #if not os.path.exists(temp_result_dir):
    #    os.makedirs(temp_result_dir)
    """   
    #copy_script_to_folder(os.path.abspath(__file__), temp_result_dir)
    result_path = os.path.join(temp_result_dir , 'out.txt')
    filep = open(result_path, 'w')
    
    out_str = str(args)
    print(out_str)
    filep.write(out_str + '\n') 
    
      
    #torch.backends.cudnn.enabled = False # slower but repeatable
    torch.backends.cudnn.enabled = True # faster but not repeatable
                      
    out_str = 'initial seed = ' + str(args.manualSeed)
    print(out_str)
    filep.write(out_str + '\n\n')
    """
    #model_id = '/data/lisatmp4/anirudhg/minst_walk_back/walkback_'
    """
    model_id = '/data/lisatmp4/anirudhg/celebA_latent_walkback/walkback_'
    model_dir = create_log_dir(args, model_id)
    model_id2 =  '../celebA_logs/walkback_'
    model_dir2 = create_log_dir(args, model_id2)
    print model_dir
    print model_dir2 + '/' + 'log.jsonl.gz'
    logger = mimir.Logger(filename=model_dir2  + '/log.jsonl.gz', formatter=None)
    """
    # TODO batches_per_epoch should not be hard coded
    # NOTE(review): the next lines discard the caller-supplied
    # lrate / args / model_args -- see the docstring above.
    lrate = args.lr
    import sys
    sys.setrecursionlimit(10000000)
    args, model_args = parse_args()
    print args

    ## load the training data

    print 'loading mnist'
    train_loader, test_loader = load_mnist(
        data_aug=0,
        batch_size=100,
        test_batch_size=100,
        cuda=True,
        data_target_dir="/u/vermavik/DARC/mnist")
    #train_loader, unlabelled_loader, test_loader = get_mnist(location="/u/vermavik/DARC/mnist", batch_size=64, labels_per_class=100)
    n_colors = 1
    spatial_width = 28

    # estimate scale/shift normalisation statistics from the first batch only
    for batch_idx, (data, target) in enumerate(train_loader):

        Xbatch = data.numpy()
        #print Xbatch
        scl = 1. / np.sqrt(np.mean((Xbatch - np.mean(Xbatch))**2))
        shft = -np.mean(Xbatch * scl)

        break  ### TO DO : calculate statistics on whole data

    print "Width", WIDTH, spatial_width

    # model + a separate optimizer per component (encoder/transition/decoder)
    model = Net(args)
    if args.cuda:
        model.cuda()
    loss_fn = nn.BCELoss()
    if args.optimizer == 'sgd':
        optimizer_encoder = optim.SGD(model.encoder_params,
                                      lr=args.lr,
                                      momentum=args.momentum,
                                      weight_decay=0)
        optimizer_transition = optim.SGD(model.transition_params,
                                         lr=args.lr,
                                         momentum=args.momentum,
                                         weight_decay=0)
        optimizer_decoder = optim.SGD(model.decoder_params,
                                      lr=args.lr,
                                      momentum=args.momentum,
                                      weight_decay=0)
    elif args.optimizer == 'adam':
        # NOTE(review): this branch passes model.parameters() (ALL params)
        # where the SGD branch passes model.encoder_params -- possibly a
        # bug; confirm whether the encoder optimizer should cover
        # everything under Adam.
        optimizer_encoder = optim.Adam(model.parameters(),
                                       lr=args.lr,
                                       betas=(0.9, 0.999),
                                       eps=1e-08,
                                       weight_decay=0)
        optimizer_transition = optim.Adam(model.transition_params,
                                          lr=args.lr,
                                          betas=(0.9, 0.999),
                                          eps=1e-08,
                                          weight_decay=0)
        optimizer_decoder = optim.Adam(model.decoder_params,
                                       lr=args.lr,
                                       betas=(0.9, 0.999),
                                       eps=1e-08,
                                       weight_decay=0)
    uidx = 0
    estop = False
    bad_counter = 0
    #batch_index = 1
    n_samples = 0
    print 'Number of steps....'
    print args.num_steps
    print "Number of metasteps...."
    print args.meta_steps
    print 'Done'
    count_sample = 1

    #### for saving metrics for all steps ###
    train_loss = []
    train_x_loss = []
    train_log_p_reverse = []
    train_kld = []

    #### for saving metrics for each step individually ###
    train_loss_each_step = [[]]
    train_x_loss_each_step = [[]]
    train_log_p_reverse_each_step = [[]]
    #train_kld_each_step = [[]]
    for i in range(args.meta_steps - 1):
        train_loss_each_step.append([])
        train_x_loss_each_step.append([])
        train_log_p_reverse_each_step.append([])
        #train_kld_each_step.append([])

    for epoch in range(args.epochs):
        print('epoch', epoch)
        for batch_idx, (data, target) in enumerate(train_loader):
            #batch_idx = 0
            #for (data, target), (u, _) in zip(cycle(train_loader), unlabelled_loader):
            #    batch_idx +=1
            if args.cuda:
                data, target = data.cuda(), target.cuda()
            data, target = Variable(data), Variable(target)
            data = data.view(-1, 1 * 28 * 28)
            #print data.shape
            t0 = time.time()
            #batch_index += 1
            n_samples += data.data.shape[0]
            #print (n_samples)
            temperature_forward = args.temperature
            meta_cost = []
            x = data
            z = None
            encode = True
            # one optimisation step per meta-step of the walk-back chain;
            # the (x, z) pair produced at each step feeds the next one
            for meta_step in range(0, args.meta_steps):
                #print ('meta_step', meta_step)
                #print encode
                loss, x_loss, log_p_reverse, KLD, z, z_tilde, x_tilde = compute_loss(
                    x,
                    z,
                    model,
                    loss_fn,
                    temperature_forward,
                    meta_step,
                    encode=encode)
                #meta_cost.append(loss)
                #print compute_param_norm(model.conv_x_z_1.weight.data)
                optimizer_encoder.zero_grad()
                optimizer_transition.zero_grad()
                optimizer_decoder.zero_grad()
                loss.backward()
                total_norm = clip_grad_norm(model.parameters(),
                                            args.grad_max_norm)
                #print ('step', meta_step, total_norm)
                # the encoder is only stepped while `encode` is still True
                if encode == True:
                    optimizer_encoder.step()
                optimizer_transition.step()
                optimizer_decoder.step()

                #print ('step', meta_step, clip_grad_norm(model.parameters(), 1000000))
                ### store metrics#######
                train_loss.append(loss.data[0])
                train_x_loss.append(x_loss.data[0])
                train_log_p_reverse.append(-log_p_reverse.data[0])
                if KLD is not None:
                    train_kld.append(KLD.data[0])

                #### store metrices for each step separately###
                train_loss_each_step[meta_step].append(loss.data[0])
                train_x_loss_each_step[meta_step].append(x_loss.data[0])
                train_log_p_reverse_each_step[meta_step].append(
                    -log_p_reverse.data[0])
                #if KLD is not None:
                #    train_kld_each_step[meta_step].append(KLD.data[0])

                if args.meta_steps > 1:
                    #data, _, _, _, _, _, _ = forward_diffusion(data, model, loss_fn,temperature_forward,meta_step)
                    #data = data.view(-1,3, 64,64)
                    #data = Variable(data.data, requires_grad=False)
                    # detach: the next meta-step must not backprop into
                    # this one
                    x = Variable(x_tilde.data, requires_grad=False)
                    z = Variable(z_tilde.data, requires_grad=False)
                    if args.encode_every_step == 0:
                        encode = False
                    temperature_forward *= args.temperature_factor

                #print loss.data
            #print loss.data

            #cost = sum(meta_cost) / len(meta_cost)
            #print cost
            #gradient_updates_ = get_grads(data_use[0],args.temperature)

            # abort training on numerical blow-up
            if np.isnan(loss.data.cpu()[0]) or np.isinf(loss.data.cpu()[0]):
                print loss.data
                print 'NaN detected'
                return 1.

            #batch_idx=0
            # every 100 batches: plot losses, visualise the forward
            # diffusion chain, and sample images back from noise
            if batch_idx % 100 == 0:
                plot_loss(model_dir, train_loss, train_x_loss,
                          train_log_p_reverse, train_kld, train_loss_each_step,
                          train_x_loss_each_step,
                          train_log_p_reverse_each_step, args.meta_steps)

                count_sample += 1
                temperature = args.temperature * (args.temperature_factor**(
                    args.num_steps * args.meta_steps - 1))
                temperature_forward = args.temperature
                #print 'this'

                data_forward_diffusion = data
                for num_step in range(args.num_steps * args.meta_steps):
                    #print "Forward temperature", temperature_forward
                    data_forward_diffusion, _, _, _, _, _, _ = forward_diffusion(
                        data_forward_diffusion, model, loss_fn,
                        temperature_forward, num_step)
                    #print data_forward_diffusion.shape
                    #data_forward_diffusion = np.asarray(data).astype('float32').reshape(args.batch_size, INPUT_SIZE)
                    data_ = data_forward_diffusion.view(
                        -1, 1, spatial_width, spatial_width
                    )  #reshape(args.batch_size, n_colors, WIDTH, WIDTH)
                    if num_step % 2 == 1:
                        plot_images(
                            data_.data.cpu().numpy(),
                            model_dir + '/' + "batch_" + str(batch_idx) +
                            '_corrupted_' + 'epoch_' + str(epoch) +
                            '_time_step_' + str(num_step))

                    temperature_forward = temperature_forward * args.temperature_factor

                print "PLOTTING ORIGINAL IMAGE"
                temp = data.view(-1, 1, spatial_width, spatial_width)
                plot_images(
                    temp.data.cpu().numpy(), model_dir + '/' + 'orig_' +
                    'epoch_' + str(epoch) + '_batch_index_' + str(batch_idx))

                print "DONE PLOTTING ORIGINAL IMAGE"
                '''

                temperature = args.temperature * (args.temperature_factor ** (args.num_steps*args.meta_steps - 1 ))

                for i in range(args.num_steps*args.meta_steps + args.extra_steps):
                    x_data, sampled, sampled_activation, sampled_preactivation  = f_sample(x_data, temperature)
                    print 'On backward step number, using temperature', i, temperature
                    reverse_time(scl, shft, x_data, model_dir + '/'+ "batch_" + str(batch_index) + '_samples_backward_' + 'epoch_' + str(count_sample) + '_time_step_' + str(i))
                    x_data = np.asarray(x_data).astype('float32')
                    x_data = x_data.reshape(args.batch_size, INPUT_SIZE)
                    if temperature == args.temperature:
                        temperature = temperature
                    else:
                        temperature /= args.temperature_factor

                '''

                #print 'this'

                # draw latent noise and run the model's reverse process,
                # annealing the temperature back down to args.temperature
                if args.noise == "gaussian":
                    z_sampled = np.random.normal(
                        0.0, 1.0,
                        size=(args.batch_size, args.nl))  #.clip(0.0, 1.0)
                else:
                    z_sampled = np.random.binomial(1,
                                                   0.5,
                                                   size=(args.batch_size,
                                                         args.nl))

                temperature = args.temperature * (args.temperature_factor**(
                    args.num_steps * args.meta_steps - 1))

                z = torch.from_numpy(np.asarray(z_sampled).astype('float32'))
                if args.cuda:
                    z = z.cuda()
                    z = Variable(z)
                for i in range(args.num_steps *
                               args.meta_steps):  # + args.extra_steps):
                    z_new_to_x, z_to_x, z_new = model.sample(
                        z, temperature,
                        args.num_steps * args.meta_steps - i - 1)
                    #print 'On step number, using temperature', i, temperature
                    if i % 2 == 1:
                        reverse_time(
                            scl, shft,
                            z_new_to_x.data.cpu().numpy(), model_dir +
                            '/batch_index_' + str(batch_idx) + '_inference_' +
                            'epoch_' + str(epoch) + '_step_' + str(i))

                    if temperature == args.temperature:
                        temperature = temperature
                    else:
                        temperature /= args.temperature_factor
                    z = z_new
Exemplo n.º 31
0
def main():
    """Fine-tune the detector on COCO proposals with SGD.

    Builds the model (freezing the early ResNet stages), constructs the
    proposal-based training dataset, then iterates until ``args.max_iter``:
    per-iteration LR schedule, forward pass (data-parallel over GPUs when
    batch_size > 1), classification + bbox regression losses, gradient
    clipping, periodic checkpointing, and timing/stat logging.
    """
    args = parser.parse_args()
    print(args)
    # for now, batch_size should match number of gpus
    assert(args.batch_size==torch.cuda.device_count())

    # create model
    model = detector(arch=args.cnn_arch,
                 base_cnn_pkl_file=args.cnn_pkl,
                 mapping_file=args.cnn_mapping,
                 output_prob=False,
                 return_rois=False,
                 return_img_features=False)
    model = model.cuda()

    # freeze part of the net
    stop_grad=['conv1','bn1','relu','maxpool','layer1']
    model_no_grad=torch.nn.Sequential(*[getattr(model.model,l) for l in stop_grad])
    for param in model_no_grad.parameters():
        param.requires_grad = False

    # define  optimizer (only over the un-frozen parameters)
    optimizer = torch.optim.SGD(filter(lambda p: p.requires_grad, model.parameters()),
                                lr=args.base_lr,
                                momentum=args.momentum,
                                weight_decay=args.wd)

    # create dataset
    train_dataset = CocoDataset(ann_file=args.dset_ann,
                          img_dir=args.dset_path,
                          proposal_file=args.dset_rois,
                          mode='train',
                          sample_transform=preprocess_sample(target_sizes=[800],
                                                             sample_proposals_for_training=True))
    train_loader = DataLoader(train_dataset, batch_size=args.batch_size,shuffle=False, num_workers=args.workers, collate_fn=collate_custom)

    training_stats = TrainingStats(losses=['loss_cls','loss_bbox'],
                                   metrics=['accuracy_cls'],
                                   solver_max_iters=args.max_iter)

    # renamed from `iter` so the loop counter no longer shadows the builtin
    cur_iter = args.start_iter

    print('starting training')

    while cur_iter<args.max_iter:
        for i, batch in enumerate(train_loader):

            if args.batch_size==1:
                batch = to_cuda_variable(batch,volatile=False)
            else:
                # when using multiple GPUs convert to cuda later in data_parallel and list_to_tensor
                batch = to_variable(batch,volatile=False)

            # update lr
            lr = get_lr_at_iter(cur_iter)
            adjust_learning_rate(optimizer, lr)

            # start measuring time
            training_stats.IterTic()

            # forward pass
            if args.batch_size==1:
                cls_score,bbox_pred=model(batch['image'],batch['rois'])
                list_to_tensor = lambda x: x
            else:
                cls_score,bbox_pred=data_parallel(model,(batch['image'],batch['rois'])) # run model distributed over gpus and concatenate outputs for all batch
                # convert gt data from lists to concatenated tensors
                list_to_tensor = lambda x: torch.cat(tuple([i.cuda() for i in x]),0)

            cls_labels = list_to_tensor(batch['labels_int32']).long()
            bbox_targets = list_to_tensor(batch['bbox_targets'])
            bbox_inside_weights = list_to_tensor(batch['bbox_inside_weights'])
            bbox_outside_weights = list_to_tensor(batch['bbox_outside_weights'])

            # compute loss
            loss_cls=cross_entropy(cls_score,cls_labels)
            loss_bbox=smooth_L1(bbox_pred,bbox_targets,bbox_inside_weights,bbox_outside_weights)

            # compute classification accuracy (for stats reporting)
            acc = accuracy(cls_score,cls_labels)

            # get final loss
            loss = loss_cls + loss_bbox

            # update
            optimizer.zero_grad()
            loss.backward()
            # Without gradient clipping I get inf's and NaNs.
            # it seems that in Caffe the SGD solver performs grad clipping by default.
            # https://github.com/BVLC/caffe/blob/master/src/caffe/solvers/sgd_solver.cpp
            # it also seems that Matterport's Mask R-CNN required grad clipping as well
            # (see README in https://github.com/matterport/Mask_RCNN)
            # the value max_norm=35 was taken from here https://github.com/BVLC/caffe/blob/master/src/caffe/proto/caffe.proto
            clip_grad_norm(filter(lambda p: p.requires_grad, model.parameters()), max_norm=35, norm_type=2)
            optimizer.step()

            # stats
            training_stats.IterToc()

            training_stats.UpdateIterStats(losses_dict={'loss_cls': loss_cls.data.cpu().numpy().item(),
                                                        'loss_bbox': loss_bbox.data.cpu().numpy().item()},
                                           metrics_dict={'accuracy_cls':acc.data.cpu().numpy().item()})

            training_stats.LogIterStats(cur_iter, lr)
            # save checkpoint
            if (cur_iter+1)%args.checkpoint_period == 0:
                save_checkpoint({
                    'iter': cur_iter,
                    'args': args,
                    'state_dict': model.state_dict(),
                    'optimizer' : optimizer.state_dict(),
                }, args.checkpoint_fn)

            if cur_iter == args.start_iter + 20: # training_stats.LOG_PERIOD=20
                # Reset the iteration timer to remove outliers from the first few
                # SGD iterations
                training_stats.ResetIterTimer()

            # allow finishing in the middle of an epoch
            if cur_iter>args.max_iter:
                break
            # advance iteration
            cur_iter+=1
Exemplo n.º 32
0
 def step(self, closure=None):
     """Take one optimizer step, clipping the gradient norm first.

     ``self.gclip`` <= 0 disables clipping entirely; otherwise the total
     norm of the gradients in ``self.params`` is capped at ``self.gclip``
     before ``self.optim.step`` runs.
     """
     clipping_enabled = self.gclip > 0
     if clipping_enabled:
         clip_grad_norm(self.params, self.gclip)
     self.optim.step(closure)