Example #1
    def _process_batch(self, input, targets, phase):

        nodes = input.nodes.to(self.device)
        edge_sources = input.edge_sources.to(self.device)
        edge_targets = input.edge_targets.to(self.device)
        edge_distance = input.edge_distance.to(self.device)
        graph_indices = input.graph_indices.to(self.device)
        node_counts = input.node_counts.to(self.device)
        combine_sets = input.combine_sets.to(self.device)
        plane_wave = input.plane_wave.to(self.device)
        targets = targets.to(self.device)
        self.optimizer.zero_grad()

        with torch.set_grad_enabled(phase == 'train'):
            outputs = self.model(nodes, edge_sources, edge_targets,
                                 edge_distance, graph_indices, node_counts,
                                 combine_sets, plane_wave)
            metric_tensors = [
                metric.add_batch_metric(outputs, targets)
                for metric in self.metrics
            ]
            if phase == 'train':
                loss = metric_tensors[0]
                loss.backward()
                if self.clip_value is not None:
                    clip_grad_value_(self.model.parameters(), self.clip_value)
                self.optimizer.step()

        return metric_tensors, outputs
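For reference, a minimal self-contained sketch of the same pattern (the toy model, optimizer, and loss are assumptions, not the repository's code): gradients are enabled only when phase == 'train', and clip_grad_value_ runs between backward() and optimizer.step().

import torch
from torch import nn
from torch.nn.utils import clip_grad_value_

model = nn.Linear(8, 1)
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
loss_fn = nn.MSELoss()

def process_batch(inputs, targets, phase, clip_value=1.0):
    optimizer.zero_grad()
    with torch.set_grad_enabled(phase == 'train'):
        outputs = model(inputs)
        loss = loss_fn(outputs, targets)
        if phase == 'train':
            loss.backward()
            if clip_value is not None:
                clip_grad_value_(model.parameters(), clip_value)
            optimizer.step()
    return loss.detach(), outputs

train_loss, _ = process_batch(torch.randn(4, 8), torch.randn(4, 1), phase='train')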
Example #2
    def update(self, ob_no, ac_na, reward_n, next_ob_no, terminal_n):
        # everything else should be numpy arrays up til this point
        ob_no = ptu.from_numpy(ob_no)
        ac_na = ptu.from_numpy(ac_na).to(torch.long)
        next_ob_no = ptu.from_numpy(next_ob_no)
        reward_n = ptu.from_numpy(reward_n)
        terminal_n = ptu.from_numpy(terminal_n)

        ac_na = ac_na.to(self.device)
        q = torch.gather(self.q_net(ob_no), 1, ac_na.unsqueeze(1)).squeeze()
        #print('q', q.shape)
        ac_qmax = torch.argmax(self.q_net(next_ob_no), dim=1).unsqueeze(1)

        # next_ob_no = next_ob_no.to(self.device)
        q_target = self.q_net_target(next_ob_no)
        q_target_plug_in = q_target.gather(1, ac_qmax).squeeze()
        terminal_n = terminal_n.to(self.device)
        reward_n = reward_n.to(self.device)
        target = (reward_n +
                  q_target_plug_in * torch.logical_not(terminal_n)).detach()

        loss = self.loss(q, target)
        self.optimizer.zero_grad()
        loss.backward()
        utils.clip_grad_value_(self.q_decoder.parameters(),
                               self.grad_norm_clipping)
        self.optimizer.step()
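A tiny illustration (toy numbers, not from this codebase) of the torch.gather pattern used above to pick Q(s, a) for the actions that were actually taken:

import torch

qa_values = torch.tensor([[1.0, 2.0, 3.0],
                          [4.0, 5.0, 6.0]])   # (batch, num_actions)
actions = torch.tensor([2, 0])                # (batch,)
q_taken = torch.gather(qa_values, 1, actions.unsqueeze(1)).squeeze(1)
print(q_taken)  # tensor([3., 4.])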
Example #3
def train_model(model, train_iter, val_iter, max_epoch, last_epoch=0):
    optimizer = tr.optim.Adam(
        filter(lambda p: p.requires_grad, model.parameters()))
    # ,lr=0.001
    for epoch_now in range(1 + last_epoch, 1 + max_epoch):
        model.train()
        start_time = time()
        learning_rate_dacay(optimizer, epoch_now)
        step = len(train_iter) // 10
        for i, batch in enumerate(train_iter):
            text = batch.w[0]
            optimizer.zero_grad()
            y_true = [batch.cate1_id, batch.cate2_id, batch.cate3_id]
            y_pred = model(text.to(DEVICE))
            nll_loss_list = [
                loss_fn(y_pred[i], y_true[i].to(DEVICE)) for i in range(3)
            ]
            tot_loss = wei_criterion(nll_loss_list)
            tot_loss.backward()
            clip_grad_value_(model.parameters(), GRAD_CLIP)
            optimizer.step()
            if i % step == 0:
                print(i * BATCH_SIZE, end=' ', flush=True)
                util.log_and_print(nll_loss_list + [tot_loss])

        print("\n %.1f min,turns:%d  " %
              ((time() - start_time) / 60, epoch_now))

        pred_list = util.get_pred_list(model, val_iter)
        res = util.creterion_val(pred_list)
        util.log_and_print(res)
        tr.save(model.state_dict(), prodirectory + "/{}.pth".format(epoch_now))
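wei_criterion is not shown in this snippet; a plausible form (an assumption, the project's actual weighting may differ) is a fixed weighted sum of the three per-level losses:

def wei_criterion(loss_list, weights=(1.0, 1.0, 1.0)):
    # hypothetical helper: weighted sum of the category-level losses
    return sum(w * l for w, l in zip(weights, loss_list))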
Example #4
 def step(self, q, d, match, l2_ratio=0):
     """
     Do a training step.
     
     Parameters
     ----------
     q : torch.Tensor
         A batch of queries.
     d : torch.Tensor
         A batch of documents.
     match : torch.Tensor (dtype=torch.bool)
         A matrix (2D tensor) where match[i,j] indicates whether q[i] matches d[j].
         
     Returns
     -------
     loss : torch.Tensor (size 1)
         The loss (negative mutual information) of the current batch.
     """
     self.zero_grad()
     qsign = torch.tanh(self.fq(q))
     dsign = torch.tanh(self.fd(d))
     sh = torch_soft_hamming(
         qsign[:, None], dsign[None, :])  #shape = (#queries, #documents)
     bins = self.kernel(sh)
     pos_cat = bins[match].mean(dim=0)
     neg_cat = bins[~match].mean(dim=0)
     loss = -mi_categorical_bernoulli(pos_cat, neg_cat, self.match_prob)
     loss.backward()
     clip_grad_value_(self.parameters(), 5)
     self.optim.step()
     return loss
Example #5
    def step(self, transitions):
        r"""Performs a single learning step.

        :param transitions: a list of :class:`rlcc.Transition`
        :type transitions: :class:`rlcc.Transition`
        """
        # Compute actor loss
        loss = self.loss(transitions)
        # Minimize the loss
        self.optimizer.zero_grad()
        loss.backward()
        # Clip the gradient
        if self.clip_norm:
            nn_utils.clip_grad_norm_(self.local.parameters(), self.clip_norm)
        if self.clip_value:
            nn_utils.clip_grad_value_(self.local.parameters(), self.clip_value)
        # Optimize
        self.optimizer.step()
        # Notify the observers
        for obs in self.observers:
            obs(transitions=transitions,
                local=self.local,
                loss=loss,
                learning_steps=self.learning_steps)
        # soft updates
        soft_update(self.target, self.local, self.tau)

        # update counts
        self.learning_steps += 1
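soft_update is not defined in this snippet; a common implementation of the soft (Polyak) target update it refers to is sketched below (an assumption, the actual rlcc helper may differ):

import torch

def soft_update(target, local, tau):
    # target <- tau * local + (1 - tau) * target, parameter-wise
    with torch.no_grad():
        for t_param, l_param in zip(target.parameters(), local.parameters()):
            t_param.mul_(1.0 - tau).add_(l_param, alpha=tau)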
Example #6
def train_gen_pg_each(policy, env, epoch, optimizer, num_clicks, recom_number,
                      max_length, batch_size=256, total_size=10000):
    policy.train()
    env.eval()
    print('\nTRAINING : Epoch ' + str(epoch))
    all_costs = []
    logs = []
    decay = 0.95
    last_time = time.time()
    if epoch > 1:
        optimizer.param_groups[0]['lr'] = optimizer.param_groups[0]['lr'] * decay
    print('Learning rate : {0}'.format(optimizer.param_groups[0]['lr']))
    for stidx in range(0, total_size, total_size):

        real_size = total_size
        
        batch_replay = ReplayMemory(env, policy, real_size, max_length, num_clicks, recom_number)
        batch_replay.gen_sample(real_size)
        click_batch = Variable(batch_replay.clicks)
        reward_batch = Variable(batch_replay.rewards)
        action_batch = Variable(batch_replay.actions)
        prob_batch = Variable(batch_replay.probs, requires_grad=True)
        value_batch = env.value(reward_batch)
        loss = -(torch.log(prob_batch) * (value_batch)).sum()
        all_costs.append(loss.data.cpu().numpy())
        optimizer.zero_grad()
        loss.backward()
        clip_grad_value_(filter(lambda p: p.requires_grad, policy.parameters()), 1)
        optimizer.step()
        if len(all_costs) == 10000:
            logs.append('{0} ; loss {1} ; seq/s {2}'.format(
                stidx, round(np.mean(all_costs), 2),
                int(len(all_costs) * batch_size / (time.time() - last_time))))
            print(logs[-1])
            last_time = time.time()
            all_costs = []
    return all_costs, reward_batch.float().sum(1).mean().data.cpu().numpy()
Example #7
    def update(self, ob_no, ac_na, next_ob_no, reward_n, terminal_n):
        """
            Update the parameters of the critic.
            let sum_of_path_lengths be the sum of the lengths of the paths sampled from
                Agent.sample_trajectories
            let num_paths be the number of paths sampled from Agent.sample_trajectories
            arguments:
                ob_no: shape: (sum_of_path_lengths, ob_dim)
                next_ob_no: shape: (sum_of_path_lengths, ob_dim). The observation after taking one step forward
                reward_n: length: sum_of_path_lengths. Each element in reward_n is a scalar containing
                    the reward for each timestep
                terminal_n: length: sum_of_path_lengths. Each element in terminal_n is either 1 if the episode ended
                    at that timestep or 0 if the episode did not end
            returns:
                nothing
        """
        ob_no = ptu.from_numpy(ob_no)
        ac_na = ptu.from_numpy(ac_na).to(torch.long)
        next_ob_no = ptu.from_numpy(next_ob_no)
        reward_n = ptu.from_numpy(reward_n)
        terminal_n = ptu.from_numpy(terminal_n)

        #print(ob_no)

        qa_t_values = self.q_net(ob_no)
        q_t_values = torch.gather(qa_t_values, 1,
                                  ac_na.unsqueeze(1)).squeeze(1)

        # TODO compute the Q-values from the target network
        qa_tp1_values = self.q_net_target(next_ob_no)

        if self.double_q:
            # You must fill this part for Q2 of the Q-learning portion of the homework.
            # In double Q-learning, the best action is selected using the Q-network that
            # is being updated, but the Q-value for this action is obtained from the
            # target Q-network. See page 5 of https://arxiv.org/pdf/1509.06461.pdf for more details.
            q_tp1 = torch.gather(qa_tp1_values, 1,
                                 torch.argmax(qa_t_values,
                                              dim=1).unsqueeze(1)).squeeze(1)
        else:
            q_tp1, _ = qa_tp1_values.max(dim=1)

        # TODO compute targets for minimizing Bellman error
        # HINT: as you saw in lecture, this would be:
        #currentReward + self.gamma * qValuesOfNextTimestep * (not terminal)
        target = reward_n + self.gamma * (q_tp1 * (1 - terminal_n))
        target = target.detach()

        assert q_t_values.shape == target.shape
        loss = self.loss(q_t_values, target)

        self.optimizer.zero_grad()
        loss.backward()
        utils.clip_grad_value_(self.q_net.parameters(),
                               self.grad_norm_clipping)
        self.optimizer.step()

        return {
            'Training Loss': ptu.to_numpy(loss),
        }
Example #8
 def step(self, q, d, r, stage=1):
     """
     Do a training step.
     
     Parameters
     ----------
     q : torch.Tensor
         A batch of queries.
     d : torch.Tensor
         A batch of documents.
     r : torch.Tensor (dtype=torch.bool)
         A matrix (2D tensor) where r[i,j] indicates whether q[i] matches d[j].
     stage : float (optional)
         The stage for computing the tanh(stage*beta*x). (default=1)
         
     Returns
     -------
     loss : torch.Tensor (size 1)
         The loss of HashNet.
     """
     self.zero_grad()
     zq = self.fq(q)
     zd = self.fd(d)
     bq = torch.tanh(stage * self.beta * zq)
     bd = torch.tanh(stage * self.beta * zd)
     sh = (bq[:, None] * bd[None, :]).sum(dim=2)
     ash = self.alpha * sh
     p = self.match_prob
     w = (r / p + ~r / (1 - p))
     losses = w * (softplus(ash) - r * ash)
     loss = losses.mean()
     loss.backward()
     clip_grad_value_(self.parameters(), 5)
     self.optim.step()
     return loss
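A quick sanity check (illustrative only) of the loss form above: for a single score x, softplus(x) - r*x equals -log(sigmoid(x)) when r=1 and -log(1 - sigmoid(x)) when r=0, so this is a weighted pairwise binary cross-entropy in which w rebalances matches and non-matches by the match probability p.

import torch
from torch.nn.functional import softplus

x = torch.tensor(0.7)
print(softplus(x) - 1 * x, -torch.log(torch.sigmoid(x)))      # equal
print(softplus(x) - 0 * x, -torch.log(1 - torch.sigmoid(x)))  # equal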
Example #9
    def _step(self, loader: DataLoader, training: bool = True) -> float:
        running_loss = 0.0
        if training:
            self.net.train()
        else:
            self.net.eval()

        for _, (_, data) in enumerate(loader):
            inputs, labels = data
            inputs = inputs.to(self.dev)
            labels = labels.to(self.dev)

            if training:
                self.optimizer.zero_grad()

            outputs = self.net(inputs.float())
            loss = self.loss_function(outputs.squeeze(),
                                      labels.squeeze().float())

            if training:
                loss.backward()
                clip_grad_value_(self.net.parameters(), clip_value=0.05)
                self.optimizer.step()
            running_loss += loss.item() * self.batch_size

        if self.use_hvd:
            running_loss = hvd.allreduce(torch.tensor(running_loss),
                                         name="avg_loss").item()

        inputs.detach()
        labels.detach()
        return running_loss
Example #10
 def backward(self, batch):
     self.lmOpt.zero_grad()
     backwardAgent(batch,
                   device=self.config.device,
                   n_quant=self.config.N_QUANT_LM)
     clip_grad_value_(self.lm.parameters(), 10)
     self.lmOpt.step()
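In isolation, clip_grad_value_ clamps each parameter's .grad in place to [-clip_value, clip_value]; a minimal illustration with a toy parameter (not from the project):

import torch
from torch.nn.utils import clip_grad_value_

w = torch.nn.Parameter(torch.zeros(3))
w.grad = torch.tensor([-7.0, 0.5, 12.0])
clip_grad_value_([w], 10)
print(w.grad)  # tensor([-7.0000,  0.5000, 10.0000])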
Example #11
 def step(self, q, d, match, l2_ratio=0.01, nbatch=None):
     """
     Do a training step.
     
     Parameters
     ----------
     q : torch.Tensor
         A batch of queries.
     d : torch.Tensor
         A batch of documents.
     match : torch.Tensor (dtype=torch.bool)
         A matrix (2D tensor) where match[i,j] indicates whether q[i] matches d[j].
     l2_ratio : float (optional)
         The wanted ratio between the Fbeta Loss and L2 regularization. (default 0.01)
     nbatch : int or None (optional)
         The number of batches processed so far; this is used for ramping. If
         None, the final ramping value is used.
         
     Returns
     -------
     loss : torch.Tensor (size 1)
         The loss of the current batch.
     """
     self.zero_grad()
     loss = self.loss(q, d, match, l2_ratio=l2_ratio, nbatch=nbatch)
     loss.backward()
     clip_grad_value_(self.parameters(), 5)
     self.optim.step()
     return loss
Example #12
File: model.py, Project: sgalkina/pixyz
    def train(self, train_x_dict={}, **kwargs):
        """Train the model.

        Parameters
        ----------
        train_x_dict : dict
            Input data.
        **kwargs

        Returns
        -------
        loss : torch.Tensor
            Train loss value

        """
        self.distributions.train()

        self.optimizer.zero_grad()
        loss = self.loss_cls.eval(train_x_dict, **kwargs)

        # backprop
        loss.backward()

        if self.clip_norm:
            clip_grad_norm_(self.distributions.parameters(), self.clip_norm)
        if self.clip_value:
            clip_grad_value_(self.distributions.parameters(), self.clip_value)

        # update params
        self.optimizer.step()

        return loss
Example #13
    def update(self, ob_no, ac_na, next_ob_no, reward_n, terminal_n):
        ob_no = ptu.from_numpy(ob_no)
        ac_na = ptu.from_numpy(ac_na).to(torch.long)
        next_ob_no = ptu.from_numpy(next_ob_no)
        reward_n = ptu.from_numpy(reward_n)
        terminal_n = ptu.from_numpy(terminal_n)

        qa_t_values = self.q_net(ob_no)
        q_t_values = torch.gather(qa_t_values, 1,
                                  ac_na.unsqueeze(1)).squeeze(1)
        qa_tp1_values = self.q_net_target(next_ob_no)

        if self.double_q:
            next_actions = self.q_net(next_ob_no).argmax(dim=1)
            q_tp1 = torch.gather(qa_tp1_values, 1,
                                 next_actions.unsqueeze(1)).squeeze(1)
        else:
            q_tp1, _ = qa_tp1_values.max(dim=1)

        target = reward_n + self.gamma * q_tp1 * (1 - terminal_n)
        target = target.detach()
        loss = self.loss(q_t_values, target)

        self.optimizer.zero_grad()
        loss.backward()
        utils.clip_grad_value_(self.q_net.parameters(),
                               self.grad_norm_clipping)
        self.optimizer.step()

        return {'Training Loss': ptu.to_numpy(loss)}
Example #14
    def train_step(
        self, inputs, labels, device="cpu", grad_clip_value=1, weight_clip_value=1
    ):
        self.optim.zero_grad()
        output = self.model(inputs.to(device))
        loss = self.criterion(output, labels.to(device))
        loss.backward()
        clip_grad_value_(self.model.parameters(), grad_clip_value)

        for p in self.model.parameters():
            if hasattr(p, "latent_"):
                p.data.copy_(p.latent_)

        self.optim.step()

        for p in self.model.parameters():
            if hasattr(p, "latent_"):
                p.latent_.copy_(p.data.clamp_(-weight_clip_value, weight_clip_value))

        pred = output.argmax(dim=1, keepdim=True)
        correct = (
            pred.eq(labels.to(device).view_as(pred)).sum().item() / labels.shape[0]
        ) * 100

        return loss, correct
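The latent_ attribute used above is assumed to be attached to the parameters elsewhere in the project; a hypothetical setup consistent with this step keeps a full-precision shadow copy next to each clipped parameter:

import torch
from torch import nn

model = nn.Linear(4, 2)  # stand-in for self.model
for p in model.parameters():
    # hypothetical initialization; the real project may attach latent_ differently
    p.latent_ = p.data.clone()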
Example #15
    def optimize(self, loss, clip_norm_args=None, clip_val_args=None):
        """Short summary.

        Parameters
        ----------
        loss : torch.Tensor
        clip_norm_args : list, tuple
            If provided the norm of the gradients will be clipped.
            First value represents the max. grad. value, second the norm
            (optional).
        clip_val_args : int
            If provided the gradients will be clipped in range
            (-clip_val_args, clip_val_args)
        Note
        ----------
        clip_norm_args and clip_val_args are mutually exclusive.
        """
        
        if clip_norm_args is not None and clip_val_args is not None:
            raise ValueError(
                "'clip_norm_args' and 'clip_val_args' are mutually exclusive.")

        self.optimizer.zero_grad()
        loss.backward()
        if clip_norm_args is not None:
            clip_grad_norm_(self.parameters, *clip_norm_args)
        if clip_val_args is not None:
            clip_grad_value_(self.parameters, clip_val_args)
        self.optimizer.step()
Example #16
def train(epoch, loss_fn, train_loader, model, optimizer, gclip=5, schd=None):
    model.train()
    loader = progress_bar(train_loader, parent=epoch)

    total_loss = 0

    for data, target in loader:
        if is_cuda:
            data, target = data.cuda(), target.cuda()
            
        optimizer.zero_grad()
        output, att, _ = model(data)
        loss = loss_fn([output, att], target)
        total_loss += loss.item()
        loss.backward()
        if gclip > 0:
            utils.clip_grad_value_(
                model.parameters(),
                gclip
            )
        optimizer.step()

        if schd and hasattr(schd, 'batch_step'):
            schd.batch_step()

        loader.comment = f'Loss: {loss.item():.4f}'
    
    return total_loss / len(train_loader)
Example #17
def get_objf(batch: Dict,
             model: AcousticModel,
             P: k2.Fsa,
             device: torch.device,
             graph_compiler: MmiTrainingGraphCompiler,
             is_training: bool,
             tb_writer: Optional[SummaryWriter] = None,
             global_batch_idx_train: Optional[int] = None,
             optimizer: Optional[torch.optim.Optimizer] = None):
    feature = batch['inputs']
    # at entry, feature is [N, T, C]
    feature = feature.permute(0, 2, 1)  # now feature is [N, C, T]
    assert feature.ndim == 3
    feature = feature.to(device)

    supervisions = batch['supervisions']
    supervision_segments, texts = encode_supervisions(supervisions,
                                                      model.subsampling_factor)

    loss_fn = LFMMILoss(graph_compiler=graph_compiler,
                        P=P,
                        den_scale=den_scale)

    grad_context = nullcontext if is_training else torch.no_grad

    with grad_context():
        nnet_output = model(feature)
        # nnet_output is [N, C, T]
        nnet_output = nnet_output.permute(0, 2, 1)  # now nnet_output is [N, T, C]
        mmi_loss, tot_frames, all_frames = loss_fn(nnet_output, texts,
                                                   supervision_segments)

    if is_training:

        def maybe_log_gradients(tag: str):
            if (tb_writer is not None and global_batch_idx_train is not None
                    and global_batch_idx_train % 200 == 0):
                tb_writer.add_scalars(tag,
                                      measure_gradient_norms(model, norm='l1'),
                                      global_step=global_batch_idx_train)

        optimizer.zero_grad()
        (-mmi_loss).backward()
        maybe_log_gradients('train/grad_norms')
        clip_grad_value_(model.parameters(), 5.0)
        maybe_log_gradients('train/clipped_grad_norms')
        if tb_writer is not None and global_batch_idx_train % 200 == 0:
            # Once in a time we will perform a more costly diagnostic
            # to check the relative parameter change per minibatch.
            deltas = optim_step_and_measure_param_change(model, optimizer)
            tb_writer.add_scalars('train/relative_param_change_per_minibatch',
                                  deltas,
                                  global_step=global_batch_idx_train)
        else:
            optimizer.step()

    ans = (-mmi_loss.detach().cpu().item(), tot_frames.cpu().item(),
           all_frames.cpu().item())
    return ans
Example #18
def clip_grad_func(parameters, method: str, **kwargs):
    if method == 'norm':
        clip_grad_norm_(parameters, **kwargs)
    elif method == 'value':
        clip_grad_value_(parameters, **kwargs)
    else:
        raise ValueError("Wrong gradient clip type!")
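A usage sketch for clip_grad_func above with a toy model (the model and loss are assumptions, and clip_grad_func is taken to be in scope as defined above):

import torch
from torch import nn

model = nn.Linear(3, 1)
model(torch.randn(5, 3)).sum().backward()
clip_grad_func(model.parameters(), 'value', clip_value=0.5)
clip_grad_func(model.parameters(), 'norm', max_norm=1.0)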
Example #19
 def _train_batch(self, batch: CollateBatch) -> Dict[str, Any]:
     batch: Dict[str, torch.Tensor] = batch.to_device(
         device=self._cuda_device, non_blocking=True
     )
     output_dict = self._pytorch_model(**batch).pop("loss_info")
     loss = output_dict.get("batch-loss")
     loss.backward()
     # Gradient Clipping
     if self._grad_norm is not None:
         clip_grad_norm_(self._model.parameters(), self._grad_norm)
     if self._grad_clip is not None:
         clip_grad_value_(self._model.parameters(), self._grad_clip)
     # Update step
     self._perform_one_step()
     metrics = self._model.get_metrics()
     # Add metrics from output dict
     metrics.update(
         {k: v.item() if isinstance(v, torch.Tensor) else v for k, v in output_dict.items()}
     )
     # Add Learning rate
     if self._encoder_scheduler is not None:
         metrics["encoder_lr"] = self._encoder_scheduler.get_current_lr()[0]
         metrics["decoder_lr"] = self._decoder_scheduler.get_current_lr()[0]
     else:
         metrics["lr"] = self._scheduler.get_current_lr()[0]
     return metrics
Example #20
def run(config):
    model = get_model(config[MODEL_NAME], config[MODEL_PARAMS]).cuda()
    criterion = get_loss(config[LOSS_NAME], config[LOSS_PARAMS])
    optimizer = get_optimizer(config[OPTIM_NAME],
                              model.parameters(),
                              optimizer_params=config[OPTIM_PARAMS])

    last_epoch = -1
    scheduler = get_scheduler(config[SCHEDULER_NAME], optimizer, last_epoch,
                              config[SCHEDULER_PARAMS])

    datasets = {
        stage: CustomDataset(DATA_DIR, stage, config[FOLD_ID],
                             config[DATA_PREFIX], config[INPUT_SIZE])
        for stage in ['train', 'test']
    }

    dataloaders = {
        stage: get_dataloader(datasets[stage], config[BATCH_SIZE])
        for stage in ['train', 'test']
    }

    writer = SummaryWriter(config[TRAIN_DIR])
    clip_grad_value_(model.parameters(), 2.0)
    train(config, model, dataloaders, criterion, optimizer, scheduler, writer,
          last_epoch + 1)
Example #21
def train_epoch(epoch, args, model, dataloader_train, optimizer, scheduler, feature_map):
    # Set training mode for modules
    for _, net in model.items():
        net.train()
        
    batch_count = len(dataloader_train)
    total_loss = 0.0
    for batch_id, data in enumerate(dataloader_train):
        for _, net in model.items():
            net.zero_grad()
        
        loss = evaluate_loss(args, model, data, feature_map)

        loss.backward()
        total_loss += loss.data.item()

        # Clipping gradients
        if args.gradient_clipping:
            for _, net in model.items():
                clip_grad_value_(net.parameters(), 1.0)

        # Update params of rnn and mlp
        for _, opt in optimizer.items():
            opt.step()
        
        for _, sched in scheduler.items():
            sched.step()

        if args.log_tensorboard:
            log_value('train_batch_loss ' + args.fname, loss, batch_id + batch_count * epoch)

    return total_loss / batch_count
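A toy version of the multi-module pattern above (assuming model is a dict of nn.Modules sharing one loss, as in this snippet), showing per-module zero_grad, value clipping, and per-module optimizer steps:

import torch
from torch import nn
from torch.nn.utils import clip_grad_value_

model = {'rnn': nn.GRU(4, 8, batch_first=True), 'mlp': nn.Linear(8, 1)}
optimizer = {name: torch.optim.Adam(net.parameters()) for name, net in model.items()}

x = torch.randn(2, 5, 4)  # (batch, seq, features)
for net in model.values():
    net.zero_grad()
out, _ = model['rnn'](x)
loss = model['mlp'](out[:, -1]).pow(2).mean()
loss.backward()
for net in model.values():
    clip_grad_value_(net.parameters(), 1.0)
for opt in optimizer.values():
    opt.step()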
Example #22
    def iterate_func(engine, batch):
        optimizer.zero_grad()
        inputA, inputB, target, personA, personB, ind, _, _ = batch
        if len(inputA.shape) == len(inputB.shape) == 4:
            inputA = torch.unsqueeze(inputA, 0)
            inputB = torch.unsqueeze(inputB, 0)
        assert inputA.shape[1] == inputB.shape[1] == opt.sampleSeqLength, \
            ValueError(f"ind: {ind}, inputA {inputA.shape}, inputB {inputB.shape}, required seq lenth {opt.sampleSeqLength}")
        if torch.cuda.is_available():
            inputA = inputA.float().cuda()
            inputB = inputB.float().cuda()
            target = target.float().cuda()
            personA = personA.long().cuda()
            personB = personB.long().cuda()
        distance, outputA, outputB = model(inputA, inputB)
        contrast_loss = contrast_criterion(distance, target)
        class_loss_A = class_criterion_A(outputA, personA)
        class_loss_B = class_criterion_B(outputB, personB)
        loss = contrast_loss + class_loss_A + class_loss_B
        loss.backward()

        clip_grad_value_(model.parameters(),
                         clip_value=opt.gradClip or sys.maxsize)
        optimizer.step()
        return (loss.item(), contrast_loss.item(), class_loss_A.item(),
                class_loss_B.item())
Example #23
def train_lfmmi_one_iter(model, egs_file, den_fst_path, training_opts, feat_dim,
                         minibatch_size="64", use_gpu=True, lr=0.0001,
                         weight_decay=0.25, frame_shift=0, print_interval=10):
    pkwrap.kaldi.InstantiateKaldiCuda()
    if training_opts is None:
        training_opts = pkwrap.kaldi.chain.CreateChainTrainingOptionsDefault()
    den_graph = pkwrap.kaldi.chain.LoadDenominatorGraph(den_fst_path, model.output_dim)
    criterion = pkwrap.chain.KaldiChainObjfFunction.apply
    if use_gpu:
        model = model.cuda()
    optimizer = optim.SGD(model.parameters(), lr=lr, weight_decay=weight_decay)
    acc_sum = torch.tensor(0., requires_grad=False)
    for mb_id, merged_egs in enumerate(pkwrap.chain.prepare_minibatch(egs_file, minibatch_size)):
        features = pkwrap.kaldi.chain.GetFeaturesFromEgs(merged_egs)
        # 1+frame_shift because we generated a context of 14, not 13 as required by the model
        features = features[:, 1+frame_shift:1+140+25+frame_shift, :]
        features = features.cuda()
        output, xent_output = model(features)
        sup = pkwrap.kaldi.chain.GetSupervisionFromEgs(merged_egs)
        deriv = criterion(training_opts, den_graph, sup, output, xent_output)
        acc_sum.add_(deriv[0])
        if mb_id>0 and mb_id%print_interval==0:
            sys.stderr.write("Overall objf={}\n".format(acc_sum/print_interval))
            acc_sum.zero_()
        optimizer.zero_grad()
        deriv.backward()
        clip_grad_value_(model.parameters(), 5.0)
        optimizer.step()
    sys.stdout.flush()
    model = model.cpu()
    return model
Example #24
 def on_backward_end(self, *args, **kwargs):
     if self.__mode == "value":
         for md in self.__modules:
             clip_grad_value_(md.parameters(), **self.__kwargs)
     else:
         for md in self.__modules:
             clip_grad_norm_(md.parameters(), **self.__kwargs)
Example #25
def train_epoch(
        epoch, args, model, dataloader_train, optimizer,
        scheduler, feature_map, summary_writer=None):
    # Set training mode for modules
    for _, net in model.items():
        net.train()

    batch_count = len(dataloader_train)
    total_loss = 0.0
    for batch_id, data in enumerate(dataloader_train):
        for _, net in model.items():
            net.zero_grad()

        loss = evaluate_loss(args, model, data, feature_map)

        loss.backward()
        total_loss += loss.data.item()

        # Clipping gradients
        if args.gradient_clipping:
            for _, net in model.items():
                clip_grad_value_(net.parameters(), 1.0)

        # Update params of rnn and mlp
        for _, opt in optimizer.items():
            opt.step()

        for _, sched in scheduler.items():
            sched.step()

        if args.log_tensorboard:
            summary_writer.add_scalar('{} {} Loss/train batch'.format(
                args.note, args.graph_type), loss, batch_id + batch_count * epoch)

    return total_loss / batch_count
Example #26
def train(loader, model, crit, optimizer, lr_scheduler, opt, rl_crit=None):
    model.train()
    #model = nn.DataParallel(model)
    for epoch in range(opt["epochs"]):
        lr_scheduler.step()

        iteration = 0

        # If start self crit training
        if opt["self_crit_after"] != -1 and epoch >= opt["self_crit_after"]:
            sc_flag = True
            init_cider_scorer(opt["cached_tokens"])
        else:
            sc_flag = False


        for data in loader:
            #torch.cuda.synchronize()
            i3d_feats = data['i3d_feats'].squeeze(1) #.cuda()
            labels = data['labels'] #.cuda()
            masks = data['masks'] #.cuda()

            if not sc_flag:
                seq_probs, _ = model(i3d_feats, labels, 'train')
                loss = crit(seq_probs, labels[:, 1:], masks[:, 1:])
            else:
                seq_probs, seq_preds = model(
                    i3d_feats, mode='inference', opt=opt)
                reward = get_self_critical_reward(model, i3d_feats, data,
                                                  seq_preds)
                print(reward.shape)
                loss = rl_crit(seq_probs, seq_preds,
                               torch.from_numpy(reward).float()) #.cuda())
            
            optimizer.zero_grad()
            loss.backward()
            clip_grad_value_(model.parameters(), opt['grad_clip'])
            optimizer.step()
            train_loss = loss.item()
            #torch.cuda.synchronize()

            iteration += 1
            
            if not sc_flag:
                print("iter %d (epoch %d), train_loss = %.6f" %
                      (iteration, epoch, train_loss))
            else:
                print("iter %d (epoch %d), avg_reward = %.6f" %
                      (iteration, epoch, np.mean(reward[:, 0])))
            

        if epoch % opt["save_checkpoint_every"] == 0:
            model_path = os.path.join(opt["checkpoint_path"],
                                      'model_%d.pth' % (epoch))
            model_info_path = os.path.join(opt["checkpoint_path"],
                                           'model_score.txt')
            torch.save(model.state_dict(), model_path)
            print("model saved to %s" % (model_path))
            with open(model_info_path, 'a') as f:
                f.write("model_%d, loss: %.6f\n" % (epoch, train_loss))
Example #27
def train_epoch(epoch,
                args,
                model,
                dataloader_train,
                optimizer,
                scheduler,
                feature_map,
                summary_writer=None):
    # Set training mode for modules
    for _, net in model.items():
        net.train()

    batch_count = len(dataloader_train)  # number of batches
    # print('number of batches: ', batch_count)
    # print('batch_count: ', batch_count)
    # print('len of train dataset: ', len(dataloader_train.dataset))

    y_preds = []
    y_trues = []

    total_loss = 0.0
    for batch_id, data in enumerate(dataloader_train):

        # print('train.py: batch_id: ', batch_id)
        # print('train.py: data: ', data)

        for _, net in model.items():
            net.zero_grad()

        loss, y_pred, y_true = evaluate_loss(args, model, data, feature_map)
        y_preds.extend(y_pred)
        y_trues.extend(y_true)

        loss.backward()
        total_loss += loss.data.item()

        # Clipping gradients
        if args.gradient_clipping:
            for _, net in model.items():
                clip_grad_value_(net.parameters(), 1.0)

        # Update params of rnn and mlp
        for _, opt in optimizer.items():
            opt.step()

        for _, sched in scheduler.items():
            sched.step()

        if args.log_tensorboard:
            summary_writer.add_scalar(
                '{} {} Loss/train batch'.format(args.note, args.graph_type),
                loss, batch_id + batch_count * epoch)

    y_preds = [1. if n >= 0.5 else 0. for n in y_preds]
    # print('y_trues: ', y_trues)
    # print('y_preds: ', y_preds)

    return total_loss / batch_count, accuracy_score(y_trues, y_preds)
Example #28
    def train(self):
        self.optimizer.schedule()
        self.loss.step()
        epoch = self.optimizer.get_last_epoch() + 1
        lr = self.optimizer.get_lr()

        self.ckp.write_log(
                '[Epoch {}]\tLearning rate: {:.2e}'.format(epoch, Decimal(lr)))
        self.loss.start_log()
        self.model.train()

        timer_data, timer_model = utility.timer(), utility.timer()
        loss_exits = None
        for batch, (lr, hr, _, idx_scale) in enumerate(self.loader_train):
            lr, hr = self.prepare(lr, hr)
            timer_data.hold()
            timer_model.tic()

            self.optimizer.zero_grad()
            sr = self.model(lr, idx_scale)
            loss = self.loss(sr, hr)
            if loss_exits is None and self.args.multi_exit:
                loss_exits = [0 for _ in range(len(sr))]
            if self.args.multi_exit:
                temp_loss = loss[1:]
                for i in range(len(temp_loss)):
                    loss_exits[i] += temp_loss[i] / self.args.print_every
                loss = loss[0]
            loss.backward()
            if self.args.gclip > 0:
                utils.clip_grad_value_(self.model.parameters(), self.args.gclip)
            self.optimizer.step()

            timer_model.hold()

            if (batch + 1) % self.args.print_every == 0:
                if loss_exits:
                    exits_loss_str = ""
                    exits_loss_str += "E0" + ": %.4f" % loss_exits[0].item()
                    for i in range(1, len(loss_exits)):
                        exits_loss_str += " E" + str(i) + ": %.4f" % loss_exits[i].item()
                    self.ckp.write_log('[{}/{}]\t{}\t{}\t{:.1f}+{:.1f}s'.format(
                            (batch + 1) * self.args.batch_size,
                            len(self.loader_train.dataset),
                            self.loss.display_loss(batch), exits_loss_str,
                            timer_model.release(), timer_data.release()))
                    loss_exits = None
                else:
                    self.ckp.write_log('[{}/{}]\t{}\t{:.1f}+{:.1f}s'.format(
                            (batch + 1) * self.args.batch_size,
                            len(self.loader_train.dataset),
                            self.loss.display_loss(batch),
                            timer_model.release(), timer_data.release()))

            timer_data.tic()

        self.loss.end_log(len(self.loader_train))
        self.error_last = self.loss.log[-1, -1]
Example #29
File: optim.py, Project: pyro-ppl/pyro
 def _clip_grad(
     params: Union[Tensor, Iterable[Tensor]],
     clip_norm: Optional[Union[int, float]] = None,
     clip_value: Optional[Union[int, float]] = None,
 ) -> None:
     if clip_norm is not None:
         clip_grad_norm_(params, clip_norm)
     if clip_value is not None:
         clip_grad_value_(params, clip_value)

def train(model,
          data_loader,
          data_loader_test,
          optimizer,
          epoch=5,
          thres=0.5,
          weight=1,
          agg='mean',
          save_name=None,
          multilayer=True,
          local=False,
          graphsage=False):
    for i in range(epoch):
        min_loss = 10.
        total_loss = 0
        print('epoch: ', i)
        for index, d in enumerate(data_loader):
            try:
                model.train()
                model.to(Device)
                hidden_vec, label = model(d[0])
                loss = torch.sum(-1. * torch.log(hidden_vec[label == 1]))
                N = torch.sum(label == 1).item()
                # hard negative mining for loss back prop
                neg_loss = torch.sort(
                    -1. * torch.log(1 - hidden_vec[label == 0]).view(-1, ),
                    descending=True)[0][:3 * N]
                neg_N = len(neg_loss)
                loss += torch.sum(neg_loss)
                loss = loss / (N + neg_N)

                if math.isnan(loss.item()) or math.isinf(loss.item()):
                    # print (loss.item())
                    continue

                loss.backward()
                utils.clip_grad_value_(model.parameters(), 4)
                optimizer.step()
                optimizer.zero_grad()

                total_loss += loss.item()
                current_loss = total_loss / (index + 1)

                if index % 50 == 0:
                    print(f'current loss for index:{index} is: {current_loss}')
                # num_eval += 1
                # print ('num_eval:', num_eval)
                # print (f'training loss for epoch:{i} is: {total_loss/(index +1)}')
            except Exception:
                continue
        path = f'{save_name}/checkpoints.pt'
        torch.save(model.state_dict(), path)
        if i >= epoch - 2:
            pr_rec = eval_model(model, data_loader_test)
            path = f'{save_name}/pr_rec'
            with open(path, 'wb') as f:
                pickle.dump(pr_rec, f)