示例#1
0
def train():
    """Run one training epoch over the shuffled training data.

    Relies on module-level globals: ``model``, ``optimizer``, ``args``,
    ``lr``, ``epoch``, ``step_slope``, ``datafile_train`` and the data
    helpers (``data_shuffle``, ``data_producer``, ``repackage_hidden``).
    """
    print('Load training data')
    # Turn on training mode which enables dropout.
    model.train()
    # Anneal the step-function slope when the RNN exposes one.
    if hasattr(model.rnn, 'step_slope'):
        model.rnn.step_slope = step_slope
    total_loss = 0.0
    start_time = time.time()
    hidden = model.init_hidden(args.batch_size)
    # Shuffle order of talks
    train_data = data_shuffle(datafile_train)
    print('Start training')
    for (data, targets, batch) in data_producer(train_data, args.batch_size, args.bptt, cuda=args.cuda, use_durs=args.use_durs):
        # Starting each batch, we detach the hidden state from how it was previously produced.
        # If we didn't, the model would try backpropagating all the way to start of the dataset.
        hidden = repackage_hidden(hidden)
        # model.zero_grad() is redundant when the optimizer holds every model
        # parameter; kept because we cannot see how the optimizer was built.
        model.zero_grad()
        optimizer.zero_grad()
        output, hidden = model(data, hidden)
        if args.tier == 'combined':
            loss, loss_phone, loss_word = model.criterion(output, targets)
        else:
            loss = model.criterion(output, targets)

        loss.backward()

        # `clip_grad_norm_` helps prevent the exploding gradient problem in
        # RNNs / LSTMs. (The non-underscore `clip_grad_norm` used originally
        # is deprecated and removed in modern PyTorch.)
        torch.nn.utils.clip_grad_norm_(model.parameters(), args.clip)
        optimizer.step()

        # `.item()` extracts a Python float; the original `total_loss += loss.data`
        # followed by `total_loss[0]` raises on 0-dim tensors (PyTorch >= 0.4).
        total_loss += loss.item()

        if batch % args.log_interval == 0 and batch > 0:
            cur_loss = total_loss / args.log_interval
            elapsed = time.time() - start_time
            print('| epoch {:3d} | {:5d}/{:5d} batches | lr {:02.6f} | ms/batch {:5.2f} | '
                    'loss {:5.2f} | ppl {:8.2f}'.format(
                epoch, batch, len(train_data[0]) // args.batch_size // args.bptt, lr,
                elapsed * 1000 / args.log_interval, cur_loss, math.exp(cur_loss)))
            total_loss = 0.0
            start_time = time.time()
示例#2
0
def evaluate(data_source):
    """Compute the mean per-batch loss over `data_source` in eval mode.

    Returns a (total, phone, word) triple when ``args.tier == 'combined'``,
    otherwise a single mean loss. Returns zero(s) for an empty data source
    — the original code raised NameError there, because the loop variable
    ``batch`` was referenced after a loop that never ran.
    """
    # Turn on evaluation mode which disables dropout.
    model.eval()
    total_loss = 0.0
    phone_loss = 0.0
    word_loss = 0.0
    n_batches = 0  # replaces the original `batch + 1` (assumes sequential batches)
    hidden = model.init_hidden(eval_batch_size)
    # No autograd graph is needed during evaluation; saves memory and time.
    with torch.no_grad():
        for (data, targets, batch) in data_producer(data_source, eval_batch_size, args.bptt, cuda=args.cuda, use_durs=args.use_durs, evaluation=True):
            output, hidden = model(data, hidden)
            if args.tier == 'combined':
                loss, loss_phone, loss_word = model.criterion(output, targets)
                # `.item()` instead of `.data` + later `[0]` indexing, which
                # fails on 0-dim tensors in PyTorch >= 0.4.
                total_loss += loss.item()
                phone_loss += loss_phone.item()
                word_loss += loss_word.item()
            else:
                total_loss += model.criterion(output, targets).item()
            hidden = repackage_hidden(hidden)
            n_batches += 1
    if n_batches == 0:
        # Empty data source: report zero loss instead of crashing.
        return (0.0, 0.0, 0.0) if args.tier == 'combined' else 0.0
    if args.tier == 'combined':
        return (total_loss / n_batches, phone_loss / n_batches, word_loss / n_batches)
    else:
        return total_loss / n_batches
示例#3
0
 def test_forward_and_backward(self):
     """Smoke-test one forward and backward pass on a single CUDA batch."""
     # Note: both the dataset batches and the model live on the GPU here.
     batch_iter = self.loader.load(self.trainset,
                                   batch_size=10,
                                   shuffle=True,
                                   to_tensor=True,
                                   to_cuda=True)
     net = Net(word_vocab_size=self.params.word_vocab_size,
               tag_vocab_size=self.params.tag_vocab_size,
               embedding_dim=self.params.embedding_dim,
               lstm_hidden_dim=self.params.lstm_hidden_dim).cuda()
     for first_batch in batch_iter:
         x, y = first_batch
         y_hat = net(x)
         batch_loss = criterion(y_hat, y)
         batch_loss.backward()
         self.logger.debug('loss: {}'.format(batch_loss.item()))
         # A single batch suffices for this smoke test.
         break
def evaluate_model(epoch, history=None):
    """Validate the model: accumulate dev loss, build prediction strings at
    two logit thresholds (0.5 and 0.6), and record loss/mAP into the
    module-level history frames."""
    model.eval()
    total_loss = 0
    preds_at_05 = []  # prediction strings from logit threshold 0.5
    preds_at_06 = []  # prediction strings from logit threshold 0.6
    # No gradients are needed while validating.
    with torch.no_grad():
        for img_batch, mask_batch, gaussian_batch, regr_batch in tqdm(
                dev_loader, desc="验证中"):
            img_batch, mask_batch, gaussian_batch, regr_batch = (
                img_batch.to(device), mask_batch.to(device),
                gaussian_batch.to(device), regr_batch.to(device))

            output = model(img_batch)

            # size_average=False: sum per-sample losses, normalized below
            # by the full dataset size.
            total_loss += criterion(output,
                                    mask_batch,
                                    gaussian_batch,
                                    regr_batch,
                                    size_average=False).data
            for out in output.data.cpu().numpy():
                preds_at_05.append(coords2str(extract_coords(out, threshold=0.5)))
                preds_at_06.append(coords2str(extract_coords(out, threshold=0.6)))

    total_loss /= len(dev_loader.dataset)
    df_dev_pred_1['PredictionString'] = preds_at_05
    df_dev_pred_2['PredictionString'] = preds_at_06

    if history is not None:
        history.loc[epoch, 'dev_loss'] = total_loss.cpu().numpy()

    print('Dev loss: {:.4f}'.format(total_loss))
    mAP_at_05 = calculate_mAP(valid_df=df_dev_pred_1, train_df=df_dev)
    mAP_at_06 = calculate_mAP(valid_df=df_dev_pred_2, train_df=df_dev)
    print('mAP threshold: 0.5:', mAP_at_05)
    print('mAP threshold: 0.6:', mAP_at_06)
    mAP_history.loc[epoch, '0.5'] = mAP_at_05
    mAP_history.loc[epoch, '0.6'] = mAP_at_06
def train_model(epoch, history=None):
    """Train for one epoch, logging the per-step loss into `history` and
    printing epoch averages of the total/binary/regression losses."""
    model.train()
    total_loss = 0
    total_mask_focal_loss = 0
    total_regr_loss = 0
    num_batches = len(train_loader)
    for step, (img_batch, mask_batch, gaussian_mask_batch,
               regr_batch) in enumerate(tqdm(train_loader, desc="训练中")):
        img_batch, mask_batch, gaussian_mask_batch, regr_batch = (
            img_batch.to(device), mask_batch.to(device),
            gaussian_mask_batch.to(device), regr_batch.to(device))

        optimizer.zero_grad()
        output = model(img_batch)

        # split_loss=True returns the combined loss plus its two components.
        loss, mask_focal_loss, regr_loss = criterion(output,
                                                     mask_batch,
                                                     gaussian_mask_batch,
                                                     regr_batch,
                                                     split_loss=True)
        if history is not None:
            # Fractional epoch index yields a smooth per-step loss curve.
            history.loc[epoch + step / num_batches,
                        'train_loss'] = loss.data.cpu().numpy()

        loss.backward()
        total_loss += loss.data
        total_mask_focal_loss += mask_focal_loss.data
        total_regr_loss += regr_loss.data

        optimizer.step()
        exp_lr_scheduler.step()  # learning-rate decay, stepped every batch

    print(
        '\nTrain Epoch: {} \tLR: {:.6f}\tLoss: {:.6f}\tbinary loss {:.6f}\tregression loss {:.6f}'
        .format(epoch,
                optimizer.state_dict()['param_groups'][0]['lr'],
                total_loss / num_batches,
                total_mask_focal_loss / num_batches,
                total_regr_loss / num_batches))
示例#6
0
# Build the full model (head on top of a base network) on the GPU; the head
# size comes from the number of training classes.
model = opts.model(base_model, dataset_train.num_training_classes).cuda()
# Partition trainable parameters into four groups by the two flags
# (is_base, is_bias), generated in order (F,F), (F,T), (T,F), (T,T):
# model weights, model biases, base-model weights, base-model biases.
model_weights, model_biases, base_model_weights, base_model_biases = [[p for k, p in model.named_parameters() if p.requires_grad and ('bias' in k) == is_bias and ('base' in k) == is_base] for is_base in [False, True] for is_bias in [False, True]]

# The base model trains with a scaled learning rate (default multiplier 1.0);
# bias groups get weight_decay = 0.0.
base_model_lr_mult = model.optimizer_params.pop('base_model_lr_mult', 1.0)
optimizer = model.optimizer([dict(params = base_model_weights, lr = base_model_lr_mult * model.optimizer_params['lr']), dict(params = base_model_biases, lr = base_model_lr_mult * model.optimizer_params['lr'], weight_decay = 0.0), dict(params = model_biases, weight_decay = 0.0)], **model.optimizer_params)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, **model.lr_scheduler_params)

# NOTE(review): open(path, 'w', 0) requests unbuffered text I/O, which is
# Python 2 only — Python 3 raises ValueError for buffering=0 in text mode.
log = open(opts.log, 'w', 0)
for epoch in range(opts.epochs):
	# NOTE(review): calling scheduler.step() before the epoch's optimizer
	# steps is the pre-1.1.0 PyTorch ordering; modern PyTorch warns here.
	scheduler.step()
	model.train()
	# norm_all is initialized but never appended to in this chunk.
	loss_all, norm_all = [], []
	# When the model defines no criterion, skip training entirely.
	for batch_idx, batch in enumerate(loader_train if model.criterion is not None else []):
		tic = time.time()
		images, labels = [tensor.cuda() for tensor in batch]
		loss = model.criterion(model(images), labels)
		loss_all.append(float(loss))
		optimizer.zero_grad()
		loss.backward()
		optimizer.step()
		print('train {:>3}.{:05}  loss  {:.04f}   hz {:.02f}'.format(epoch, batch_idx, loss_all[-1], len(images) / (time.time() - tic)))
	# `loss_all or [0.0]` keeps the mean well-defined for an empty epoch.
	log.write('loss epoch {}: {:.04f}\n'.format(epoch, torch.Tensor(loss_all or [0.0]).mean()))
	
	# Periodic evaluation: first 10 epochs, every 5th epoch, and the last one.
	if epoch < 10 or epoch % 5 == 0 or epoch == opts.epochs - 1:
		model.eval()
		embeddings_all, labels_all = [], []
		for batch_idx, batch in enumerate(loader_eval):
			tic = time.time()
			images, labels = [tensor.cuda() for tensor in batch]
			with torch.no_grad():
				output = model(images)
示例#7
0
                                               num_workers=8,
                                               batch_size=opts.batch,
                                               drop_last=True)
    # Switch to training mode (enables dropout / batch-norm updates).
    model.train()

    # batch train
    # NOTE(review): stepping the scheduler before the epoch's optimizer
    # steps is the pre-1.1.0 PyTorch convention; modern PyTorch warns here.
    scheduler.step()
    loss_all = []
    # When the model defines no criterion, skip the whole epoch.
    for batch_idx, batch in enumerate(
            loader_train if model.criterion is not None else []):
        # Presumably anchor/positive/negative image batches plus per-pair
        # weights for a triplet-style loss — confirm against the loader.
        # NOTE(review): torch.autograd.Variable has been a no-op wrapper
        # since PyTorch 0.4.
        a_images, p_images, n_images, p_w, n_w = [
            torch.autograd.Variable(tensor.cuda()) for tensor in batch
        ]
        loss = model.criterion(model(a_images),
                               model(p_images),
                               model(n_images),
                               p_w,
                               n_w,
                               margin=opts.margin)
        loss_all.append(loss.data.item())
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    print('loss epoch {}: {:.04f}'.format(epoch, np.mean(loss_all)))
    log.write('loss epoch {}: {:.04f}\n'.format(epoch, np.mean(loss_all)))

    # evaluate on test set
    # Evaluate in the first 10 epochs, every 5th epoch, and at the end.
    if epoch < 10 or (epoch + 1) % 5 == 0 or (epoch + 1) == opts.epochs:
        model.eval()
        embeddings_all, labels_all = get_dataset_embeddings(
            model, dataset_eval)
        rec = [