def train(trainloader, model, optimizer):
    model.train()
    train_loss = []
    train_acc = []
    for i, (features, labels) in enumerate(trainloader):
        data = features.to(device)
        target = labels.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = F.nll_loss(output, target)
        loss.backward()
        optimizer.step()
        _, predicted = torch.max(output.data, 1)
        # compare against the on-device targets and keep accuracy as a float
        correct = (predicted == target).sum().item()
        accuracy = 100 * correct / batch_size
        train_acc.append(accuracy)
        train_loss.append(loss.item())
    loss_train = np.mean(train_loss)
    acc = np.mean(train_acc)
    return loss_train, acc
def fit(epoch, model, data_loader, phase='training'):
    # note: the optimizer is re-created on every call, so momentum state does not carry over between epochs
    optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.5)
    if phase == 'training':
        model.train()
    if phase == 'validation':
        model.eval()
    running_loss = 0.0
    running_correct = 0
    # gradients are only needed in the training phase (replaces the deprecated `volatile` flag)
    with torch.set_grad_enabled(phase == 'training'):
        for batch_idx, (data, target) in enumerate(data_loader):
            if phase == 'training':
                optimizer.zero_grad()
            output = model(data)
            loss = F.nll_loss(output, target)
            running_loss += F.nll_loss(output, target, reduction='sum').item()
            preds = output.data.max(dim=1, keepdim=True)[1]
            running_correct += preds.eq(target.data.view_as(preds)).cpu().sum()
            if phase == 'training':
                loss.backward()
                optimizer.step()
    loss = running_loss / len(data_loader.dataset)
    accuracy = 100. * running_correct / len(data_loader.dataset)
    print(f'{phase} loss is {loss:5.2} and {phase} accuracy is {running_correct}/{len(data_loader.dataset)} {accuracy:10.4}')
    return loss, accuracy
def train(trainloader, model, optimizer, criterion):
    # note: `criterion` is accepted but unused; the loss is computed with F.nll_loss below
    model.train()
    train_loss = []
    correct = 0
    total = 0
    train_acc = []
    for i, (features, labels) in enumerate(trainloader):
        data, target = features.cuda(), labels.cuda()
        optimizer.zero_grad()
        output = model(data)
        loss = F.nll_loss(output, target)
        loss.backward()
        optimizer.step()
        prediction = output.data.max(1)[1]
        accuracy = (float(prediction.eq(target.data).sum()) / float(batch_size)) * 100.0
        train_acc.append(accuracy)
        train_loss.append(loss.item())
    loss_train = np.mean(train_loss)
    acc = np.mean(train_acc)
    return loss_train, acc
def take_optimisation_step(self, optimizer, network, loss, clipping_norm=None, retain_graph=False):
    """Takes an optimisation step by calculating gradients given the loss and then updating the parameters"""
    if not isinstance(network, list):
        network = [network]
    # Reset network gradients to 0.
    optimizer.zero_grad()
    # Backpropagate and calculate gradients.
    loss.backward(retain_graph=retain_graph)
    self.logger.info("Loss -- {}".format(loss.item()))
    self.wandb_log(dict(loss=loss, episode_number=self.episode_number), step=self.global_step_number)
    if self.debug_mode:
        self.log_gradient_and_weight_information(network, optimizer)
    if clipping_norm is not None:
        for net in network:
            # Clip gradients to help stabilise training.
            torch.nn.utils.clip_grad_norm_(net.parameters(), clipping_norm)
    # Finally, take the optimisation step.
    optimizer.step()
def take_optimisation_step(self, optimizer, network, loss, clipping_norm=None, retain_graph=False):
    """Takes an optimisation step by calculating gradients given the loss and then updating the parameters"""
    if not isinstance(network, list):
        network = [network]
    optimizer.zero_grad()  # reset gradients to 0
    loss.backward(retain_graph=retain_graph)  # this calculates the gradients
    if self.config.log_loss:
        self.logger.info("Loss -- {}".format(loss.item()))
    if self.debug_mode:
        self.log_gradient_and_weight_information(network, optimizer)
    if clipping_norm is not None:
        for net in network:
            # clip gradients to help stabilise training
            torch.nn.utils.clip_grad_norm_(net.parameters(), clipping_norm)
    optimizer.step()  # this applies the gradients
def train():
    for epoch in range(epochs):
        resnet50.train()
        epoch_loss = 0
        with tqdm(total=n_train, desc=f'Epoch {epoch + 1}/{epochs}', unit='img') as pbar:
            for i_batch, batch_data in enumerate(train_loader):
                img = batch_data['img'].to(device=device)
                label = batch_data['label'].to(device=device)
                pred = resnet50(img)
                pred = pred.squeeze()
                loss = criterion(pred, label)
                epoch_loss += loss.item()
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
                pbar.update(img.shape[0])
        print('training loss is:', epoch_loss / n_train)
        _, __, val_loss, ___ = test()
        scheduler.step(val_loss)
        writer.add_scalar('train_loss', epoch_loss, epoch)
        writer.add_scalar('val_loss', val_loss, epoch)
def train_epoch(model, data_loaders, optimizer, device, criterion, epoch, scheduler=None):
    acc = Acc("train_batch_acc")
    loss = Loss("train_batch_loss")
    model.train()
    data_size = len(data_loaders["train"])
    i = 0
    with torch.set_grad_enabled(True):
        for inputs, labels in data_loaders["train"]:
            optimizer.zero_grad()
            inputs = inputs.cuda(device)
            labels = labels.cuda(device)
            pred = model(inputs)
            c_loss = criterion(pred, labels)
            c_loss.backward()
            optimizer.step()
            loss.update(c_loss.item(), epoch * data_size + i)
            acc.update(pred, labels, epoch * data_size + i)
            i += 1
    writer.add_scalar("learningRate", optimizer.param_groups[0]['lr'], (epoch + 1) * data_size)
    epoch_loss = loss.get()
    epoch_acc = acc.get()
    if scheduler:
        if type(scheduler) == torch.optim.lr_scheduler.ReduceLROnPlateau:
            pass
        else:
            scheduler.step()
    return epoch_acc, epoch_loss
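# train_epoch() above relies on Acc and Loss meter classes and a module-level TensorBoard
# `writer`, none of which are shown. A rough sketch of what such meters might look like
# (purely an assumption about the missing helpers, not the original implementation):
class Loss:
    def __init__(self, tag):
        self.tag = tag
        self.total, self.count = 0.0, 0
    def update(self, value, step):
        self.total += value
        self.count += 1
        writer.add_scalar(self.tag, value, step)  # assumes a global SummaryWriter
    def get(self):
        return self.total / max(self.count, 1)

class Acc:
    def __init__(self, tag):
        self.tag = tag
        self.correct, self.count = 0, 0
    def update(self, pred, labels, step):
        batch_correct = (pred.argmax(dim=1) == labels).sum().item()
        self.correct += batch_correct
        self.count += labels.size(0)
        writer.add_scalar(self.tag, batch_correct / labels.size(0), step)
    def get(self):
        return self.correct / max(self.count, 1)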
def train_step(model, optimizer, positive_sample, negative_sample, subsampling_weight, mode, device="cuda"):
    model.train()
    optimizer.zero_grad()
    positive_sample = positive_sample.to(device)
    negative_sample = negative_sample.to(device)
    subsampling_weight = subsampling_weight.to(device)
    negative_score = model((positive_sample, negative_sample), mode=mode)
    negative_score = F.logsigmoid(-negative_score).mean(dim=1)
    positive_score = model(positive_sample)
    positive_score = F.logsigmoid(positive_score).squeeze(dim=1)
    positive_sample_loss = -(subsampling_weight * positive_score).sum() / subsampling_weight.sum()
    negative_sample_loss = -(subsampling_weight * negative_score).sum() / subsampling_weight.sum()
    loss = (positive_sample_loss + negative_sample_loss) / 2
    loss.backward()
    optimizer.step()
    return loss.item()
def main():
    net = Net().cuda()
    criterion = nn.CrossEntropyLoss()
    # optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)
    optimizer = optim.Adam(net.parameters(), lr=0.001)
    # training
    for epoch in range(10):
        running_loss = 0.0
        for i, data in enumerate(trainloader, 0):
            # get the inputs; data is a list of [inputs, labels]
            inputs, labels = data[0].cuda(), data[1].cuda()
            # zero the parameter gradients
            optimizer.zero_grad()
            # forward + backward + optimize
            outputs = net(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            # print statistics
            running_loss += loss.item()
            if i % 2000 == 1999:
                print('[%d,%5d] loss: %.3f' % (epoch + 1, i + 1, running_loss / 2000))
                running_loss = 0.0
    # evaluate on the test set
    total = 0
    correct = 0
    for i, test_data in enumerate(testloader):
        with torch.no_grad():
            test_inputs, test_labels = test_data[0].cuda(), test_data[1].cuda()
            test_outputs = net(test_inputs)
            predicts = torch.max(test_outputs, 1)[1]
            total += test_labels.size(0)
            correct += (predicts == test_labels).sum().item()
    accuracy = correct / total * 100
    print("Accuracy of the network on the {} test images: {}%".format(total, accuracy))
    print('Finished Training')
    PATH = './cifar_net.pth'
    torch.save(net.state_dict(), PATH)
def pretrain_generator(net_G, train_dl, optimizer, criterion, epochs):
    for e in range(epochs):
        loss_meter = AverageMeter()
        for data in tqdm.tqdm(train_dl):
            L, ab = data['L'].to(device), data['ab'].to(device)
            preds = net_G(L)
            loss = criterion(preds, ab)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            loss_meter.update(loss.item(), L.size(0))
        print(f'Epoch {e + 1}/{epochs}')
        print(f'L1 Loss: {loss_meter.avg:.5f}')
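# pretrain_generator() above assumes an AverageMeter helper that is not shown here.
# A minimal sketch of such a running-average tracker (an assumption, not the original
# implementation) might look like:
class AverageMeter:
    """Tracks a running average of a scalar (e.g. a per-batch loss)."""
    def __init__(self):
        self.sum = 0.0    # weighted sum of all values seen so far
        self.count = 0    # total weight (e.g. number of samples)
        self.avg = 0.0    # current running average
    def update(self, value, n=1):
        self.sum += value * n
        self.count += n
        self.avg = self.sum / self.count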
def train(model, train_loader, optimizer, log_interval):
    model.train()
    for batch_idx, (image, label) in enumerate(train_loader):
        image = image.to(DEVICE)
        label = label.to(DEVICE)
        optimizer.zero_grad()
        output = model(image)
        loss = criterion(output, label)
        loss.backward()
        optimizer.step()
        if batch_idx % log_interval == 0:
            print("Train Epoch: {} [{}/{} ({:.0f}%)]\tTrain Loss: {:.6f}".format(
                epoch, batch_idx * len(image), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()))
def PPO_Update(policy, memory, optimizer, opts):
    for i in range(opts.epoch):
        state, action, reward, probs, done = memory.sample()
        new_probs = policy.get_prob(state, action)
        ratios = (new_probs - probs.squeeze()).exp()
        obs1 = ratios * reward
        obs2 = torch.clamp(ratios, 1 - opts.eps, 1 + opts.eps) * reward
        obs = -torch.min(obs1, obs2).mean()
        optimizer.zero_grad()
        obs.backward()
        optimizer.step()
def fit_one_cycle(epochs, max_lr, model, train_loader, val_loader, weight_decay=0, grad_clip=None, opt_func=torch.optim.SGD):
    torch.cuda.empty_cache()
    history = []
    # Set up custom optimizer with weight decay
    optimizer = opt_func(model.parameters(), max_lr, weight_decay=weight_decay)
    # Set up one-cycle learning rate scheduler
    sched = torch.optim.lr_scheduler.OneCycleLR(optimizer, max_lr, epochs=epochs, steps_per_epoch=len(train_loader))
    for epoch in range(epochs):
        # Training phase
        model.train()
        train_losses = []
        lrs = []
        for batch in train_loader:
            loss = model.training_step(batch)
            train_losses.append(loss)
            loss.backward()
            # Gradient clipping
            if grad_clip:
                nn.utils.clip_grad_value_(model.parameters(), grad_clip)
            optimizer.step()
            optimizer.zero_grad()
            # Record & update learning rate
            lrs.append(get_lr(optimizer))
            sched.step()
        # Validation phase
        result = evaluate(model, val_loader)
        result['train_loss'] = torch.stack(train_losses).mean().item()
        result['lrs'] = lrs
        model.epoch_end(epoch, result)
        history.append(result)
    return history
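# fit_one_cycle() above calls a get_lr() helper that is not defined in the snippet.
# A plausible sketch (an assumption about the missing helper) simply reads the current
# learning rate from the optimizer's first parameter group:
def get_lr(optimizer):
    for param_group in optimizer.param_groups:
        return param_group['lr']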
def train_model(train_dl, model):
    # define the optimization
    criterion = CrossEntropyLoss()
    optimizer = SGD(model.parameters(), lr=0.01, momentum=0.9)
    # enumerate epochs
    for epoch in range(500):
        # enumerate mini batches
        for i, (inputs, targets) in enumerate(train_dl):
            # clear the gradients
            optimizer.zero_grad()
            # compute the model output
            yhat = model(inputs)
            # calculate loss
            loss = criterion(yhat, targets)
            # credit assignment
            loss.backward()
            # update model weights
            optimizer.step()
def train(data_loader, model, optimizer, device):
    model.train()
    batch_MSEs = []
    for data in tqdm(data_loader):
        # remember, we have image and targets in our dataset class
        inputs = data[0]
        targets = data[1]
        labels = data[2]
        # move inputs/targets to cuda/cpu device
        inputs = inputs.to(device, dtype=torch.float)
        targets = targets.to(device, dtype=torch.float)
        labels = labels.to(device, dtype=torch.float)
        optimizer.zero_grad()
        outputs = model(inputs)
        # BCE loss (MSE, SSIM, and MAE variants were also tried and are omitted here)
        loss = torch.nn.BCELoss()(outputs, targets)
        batch_MSEs.append(loss.item())
        # backward step the loss
        loss.backward()
        # step optimizer
        optimizer.step()
    batch_MSEs = np.array(batch_MSEs)
    epoch_loss = np.mean(batch_MSEs)
    print(epoch_loss)
    return epoch_loss
def update(engine, batch):
    model.train()
    y, x_label, y_pure, H = train_dataset.prepare_batch(batch, device=args.device)
    if args.with_pure_y and args.with_h:
        x_pred, y_pure_pred, H_pred = model(y, pure=y_pure, H=H, opp=True)
        loss_1 = criterion(x_pred, x_label) / args.gradient_accumulation_steps
        if args.loss_type == "MSELoss":
            loss_1 = loss_1 / x_pred.size(0)
        loss_noise = criterion2(y_pure_pred, y_pure) / y.size(0) / args.gradient_accumulation_steps
        loss_noise_h = criterion2(H_pred, H) / H.size(0) / args.gradient_accumulation_steps
        if args.only_l1:
            loss = loss_1
        else:
            loss = loss_1 + loss_noise * args.noise_lambda + loss_noise_h
        output = (loss.item(), loss_1.item(), loss_noise.item(), loss_noise_h.item())
    elif args.with_pure_y:
        x_pred, y_pure_pred = model(y, pure=y_pure if args.interpolation else None, opp=True)
        loss_1 = criterion(x_pred, x_label) / args.gradient_accumulation_steps
        loss_noise = criterion2(y_pure_pred, y_pure) / y.size(0) / args.gradient_accumulation_steps
        loss = loss_1 + loss_noise * args.noise_lambda
        output = (loss.item(), loss_1.item(), loss_noise.item())
    elif args.with_h:
        x_pred, H_pred = model(y, opp=True)
        loss_1 = criterion(x_pred, x_label) / args.gradient_accumulation_steps
        loss_noise = criterion2(H_pred, H) / H.size(0) / args.gradient_accumulation_steps
        loss = loss_1 + loss_noise * args.noise_lambda
        output = (loss.item(), loss_1.item(), loss_noise.item())
    else:
        x_pred = model(y)
        loss_1 = criterion(x_pred, x_label) / args.gradient_accumulation_steps
        loss = loss_1
        output = (loss.item(), loss_1.item(), torch.zeros_like(loss_1).item())
    loss.backward()
    if args.max_norm > 0:
        torch.nn.utils.clip_grad_norm_(model.parameters(), args.max_norm)
    if engine.state.iteration % args.gradient_accumulation_steps == 0:
        optimizer.step()
        optimizer.zero_grad()
    return output
def loss_and_acc_on_epoch(self, batches_per_epoch, generator, train=True, num_skipped=20):
    mean_loss = 0
    mean_accuracy = 0
    sum_constraints, num_constraints = 0, 0
    for sample_id, next_element in tqdm(enumerate(islice(generator, batches_per_epoch))):
        input_seq = next_element['input_seq']
        constraint = next_element['constraint']
        # input_seq_index is (seq_length, batch_size)
        input_seq_index = next_element['input_seq_index']
        input_seq, constraint, input_seq_index = (
            torch.FloatTensor(input_seq).cuda(),
            torch.FloatTensor(constraint).cuda(),
            torch.LongTensor(input_seq_index).cuda())
        optimizer.zero_grad()
        output = self((input_seq, constraint))
        loss = mean_crossentropy_loss(output, input_seq_index,
                                      num_skipped=num_skipped,
                                      constraint=constraint)
        if train:
            loss.backward()
            optimizer.step()
        # compute mean loss and accuracy
        mean_loss += loss.item()
        seq_accuracy, (sum_constraint, num_constraint) = accuracy(
            output_seq=output,
            targets_seq=input_seq_index,
            num_skipped=num_skipped,
            constraint=constraint)
        mean_accuracy += seq_accuracy
        sum_constraints += sum_constraint
        num_constraints += num_constraint
    return (mean_loss / batches_per_epoch,
            mean_accuracy / batches_per_epoch,
            sum_constraints / num_constraints)
def train(dataloader, model, criterion, optimizer, scheduler, epoch):
    model.train()
    print('epoch ' + str(epoch))
    train_loss = 0
    train_acc = 0
    total = len(dataloader)
    start = time.time()
    toPilImage = transforms.ToPILImage()  # transform tensor into PIL image to save
    for batch_num, (x, y) in enumerate(dataloader):
        x = x.to(device)
        y = y.to(device)
        # add Gaussian noise to the inputs
        x = x + torch.randn_like(x, device=device) * args.noise_sd
        output = model(x)
        loss = criterion(output, y)
        acc = accuracy(output, y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        train_loss += loss.item()
        train_acc += acc
    scheduler.step()
    end = time.time()
    print('training time:', end - start, 'sec, loss: ', train_loss / total, 'acc: ', train_acc / total)
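# The train() snippet above (and its noise-training variants further down) call an
# accuracy() helper that is not shown. A minimal top-1 accuracy sketch (an assumption,
# not the original helper) could be:
import torch

def accuracy(output, target):
    """Fraction of samples whose arg-max prediction matches the label."""
    with torch.no_grad():
        pred = output.argmax(dim=1)
        return (pred == target).float().mean().item()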
def train(train_loader):
    # running loss value
    running_loss = 0.0
    for i, data in enumerate(train_loader, 0):
        inputs, labels = data
        # use the GPU if one is available
        inputs, labels = inputs.to(device), labels.to(device)
        # zero the gradients
        optimizer.zero_grad()
        # forward pass
        output = net(inputs)
        # compute the loss
        loss = criterion(output, labels)
        # backward pass and weight update
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        # print the running loss every 100 batches
        if i % 100 == 99:
            print('%5d loss: %.3f' % (i + 1, running_loss / 100))
            running_loss = 0.0
def train_step(self, model, optimizer, train_iterator, args):
    '''A single train step. Apply back-propagation and return the loss.'''
    model.train()
    optimizer.zero_grad()
    positive_sample, negative_sample, subsampling_weight, mode = next(train_iterator)
    if args['gpu']:
        positive_sample = positive_sample.cuda()
        negative_sample = negative_sample.cuda()
        subsampling_weight = subsampling_weight.cuda()
    negative_score = self.forward(model, (positive_sample, negative_sample), mode=mode)
    if args['negative_adversarial_sampling']:
        # In self-adversarial sampling, we do not apply back-propagation on the sampling weight
        negative_score = (F.softmax(negative_score * args['adversarial_temperature'], dim=1).detach()
                          * F.logsigmoid(-negative_score)).sum(dim=1)
    else:
        negative_score = F.logsigmoid(-negative_score).mean(dim=1)
    positive_score = self.forward(model, positive_sample)
    positive_score = F.logsigmoid(positive_score).squeeze(dim=1)
    if args['uni_weight']:
        positive_sample_loss = -positive_score.mean()
        negative_sample_loss = -negative_score.mean()
    else:
        positive_sample_loss = -(subsampling_weight * positive_score).sum() / subsampling_weight.sum()
        negative_sample_loss = -(subsampling_weight * negative_score).sum() / subsampling_weight.sum()
    loss = (positive_sample_loss + negative_sample_loss) / 2
    if args['regularization'] != 0.0:
        # Use L3 regularization for ComplEx and DistMult
        regularization = args['regularization'] * (
            model.entity_embedding.norm(p=3) ** 3 +
            model.relation_embedding.norm(p=3).norm(p=3) ** 3
        )
        loss = loss + regularization
        regularization_log = {'regularization': regularization.item()}
    else:
        regularization_log = {}
    loss.backward()
    optimizer.step()
    log = {
        **regularization_log,
        'positive_sample_loss': positive_sample_loss.item(),
        'negative_sample_loss': negative_sample_loss.item(),
        'loss': loss.item()
    }
    return log
tic_run = time.time()
best_test_seen, best_test_unseen, test_seen, test_unseen, Z = 0, 0, 0, 0, None
for epoch in range(args.epochs):
    # train
    tic_epoch = time.time()
    model.train()
    optimizer.zero_grad()
    Z = model(Xseen)
    loss = F.nll_loss(Z[train_idx], Y[train_idx])
    loss.backward()
    optimizer.step()
    train_time = time.time() - tic_epoch
    # eval
    model.eval()
    Z = model(X)
    train_acc = accuracy(Z[train_idx], Y[train_idx])
    test_seen = accuracy(Z[test_idx_seen], Y[test_idx_seen])
    test_unseen = accuracy(Z[test_idx_unseen], Y[test_idx_unseen])
    best_test_seen = max(best_test_seen, test_seen)
    best_test_unseen = max(best_test_unseen, test_unseen)
    baselogger.info(
        f'epoch:{epoch} | loss:{loss:.4f} | train acc:{train_acc:.2f} | '
        f'best_seen: {best_test_seen:.2f} | seen:{test_seen:.2f} | '
        f'best_unseen: {best_test_unseen:.2f} | unseen:{test_unseen:.2f} | '
        f'time:{train_time*1000:.1f}ms')
def main():
    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    # Path options.
    parser.add_argument("--output_model_path", default="./models/classifier_model.bin", type=str,
                        help="Path of the output model.")
    parser.add_argument("--output_lossfig_path", default="./models/loss.png", type=str,
                        help="Path of the output loss figure.")
    # Model options.
    parser.add_argument("--batch_size", type=int, default=32, help="Batch size.")
    parser.add_argument("--seq_length", type=int, default=128, help="Sequence length.")
    # Optimizer options.
    parser.add_argument("--learning_rate", type=float, default=2e-5, help="Learning rate.")
    parser.add_argument("--warmup", type=float, default=0.1, help="Warm up value.")
    # Training options.
    parser.add_argument("--dropout", type=float, default=0.5, help="Dropout.")
    parser.add_argument("--epochs_num", type=int, default=5, help="Number of epochs.")
    parser.add_argument("--report_steps", type=int, default=100, help="Specific steps to print prompt.")
    parser.add_argument("--seed", type=int, default=7, help="Random seed.")
    parser.add_argument("--device", type=str, default='cpu', help="Device to use.")
    args = parser.parse_args()

    def set_seed(seed=7):
        random.seed(seed)
        os.environ['PYTHONHASHSEED'] = str(seed)
        np.random.seed(seed)
        torch.manual_seed(seed)
        torch.cuda.manual_seed(seed)
        torch.backends.cudnn.deterministic = True

    set_seed(args.seed)

    # Load the data.
    train = pd.read_csv('../data5k/train.tsv', encoding='utf-8', sep='\t')
    dev = pd.read_csv('../data5k/dev.tsv', encoding='utf-8', sep='\t')
    test = pd.read_csv('../data5k/test.tsv', encoding='utf-8', sep='\t')

    # Load BERT vocabulary and tokenizer.
    bert_config = BertConfig('bert_model/bert_config.json')
    BERT_MODEL_PATH = 'bert_model'
    bert_tokenizer = BertTokenizer.from_pretrained(BERT_MODEL_PATH, cache_dir=None, do_lower_case=False)

    # Build the model inputs.
    processor = DataPrecessForSingleSentence(bert_tokenizer=bert_tokenizer)

    # Train dataset.
    seqs, seq_masks, seq_segments = processor.get_input(
        sentences=train['text_a'].tolist(), max_seq_len=args.seq_length)
    labels = train['label'].tolist()
    t_seqs = torch.tensor(seqs, dtype=torch.long)
    t_seq_masks = torch.tensor(seq_masks, dtype=torch.long)
    t_seq_segments = torch.tensor(seq_segments, dtype=torch.long)
    t_labels = torch.tensor(labels, dtype=torch.long)
    train_data = TensorDataset(t_seqs, t_seq_masks, t_seq_segments, t_labels)
    train_sampler = RandomSampler(train_data)
    train_dataloder = DataLoader(dataset=train_data, sampler=train_sampler, batch_size=args.batch_size)

    # Dev dataset.
    seqs, seq_masks, seq_segments = processor.get_input(
        sentences=dev['text_a'].tolist(), max_seq_len=args.seq_length)
    labels = dev['label'].tolist()
    t_seqs = torch.tensor(seqs, dtype=torch.long)
    t_seq_masks = torch.tensor(seq_masks, dtype=torch.long)
    t_seq_segments = torch.tensor(seq_segments, dtype=torch.long)
    t_labels = torch.tensor(labels, dtype=torch.long)
    dev_data = TensorDataset(t_seqs, t_seq_masks, t_seq_segments, t_labels)
    dev_sampler = RandomSampler(dev_data)
    dev_dataloder = DataLoader(dataset=dev_data, sampler=dev_sampler, batch_size=args.batch_size)

    # Test dataset.
    seqs, seq_masks, seq_segments = processor.get_input(
        sentences=test['text_a'].tolist(), max_seq_len=args.seq_length)
    labels = test['label'].tolist()
    t_seqs = torch.tensor(seqs, dtype=torch.long)
    t_seq_masks = torch.tensor(seq_masks, dtype=torch.long)
    t_seq_segments = torch.tensor(seq_segments, dtype=torch.long)
    t_labels = torch.tensor(labels, dtype=torch.long)
    test_data = TensorDataset(t_seqs, t_seq_masks, t_seq_segments, t_labels)
    test_sampler = RandomSampler(test_data)
    test_dataloder = DataLoader(dataset=test_data, sampler=test_sampler, batch_size=args.batch_size)

    # Build the classification model.
    model = BertForSequenceClassification(bert_config, 2)

    # For simplicity, we use the DataParallel wrapper to use multiple GPUs.
    if args.device == 'cpu':
        device = torch.device("cpu")
    else:
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        if torch.cuda.device_count() > 1:
            print("{} GPUs are available. Let's use them.".format(torch.cuda.device_count()))
            model = nn.DataParallel(model)
    model = model.to(device)

    # Evaluation function.
    def evaluate(args, is_test, metrics='Acc'):
        if is_test:
            dataset = test_dataloder
            instances_num = test.shape[0]
            print("The number of evaluation instances: ", instances_num)
        else:
            dataset = dev_dataloder
            instances_num = dev.shape[0]
            print("The number of evaluation instances: ", instances_num)
        correct = 0
        model.eval()
        # Confusion matrix.
        confusion = torch.zeros(2, 2, dtype=torch.long)
        for i, batch_data in enumerate(dataset):
            batch_data = tuple(t.to(device) for t in batch_data)
            batch_seqs, batch_seq_masks, batch_seq_segments, batch_labels = batch_data
            with torch.no_grad():
                logits = model(batch_seqs, batch_seq_masks, batch_seq_segments, labels=None)
            pred = logits.softmax(dim=1).argmax(dim=1)
            gold = batch_labels
            for j in range(pred.size()[0]):
                confusion[pred[j], gold[j]] += 1
            correct += torch.sum(pred == gold).item()
        if is_test:
            print("Confusion matrix:")
            print(confusion)
            print("Report precision, recall, and f1:")
            for i in range(confusion.size()[0]):
                p = confusion[i, i].item() / confusion[i, :].sum().item()
                r = confusion[i, i].item() / confusion[:, i].sum().item()
                f1 = 2 * p * r / (p + r)
                if i == 1:
                    label_1_f1 = f1
                print("Label {}: {:.3f}, {:.3f}, {:.3f}".format(i, p, r, f1))
        print("Acc. (Correct/Total): {:.4f} ({}/{}) ".format(correct / instances_num, correct, instances_num))
        if metrics == 'Acc':
            return correct / instances_num
        elif metrics == 'f1':
            return label_1_f1
        else:
            return correct / instances_num

    # Training phase.
    print("Start training.")
    instances_num = train.shape[0]
    batch_size = args.batch_size
    train_steps = int(instances_num * args.epochs_num / batch_size) + 1
    print("Batch size: ", batch_size)
    print("The number of training instances:", instances_num)

    # Parameters to optimize.
    param_optimizer = list(model.named_parameters())
    no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
    optimizer_grouped_parameters = [
        {'params': [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)],
         'weight_decay': 0.01},
        {'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)],
         'weight_decay': 0.0}
    ]
    optimizer = BertAdam(optimizer_grouped_parameters, lr=args.learning_rate,
                         warmup=args.warmup, t_total=train_steps)

    # Track the loss of each epoch and the dev accuracy.
    all_loss = []
    all_acc = []
    total_loss = 0.0
    result = 0.0
    best_result = 0.0
    for epoch in range(1, args.epochs_num + 1):
        model.train()
        for step, batch_data in enumerate(train_dataloder):
            batch_data = tuple(t.to(device) for t in batch_data)
            batch_seqs, batch_seq_masks, batch_seq_segments, batch_labels = batch_data
            # The model computes the cross-entropy loss from the labels directly.
            loss = model(batch_seqs, batch_seq_masks, batch_seq_segments, batch_labels)
            loss.backward()
            total_loss += loss.item()
            if (step + 1) % 100 == 0:
                print("Epoch id: {}, Training steps: {}, Avg loss: {:.3f}".format(epoch, step + 1, total_loss / 100))
                sys.stdout.flush()
                total_loss = 0.
            optimizer.step()
            optimizer.zero_grad()
        all_loss.append(total_loss)
        total_loss = 0.
        print("Start evaluation on dev dataset.")
        result = evaluate(args, False)
        all_acc.append(result)
        if result > best_result:
            best_result = result
            torch.save(model.state_dict(), args.output_model_path)

    print("Start evaluation on test dataset.")
    evaluate(args, True)
    print('all_loss:', all_loss)
    print('all_acc:', all_acc)

    # Evaluation phase.
    print("Final evaluation on the test dataset.")
    model.load_state_dict(torch.load(args.output_model_path))
    evaluate(args, True)
def train(dataloader, model, criterion, optimizer, scheduler, epoch):
    model.train()
    print('epoch ' + str(epoch))
    train_loss = 0.0
    train_acc = 0.0
    total = len(dataloader)
    start = time.time()
    toPilImage = transforms.ToPILImage()  # transform tensor into PIL image to save
    for batch_num, (x, y) in enumerate(dataloader):
        x = x.to(device)
        y = y.to(device)
        # Gaussian noise for noise training
        gauss_noise = torch.randn_like(x, device=device) * args.noise_sd
        # targeted noise training
        tmp_criterion = nn.CrossEntropyLoss()
        tmp_optimizer = optim.SGD(model.parameters(), lr=args.lr,
                                  momentum=args.momentum, weight_decay=args.weight_decay)
        classifier = PyTorchClassifier(
            model=model,
            clip_values=(min_pixel_value, max_pixel_value),
            loss=tmp_criterion,
            optimizer=tmp_optimizer,
            input_shape=(3, 32, 32),
            nb_classes=10,
        )
        # generate random targets
        targets = art.utils.random_targets(y.cpu().numpy(), get_num_classes())
        # calculate the loss gradient towards the random targets
        grad = classifier.loss_gradient(x=x.cpu().numpy(), y=targets) * (-1.0)
        scaled_grad = torch.Tensor(grad * args.eps_step).to(device)
        # combine Gaussian noise and targeted noise
        x_combine = x + (gauss_noise * (1.0 - args.k_value)) + (scaled_grad * args.k_value)
        model.zero_grad()
        output = model(x_combine)
        loss = criterion(output, y)
        acc = accuracy(output, y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        train_loss += loss.item()
        train_acc += acc
    scheduler.step()
    end = time.time()
    print('training time:', end - start, 'sec, loss: ', train_loss / total, 'acc: ', train_acc / total)
    return train_loss / total, train_acc / total
model = nn.Linear(1, 1)
# loss function
criterion = nn.MSELoss()
# stochastic gradient descent
optimizer = SGD(model.parameters(), lr=0.001)
x_train = x.reshape(-1, 1).astype('float32')
y_train = y.reshape(-1, 1).astype('float32')
# number of training iterations
train_times = 30000
for i in range(train_times):
    input = torch.from_numpy(x_train)
    labels = torch.from_numpy(y_train)
    output = model(input)
    optimizer.zero_grad()  # zero the gradients
    loss = criterion(output, labels)
    loss.backward()  # backward pass
    optimizer.step()  # update the parameters
    if i % 100 == 0:
        # every 100 iterations, plot the current fit and print the loss to check progress
        plt.clf()
        plt.scatter(x, y)
        plt.plot(x, output.data.numpy(), color="red")
        plt.pause(0.1)
        print('epoch {}, loss {:1.4f}'.format(i, loss.data.item()))
plt.ioff()
plt.show()
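# The linear-regression script above assumes 1-D NumPy arrays x and y already exist.
# For a self-contained run, synthetic data could be generated first, for example
# (assumed, not part of the original script):
import numpy as np

np.random.seed(0)
x = np.linspace(0, 10, 200)                      # inputs
y = 2.0 * x + 1.0 + np.random.randn(200) * 0.5   # noisy linear targets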
def train_model(classifier, criterion, optimizer, trainLoader, valLoader, epochs):
    loss_list = []
    val_loss_list = []
    correct_list = []
    val_correct_list = []
    for epoch in range(epochs):
        running_loss = 0.0
        val_running_loss = 0.0
        running_correct = 0.0
        val_running_correct = 0.0
        for i, data in enumerate(tqdm(trainLoader)):
            # Get the inputs and labels from the data loader
            inputs, labels = data
            # Move them to the device the model runs on
            inputs = inputs.to(device)
            labels = labels.to(device)
            # Zero the parameter gradients
            optimizer.zero_grad()
            # Forward pass
            outputs = classifier(inputs)
            # Compute the loss
            loss = criterion(outputs, labels)
            # Backward pass and parameter update
            loss.backward()
            optimizer.step()
            # Accumulate the running loss and number of correct predictions
            running_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)
            running_correct += torch.sum(predicted == labels.data)
        # Validation
        with torch.no_grad():
            for j, data in enumerate(valLoader):
                inputs, labels = data
                inputs = inputs.to(device)
                labels = labels.to(device)
                # Forward pass
                outputs = classifier(inputs)
                # Calculate loss
                loss = criterion(outputs, labels)
                val_running_loss += loss.item()
                # Count correct predictions
                _, predicted = torch.max(outputs.data, 1)
                val_running_correct += torch.sum(predicted == labels.data)
        epoch_loss = running_loss / len(trainLoader)
        epoch_acc = running_correct.float() / len(trainLoader.dataset)  # fraction of the training set
        loss_list.append(epoch_loss)
        correct_list.append(epoch_acc.cpu())  # convert to cpu to avoid error
        val_epoch_loss = val_running_loss / len(valLoader)
        val_epoch_acc = val_running_correct.float() / len(valLoader.dataset)
        val_loss_list.append(val_epoch_loss)
        val_correct_list.append(val_epoch_acc.cpu())  # convert to cpu to avoid error
        print('Epoch {}/{}'.format(epoch + 1, epochs))
        print('Training Loss: {:.4f}'.format(epoch_loss))
        print('Training Accuracy: {:.4f}'.format(epoch_acc))
        print('Validation Loss: {:.4f}'.format(val_epoch_loss))
        print('Validation Accuracy: {:.4f}'.format(val_epoch_acc))
    return loss_list, correct_list, val_loss_list, val_correct_list
def train(model, train_dataloder, valid_dataloder, loss_function, optimizer,
          device, num_labels, save_path='./job_fine_tuned_bert.pth'):
    # track per-batch and per-epoch losses
    train_losses = []
    valid_losses = []
    avg_train_losses = []
    avg_valid_losses = []
    patience = 20
    early_stopping = EarlyStopping(patience=patience, verbose=True)
    # training loop
    for i in trange(EPOCHS, desc='Epoch'):
        model.train()
        # training
        for step, batch_data in enumerate(train_dataloder):
            batch_data = tuple(t.to(device) for t in batch_data)
            batch_seqs, batch_seq_masks, batch_seq_segments, batch_labels = batch_data
            logits = model(batch_seqs, batch_seq_masks, batch_seq_segments, labels=None)
            logits = torch.nn.functional.log_softmax(logits, dim=1)
            loss = loss_function(logits, batch_labels)
            loss.backward()
            train_losses.append(loss.item())
            print("\r step: %d / %d, loss: %f" % (step, len(train_dataloder), loss), end='')
            optimizer.step()
            optimizer.zero_grad()
        torch.cuda.empty_cache()
        model.eval()
        # validation
        for step, batch_data in enumerate(valid_dataloder):
            with torch.no_grad():
                batch_data = tuple(t.to(device) for t in batch_data)
                batch_seqs, batch_seq_masks, batch_seq_segments, batch_labels = batch_data
                logits = model(batch_seqs, batch_seq_masks, batch_seq_segments, labels=None)
                logits = torch.nn.functional.log_softmax(logits, dim=1)
                loss = loss_function(logits, batch_labels)
                valid_losses.append(loss.item())
        torch.cuda.empty_cache()
        train_loss = np.average(train_losses)
        valid_loss = np.average(valid_losses)
        avg_train_losses.append(train_loss)
        avg_valid_losses.append(valid_loss)
        if step % 20 == 0:
            print("train_loss:%f, valid_loss:%f" % (train_loss, valid_loss))
        # reset the per-epoch loss buffers
        train_losses = []
        valid_losses = []
        early_stopping(valid_loss, model)
        if early_stopping.early_stop:
            print("Early Stopping")
            break
    torch.save(model, open(save_path, "wb"))

    # plot the loss curves
    fig = plt.figure(figsize=(8, 6))
    plt.plot(range(1, len(avg_train_losses) + 1), avg_train_losses, label='Training Loss')
    plt.plot(range(1, len(avg_valid_losses) + 1), avg_valid_losses, label='Validation Loss')
    # find the position of the lowest validation loss
    minposs = avg_valid_losses.index(min(avg_valid_losses)) + 1
    # plt.axvline(minposs, linestyle='--', color='r', label='Early Stopping Checkpoint')
    plt.xlabel('epochs')
    plt.ylabel('loss')
    plt.grid(True)
    plt.legend()
    plt.tight_layout()
    plt.show()
    fig.savefig('loss_plot.png', bbox_inches='tight')
    return model
def train(dataloader, model, criterion, optimizer, scheduler, epoch):
    model.train()
    print('epoch ' + str(epoch))
    train_loss = 0.0
    train_acc = 0.0
    total = len(dataloader)
    start = time.time()
    toPilImage = transforms.ToPILImage()  # transform tensor into PIL image to save
    for batch_num, (x, y) in enumerate(dataloader):
        x = x.to(device)
        y = y.to(device)
        # Gaussian noise for noise training
        gauss_noise = torch.randn_like(x, device=device) * args.noise_sd
        # targeted noise training
        tmp_criterion = nn.CrossEntropyLoss()
        tmp_optimizer = optim.SGD(model.parameters(), lr=args.lr,
                                  momentum=args.momentum, weight_decay=args.weight_decay)
        classifier = PyTorchClassifier(
            model=model,
            clip_values=(min_pixel_value, max_pixel_value),
            loss=tmp_criterion,
            optimizer=tmp_optimizer,
            input_shape=(3, 32, 32),
            nb_classes=10,
        )
        # for each sample, build a random permutation of all classes other than the true one
        targets = []
        y_np = y.cpu().numpy()
        for i in range(y.shape[0]):
            targets.append(np.expand_dims(
                np.random.permutation(np.delete(np.arange(get_num_classes()), y_np[i])),
                axis=0))
        targets = np.concatenate(targets)
        # accumulate the loss gradients towards every other class
        mix_noise = torch.zeros_like(x)
        for t in range(targets.shape[1]):
            y_slice = np.squeeze(targets[:, t])
            y_oh = np.zeros((y_slice.size, get_num_classes()))
            y_oh[np.arange(y_slice.size), y_slice] = 1
            grad = classifier.loss_gradient(x=x.cpu().numpy(), y=y_oh) * (-1.0)
            scaled_grad = torch.Tensor(grad * args.eps_step).to(device)
            mix_noise += scaled_grad
            model.zero_grad()
            tmp_optimizer.zero_grad()
        # combine Gaussian noise and targeted noise
        x_combine = x + (gauss_noise * (1.0 - args.k_value)) + (mix_noise * args.k_value)
        model.zero_grad()
        output = model(x_combine)
        loss = criterion(output, y)
        acc = accuracy(output, y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        train_loss += loss.item()
        train_acc += acc
    scheduler.step()
    end = time.time()
    print('training time:', end - start, 'sec, loss: ', train_loss / total, 'acc: ', train_acc / total)
    return train_loss / total, train_acc / total
fig = plt.figure()
ax = mp3.Axes3D(fig)
for i, opt in enumerate(optims):
    x = (torch.normal(torch.zeros(2), torch.FloatTensor([1, 1])) +
         torch.FloatTensor([9, 9])).requires_grad_(True)
    optimizer = opt([x, ], lr=lrs[i])
    x_start = x.data.clone()
    pos = []
    for j in range(max_iter):
        point = x.data.clone()
        pos.append(point.numpy())
        y = func_to_use(x)
        y.backward(retain_graph=True)
        optimizer.step()
        # Without a closure, step() performs a single update; optimizers that accept a
        # closure may re-evaluate the function several times per step. Since backward()
        # has already populated x.grad, step() can use that gradient (e.g. for a line
        # search) to move x.
        optimizer.zero_grad()
        # The usual convention is to call zero_grad() before the next backward pass:
        # it clears the previously accumulated gradients (otherwise they add up),
        # while step() applies the parameter update.
        print("Optimizer %s, iteration %d / %d, x = " % (opt.__name__, j, max_iter),
              x.data, "y = ", float(y.data))
    # plotting
    pos = np.array(pos)
    curve_vals = np.array([func_to_use(p) for p in pos])
    xs = pos[:, 0]
    ys = pos[:, 1]
def train(epoch, data):
    net.train().to(device)
    inputs, labels = data
    inputs = torch.from_numpy(np.asarray(inputs).astype(np.float32))
    permutation = torch.randperm(inputs.size()[0])
    running_loss = 0
    count = 0
    batch_losses = []
    for batch_idx in range(0, inputs.size()[0], BATCH_SIZE):
        t0 = time.time()
        count += 1
        # zero the parameter gradients
        optimizer.zero_grad()
        indices = permutation[batch_idx:batch_idx + BATCH_SIZE]
        batch_x, batch_y = inputs[indices], labels[indices]
        batch_x = batch_x.reshape(batch_x.size()[0], 1, batch_x.size()[1], batch_x.size()[2])
        outputs = net(batch_x.to(device))
        loss = criterion(outputs, batch_y.to(device))
        loss.backward()
        optimizer.step()
        # print statistics
        running_loss += loss.item()
        batch_losses.append(loss.item())
        sys.stdout.write('\r')
        sys.stdout.write(" Train data epoch %d [%-100s] %d/%d \t Loss:%f" %
                         (epoch, '=' * int((batch_idx / inputs.size()[0]) * 100),
                          batch_idx, inputs.size()[0], loss.item()))
        sys.stdout.flush()
        time.sleep(0.25)
        if batch_idx % inputs.size()[0] == 0:
            test_output = net(batch_x.to(device))
            pred_y = torch.argmax(test_output, dim=1)
            accuracy = float(np.array([(x == y) for x, y in zip(batch_y, pred_y)]).astype(int).sum()) / float(batch_y.size()[0])
            print('Epoch: ', epoch, '| train loss: %.4f' % loss.item(),
                  '| train accuracy: %.2f' % accuracy)
            print('Epoch {}, loss {}, took {} seconds'.format(epoch, loss.item(), time.time() - t0))
def train_model(classifier, criterion, optimizer, trainLoader, valLoader, epochs):
    loss_list = []
    val_loss_list = []
    correct_list = []
    val_correct_list = []
    for epoch in range(epochs):
        running_loss = 0.0
        val_running_loss = 0.0
        running_correct = 0.0
        val_running_correct = 0.0
        for i, data in enumerate(tqdm(trainLoader)):
            # Get the inputs and labels from the data loader
            inputs, labels = data
            # Zero the parameter gradients
            optimizer.zero_grad()
            # Forward pass
            outputs = classifier(inputs)
            # Compute the loss
            loss = criterion(outputs, labels)
            # Backward pass and parameter update
            loss.backward()
            optimizer.step()
            # Accumulate the running loss and number of correct predictions
            running_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)
            running_correct += torch.sum(predicted == labels.data)
        # Validation
        with torch.no_grad():
            for j, data in enumerate(valLoader):
                inputs, labels = data
                # Forward pass
                outputs = classifier(inputs)
                # Calculate loss
                loss = criterion(outputs, labels)
                val_running_loss += loss.item()
                # Count correct predictions
                _, predicted = torch.max(outputs.data, 1)
                val_running_correct += torch.sum(predicted == labels.data)
        epoch_loss = running_loss / len(trainLoader)  # loss per epoch
        epoch_acc = running_correct.float() / len(trainLoader.dataset)  # accuracy per epoch
        loss_list.append(epoch_loss)
        correct_list.append(epoch_acc)
        val_epoch_loss = val_running_loss / len(valLoader)  # loss per epoch
        val_epoch_acc = val_running_correct.float() / len(valLoader.dataset)  # accuracy per epoch
        val_loss_list.append(val_epoch_loss)
        val_correct_list.append(val_epoch_acc)
        print('Epoch {}/{}'.format(epoch + 1, epochs))
        print('Training Loss: {:.4f}'.format(epoch_loss))
        print('Training Accuracy: {:.4f}'.format(epoch_acc))
        print('Validation Loss: {:.4f}'.format(val_epoch_loss))
        print('Validation Accuracy: {:.4f}'.format(val_epoch_acc))

    plt.style.use('ggplot')
    plt.plot(loss_list, label='Training Loss')
    plt.plot(val_loss_list, label='Validation Loss')
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.title('Training and Validation Loss')
    plt.legend()
    plt.savefig('data/loss_25_Adam_LeNet2.png')  # MODIFY OUTPUT NAME
    plt.show()

    plt.style.use('ggplot')
    plt.plot(correct_list, label='Training Accuracy')
    plt.plot(val_correct_list, label='Validation Accuracy')
    plt.xlabel('Epochs')
    plt.ylabel('Accuracy')
    plt.title('Training and Validation Accuracy')
    plt.legend()
    plt.savefig('data/accuracy_25_Adam_LeNet2.png')  # MODIFY OUTPUT NAME
    plt.show()