def test_model(model, criterion, data_loader, mode='test'):
    """Evaluate `model` over `data_loader`.

    Args:
        model: network to evaluate; switched to eval mode here.
        criterion: loss function applied to each batch.
        data_loader: iterable of (images, labels) batches (labels are Bx1, squeezed to B).
        mode: 'test' returns (avg_val_loss, acc); 'oof' returns the raw
            (labels, predictions) numpy arrays for out-of-fold use.

    Returns:
        ('oof')  -> (val_labels, val_preds) as numpy arrays.
        (other)  -> (avg_val_loss, acc) where acc comes from cal_score.
    """
    model.eval()
    avg_val_loss = 0.
    label_chunks = []
    pred_chunks = []
    n_batches = len(data_loader)
    with torch.no_grad():
        for images, labels in data_loader:
            images, labels = images.cuda(), labels.cuda()
            targets = labels.squeeze(-1)
            outputs = model(images)
            # Accumulate the batch-mean loss so the sum is the epoch average.
            avg_val_loss += criterion(outputs, targets).item() / n_batches
            label_chunks.append(targets)
            pred_chunks.append(outputs)
    # Concatenate once after the loop instead of torch.cat per batch
    # (the original re-cat was O(n^2) in total copied data), and drop the
    # deprecated redundant `.data.cpu()).data.numpy()` chains.
    val_labels = torch.cat(label_chunks, dim=0).cpu().numpy()
    val_preds = torch.cat(pred_chunks, dim=0).cpu().numpy()
    if mode == 'oof':
        # Out-of-fold path only needs the raw arrays; skip scoring.
        return val_labels, val_preds
    acc = cal_score(val_labels, val_preds)
    return avg_val_loss, acc
def train_model(model, optimizer, scheduler, data_loader, criterion, accumulate):
    """Run one training epoch with apex-amp mixed precision and gradient
    accumulation; return the average loss over the epoch.

    An optimizer step (with gradient clipping and an LR-scheduler step) is
    taken once every `accumulate` batches.
    """
    model.train()
    optimizer.zero_grad()
    total_batches = len(data_loader)
    avg_loss = 0
    for batch_idx, (batch_images, batch_labels) in enumerate(data_loader):
        batch_images = batch_images.cuda()
        batch_labels = batch_labels.cuda()
        predictions = model(batch_images)
        loss = criterion(predictions, batch_labels.squeeze(-1))
        # Divide by `accumulate` so accumulated gradients match one big batch.
        with amp.scale_loss(loss / accumulate, optimizer) as scaled_loss:
            scaled_loss.backward()
        step_due = (batch_idx + 1) % accumulate == 0
        if step_due:
            # Clip the global L2 gradient norm before the parameter update.
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=5.0, norm_type=2)
            optimizer.step()
            optimizer.zero_grad()
            scheduler.step()
        avg_loss += loss.item() / total_batches
    return avg_loss
def test_gradient_optimization(self):
    """Fletcher-Reeves conjugate gradient with a strong Wolfe-Powell line
    search should converge to the known optimum (2, -2) of Function2."""
    # numpy.float was a deprecated alias for the builtin float and was
    # removed in NumPy 1.24; use the builtin (float64 dtype) instead.
    startPoint = numpy.zeros(2, float)
    optimi = StandardOptimizer(function = Function2(),
                               step = FRConjugateGradientStep(),
                               criterion = criterion(ftol = 0.0000001, gtol=0.0001),
                               x0 = startPoint,
                               line_search = StrongWolfePowellRule())
    assert_almost_equal(optimi.optimize(), numpy.array((2., -2)))
def test_hessian_optimization(self):
    """Newton's method (Hessian-based step) with a simple line search
    should converge to the known optimum (2, -2) of Function."""
    # numpy.float was a deprecated alias for the builtin float and was
    # removed in NumPy 1.24; use the builtin (float64 dtype) instead.
    startPoint = numpy.zeros(2, float)
    optimi = StandardOptimizer(function = Function(),
                               step = NewtonStep(),
                               criterion = criterion(iterations_max = 100, ftol = 0.0000001),
                               x0 = startPoint,
                               line_search = SimpleLineSearch())
    assert_almost_equal(optimi.optimize(), numpy.array((2., -2)))
def test_gradient_optimization(self):
    """Plain gradient descent with a Fibonacci-section line search reaches
    the expected optimum (4, 4, 3, -2) of self.function."""
    stop_rule = criterion(ftol = 0.00001, iterations_max = 1000)
    section_search = FibonacciSectionSearch(min_alpha_step=0.000001)
    opt = StandardOptimizer(function = self.function,
                            x0 = self.x0,
                            step = GradientStep(),
                            line_search = section_search,
                            criterion = stop_rule)
    optimum = opt.optimize()
    assert_array_almost_equal(optimum, numpy.array((4., 4., 3., -2.)))
def test_newton_optimization(self):
    """Marquardt-damped Newton step with a simple line search reaches the
    expected optimum (4, 4, 3, -2) of self.function."""
    stop_rule = criterion(ftol = 0.00001, iterations_max = 1000)
    damped_step = MarquardtStep(gamma = 10.)
    opt = StandardOptimizer(function = self.function,
                            x0 = self.x0,
                            step = damped_step,
                            line_search = SimpleLineSearch(),
                            criterion = stop_rule)
    optimum = opt.optimize()
    assert_array_almost_equal(optimum, numpy.array((4., 4., 3., -2.)))
def train(self):
    """Train self.G for segmentation, logging scalars/images to TensorBoard,
    periodically saving comparison images, and checkpointing whenever the
    validator's mean IoU improves.

    Reads: self.pretrained_model, self.epochs, self.arch, self.G,
    self.data_loader, self.g_optimizer, self.lr_scheduler, self.writer,
    self.tb_step, self.sample_step, self.sample_path, self.classes,
    self.imsize, self.batch_size, self.total_iters, self.verifier,
    self.model_save_path.
    """
    # Resume epoch counting after a loaded checkpoint, else start from 0.
    if self.pretrained_model:
        start = self.pretrained_model + 1
    else:
        start = 0
    criterion = CriterionAll()
    criterion.cuda()
    best_miou = 0
    # Data iterator
    for epoch in range(start, self.epochs):
        self.G.train()
        for i_iter, batch in enumerate(self.data_loader):
            # Make i_iter a global step counter across epochs.
            i_iter += len(self.data_loader) * epoch
            # lr = adjust_learning_rate(self.g_lr,
            #       self.g_optimizer, i_iter, self.total_iters)
            imgs, labels, edges = batch
            size = labels.size()
            imgs = imgs.cuda()
            labels = labels.cuda()
            if self.arch in __BA__:
                # Boundary-aware arch: criterion consumes labels + edges.
                edges = edges.cuda()
                preds = self.G(imgs)
                c_loss = criterion(preds, [labels, edges])
                # Last element of the first prediction head is the final map.
                labels_predict = preds[0][-1]
            else:
                labels_predict = self.G(imgs)
                c_loss = cross_entropy2d(
                    labels_predict, labels.long(), reduction='mean')
            self.reset_grad()
            c_loss.backward()
            # Note: for simplicity, the optimizer state is NOT checkpointed here!!!
            self.g_optimizer.step()
            self.lr_scheduler.step(epoch=None)
            # info on tensorboard
            if (i_iter + 1) % self.tb_step == 0:
                # scalar info on tensorboard
                self.writer.add_scalar(
                    'cross_entrophy_loss', c_loss.data, i_iter)
                self.writer.add_scalar(
                    'learning_rate', self.g_optimizer.param_groups[0]['lr'], i_iter)
                # image info on tensorboard: one-hot encode the GT labels
                labels = labels[:, :, :].view(size[0], 1, size[1], size[2])
                oneHot_size = (size[0], self.classes, size[1], size[2])
                labels_real = torch.cuda.FloatTensor(torch.Size(oneHot_size)).zero_()
                labels_real = labels_real.scatter_(1, labels.data.long().cuda(), 1.0)
                label_batch_predict = generate_label(labels_predict, self.imsize)
                label_batch_real = generate_label(labels_real, self.imsize)
                # Tile the batch horizontally: image | GT | prediction rows.
                img_combine = imgs[0]
                real_combine = label_batch_real[0]
                predict_combine = label_batch_predict[0]
                for i in range(1, self.batch_size):
                    img_combine = torch.cat([img_combine, imgs[i]], 2)
                    real_combine = torch.cat([real_combine, label_batch_real[i]], 2)
                    predict_combine = torch.cat([predict_combine, label_batch_predict[i]], 2)
                all_combine = torch.cat([denorm(img_combine.cpu().data),
                                         real_combine, predict_combine], 1)
                self.writer.add_image('imresult/img-gt-pred', all_combine,
                                      i_iter)
                # self.writer.add_image('imresult/img', (img_combine.data + 1) / 2.0, i_iter)
                # self.writer.add_image('imresult/real', real_combine, i_iter)
                # self.writer.add_image('imresult/predict', predict_combine, i_iter)
            # Sample images in folder
            # NOTE(review): `labels_real` is only defined inside the tb_step
            # branch above — if sample_step fires before tb_step ever has,
            # this raises NameError. Works only when tb_step divides
            # sample_step; confirm intended step settings.
            if (i_iter + 1) % self.sample_step == 0:
                # labels_sample = generate_label(labels_predict, self.imsize)
                compare_predict_color = generate_compare_results(
                    imgs, labels_real, labels_predict, self.imsize)
                # save_image((labels_sample.data), osp.join(self.sample_path, '{}_predict.png'.format(i_iter + 1)))
                save_image((compare_predict_color.data),
                           osp.join(self.sample_path, '{}_predict.png'.format(i_iter + 1)))
            print('Train iter={} of {} completed, loss={}'.format(
                i_iter, self.total_iters, c_loss.data))
        print('----- Train epoch={} of {} completed -----'.format(epoch+1, self.epochs))
        # miou = self.verifier.validation(self.G)
        score = self.verifier.validation(self.G)
        # oacc = score["Overall Acc: \t"]
        miou = score["Mean IoU : \t"]
        print("----------------- Total Performance --------------------")
        for k, v in score.items():
            print(k, v)
        print("---------------------------------------------------")
        # Checkpoint only on mIoU improvement; filename embeds epoch + mIoU.
        if miou > best_miou:
            best_miou = miou
            torch.save(self.G.state_dict(), osp.join(
                self.model_save_path,
                '{}_{}_G.pth'.format(str(epoch), str(round(best_miou, 4)))))
def train(args, model):
    """Train `model` on CityScapes and validate after each epoch.

    Tracks running train/val loss and per-pixel accuracy; every
    `args.steps_loss` steps it appends a summary line to train_loss.txt /
    val_loss.txt and saves the weights when the running validation loss
    improves.

    Args:
        args: namespace with datadir, num_workers, batch_size, cuda,
            num_epochs, steps_loss.
        model: segmentation network already on the right device.
    """
    model.train()
    weight = torch.ones(NUM_CLASSES)
    #weight[0] = 0
    loader = DataLoader(CityScapes(args.datadir, input_transform, target_transform),
                        num_workers=args.num_workers, batch_size=args.batch_size, shuffle=True)
    val_loader = DataLoader(CityScapes_validation(args.datadir, input_transform, target_transform),
                            num_workers=args.num_workers, batch_size=args.batch_size, shuffle=True)
    if args.cuda:
        criterion = CrossEntropyLoss2d(weight.cuda())
        #criterion=torch.nn.BCEWithLogitsLoss()
    else:
        criterion = CrossEntropyLoss2d(weight)
    optimizer = SGD(model.parameters(), 1e-4, .9, 2e-5)
    print("Total steps:", len(loader))
    best_loss = 100
    best_val_loss = 100
    best_acc = 0
    best_val_acc = 0
    for epoch in range(1, args.num_epochs + 1):
        # ---------------- training phase ----------------
        epoch_loss = []
        iteration = 1
        train_acc = []
        for step, (images, labels) in enumerate(loader):
            print("Iter:" + str(iteration))
            iteration = iteration + 1
            if args.cuda:
                images = images.cuda()
                labels = labels.cuda()
            inputs = Variable(images)
            targets = Variable(labels)
            outputs = model(inputs)
            optimizer.zero_grad()
            loss = criterion(outputs, targets[:, 0, :, :])  # labels arrive as Bx1xHxW
            loss.backward()
            optimizer.step()
            print(loss.item())
            epoch_loss.append(loss.item())
            # Per-image pixel accuracy over the batch.
            acc_vec = []
            for b in range(outputs.size()[0]):
                acc_vec.append(pixel_accuracy(torch.max(outputs[b, :, :, :], 0)[1], targets[b, 0, :, :]))
            acc = sum(acc_vec) / len(acc_vec)
            print("train_acc: " + str(acc))
            train_acc.append(acc)
            if args.steps_loss > 0 and step > 0 and step % args.steps_loss == 0:
                average = sum(epoch_loss) / len(epoch_loss)
                average_acc = sum(train_acc) / len(train_acc)
                epoch_loss = []
                train_acc = []
                if best_loss > average:
                    best_loss = average
                    #torch.save(model.state_dict(), "model_linknet34.pth")
                    #print("Model saved!")
                if best_acc < average_acc:
                    best_acc = average_acc
                print("loss: " + str(average) + " epoch: " + str(epoch) + ", step: " + str(step))
                print("best loss: " + str(best_loss) + " epoch: " + str(epoch) + ", step: " + str(step))
                print("train acc: " + str(average_acc))
                print("best train acc: " + str(best_acc))
                # Context manager so the log file is closed even on error.
                with open("train_loss.txt", "a") as f:
                    f.write(str(epoch) + " " + str(step) + " " + str(average) + " " + str(best_loss)
                            + " " + str(average_acc) + " " + str(best_acc) + "\n")
        print("Best loss: " + str(best_loss))
        print("Best val loss: " + str(best_val_loss))
        print("best train acc: " + str(best_acc))
        print("Best val acc: " + str(best_val_acc))
        # ---------------- validation phase ----------------
        epoch_loss = []
        val_acc = []
        iteration = 1
        # no_grad: validation needs no autograd graph (same values, less memory).
        with torch.no_grad():
            for step, (images, labels) in enumerate(val_loader):
                print("Val Iter:" + str(iteration))
                iteration = iteration + 1
                if args.cuda:
                    images = images.cuda()
                    labels = labels.cuda()
                inputs = Variable(images)
                targets = Variable(labels)
                outputs = model(inputs)
                loss = criterion(outputs, targets[:, 0, :, :])
                print(loss.item())
                epoch_loss.append(loss.item())
                val_acc_vec = []
                for b in range(outputs.size()[0]):
                    val_acc_vec.append(pixel_accuracy(torch.max(outputs[b, :, :, :], 0)[1], targets[b, 0, :, :]))
                acc = sum(val_acc_vec) / len(val_acc_vec)
                val_acc.append(acc)
                if args.steps_loss > 0 and step > 0 and step % args.steps_loss == 0:
                    average = sum(epoch_loss) / len(epoch_loss)
                    average_acc = sum(val_acc) / len(val_acc)
                    epoch_loss = []
                    val_acc = []
                    if best_val_loss > average:
                        best_val_loss = average
                        torch.save(model.state_dict(), "model_linknet34.pth")
                        print("Model saved!")
                    if best_val_acc < average_acc:
                        best_val_acc = average_acc
                    print("val loss: " + str(average) + " epoch: " + str(epoch) + ", step: " + str(step))
                    print("best val loss: " + str(best_val_loss) + " epoch: " + str(epoch) + ", step: " + str(step))
                    print("val acc: " + str(average_acc))
                    # BUG FIX: previously printed best_acc (the TRAIN best)
                    # here instead of best_val_acc.
                    print("best val acc: " + str(best_val_acc))
                    with open("val_loss.txt", "a") as f1:
                        f1.write(str(epoch) + " " + str(step) + " " + str(average) + " " + str(best_val_loss)
                                 + " " + str(average_acc) + " " + str(best_val_acc) + "\n")
        print("Best val loss: " + str(best_val_loss))
        print("Best val acc: " + str(best_val_acc))
#else: # label_raw = open('train_labels.txt', 'r').read().split() # labels = [int(label) for label in label_raw] # no_layers = 20 # hidden_dim = 5 # learn_rate = 0.01 # epoch = 60 # batch_size = 16 # vocab_size = 149 # data = torch.load('train_tensor.txt').double() hprev = torch.zeros(batch_size, hidden_dim).double() model_0 = model(no_layers, hidden_dim, batch_size, vocab_size, True) criterion = criterion() min_batch = torch.arange(0, data.shape[0], batch_size) print("\n.......Training Started.......\n") for e in range(epoch): index = sample(range(0, data.shape[0]), data.shape[0]) input_vect = data[index] label_int = [labels[i] for i in index] avg_epoch_loss = 0 for batch in min_batch: output, hprev = model_0.forward( input_vect[batch:batch + batch_size, :, :], hprev) loss = criterion.forward(output, label_int[batch:batch + batch_size]) gradLoss = criterion.backward(output,
def train(model, dataset, optimizer, criterion, model_name, split=(0.9, 0.1),
          batch_size=32, n_epochs=1, model_path='./', random_seed=None):
    """Train `model` on `dataset`, checkpointing on best validation loss.

    Splits the dataset into train/val loaders, runs `n_epochs` epochs with a
    ReduceLROnPlateau schedule on the validation loss, logs per-phase losses,
    saves the best weights to <model_path><model_name>/model.pth and dumps
    running metrics to metrics.json.

    Args:
        model: network to train.
        dataset: dataset yielding dicts with 'input' and 'target' tensors.
        optimizer: torch optimizer over model parameters.
        criterion: loss applied to (prediction, target).
        model_name: subdirectory name for checkpoints/logs.
        split: (train_fraction, val_fraction). Default is now an immutable
            tuple instead of a mutable list (same values, safer default).
        batch_size: loader batch size.
        n_epochs: number of epochs.
        model_path: parent directory for the model directory.
        random_seed: seed forwarded to the dataset splitter.
    """
    # Create directory if it doesn't exist
    model_dir = model_path + model_name
    if not os.path.exists(model_dir):
        os.makedirs(model_dir)
    # Logging: we save output messages in a log file
    log_path = os.path.join(model_dir, 'logs.log')
    utils.set_logger(log_path)
    # Dataset
    dataloaders = {}
    dataloaders['train'], dataloaders['val'] = splitDataLoader(
        dataset, split=split, batch_size=batch_size, random_seed=random_seed)
    # If the validation loss reaches a plateau, we decrease the learning rate:
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=10)
    # Metrics
    metrics_path = os.path.join(model_dir, 'metrics.json')
    metrics = {
        'model': model_dir,
        'optimizer': optimizer.__class__.__name__,
        'criterion': criterion.__class__.__name__,
        'scheduler': scheduler.__class__.__name__,
        'dataset_size': int(len(dataset)),
        'train_size': int(split[0] * len(dataset)),
        'test_size': int(split[1] * len(dataset)),
        'n_epoch': n_epochs,
        'batch_size': batch_size,
        'learning_rate': [],
        'train_loss': [],
        'val_loss': []
    }
    # Device: We use cuda on GPUs only
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    # Training
    since = time.time()
    dataset_size = {
        'train': int(split[0] * len(dataset)),
        'val': int(split[1] * len(dataset))
    }
    best_loss = 0.0
    for epoch in range(n_epochs):
        logging.info('-' * 30)
        epoch_time = time.time()
        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()  # Set model to training mode
            else:
                model.eval()  # Set model to evaluate mode
            running_loss = 0.0
            zernike_loss = 0.0
            for _, sample in enumerate(dataloaders[phase]):
                # GPU support
                inputs = sample['input'].to(device)
                target = sample['target'].to(device)
                # Zero the parameter gradients: backward() accumulates, so we
                # must not mix gradients between minibatches.
                optimizer.zero_grad()
                # forward: track history only in train
                with torch.set_grad_enabled(phase == 'train'):
                    # 1. Make prediction:
                    ratio_estimation = model(inputs)
                    # 2. Compute the loss for the current batch:
                    loss = criterion(torch.squeeze(ratio_estimation), torch.squeeze(target))
                    # NOTE(review): debug instrumentation — dumps predictions
                    # vs targets for the current batch (overwritten each time).
                    yhat = torch.squeeze(ratio_estimation)
                    y = torch.squeeze(target)
                    d = {}
                    # BUG FIX: move tensors to CPU before .numpy();
                    # .detach().numpy() raises TypeError on CUDA tensors.
                    d['yhat'] = torch.flatten(yhat).detach().cpu().numpy().tolist()
                    d['syhat'] = yhat.shape
                    d['y'] = torch.flatten(y).detach().cpu().numpy().tolist()
                    d['sy'] = y.shape
                    print('yhat: ', torch.squeeze(ratio_estimation), 'y: ', torch.squeeze(target))
                    with open('../../data/yhat_y.txt', 'w+') as outfile:
                        json.dump(d, outfile)
                    # Perform backward propagation to update the weights:
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()
                running_loss += 1 * loss.item() * inputs.size(0)
            logging.info('[%i/%i] %s loss: %f' %
                         (epoch + 1, n_epochs, phase, running_loss / dataset_size[phase]))
            # Update metrics
            metrics[phase + '_loss'].append(running_loss / dataset_size[phase])
            if phase == 'train':
                metrics['learning_rate'].append(get_lr(optimizer))
            # Adaptive learning rate: ReduceLROnPlateau needs the current
            # validation loss.
            if phase == 'val':
                scheduler.step(metrics[phase + '_loss'][epoch])
        # Save weights (first epoch always; then only on improvement)
        if epoch == 0 or running_loss < best_loss:
            best_loss = running_loss
            model_path = os.path.join(model_dir, 'model.pth')
            torch.save(model.state_dict(), model_path)
        # Save metrics
        with open(metrics_path, 'w') as f:
            json.dump(metrics, f, indent=4)
        logging.info('[%i/%i] Time: %f s' % (epoch + 1, n_epochs, time.time() - epoch_time))
    time_elapsed = time.time() - since
    logging.info('[-----] All epochs completed in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
def train(args, model):
    """Train `model` on VOC12 with class-weighted 2D cross entropy.

    Background class (index 0) is down-weighted to zero. Prints the running
    average loss; optionally denormalizes an input image every
    `args.steps_plot` steps (visualization hooks are commented out).
    """
    model.train()
    weight = torch.ones(22)
    weight[0] = 0  # ignore the background class in the loss
    loader = DataLoader(VOC12(args.datadir, input_transform, target_transform),
                        num_workers=args.num_workers, batch_size=args.batch_size, shuffle=True)
    if args.cuda:
        criterion = CrossEntropyLoss2d(weight.cuda())
    else:
        criterion = CrossEntropyLoss2d(weight)
    #optimizer = Adam(model.parameters())
    optimizer = SGD(filter(lambda p: p.requires_grad, model.parameters()), lr=1e-3)
    print(len(loader))
    for epoch in range(1, args.num_epochs + 1):
        epoch_loss = []
        iteration = 1
        for step, (images, labels) in enumerate(loader):
            print("Iter:" + str(iteration))
            iteration = iteration + 1
            if args.cuda:
                images = images.cuda()
                labels = labels.cuda()
            inputs = Variable(images)
            targets = Variable(labels)
            outputs = model(inputs)
            optimizer.zero_grad()
            loss = criterion(outputs, targets[:, 0])
            loss.backward()
            optimizer.step()
            # BUG FIX: loss.data[0] was removed in PyTorch >= 0.5;
            # loss.item() is the supported scalar accessor.
            epoch_loss.append(loss.item())
            if args.steps_plot > 0 and step % args.steps_plot == 0:
                # Undo ImageNet normalization for visualization.
                image = inputs[0].cpu().data
                image[0] = image[0] * .229 + .485
                image[1] = image[1] * .224 + .456
                image[2] = image[2] * .225 + .406
            average = sum(epoch_loss) / len(epoch_loss)
            print("loss: " + str(average) + " epoch: " + str(epoch) + ", step: " + str(step))
testdata[j] = result if i == steps * BatchSize: break t = TRN(D_in, D_hid, D_out, D_future, D_inpseq, BatchSize, device) hx,cx = t.initialise_hidden_parameters() t = t.to(device) Act, Act_bar = t(x, (hx,cx)) btarget = torch.arange(Act.size(1), device = device, requires_grad = False) cur_target = btarget.repeat(D_inpseq, 1) fut_target = btarget.repeat(D_inpseq,D_future,1) crit = criterion(D_inpseq, alpha) loss, Avgloss = crit((Act, Act_bar), (cur_target,fut_target)) print(loss, Avgloss) Avgloss.backward() Model_Parameters = ModelParam(t) optimizer = torch.optim.Adam(Model_Parameters,lr=0.0005, betas=(0.9, 0.999), eps=1e-08, weight_decay=0.0005) scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min')
def train(model, dataset, optimizer, criterion, split=[0.9, 0.1], batch_size=32,
          n_epochs=1, model_dir='./', random_seed=None, visdom=False):
    """Train `model` on `dataset` with a StepLR schedule; checkpoint on best
    validation loss.

    Splits the dataset into train/val loaders, logs per-phase losses, saves
    the best weights to <model_dir>/model.pth, dumps metrics.json each epoch,
    and optionally pushes metrics to a Visdom dashboard.

    Args:
        model: network to train.
        dataset: dataset yielding dicts with 'image' and 'phase' tensors.
        optimizer: torch optimizer over model parameters.
        criterion: loss applied to (phase estimate, reference phase).
        split: [train_fraction, val_fraction].
        batch_size: loader batch size.
        n_epochs: number of epochs.
        model_dir: directory for checkpoints/logs/metrics.
        random_seed: seed forwarded to the dataset splitter.
        visdom: if True, start/update a VisdomWebServer with the metrics.
    """
    # Create directory if doesn't exist
    if not os.path.exists(model_dir):
        os.makedirs(model_dir)
    # Logging
    log_path = os.path.join(model_dir, 'logs.log')
    utils.set_logger(log_path)
    # Visdom support
    if visdom:
        vis = VisdomWebServer()
    # Dataset
    dataloaders = {}
    dataloaders['train'], dataloaders['val'] = splitDataLoader(
        dataset, split=split, batch_size=batch_size, random_seed=random_seed)
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=150, gamma=0.1)
    # Metrics
    metrics_path = os.path.join(model_dir, 'metrics.json')
    metrics = {
        'model': model_dir,
        'optimizer': optimizer.__class__.__name__,
        'criterion': criterion.__class__.__name__,
        'scheduler': scheduler.__class__.__name__,
        'dataset_size': int(len(dataset)),
        'train_size': int(split[0] * len(dataset)),
        'test_size': int(split[1] * len(dataset)),
        'n_epoch': n_epochs,
        'batch_size': batch_size,
        'learning_rate': [],
        'train_loss': [],
        'val_loss': [],
        'zernike_train_loss': [],
        'zernike_val_loss': []
    }
    # NOTE: removed dead code — a large (101 x 128 x 128) Zernike basis
    # tensor (`z_basis`) and a `zernike_loss` accumulator were allocated
    # here but never used anywhere in this function.
    # Device
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    # Training
    since = time.time()
    dataset_size = {
        'train': int(split[0] * len(dataset)),
        'val': int(split[1] * len(dataset))
    }
    best_loss = 0.0
    for epoch in range(n_epochs):
        logging.info('-' * 30)
        epoch_time = time.time()
        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()  # Set model to training mode
            else:
                model.eval()  # Set model to evaluate mode
            running_loss = 0.0
            for _, sample in enumerate(dataloaders[phase]):
                # GPU support
                inputs = sample['image'].to(device)
                phase_0 = sample['phase'].to(device)
                # zero the parameter gradients
                optimizer.zero_grad()
                # forward: track history if only in train
                with torch.set_grad_enabled(phase == 'train'):
                    # Network return phase and zernike coeffs
                    phase_estimation = model(inputs)
                    loss = criterion(torch.squeeze(phase_estimation), phase_0)
                    # backward
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()
                running_loss += 1 * loss.item() * inputs.size(0)
            logging.info('[%i/%i] %s loss: %f' %
                         (epoch + 1, n_epochs, phase, running_loss / dataset_size[phase]))
            # Update metrics
            metrics[phase + '_loss'].append(running_loss / dataset_size[phase])
            if phase == 'train':
                metrics['learning_rate'].append(get_lr(optimizer))
            # Adaptive learning rate (stepped once per epoch, after val)
            if phase == 'val':
                scheduler.step()
        # Save weights (first epoch always; then only on improvement)
        if epoch == 0 or running_loss < best_loss:
            best_loss = running_loss
            model_path = os.path.join(model_dir, 'model.pth')
            torch.save(model.state_dict(), model_path)
        # Save metrics
        with open(metrics_path, 'w') as f:
            json.dump(metrics, f, indent=4)
        # Visdom update
        if visdom:
            vis.update(metrics)
        logging.info('[%i/%i] Time: %f s' % (epoch + 1, n_epochs, time.time() - epoch_time))
    time_elapsed = time.time() - since
    logging.info('[-----] All epochs completed in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
def train(self):
    """Train self.G for segmentation, logging loss/LR to TensorBoard and
    checkpointing whenever the validator's mean IoU improves.

    Reads: self.pretrained_model, self.epochs, self.arch, self.G,
    self.data_loader, self.g_optimizer, self.lr_scheduler, self.writer,
    self.tb_step, self.total_iters, self.verifier, self.model_save_path.
    """
    # Resume epoch counting after a loaded checkpoint, else start from 0.
    if self.pretrained_model:
        start = self.pretrained_model + 1
    else:
        start = 0
    criterion = CriterionAll()
    criterion.cuda()
    best_miou = 0
    # Data iterator
    for epoch in range(start, self.epochs):
        self.G.train()
        for i_iter, batch in enumerate(self.data_loader):
            # Make i_iter a global step counter across epochs.
            i_iter += len(self.data_loader) * epoch
            # lr = adjust_learning_rate(self.g_lr,
            #       self.g_optimizer, i_iter, self.total_iters)
            imgs, labels, edges = batch
            size = labels.size()
            imgs = imgs.cuda()
            labels = labels.cuda()
            if self.arch in __BA__:
                # Boundary-aware arch: criterion consumes labels + edges.
                edges = edges.cuda()
                preds = self.G(imgs)
                c_loss = criterion(preds, [labels, edges])
                labels_predict = preds[0][-1]
            else:
                # NOTE(review): labels were already moved to CUDA above;
                # this second .cuda() is a redundant no-op.
                labels = labels.cuda()
                # oneHot_size = (size[0], self.classes, size[1], size[2])
                # labels_real = torch.cuda.FloatTensor(
                #     torch.Size(oneHot_size)).zero_()
                # labels_real = labels_real.scatter_(
                #     1, labels.data.long().cuda(), 1.0)
                labels_predict = self.G(imgs)
                c_loss = cross_entropy2d(labels_predict, labels.long(), reduction='mean')
            self.reset_grad()
            c_loss.backward()
            # Note: for simplicity, the optimizer state is NOT checkpointed here!!!
            self.g_optimizer.step()
            self.lr_scheduler.step(epoch=None)
            # scalar info on tensorboard
            if (i_iter + 1) % self.tb_step == 0:
                self.writer.add_scalar('cross_entrophy_loss', c_loss.data, i_iter)
                self.writer.add_scalar(
                    'learning_rate', self.g_optimizer.param_groups[0]['lr'], i_iter)
            # # Sample images
            # if (i_iter + 1) % self.sample_step == 0:
            #     labels_sample = generate_label(
            #         labels_predict, self.imsize)
            #     save_image(denorm(labels_sample.data),
            #                osp.join(self.sample_path, '{}_predict.png'.format(i_iter + 1)))
            print('iter={} of {} completed, loss={}'.format(
                i_iter, self.total_iters, c_loss.data))
        # Validate once per epoch; checkpoint only on mIoU improvement.
        miou = self.verifier.validation(self.G)
        if miou > best_miou:
            best_miou = miou
            torch.save(
                self.G.state_dict(),
                osp.join(
                    self.model_save_path,
                    '{}_{}_G.pth'.format(str(epoch), str(round(best_miou, 4)))))
def train(args, model):
    """Train `model` on VOC12 with class-weighted 2D cross entropy.

    Chooses the optimizer by model family (FCN/PSP/Seg), logs running average
    loss to loss.txt every `args.steps_loss` steps, and saves the weights
    whenever the running average loss improves.
    """
    model.train()
    weight = torch.ones(NUM_CLASSES)
    weight[0] = 0  # ignore the background class in the loss
    loader = DataLoader(VOC12(args.datadir, input_transform, target_transform),
                        num_workers=args.num_workers, batch_size=args.batch_size, shuffle=True)
    if args.cuda:
        criterion = CrossEntropyLoss2d(weight.cuda())
        #criterion=torch.nn.BCEWithLogitsLoss()
    else:
        criterion = CrossEntropyLoss2d(weight)
    # Per-architecture optimizer settings.
    if args.model.startswith('FCN'):
        optimizer = SGD(model.parameters(), 1e-4, .9, 2e-5)
    if args.model.startswith('PSP'):
        optimizer = SGD(filter(lambda p: p.requires_grad, model.parameters()), 1e-2, 0.9, 1e-4)
        #optimizer = SGD(model.parameters(), 1e-2, .9, 1e-4)
    if args.model.startswith('Seg'):
        optimizer = SGD(filter(lambda p: p.requires_grad, model.parameters()), 1e-3, .9)
    print("Total images:", len(loader))
    best_loss = 100
    # Context manager guarantees the log file is flushed and closed.
    with open("loss.txt", "a") as f:
        for epoch in range(1, args.num_epochs + 1):
            epoch_loss = []
            iteration = 1
            for step, (images, labels) in enumerate(loader):
                print("Iter:" + str(iteration))
                iteration = iteration + 1
                if args.cuda:
                    images = images.cuda()
                    labels = labels.cuda()
                inputs = Variable(images)
                targets = Variable(labels)
                outputs = model(inputs)
                optimizer.zero_grad()
                loss = criterion(outputs, targets[:, 0])
                loss.backward()
                optimizer.step()
                # BUG FIX: loss.data[0] was removed in PyTorch >= 0.5;
                # loss.item() is the supported scalar accessor.
                print(loss.item())
                epoch_loss.append(loss.item())
                if args.steps_loss > 0 and step > 0 and step % args.steps_loss == 0:
                    average = sum(epoch_loss) / len(epoch_loss)
                    epoch_loss = []
                    if best_loss > average:
                        best_loss = average
                        torch.save(model.state_dict(), "model_pspnet_VOC_2012_analysis.pth")
                        print("Model saved!")
                    f.write("loss: " + str(average) + " epoch: " + str(epoch) + ", step: " + str(step) + "\n")
                    f.write("best loss: " + str(best_loss) + " epoch: " + str(epoch) + ", step: " + str(step) + "\n")
                    print("loss: " + str(average) + " epoch: " + str(epoch) + ", step: " + str(step))
                    print("best loss: " + str(best_loss) + " epoch: " + str(epoch) + ", step: " + str(step))
        print("Best loss: " + str(best_loss))