def init_optimizer(self):
    """Create the SGD optimizer, its step-decay scheduler, and the wrapper.

    The decay factor is chosen so that, stepping every ``STEP_SIZE``
    epochs, the learning rate halves once per ``LR_HALF_LIFE`` epochs.
    All three objects are stored on ``self``.
    """
    trainable_params = self.model.parameters()
    decay_per_step = 0.5 ** (STEP_SIZE / LR_HALF_LIFE)
    self.optimizer = SGD(trainable_params, lr=INIT_LR, momentum=MOMENTUM, weight_decay=WEIGHT_DECAY)
    self.optimizer_scheduler = lr_scheduler.StepLR(self.optimizer, step_size=STEP_SIZE, gamma=decay_per_step)
    self.optimizer_wrapper = OptimizerWrapper(self.model, self.optimizer, self.optimizer_scheduler)
def Train():
    """Train the CXRNet classifier on the dataset selected via ``args``.

    Reads dataset/model choices from the module-level ``args``/``config``,
    optionally resumes from the best checkpoint, trains with Adam + BCE
    loss, and re-saves the checkpoint whenever mean validation AUROC
    improves. Returns early with a message on an unknown dataset/model.
    """
    print('********************load data********************')
    if args.dataset == 'NIHCXR':
        dataloader_train = get_train_dataloader_NIH(batch_size=config['BATCH_SIZE'], shuffle=True, num_workers=8)
        dataloader_val = get_test_dataloader_NIH(batch_size=config['BATCH_SIZE'], shuffle=False, num_workers=8)
    elif args.dataset == 'VinCXR':
        dataloader_train = get_train_dataloader_VIN(batch_size=config['BATCH_SIZE'], shuffle=True, num_workers=8)
        dataloader_val = get_val_dataloader_VIN(batch_size=config['BATCH_SIZE'], shuffle=False, num_workers=8)
    else:
        print('No required dataset')
        return
    print('********************load data succeed!********************')

    print('********************load model********************')
    if args.model == 'CXRNet' and args.dataset == 'NIHCXR':
        N_CLASSES = len(CLASS_NAMES_NIH)
        model = CXRNet(num_classes=N_CLASSES, is_pre_trained=True)  # initialize model
        CKPT_PATH = config['CKPT_PATH'] + args.model + '_' + args.dataset + '_best.pkl'
        if os.path.exists(CKPT_PATH):
            checkpoint = torch.load(CKPT_PATH)
            model.load_state_dict(checkpoint)  # strict=False
            print("=> Loaded well-trained CXRNet model checkpoint of NIH-CXR dataset: " + CKPT_PATH)
    elif args.model == 'CXRNet' and args.dataset == 'VinCXR':
        N_CLASSES = len(CLASS_NAMES_Vin)
        model = CXRNet(num_classes=N_CLASSES, is_pre_trained=True)  # initialize model
        CKPT_PATH = config['CKPT_PATH'] + args.model + '_' + args.dataset + '_best.pkl'
        if os.path.exists(CKPT_PATH):
            checkpoint = torch.load(CKPT_PATH)
            model.load_state_dict(checkpoint)  # strict=False
            # BUG FIX: this branch loads the VinCXR checkpoint, but the
            # original message claimed "NIH-CXR dataset".
            print("=> Loaded well-trained CXRNet model checkpoint of Vin-CXR dataset: " + CKPT_PATH)
    else:
        print('No required model')
        return

    model = nn.DataParallel(model).cuda()  # make model available for multi-GPU training
    optimizer_model = optim.Adam(model.parameters(), lr=1e-3, betas=(0.9, 0.999), eps=1e-08, weight_decay=1e-5)
    # NOTE(review): gamma=1 means the learning rate never actually decays —
    # confirm this schedule is intentional.
    lr_scheduler_model = lr_scheduler.StepLR(optimizer_model, step_size=10, gamma=1)
    torch.backends.cudnn.benchmark = True  # improve train speed slightly
    bce_criterion = nn.BCELoss()  # expects probabilities — model output presumably sigmoid-activated; verify
    print('********************load model succeed!********************')

    print('********************begin training!********************')
    AUROC_best = 0.50
    for epoch in range(config['MAX_EPOCHS']):
        since = time.time()
        print('Epoch {}/{}'.format(epoch + 1, config['MAX_EPOCHS']))
        print('-' * 10)

        # ---- training phase ----
        model.train()
        train_loss = []
        with torch.autograd.enable_grad():
            for batch_idx, (image, label, _) in enumerate(dataloader_train):
                var_image = torch.autograd.Variable(image).cuda()
                var_label = torch.autograd.Variable(label).cuda()
                optimizer_model.zero_grad()
                _, var_output = model(var_image)
                loss_tensor = bce_criterion(var_output, var_label)
                loss_tensor.backward()
                optimizer_model.step()  # update parameters
                sys.stdout.write('\r Epoch: {} / Step: {} : train loss = {}'.format(
                    epoch + 1, batch_idx + 1, float('%0.6f' % loss_tensor.item())))
                sys.stdout.flush()
                train_loss.append(loss_tensor.item())
        lr_scheduler_model.step()  # advance the LR schedule once per epoch
        print("\r Eopch: %5d train loss = %.6f" % (epoch + 1, np.mean(train_loss)))

        # ---- validation phase ----
        model.eval()
        val_loss = []
        gt = torch.FloatTensor().cuda()
        pred = torch.FloatTensor().cuda()
        with torch.autograd.no_grad():
            for batch_idx, (image, label, _) in enumerate(dataloader_val):
                var_image = torch.autograd.Variable(image).cuda()
                var_label = torch.autograd.Variable(label).cuda()
                _, var_output = model(var_image)  # forward
                loss_tensor = bce_criterion(var_output, var_label)
                sys.stdout.write('\r Epoch: {} / Step: {} : validation loss = {}'.format(
                    epoch + 1, batch_idx + 1, float('%0.6f' % loss_tensor.item())))
                sys.stdout.flush()
                val_loss.append(loss_tensor.item())
                gt = torch.cat((gt, label.cuda()), 0)
                pred = torch.cat((pred, var_output.data), 0)
        AUROCs = compute_AUCs(gt, pred, N_CLASSES)
        AUROC_avg = np.array(AUROCs).mean()
        logger.info("\r Eopch: %5d validation loss = %.6f, Validataion AUROC = %.4f" %
                    (epoch + 1, np.mean(val_loss), AUROC_avg))

        # Keep only the best-AUROC checkpoint.
        if AUROC_best < AUROC_avg:
            AUROC_best = AUROC_avg
            CKPT_PATH = config['CKPT_PATH'] + args.model + '_' + args.dataset + '_best.pkl'
            # Save the unwrapped weights so the checkpoint loads without DataParallel.
            torch.save(model.module.state_dict(), CKPT_PATH)
            print(' Epoch: {} model has been already save!'.format(epoch + 1))

        time_elapsed = time.time() - since
        print('Training epoch: {} completed in {:.0f}m {:.0f}s'.format(
            epoch + 1, time_elapsed // 60, time_elapsed % 60))
# Build the architecture chosen on the command line and fine-tune it.
model_name = models[in_args.m]
model = getModel(class_names, model_name)

# Hyper-parameters taken from the CLI.
lr = in_args.lr
eps = in_args.eps

# NLLLoss pairs with a (log-)softmax final layer.
criteria = nn.NLLLoss()

# resnet exposes its head as `model.fc`; the other architectures use
# `model.classifier` (resnet has no `classifier` attribute) — do not change.
head = model.fc if in_args.m == "resnet" else model.classifier
optimizer = optim.Adam(head.parameters(), lr)

# Decay LR by a factor of 0.1 every 4 epochs.
sched = lr_scheduler.StepLR(optimizer, step_size=4, gamma=0.1)

model_ft = train_model(model, dataloaders, criteria, optimizer, sched,
                       dataset_sizes, eps)
print("Model trained")
calc_accuracy(model_ft, 'test', dataloaders)
print("Accuracy")
# Receiver/sender one-hot relation matrices for the interaction graph:
# np.where(off_diag) yields the (row, col) indices of every off-diagonal
# entry; column indices become receivers, row indices become senders.
rel_rec = np.array(encode_onehot(np.where(off_diag)[1]), dtype=np.float32)
rel_send = np.array(encode_onehot(np.where(off_diag)[0]), dtype=np.float32)
rel_rec = torch.FloatTensor(rel_rec)
rel_send = torch.FloatTensor(rel_send)

# Encoder selected on the command line.
# NOTE(review): if args.encoder is neither 'mlp' nor 'cnn', `model` is never
# bound and optim.Adam(...) below raises NameError — consider an explicit
# `else: raise ValueError(args.encoder)`.
if args.encoder == 'mlp':
    model = MLPEncoder(args.timesteps * args.dims, args.hidden,
                       args.edge_types, args.dropout, args.factor)
elif args.encoder == 'cnn':
    model = CNNEncoder(args.dims, args.hidden, args.edge_types,
                       args.dropout, args.factor)

optimizer = optim.Adam(model.parameters(), lr=args.lr)
scheduler = lr_scheduler.StepLR(optimizer, step_size=args.lr_decay,
                                gamma=args.gamma)

# Linear indices of an upper triangular mx, used for loss calculation
triu_indices = get_triu_offdiag_indices(args.num_atoms)

# Move everything onto the GPU before wrapping in Variable.
if args.cuda:
    model.cuda()
    rel_rec = rel_rec.cuda()
    rel_send = rel_send.cuda()
    triu_indices = triu_indices.cuda()

# Variable() is a no-op wrapper on modern PyTorch (kept for compatibility).
rel_rec = Variable(rel_rec)
rel_send = Variable(rel_send)

# Snapshot of the current weights; presumably replaced by the best
# validation weights later in the script — confirm downstream.
best_model_params = model.state_dict()
def main(args):
    """Train the NICE flow on MNIST against a frozen pretrained CNN.

    The objective is ``nll_loss + r * loss2`` where ``r = args.r`` weights
    the model's auxiliary loss. The model is evaluated on the test split
    after every epoch; the final weights are saved under ``nice_model/``.
    """
    device = torch.device("cuda")
    r = args.r  # weight of the auxiliary loss term

    # load data
    train_loader = torch.utils.data.DataLoader(
        datasets.MNIST('../data', train=True, download=True,
                       transform=transforms.Compose([
                           transforms.ToTensor(),
                       ])),
        batch_size=100, shuffle=True)
    test_loader = torch.utils.data.DataLoader(
        datasets.MNIST('../data', train=False,
                       transform=transforms.Compose([
                           transforms.ToTensor(),
                       ])),
        batch_size=100, shuffle=True)

    # create NICE model; load and freeze the pretrained target CNN
    model = NICE().to(device)
    model.cnn.load_state_dict(torch.load('pretrained_model/model.pth'))
    for p in model.cnn.parameters():
        p.requires_grad = False

    # optimizer
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    scheduler = lrsch.StepLR(optimizer, step_size=5, gamma=0.1)

    # Train the model
    for epoch in range(10):
        model.train()
        for batch_idx, (xb, yb) in enumerate(train_loader):
            inputs = xb.to(device)
            targets = yb.to(device)

            # Forward pass
            outputs, z, loss2 = model(inputs)
            loss1 = F.nll_loss(outputs, targets)
            loss2 = torch.mean(loss2)
            loss = loss1 + r * loss2

            # Backward and optimize
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        # Step once per epoch so the LR decays every 5 epochs.
        scheduler.step()

        if (epoch + 1) % 1 == 0:
            # NOTE(review): 'Loss2' is reported weighted by r while 'Loss1'
            # is unweighted — confirm this asymmetry is intended.
            print('Epoch [{}/{}], Loss: {:.4f}, Loss1: {:.4f}, Loss2: {:.4f}'.
                  format(epoch + 1, 10, loss.item(), loss1.item(),
                         loss2.item() * r))

        # Evaluate on the test split.
        model.eval()
        test_loss = 0
        correct = 0
        with torch.no_grad():
            for data, target in test_loader:
                data, target = data.to(device), target.to(device)
                output, _, _ = model(data)
                # BUG FIX: accumulate the per-sample sum; the default 'mean'
                # reduction made the division by the dataset size below
                # under-report the average loss by ~batch_size.
                test_loss += F.nll_loss(output, target,
                                        reduction='sum').item()
                pred = output.argmax(
                    dim=1, keepdim=True)  # index of the max log-probability
                correct += pred.eq(target.view_as(pred)).sum().item()
        test_loss /= len(test_loader.dataset)
        print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.
              format(test_loss, correct, len(test_loader.dataset),
                     100. * correct / len(test_loader.dataset)))

    if not os.path.exists('nice_model'):
        os.makedirs('nice_model')
    torch.save(model.state_dict(), 'nice_model/model_r_{}.pth'.format(r))
def main():
    """Entry point: train (or evaluate) a person re-id model per the global ``args``."""
    torch.manual_seed(args.seed)
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu_devices
    use_gpu = torch.cuda.is_available()
    if args.use_cpu: use_gpu = False

    # Redirect stdout into a per-mode log file.
    if not args.evaluate:
        sys.stdout = Logger(osp.join(args.save_dir, 'log_train.txt'))
    else:
        sys.stdout = Logger(osp.join(args.save_dir, 'log_test.txt'))

    # tensorboardX
    writer = SummaryWriter(log_dir=osp.join(args.save_dir, 'summary'))
    print("==========\nArgs:{}\n==========".format(args))

    if use_gpu:
        print("Currently using GPU {}".format(args.gpu_devices))
        cudnn.benchmark = True
        torch.cuda.manual_seed_all(args.seed)
    else:
        print("Currently using CPU (GPU is highly recommended)")

    print("Initializing dataset {}".format(args.dataset))
    dataset = data_manager.init_img_dataset(
        root=args.root,
        name=args.dataset,
        split_id=args.split_id,
        cuhk03_labeled=args.cuhk03_labeled,
        cuhk03_classic_split=args.cuhk03_classic_split,
    )

    # Training augmentation (ImageNet normalization statistics).
    transform_train = T.Compose([
        T.Random2DTranslation(args.height, args.width),
        T.RandomHorizontalFlip(),
        T.ToTensor(),
        T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])
    if args.random_erasing:
        # Same pipeline with RandomErasing appended.
        transform_train = T.Compose([
            T.Random2DTranslation(args.height, args.width),
            T.RandomHorizontalFlip(),
            T.ToTensor(),
            T.Normalize(mean=[0.485, 0.456, 0.406],
                        std=[0.229, 0.224, 0.225]),
            RandomErasing(probability=args.probability,
                          mean=[0.0, 0.0, 0.0]),
        ])

    # transform_test = T.Compose([
    #     T.Resize((args.height, args.width)),
    #     T.ToTensor(),
    #     T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    # ])

    pin_memory = True if use_gpu else False

    # NOTE(review): if args.loss is neither 'xent,htri' nor 'xent',
    # `trainloader` is never bound and len(trainloader) below raises.
    if args.loss == 'xent,htri':
        trainloader = DataLoader(
            ImageDataset(dataset.train, transform=transform_train),
            sampler=RandomIdentitySampler(dataset.train,
                                          num_instances=args.num_instances),
            batch_size=args.train_batch,
            num_workers=args.workers,
            pin_memory=pin_memory,
            drop_last=True,
        )
    elif args.loss == 'xent':
        trainloader = DataLoader(
            ImageDataset(dataset.train, transform=transform_train),
            batch_size=args.train_batch,
            shuffle=True,
            num_workers=args.workers,
            pin_memory=pin_memory,
            drop_last=True,
        )

    # queryloader = DataLoader(
    #     ImageDataset(dataset.query, transform=transform_test),
    #     batch_size=args.test_batch, shuffle=False, num_workers=args.workers,
    #     pin_memory=pin_memory, drop_last=False,
    # )
    # galleryloader = DataLoader(
    #     ImageDataset(dataset.gallery, transform=transform_test),
    #     batch_size=args.test_batch, shuffle=False, num_workers=args.workers,
    #     pin_memory=pin_memory, drop_last=False,
    # )

    print("Initializing model: {}".format(args.arch))
    model = models.init_model(name=args.arch,
                              num_classes=dataset.num_train_pids,
                              loss=args.loss)
    print("Model size: {:.5f}M".format(
        sum(p.numel() for p in model.parameters()) / 1000000.0))

    criterion_xent = CrossEntropyLabelSmooth(
        num_classes=dataset.num_train_pids, use_gpu=use_gpu)
    criterion_htri = TripletLoss(margin=args.margin)

    optimizer = init_optim(args.optim, model.parameters(), args.lr,
                           args.weight_decay)
    # Fixed-step decay only when not warming up; warmup uses adjust_lr below.
    if args.stepsize > 0:
        if not args.warmup:
            scheduler = lr_scheduler.StepLR(optimizer,
                                            step_size=args.stepsize,
                                            gamma=args.gamma)
    start_epoch = args.start_epoch

    if args.resume:
        print("Loading checkpoint from '{}'".format(args.resume))
        checkpoint = torch.load(args.resume)
        model.load_state_dict(checkpoint['state_dict'])
        start_epoch = checkpoint['epoch']

    if use_gpu:
        model = nn.DataParallel(model).cuda()

    if args.evaluate:
        print("Evaluate only")
        # NOTE(review): queryloader/galleryloader are commented out above, so
        # this call raises NameError whenever --evaluate is used.
        test(model, queryloader, galleryloader, use_gpu)
        return

    def adjust_lr(optimizer, ep):
        # Hand-crafted warmup + piecewise-constant LR schedule by epoch.
        if ep < 20:
            lr = 1e-4 * (ep + 1) / 2
        elif ep < 80:
            #lr = 1e-3 * len(args.gpu_devices)
            lr = 1e-3
        elif ep < 180:
            #lr = 1e-4 * len(args.gpu_devices)
            lr = 1e-4
        elif ep < 300:
            #lr = 1e-5 * len(args.gpu_devices)
            lr = 1e-5
        elif ep < 320:
            #lr = 1e-5 * 0.1 ** ((ep - 320) / 80) * len(args.gpu_devices)
            lr = 1e-5 * 0.1**((ep - 320) / 80)
        elif ep < 400:
            lr = 1e-6
        elif ep < 480:
            #lr = 1e-4 * len(args.gpu_devices)
            lr = 1e-4
        else:
            #lr = 1e-5 * len(args.gpu_devices)
            lr = 1e-5
        for p in optimizer.param_groups:
            p['lr'] = lr

    length = len(trainloader)
    start_time = time.time()
    train_time = 0
    best_rank1 = -np.inf
    best_epoch = 0
    #best_rerank1 = -np.inf
    #best_rerankepoch = 0
    print("==> Start training")

    for epoch in range(start_epoch, args.max_epoch):
        start_train_time = time.time()
        if args.stepsize > 0:
            if args.warmup:
                adjust_lr(optimizer, epoch + 1)
            else:
                scheduler.step()
        train(epoch, model, criterion_xent, criterion_htri, optimizer,
              trainloader, use_gpu=use_gpu, summary=writer, length=length)
        train_time += round(time.time() - start_train_time)

        if (epoch + 1) > args.start_eval and args.eval_step > 0 and (
                epoch + 1) % args.eval_step == 0 or (epoch +
                                                     1) == args.max_epoch:
            # print("==> Test")
            # rank1 = test(epoch, model, queryloader, galleryloader, use_gpu=True, summary=writer)
            # is_best = rank1 > best_rank1
            # if is_best:
            #     best_rank1 = rank1
            #     best_epoch = epoch + 1
            # ####### Best Rerank
            # #is_rerankbest = rerank1 > best_rerank1
            # #if is_rerankbest:
            # #    best_rerank1 = rerank1
            # #    best_rerankepoch = epoch + 1
            if use_gpu:
                state_dict = model.module.state_dict()
            else:
                state_dict = model.state_dict()
            # NOTE(review): `rank1` and `is_best` are only assigned inside the
            # commented-out test block above, so this call raises NameError
            # the first time this branch runs — re-enable the test block or
            # define fallbacks before saving.
            save_checkpoint(
                {
                    'state_dict': state_dict,
                    'rank1': rank1,
                    'epoch': epoch,
                }, is_best,
                osp.join(args.save_dir,
                         'checkpoint_ep' + str(epoch + 1) + '.pth.tar'))
    writer.close()
    # print("==> Best Rank-1 {:.1%}, achieved at epoch {}".format(best_rank1, best_epoch))
    #print("==> Best Rerank-1 {:.1%}, achieved at epoch {}".format(best_rerank1, best_rerankepoch))

    elapsed = round(time.time() - start_time)
    elapsed = str(datetime.timedelta(seconds=elapsed))
    train_time = str(datetime.timedelta(seconds=train_time))
    print(
        "Finished. Total elapsed time (h:m:s): {}. Training time (h:m:s): {}.".
        format(elapsed, train_time))
def train(model, optimizer, data_loaders, metrics, transformers_dict, prot_desc_dict,
          tasks, view, n_iters=5000, is_hsearch=False, sim_data_node=None,
          epoch_ckpt=(2, 1.0), tb_writer=None):
    """Train a single-view DTI model and return the best-scoring weights.

    :param model: the model to optimize (moved to CUDA elsewhere; the
        module-level ``cuda`` flag controls device placement of targets).
    :param optimizer: torch optimizer over ``model``'s parameters.
    :param data_loaders: dict with "train" and "val"/"test" loaders.
    :param view: (compound_view, protein_view) pair.
    :param n_iters: total mini-batch budget; epochs = n_iters // len(train).
    :param is_hsearch: evaluate on "val" instead of "test" when True.
    :param epoch_ckpt: (interval, loss threshold) early-termination rule.
    :return: dict with the reloaded best 'model', its 'score' and 'epoch'.
    """
    tb_writer = tb_writer()
    comp_view, prot_view = view
    start = time.time()
    best_model_wts = model.state_dict()
    best_score = -10000
    best_epoch = -1
    terminate_training = False
    e_avg = ExpAverage(.01)
    n_epochs = n_iters // len(data_loaders["train"])
    scheduler = sch.StepLR(optimizer, step_size=400, gamma=0.01)
    criterion = torch.nn.MSELoss()

    # sub-nodes of sim data resource
    loss_lst = []
    train_loss_node = DataNode(label="training_loss", data=loss_lst)
    metrics_dict = {}
    metrics_node = DataNode(label="validation_metrics", data=metrics_dict)
    scores_lst = []
    scores_node = DataNode(label="validation_score", data=scores_lst)

    # add sim data nodes to parent node
    if sim_data_node:
        sim_data_node.data = [train_loss_node, metrics_node, scores_node]

    try:
        # Main training loop
        tb_idx = {'train': Count(), 'val': Count(), 'test': Count()}
        for epoch in range(n_epochs):
            if terminate_training:
                print("Terminating training...")
                break
            for phase in ["train", "val" if is_hsearch else "test"]:
                if phase == "train":
                    print("Training....")
                    # Training mode
                    model.train()
                else:
                    print("Validation...")
                    # Evaluation mode
                    model.eval()

                data_size = 0.
                epoch_losses = []
                epoch_scores = []

                # Iterate through mini-batches
                i = 0
                with TBMeanTracker(tb_writer, 10) as tracker:
                    for batch in tqdm(data_loaders[phase]):
                        batch_size, data = batch_collator(batch, prot_desc_dict,
                                                          spec=comp_view)
                        # Data: pick the protein representation matching the view.
                        if prot_view in ["p2v", "rnn", "pcnn", "pcnna"]:
                            protein_x = data[comp_view][0][2]
                        else:  # then it's psc
                            protein_x = data[comp_view][0][1]
                        if comp_view == "gconv":
                            # graph data structure is:
                            # [(compound data, batch_size), protein_data]
                            X = ((data[comp_view][0][0], batch_size), protein_x)
                        else:
                            X = (data[comp_view][0][0], protein_x)
                        y = data[comp_view][1]
                        w = data[comp_view][2]
                        # BUG FIX: np.float was deprecated in NumPy 1.20 and
                        # removed in 1.24; the builtin `float` is the drop-in.
                        y = np.array([k for k in y], dtype=float)
                        w = np.array([k for k in w], dtype=float)

                        optimizer.zero_grad()

                        # forward propagation
                        # track history if only in train
                        with torch.set_grad_enabled(phase == "train"):
                            outputs = model(X)
                            target = torch.from_numpy(y).float()
                            weights = torch.from_numpy(w).float()
                            if cuda:
                                target = target.cuda()
                                weights = weights.cuda()
                            outputs = outputs * weights
                            target = target * weights
                            loss = criterion(outputs, target)
                        # BUG FIX: compare numerically instead of via the
                        # string repr ("nan" vs "NaN"-style formatting).
                        if np.isnan(loss.item()):
                            terminate_training = True
                            break

                        # metrics
                        eval_dict = {}
                        score = SingleViewDTI.evaluate(eval_dict, y, outputs, w,
                                                       metrics, tasks,
                                                       transformers_dict[comp_view])

                        # TBoard info
                        tracker.track("%s/loss" % phase, loss.item(),
                                      tb_idx[phase].IncAndGet())
                        tracker.track("%s/score" % phase, score, tb_idx[phase].i)
                        for k in eval_dict:
                            tracker.track('{}/{}'.format(phase, k),
                                          eval_dict[k], tb_idx[phase].i)

                        if phase == "train":
                            print("\tEpoch={}/{}, batch={}/{}, loss={:.4f}".format(
                                epoch + 1, n_epochs, i + 1,
                                len(data_loaders[phase]), loss.item()))
                            # for epoch stats
                            epoch_losses.append(loss.item())
                            # for sim data resource
                            loss_lst.append(loss.item())
                            # optimization ops
                            loss.backward()
                            optimizer.step()
                        else:
                            # for epoch stats
                            epoch_scores.append(score)
                            # for sim data resource
                            scores_lst.append(score)
                            for m in eval_dict:
                                if m in metrics_dict:
                                    metrics_dict[m].append(eval_dict[m])
                                else:
                                    metrics_dict[m] = [eval_dict[m]]
                            print("\nEpoch={}/{}, batch={}/{}, "
                                  "evaluation results= {}, score={}".format(
                                      epoch + 1, n_epochs, i + 1,
                                      len(data_loaders[phase]), eval_dict, score))
                        i += 1
                        data_size += batch_size
                # End of mini-batch iterations.

                if phase == "train":
                    ep_loss = np.nanmean(epoch_losses)
                    e_avg.update(ep_loss)
                    # Early-stop when the smoothed loss is still above the
                    # checkpoint threshold at the checkpoint interval.
                    if epoch % (epoch_ckpt[0] - 1) == 0 and epoch > 0:
                        if e_avg.value > epoch_ckpt[1]:
                            terminate_training = True
                    # Adjust the learning rate.
                    scheduler.step()
                    print("\nPhase: {}, avg task loss={:.4f}, ".format(
                        phase, np.nanmean(epoch_losses)))
                else:
                    mean_score = np.mean(epoch_scores)
                    if best_score < mean_score:
                        best_score = mean_score
                        best_model_wts = copy.deepcopy(model.state_dict())
                        best_epoch = epoch
    except Exception as e:
        # Best-effort: report and fall through to return the best weights so far.
        print(str(e))

    duration = time.time() - start
    print('\nModel training duration: {:.0f}m {:.0f}s'.format(
        duration // 60, duration % 60))

    model.load_state_dict(best_model_wts)
    return {'model': model, 'score': best_score, 'epoch': best_epoch}
def main(args):
    """Train a multi-instrument classifier and plot accuracy/loss curves."""

    def train_model(model, criterion, optimizer, scheduler, num_epochs):
        """Run the training loop and return the model.

        NOTE(review): ``best_model_wts`` is captured once before training
        and never updated, so the weights restored at the end are the
        *initial* weights — track a best (validation) epoch if the final
        model is meant to be the best one.
        """
        since = time()
        best_model_wts = copy.deepcopy(model.state_dict())
        for epoch in range(num_epochs):
            print('Epoch {}/{}'.format(epoch, num_epochs - 1))
            print('-' * 10)
            running_loss = 0.0
            running_corrects = []
            if args.matrix == "yes":
                # Per-label true/pred histories (11 labels) for a confusion matrix.
                true = [[] for _ in range(11)]
                pred = [[] for _ in range(11)]
            for j, data in enumerate(train_loader):
                inputs, labels = data
                if torch.cuda.is_available():
                    inputs = inputs.to(device)
                    labels = labels.to(device)
                    if args.matrix == "yes":
                        # NOTE(review): extends the outer list with whole label
                        # rows, unlike the per-label append below — the two
                        # branches build `true` in different layouts; verify.
                        true.extend(labels.cpu().numpy())
                # NOTE(review): `gpu` is not defined in this function or the
                # visible enclosing scope — presumably a module-level flag.
                if not gpu and args.matrix == "yes":
                    for row in labels:
                        for l in range(11):
                            true[l].append(row.numpy()[l])
                optimizer.zero_grad()
                outputs = model(inputs)
                preds = outputs > 0.5  # multi-label decision threshold
                if args.matrix == "yes":
                    for row in preds:
                        for l in range(11):
                            pred[l].append(row.float().numpy()[l])
                loss = criterion(
                    outputs.view(-1).float(), labels.view(-1).float())
                loss.backward()
                optimizer.step()
                running_loss += loss.item()
                # Precision-like score: true positives / predicted positives.
                running_corrects.append(
                    torch.sum((preds.float() == labels.float()) *
                              (labels.float() > 0)).item() /
                    (1e-5 + (preds > 0).sum().item()))
            scheduler.step()
            epoch_loss = running_loss / len(running_corrects)
            epoch_acc = sum(running_corrects) / len(running_corrects)
            model.eval()
            # val_loss, val_acc = evaluate(model, valid_loader)
            model.train()
            plot_train_acc.append(epoch_acc)
            # plot_valid_acc.append(val_acc)
            plot_train_loss.append(epoch_loss)
            # plot_valid_loss.append(val_loss)
            nRec.append(epoch)
            print('Train Loss: {:.4f} Train Acc: {:.4f}'.format(
                epoch_loss, epoch_acc))
            print()
            test_loss, test_acc = evaluate(model, test_loader)
            print('test Loss: {:.4f} test Acc: {:.4f}'.format(
                test_loss, test_acc))
        time_elapsed = time() - since
        print('Training complete in {:.0f}m {:.0f}s'.format(
            time_elapsed // 60, time_elapsed % 60))
        model.load_state_dict(best_model_wts)
        return model

    # NOTE(review): the first load is immediately overwritten by the
    # hard-coded "string_test.pkl", so args.pkl_file is effectively ignored.
    data = pd.read_pickle('%s.pkl' % args.pkl_file)
    data = pd.read_pickle("string_test.pkl")
    str_data = pd.read_pickle("string_test_pt2.pkl")
    labels = data["instruments"].values
    music_data = data["normalized"].values
    str_labels = str_data["instruments"].values
    str_music_data = str_data["normalized"].values
    music_data = np.stack(music_data).reshape(-1, 128 * 65)  # 65*128, 1025 * 65
    str_music_data = np.stack(str_music_data).reshape(-1, 128 * 65)
    train_data, valid_data, train_labels, valid_labels = train_test_split(
        music_data, labels, test_size=0.1, random_state=1)
    train_set = MusicDataset(train_data, train_labels)
    valid_set = MusicDataset(valid_data, valid_labels)
    test_set = MusicDataset(str_music_data, str_labels)
    train_loader = DataLoader(train_set, batch_size=args.batch_size, shuffle=True)
    valid_loader = DataLoader(valid_set, batch_size=args.batch_size, shuffle=True)
    test_loader = DataLoader(test_set, batch_size=args.batch_size, shuffle=True)
    model_ft = MultiInstrumClass(128 * 65, 11, args.emb_dim, args.hidden_dim,
                                 args.model)
    # model_ft = MultiLP(128*64)
    if torch.cuda.is_available():
        # BUG FIX: was `model.cuda()` — `model` is unbound in this scope (it
        # is only a parameter of train_model), raising NameError on GPU hosts.
        model_ft.cuda()
    plot_train_acc, plot_valid_acc, plot_train_loss, plot_valid_loss, nRec = [], [], [], [], []
    optimizer_ft = torch.optim.Adam(model_ft.parameters(), lr=args.lr,
                                    weight_decay=.04)
    exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=7, gamma=0.1)
    # NOTE(review): `criterion` is not defined in this function — presumably
    # a module-level loss object; verify it exists.
    model_ft = train_model(model_ft, criterion, optimizer_ft, exp_lr_scheduler,
                           args.epochs)
    # NOTE(review): plot_valid_acc / plot_valid_loss stay empty (the
    # validation appends are commented out), so plotting them against nRec
    # fails with a length mismatch once at least one epoch has run.
    fig = plt.figure()
    ax = plt.subplot(1, 2, 1)
    plt.plot(nRec, plot_train_acc, label='Training')
    plt.plot(nRec, plot_valid_acc, label='Validation')
    plt.title('Accuracy vs. Epoch')
    plt.xlabel("Epoch")
    plt.ylabel("Accuracy")
    ax.legend()
    bx = plt.subplot(1, 2, 2)
    bx.plot(nRec, plot_train_loss, label='Training')
    bx.plot(nRec, plot_valid_loss, label='Validation')
    plt.title('Loss vs. Epoch')
    plt.xlabel("Epoch")
    plt.ylabel("Loss")
    bx.legend()
    plt.show()
    plt.savefig("%s.png" % args.model)
    plt.clf()
def main():
    """Train the stage-2 tampering-detection model (model2) with a frozen
    stage-1 model (model1), logging metrics to TensorBoard and email."""
    args.cuda = True
    # 1 choose the data you want to use
    using_data = {
        'my_sp': True,
        'my_cm': True,
        'template_casia_casia': True,
        'template_coco_casia': True,
        'cod10k': True,
        'casia': False,
        'coverage': False,
        'columb': False,
        'negative_coco': True,
        'negative_casia': False,
        'texture_sp': True,
        'texture_cm': True,
    }
    using_data_test = {
        'my_sp': False,
        'my_cm': False,
        'template_casia_casia': False,
        'template_coco_casia': False,
        'cod10k': False,
        'casia': False,
        'coverage': True,
        'columb': False,
        'negative_coco': False,
        'negative_casia': False,
    }
    # 2 define the three dataset splits
    trainData = TamperDataset(stage_type='stage2', using_data=using_data,
                              train_val_test_mode='train')
    valData = TamperDataset(stage_type='stage2', using_data=using_data,
                            train_val_test_mode='val')
    testData = TamperDataset(stage_type='stage2', using_data=using_data_test,
                             train_val_test_mode='test')
    # 3 specific dataloader
    trainDataLoader = torch.utils.data.DataLoader(trainData,
                                                  batch_size=args.batch_size,
                                                  num_workers=3,
                                                  shuffle=True,
                                                  pin_memory=False)
    valDataLoader = torch.utils.data.DataLoader(valData,
                                                batch_size=args.batch_size,
                                                num_workers=3)
    testDataLoader = torch.utils.data.DataLoader(testData,
                                                 batch_size=args.batch_size,
                                                 num_workers=0)
    # model
    model1 = Net1()
    model2 = Net2()
    if torch.cuda.is_available():
        model1.cuda()
        model2.cuda()
    else:
        model1.cpu()
        model2.cpu()
    # Explicit weight initialization (otherwise default init applies).
    model1.apply(weights_init)
    model2.apply(weights_init)

    # Only model2 is trained; model1's optimizer stays disabled.
    # optimizer1 = optim.Adam(model1.parameters(), lr=1e-5, betas=(0.9, 0.999), eps=1e-8)
    optimizer2 = optim.Adam(model2.parameters(), lr=args.lr,
                            betas=(0.9, 0.999), eps=1e-8)

    # Resume model1 from a checkpoint if one was given.
    if args.resume[0]:
        if isfile(args.resume[0]):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint1 = torch.load(args.resume[0])
            # checkpoint2 = torch.load(args.resume[1])
            model1.load_state_dict(checkpoint1['state_dict'])
            # optimizer1.load_state_dict(checkpoint1['optimizer'])
            # model2.load_state_dict(checkpoint2['state_dict'])
            # optimizer2.load_state_dict(checkpoint2['optimizer'])
            print("=> loaded checkpoint '{}'".format(args.resume))
        else:
            # BUG FIX: this branch means the file is missing; the message
            # used to read "Error!!!! checkpoint found at ...".
            print("=> Error!!!! no checkpoint found at '{}'".format(args.resume))
    else:
        print("=> no checkpoint found at '{}'".format(args.resume))

    # Learning-rate schedule (model2 only).
    # scheduler1 = lr_scheduler.StepLR(optimizer1, step_size=args.stepsize, gamma=args.gamma)
    scheduler2 = lr_scheduler.StepLR(optimizer2, step_size=args.stepsize,
                                     gamma=args.gamma)
    # scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, 'min', factor=0.5, patience=3, verbose=True)

    # Epoch loop
    for epoch in range(args.start_epoch, args.maxepoch):
        train_avg = train(model1=model1, model2=model2, optimizer2=optimizer2,
                          dataParser=trainDataLoader, epoch=epoch)
        val_avg = val(model1=model1, model2=model2,
                      dataParser=valDataLoader, epoch=epoch)
        test_avg = test(model1=model1, model2=model2,
                        dataParser=testDataLoader, epoch=epoch)

        # ---- write per-epoch scalars to TensorBoard ----
        try:
            writer.add_scalars('tr/val/test_avg_loss_per_epoch', {
                'train': train_avg['loss_avg'],
                'val': val_avg['loss_avg'],
                'test': test_avg['loss_avg']
            }, global_step=epoch)
            writer.add_scalars('tr/val/test_avg_f1_per_epoch', {
                'train': train_avg['f1_avg_stage2'],
                'val': val_avg['f1_avg_stage2'],
                'test': test_avg['f1_avg_stage2']
            }, global_step=epoch)
            writer.add_scalars('tr/val/test_avg_precision_per_epoch', {
                'train': train_avg['precision_avg_stage2'],
                'val': val_avg['precision_avg_stage2'],
                'test': test_avg['precision_avg_stage2']
            }, global_step=epoch)
            writer.add_scalars('tr/val/test_avg_acc_per_epoch', {
                'train': train_avg['accuracy_avg_stage2'],
                'val': val_avg['accuracy_avg_stage2'],
                'test': test_avg['accuracy_avg_stage2']
            }, global_step=epoch)
            writer.add_scalars('tr/val/test_avg_recall_per_epoch', {
                'train': train_avg['recall_avg_stage2'],
                'val': val_avg['recall_avg_stage2'],
                'test': test_avg['recall_avg_stage2']
            }, global_step=epoch)
            # BUG FIX: get_lr() returns a list; add_scalar needs a scalar, so
            # the original call raised inside this try and silently skipped
            # all scalar logging. (get_last_lr() is preferred on torch>=1.4.)
            writer.add_scalar('lr_per_epoch_stage2', scheduler2.get_lr()[0],
                              global_step=epoch)
        except Exception as e:
            print(e)

        output_name = output_name_file_name % \
            (epoch, val_avg['loss_avg'], val_avg['f1_avg_stage2'],
             val_avg['precision_avg_stage2'], val_avg['accuracy_avg_stage2'],
             val_avg['recall_avg_stage2'])

        # ---- email the epoch summary (best-effort) ----
        try:
            # send_msn(epoch, f1=val_avg['f1_avg'])
            # BUG FIX: the format string lists f1 before loss but the values
            # were passed loss-first, and the keys lacked the '_stage2'
            # suffix used everywhere else (the KeyError was silently
            # swallowed by the bare except below).
            email_output_train = 'The train epoch:%d,f1:%f,loss:%f,precision:%f,accuracy:%f,recall:%f' % \
                (epoch, train_avg['f1_avg_stage2'], train_avg['loss_avg'],
                 train_avg['precision_avg_stage2'],
                 train_avg['accuracy_avg_stage2'],
                 train_avg['recall_avg_stage2'])
            email_output_val = 'The val epoch:%d,f1:%f,loss:%f,precision:%f,accuracy:%f,recall:%f' % \
                (epoch, val_avg['f1_avg_stage2'], val_avg['loss_avg'],
                 val_avg['precision_avg_stage2'],
                 val_avg['accuracy_avg_stage2'],
                 val_avg['recall_avg_stage2'])
            email_output_test = 'The test epoch:%d,f1:%f,loss:%f,precision:%f,accuracy:%f,recall:%f' % \
                (epoch, test_avg['f1_avg_stage2'], test_avg['loss_avg'],
                 test_avg['precision_avg_stage2'],
                 test_avg['accuracy_avg_stage2'],
                 test_avg['recall_avg_stage2'])
            email_output = email_output_train + '\n' + email_output_val + '\n' + email_output_test + '\n\n\n'
            email_list.append(email_output)
            send_email(str(email_header), context=str(email_list))
        except:
            pass

        # ---- checkpoint every epoch ----
        if epoch % 1 == 0:
            # save_model_name_stage1 = os.path.join(args.model_save_dir, 'stage1' + output_name)
            save_model_name_stage2 = os.path.join(args.model_save_dir,
                                                  'stage2' + output_name)
            # torch.save({'epoch': epoch, 'state_dict': model1.state_dict(), 'optimizer': optimizer1.state_dict()},
            #            save_model_name_stage1)
            torch.save(
                {
                    'epoch': epoch,
                    'state_dict': model2.state_dict(),
                    'optimizer': optimizer2.state_dict()
                }, save_model_name_stage2)

        # BUG FIX: scheduler1 is never created (its optimizer is commented
        # out above), so stepping it raised NameError at the end of every
        # epoch; keep it disabled alongside optimizer1.
        # scheduler1.step(epoch=epoch)
        scheduler2.step(epoch=epoch)
    print('训练已完成!')
def main(args):
    """Entry point: load the selected dataset, then build and train model(s).

    Chooses between the project's own CSV-indexed datasets and torchvision
    datasets, then trains either every model in ``MODEL_NAMES`` (when
    ``args.run_all_models`` is set) or the single ``args.model_name``.

    BUG FIX: the single-model branch previously called ``train_model`` with
    the undefined loop variable ``model_name`` (a NameError whenever
    ``run_all_models`` was off); it now uses ``args.model_name``.
    """
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    # add logging
    print(args)
    log_path, model_path = do_logging(args.logs_base_path,
                                      args.models_base_path)

    # ImageNet-style preprocessing.  ToPILImage might have to be left out
    # when using cifar10 (torchvision datasets already yield PIL images).
    data_transforms = {
        'train':
        transforms.Compose([
            transforms.ToPILImage(),
            transforms.Resize(224),
            transforms.CenterCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406],
                                 [0.229, 0.224, 0.225])
        ]),
        'val':
        transforms.Compose([
            transforms.ToPILImage(),
            transforms.Resize(224),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406],
                                 [0.229, 0.224, 0.225])
        ]),
    }

    datasets_path = os.path.join(os.path.expanduser(args.root_path),
                                 "datasets")
    dataset_depth = {
        'train': args.train_dataset_depth,
        'val': args.val_dataset_depth
    }
    # Own datasets are always .pid files, so keep them in a separate folder
    # from the jpeg files loaded from torchvision.
    if args.torchvision_dataset:
        datasets_path = os.path.join(os.path.expanduser(args.root_path),
                                     "jpg_datasets")

    dataloaders, dataset_sizes, num_classes = None, None, None

    # Registry of the project's own datasets (name + train/val CSVs).
    dataset_names_csvs = [{
        'name': 'omniglot_1_folder_splits',
        'csv_train': '/nfs/home4/mhouben/facenet_pytorch/datasets/omniglot_train_1_folder.csv',
        'csv_val': '/nfs/home4/mhouben/facenet_pytorch/datasets/omniglot_val_1_folder.csv',
        'best_models_path': ''
    }, {
        'name': 'vggface2',
        'csv_train': '/nfs/home4/mhouben/facenet_pytorch/datasets/train_vggface2.csv',
        'csv_val': '/nfs/home4/mhouben/facenet_pytorch/datasets/test_vggface2.csv',
        'best_models_path': ''
    }, {
        'name': 'CASIA_aligned',
        'csv_train': '/nfs/home4/mhouben/facenet_pytorch/datasets/CASIA_train.csv',
        'csv_val': '/nfs/home4/mhouben/facenet_pytorch/datasets/CASIA_test.csv',
        'best_models_path': ''
    }, {
        'name': 'inat_reptiles',
        'csv_train': '/nfs/home4/mhouben/facenet_pytorch/datasets/inaturalist2019_alphabet_csvs/train/Reptiles.csv',
        'csv_val': '/nfs/home4/mhouben/facenet_pytorch/datasets/inaturalist2019_alphabet_csvs/val/Reptiles.csv',
        'best_models_path': ''
    }, {
        'name': 'inat_amphibians',
        'csv_train': '/nfs/home4/mhouben/facenet_pytorch/datasets/inaturalist2019_alphabet_csvs/train/Amphibians.csv',
        'csv_val': '/nfs/home4/mhouben/facenet_pytorch/datasets/inaturalist2019_alphabet_csvs/val/Amphibians.csv',
        'best_models_path': ''
    }]

    # The five name-specific branches of the original were byte-identical;
    # collapse them into one membership test over the registry names.
    own_dataset_names = {entry['name'] for entry in dataset_names_csvs}
    if args.dataset_name in own_dataset_names:
        dataset_path = os.path.join(os.path.expanduser(datasets_path),
                                    args.dataset_name)
        dataloaders, dataset_sizes, num_classes = load_own_data(
            dataset_path, args.train_csv_path, args.val_csv_path,
            args.image_count, args.train_format, args.valid_format,
            args.train_dataset_depth, args.val_dataset_depth,
            data_transforms, args.batch_size)
    elif args.torchvision_dataset:
        dataset_path = os.path.join(os.path.expanduser(datasets_path),
                                    args.dataset_name)
        dataloaders, dataset_sizes, num_classes = load_torchvision_data(
            args.dataset_name, dataset_path, data_transforms, dataset_depth,
            args.batch_size)
    else:
        raise Exception("This dataset is not known.")
    print("num_classes: ", num_classes)

    def _build_and_train(model_name):
        """Build one torchvision model, move it to `device`, and train it."""
        model = load_tv_model(model_name, num_classes,
                              args.pretrained_imagenet)
        model = model.to(device)
        print("model: ", model)
        criterion = nn.CrossEntropyLoss()
        optimizer = optim.SGD(model.parameters(),
                              lr=args.learning_rate,
                              momentum=0.9)
        # NOTE(review): step_size is a float (num_epochs / 4); StepLR
        # documents an int — kept to preserve behavior, but confirm.
        scheduler = lr_scheduler.StepLR(optimizer,
                                        step_size=args.num_epochs / 4,
                                        gamma=0.1)
        return train_model(device, model, model_name, criterion, optimizer,
                           scheduler, args.dataset_name, dataloaders,
                           dataset_sizes, log_path, model_path,
                           num_epochs=args.num_epochs)

    if args.run_all_models:
        MODEL_NAMES = ['5_layer_net', 'resnet34', 'vgg16', 'googlenet',
                       'alexnet']
        for model_name in MODEL_NAMES:
            try:
                model_ft = _build_and_train(model_name)
            except Exception:  # keep going: one bad model must not stop the sweep
                print("traceback: ", traceback.format_exc())
                print("something went wrong with model ", model_name)
    else:
        # BUG FIX: originally passed the undefined name ``model_name``.
        model_ft = _build_and_train(args.model_name)
def train(args):
    """Trains model for args.epochs epochs (docstring said nepochs; the code
    reads ``args.epochs``).

    Pipeline: a pretrained Seq2Seq language model and VGG16 feature extractor
    provide self-critical rewards for fine-tuning the convcap captioning
    model on COCO.  Saves a running checkpoint every 500 iterations and at
    the end of each epoch; tracks the best validation score.

    BUG FIXES vs. original:
    - per-batch losses were accumulated as live tensors
      (``loss_train = loss_train + loss``), retaining every batch's autograd
      graph for the whole epoch; now accumulated with ``loss.item()``.
    - the epoch average divided by ``batch_idx`` (last index, off by one and
      a ZeroDivisionError with a single batch); now divides by the batch
      count ``it``.
    """
    t_start = time.time()
    train_data = coco_loader(args.coco_root, split='train',
                             ncap_per_img=args.ncap_per_img)
    print('[DEBUG] Loading train data ... %f secs' % (time.time() - t_start))

    train_data_loader = DataLoader(dataset=train_data,
                                   num_workers=args.nthreads,
                                   batch_size=args.batchsize,
                                   shuffle=True, drop_last=True)

    # Pre-trained language model (reward / auxiliary supervision source).
    lang_model = Seq2Seq(train_data.numwords)
    lang_model = lang_model.cuda()
    lang_model.load_state_dict(
        torch.load('log_model/bestmodel.pth')['lang_state_dict'])
    lang_model.train()

    # Load pre-trained imgcnn
    model_imgcnn = Vgg16Feats()
    model_imgcnn.cuda()
    model_imgcnn.train(True)
    model_imgcnn.load_state_dict(
        torch.load('log_reg/bestmodel.pth')['img_state_dict'])

    # Convcap model
    model_convcap = convcap(train_data.numwords, args.num_layers,
                            is_attention=args.attention)
    model_convcap.cuda()
    model_convcap.load_state_dict(
        torch.load('log_reg/bestmodel.pth')['state_dict'])
    model_convcap.train(True)

    optimizer = optim.RMSprop(model_convcap.parameters(),
                              lr=args.learning_rate)
    scheduler = lr_scheduler.StepLR(optimizer, step_size=args.lr_step_size,
                                    gamma=.1)
    img_optimizer = None  # created lazily once finetuning starts

    batchsize = args.batchsize
    ncap_per_img = args.ncap_per_img
    batchsize_cap = batchsize * ncap_per_img
    max_tokens = train_data.max_tokens
    nbatches = len(train_data.ids) // batchsize  # same as floor(len/bs)
    bestscore = .0

    for epoch in range(args.epochs):
        loss_train = 0.

        # Start finetuning the image CNN at the configured epoch.
        if (epoch == args.finetune_after):
            img_optimizer = optim.RMSprop(model_imgcnn.parameters(), lr=1e-5)
            img_scheduler = lr_scheduler.StepLR(img_optimizer,
                                                step_size=args.lr_step_size,
                                                gamma=.1)

        scheduler.step()
        if (img_optimizer):
            img_scheduler.step()

        it = 0
        # One epoch of train
        for batch_idx, (imgs, captions, wordclass, mask, _) in \
                tqdm(enumerate(train_data_loader), total=nbatches):
            it = it + 1

            imgs = imgs.view(batchsize, 3, 224, 224)
            wordclass = wordclass.view(batchsize_cap, max_tokens).cuda()
            mask = mask.view(batchsize_cap, max_tokens)

            captions = utils.decode_sequence(train_data.wordlist, wordclass,
                                             None)
            captions_all = []
            for index, caption in enumerate(captions):
                captions_all.append(caption)

            imgs_v = Variable(imgs).cuda()
            wordclass_v = Variable(wordclass).cuda()

            optimizer.zero_grad()
            if (img_optimizer):
                img_optimizer.zero_grad()

            imgsfeats, imgsfc7 = model_imgcnn(imgs_v)
            imgsfeats, imgsfc7 = repeat_img_per_cap(imgsfeats, imgsfc7,
                                                    ncap_per_img)
            _, _, feat_h, feat_w = imgsfeats.size()

            if (args.attention == True):
                wordact, attn = model_convcap(imgsfeats, imgsfc7, wordclass_v)
                attn = attn.view(batchsize_cap, max_tokens, feat_h, feat_w)
            else:
                wordact, _ = model_convcap(imgsfeats, imgsfc7, wordclass_v)

            # Shift so position t predicts token t+1.
            wordact = wordact[:, :, :-1]
            wordclass_v = wordclass_v[:, 1:]
            mask = mask[:, 1:].contiguous()

            wordact_t = wordact.permute(0, 2, 1).contiguous().view(
                batchsize_cap * (max_tokens - 1), -1)
            wordclass_t = wordclass_v.contiguous().view(
                batchsize_cap * (max_tokens - 1), 1)

            maskids = torch.nonzero(mask.view(-1)).numpy().reshape(-1)

            if (args.attention == True):
                # Cross-entropy loss and attention loss of Show, Attend and Tell
                loss_xe = F.cross_entropy(
                    wordact_t[maskids, ...],
                    wordclass_t[maskids, ...].contiguous().view(
                        maskids.shape[0])) \
                    + (torch.sum(torch.pow(1. - torch.sum(attn, 1), 2))) \
                    / (batchsize_cap * feat_h * feat_w)
            else:
                loss_xe = F.cross_entropy(
                    wordact_t[maskids, ...],
                    wordclass_t[maskids, ...].contiguous().view(
                        maskids.shape[0]))

            # Auxiliary language-model cross entropy (computed, currently
            # not added to the optimized loss below).
            wordact = lang_model(wordclass_v.transpose(1, 0),
                                 wordclass_v.transpose(1, 0), imgs)
            wordact = wordact.transpose(1, 0)[:, :-1, :]
            wordclass_v = wordclass_v[:, 1:]
            wordact_t = wordact.contiguous().view(
                batchsize_cap * wordact.size(1), -1)
            wordclass_t = wordclass_v.contiguous().view(
                batchsize_cap * wordclass_v.size(1), 1)
            loss_xe_lang = F.cross_entropy(
                wordact_t[...], wordclass_t[...].contiguous().view(-1))

            # Sample captions from the language model (no gradients needed).
            with torch.no_grad():
                outcap, sampled_ids, sample_logprobs, x_all_langauge, \
                    outputs = lang_model.sample(wordclass.transpose(1, 0),
                                                wordclass.transpose(1, 0),
                                                imgsfeats.transpose(1, 0),
                                                train_data.wordlist)

            logprobs_input, _ = model_convcap(imgsfeats, imgsfc7,
                                              sampled_ids.long().cuda())
            log_probs = F.log_softmax(
                logprobs_input.transpose(2, 1)[:, :-1, :], -1)
            sample_logprobs_true = log_probs.gather(
                2, sampled_ids[:, 1:].cuda().long().unsqueeze(2))

            with torch.no_grad():
                reward = get_self_critical_reward(
                    batchsize_cap, lang_model, wordclass.transpose(1, 0),
                    imgsfeats.transpose(1, 0), outcap, captions_all,
                    train_data.wordlist, 16)

            # Importance-weighted self-critical RL loss.
            loss_rl1 = rl_crit(
                torch.exp(sample_logprobs_true.squeeze()) /
                torch.exp(sample_logprobs[:, 1:]).cuda().detach(),
                sampled_ids[:, 1:].cpu(),
                torch.from_numpy(reward).float().cuda())

            # XE term deliberately zeroed out — pure RL training here.
            loss = 0.0 * loss_xe + loss_rl1

            # Periodic checkpoint + validation inside the epoch.
            if it % 500 == 0:
                modelfn = osp.join(args.model_dir, 'model.pth')
                scores = test(args, 'val', model_convcap=model_convcap,
                              model_imgcnn=model_imgcnn)
                score = scores[0][args.score_select]
                if (score > bestscore):
                    bestscore = score
                    print('[DEBUG] Saving model at epoch %d with %s score of %f'
                          % (epoch, args.score_select, score))
                    bestmodelfn = osp.join(args.model_dir, 'bestmodel.pth')
                    os.system('cp %s %s' % (modelfn, bestmodelfn))
                torch.save(
                    {
                        'epoch': epoch,
                        'state_dict': model_convcap.state_dict(),
                        'img_state_dict': model_imgcnn.state_dict(),
                        'optimizer': optimizer.state_dict(),
                        'lang_state_dict': lang_model.state_dict()
                    }, modelfn)

            # BUG FIX: accumulate a float, not the graph-bearing tensor.
            loss_train = loss_train + loss.item()
            loss.backward()
            optimizer.step()
            if (img_optimizer):
                img_optimizer.step()

        # BUG FIX: average over the actual number of batches (was batch_idx).
        loss_train = loss_train / max(it, 1)
        print('[DEBUG] Training epoch %d has loss %f' % (epoch, loss_train))

        modelfn = osp.join(args.model_dir, 'model.pth')
        if (img_optimizer):
            img_optimizer_dict = img_optimizer.state_dict()
        else:
            img_optimizer_dict = None
        torch.save(
            {
                'epoch': epoch,
                'state_dict': model_convcap.state_dict(),
                'img_state_dict': model_imgcnn.state_dict(),
                'optimizer': optimizer.state_dict(),
                'lang_state_dict': lang_model.state_dict()
            }, modelfn)

        # Run on validation and obtain score
        scores = test(args, 'val', model_convcap=model_convcap,
                      model_imgcnn=model_imgcnn)
        score = scores[0][args.score_select]
        if (score > bestscore):
            bestscore = score
            print('[DEBUG] Saving model at epoch %d with %s score of %f'
                  % (epoch, args.score_select, score))
            bestmodelfn = osp.join(args.model_dir, 'bestmodel.pth')
            os.system('cp %s %s' % (modelfn, bestmodelfn))
num_classes = 100 trainset = Dataset(train_dir, dirname_to_classname_path, num_classes) testset = Dataset(val_dir, dirname_to_classname_path, num_classes) train_dataloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size_train, shuffle=True, num_workers=8) test_dataloader = torch.utils.data.DataLoader(testset, batch_size=batch_size_train, shuffle=False, num_workers=8) net = GoogLeNet(num_classes, mode='train').cuda() net.init_weights('KAMING') if pretrained_weights != None: net_pretrain = torch.load(pretrained_weights) net.load_state_dict(net_pretrain) criterion = nn.CrossEntropyLoss().cuda() optimizer= optim.SGD(net.parameters(), lr=learning_rate, momentum=momentum, weight_decay=0.0001) scheduler = lr_scheduler.StepLR(optimizer, step_size=4, gamma=0.5) # original = 0.96 train_loss_list = list() train_accuracy_list = list() test_loss_list = list() test_accuracy_list = list() for epoch in range(num_epoch): time_s = time.time() print('Epoch : ', epoch + 1, optimizer) net.train() for batch_idx, (img, y_GT) in enumerate(train_dataloader): img = img.permute(0, 3, 1, 2).float()
def main():
    """Train a dual-stream FCN saliency model (MSE loss) on DHF1K.

    Builds train/val/test loaders from fixed relative dataset paths, wraps
    the model in DataParallel when CUDA is available, and delegates the
    training loop to ``train``.
    """
    # Configurations
    args = parse_args()
    os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu
    n_class = 1  # single-channel saliency map output
    batch_size = args.batchsize
    epochs = 10
    lr = 1e-4
    momentum = 0
    w_decay = 1e-5
    step_size = 50
    gamma = 0.5
    # Human-readable run tag summarizing the hyperparameters.
    configs = "FCNs-MSE_batch{}_epoch{}_RMSprop_scheduler-step{}-gamma{}_lr{}_momentum{}_w_decay{}".format(
        batch_size, epochs, step_size, gamma, lr, momentum, w_decay)
    print("Configs:", configs)

    # Create dir for model (plus a checkpoints subdirectory).
    output_dir = os.path.join(args.exp_dir, args.exp_name)
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    if not os.path.exists(os.path.join(output_dir, 'checkpoints')):
        os.makedirs(os.path.join(output_dir, 'checkpoints'))

    use_gpu = torch.cuda.is_available()
    num_gpu = list(range(torch.cuda.device_count()))

    # Training and validation loaders (frames, optical flows, targets at
    # 640x360).  Paths are hard-coded relative to the working directory.
    train_data = DHF1KDualDataset('../dataset/DHF1K/train/data_fix',
                                  '../dataset/DHF1K/train/flows_fix',
                                  '../dataset/DHF1K/train/target_fix', 640,
                                  360)
    train_loader = DataLoader(train_data, batch_size=batch_size,
                              shuffle=True, num_workers=1)
    val_data = DHF1KDualDataset('../dataset/DHF1K/val/data',
                                '../dataset/DHF1K/val/flows_fix',
                                '../dataset/DHF1K/val/target_fix', 640, 360,
                                small_part=10)
    val_loader = DataLoader(val_data, batch_size=16, num_workers=1)
    test_data = DHF1KDualDataset('../dataset/DHF1K/test/data',
                                 '../dataset/DHF1K/test/flows_fix',
                                 '../dataset/DHF1K/test/target', 640, 360)
    test_loader = DataLoader(test_data, batch_size=16, num_workers=4)

    fcn_model = SaliencyDualFCN(n_class=n_class)

    if use_gpu:
        ts = time.time()
        fcn_model = fcn_model.cuda()
        fcn_model = nn.DataParallel(fcn_model, device_ids=num_gpu)
        print("Finish cuda loading, time elapsed {}".format(time.time() - ts))

    # Optimizer and Loss Function
    criterion = nn.MSELoss()
    optimizer = optim.RMSprop(fcn_model.parameters(), lr=lr,
                              momentum=momentum, weight_decay=w_decay)
    # Decay LR by a factor of `gamma` (0.5) every `step_size` (50) epochs.
    scheduler = lr_scheduler.StepLR(
        optimizer, step_size=step_size, gamma=gamma)

    train(args, train_loader, val_loader, test_loader,
          fcn_model, scheduler, optimizer, output_dir, use_gpu, epochs,
          criterion)
def Train(Model, args):
    """Train a face-ID model with SGD, logging to TensorBoard.

    Per epoch: rebuilds the augmented dataset/dataloader, runs one pass of
    ID-loss optimization, validates, and periodically snapshots the model.
    Uses StepLR when ``args.stepsize > 0`` or ReduceLROnPlateau when
    ``args.dynamic_lr`` — if both are set, the plateau scheduler overwrites
    the StepLR binding (NOTE(review): presumably intentional — confirm).
    """
    writer = SummaryWriter()
    # Adam betas kept from an earlier Adam configuration; the active
    # optimizer below is plain SGD.
    beta1_Adam = args.beta1
    beta2_Adam = args.beta2
    if args.cuda:
        Model.cuda()
    #optimizer = optim.Adam(Model.parameters(), lr=args.lr, betas=(beta1_Adam, beta2_Adam))
    optimizer = optim.SGD(Model.parameters(), lr=args.lr)
    if args.resume:
        # NOTE(review): only the optimizer state is restored here; the model
        # weights are not loaded from the checkpoint — confirm intended.
        checkpoint = torch.load(args.resume)
        optimizer.load_state_dict(checkpoint['optimizer'])
    Model.train()
    steps = 0
    CUDNN.benchmark = True
    if args.stepsize > 0:
        scheduler = lr_scheduler.StepLR(optimizer, step_size=args.stepsize,
                                        gamma=args.gamma)
    if args.dynamic_lr == True:
        scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, mode='min',
                                                   factor=0.1, patience=3000,
                                                   verbose=False,
                                                   threshold=0.00001,
                                                   threshold_mode='rel',
                                                   cooldown=2000, min_lr=0,
                                                   eps=1e-08)
    for epoch in range(args.start_epoch, args.epochs+1):
        # Load augmented data each epoch: ResNet 256x256 -> random 224x224
        # crops (for VGG it would be 110x110 -> 96x96).
        transformed_dataset = FaceIdPoseDataset(args.train_csv_file, args.data_place,transforms.Compose([transforms.Resize(256),
                                                                                                         transforms.RandomCrop(224),
                                                                                                         transforms.ToTensor()
                                                                                                         ]))
        dataloader = DataLoader(transformed_dataset, batch_size=args.Train_Batch, shuffle=True, num_workers=8)
        if args.stepsize > 0:
            scheduler.step()
        for i, batch_data in enumerate(dataloader):
            # backward() accumulates gradients; zero them so minibatches
            # don't mix.
            optimizer.zero_grad()
            batch_image = torch.FloatTensor(batch_data[0].float())
            batch_id_label = batch_data[2]
            if args.cuda:
                batch_image, batch_id_label = batch_image.cuda(), batch_id_label.cuda()
            batch_image, batch_id_label = Variable(batch_image), Variable(batch_id_label)
            steps += 1
            Prediction = Model(batch_image)
            Loss = Model.ID_Loss(Prediction, batch_id_label)
            Loss.backward()
            optimizer.step()
            if args.dynamic_lr == True:
                # Plateau scheduler steps on the (per-batch) loss value.
                scheduler.step(Loss)
            log_learning(epoch, steps, 'ResNet50_Model', args.lr, Loss.item(), args)
            writer.add_scalar('Train/Train_Loss', Loss, steps)
            writer.add_scalar('Train/Model_Lr', optimizer.param_groups[0]['lr'], epoch)
        Validation_Process(Model, epoch, writer, args)
        if epoch % args.save_freq == 0:
            if not os.path.isdir(args.snapshot_dir):
                os.makedirs(args.snapshot_dir)
            save_path = os.path.join(args.snapshot_dir, 'epoch{}.pt'.format(epoch))
            torch.save(Model.state_dict(), save_path)
            save_checkpoint({
                'epoch': epoch + 1,
                'Model': Model.state_dict(),
                'optimizer': optimizer.state_dict(),
            }, save_dir=os.path.join(args.snapshot_dir, 'epoch{}'.format(epoch)))
    # export scalar data to JSON for external processing
    writer.export_scalars_to_json("./all_scalars.json")
    writer.close()
def experience_mnist(config, path, param):
    """Run one membership-inference experiment on MNIST.

    Trains a target model, then ``config.general.number_shadow_model`` shadow
    models on their own data splits, fits a LightGBM attack classifier on the
    shadow outputs, and evaluates it against the target's outputs.

    Returns a tuple ``(precision_general, recall_general, accuracy_general,
    precision_per_class, recall_per_class, accuracy_per_class)``.

    BUG FIX: the original trained every shadow model on the *target*
    dataloaders (``dataloaders_target``/``dataset_sizes_target``), leaving
    the freshly built shadow loaders unused — the shadow models must train
    on their own splits for the attack to be meaningful.
    """
    print("START MNIST")
    use_cuda = config.general.use_cuda and torch.cuda.is_available()
    torch.manual_seed(config.general.seed)
    device = torch.device("cuda" if use_cuda else "cpu")

    # Standard MNIST normalization; identical for every split below.
    mnist_transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.1307, ), (0.3081, ))
    ])

    print("START TRAINING TARGET MODEL")
    data_train_target = custum_MNIST(True, 0, config, '../data', train=True,
                                     download=True,
                                     transform=mnist_transform)
    data_test_target = custum_MNIST(True, 0, config, '../data', train=False,
                                    transform=mnist_transform)
    criterion = nn.CrossEntropyLoss()
    train_loader_target = torch.utils.data.DataLoader(
        data_train_target, batch_size=config.learning.batch_size,
        shuffle=True)
    test_loader_target = torch.utils.data.DataLoader(
        data_test_target, batch_size=config.learning.batch_size,
        shuffle=True)
    dataloaders_target = {
        "train": train_loader_target,
        "val": test_loader_target
    }
    dataset_sizes_target = {
        "train": len(data_train_target),
        "val": len(data_test_target)
    }
    print("TAILLE dataset", dataset_sizes_target)

    model_target = Net_mnist().to(device)
    optimizer = optim.SGD(model_target.parameters(),
                          lr=config.learning.learning_rate,
                          momentum=config.learning.momentum)
    # NOTE(review): ``step_size=decrease_lr_factor`` / ``gamma=
    # decrease_lr_every`` look swapped given the config names — left as in
    # the original to avoid silently changing tuned behavior; confirm.
    exp_lr_scheduler = lr_scheduler.StepLR(
        optimizer,
        step_size=config.learning.decrease_lr_factor,
        gamma=config.learning.decrease_lr_every)
    # train_model also returns the target's attack-evaluation dataset.
    model_target, best_acc_target, data_test_set, label_test_set, \
        class_test_set = train_model(model_target, criterion, optimizer,
                                     exp_lr_scheduler, dataloaders_target,
                                     dataset_sizes_target,
                                     num_epochs=config.learning.epochs)
    np.save(path + "/res_train_target_" + str(param) + ".npy",
            best_acc_target)

    print("START TRAINING SHADOW MODEL")
    all_shadow_models = []
    all_dataloaders_shadow = []
    data_train_set = []
    label_train_set = []
    class_train_set = []
    for num_model_sahdow in range(config.general.number_shadow_model):
        criterion = nn.CrossEntropyLoss()
        data_train_shadow = custum_MNIST(False, num_model_sahdow, config,
                                         '../data', train=True,
                                         download=True,
                                         transform=mnist_transform)
        data_test_shadow = custum_MNIST(False, num_model_sahdow, config,
                                        '../data', train=False,
                                        transform=mnist_transform)
        train_loader_shadow = torch.utils.data.DataLoader(
            data_train_shadow, batch_size=config.learning.batch_size,
            shuffle=True)
        test_loader_shadow = torch.utils.data.DataLoader(
            data_test_shadow, batch_size=config.learning.batch_size,
            shuffle=True)
        dataloaders_shadow = {
            "train": train_loader_shadow,
            "val": test_loader_shadow
        }
        dataset_sizes_shadow = {
            "train": len(data_train_shadow),
            "val": len(data_test_shadow)
        }
        print("TAILLE dataset", dataset_sizes_shadow)

        model_shadow = Net_mnist().to(device)
        optimizer = optim.SGD(model_shadow.parameters(),
                              lr=config.learning.learning_rate,
                              momentum=config.learning.momentum)
        exp_lr_scheduler = lr_scheduler.StepLR(
            optimizer,
            step_size=config.learning.decrease_lr_factor,
            gamma=config.learning.decrease_lr_every)
        # BUG FIX: train on the shadow loaders, not the target's.
        model_shadow, best_acc_sh, data_train_set_unit, \
            label_train_set_unit, class_train_set_unit = train_model(
                model_shadow, criterion, optimizer, exp_lr_scheduler,
                dataloaders_shadow, dataset_sizes_shadow,
                num_epochs=config.learning.epochs)
        data_train_set.append(data_train_set_unit)
        label_train_set.append(label_train_set_unit)
        class_train_set.append(class_train_set_unit)
        np.save(
            path + "/res_train_shadow_" + str(num_model_sahdow) + "_" +
            str(param) + ".npy", best_acc_sh)
        all_shadow_models.append(model_shadow)
        all_dataloaders_shadow.append(dataloaders_shadow)

    print("START GETTING DATASET ATTACK MODEL")
    # Attack training set: pooled shadow-model outputs, shuffled.
    data_train_set = np.concatenate(data_train_set)
    label_train_set = np.concatenate(label_train_set)
    class_train_set = np.concatenate(class_train_set)
    data_train_set, label_train_set, class_train_set = shuffle(
        data_train_set, label_train_set, class_train_set,
        random_state=config.general.seed)
    data_test_set, label_test_set, class_test_set = shuffle(
        data_test_set, label_test_set, class_test_set,
        random_state=config.general.seed)
    print("Taille dataset train", len(label_train_set))
    print("Taille dataset test", len(label_test_set))

    print("START FITTING ATTACK MODEL")
    model = lgb.LGBMClassifier(
        objective='binary',
        reg_lambda=config.learning.ml.reg_lambd,
        n_estimators=config.learning.ml.n_estimators)
    model.fit(data_train_set, label_train_set)
    y_pred_lgbm = model.predict(data_test_set)

    # Overall attack metrics.
    precision_general, recall_general, _, _ = precision_recall_fscore_support(
        y_pred=y_pred_lgbm, y_true=label_test_set, average="macro")
    accuracy_general = accuracy_score(y_true=label_test_set,
                                      y_pred=y_pred_lgbm)

    # Per-digit-class attack metrics.
    precision_per_class, recall_per_class, accuracy_per_class = [], [], []
    for idx_class, classe in enumerate(data_train_target.classes):
        all_index_class = np.where(class_test_set == idx_class)
        precision, recall, _, _ = precision_recall_fscore_support(
            y_pred=y_pred_lgbm[all_index_class],
            y_true=label_test_set[all_index_class],
            average="macro")
        accuracy = accuracy_score(y_true=label_test_set[all_index_class],
                                  y_pred=y_pred_lgbm[all_index_class])
        precision_per_class.append(precision)
        recall_per_class.append(recall)
        accuracy_per_class.append(accuracy)

    print("END MNIST")
    return (precision_general, recall_general, accuracy_general,
            precision_per_class, recall_per_class, accuracy_per_class)
def train_net(net, train_dataset, valid_dataset, use_gpu, config):
    """Train `net` according to an INI-style `config`, validating each epoch.

    Reads hyperparameters from the ``train``/``output`` config sections,
    optionally resumes from epoch ``pre_train``, pulls batches via
    ``train_dataset.fetch_data`` until it returns None, and saves a model
    checkpoint plus validation metrics every epoch.  TensorBoard writing is
    currently disabled (``writer = None``); ``valid_net`` receives that None.
    """
    epoch = config.getint("train", "epoch")
    learning_rate = config.getfloat("train", "learning_rate")
    task_loss_type = config.get("train", "type_of_loss")
    output_time = config.getint("output", "output_time")
    test_time = config.getint("output", "test_time")
    model_path = os.path.join(config.get("output", "model_path"),
                              config.get("output", "model_name"))
    # ``pre_train`` may be absent or non-numeric; fall back to a fresh run.
    try:
        trained_epoch = config.get("train", "pre_train")
        trained_epoch = int(trained_epoch)
    except Exception as e:
        trained_epoch = 0

    os.makedirs(os.path.join(config.get("output", "tensorboard_path")),
                exist_ok=True)
    # Fresh run: wipe any stale tensorboard logs for this model name.
    if trained_epoch == 0:
        shutil.rmtree(
            os.path.join(config.get("output", "tensorboard_path"),
                         config.get("output", "model_name")), True)

    # writer = SummaryWriter(
    #     os.path.join(config.get("output", "tensorboard_path"), config.get("output", "model_name")),
    #     config.get("output", "model_name"))
    writer = None
    criterion = get_loss(task_loss_type)

    optimizer_type = config.get("train", "optimizer")
    if optimizer_type == "adam":
        optimizer = optim.Adam(net.parameters(),
                               lr=learning_rate,
                               weight_decay=config.getfloat(
                                   "train", "weight_decay"))
    elif optimizer_type == "sgd":
        optimizer = optim.SGD(net.parameters(),
                              lr=learning_rate,
                              momentum=config.getfloat("train", "momentum"),
                              weight_decay=config.getfloat(
                                  "train", "weight_decay"))
    else:
        raise NotImplementedError

    step_size = config.getint("train", "step_size")
    gamma = config.getfloat("train", "gamma")
    exp_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=step_size,
                                           gamma=gamma)

    print('** start training here! **')
    print(
        '----------------|----------TRAIN-----------|----------VALID-----------|----------------|'
    )
    print(
        ' lr epoch | loss top-1 | loss top-1 | time |'
    )
    print(
        '----------------|--------------------------|--------------------------|----------------|'
    )
    start = timer()

    for epoch_num in range(trained_epoch, epoch):
        cnt = 0
        train_cnt = 0
        train_loss = 0
        train_acc = 0
        # NOTE(review): passing the epoch to step() is the deprecated
        # pre-1.1 StepLR API — confirm the pinned torch version supports it.
        exp_lr_scheduler.step(epoch_num)
        # Read the current LR from the first param group (for logging only).
        lr = 0
        for g in optimizer.param_groups:
            lr = float(g['lr'])
            break

        # Drain the dataset; fetch_data returns None when the epoch is done.
        while True:
            cnt += 1
            data = train_dataset.fetch_data(config)
            if data is None:
                break

            data = DataCuda(data, use_gpu)
            optimizer.zero_grad()
            # The network itself computes loss and accuracy for this task.
            results = net(data, criterion, config, use_gpu)
            outputs, loss, accu = results["x"], results["loss"], results[
                "accuracy"]
            loss.backward()

            train_loss += loss.item()
            train_acc += accu.item()
            train_cnt += 1
            loss = loss.item()
            accu = accu.item()

            optimizer.step()

            if cnt % output_time == 0:
                # In-place progress line; validation columns are
                # placeholders ('????') until valid_net runs below.
                print('\r', end='', flush=True)
                print(
                    '%.4f % 3d | %.4f % 2.2f | ???? ????? | %s | %d'
                    % (lr, epoch_num + 1, train_loss / train_cnt,
                       train_acc / train_cnt * 100,
                       time_to_str((timer() - start)), cnt),
                    end='',
                    flush=True)

        train_loss /= train_cnt
        train_acc /= train_cnt

        # writer.add_scalar(config.get("output", "model_name") + " train loss", train_loss, epoch_num + 1)
        # writer.add_scalar(config.get("output", "model_name") + " train accuracy", train_acc, epoch_num + 1)

        if not os.path.exists(model_path):
            os.makedirs(model_path)
        torch.save(net.state_dict(),
                   os.path.join(model_path,
                                "model-%d.pkl" % (epoch_num + 1)))

        valid_loss, valid_accu, auc_result = valid_net(net, valid_dataset,
                                                       use_gpu, config,
                                                       epoch_num + 1, writer)
        print('\r', end='', flush=True)
        print(
            '%.4f % 3d | %.4f %.2f | %.4f % 2.2f | %s | auc_reuslt: %.4f'
            % (lr, epoch_num + 1, train_loss, train_acc * 100, valid_loss,
               valid_accu * 100, time_to_str((timer() - start)), auc_result))
def main():
    """Fine-tune a pretrained torchvision model on a train/valid/test image folder.

    Reads CLI options via get_user_args(), builds dataloaders with standard
    ImageNet normalization, freezes the backbone, replaces the classifier head,
    trains, and saves a checkpoint.
    """
    # so other functions can access these variables
    global args, dataloaders, data_sizes, image_sets
    args = get_user_args()

    # defining processing device, if cuda is available then GPU else CPU
    device = torch.device("cuda:0" if (
        torch.cuda.is_available() and args.gpu) else "cpu")
    print('=> beginning training using {}'.format(str(device).upper()))

    # lets the user know which model is being trained
    print('=> creating model: {}'.format(args.arch))
    model = models.__dict__[args.arch](pretrained=True)
    print('* ' * 20)
    model.to(device)  # send model to processor

    # image location with child folders of train, valid, test
    data_dir = Path(args.data)
    train_dir = data_dir / 'train'
    valid_dir = data_dir / 'valid'
    test_dir = data_dir / 'test'

    # variable for various iterations later
    states = ['train', 'valid', 'test']
    # for easy iteration later
    dirs_dict = {'train': train_dir, 'valid': valid_dir, 'test': test_dir}

    # image normalization parameters, predefined ImageNet statistics
    normalize = transforms.Normalize([0.485, 0.456, 0.406],
                                     [0.229, 0.224, 0.225])
    # transforms for valid and test data, use same parameters
    valid_test_transforms = [
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(), normalize
    ]
    data_transforms = {
        'train':
        # random augmentation for generalized learning
        transforms.Compose([
            transforms.RandomResizedCrop(224),
            transforms.RandomRotation(30),
            transforms.RandomHorizontalFlip(),
            transforms.RandomVerticalFlip(),
            transforms.ToTensor(), normalize
        ]),
        'valid':
        transforms.Compose(valid_test_transforms),
        'test':
        transforms.Compose(valid_test_transforms)
    }

    image_sets = {
        i_set: datasets.ImageFolder(dirs_dict[i_set],
                                    transform=data_transforms[i_set])
        for i_set in states
    }
    dataloaders = {
        'train':
        torch.utils.data.DataLoader(image_sets['train'],
                                    batch_size=args.batch_size,
                                    shuffle=True),
        'valid':
        torch.utils.data.DataLoader(image_sets['valid'],
                                    batch_size=args.batch_size),
        'test':
        torch.utils.data.DataLoader(image_sets['test'],
                                    batch_size=args.batch_size)
    }
    classes = image_sets['train'].classes
    data_sizes = {x: len(image_sets[x]) for x in states}

    # ensures gradients aren't calculated for backbone parameters (frozen)
    for p in model.parameters():
        p.requires_grad = False

    # new trainable classifier head; log-softmax output pairs with NLLLoss
    classifier = nn.Sequential(
        OrderedDict([
            ('fc1', nn.Linear(model.classifier[0].in_features,
                              args.hidden_units)),
            # fixed: layer key was 'relu1,' (stray comma in the name)
            ('relu1', nn.ReLU()),
            ('dropout', nn.Dropout(args.dropout)),
            ('fc2', nn.Linear(args.hidden_units, len(classes))),
            ('output', nn.LogSoftmax(dim=1)),
        ]))
    model.classifier = classifier

    criterion = nn.NLLLoss()
    # only the new head is optimized; the backbone stays frozen
    optimizer = optim.Adam(model.classifier.parameters(),
                           lr=args.learning_rate)
    scheduler = lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.0125)

    model_trained = train(model, optimizer, criterion, scheduler, args.epochs,
                          device)
    save_checkpoint(model_trained, args.epochs, args.save_dir, args.arch,
                    args.learning_rate, optimizer, args.hidden_units)
shuffle=True) test_loader = torch.utils.data.DataLoader( torchvision.datasets.FashionMNIST( root="data", train=False, transform=torchvision.transforms.Compose([ torchvision.transforms.ToTensor(), torchvision.transforms.Normalize([0], [1]), ]), ), batch_size=32) cnn = LeNet(input_shape=(28, 28, 1), num_classes=10) cnn = cnn.cuda() if simulation_config['use_gpu'] else cnn optimizer = torch.optim.Adam(cnn.parameters(), lr=0.005) lr_sch = lr_scheduler.StepLR(optimizer, step_size=20, gamma=0.2) criterion = torch.nn.CrossEntropyLoss() # for i in range(20): # train_ann(model=cnn, train_loader=train_loader, optimizer=optimizer, criterion=criterion, epoch=i+1, simulation_config=simulation_config) # # test_ann(model=cnn, test_loader=test_loader, criterion=criterion, simulation_config=simulation_config) # lr_sch.step() # torch.save(cnn.state_dict(), 'model_output/LeNet_fashionmnist_params.pkl') device = torch.device('cuda') scnn = SpikingLeNet(input_shape=(28, 28, 1), num_classes=10, if_param=IFParameters(), device=device, seq_length=simulation_config['seq_length'],
num_workers=num_workers) folds_training_losses = [] folds_val_losses = [] folds_val_mean_losses = [] for i in range(2): print("CV: ", i) train_epochs_mean_losses = [] val_epochs_mean_losses = [] val_epochs = [] i_fold_val_scores = [] model = MRIRegressor(feats, dropout_p).to(device=device) optimizer = Adam(model.parameters(), lr, weight_decay=weight_decay) scheduler = lr_scheduler.StepLR(optimizer, step_size=1, gamma=gamma, last_epoch=-1) for epoch in range(num_epochs): epoch_train_batch_losses = train_epoch(model, train_loader, loss_function, optimizer, scheduler, device) train_mean_loss = np.mean(epoch_train_batch_losses) train_max_loss = np.max(epoch_train_batch_losses) train_epochs_mean_losses.append(train_mean_loss) if epoch % 5 == 0 or epoch == num_epochs - 1: val_epochs.append(epoch) epoch_val_batch_losses = eval_epoch(model, val_loader, loss_function, device) val_mean_loss = np.mean(epoch_val_batch_losses) val_epochs_mean_losses.append(val_mean_loss)
nn.LeakyReLU(0.2, True), # layer 4 nn.Conv2d(ndf * 8, 1, 4, 1, 0, bias=False), nn.Sigmoid()) self.to(DEVICE) print_network(self) def forward(self, x): return nn.parallel.data_parallel(self.net, x).view(-1, 1).squeeze(1) G = generator(nz, nc, ngf) D = discriminator(3, ndf) opt_G = optim.Adam(G.parameters(), lr, betas=[0.5, 0.999]) opt_D = optim.Adam(D.parameters(), lr, betas=[0.5, 0.999]) scheduler_lr = lr_scheduler.StepLR(opt_G, step_size=1, gamma=0.9) criterion = nn.BCELoss() for epoch in range(0, n_epochs): G.train() D.train() _batch = 0 scheduler_lr.step() for X, _ in train_iter: _batch += 1 real_x = X.to(DEVICE) z = T.randn(real_x.size(0), nz, 1, 1, device=DEVICE) fake_x = G(z) # instance noise trick
def __init__(self, opt):
    """Set up the SRRaGAN model: networks, losses, optimizers and LR schedulers.

    Args:
        opt: full options dict; training settings are read from opt['train'].
    """
    print('SRRaGANModelllll', opt)  # NOTE(review): debug leftover
    super(SRRaGANModel, self).__init__(opt)
    train_opt = opt['train']

    if self.is_train:
        # z_norm marks datasets whose images are normalized to [-1, 1]
        if opt['datasets']['train']['znorm']:
            z_norm = opt['datasets']['train']['znorm']
        else:
            z_norm = False

    # define networks and load pretrained models
    self.netG = networks.define_G(opt).to(self.device)  # G
    if self.is_train:
        self.netG.train()
        if train_opt['gan_weight']:
            self.netD = networks.define_D(opt).to(self.device)  # D
            self.netD.train()
    self.load()  # load G and D if needed

    # define losses, optimizer and scheduler
    if self.is_train:
        # Define if the generator will have a final capping mechanism in the output
        self.outm = None
        if train_opt['finalcap']:
            self.outm = train_opt['finalcap']

        # G pixel loss
        if train_opt['pixel_weight']:
            if train_opt['pixel_criterion']:
                l_pix_type = train_opt['pixel_criterion']
            else:
                # fixed: was `l_fea_type = 'cb'`, which left l_pix_type
                # unbound and raised NameError below
                l_pix_type = 'cb'
            if l_pix_type == 'l1':
                self.cri_pix = nn.L1Loss().to(self.device)
            elif l_pix_type == 'l2':
                self.cri_pix = nn.MSELoss().to(self.device)
            elif l_pix_type == 'cb':
                self.cri_pix = CharbonnierLoss().to(self.device)
            elif l_pix_type == 'elastic':
                self.cri_pix = ElasticLoss().to(self.device)
            elif l_pix_type == 'relativel1':
                self.cri_pix = RelativeL1().to(self.device)
            elif l_pix_type == 'l1cosinesim':
                self.cri_pix = L1CosineSim().to(self.device)
            else:
                raise NotImplementedError(
                    'Loss type [{:s}] not recognized.'.format(l_pix_type))
            self.l_pix_w = train_opt['pixel_weight']
        else:
            logger.info('Remove pixel loss.')
            self.cri_pix = None

        # G feature (perceptual) loss
        if train_opt['feature_weight']:
            if train_opt['feature_criterion']:
                l_fea_type = train_opt['feature_criterion']
            else:  # default to l1
                l_fea_type = 'l1'
            if l_fea_type == 'l1':
                self.cri_fea = nn.L1Loss().to(self.device)
            elif l_fea_type == 'l2':
                self.cri_fea = nn.MSELoss().to(self.device)
            elif l_fea_type == 'cb':
                self.cri_fea = CharbonnierLoss().to(self.device)
            elif l_fea_type == 'elastic':
                self.cri_fea = ElasticLoss().to(self.device)
            else:
                raise NotImplementedError(
                    'Loss type [{:s}] not recognized.'.format(l_fea_type))
            self.l_fea_w = train_opt['feature_weight']
        else:
            logger.info('Remove feature loss.')
            self.cri_fea = None
        if self.cri_fea:  # load VGG feature extractor for the perceptual loss
            self.netF = networks.define_F(opt, use_bn=False).to(self.device)

        # HFEN loss
        if train_opt['hfen_weight']:
            l_hfen_type = train_opt['hfen_criterion']
            if train_opt['hfen_presmooth']:
                pre_smooth = train_opt['hfen_presmooth']
            else:
                pre_smooth = False
            if l_hfen_type:
                if l_hfen_type == 'rel_l1' or l_hfen_type == 'rel_l2':
                    relative = True
                else:
                    relative = False
            if l_hfen_type:
                self.cri_hfen = HFENLoss(loss_f=l_hfen_type,
                                         device=self.device,
                                         pre_smooth=pre_smooth,
                                         relative=relative).to(self.device)
            else:
                raise NotImplementedError(
                    'Loss type [{:s}] not recognized.'.format(l_hfen_type))
            self.l_hfen_w = train_opt['hfen_weight']
        else:
            logger.info('Remove HFEN loss.')
            self.cri_hfen = None

        # TV loss
        if train_opt['tv_weight']:
            self.l_tv_w = train_opt['tv_weight']
            l_tv_type = train_opt['tv_type']
            if train_opt['tv_norm']:
                tv_norm = train_opt['tv_norm']
            else:
                tv_norm = 1
            if l_tv_type == 'normal':
                self.cri_tv = TVLoss(self.l_tv_w, p=tv_norm).to(self.device)
            elif l_tv_type == '4D':
                # Total Variation regularization in 4 directions
                self.cri_tv = TVLoss4D(self.l_tv_w).to(self.device)
            else:
                raise NotImplementedError(
                    'Loss type [{:s}] not recognized.'.format(l_tv_type))
        else:
            logger.info('Remove TV loss.')
            self.cri_tv = None

        # SSIM loss
        if train_opt['ssim_weight']:
            self.l_ssim_w = train_opt['ssim_weight']
            if train_opt['ssim_type']:
                l_ssim_type = train_opt['ssim_type']
            else:  # default to ms-ssim
                l_ssim_type = 'ms-ssim'
            if l_ssim_type == 'ssim':
                self.cri_ssim = SSIM(win_size=11, win_sigma=1.5,
                                     size_average=True, data_range=1.,
                                     channel=3).to(self.device)
            elif l_ssim_type == 'ms-ssim':
                self.cri_ssim = MS_SSIM(win_size=11, win_sigma=1.5,
                                        size_average=True, data_range=1.,
                                        channel=3).to(self.device)
            else:
                # fixed: an unknown type previously left self.cri_ssim unset
                # (AttributeError later); raise like the sibling loss branches
                raise NotImplementedError(
                    'Loss type [{:s}] not recognized.'.format(l_ssim_type))
        else:
            logger.info('Remove SSIM loss.')
            self.cri_ssim = None

        # LPIPS loss (removed dead string-literal block that hard-coded the
        # same flags; these two values are what was actually in effect)
        # Spatial map of perceptual distance; needs .mean() for backprop,
        # the mean distance approximates the non-spatial distance
        lpips_spatial = True
        lpips_GPU = True  # whether to use GPU for LPIPS calculations
        if train_opt['lpips_weight']:
            if z_norm == True:
                # images are already in the [-1,1] range LPIPS expects
                self.lpips_norm = False
            else:
                # normalize images from [0,1] range to [-1,1]
                self.lpips_norm = True
            self.l_lpips_w = train_opt['lpips_weight']
            # 'net' = off-the-shelf uncalibrated, 'net-lin' = linearly calibrated
            if train_opt['lpips_type']:
                lpips_type = train_opt['lpips_type']
            else:  # default to linearly calibrated models, better results
                lpips_type = 'net-lin'
            # backbone: 'alex', 'squeeze', 'vgg', or low-level 'L2'/'ssim'
            if train_opt['lpips_net']:
                lpips_net = train_opt['lpips_net']
            else:  # default to VGG for feature extraction
                lpips_net = 'vgg'
            self.cri_lpips = models.PerceptualLoss(
                model=lpips_type, net=lpips_net, use_gpu=lpips_GPU,
                model_path=None, spatial=lpips_spatial)
        else:
            logger.info('Remove LPIPS loss.')
            self.cri_lpips = None

        # SPL loss (Gradient Profile / Color Profile)
        if train_opt['spl_weight']:
            self.l_spl_w = train_opt['spl_weight']
            l_spl_type = train_opt['spl_type']
            # SPL normalization ([-1,1] images back to [0,1], if needed)
            if z_norm == True:
                self.spl_norm = True
            else:
                self.spl_norm = False
            # YUV normalization for CPL's yuv terms (mandatory when z_norm)
            if z_norm == True:
                self.yuv_norm = True
            else:
                self.yuv_norm = False
            if l_spl_type == 'spl':  # both GPL and CPL
                self.cri_gpl = spl.GPLoss(spl_norm=self.spl_norm)
                self.cri_cpl = spl.CPLoss(rgb=True, yuv=True, yuvgrad=True,
                                          spl_norm=self.spl_norm,
                                          yuv_norm=self.yuv_norm)
            elif l_spl_type == 'gpl':  # only GPL
                self.cri_gpl = spl.GPLoss(spl_norm=self.spl_norm)
                self.cri_cpl = None
            elif l_spl_type == 'cpl':  # only CPL
                self.cri_cpl = spl.CPLoss(rgb=True, yuv=True, yuvgrad=True,
                                          spl_norm=self.spl_norm,
                                          yuv_norm=self.yuv_norm)
                self.cri_gpl = None
            else:
                logger.info('Remove SPL loss.')
                self.cri_gpl = None
                self.cri_cpl = None

        # GD gan loss
        if train_opt['gan_weight']:
            self.cri_gan = GANLoss(train_opt['gan_type'], 1.0,
                                   0.0).to(self.device)
            self.l_gan_w = train_opt['gan_weight']
            # D_update_ratio and D_init_iters are for WGAN
            self.D_update_ratio = train_opt['D_update_ratio'] if train_opt[
                'D_update_ratio'] else 1
            self.D_init_iters = train_opt['D_init_iters'] if train_opt[
                'D_init_iters'] else 0
            if train_opt['gan_type'] == 'wgan-gp':
                self.random_pt = torch.Tensor(1, 1, 1, 1).to(self.device)
                # gradient penalty loss
                self.cri_gp = GradientPenaltyLoss(device=self.device).to(
                    self.device)
                # NOTE(review): 'gp_weigth' typo is kept — it matches the
                # key used in existing option files
                self.l_gp_w = train_opt['gp_weigth']
        else:
            logger.info('Remove GAN loss.')
            self.cri_gan = None

        # optimizers
        # G
        wd_G = train_opt['weight_decay_G'] if train_opt[
            'weight_decay_G'] else 0
        optim_params = []
        for k, v in self.netG.named_parameters():
            # can optimize for a part of the model
            if v.requires_grad:
                optim_params.append(v)
            else:
                logger.warning('Params [{:s}] will not optimize.'.format(k))
        self.optimizer_G = torch.optim.Adam(optim_params,
                                            lr=train_opt['lr_G'],
                                            weight_decay=wd_G,
                                            betas=(train_opt['beta1_G'],
                                                   0.999))
        self.optimizers.append(self.optimizer_G)
        # D
        if self.cri_gan:
            wd_D = train_opt['weight_decay_D'] if train_opt[
                'weight_decay_D'] else 0
            self.optimizer_D = torch.optim.Adam(self.netD.parameters(),
                                                lr=train_opt['lr_D'],
                                                weight_decay=wd_D,
                                                betas=(train_opt['beta1_D'],
                                                       0.999))
            self.optimizers.append(self.optimizer_D)

        # schedulers — one per optimizer, selected by lr_scheme
        if train_opt['lr_scheme'] == 'MultiStepLR':
            for optimizer in self.optimizers:
                self.schedulers.append(
                    lr_scheduler.MultiStepLR(optimizer, train_opt['lr_steps'],
                                             train_opt['lr_gamma']))
        elif train_opt['lr_scheme'] == 'MultiStepLR_Restart':
            for optimizer in self.optimizers:
                self.schedulers.append(
                    lr_schedulerR.MultiStepLR_Restart(
                        optimizer, train_opt['lr_steps'],
                        restarts=train_opt['restarts'],
                        weights=train_opt['restart_weights'],
                        gamma=train_opt['lr_gamma'],
                        clear_state=train_opt['clear_state']))
        elif train_opt['lr_scheme'] == 'StepLR':
            for optimizer in self.optimizers:
                self.schedulers.append(
                    lr_scheduler.StepLR(optimizer, train_opt['lr_step_size'],
                                        train_opt['lr_gamma']))
        elif train_opt['lr_scheme'] == 'StepLR_Restart':
            for optimizer in self.optimizers:
                self.schedulers.append(
                    lr_schedulerR.StepLR_Restart(
                        optimizer,
                        step_sizes=train_opt['lr_step_sizes'],
                        restarts=train_opt['restarts'],
                        weights=train_opt['restart_weights'],
                        gamma=train_opt['lr_gamma'],
                        clear_state=train_opt['clear_state']))
        elif train_opt['lr_scheme'] == 'CosineAnnealingLR_Restart':
            for optimizer in self.optimizers:
                self.schedulers.append(
                    lr_schedulerR.CosineAnnealingLR_Restart(
                        optimizer, train_opt['T_period'],
                        eta_min=train_opt['eta_min'],
                        restarts=train_opt['restarts'],
                        weights=train_opt['restart_weights']))
        elif train_opt['lr_scheme'] == 'ReduceLROnPlateau':
            for optimizer in self.optimizers:
                self.schedulers.append(
                    lr_scheduler.ReduceLROnPlateau(
                        optimizer,
                        mode=train_opt['plateau_mode'],
                        factor=train_opt['plateau_factor'],
                        threshold=train_opt['plateau_threshold'],
                        patience=train_opt['plateau_patience']))
        else:
            raise NotImplementedError(
                'Learning rate scheme ("lr_scheme") not defined or not recognized.'
            )

        self.log_dict = OrderedDict()
model_ft.classifier[6] = nn.Linear(num_ftrs, len(CLASSES)) # Continue from last saved model if CONTINUE_FLAG == 1: print('Loading Saved Model: ', MODEL_PATH) model_ft.load_state_dict(torch.load(MODEL_PATH)) # Assign model to device (CPU if local) model_ft = model_ft.to(device) # Set up optimize if OPTIMIZER == 'SGD': optimizer_ft = optim.SGD(model_ft.parameters(), lr=LEARNING_RATE, weight_decay=WEIGHT_DECAY, momentum=SGD_SETTINGS[1], nesterov=SGD_SETTINGS[0]) elif OPTIMIZER == 'ADAM': optimizer_ft = optim.Adam(model_ft.parameters(), lr=LEARNING_RATE, weight_decay=WEIGHT_DECAY) # Setup learning rate decay schedule (note: LR by a factor of 0.1 every 7 epochs) exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=LR_DECAY_EPOCHS, gamma=LR_DECAY_FACTOR) # Train and return model criterion = nn.CrossEntropyLoss() model_ft, best_acc = train_model(model_ft, criterion, optimizer_ft, exp_lr_scheduler, EVAL_FLAG, num_epochs=NUM_EPOCHS) # Save model torch.save(model_ft.state_dict(), MODEL_PATH) # Visualize model # visualize_model(model_ft) # input('press <ENTER> to continue')
def on_start(state): if os.path.isfile(trace_file): os.remove(trace_file) state['scheduler'] = lr_scheduler.StepLR(state['optimizer'], opt['train.decay_every'], gamma=0.5)
def init_optimizer(self, net_params):
    """Build the optimizer and LR scheduler described by the configer.

    Args:
        net_params: iterable of parameters (or param groups) to optimize.

    Returns:
        (optimizer, scheduler) tuple. Exits the process on an unknown
        optimizer method or LR policy.
    """
    method = self.configer.get('optim', 'optim_method')
    base_lr = self.configer.get('lr', 'base_lr')

    # --- optimizer -------------------------------------------------------
    if method == 'sgd':
        sgd_cfg = self.configer.get('optim', 'sgd')
        optimizer = SGD(net_params,
                        lr=base_lr,
                        momentum=sgd_cfg['momentum'],
                        weight_decay=sgd_cfg['weight_decay'],
                        nesterov=sgd_cfg['nesterov'])
    elif method == 'adam':
        adam_cfg = self.configer.get('optim', 'adam')
        optimizer = Adam(net_params,
                         lr=base_lr,
                         betas=adam_cfg['betas'],
                         eps=adam_cfg['eps'],
                         weight_decay=adam_cfg['weight_decay'])
    else:
        Log.error('Optimizer {} is not valid.'.format(method))
        exit(1)

    # --- scheduler -------------------------------------------------------
    policy = self.configer.get('lr', 'lr_policy')
    if policy == 'step':
        step_cfg = self.configer.get('lr', 'step')
        scheduler = lr_scheduler.StepLR(optimizer,
                                        step_cfg['step_size'],
                                        gamma=step_cfg['gamma'])
    elif policy == 'multistep':
        ms_cfg = self.configer.get('lr', 'multistep')
        scheduler = lr_scheduler.MultiStepLR(optimizer,
                                             ms_cfg['stepvalue'],
                                             gamma=ms_cfg['gamma'])
    elif policy == 'lambda_poly':
        # polynomial decay over the configured total iteration budget
        scheduler = lr_scheduler.LambdaLR(
            optimizer,
            lr_lambda=lambda iters: pow(
                (1.0 - iters / self.configer.get('solver', 'max_iters')),
                0.9))
    elif policy == 'lambda_cosine':
        # cosine decay from 1 to 0 over the total iteration budget
        scheduler = lr_scheduler.LambdaLR(
            optimizer,
            lr_lambda=lambda iters: (math.cos(math.pi * iters / self.configer.
                                              get('solver', 'max_iters')) +
                                     1.0) / 2)
    elif policy == 'plateau':
        plateau_cfg = self.configer.get('lr', 'plateau')
        scheduler = lr_scheduler.ReduceLROnPlateau(
            optimizer,
            mode=plateau_cfg['mode'],
            factor=plateau_cfg['factor'],
            patience=plateau_cfg['patience'],
            threshold=plateau_cfg['threshold'],
            threshold_mode=plateau_cfg['thre_mode'],
            cooldown=plateau_cfg['cooldown'],
            min_lr=plateau_cfg['min_lr'],
            eps=plateau_cfg['eps'])
    else:
        Log.error('Policy:{} is not valid.'.format(policy))
        exit(1)

    return optimizer, scheduler
def main(args):
    """Train or test a Relation Network on CLEVR.

    Loads hyperparameters from a JSON config, prepares output directories,
    builds dictionaries/datasets/model, optionally resumes from a checkpoint
    or loads a pretrained conv layer, then either runs a single test pass or
    the full train/test loop with batch-size growth and StepLR decay.
    """
    # load hyperparameters from configuration file
    with open(args.config) as config_file:
        hyp = json.load(config_file)['hyperparams'][args.model]
    # override configuration dropout / question injection from CLI
    if args.dropout > 0:
        hyp['dropout'] = args.dropout
    if args.question_injection >= 0:
        hyp['question_injection_position'] = args.question_injection
    print('Loaded hyperparameters from configuration {}, model: {}: {}'.format(
        args.config, args.model, hyp))

    # model directory name encodes every hyperparameter of this run
    args.model_dirs = './model_{}_drop{}_bstart{}_bstep{}_bgamma{}_bmax{}_lrstart{}_'+ \
        'lrstep{}_lrgamma{}_lrmax{}_invquests-{}_clipnorm{}_glayers{}_qinj{}_fc1{}_fc2{}_seed{}'
    args.model_dirs = args.model_dirs.format(
        args.model, hyp['dropout'], args.batch_size, args.bs_step,
        args.bs_gamma, args.bs_max, args.lr, args.lr_step, args.lr_gamma,
        args.lr_max, args.invert_questions, args.clip_norm, hyp['g_layers'],
        hyp['question_injection_position'], hyp['f_fc1'], hyp['f_fc2'],
        args.seed)
    if not os.path.exists(args.model_dirs):
        os.makedirs(args.model_dirs)
    # create a file in this folder containing the overall configuration
    args_str = str(args)
    hyp_str = str(hyp)
    all_configuration = args_str + '\n\n' + hyp_str
    filename = os.path.join(args.model_dirs, 'config.txt')
    with open(filename, 'w') as config_file:
        config_file.write(all_configuration)

    args.features_dirs = './features'
    args.test_results_dir = './test_results'
    if not os.path.exists(args.test_results_dir):
        os.makedirs(args.test_results_dir)

    args.cuda = not args.no_cuda and torch.cuda.is_available()
    torch.manual_seed(args.seed)
    if args.cuda:
        torch.cuda.manual_seed(args.seed)

    print('Building word dictionaries from all the words in the dataset...')
    dictionaries = utils.build_dictionaries(args.clevr_dir)
    print('Word dictionary completed!')
    print('Initializing CLEVR dataset...')
    clevr_dataset_train, clevr_dataset_test = initialize_dataset(
        args.clevr_dir, dictionaries, hyp['state_description'])
    print('CLEVR dataset initialized!')

    # Build the model
    args.qdict_size = len(dictionaries[0])
    args.adict_size = len(dictionaries[1])
    model = RN(args, hyp)
    if torch.cuda.device_count() > 1 and args.cuda:
        model = torch.nn.DataParallel(model)
        model.module.cuda()  # call cuda() overridden method
    if args.cuda:
        model.cuda()

    start_epoch = 1
    if args.resume:
        filename = args.resume
        if os.path.isfile(filename):
            print('==> loading checkpoint {}'.format(filename))
            checkpoint = torch.load(filename)
            # removes 'module' from dict entries, pytorch bug #3805
            if torch.cuda.device_count() == 1 and any(
                    k.startswith('module.') for k in checkpoint.keys()):
                checkpoint = {
                    k.replace('module.', ''): v
                    for k, v in checkpoint.items()
                }
            if torch.cuda.device_count() > 1 and not any(
                    k.startswith('module.') for k in checkpoint.keys()):
                checkpoint = {'module.' + k: v for k, v in checkpoint.items()}
            model.load_state_dict(checkpoint)
            print('==> loaded checkpoint {}'.format(filename))
            # the epoch number is encoded in the checkpoint filename
            start_epoch = int(
                re.match(r'.*epoch_(\d+).pth', args.resume).groups()[0]) + 1

    if args.conv_transfer_learn:
        if os.path.isfile(args.conv_transfer_learn):
            # TODO: there may be problems caused by pytorch issue #3805 if using DataParallel
            print('==> loading conv layer from {}'.format(
                args.conv_transfer_learn))
            # pretrained dict is the dictionary containing the already trained conv layer
            pretrained_dict = torch.load(args.conv_transfer_learn)
            if torch.cuda.device_count() == 1:
                conv_dict = model.conv.state_dict()
            else:
                conv_dict = model.module.conv.state_dict()
            # filter only the conv layer from the loaded dictionary
            conv_pretrained_dict = {
                k.replace('conv.', '', 1): v
                for k, v in pretrained_dict.items() if 'conv.' in k
            }
            # overwrite entries in the existing state dict
            conv_dict.update(conv_pretrained_dict)
            # load the new state dict
            if torch.cuda.device_count() == 1:
                model.conv.load_state_dict(conv_dict)
                params = model.conv.parameters()
            else:
                model.module.conv.load_state_dict(conv_dict)
                params = model.module.conv.parameters()
            # freeze the weights for the convolutional layer by disabling gradient evaluation
            # for param in params:
            #     param.requires_grad = False
            print("==> conv layer loaded!")
        else:
            print('Cannot load file {}'.format(args.conv_transfer_learn))

    progress_bar = trange(start_epoch, args.epochs + 1)
    if args.test:
        # perform a single test
        print('Testing epoch {}'.format(start_epoch))
        _, clevr_test_loader = reload_loaders(clevr_dataset_train,
                                              clevr_dataset_test,
                                              args.batch_size,
                                              args.test_batch_size,
                                              hyp['state_description'])
        test(clevr_test_loader, model, start_epoch, dictionaries, args)
    else:
        bs = args.batch_size
        # perform a full training
        # TODO: find a better solution for general lr scheduling policies
        # fixed: was `start_epoch - 1 // args.lr_step`, which `//`-precedence
        # parsed as `start_epoch - (1 // lr_step)` and never decayed the
        # resume learning rate; the intended number of completed decay steps
        # is (start_epoch - 1) // lr_step
        candidate_lr = args.lr * args.lr_gamma**((start_epoch - 1) //
                                                 args.lr_step)
        lr = candidate_lr if candidate_lr <= args.lr_max else args.lr_max
        optimizer = optim.Adam(filter(lambda p: p.requires_grad,
                                      model.parameters()),
                               lr=lr,
                               weight_decay=1e-4)
        scheduler = lr_scheduler.StepLR(optimizer,
                                        args.lr_step,
                                        gamma=args.lr_gamma)
        # make the scheduler aware of the resume point
        scheduler.last_epoch = start_epoch
        print('Training ({} epochs) is starting...'.format(args.epochs))
        for epoch in progress_bar:
            # grow the batch size geometrically until bs_max (bs_max < 0
            # means unbounded); always reload loaders on the first epoch
            if (((args.bs_max > 0 and bs < args.bs_max) or args.bs_max < 0)
                    and (epoch % args.bs_step == 0 or epoch == start_epoch)):
                bs = math.floor(args.batch_size *
                                (args.bs_gamma**(epoch // args.bs_step)))
                if bs > args.bs_max and args.bs_max > 0:
                    bs = args.bs_max
                clevr_train_loader, clevr_test_loader = reload_loaders(
                    clevr_dataset_train, clevr_dataset_test, bs,
                    args.test_batch_size, hyp['state_description'])
                print('Dataset reinitialized with batch size {}'.format(bs))
            # NOTE(review): if bs already equals bs_max at start, the loaders
            # are never created and train() below raises NameError — confirm
            # against the caller's argument ranges.

            # decay the LR until it reaches lr_max (lr_max < 0 = unbounded)
            if ((args.lr_max > 0 and scheduler.get_lr()[0] < args.lr_max)
                    or args.lr_max < 0):
                scheduler.step()
            print('Current learning rate: {}'.format(
                optimizer.param_groups[0]['lr']))

            # TRAIN
            progress_bar.set_description('TRAIN')
            train(clevr_train_loader, model, optimizer, epoch, args)
            # TEST
            progress_bar.set_description('TEST')
            test(clevr_test_loader, model, epoch, dictionaries, args)
            # SAVE MODEL
            filename = 'RN_epoch_{:02d}.pth'.format(epoch)
            torch.save(model.state_dict(),
                       os.path.join(args.model_dirs, filename))
def run(self):
    """Run the full train/validate loop described by self.config.

    Builds datasets, model, optimizer and (optionally) a StepLR scheduler,
    iterates num_epochs over the configured modes, reports scalars to the
    SummaryWriter, saves checkpoints, and returns the best validation
    accuracy observed.
    """
    # unpack the configuration once up front
    dataset_fn = self.config['dataset_fn']
    dataset_root = self.config['dataset_root']
    learn_rate = self.config['learn_rate']
    learn_rate_step = self.config['learn_rate_step']
    log_dir = self.config['log_dir']
    model_fn = self.config['model_fn']
    num_epochs = self.config['num_epochs']
    report_scalar_freq = self.config['report_scalar_freq']
    save_epoch_freq = self.config['save_epoch_freq']
    save_step_freq = self.config['save_step_freq']
    valid_freq = self.config['valid_freq']
    weight_decay = self.config['weight_decay']

    # checkpoint filename prefix: dataset + model (+ optional run name)
    save_prefix = dataset_fn + '_' + model_fn
    if self.run_name():
        save_prefix = save_prefix + '_' + self.run_name()

    if self.reporter is None:
        self.reporter = SummaryWriter(log_dir)

    # one dataset instance per mode (e.g. 'train' / validation)
    train_data = {
        m: DATASETS[dataset_fn](dataset_root, m)
        for m in self.modes
    }
    self.prepare_dataset(train_data)
    num_categories = train_data[self.modes[0]].num_categories()
    print('[*] Number of categories:', num_categories)

    net = self.create_model(num_categories)
    net.print_params()
    data_loaders = self.create_data_loaders(train_data)

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(net.params_to_optimize(
        weight_decay, self.weight_decay_excludes()),
                           lr=learn_rate)
    # halve the LR every learn_rate_step epochs; disabled when step <= 0
    if learn_rate_step > 0:
        lr_exp_scheduler = lr_scheduler.StepLR(optimizer,
                                               step_size=learn_rate_step,
                                               gamma=0.5)
    else:
        lr_exp_scheduler = None

    best_accu = 0.0
    best_net = -1  # epoch index of the best validation accuracy

    # optionally warm-start from a checkpoint
    ckpt_prefix = self.checkpoint_prefix()
    ckpt_nets = self.config['ckpt_nets']
    if ckpt_prefix is not None:
        loaded_paths = net.load(ckpt_prefix, ckpt_nets)
        print('[*] Loaded pretrained model from {}'.format(loaded_paths))

    for epoch in range(1, num_epochs + 1):
        print('-' * 20)
        print('[*] Epoch {}/{}'.format(epoch, num_epochs))

        for mode in self.modes:
            is_train = mode == 'train'
            # non-train modes only run every valid_freq epochs
            if not is_train and epoch % valid_freq != 0:
                continue
            print('[*] Starting {} mode'.format(mode))

            if is_train:
                if lr_exp_scheduler is not None:
                    lr_exp_scheduler.step()
                net.train_mode()
            else:
                net.eval_mode()

            running_corrects = 0
            num_samples = 0
            pbar = tqdm.tqdm(total=len(data_loaders[mode]))
            for bid, data_batch in enumerate(data_loaders[mode]):
                self.step_counters[mode] += 1
                # forward_batch also performs the optimizer step in train mode
                # — presumably; verify against its implementation
                logits, loss, gt_category = self.forward_batch(
                    net, data_batch, mode, optimizer, criterion)
                _, predicts = torch.max(logits, 1)
                predicts_accu = torch.sum(predicts == gt_category)
                running_corrects += predicts_accu.item()

                sampled_batch_size = gt_category.size(0)
                num_samples += sampled_batch_size

                # periodic scalar reporting keyed by the per-mode step counter
                if report_scalar_freq > 0 and self.step_counters[
                        mode] % report_scalar_freq == 0:
                    self.reporter.add_scalar('{}/loss'.format(mode),
                                             loss.item(),
                                             self.step_counters[mode])
                    self.reporter.add_scalar(
                        '{}/accuracy'.format(mode),
                        float(predicts_accu.data) / sampled_batch_size,
                        self.step_counters[mode])

                # periodic mid-epoch checkpointing (train mode only)
                if is_train and save_step_freq > 0 and self.step_counters[
                        mode] % save_step_freq == 0:
                    net.save(log_dir, self.step_counters[mode], save_prefix)

                pbar.update()
            pbar.close()

            epoch_accu = float(running_corrects) / float(num_samples)
            if is_train:
                if epoch % save_epoch_freq == 0:
                    print('[*] {} accu: {:.4f}'.format(mode, epoch_accu))
                    net.save(log_dir, 'epoch_{}'.format(epoch), save_prefix)
            else:
                print('[*] {} accu: {:.4f}'.format(mode, epoch_accu))
                # track the best validation accuracy and its epoch
                if epoch_accu > best_accu:
                    best_accu = epoch_accu
                    best_net = epoch

    print('[*] Best accu: {:.4f}, corresponding epoch: {}'.format(
        best_accu, best_net))

    # release dataset resources
    for m in self.modes:
        train_data[m].dispose()

    return best_accu
import datetime
from torch.optim import lr_scheduler
from config import Config
from train import Trainer
from loss import BCFocalLoss

# --- Cloud-segmentation training setup -------------------------------------
# NOTE(review): this region looks like top-level script code from (at least)
# two different training scripts pasted together; indentation reconstructed —
# verify against the upstream files.
cfig = Config()
net = xceptionAx3(num_classes=1)  # create CNN model (defined elsewhere).
# Binary segmentation loss on raw logits.
# NOTE(review): BCFocalLoss is imported above but BCEWithLogitsLoss is used
# here — confirm which criterion is intended.
criterion = nn.BCEWithLogitsLoss()  # define the loss
optimizer = optim.SGD(net.parameters(), lr=0.0001, momentum=0.9,
                      weight_decay=0.0001)  # select the optimizer
# Decay LR by 10x every 100 epochs.
exp_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=100, gamma=0.1)
# create the train_dataset_loader and val_dataset_loader.
train_tarnsformed_dataset = CloudDataset(img_dir='data/images224',
                                         labels_dir='data/masks224/',
                                         transform=transforms.Compose(
                                             [ToTensor()]))
val_tarnsformed_dataset = CloudDataset(img_dir='data/images224',
                                       labels_dir='data/masks224/',
                                       val=True,
                                       transform=transforms.Compose(
                                           [ToTensor()]))
# NOTE(review): shuffle=False on the *training* loader is unusual — confirm.
train_dataloader = DataLoader(train_tarnsformed_dataset, batch_size=8,
                              shuffle=False, num_workers=0)

# --- FCN training setup -----------------------------------------------------
# NOTE(review): `criterion`/`optimizer` are rebound below, clobbering the
# cloud-segmentation objects above — another sign of glued fragments.
len_testdata = len(test_loader)
#dir_model = model_dir + "\\model_epoch400"
#fcn_model = torch.load(dir_model)
fcn_model = FCNmodel_3pool(n_class)
fcn_model.cuda()
criterion = nn.CrossEntropyLoss()
criterion.cuda()
# lr / momentum / L2_factor / step_size / gamma come from outside this view.
optimizer = optim.SGD(fcn_model.parameters(), lr=lr, momentum=momentum,
                      weight_decay=L2_factor)
scheduler = lr_scheduler.StepLR(optimizer, step_size=step_size, gamma=gamma)
use_gpu = torch.cuda.is_available()
num_gpu = list(range(torch.cuda.device_count()))
print('use_gpu:', use_gpu)
print('num_gpu:', num_gpu)
#pdb.set_trace()
def train_model():
    """Train `fcn_model` over `epochs` (truncated in this view — the body
    continues past the visible chunk)."""
    fcn_model.train()
    # NOTE(review): `epochs` is iterated directly, so it is presumably a
    # range/list defined elsewhere; scheduler.step() at the top of the epoch
    # is the pre-PyTorch-1.1 ordering — confirm intended.
    for epoch in epochs:
        scheduler.step()
        train_data = dataloader(training=True)
        train_loader = torch.utils.data.DataLoader(train_data,
# --- Validation metrics + ResNet-18 fine-tuning -----------------------------
# NOTE(review): this starts with a bare `model.eval()` — it is probably the
# interior of an unseen function (success_metrics?); indentation reconstructed.
model.eval()
original_labels = []
pred_lst = []
# No gradients needed for evaluation; saves memory and compute.
with torch.no_grad():
    for i, (inputs, labels) in enumerate(dataloaders['val']):
        inputs = inputs.to(device)
        labels = labels.to(device)
        outputs = model(inputs)
        # Predicted class = argmax over the logits dimension.
        _, preds = torch.max(outputs, 1)
        # NOTE(review): these extend with *tensors* (possibly on GPU), which
        # are then fed to prfs (presumably sklearn's
        # precision_recall_fscore_support) — confirm this works on the target
        # device / sklearn version, or move to CPU/Python ints first.
        original_labels.extend(labels)
        pred_lst.extend(preds)
precision, recall, f1, support = prfs(original_labels, pred_lst,
                                      average='weighted')
print("Precision: {:.2%}\nRecall: {:.2%}\nF1 score: {:.2%}".format(precision,
                                                                   recall,
                                                                   f1))

# Fine-tune an ImageNet-pretrained ResNet-18, replacing the final FC layer
# with a 16-class head.
model_ft = models.resnet18(pretrained=True)
num_ftrs = model_ft.fc.in_features
model_ft.fc = nn.Linear(num_ftrs, 16)
model_ft = model_ft.to(device)
criterion = nn.CrossEntropyLoss()
optimizer_ft = optim.SGD(model_ft.parameters(), lr=0.001, momentum=0.9)
# Decay LR by 10x every 7 epochs.
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=7, gamma=0.1)
model_ft = train_model(model_ft, criterion, optimizer_ft, exp_lr_scheduler,
                       num_epochs=20)
visualize_model(model_ft, num_images=20)
success_metrics(model_ft)
# Saves the whole model object (not just state_dict); loading will require
# the original class definitions to be importable.
torch.save(model_ft, "./tlmodelv3.1")
# NOTE(review): fragment — the opening `if args.optim == ...` branch (likely
# Adam) is above this view, and the final pretraining() call is truncated
# below; code kept verbatim with reconstructed indentation.
elif args.optim == 'SGD':
    # Optimize only parameters with requires_grad=True (frozen layers skipped).
    optimizer = optim.SGD(filter(lambda p: p.requires_grad,
                                 model.parameters()),
                          lr=rate, momentum=args.momentum)
    # Separate optimizer (own LR, fixed 0.9 momentum) for the pretrain phase.
    optimizer_pretrain = optim.SGD(filter(lambda p: p.requires_grad,
                                          model.parameters()),
                                   lr=rate_pretrain, momentum=0.9)
else:
    raise Exception(f'Invalid optimizer {args.optim} selected.')
# Bundle [main, pretrain] optimizers/schedulers for the training functions.
optimizers = [optimizer, optimizer_pretrain]
scheduler = lr_scheduler.StepLR(optimizer, step_size=sched_step,
                                gamma=sched_gamma)
scheduler_pretrain = lr_scheduler.StepLR(optimizer_pretrain,
                                         step_size=sched_step_pretrain,
                                         gamma=sched_gamma_pretrain)
schedulers = [scheduler, scheduler_pretrain]
# print_both presumably mirrors output to stdout and the log file handle `f`.
utils.print_both(f, 'Mode: {}\n'.format(args.mode))
if args.mode == 'train_full':
    model = training_functions.train_model(model, dataloader, criteria,
                                           optimizers, schedulers, epochs,
                                           params)
elif args.mode == 'pretrain':
    model = training_functions.pretraining(model, dataloader, criteria[0],