def main():
    cudnn.benchmark = True
    base_path = '/NSL/data/images/HyperspectralImages/ICVL/'

    # Dataset
    val_data = DatasetFromHdf5(base_path + '/testclean_si50_st80.h5')
    print(len(val_data))

    # Data Loader (Input Pipeline)
    val_loader = DataLoader(dataset=val_data,
                            num_workers=1,
                            batch_size=1,
                            shuffle=False,
                            pin_memory=True)

    # Model
    model_path = base_path + 'hscnn_5layer_dim10_93.pkl'
    result_path = base_path + '/test_results/'
    var_name = 'rad'

    save_point = torch.load(model_path)
    model_param = save_point['state_dict']
    model = resblock(conv_relu_res_relu_block, 16, 3, 31)
    model = nn.DataParallel(model)
    model.load_state_dict(model_param)

    model = model.cuda()
    model.eval()

    model_path = base_path
    if not os.path.exists(model_path):
        os.makedirs(model_path)
    loss_csv = open(os.path.join(model_path, 'loss.csv'), 'w+')
    log_dir = os.path.join(model_path, 'train.log')
    logger = initialize_logger(log_dir)

    test_loss = validate(val_loader, model, rrmse_loss)
    print("Test Loss: %.9f " % (test_loss))

    # save loss
    record_loss(loss_csv, test_loss)
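# The evaluation script above passes `rrmse_loss` to validate() but never defines it.
# Below is a minimal sketch of a relative-RMSE criterion, assuming prediction and label
# are float tensors of the same shape; the exact formulation used by the original
# repository may differ (e.g. it may use a mean relative absolute error instead).
import torch

def rrmse_loss(outputs, label):
    """Relative RMSE: root-mean-square of the per-element error scaled by the label."""
    error = (outputs - label) / (label + 1e-8)  # small epsilon to avoid division by zero
    return torch.sqrt(torch.mean(error ** 2))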
def main():
    cudnn.benchmark = True

    # Dataset
    train_data = DatasetFromHdf5('./Data/train_Material_.h5')
    print(len(train_data))
    val_data = DatasetFromHdf5('./Data/valid_Material_.h5')
    print(len(val_data))

    # Data Loader (Input Pipeline)
    train_data_loader = DataLoader(dataset=train_data,
                                   num_workers=1,
                                   batch_size=64,
                                   shuffle=True,
                                   pin_memory=True)
    val_loader = DataLoader(dataset=val_data,
                            num_workers=1,
                            batch_size=1,
                            shuffle=False,
                            pin_memory=True)

    # Model
    model = resblock(conv_bn_relu_res_block, 10, 25, 25)
    if torch.cuda.device_count() > 1:
        model = nn.DataParallel(model)
    if torch.cuda.is_available():
        model.cuda()

    # Parameters, Loss and Optimizer
    start_epoch = 0
    end_epoch = 100
    init_lr = 0.0001
    iteration = 0
    record_test_loss = 1000
    # criterion_RRMSE = torch.nn.L1Loss()
    criterion_RRMSE = rrmse_loss
    criterion_Angle = Angle_Loss
    criterion_MSE = torch.nn.MSELoss()
    criterion_SSIM = pytorch_msssim.SSIM()
    # criterion_Div = Divergence_Loss
    criterion_Div = torch.nn.KLDivLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=init_lr, betas=(0.9, 0.999), eps=1e-08, weight_decay=0.01)

    model_path = './models/'
    if not os.path.exists(model_path):
        os.makedirs(model_path)
    loss_csv = open(os.path.join(model_path, 'loss_material.csv'), 'w+')
    log_dir = os.path.join(model_path, 'train_material.log')
    logger = initialize_logger(log_dir)

    # Resume
    resume_file = ''
    if resume_file:
        if os.path.isfile(resume_file):
            print("=> loading checkpoint '{}'".format(resume_file))
            checkpoint = torch.load(resume_file)
            start_epoch = checkpoint['epoch']
            iteration = checkpoint['iter']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])

    for epoch in range(start_epoch + 1, end_epoch):
        start_time = time.time()
        train_loss, iteration, lr = train(train_data_loader, model, criterion_MSE, criterion_RRMSE,
                                          criterion_Angle, criterion_SSIM, criterion_Div,
                                          optimizer, iteration, init_lr, end_epoch, epoch)
        test_loss, loss_angle, loss_reconstruct, loss_SSIM, loss_Div = validate(
            val_loader, model, criterion_MSE, criterion_RRMSE, criterion_Angle, criterion_SSIM, criterion_Div)
        # xxx_loss = validate_save(val_loader, model, criterion_MSE, criterion_RRMSE, epoch)
        save_checkpoint_material(model_path, epoch, iteration, model, optimizer)

        # print loss
        end_time = time.time()
        epoch_time = end_time - start_time
        print("Epoch [%d], Iter[%d], Time:%.9f, learning rate : %.9f, Train Loss: %.9f Test Loss: %.9f, "
              "Angle Loss: %.9f, Recon Loss: %.9f, SSIM Loss: %.9f, Div Loss: %.9f"
              % (epoch, iteration, epoch_time, lr, train_loss, test_loss,
                 loss_angle, loss_reconstruct, loss_SSIM, loss_Div))

        # save loss
        record_loss(loss_csv, epoch, iteration, epoch_time, lr, train_loss, test_loss)
        logger.info("Epoch [%d], Iter[%d], Time:%.9f, learning rate : %.9f, Train Loss: %.9f Test Loss: %.9f, "
                    "Angle Loss: %.9f, Recon Loss: %.9f, SSIM Loss: %.9f, Div Loss: %.9f"
                    % (epoch, iteration, epoch_time, lr, train_loss, test_loss,
                       loss_angle, loss_reconstruct, loss_SSIM, loss_Div))
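# `Angle_Loss` is used above as criterion_Angle but is not defined in this file.
# A minimal sketch of a spectral-angle (SAM-style) loss follows, assuming inputs
# shaped (batch, channels, height, width) with the spectral dimension on axis 1;
# the original implementation may differ.
import torch

def Angle_Loss(outputs, label, eps=1e-8):
    """Mean spectral angle (in radians) between predicted and reference spectra."""
    dot = torch.sum(outputs * label, dim=1)
    norm = torch.norm(outputs, dim=1) * torch.norm(label, dim=1) + eps
    cos_sim = torch.clamp(dot / norm, -1.0 + 1e-7, 1.0 - 1e-7)  # keep acos numerically safe
    return torch.mean(torch.acos(cos_sim))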
def train(self):
    x_train = self.data[0]
    y_train = self.data[1]
    x_val = self.data[2]
    y_val = self.data[3]
    x_test = self.data[4]
    y_test = self.data[5]
    N = x_train.shape[0]
    if not N % self.batch_size == 0:
        print('Illegal Batch Size')
        return
    num_batch = N // self.batch_size
    optimize = getattr(__import__('optimizer'), self.optimizer)
    accuracy_record = [0.0]
    loss_record = []
    param = []
    validation_flag = x_val.shape[0] > 0
    flags.RECORD_FLAG = False
    for epoch in range(self.epochs):
        flags.EPOCH = epoch
        flags.MODE = 'Train'
        for batch in range(num_batch):
            data = x_train[batch * self.batch_size:(batch + 1) * self.batch_size]
            label = y_train[batch * self.batch_size:(batch + 1) * self.batch_size]
            gradient, loss = self.model.loss(data, label)
            for p in range(len(self.model.param)):
                self.model.param[p] = optimize(self.model.param[p], gradient[p], **self.update_setting)
            loss_record.append(loss.asnumpy())
            if batch % self.batch_size == 0:
                print('epoch %d batch %d loss: %f' % (epoch, batch, loss.val))
        flags.MODE = 'Test'
        if validation_flag:
            flags.RECORD_FLAG = True
            validation_accuracy = utils.get_accuracy(np.argmax(self.model.loss(x_val), axis=1), y_val)
            print('validation accuracy: %f' % validation_accuracy)
            if validation_accuracy > max(accuracy_record):
                param = [np.copy(p) for p in self.model.param]
            accuracy_record.append(validation_accuracy)
            self.model.param = [np.copy(p) for p in param]
            flags.RECORD_FLAG = False
            test_accuracy = utils.get_accuracy(np.argmax(self.model.loss(x_test), axis=1), y_test)
        else:
            test_accuracy = utils.get_accuracy(np.argmax(self.model.loss(x_test), axis=1), y_test)
            if test_accuracy > max(accuracy_record):
                param = [np.copy(p) for p in self.model.param]
            accuracy_record.append(test_accuracy)
            self.model.param = [np.copy(p) for p in param]
        print('test accuracy: %f' % test_accuracy)
        print('optimal accuracy: %f' % max(accuracy_record))
        if (epoch + 1) % self.decay_interval == 0:
            self.update_setting['learning_rate'] *= self.decay_rate
            print('learning rate decayed to %f' % self.update_setting['learning_rate'])
    utils.record_loss(loss_record)
    return accuracy_record[1:]
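# The trainer above resolves its update rule with getattr(__import__('optimizer'), self.optimizer),
# i.e. a function in optimizer.py that takes (param, gradient, **update_setting) and returns the
# updated parameter. A minimal sketch of one such rule (plain SGD) is shown; the real module is
# an assumption here and may expose additional rules and keyword arguments.
def sgd(param, gradient, learning_rate=0.01, **kwargs):
    """Vanilla stochastic gradient descent step; unused settings are ignored."""
    return param - learning_rate * gradient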
def main():
    cudnn.benchmark = True
    base_path = '/NSL/data/images/HyperspectralImages/ICVL/'

    # Dataset
    train_data = DatasetFromHdf5(base_path + '/train.h5')
    print(len(train_data))
    val_data = DatasetFromHdf5(base_path + '/valid.h5')
    print(len(val_data))

    # Data Loader (Input Pipeline)
    train_data_loader = DataLoader(dataset=train_data,
                                   num_workers=1,
                                   batch_size=64,
                                   shuffle=True,
                                   pin_memory=True)
    val_loader = DataLoader(dataset=val_data,
                            num_workers=1,
                            batch_size=1,
                            shuffle=False,
                            pin_memory=True)

    # Model
    model = resblock(conv_batch_relu_res_block, 16, 3, 31)
    if torch.cuda.device_count() > 1:
        model = nn.DataParallel(model)
    if torch.cuda.is_available():
        model.cuda()

    # Parameters, Loss and Optimizer
    start_epoch = 0
    end_epoch = 1000
    init_lr = 0.0002
    iteration = 0
    record_test_loss = 1000
    criterion = rrmse_loss
    optimizer = torch.optim.Adam(model.parameters(), lr=init_lr, betas=(0.9, 0.999), eps=1e-08, weight_decay=0)

    model_path = base_path + '/models/'
    if not os.path.exists(model_path):
        os.makedirs(model_path)
    loss_csv = open(os.path.join(model_path, 'loss.csv'), 'w+')
    log_dir = os.path.join(model_path, 'train.log')
    logger = initialize_logger(log_dir)

    # Resume
    resume_file = ''
    if resume_file:
        if os.path.isfile(resume_file):
            print("=> loading checkpoint '{}'".format(resume_file))
            checkpoint = torch.load(resume_file)
            start_epoch = checkpoint['epoch']
            iteration = checkpoint['iter']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])

    for epoch in range(start_epoch + 1, end_epoch):
        print("epoch [%d]" % (epoch))
        start_time = time.time()
        train_loss, iteration, lr = train(train_data_loader, model, criterion, optimizer, iteration, init_lr, end_epoch)
        print("train done! epoch [%d]" % (epoch))
        test_loss = validate(val_loader, model, criterion)
        print("test done! epoch [%d]" % (epoch))

        # Save model
        if test_loss < record_test_loss:
            record_test_loss = test_loss
            save_checkpoint(model_path, epoch, iteration, model, optimizer)

        # print loss
        end_time = time.time()
        epoch_time = end_time - start_time
        print("Epoch [%d], Iter[%d], Time:%.9f, learning rate : %.9f, Train Loss: %.9f Test Loss: %.9f "
              % (epoch, iteration, epoch_time, lr, train_loss, test_loss))

        # save loss
        record_loss(loss_csv, epoch, iteration, epoch_time, lr, train_loss, test_loss)
        logger.info("Epoch [%d], Iter[%d], Time:%.9f, learning rate : %.9f, Train Loss: %.9f Test Loss: %.9f "
                    % (epoch, iteration, epoch_time, lr, train_loss, test_loss))
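# `save_checkpoint(model_path, epoch, iteration, model, optimizer)` is called above but not
# defined here. A minimal sketch consistent with the resume code (which reads the keys
# 'epoch', 'iter', 'state_dict' and 'optimizer') is given; the checkpoint file name is an
# assumption modeled on the .pkl names appearing elsewhere in these scripts.
import os
import torch

def save_checkpoint(model_path, epoch, iteration, model, optimizer):
    """Write a resumable checkpoint containing model and optimizer state."""
    state = {
        'epoch': epoch,
        'iter': iteration,
        'state_dict': model.state_dict(),
        'optimizer': optimizer.state_dict(),
    }
    torch.save(state, os.path.join(model_path, 'hscnn_5layer_dim10_%d.pkl' % epoch))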
def main():
    cudnn.benchmark = True
    train_data = Dataset_cave_train('./data/train')
    print('number of train data: ', len(train_data))
    val_data = Dataset_cave_val('./data/test')
    print('number of validate data: ', len(val_data))

    # Model
    model = Net(HSI_num_residuals=args.HSI_num_residuals, RGB_num_residuals=args.RGB_num_residuals)

    # multi-GPU setup (DataParallel spreads work across all visible GPUs)
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    model = nn.DataParallel(model)
    model = model.to(device=device, dtype=torch.float)  # float32
    model.apply(weights_init_kaiming)

    # Parameters, Loss and Optimizer
    start_epoch = 0
    end_epoch = 501
    init_lr = 0.0002
    iteration = 0
    criterion = MyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=init_lr, betas=(0.9, 0.999), eps=1e-08, weight_decay=0)
    # scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=10, verbose=False,
    #                               threshold=0.0001, threshold_mode='rel', cooldown=0, min_lr=0, eps=1e-08)

    model_path = args.model_path
    if not os.path.exists(model_path):
        os.makedirs(model_path)
    loss_csv = open(os.path.join(model_path, 'loss.csv'), 'w+')
    log_dir = os.path.join(model_path, 'train.log')
    logger = initialize_logger(log_dir)

    # Resume
    resume_file = ''
    if resume_file:
        if os.path.isfile(resume_file):
            print("=> loading checkpoint '{}'".format(resume_file))
            checkpoint = torch.load(resume_file)
            # start_epoch = checkpoint['epoch']
            # iteration = checkpoint['iter']
            model.load_state_dict(checkpoint['state_dict'])
            # optimizer.load_state_dict(checkpoint['optimizer'])

    for epoch in range(start_epoch + 1, end_epoch):
        train_data = Dataset_cave_train('./data/train')
        train_data_loader = DataLoader(dataset=train_data,
                                       num_workers=8,
                                       batch_size=16,
                                       shuffle=True,
                                       pin_memory=True)
        val_data = Dataset_cave_val('./data/test')
        val_data_loader = DataLoader(dataset=val_data,
                                     num_workers=8,
                                     batch_size=16,
                                     shuffle=False,
                                     pin_memory=True)

        start_time = time.time()
        train_loss, iteration = train(train_data_loader, model, criterion, optimizer, iteration, device)
        val_loss = validate(val_data_loader, model, criterion, device)

        # Save model
        if epoch % 100 == 0:
            save_checkpoint(model_path, epoch, iteration, model, optimizer)

        # Update learning rate
        for param_group in optimizer.param_groups:
            lr = param_group['lr']
        # scheduler.step(val_loss)

        # print loss
        end_time = time.time()
        epoch_time = end_time - start_time
        print("Epoch [%d], Iter[%d], Time:%.9f, learning rate : %.9f, Train Loss: %.9f Test Loss: %.9f "
              % (epoch, iteration, epoch_time, lr, train_loss, val_loss))

        # save loss: write the epoch, iteration, time, learning rate and losses to the CSV file
        record_loss(loss_csv, epoch, iteration, epoch_time, lr, train_loss, val_loss)
        logger.info("Epoch [%d], Iter[%d], Time:%.9f, learning rate : %.9f, Train Loss: %.9f Test Loss: %.9f "
                    % (epoch, iteration, epoch_time, lr, train_loss, val_loss))
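# `weights_init_kaiming` is applied via model.apply() above but is not defined in this file.
# A minimal sketch is given, assuming convolution and linear layers receive Kaiming-normal
# weights; the original initializer may also handle BatchNorm layers or use different gains.
import torch.nn as nn

def weights_init_kaiming(m):
    """Kaiming-normal initialization for Conv2d and Linear layers, zero bias."""
    if isinstance(m, (nn.Conv2d, nn.Linear)):
        nn.init.kaiming_normal_(m.weight, nonlinearity='relu')
        if m.bias is not None:
            nn.init.zeros_(m.bias)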
def main():
    # https://drive.google.com/file/d/1QxQxf2dzfSbvCgWlI9VuxyBgfmQyCmfE/view?usp=sharing - train data
    # https://drive.google.com/file/d/11INkjd_ajT-RSCSFqfB7reLI6_m1jCAC/view?usp=sharing - val data
    # https://drive.google.com/file/d/1m0EZaRjla2o_eL3hOd7UMkSwoME5mF4A/view?usp=sharing - extra val data
    cudnn.benchmark = True

    # Dataset
    # train_data = DatasetFromHdf5('C:/Users/alawy/Desktop/Training/Training-shadesofgrey/train_tbands.h5')
    train_data = DatasetFromHdf5('/storage/train_cropped14.h5')
    print(len(train_data))
    val_data_extra = DatasetFromHdf5('/storage/valid_extra99.h5')
    val_data = DatasetFromHdf5('/storage/valid_cropped89.h5')
    new_val = []
    new_val.append(val_data)
    new_val.append(val_data_extra)
    print(len(new_val))
    print('con')
    val_new = data.ConcatDataset(new_val)
    print(len(val_new))

    # Data Loader (Input Pipeline)
    train_data_loader = DataLoader(dataset=train_data,
                                   num_workers=4,
                                   batch_size=512,
                                   shuffle=True,
                                   pin_memory=True)
    val_loader = DataLoader(dataset=val_new,
                            num_workers=1,
                            batch_size=1,
                            shuffle=False,
                            pin_memory=True)
    # torch.set_num_threads(12)

    # Model
    model = resblock(conv_relu_res_relu_block, 16, 3, 25)
    if torch.cuda.device_count() > 1:
        model = nn.DataParallel(model)
    if torch.cuda.is_available():
        model = model.to('cuda')

    # Parameters, Loss and Optimizer
    start_epoch = 0
    end_epoch = 1000
    init_lr = 0.0002
    iteration = 0
    record_test_loss = 1000
    criterion = rrmse_loss
    # optimizer = torch.optim.AdamW(model.parameters(), lr=init_lr, betas=(0.9, 0.999), eps=1e-08, weight_decay=0)
    optimizer = torch.optim.Adam(model.parameters(), lr=init_lr, betas=(0.9, 0.999), eps=1e-08, weight_decay=0.01)

    # model_path = '/storage/models-crop/'
    model_path = './models-crop/'
    if not os.path.exists(model_path):
        os.makedirs(model_path)
    loss_csv = open(os.path.join(model_path, 'loss.csv'), 'w+')
    log_dir = os.path.join(model_path, 'train.log')
    logger = initialize_logger(log_dir)

    # Resume
    resume_file = ''
    # resume_file = '/storage/notebooks/r9h1kyhq8oth90j/models/hscnn_5layer_dim10_69.pkl'
    # resume_file = '/storage/notebooks/r9h1kyhq8oth90j/models-crop/hscnn_5layer_dim10_95.pkl'
    if resume_file:
        if os.path.isfile(resume_file):
            print("=> loading checkpoint '{}'".format(resume_file))
            checkpoint = torch.load(resume_file)
            start_epoch = checkpoint['epoch']
            iteration = checkpoint['iter']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])

    for epoch in range(start_epoch + 1, end_epoch):
        start_time = time.time()
        train_loss, iteration, lr = train(train_data_loader, model, criterion, optimizer, iteration, init_lr, end_epoch)
        test_loss = validate(val_loader, model, criterion)

        # Save model
        if test_loss < record_test_loss:
            record_test_loss = test_loss
            save_checkpoint(model_path, epoch, iteration, model, optimizer)
        else:
            save_checkpoint(model_path, epoch, iteration, model, optimizer)

        # print loss
        end_time = time.time()
        epoch_time = end_time - start_time
        print("Epoch [%d], Iter[%d], Time:%.9f, learning rate : %.9f, Train Loss: %.9f Test Loss: %.9f "
              % (epoch, iteration, epoch_time, lr, train_loss, test_loss))

        # save loss
        record_loss(loss_csv, epoch, iteration, epoch_time, lr, train_loss, test_loss)
        logger.info("Epoch [%d], Iter[%d], Time:%.9f, learning rate : %.9f, Train Loss: %.9f Test Loss: %.9f "
                    % (epoch, iteration, epoch_time, lr, train_loss, test_loss))
        gc.collect()
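# `record_loss(loss_csv, epoch, iteration, epoch_time, lr, train_loss, test_loss)` is assumed
# to append one row per epoch to the already-open CSV handle created above. A minimal sketch
# follows; the column order mirrors the call site but is otherwise an assumption.
def record_loss(loss_csv, epoch, iteration, epoch_time, lr, train_loss, test_loss):
    """Append one epoch's statistics as a CSV row and flush immediately."""
    loss_csv.write('%d,%d,%f,%f,%f,%f\n' % (epoch, iteration, epoch_time, lr, train_loss, test_loss))
    loss_csv.flush()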
def main():
    cudnn.benchmark = True

    # load dataset
    print("\nloading dataset ...")
    train_data1 = HyperDatasetTrain1(mode='train')
    train_data2 = HyperDatasetTrain2(mode='train')
    train_data3 = HyperDatasetTrain3(mode='train')
    train_data4 = HyperDatasetTrain4(mode='train')
    print("Train1:%d, Train2:%d, Train3:%d, Train4:%d" % (
        len(train_data1), len(train_data2), len(train_data3), len(train_data4)))
    val_data = HyperDatasetValid(mode='valid')
    print("Validation set samples: ", len(val_data))

    # Data Loader (Input Pipeline)
    train_loader1 = DataLoader(dataset=train_data1, batch_size=opt.batchSize, shuffle=True, num_workers=2, pin_memory=True, drop_last=True)
    train_loader2 = DataLoader(dataset=train_data2, batch_size=opt.batchSize, shuffle=True, num_workers=2, pin_memory=True, drop_last=True)
    train_loader3 = DataLoader(dataset=train_data3, batch_size=opt.batchSize, shuffle=True, num_workers=2, pin_memory=True, drop_last=True)
    train_loader4 = DataLoader(dataset=train_data4, batch_size=opt.batchSize, shuffle=True, num_workers=2, pin_memory=True, drop_last=True)
    train_loader = [train_loader1, train_loader2, train_loader3, train_loader4]
    val_loader = DataLoader(dataset=val_data, batch_size=1, shuffle=False, num_workers=2, pin_memory=True)

    # model
    print("\nbuilding models_baseline ...")
    model = AWAN(3, 31, 200, 8)
    print('Parameters number is ', sum(param.numel() for param in model.parameters()))
    criterion_train = LossTrainCSS()
    criterion_valid = Loss_valid()
    if torch.cuda.device_count() > 1:
        model = nn.DataParallel(model)  # batchsize integer times
    if torch.cuda.is_available():
        model.cuda()
        criterion_train.cuda()
        criterion_valid.cuda()

    # Parameters, Loss and Optimizer
    start_epoch = 0
    iteration = 0
    record_val_loss = 1000
    optimizer = optim.Adam(model.parameters(), lr=opt.init_lr, betas=(0.9, 0.999), eps=1e-08, weight_decay=0)

    # visualization
    if not os.path.exists(opt.outf):
        os.makedirs(opt.outf)
    loss_csv = open(os.path.join(opt.outf, 'loss.csv'), 'a+')
    log_dir = os.path.join(opt.outf, 'train.log')
    logger = initialize_logger(log_dir)

    # Resume
    # resume_file = opt.outf + '/net_10epoch.pth'
    resume_file = ''
    if resume_file:
        if os.path.isfile(resume_file):
            print("=> loading checkpoint '{}'".format(resume_file))
            checkpoint = torch.load(resume_file)
            start_epoch = checkpoint['epoch']
            iteration = checkpoint['iter']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])

    # start epoch
    for epoch in range(start_epoch + 1, opt.end_epoch):
        start_time = time.time()
        train_loss, iteration, lr = train(train_loader, model, criterion_train, optimizer, epoch, iteration,
                                          opt.init_lr, opt.decay_power, opt.trade_off)
        val_loss = validate(val_loader, model, criterion_valid)

        # Save model
        if torch.abs(val_loss - record_val_loss) < 0.0001 or val_loss < record_val_loss:
            save_checkpoint(opt.outf, epoch, iteration, model, optimizer)
            if val_loss < record_val_loss:
                record_val_loss = val_loss

        # print loss
        end_time = time.time()
        epoch_time = end_time - start_time
        print("Epoch [%02d], Iter[%06d], Time:%.9f, learning rate : %.9f, Train Loss: %.9f Test Loss: %.9f "
              % (epoch, iteration, epoch_time, lr, train_loss, val_loss))

        # save loss
        record_loss(loss_csv, epoch, iteration, epoch_time, lr, train_loss, val_loss)
        logger.info("Epoch [%02d], Iter[%06d], Time:%.9f, learning rate : %.9f, Train Loss: %.9f Test Loss: %.9f "
                    % (epoch, iteration, epoch_time, lr, train_loss, val_loss))
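# train() above receives opt.init_lr and opt.decay_power, which suggests a polynomial
# learning-rate decay over iterations inside the training loop. A minimal sketch of such
# a schedule is given below; the helper name and the max_iter parameter are assumptions
# for illustration, not taken from the original code.
def adjust_learning_rate(optimizer, init_lr, iteration, max_iter, power):
    """Polynomial decay: lr = init_lr * (1 - iteration / max_iter) ** power."""
    lr = init_lr * (1 - float(iteration) / max_iter) ** power
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr
    return lr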