class viewpoint_classifier():
    def __init__(self, model, dataset_index=0, video_target=None):
        if args.video is None:
            self.video_target = video_target
            customset_train = CustomDataset(path=args.dataset_path, subset_type="training",
                                            dataset_index=dataset_index, video_target=video_target)
            customset_test = CustomDataset(path=args.dataset_path, subset_type="testing",
                                           dataset_index=dataset_index, video_target=video_target)
            self.trainloader = torch.utils.data.DataLoader(dataset=customset_train,
                                                           batch_size=args.batch_size,
                                                           shuffle=True, num_workers=args.num_workers)
            self.testloader = torch.utils.data.DataLoader(dataset=customset_test,
                                                          batch_size=args.batch_size,
                                                          shuffle=False, num_workers=args.num_workers)
        else:
            video_dataset = VideoDataset(video=args.video, batch_size=args.batch_size,
                                         frame_skip=int(args.frame_skip),
                                         image_folder=args.extract_frames_path,
                                         use_existing=args.use_existing_frames)
            self.videoloader = torch.utils.data.DataLoader(dataset=video_dataset, batch_size=1,
                                                           shuffle=False, num_workers=args.num_workers)

        if model == "alex":
            self.model = AlexNet()
        elif model == "vgg":
            self.model = VGG()
        elif model == "resnet":
            self.model = ResNet()

        if args.pretrained_model is not None:
            if not args.pretrained_finetuning:
                self.model.load_state_dict(torch.load(args.pretrained_model))
            else:
                print("DEBUG : Make it load only part of the resnet model")
                # Optionally freeze the backbone:
                # for param in self.model.parameters():
                #     param.requires_grad = False
                # Swap in the 1000-class ImageNet head to load the checkpoint,
                # then replace it with a 3-class head for this task.
                self.model.fc = nn.Linear(512, 1000)
                self.model.load_state_dict(torch.load(args.pretrained_model))
                self.model.fc = nn.Linear(512, 3)
        self.model.cuda()
        print("Using weight decay: ", args.weight_decay)
        self.optimizer = optim.SGD(self.model.parameters(), weight_decay=float(args.weight_decay),
                                   lr=0.01, momentum=0.9, nesterov=True)
        self.criterion = nn.CrossEntropyLoss().cuda()
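# A hedged sketch of the "load only part of the resnet model" idea flagged in the
# DEBUG message above: copy only the checkpoint entries whose names and shapes match
# the current model, so a differently sized fc head is simply skipped. The helper
# name and checkpoint layout are assumptions, not names from this codebase.
def load_partial_state_dict(model, checkpoint_path):
    pretrained = torch.load(checkpoint_path)
    own_state = model.state_dict()
    compatible = {k: v for k, v in pretrained.items()
                  if k in own_state and v.shape == own_state[k].shape}
    own_state.update(compatible)  # overwrite only the matching tensors
    model.load_state_dict(own_state)
    return model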
config = parse_args()
if config['model_config']['block_type'] == 'basic':
    model = ResNet(BasicModule,
                   filter_map=[16, 32, 64],
                   n=config['model_config']['depth'],
                   option=config['model_config']['option'])
else:
    model = ResNet(BottleNeckModule, [16, 32, 64],
                   config['model_config']['depth'],
                   config['model_config']['option'])

optimizer = torch.optim.Adam(params=model.parameters(),
                             lr=config['optim_config']['base_lr'],
                             weight_decay=config['optim_config']['weight_decay'])
scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer,
                                                 milestones=config['optim_config']['milestones'],
                                                 gamma=config['optim_config']['lr_decay'])


def train(model, epochs, trainloader, testloader, device, criterion, optimizer, scheduler):
    model.train()
    start_time = time.time()
    train_losses = np.array([])
    test_losses = np.array([])
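# The body of train() above is truncated in this excerpt. A minimal sketch (an
# assumption, not recovered source) of how such a loop typically continues, with
# the MultiStepLR scheduler stepped once per epoch so the lr decays at the
# configured milestones:
def train_sketch(model, epochs, trainloader, device, criterion, optimizer, scheduler):
    for epoch in range(epochs):
        running_loss = 0.0
        for inputs, targets in trainloader:
            inputs, targets = inputs.to(device), targets.to(device)
            optimizer.zero_grad()
            loss = criterion(model(inputs), targets)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
        scheduler.step()  # MultiStepLR: multiply lr by gamma at each milestone epoch
        print("epoch {}: train loss {:.4f}".format(epoch, running_loss / len(trainloader)))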
def train(working_dir, grid_size, learning_rate, batch_size, num_walks, model_type, fn):
    train_props, val_props, test_props = get_props(working_dir, dtype=np.float32)
    means_stds = np.loadtxt(working_dir + "/means_stds.csv", dtype=np.float32, delimiter=',')

    # filter out redundant qm8 properties
    if train_props.shape[1] == 16:
        filtered_labels = list(range(0, 8)) + list(range(12, 16))
        train_props = train_props[:, filtered_labels]
        val_props = val_props[:, filtered_labels]
        test_props = test_props[:, filtered_labels]
        means_stds = means_stds[:, filtered_labels]

    if model_type == "resnet18":
        model = ResNet(BasicBlock, [2, 2, 2, 2], grid_size, "regression",
                       feat_nums, e_sizes, num_classes=train_props.shape[1])
    elif model_type == "resnet34":
        model = ResNet(BasicBlock, [3, 4, 6, 3], grid_size, "regression",
                       feat_nums, e_sizes, num_classes=train_props.shape[1])
    elif model_type == "resnet50":
        model = ResNet(Bottleneck, [3, 4, 6, 3], grid_size, "regression",
                       feat_nums, e_sizes, num_classes=train_props.shape[1])
    elif model_type == "densenet121":
        model = densenet121(grid_size, "regression", feat_nums, e_sizes,
                            num_classes=train_props.shape[1])
    elif model_type == "densenet161":
        model = densenet161(grid_size, "regression", feat_nums, e_sizes,
                            num_classes=train_props.shape[1])
    elif model_type == "densenet169":
        model = densenet169(grid_size, "regression", feat_nums, e_sizes,
                            num_classes=train_props.shape[1])
    elif model_type == "densenet201":
        model = densenet201(grid_size, "regression", feat_nums, e_sizes,
                            num_classes=train_props.shape[1])
    else:
        print("specify a valid model")
        return

    model.float()
    model.cuda()

    loss_function_train = nn.MSELoss(reduction='none')
    loss_function_val = nn.L1Loss(reduction='none')
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    # if model_type[0] == "r":
    #     batch_size = 128
    #     optimizer = torch.optim.SGD(model.parameters(), lr=0.1,
    #                                 momentum=0.9, weight_decay=5e-4, nesterov=True)
    # elif model_type[0] == "d":
    #     batch_size = 512
    #     optimizer = torch.optim.SGD(model.parameters(), lr=0.1,
    #                                 momentum=0.9, weight_decay=1e-4, nesterov=True)
    # else:
    #     print("specify a valid model")
    #     return

    stds = means_stds[1, :]
    tl_list = []
    vl_list = []
    log_file = open(fn + ".txt", "w")
    log_file.write("start")
    log_file.flush()

    for file_num in range(num_loads):
        if file_num % 20 == 0:
            model_file = open("../../scratch/" + fn + ".pkl", "wb")
            pickle.dump(model, model_file)
            model_file.close()
        log_file.write("load: " + str(file_num))
        print("load: " + str(file_num))

        # Get new random walks
        if file_num == 0:
            t = time.time()
            train_loader, val_loader, test_loader = get_loaders(
                working_dir, file_num, grid_size, batch_size, train_props,
                val_props=val_props, test_props=test_props)
            print("load time")
            print(time.time() - t)
        else:
            file_num = random.randint(0, num_walks - 1)
            t = time.time()
            train_loader, _, _ = get_loaders(
                working_dir, file_num, grid_size, batch_size, train_props)
            print("load time")
            print(time.time() - t)

        # Train on a set of random walks; can do multiple epochs if desired
        for epoch in range(epochs_per_load):
            model.train()
            t = time.time()
            train_loss_list = []
            train_mae_loss_list = []
            for i, (walks_int, walks_float, props) in enumerate(train_loader):
                walks_int = walks_int.cuda().long()
                walks_float = walks_float.cuda().float()
                props = props.cuda()
                outputs = model(walks_int, walks_float)
                # Individual losses for each item
                loss_mae = torch.mean(loss_function_val(props, outputs), 0)
                train_mae_loss_list.append(loss_mae.cpu().detach().numpy())
                loss = torch.mean(loss_function_train(props, outputs), 0)
                train_loss_list.append(loss.cpu().detach().numpy())
                # Loss converted to a single value for backpropagation
                loss = torch.sum(loss)
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

            model.eval()
            val_loss_list = []
            with torch.no_grad():
                for i, (walks_int, walks_float, props) in enumerate(val_loader):
                    walks_int = walks_int.cuda().long()
                    walks_float = walks_float.cuda().float()
                    props = props.cuda()
                    outputs = model(walks_int, walks_float)
                    # Individual losses for each item
                    loss = loss_function_val(props, outputs)
                    val_loss_list.append(loss.cpu().detach().numpy())

            # The ith row of this array is the losses for each label in batch i
            train_loss_arr = np.array(train_loss_list)
            train_mae_arr = np.array(train_mae_loss_list)
            log_file.write("training mse loss\n")
            log_file.write(str(np.mean(train_loss_arr)) + "\n")
            log_file.write("training mae loss\n")
            log_file.write(str(np.mean(train_mae_arr)) + "\n")
            print("training mse loss")
            print(str(np.mean(train_loss_arr)))
            print("training mae loss")
            print(str(np.mean(train_mae_arr)))

            val_loss_arr = np.concatenate(val_loss_list, 0)
            val_loss = np.mean(val_loss_arr, 0)
            log_file.write("val loss\n")
            log_file.write(str(np.mean(val_loss_arr)) + "\n")
            print("val loss")
            print(str(np.mean(val_loss_arr)))

            # Unnormalized loss is for comparison to papers
            tnl = np.mean(train_mae_arr, 0)
            log_file.write("train normalized losses\n")
            log_file.write(" ".join(list(map(str, tnl))) + "\n")
            print("train normalized losses")
            print(" ".join(list(map(str, tnl))))
            log_file.write("val normalized losses\n")
            log_file.write(" ".join(list(map(str, val_loss))) + "\n")
            print("val normalized losses")
            print(" ".join(list(map(str, val_loss))))

            tunl = stds * tnl
            log_file.write("train unnormalized losses\n")
            log_file.write(" ".join(list(map(str, tunl))) + "\n")
            print("train unnormalized losses")
            print(" ".join(list(map(str, tunl))))

            vunl = stds * val_loss
            log_file.write("val unnormalized losses\n")
            log_file.write(" ".join(list(map(str, vunl))) + "\n")
            log_file.write("\n")
            print("val unnormalized losses")
            print(" ".join(list(map(str, vunl))))
            print("\n")
            print("time")
            print(time.time() - t)

        file_num += 1
        log_file.flush()

    log_file.close()
    return model
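# Why the "unnormalized losses" above multiply by stds: the targets were
# standardized with (x - mean) / std, so an MAE computed in normalized units
# converts back to each property's physical units by scaling with that property's
# std. A tiny self-contained check (toy numbers, not project data):
import numpy as np

stds = np.array([2.0, 0.5])            # per-property std used for normalization
normalized_mae = np.array([0.1, 0.3])  # per-property MAE in normalized units
unnormalized_mae = stds * normalized_mae
print(unnormalized_mae)  # [0.2 0.15] -> MAE in the original units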
def main():
    global args, best_result, output_directory, train_csv, test_csv
    args = parser.parse_args()
    if args.modality == 'rgb' and args.num_samples != 0:
        print("number of samples is forced to be 0 when input modality is rgb")
        args.num_samples = 0
    if args.modality == 'rgb' and args.max_depth != 0.0:
        print("max depth is forced to be 0.0 when input modality is rgb/rgbd")
        args.max_depth = 0.0

    sparsifier = None
    max_depth = args.max_depth if args.max_depth >= 0.0 else np.inf
    if args.sparsifier == UniformSampling.name:
        sparsifier = UniformSampling(num_samples=args.num_samples, max_depth=max_depth)
    elif args.sparsifier == SimulatedStereo.name:
        sparsifier = SimulatedStereo(num_samples=args.num_samples, max_depth=max_depth)

    # create results folder, if not already exists
    output_directory = os.path.join(
        '/media/kuowei/8EB89C8DB89C7585/results_CS',
        '{}'.format(args.outputdir),
        '{}.sparsifier={}.modality={}.arch={}.decoder={}.criterion={}.lr={}.bs={}'
        .format(args.data, sparsifier, args.modality, args.arch, args.decoder,
                args.criterion, args.lr, args.batch_size))
    if not os.path.exists(output_directory):
        os.makedirs(output_directory)
    train_csv = os.path.join(output_directory, 'train.csv')
    test_csv = os.path.join(output_directory, 'test.csv')
    best_txt = os.path.join(output_directory, 'best.txt')

    # define loss function (criterion) and optimizer
    if args.criterion == 'l2':
        criterion = criteria.MaskedMSELoss().cuda()
    elif args.criterion == 'l1':
        criterion = criteria.MaskedL1Loss().cuda()
    out_channels = 1

    # Data loading code
    print("=> creating data loaders ...")
    traindir = os.path.join('/media/kuowei/8EB89C8DB89C7585/data', args.data, 'train')
    valdir = os.path.join('/media/kuowei/8EB89C8DB89C7585/data', args.data, 'val')
    # traindir = os.path.join('data', args.data, 'train')
    # valdir = os.path.join('data', args.data, 'val')
    # if args.data == 'kitti':
    #     rgb_dir = '/media/kuowei/c9cb78ce-3109-4880-adad-b628c4261d82/rgb/train/rgb/'
    #     sparse_depth_dir = '/media/kuowei/c9cb78ce-3109-4880-adad-b628c4261d82/rgb/train/sd/'
    #     continuous_depth_dir = '/media/kuowei/c9cb78ce-3109-4880-adad-b628c4261d82/rgb/train/cd/'
    #     ground_dir = '/media/kuowei/c9cb78ce-3109-4880-adad-b628c4261d82/rgb/train/ground/'
    #     train_dataset = RgbdDataset(rgb_dir, sparse_depth_dir, continuous_depth_dir, ground_dir)
    #     train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=args.batch_size,
    #                                                shuffle=True, num_workers=args.workers,
    #                                                pin_memory=True, sampler=None)
    #     # set batch size to be 1 for validation
    #     rgb_dir_val = '/media/kuowei/c9cb78ce-3109-4880-adad-b628c4261d82/rgb/validate/rgb/'
    #     sparse_depth_dir_val = '/media/kuowei/c9cb78ce-3109-4880-adad-b628c4261d82/rgb/validate/sd/'
    #     continuous_depth_dir_val = '/media/kuowei/c9cb78ce-3109-4880-adad-b628c4261d82/rgb/validate/cd/'
    #     ground_dir_val = '/media/kuowei/c9cb78ce-3109-4880-adad-b628c4261d82/rgb/validate/ground/'
    #     val_dataset = RgbdDataset(rgb_dir_val, sparse_depth_dir_val, continuous_depth_dir_val, ground_dir_val)
    #     val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=1, shuffle=False,
    #                                              num_workers=args.workers, pin_memory=True)
    # elif args.data == 'nyudepthv2':
    train_dataset = NYUDataset(traindir, type='train', modality=args.modality, sparsifier=sparsifier)
    train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=args.batch_size,
                                               shuffle=True, num_workers=args.workers,
                                               pin_memory=True, sampler=None)

    # set batch size to be 1 for validation
    val_dataset = NYUDataset(valdir, type='val', modality=args.modality, sparsifier=sparsifier)
    val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=1, shuffle=False,
                                             num_workers=args.workers, pin_memory=True)
    print("=> data loaders created.")

    # evaluation mode
    if args.evaluate:
        best_model_filename = os.path.join(output_directory, 'model_best.pth.tar')
        if os.path.isfile(best_model_filename):
            print("=> loading best model '{}'".format(best_model_filename))
            checkpoint = torch.load(best_model_filename)
            args.start_epoch = checkpoint['epoch']
            best_result = checkpoint['best_result']
            model = checkpoint['model']
            print("=> loaded best model (epoch {})".format(checkpoint['epoch']))
        else:
            print("=> no best model found at '{}'".format(best_model_filename))
        validate(val_loader, model, checkpoint['epoch'], write_to_file=False)
        return

    # optionally resume from a checkpoint
    elif args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch'] + 1
            best_result = checkpoint['best_result']
            model = checkpoint['model']
            optimizer = checkpoint['optimizer']
            print("=> loaded checkpoint (epoch {})".format(checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))
            return

    # create new model
    else:
        # define model
        print("=> creating Model ({}-{}) ...".format(args.arch, args.decoder))
        in_channels = len(args.modality)
        if args.arch == 'resnet50':
            model = ResNet(layers=50, decoder=args.decoder, in_channels=in_channels,
                           out_channels=out_channels, pretrained=args.pretrained)
        elif args.arch == 'resnet18':
            model = ResNet(layers=18, decoder=args.decoder, in_channels=in_channels,
                           out_channels=out_channels, pretrained=args.pretrained)
        elif args.arch == 'resnet152':
            model = ResNet(layers=152, decoder=args.decoder, in_channels=in_channels,
                           out_channels=out_channels, pretrained=args.pretrained)
        print("=> model created.")

        optimizer = torch.optim.SGD(model.parameters(), args.lr,
                                    momentum=args.momentum, weight_decay=args.weight_decay)

        # create new csv files with only header
        with open(train_csv, 'w') as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
            writer.writeheader()
        with open(test_csv, 'w') as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
            writer.writeheader()

    # model = torch.nn.DataParallel(model).cuda()
    model = model.cuda()
    print(model)
    print("=> model transferred to GPU.")

    for epoch in range(args.start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch)

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch)

        # evaluate on validation set
        result, img_merge = validate(val_loader, model, epoch)

        # remember best rmse and save checkpoint
        is_best = result.rmse < best_result.rmse
        if is_best:
            best_result = result
            with open(best_txt, 'w') as txtfile:
                txtfile.write(
                    "epoch={}\nmse={:.3f}\nrmse={:.3f}\nabsrel={:.3f}\nlg10={:.3f}\nmae={:.3f}\ndelta1={:.3f}\nt_gpu={:.4f}\n"
                    .format(epoch, result.mse, result.rmse, result.absrel,
                            result.lg10, result.mae, result.delta1, result.gpu_time))
            if img_merge is not None:
                img_filename = output_directory + '/comparison_best.png'
                utils.save_image(img_merge, img_filename)

        save_checkpoint(
            {
                'epoch': epoch,
                'arch': args.arch,
                'model': model,
                'best_result': best_result,
                'optimizer': optimizer,
            }, is_best, epoch)
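# adjust_learning_rate() is called above but not shown in this excerpt. A typical
# step-decay implementation for this style of training loop looks like the sketch
# below; the decay factor and step size are assumptions, not recovered source.
def adjust_learning_rate(optimizer, epoch, base_lr=0.01, step=5, gamma=0.1):
    lr = base_lr * (gamma ** (epoch // step))  # decay lr by gamma every `step` epochs
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr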
def train(name):
    record = pd.DataFrame(data=np.zeros((1, 4), dtype=float),
                          columns=['precision', 'accuracy', 'recall', 'F1'])
    for _ in range(opt.runs):
        seed = random.randint(1, 10000)
        print("Random Seed: ", seed)
        torch.manual_seed(seed)

        # mkdirs for checkpoints output
        os.makedirs(opt.checkpoints_folder, exist_ok=True)
        os.makedirs('%s/%s' % (opt.checkpoints_folder, name), exist_ok=True)
        os.makedirs('report_metrics', exist_ok=True)
        root_dir = 'report_metrics/%s_aug_%s_IMBA/%s' % (opt.model, str(opt.n_group), name)
        os.makedirs(root_dir, exist_ok=True)

        # load the dataset
        path = 'UCRArchive_2018/' + name + '/' + name + '_TRAIN.tsv'
        train_set, n_class = load_ucr(path)
        print('Balanced data augmentation enabled!')
        stratified_train_set = stratify_by_label(train_set)
        data_aug_set = data_aug_by_dft(stratified_train_set, opt.n_group)
        total_set = np.concatenate((train_set, data_aug_set))
        print('Shape of total set', total_set.shape)
        dataset = UcrDataset(total_set, channel_last=opt.channel_last)
        batch_size = int(min(len(dataset) / 10, 16))
        dataloader = UCR_dataloader(dataset, batch_size)

        # Common behavior
        seq_len = dataset.get_seq_len()  # initialize the sequence length

        # create the classifier, loss function, and optimizer
        if opt.model == 'r':
            net = ResNet(n_in=seq_len, n_classes=n_class).to(device)
        if opt.model == 'f':
            net = ConvNet(n_in=seq_len, n_classes=n_class).to(device)
        criterion = nn.CrossEntropyLoss().to(device)
        optimizer = optim.Adam(net.parameters(), lr=opt.lr)
        scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min',
                                                         factor=0.5, patience=50,
                                                         min_lr=0.0001)
        min_loss = 10000
        print('############# Start to Train ###############')
        net.train()
        for epoch in range(opt.epochs):
            for i, (data, label) in enumerate(dataloader):
                data = data.float().to(device)
                label = label.long().to(device)
                optimizer.zero_grad()
                output = net(data)
                loss = criterion(output, label.view(label.size(0)))
                loss.backward()
                optimizer.step()
                scheduler.step(loss)
                # print('[%d/%d][%d/%d] Loss: %.8f ' % (epoch, opt.epochs, i + 1, len(dataloader), loss.item()))
                if loss < min_loss:
                    min_loss = loss
                    # new minimum loss: save the best model so far
                    print('MinLoss: %.10f Saving the best epoch model.....' % min_loss)
                    torch.save(net, '%s/%s/%s_%s_best_IMBA.pth'
                               % (opt.checkpoints_folder, name, opt.model, str(opt.n_group)))

        net_path = '%s/%s/%s_%s_best_IMBA.pth' % (opt.checkpoints_folder, name,
                                                  opt.model, str(opt.n_group))
        one_record = eval_accuracy(net_path, name)
        print('The minimum loss is %.8f' % min_loss)
        record = record.append(one_record, ignore_index=True)

    record = record.drop(index=[0])
    record.loc['mean'] = record.mean()
    record.loc['std'] = record.std()
    record.to_csv(root_dir + '/metrics.csv')
    # all_reprot_metrics.loc[name, 'acc_mean'] = record.at['mean', 'accuracy']
    # all_reprot_metrics.loc[name, 'acc_std'] = record.at['std', 'accuracy']
    # all_reprot_metrics.loc[name, 'F1_mean'] = record.at['mean', 'F1']
    # all_reprot_metrics.loc[name, 'F1_std'] = record.at['std', 'F1']
    print('\n')
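# ReduceLROnPlateau, as configured above, halves the learning rate after `patience`
# consecutive step() calls without improvement in the monitored value. A tiny
# self-contained illustration (toy loss values, not project code):
import torch

param = torch.nn.Parameter(torch.zeros(1))
opt_demo = torch.optim.Adam([param], lr=0.01)
sched = torch.optim.lr_scheduler.ReduceLROnPlateau(opt_demo, mode='min',
                                                   factor=0.5, patience=2)
for loss_value in [1.0, 0.9, 0.9, 0.9, 0.9]:
    sched.step(loss_value)  # three non-improving steps exhaust patience=2
print(opt_demo.param_groups[0]['lr'])  # 0.005: the lr was halved once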
def train(working_dir, grid_size, learning_rate, batch_size, num_cores):
    process = psutil.Process(os.getpid())
    print(process.memory_info().rss / 1024 / 1024 / 1024)
    train_feat_dict = get_feat_dict(working_dir + "/train_smiles.csv")
    val_feat_dict = get_feat_dict(working_dir + "/val_smiles.csv")
    test_feat_dict = get_feat_dict(working_dir + "/test_smiles.csv")
    # These take about 0.08 GB
    process = psutil.Process(os.getpid())
    print("pre model")
    print(process.memory_info().rss / 1024 / 1024 / 1024)
    torch.set_default_dtype(torch.float64)
    train_props, val_props, test_props = get_props(working_dir, dtype=int)
    print("pre model post props")
    print(process.memory_info().rss / 1024 / 1024 / 1024)

    model = ResNet(BasicBlock, [2, 2, 2, 2], grid_size, "classification",
                   feat_nums, e_sizes, num_classes=train_props.shape[1])
    model.float()
    model.cuda()
    print("model params")
    pytorch_total_params = sum(p.numel() for p in model.parameters())
    print(pytorch_total_params)
    model.cpu()
    print("model")
    print(process.memory_info().rss / 1024 / 1024 / 1024)

    loss_function = masked_cross_entropy
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

    tl_list = []
    vl_list = []
    tmra_list = []
    vmra_list = []
    for file_num in range(num_loads):
        # Get new random walks
        if file_num == 0:
            print("before get_loaders")
            process = psutil.Process(os.getpid())
            print(process.memory_info().rss / 1024 / 1024 / 1024)
            train_loader, val_loader, test_loader = get_loaders(
                num_cores, working_dir, file_num, grid_size, batch_size,
                train_props, train_feat_dict,
                val_props=val_props, val_feat_dict=val_feat_dict,
                test_props=test_props, test_feat_dict=test_feat_dict)
        else:
            print("before get_loaders 2")
            process = psutil.Process(os.getpid())
            print(process.memory_info().rss / 1024 / 1024 / 1024)
            train_loader, _, _ = get_loaders(
                num_cores, working_dir, file_num, grid_size, batch_size,
                train_props, train_feat_dict)

        # Train on a single set of random walks; can do multiple epochs if desired
        for epoch in range(epochs_per_load):
            model.train()
            model.cuda()
            t = time.time()
            train_loss_list = []
            props_list = []
            outputs_list = []
            for i, (walks_int, walks_float, props) in enumerate(train_loader):
                walks_int = walks_int.cuda().long()
                walks_float = walks_float.cuda().float()
                props = props.cuda().long()
                props_list.append(props)
                outputs = model(walks_int, walks_float)
                outputs_list.append(outputs)
                loss = loss_function(props, outputs)
                train_loss_list.append(loss.item())
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

            props = torch.cat(props_list, 0).cpu().numpy()
            outputs = torch.cat(outputs_list, 0).detach().cpu().numpy()

            # Get train ROC AUC values; label value 2 marks a missing label
            train_rocaucs = []
            for i in range(props.shape[1]):
                mask = props[:, i] != 2
                train_rocauc = roc_auc_score(props[mask, i], outputs[mask, i])
                train_rocaucs.append(train_rocauc)

            model.eval()
            with torch.no_grad():
                ds = val_loader.dataset
                walks_int = ds.int_feat_tensor.cuda().long()
                walks_float = ds.float_feat_tensor.cuda().float()
                props = ds.prop_tensor.cuda()
                outputs = model(walks_int, walks_float)
                loss = loss_function(props, outputs)
                props = props.cpu().numpy()
                outputs = outputs.cpu().numpy()
                val_rocaucs = []
                for i in range(props.shape[1]):
                    mask = props[:, i] != 2
                    val_rocauc = roc_auc_score(props[mask, i], outputs[mask, i])
                    val_rocaucs.append(val_rocauc)

            print("load: " + str(file_num) + ", epochs: " + str(epoch))
            print("training loss")
            # Slightly approximate since the last batch can be smaller...
            tl = statistics.mean(train_loss_list)
            print(tl)
            print("val loss")
            vl = loss.item()
            print(vl)
            print("train mean roc auc")
            tmra = sum(train_rocaucs) / len(train_rocaucs)
            print(tmra)
            print("val mean roc auc")
            vmra = sum(val_rocaucs) / len(val_rocaucs)
            print(vmra)
            print("time")
            print(time.time() - t)
            tl_list.append(tl)
            vl_list.append(vl)
            tmra_list.append(tmra)
            vmra_list.append(vmra)
            model.cpu()

        file_num += 1
        del train_loader

    save_plot(tl_list, vl_list, tmra_list, vmra_list)
    return model
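# masked_cross_entropy is not defined in this excerpt. Given that the code above
# treats label value 2 as "missing" when computing ROC AUC, a consistent sketch
# (an assumption, not recovered source) is a binary cross-entropy that simply
# ignores those entries:
import torch
import torch.nn.functional as F

def masked_cross_entropy_sketch(props, outputs, missing_value=2):
    mask = props != missing_value  # drop labels that were never measured
    return F.binary_cross_entropy_with_logits(outputs[mask], props[mask].float())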
def main():
    # load table data
    df_train = pd.read_csv("../input/train_curated.csv")
    df_noisy = pd.read_csv("../input/train_noisy.csv")
    df_test = pd.read_csv("../input/sample_submission.csv")
    labels = df_test.columns[1:].tolist()
    for label in labels:
        df_train[label] = df_train['labels'].apply(lambda x: label in x)
        df_noisy[label] = df_noisy['labels'].apply(lambda x: label in x)
    df_train['path'] = "../input/mel128/train/" + df_train['fname']
    df_test['path'] = "../input/mel128/test/" + df_test['fname']
    df_noisy['path'] = "../input/mel128/noisy/" + df_noisy['fname']

    # fold splitting
    folds = list(KFold(n_splits=NUM_FOLD, shuffle=True,
                       random_state=SEED).split(np.arange(len(df_train))))

    # Training
    log_columns = ['epoch', 'bce', 'lwlrap', 'bce_noisy', 'lwlrap_noisy',
                   'val_bce', 'val_lwlrap', 'time']
    for fold, (ids_train_split, ids_valid_split) in enumerate(folds):
        if fold + 1 not in FOLD_LIST:
            continue
        print("fold: {}".format(fold + 1))
        train_log = pd.DataFrame(columns=log_columns)

        # build model
        model = ResNet(NUM_CLASS).cuda()

        # prepare data loaders
        df_train_fold = df_train.iloc[ids_train_split].reset_index(drop=True)
        dataset_train = MelDataset(df_train_fold['path'],
                                   df_train_fold[labels].values,
                                   crop=CROP_LENGTH,
                                   crop_mode='random',
                                   mixup=True,
                                   freqmask=True,
                                   gain=True)
        train_loader = DataLoader(dataset_train, batch_size=BATCH_SIZE, shuffle=True,
                                  num_workers=1, pin_memory=True)

        df_valid = df_train.iloc[ids_valid_split].reset_index(drop=True)
        dataset_valid = MelDataset(df_valid['path'], df_valid[labels].values)
        valid_loader = DataLoader(dataset_valid, batch_size=1, shuffle=False,
                                  num_workers=1, pin_memory=True)

        dataset_noisy = MelDataset(df_noisy['path'],
                                   df_noisy[labels].values,
                                   crop=CROP_LENGTH,
                                   crop_mode='random',
                                   mixup=True,
                                   freqmask=True,
                                   gain=True)
        noisy_loader = DataLoader(dataset_noisy, batch_size=BATCH_SIZE, shuffle=True,
                                  num_workers=1, pin_memory=True)
        noisy_itr = cycle(noisy_loader)

        # set optimizer and loss
        optimizer = optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=LR[0])
        scheduler = CosineLR(optimizer, step_size_min=LR[1],
                             t0=len(train_loader) * NUM_CYCLE, tmult=1)

        # training
        for epoch in range(NUM_EPOCH):
            # train for one epoch
            bce, lwlrap, bce_noisy, lwlrap_noisy = train((train_loader, noisy_itr),
                                                         model, optimizer, scheduler, epoch)

            # evaluate on validation set
            val_bce, val_lwlrap = validate(valid_loader, model)

            # print log
            endtime = time.time() - starttime
            print("Epoch: {}/{} ".format(epoch + 1, NUM_EPOCH) +
                  "CE: {:.4f} ".format(bce) +
                  "LwLRAP: {:.4f} ".format(lwlrap) +
                  "Noisy CE: {:.4f} ".format(bce_noisy) +
                  "Noisy LWLRAP: {:.4f} ".format(lwlrap_noisy) +
                  "Valid CE: {:.4f} ".format(val_bce) +
                  "Valid LWLRAP: {:.4f} ".format(val_lwlrap) +
                  "sec: {:.1f}".format(endtime))

            # save log and weights
            train_log_epoch = pd.DataFrame(
                [[epoch + 1, bce, lwlrap, bce_noisy, lwlrap_noisy, val_bce, val_lwlrap, endtime]],
                columns=log_columns)
            train_log = pd.concat([train_log, train_log_epoch])
            train_log.to_csv("{}/train_log_fold{}.csv".format(OUTPUT_DIR, fold + 1), index=False)
            if (epoch + 1) % NUM_CYCLE == 0:
                torch.save(model.state_dict(),
                           "{}/weight_fold_{}_epoch_{}.pth".format(OUTPUT_DIR, fold + 1, epoch + 1))
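# itertools.cycle, as used for noisy_loader above, lets the training step pull one
# noisy batch per curated batch even though the two datasets have different lengths.
# A minimal illustration (toy data, not project code):
from itertools import cycle

curated = [1, 2, 3, 4]
noisy = ['a', 'b']
noisy_itr = cycle(noisy)
for batch in curated:
    noisy_batch = next(noisy_itr)  # 'a', 'b', 'a', 'b' -- wraps around as needed
    print(batch, noisy_batch)
# Caveat: cycle() caches the items it saw on the first pass, so a DataLoader wrapped
# this way replays its first-epoch batches instead of reshuffling on later passes.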
def main():
    global args, best_result, output_directory, train_csv, test_csv

    start_epoch = 0
    # evaluation mode
    if args.evaluate:
        assert os.path.isfile(args.evaluate), \
            "=> no best model found at '{}'".format(args.evaluate)
        print("=> loading best model '{}'".format(args.evaluate))
        checkpoint = torch.load(args.evaluate)
        output_directory = os.path.dirname(args.evaluate)
        args = checkpoint['args']
        start_epoch = checkpoint['epoch'] + 1
        best_result = checkpoint['best_result']
        model = checkpoint['model']
        print("=> loaded best model (epoch {})".format(checkpoint['epoch']))
        _, val_loader = create_data_loaders(args)
        args.evaluate = True
        validate(val_loader, model, checkpoint['epoch'], write_to_file=False)
        return
    elif args.crossTrain:
        print("Retraining loaded model on current input parameters")
        train_loader, val_loader = create_data_loaders(args)
        checkpoint = torch.load(args.crossTrain)
        model = checkpoint['model']
        optimizer = torch.optim.SGD(model.parameters(), args.lr,
                                    momentum=args.momentum, weight_decay=args.weight_decay)
        model = model.cuda()
    # optionally resume from a checkpoint
    elif args.resume:
        chkpt_path = args.resume
        assert os.path.isfile(chkpt_path), \
            "=> no checkpoint found at '{}'".format(chkpt_path)
        print("=> loading checkpoint '{}'".format(chkpt_path))
        checkpoint = torch.load(chkpt_path)
        args = checkpoint['args']
        start_epoch = checkpoint['epoch'] + 1
        best_result = checkpoint['best_result']
        model = checkpoint['model']
        optimizer = checkpoint['optimizer']
        output_directory = os.path.dirname(os.path.abspath(chkpt_path))
        print("=> loaded checkpoint (epoch {})".format(checkpoint['epoch']))
        train_loader, val_loader = create_data_loaders(args)
        args.resume = True
    # create new model
    else:
        train_loader, val_loader = create_data_loaders(args)
        print("=> creating Model ({}-{}) ...".format(args.arch, args.decoder))
        in_channels = len(args.modality)
        if args.arch == 'resnet50':
            model = ResNet(layers=50, decoder=args.decoder,
                           output_size=train_loader.dataset.output_size,
                           in_channels=in_channels, pretrained=args.pretrained)
        elif args.arch == 'resnet18':
            model = ResNet(layers=18, decoder=args.decoder,
                           output_size=train_loader.dataset.output_size,
                           in_channels=in_channels, pretrained=args.pretrained)
        print("=> model created.")
        optimizer = torch.optim.SGD(model.parameters(), args.lr,
                                    momentum=args.momentum, weight_decay=args.weight_decay)

        # model = torch.nn.DataParallel(model).cuda() # for multi-gpu training
        model = model.cuda()

    # define loss function (criterion) and optimizer
    if args.criterion == 'l2':
        criterion = criteria.MaskedMSELoss().cuda()
    elif args.criterion == 'l1':
        criterion = criteria.MaskedL1Loss().cuda()

    # create results folder, if not already exists
    output_directory = utils.get_output_directory(args)
    if not os.path.exists(output_directory):
        os.makedirs(output_directory)
    train_csv = os.path.join(output_directory, 'train.csv')
    test_csv = os.path.join(output_directory, 'test.csv')
    best_txt = os.path.join(output_directory, 'best.txt')

    # create new csv files with only header
    if not args.resume:
        with open(train_csv, 'w') as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
            writer.writeheader()
        with open(test_csv, 'w') as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
            writer.writeheader()

    for epoch in range(start_epoch, args.epochs):
        utils.adjust_learning_rate(optimizer, epoch, args.lr)
        train(train_loader, model, criterion, optimizer, epoch)  # train for one epoch
        result, img_merge = validate(val_loader, model, epoch)  # evaluate on validation set

        # remember best rmse and save checkpoint
        is_best = result.rmse < best_result.rmse
        if is_best:
            best_result = result
            with open(best_txt, 'w') as txtfile:
                txtfile.write(
                    "epoch={}\nmse={:.3f}\nrmse={:.3f}\nabsrel={:.3f}\nlg10={:.3f}\nmae={:.3f}\ndelta1={:.3f}\nt_gpu={:.4f}\n"
                    .format(epoch, result.mse, result.rmse, result.absrel,
                            result.lg10, result.mae, result.delta1, result.gpu_time))
            if img_merge is not None:
                img_filename = output_directory + '/comparison_best.png'
                utils.save_image(img_merge, img_filename)

        utils.save_checkpoint(
            {
                'args': args,
                'epoch': epoch,
                'arch': args.arch,
                'model': model,
                'best_result': best_result,
                'optimizer': optimizer,
            }, is_best, epoch, output_directory)
def train(k, epochs):
    model = ResNet(k=k)
    opt = torch.optim.Adam(model.parameters(), lr=1e-4)
    criterion = nn.CrossEntropyLoss()
    if use_gpu:
        model.to('cuda')
    if use_horovod:
        # broadcast parameters and optimizer state from root device to other devices
        hvd.broadcast_parameters(model.state_dict(), root_rank=0)
        hvd.broadcast_optimizer_state(opt, root_rank=0)
        # wrap the optimizer for multi-GPU operation
        opt = hvd.DistributedOptimizer(opt, named_parameters=model.named_parameters(),
                                       op=hvd.Adasum)

    loss_dict = {'epoch': [], 'train': [], 'val': []}
    for epoch in range(epochs):
        train_loss = 0
        val_loss = 0

        # train block
        for img_batch, labels_batch in train_loader:
            if use_gpu:
                img_batch = img_batch.to('cuda')
                labels_batch = labels_batch.to('cuda')
            pred = model(img_batch)
            opt.zero_grad()
            loss = criterion(pred, labels_batch)
            loss.backward()
            opt.step()
            train_loss += loss.item()

        # val block
        with torch.no_grad():
            for img_batch, labels_batch in val_loader:
                if use_gpu:
                    img_batch = img_batch.to('cuda')
                    labels_batch = labels_batch.to('cuda')
                pred = model(img_batch)
                loss = criterion(pred, labels_batch)
                val_loss += loss.item()

        if use_horovod:
            train_loss = average_loss(train_loss, 'avg_train_loss')
            val_loss = average_loss(val_loss, 'avg_val_loss')

        loss_dict['epoch'].append(epoch + 1)
        loss_dict['train'].append(train_loss)
        loss_dict['val'].append(val_loss)
        print(",".join(["{}:{:.2f}".format(key, val[epoch])
                        for key, val in loss_dict.items()]))

    torch.save(model.state_dict(), "models/modelsdata/ResNet18_Cifar10_d{}.ckpt".format(k))
    save_obj(loss_dict, "models/modelsdata/losses/ResNet18_Cifar10_d{}".format(k))
    return loss_dict
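# average_loss() is used above but not shown in this excerpt. With Horovod, a
# per-worker scalar is typically averaged across workers with allreduce; a sketch
# under that assumption (the function body is not recovered source):
import torch
import horovod.torch as hvd

def average_loss(value, name):
    # hvd.allreduce averages by default; `name` tags the op for Horovod's timeline
    return hvd.allreduce(torch.tensor(value), name=name).item()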
def main():
    if not sys.warnoptions:
        warnings.simplefilter("ignore")

    # --- hyper parameters --- #
    BATCH_SIZE = 256
    LR = 1e-3
    WEIGHT_DECAY = 1e-4
    N_layer = 18
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

    # --- data process --- #
    # info
    src_path = './data/'
    target_path = './saved/ResNet18/'
    model_path = target_path + 'pkls/'
    pred_path = target_path + 'preds/'
    if not os.path.exists(model_path):
        os.makedirs(model_path)
    if not os.path.exists(pred_path):
        os.makedirs(pred_path)

    # evaluation: num of classify labels & image size
    # output testing id csv
    label2num_dict, num2label_dict = data_evaluation(src_path)

    # load
    train_data = dataLoader(src_path, 'train', label2num_dict)
    train_len = len(train_data)
    test_data = dataLoader(src_path, 'test')
    train_loader = Data.DataLoader(dataset=train_data, batch_size=BATCH_SIZE,
                                   shuffle=True, num_workers=12)
    test_loader = Data.DataLoader(dataset=test_data, batch_size=BATCH_SIZE,
                                  shuffle=False, num_workers=12)

    # --- model training --- #
    # fp: for storing data
    fp_train_acc = open(target_path + 'train_acc.txt', 'w')
    fp_time = open(target_path + 'time.txt', 'w')

    # train
    highest_acc, train_acc_seq = 0, []
    loss_funct = nn.CrossEntropyLoss()
    net = ResNet(N_layer).to(device)
    optimizer = torch.optim.Adam(net.parameters(), lr=LR, weight_decay=WEIGHT_DECAY)
    print(net)
    for epoch_i in count(1):
        right_count = 0
        # print('\nTraining epoch {}...'.format(epoch_i))
        # for batch_x, batch_y in tqdm(train_loader):
        for batch_x, batch_y in train_loader:
            batch_x = batch_x.to(device)
            batch_y = batch_y.to(device)

            # clear gradient
            optimizer.zero_grad()

            # forward & backward
            output = net.forward(batch_x.float())
            highest_out = torch.max(output, 1)[1]
            right_count += sum(batch_y == highest_out).item()
            loss = loss_funct(output, batch_y)
            loss.backward()

            # update parameters
            optimizer.step()

        # calculate accuracy
        train_acc = right_count / train_len
        train_acc_seq.append(train_acc * 100)
        if train_acc > highest_acc:
            highest_acc = train_acc

            # save model
            torch.save(net.state_dict(),
                       '{}{}_{}_{}.pkl'.format(model_path, target_path.split('/')[2],
                                               round(train_acc * 1000), epoch_i))

        # write data
        fp_train_acc.write(str(train_acc * 100) + '\n')
        fp_time.write(str(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())) + '\n')
        print('\n{} Epoch {}, Training accuracy: {}'.format(
            time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()), epoch_i, train_acc))

        # test
        net.eval()
        test_df = pd.read_csv(src_path + 'testing_data/testing_labels.csv')
        with torch.no_grad():
            for i, (batch_x, _) in enumerate(test_loader):
                batch_x = batch_x.to(device)
                output = net.forward(batch_x.float())
                highest_out = torch.max(output, 1)[1].cpu()
                labels = [num2label_dict[out_j.item()] for out_j in highest_out]
                test_df['label'].iloc[i * BATCH_SIZE:(i + 1) * BATCH_SIZE] = labels
        test_df.to_csv('{}{}_{}_{}.csv'.format(pred_path, target_path.split('/')[2],
                                               round(train_acc * 1000), epoch_i),
                       index=False)
        net.train()

        lr_decay(optimizer)

    fp_train_acc.close()
    fp_time.close()
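# lr_decay() is called above but not defined in this excerpt; a simple
# multiplicative decay over the optimizer's param groups would look like the
# sketch below (the 0.98 factor is an assumption, not recovered source):
def lr_decay(optimizer, factor=0.98):
    for param_group in optimizer.param_groups:
        param_group['lr'] *= factor  # shrink the lr a little after every epoch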
print("data size: {} for training".format(len(trainloader.dataset))) print("data size: {} for testing".format(len(testloader.dataset))) # class classes = {0: 'CDRom', 1: 'HardDrive', 2: 'PowerSupply'} if MobileNet: print("BackBone: MobileNetV2") net = MobileNetV2(num_classes=3).cuda() net._modules.get('features')[-1].register_forward_hook(hook_feature) else: print("BackBone: ResNet18") net = ResNet(num_classes=3).cuda() net._modules.get('features')[-2].register_forward_hook(hook_feature) optimizer = torch.optim.SGD(net.parameters(), lr=LEARNING_RATE, momentum=0.9, weight_decay=5e-4) # load checkpoint if RESUME: # epoch38-acc99.24812316894531-1586176538.pt print("===> Resuming from checkpoint.") assert os.path.isfile('checkpoint/epoch50-acc99.24812316894531-1586534447.pt'), 'Error: no checkpoint found!' net.load_state_dict(torch.load('checkpoint/epoch50-acc99.24812316894531-1586534447.pt')) criterion = TCLoss(3) # test and generate CAM video if EPOCH == 0: test(testloader, net, USE_CUDA, criterion, 0) for epoch in range (1, EPOCH + 1):
def main():
    # load table data
    df_train = pd.read_csv("../input/train_curated.csv")
    df_noisy = pd.read_csv("../input/train_noisy.csv")
    df_test = pd.read_csv("../input/sample_submission.csv")
    labels = df_test.columns[1:].tolist()
    for label in labels:
        df_train[label] = df_train['labels'].apply(lambda x: label in x)
        df_noisy[label] = df_noisy['labels'].apply(lambda x: label in x)
    df_train['path'] = "../input/mel128/train/" + df_train['fname']
    df_test['path'] = "../input/mel128/test/" + df_test['fname']
    df_noisy['path'] = "../input/mel128/noisy/" + df_noisy['fname']

    # calc sampling weight
    df_train['weight'] = 1
    df_noisy['weight'] = len(df_train) / len(df_noisy)

    # generate pseudo label with sharpening
    tmp = np.load("../input/pseudo_label/preds_noisy.npy").mean(axis=(0, 1))
    tmp = tmp**TEMPERATURE
    tmp = tmp / tmp.sum(axis=1)[:, np.newaxis]
    df_noisy_pseudo = df_noisy.copy()
    df_noisy_pseudo[labels] = tmp

    # fold splitting
    folds = list(KFold(n_splits=NUM_FOLD, shuffle=True,
                       random_state=SEED).split(np.arange(len(df_train))))
    folds_noisy = list(KFold(n_splits=NUM_FOLD, shuffle=True,
                             random_state=SEED).split(np.arange(len(df_noisy))))

    # Training
    log_columns = ['epoch', 'bce', 'lwlrap', 'bce_noisy', 'lwlrap_noisy',
                   'semi_mse', 'val_bce', 'val_lwlrap', 'time']
    for fold, (ids_train_split, ids_valid_split) in enumerate(folds):
        if fold + 1 not in FOLD_LIST:
            continue
        print("fold: {}".format(fold + 1))
        train_log = pd.DataFrame(columns=log_columns)

        # build model
        model = ResNet(NUM_CLASS).cuda()
        model.load_state_dict(
            torch.load("{}/weight_fold_{}_epoch_512.pth".format(LOAD_DIR, fold + 1)))

        # prepare data loaders
        df_train_fold = df_train.iloc[ids_train_split].reset_index(drop=True)
        dataset_train = MelDataset(df_train_fold['path'],
                                   df_train_fold[labels].values,
                                   crop=CROP_LENGTH,
                                   crop_mode='additional',
                                   crop_rate=CROP_RATE,
                                   mixup=True,
                                   freqmask=True,
                                   gain=True)
        train_loader = DataLoader(dataset_train, batch_size=BATCH_SIZE, shuffle=True,
                                  num_workers=1, pin_memory=True)

        df_valid = df_train.iloc[ids_valid_split].reset_index(drop=True)
        dataset_valid = MelDataset(df_valid['path'], df_valid[labels].values)
        valid_loader = DataLoader(dataset_valid, batch_size=1, shuffle=False,
                                  num_workers=1, pin_memory=True)

        dataset_noisy = MelDataset(df_noisy['path'],
                                   df_noisy[labels].values,
                                   crop=CROP_LENGTH,
                                   crop_mode='additional',
                                   crop_rate=CROP_RATE,
                                   mixup=True,
                                   freqmask=True,
                                   gain=True)
        noisy_loader = DataLoader(dataset_noisy, batch_size=BATCH_SIZE, shuffle=True,
                                  num_workers=1, pin_memory=True)
        noisy_itr = cycle(noisy_loader)

        df_semi = pd.concat([df_train.iloc[ids_train_split],
                             df_noisy_pseudo.iloc[folds_noisy[fold][0]]]).reset_index(drop=True)
        semi_sampler = torch.utils.data.sampler.WeightedRandomSampler(
            df_semi['weight'].values, len(df_semi))
        dataset_semi = MelDataset(df_semi['path'],
                                  df_semi[labels].values,
                                  crop=CROP_LENGTH,
                                  crop_mode='additional',
                                  crop_rate=CROP_RATE,
                                  mixup=True,
                                  freqmask=True,
                                  gain=True)
        semi_loader = DataLoader(dataset_semi, batch_size=BATCH_SIZE, shuffle=False,
                                 num_workers=1, pin_memory=True, sampler=semi_sampler)
        semi_itr = cycle(semi_loader)

        # set optimizer and loss
        optimizer = optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=LR[0])
        scheduler = CosineLR(optimizer, step_size_min=LR[1],
                             t0=len(train_loader) * NUM_CYCLE, tmult=1)

        # training
        for epoch in range(NUM_EPOCH):
            # train for one epoch
            bce, lwlrap, bce_noisy, lwlrap_noisy, mse_semi = train(
                (train_loader, noisy_itr, semi_itr), model, optimizer, scheduler, epoch)

            # evaluate on validation set
            val_bce, val_lwlrap = validate(valid_loader, model)

            # print log
            endtime = time.time() - starttime
            print("Epoch: {}/{} ".format(epoch + 1, NUM_EPOCH) +
                  "CE: {:.4f} ".format(bce) +
                  "LwLRAP: {:.4f} ".format(lwlrap) +
                  "Noisy CE: {:.4f} ".format(bce_noisy) +
                  "Noisy LWLRAP: {:.4f} ".format(lwlrap_noisy) +
                  "Semi MSE: {:.4f} ".format(mse_semi) +
                  "Valid CE: {:.4f} ".format(val_bce) +
                  "Valid LWLRAP: {:.4f} ".format(val_lwlrap) +
                  "sec: {:.1f}".format(endtime))

            # save log and weights
            train_log_epoch = pd.DataFrame(
                [[epoch + 1, bce, lwlrap, bce_noisy, lwlrap_noisy, mse_semi,
                  val_bce, val_lwlrap, endtime]],
                columns=log_columns)
            train_log = pd.concat([train_log, train_log_epoch])
            train_log.to_csv("{}/train_log_fold{}.csv".format(OUTPUT_DIR, fold + 1), index=False)
            if (epoch + 1) % NUM_CYCLE == 0:
                torch.save(model.state_dict(),
                           "{}/weight_fold_{}_epoch_{}.pth".format(OUTPUT_DIR, fold + 1, epoch + 1))
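# The pseudo-label sharpening above raises the averaged predictions to the power
# TEMPERATURE and renormalizes each row so it still sums to 1; exponents > 1 push
# the distribution toward its argmax. A small self-contained illustration (toy
# numbers, TEMPERATURE = 2 chosen only for the example):
import numpy as np

preds = np.array([[0.6, 0.3, 0.1]])
sharpened = preds ** 2.0
sharpened = sharpened / sharpened.sum(axis=1)[:, np.newaxis]
print(sharpened)  # [[0.783 0.196 0.022]] -- more confident than the input row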
def main():
    global args, best_result, output_directory, train_csv, test_csv  # global variables
    args = parser.parse_args()  # read the argument values
    # os.path.join() combines multiple path components and returns the result;
    # components before the last absolute path are discarded
    args.data = os.path.join('data', args.data)

    # args.modality was defined earlier
    if args.modality == 'rgb' and args.num_samples != 0:
        print("number of samples is forced to be 0 when input modality is rgb")
        # for RGB sparse-to-dense, set the sparse depth samples to 0 when generating training data
        args.num_samples = 0

    # create results folder, if not already exists
    output_directory = os.path.join(
        'results',
        'NYUDataset.modality={}.nsample={}.arch={}.decoder={}.criterion={}.lr={}.bs={}'
        .format(args.modality, args.num_samples, args.arch, args.decoder,
                args.criterion, args.lr, args.batch_size))  # output folder naming scheme
    if not os.path.exists(output_directory):
        os.makedirs(output_directory)
    train_csv = os.path.join(output_directory, 'train.csv')
    test_csv = os.path.join(output_directory, 'test.csv')
    best_txt = os.path.join(output_directory, 'best.txt')

    # define loss function (criterion) and optimizer
    if args.criterion == 'l2':
        # classes from other modules are instantiated like this, while plain
        # functions are simply called by name
        criterion = criteria.MaskedMSELoss().cuda()
        out_channels = 1
    elif args.criterion == 'l1':
        criterion = criteria.MaskedL1Loss().cuda()
        out_channels = 1

    # Data loading code
    print("=> creating data loaders ...")
    traindir = os.path.join(args.data, 'train')
    valdir = os.path.join(args.data, 'val')

    train_dataset = NYUDataset(traindir, type='train', modality=args.modality,
                               num_samples=args.num_samples)
    # DataLoader is the data-loading utility
    train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=args.batch_size,
                                               shuffle=True, num_workers=args.workers,
                                               pin_memory=True, sampler=None)

    # set batch size to be 1 for validation
    val_dataset = NYUDataset(valdir, type='val', modality=args.modality,
                             num_samples=args.num_samples)
    val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=1, shuffle=False,
                                             num_workers=args.workers, pin_memory=True)
    print("=> data loaders created.")

    # evaluation mode
    if args.evaluate:
        best_model_filename = os.path.join(output_directory, 'model_best.pth.tar')
        if os.path.isfile(best_model_filename):
            print("=> loading best model '{}'".format(best_model_filename))
            checkpoint = torch.load(best_model_filename)
            args.start_epoch = checkpoint['epoch']
            best_result = checkpoint['best_result']
            model = checkpoint['model']
            print("=> loaded best model (epoch {})".format(checkpoint['epoch']))
        else:
            print("=> no best model found at '{}'".format(best_model_filename))
        validate(val_loader, model, checkpoint['epoch'], write_to_file=False)
        return

    # optionally resume from a checkpoint
    elif args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch'] + 1
            best_result = checkpoint['best_result']
            model = checkpoint['model']
            optimizer = checkpoint['optimizer']
            print("=> loaded checkpoint (epoch {})".format(checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    # create new model
    else:
        # define model
        print("=> creating Model ({}-{}) ...".format(args.arch, args.decoder))
        in_channels = len(args.modality)  # len() returns the length of the modality string
        if args.arch == 'resnet50':
            model = ResNet(layers=50, decoder=args.decoder, in_channels=in_channels,
                           out_channels=out_channels, pretrained=args.pretrained)
        elif args.arch == 'resnet18':
            model = ResNet(layers=18, decoder=args.decoder, in_channels=in_channels,
                           out_channels=out_channels, pretrained=args.pretrained)
        print("=> model created.")

        optimizer = torch.optim.SGD(model.parameters(), args.lr,
                                    momentum=args.momentum, weight_decay=args.weight_decay)

        # create new csv files with only header
        with open(train_csv, 'w') as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
            writer.writeheader()
        with open(test_csv, 'w') as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
            writer.writeheader()

    # model = torch.nn.DataParallel(model).cuda()
    model = model.cuda()
    print(model)
    print("=> model transferred to GPU.")

    for epoch in range(args.start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch)

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch)

        # evaluate on validation set (validate() returns multiple values)
        result, img_merge = validate(val_loader, model, epoch)

        # remember best rmse and save checkpoint
        is_best = result.rmse < best_result.rmse
        if is_best:
            best_result = result
            # {:.3f} formats a float with three digits after the decimal point
            with open(best_txt, 'w') as txtfile:
                txtfile.write(
                    "epoch={}\nmse={:.3f}\nrmse={:.3f}\nabsrel={:.3f}\nlg10={:.3f}\nmae={:.3f}\ndelta1={:.3f}\nt_gpu={:.4f}\n"
                    .format(epoch, result.mse, result.rmse, result.absrel,
                            result.lg10, result.mae, result.delta1, result.gpu_time))
            # None is Python's null object: falsy in conditions, but distinct from 0
            if img_merge is not None:
                img_filename = output_directory + '/comparison_best.png'
                utils.save_image(img_merge, img_filename)

        save_checkpoint(
            {
                'epoch': epoch,
                'arch': args.arch,
                'model': model,
                'best_result': best_result,
                'optimizer': optimizer,
            }, is_best, epoch)
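# save_checkpoint() is used throughout these scripts but never shown. A typical
# implementation in this family of training loops (a sketch with assumed file
# naming, not recovered source) saves every epoch and copies the best one aside:
import os
import shutil
import torch

def save_checkpoint(state, is_best, epoch, output_directory='.'):
    checkpoint_filename = os.path.join(output_directory,
                                       'checkpoint-{}.pth.tar'.format(epoch))
    torch.save(state, checkpoint_filename)
    if is_best:
        # keep a stable alias to the best-performing epoch
        shutil.copyfile(checkpoint_filename,
                        os.path.join(output_directory, 'model_best.pth.tar'))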
def main() -> int:
    best_result = Result()
    best_result.set_to_worst()

    args: Any
    args = parser.parse_args()
    dataset = args.data
    if args.modality == 'rgb' and args.num_samples != 0:
        print("number of samples is forced to be 0 when input modality is rgb")
        args.num_samples = 0
    image_shape = (192, 256)  # if "my" in args.arch else (228, 304)

    # create results folder, if not already exists
    if args.transfer_from:
        output_directory = f"{args.transfer_from}_transfer"
    else:
        output_directory = utils.get_output_dir(args)
    args.data = os.path.join(os.environ["DATASET_DIR"], args.data)
    print("output directory :", output_directory)
    if not os.path.exists(output_directory):
        os.makedirs(output_directory)
    elif not args.evaluate:
        raise Exception("output directory already exists")
    train_csv = os.path.join(output_directory, 'train.csv')
    test_csv = os.path.join(output_directory, 'test.csv')
    best_txt = os.path.join(output_directory, 'best.txt')

    # define loss function (criterion) and optimizer
    if args.criterion == 'l2':
        criterion = criteria.MaskedMSELoss().cuda()
    elif args.criterion == 'l1':
        criterion = criteria.MaskedL1Loss().cuda()
    out_channels = 1

    # Data loading code
    print("=> creating data loaders ...")
    traindir = os.path.join(args.data, 'train')
    valdir = traindir if dataset == "SUNRGBD" else os.path.join(args.data, 'val')
    DatasetType = choose_dataset_type(dataset)
    train_dataset = DatasetType(traindir, phase='train', modality=args.modality,
                                num_samples=args.num_samples, square_width=args.square_width,
                                output_shape=image_shape, depth_type=args.depth_type)
    train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=args.batch_size,
                                               shuffle=True, num_workers=args.workers,
                                               pin_memory=True, sampler=None)
    print("=> training examples:", len(train_dataset))
    val_dataset = DatasetType(valdir, phase='val', modality=args.modality,
                              num_samples=args.num_samples, square_width=args.square_width,
                              output_shape=image_shape, depth_type=args.depth_type)
    val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=1, shuffle=False,
                                             num_workers=args.workers, pin_memory=True)
    print("=> validation examples:", len(val_dataset))
    print("=> data loaders created.")

    # evaluation mode
    if args.evaluate:
        best_model_filename = os.path.join(output_directory, 'model_best.pth.tar')
        if os.path.isfile(best_model_filename):
            print("=> loading best model '{}'".format(best_model_filename))
            checkpoint = torch.load(best_model_filename)
            args.start_epoch = checkpoint['epoch']
            best_result = checkpoint['best_result']
            model = checkpoint['model']
            print("=> loaded best model (epoch {})".format(checkpoint['epoch']))
        else:
            print("=> no best model found at '{}'".format(best_model_filename))
        avg_result, avg_result_inside, avg_result_outside, _, results, evaluator = validate(
            val_loader, args.square_width, args.modality, output_directory,
            args.print_freq, test_csv, model, checkpoint['epoch'], write_to_file=False)
        write_results(best_txt, avg_result, avg_result_inside, avg_result_outside,
                      checkpoint['epoch'])
        for loss_name, losses in [
                ("rmses", (res.result.rmse for res in results)),
                ("delta1s", (res.result.delta1 for res in results)),
                ("delta2s", (res.result.delta2 for res in results)),
                ("delta3s", (res.result.delta3 for res in results)),
                ("maes", (res.result.mae for res in results)),
                ("absrels", (res.result.absrel for res in results)),
                ("rmses_inside", (res.result_inside.rmse for res in results)),
                ("delta1s_inside", (res.result_inside.delta1 for res in results)),
                ("delta2s_inside", (res.result_inside.delta2 for res in results)),
                ("delta3s_inside", (res.result_inside.delta3 for res in results)),
                ("maes_inside", (res.result_inside.mae for res in results)),
                ("absrels_inside", (res.result_inside.absrel for res in results)),
                ("rmses_outside", (res.result_outside.rmse for res in results)),
                ("delta1s_outside", (res.result_outside.delta1 for res in results)),
                ("delta2s_outside", (res.result_outside.delta2 for res in results)),
                ("delta3s_outside", (res.result_outside.delta3 for res in results)),
                ("maes_outside", (res.result_outside.mae for res in results)),
                ("absrels_outside", (res.result_outside.absrel for res in results)),
        ]:
            with open(os.path.join(output_directory,
                                   f"validation_{loss_name}.csv"), "w") as csv_file:
                wr = csv.writer(csv_file, quoting=csv.QUOTE_ALL)
                wr.writerow(losses)
        evaluator.save_plot(os.path.join(output_directory, "best.png"))
        return 0

    # optionally resume from a checkpoint
    elif args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch'] + 1
            best_result = checkpoint['best_result']
            model = checkpoint['model']
            optimizer = checkpoint['optimizer']
            print("=> loaded checkpoint (epoch {})".format(checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))
            return 1

    # create new model
    else:
        if args.transfer_from:
            if os.path.isfile(args.transfer_from):
                print(f"=> loading checkpoint '{args.transfer_from}'")
                checkpoint = torch.load(args.transfer_from)
                args.start_epoch = 0
                model = checkpoint['model']
                print("=> loaded checkpoint")
                train_params = (list(model.conv3.parameters()) +
                                list(model.decoder.layer4.parameters())
                                if args.train_top_only else model.parameters())
            else:
                print(f"=> no checkpoint found at '{args.transfer_from}'")
                return 1
        else:
            # define model
            print("=> creating Model ({}-{}) ...".format(args.arch, args.decoder))
            in_channels = len(args.modality)
            if args.arch == 'resnet50':
                n_layers = 50
            elif args.arch == 'resnet18':
                n_layers = 18
            model = ResNet(layers=n_layers, decoder=args.decoder, in_channels=in_channels,
                           out_channels=out_channels, pretrained=args.pretrained,
                           image_shape=image_shape, skip_type=args.skip_type)
            print("=> model created.")
            train_params = model.parameters()

        adjusting_learning_rate = False
        if args.optimizer == "sgd":
            optimizer = torch.optim.SGD(train_params, args.lr, momentum=args.momentum,
                                        weight_decay=args.weight_decay)
            adjusting_learning_rate = True
        elif args.optimizer == "adam":
            optimizer = torch.optim.Adam(train_params, weight_decay=args.weight_decay)
        else:
            raise Exception("We should never be here")
        if adjusting_learning_rate:
            print("=> Learning rate adjustment enabled.")
            scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
                optimizer, patience=args.adjust_lr_ep, verbose=True)

        # create new csv files with only header
        with open(train_csv, 'w') as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
            writer.writeheader()
        with open(test_csv, 'w') as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
            writer.writeheader()

    # model = torch.nn.DataParallel(model).cuda()
    model = model.cuda()
    print(model)
    print("=> model transferred to GPU.")

    epochs_since_best = 0
    train_results = []
    val_results = []
    for epoch in range(args.start_epoch, args.epochs):
        # train for one epoch
        res_train, res_train_inside, res_train_outside = train(
            train_loader, model, criterion, optimizer, epoch, args.print_freq, train_csv)
        train_results.append((res_train, res_train_inside, res_train_outside))

        # evaluate on validation set
        res_val, res_val_inside, res_val_outside, img_merge, _, _ = validate(
            val_loader, args.square_width, args.modality, output_directory,
            args.print_freq, test_csv, model, epoch, True)
        val_results.append((res_val, res_val_inside, res_val_outside))

        # remember best rmse and save checkpoint
        is_best = res_val.rmse < best_result.rmse
        if is_best:
            epochs_since_best = 0
            best_result = res_val
            write_results(best_txt, res_val, res_val_inside, res_val_outside, epoch)
            if img_merge is not None:
                img_filename = output_directory + '/comparison_best.png'
                utils.save_image(img_merge, img_filename)
        else:
            epochs_since_best += 1

        save_checkpoint(
            {
                'epoch': epoch,
                'arch': args.arch,
                'model': model,
                'best_result': best_result,
                'optimizer': optimizer,
            }, is_best, epoch, output_directory)
        plot_progress(train_results, val_results, epoch, output_directory)
        if epochs_since_best > args.early_stop_epochs:
            print("early stopping")
            break
        if adjusting_learning_rate:
            scheduler.step(res_val.rmse)
    return 0
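# write_results() is not shown in this excerpt; given how it is called above, a
# plausible sketch (assumed field layout, not recovered source) writes the three
# region-specific result objects side by side:
def write_results(path, res, res_inside, res_outside, epoch):
    with open(path, 'w') as f:
        f.write("epoch={}\n".format(epoch))
        for tag, r in [("all", res), ("inside", res_inside), ("outside", res_outside)]:
            f.write("{}: rmse={:.3f} mae={:.3f} absrel={:.3f} delta1={:.3f}\n"
                    .format(tag, r.rmse, r.mae, r.absrel, r.delta1))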
def main():
    global args, best_result, output_directory, train_csv, test_csv

    # create results folder, if not already exists
    output_directory = utils.get_output_directory(args)
    if not os.path.exists(output_directory):
        os.makedirs(output_directory)
    # create the output files
    train_csv = os.path.join(output_directory, 'train.csv')
    test_csv = os.path.join(output_directory, 'test.csv')
    best_txt = os.path.join(output_directory, 'best.txt')

    # define loss function (criterion) and optimizer
    if args.criterion == 'l2':  # mean squared error
        criterion = criteria.MaskedMSELoss().cuda()
    elif args.criterion == 'l1':
        criterion = criteria.MaskedL1Loss().cuda()

    # sparsifier is a class for generating random sparse depth input from the ground truth
    sparsifier = None
    max_depth = args.max_depth if args.max_depth >= 0.0 else np.inf
    if args.sparsifier == UniformSampling.name:
        sparsifier = UniformSampling(num_samples=args.num_samples, max_depth=max_depth)
    elif args.sparsifier == SimulatedStereo.name:
        sparsifier = SimulatedStereo(num_samples=args.num_samples, max_depth=max_depth)

    # Data loading code
    print("=> creating data loaders ...")
    traindir = os.path.join('data', args.data, 'train')
    valdir = os.path.join('data', args.data, 'val')

    if args.data == 'nyudepthv2':
        # import the dataloader lazily, only when this dataset is requested
        from dataloaders.nyu_dataloader import NYUDataset
        train_dataset = NYUDataset(traindir, type='train', modality=args.modality, sparsifier=sparsifier)
        val_dataset = NYUDataset(valdir, type='val', modality=args.modality, sparsifier=sparsifier)
    elif args.data == 'kitti':
        from dataloaders.kitti_dataloader import KITTIDataset
        train_dataset = KITTIDataset(traindir, type='train', modality=args.modality, sparsifier=sparsifier)
        val_dataset = KITTIDataset(valdir, type='val', modality=args.modality, sparsifier=sparsifier)
    else:
        raise RuntimeError('Dataset not found. '
                           'The dataset must be either nyudepthv2 or kitti.')

    train_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=args.batch_size, shuffle=True,
        num_workers=args.workers, pin_memory=True, sampler=None,
        worker_init_fn=lambda work_id: np.random.seed(work_id))
    # worker_init_fn ensures different sampling patterns for each data loading thread

    # set batch size to 1 for validation
    val_loader = torch.utils.data.DataLoader(
        val_dataset, batch_size=1, shuffle=False,
        num_workers=args.workers, pin_memory=True)
    print("=> data loaders created.")

    # evaluation mode: load the best checkpoint and only run validation
    if args.evaluate:
        best_model_filename = os.path.join(output_directory, 'model_best.pth.tar')
        assert os.path.isfile(best_model_filename), \
            "=> no best model found at '{}'".format(best_model_filename)
        print("=> loading best model '{}'".format(best_model_filename))
        checkpoint = torch.load(best_model_filename)
        args.start_epoch = checkpoint['epoch']
        best_result = checkpoint['best_result']
        model = checkpoint['model']
        print("=> loaded best model (epoch {})".format(checkpoint['epoch']))
        validate(val_loader, model, checkpoint['epoch'], write_to_file=False)
        return

    # optionally resume from a checkpoint
    elif args.resume:
        assert os.path.isfile(args.resume), \
            "=> no checkpoint found at '{}'".format(args.resume)
        print("=> loading checkpoint '{}'".format(args.resume))
        checkpoint = torch.load(args.resume)
        args.start_epoch = checkpoint['epoch'] + 1
        best_result = checkpoint['best_result']
        model = checkpoint['model']
        optimizer = checkpoint['optimizer']
        print("=> loaded checkpoint (epoch {})".format(checkpoint['epoch']))

    # otherwise, create a new model and train it
    else:
        # define model
        print("=> creating Model ({}-{}) ...".format(args.arch, args.decoder))
        # in_channels is the length of the modality string, e.g. 'rgbd' -> 4 channels
        in_channels = len(args.modality)
        # only two depths are supported here: resnet50 or resnet18
        if args.arch == 'resnet50':
            model = ResNet(layers=50, decoder=args.decoder,
                           output_size=train_dataset.output_size,
                           in_channels=in_channels, pretrained=args.pretrained)
        elif args.arch == 'resnet18':
            model = ResNet(layers=18, decoder=args.decoder,
                           output_size=train_dataset.output_size,
                           in_channels=in_channels, pretrained=args.pretrained)
        print("=> model created.")
        optimizer = torch.optim.SGD(model.parameters(), args.lr,
                                    momentum=args.momentum, weight_decay=args.weight_decay)

        # create new csv files with only the header row
        with open(train_csv, 'w') as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
            writer.writeheader()
        with open(test_csv, 'w') as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
            writer.writeheader()

    # model = torch.nn.DataParallel(model).cuda()  # for multi-gpu training
    model = model.cuda()
    # print(model)
    print("=> model transferred to GPU.")

    for epoch in range(args.start_epoch, args.epochs):
        utils.adjust_learning_rate(optimizer, epoch, args.lr)
        train(train_loader, model, criterion, optimizer, epoch)  # train for one epoch
        # evaluate on the validation set after every training epoch
        result, img_merge = validate(val_loader, model, epoch)

        # remember best rmse and save checkpoint
        is_best = result.rmse < best_result.rmse
        if is_best:
            best_result = result
            with open(best_txt, 'w') as txtfile:
                txtfile.write(
                    "epoch={}\nmse={:.3f}\nrmse={:.3f}\nabsrel={:.3f}\nlg10={:.3f}\nmae={:.3f}\ndelta1={:.3f}\nt_gpu={:.4f}\n"
                    .format(epoch, result.mse, result.rmse, result.absrel,
                            result.lg10, result.mae, result.delta1, result.gpu_time))
            if img_merge is not None:
                img_filename = output_directory + '/comparison_best.png'
                utils.save_image(img_merge, img_filename)

        utils.save_checkpoint(
            {
                'args': args,
                'epoch': epoch,
                'arch': args.arch,
                'model': model,
                'best_result': best_result,
                'optimizer': optimizer,
            }, is_best, epoch, output_directory)
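# Note: the worker_init_fn lambda above seeds each worker with its worker id
# alone, which repeats the same augmentation stream every epoch. A minimal
# sketch of a per-epoch-varying alternative, following the pattern recommended
# in the PyTorch docs (the helper name seed_worker is an illustrative addition,
# not part of this codebase):
def seed_worker(worker_id):
    # torch.initial_seed() inside a worker already folds in the per-epoch base
    # seed and the worker id; reduce it to numpy's 32-bit seed range
    worker_seed = torch.initial_seed() % 2**32
    np.random.seed(worker_seed)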
class viewpoint_classifier():
    def weighted_sampling(self, dataset_index=0, path=None):
        if not os.path.isfile("./results/intermediate_data/sampling_weights_two_viewpoints.p"):
            customset_preprocess = CustomDatasetViewpoint(path=args.dataset_path, subset_type="training",
                                                          dataset_index=dataset_index, retrieve_images=False)
            self.processloader = torch.utils.data.DataLoader(dataset=customset_preprocess, batch_size=1,
                                                             shuffle=False, num_workers=int(args.num_workers))
            # first pass: collect the viewpoint label of every sample
            sample_views = []
            for batch_idx, (imgs, label) in enumerate(self.processloader):
                sample_views.append(label.numpy()[0][0])
            # relative frequency of each of the two classes
            class_presence = [0, 0]
            for view in sample_views:
                class_presence[view] += 1
            for i in range(2):
                class_presence[i] /= len(sample_views) * 1.0
            # weight each sample by the inverse frequency of its class
            class_weights = [0 for i in range(len(sample_views))]
            for i in range(len(sample_views)):
                class_weights[i] = 1.0 / class_presence[sample_views[i]]
            m = 2 * len(sample_views)
            class_weights = [i / m for i in class_weights]
            # finished with sampler weighting
            sampler = torch.utils.data.sampler.WeightedRandomSampler(class_weights, len(self.processloader),
                                                                     replacement=True)
            with open("./results/intermediate_data/sampling_weights_two_viewpoints.p", "wb") as f:
                pickle.dump(sampler, f)
        else:
            with open("./results/intermediate_data/sampling_weights_two_viewpoints.p", "rb") as f:
                sampler = pickle.load(f)
        return sampler

    def __init__(self, model, dataset_index=0, path=None):
        self.sampler = self.weighted_sampling(dataset_index=dataset_index, path=path)
        customset_train = CustomDatasetViewpoint(path=path, subset_type="training", dataset_index=dataset_index)
        customset_test = CustomDatasetViewpoint(path=path, subset_type="testing", dataset_index=dataset_index)
        # a DataLoader cannot take both a sampler and shuffle=True, so the
        # weighted train loader must not request shuffling
        self.trainloader = torch.utils.data.DataLoader(pin_memory=True, dataset=customset_train,
                                                       sampler=self.sampler, batch_size=args.batch_size,
                                                       num_workers=args.num_workers)
        self.trainloader_acc = torch.utils.data.DataLoader(dataset=customset_train, batch_size=args.batch_size,
                                                           shuffle=True, num_workers=args.num_workers)
        self.testloader_acc = torch.utils.data.DataLoader(dataset=customset_test, batch_size=args.batch_size,
                                                          shuffle=True, num_workers=args.num_workers)

        if model == "alex":
            self.model = AlexNet()
        elif model == "vgg":
            self.model = VGG(num_classes=2)
        elif model == "resnet":
            self.model = ResNet()

        if args.pretrained_model is not None:
            if args.pretrained_same_architecture:
                self.model.load_state_dict(torch.load(args.pretrained_model))
            else:
                if args.arch == "vgg":
                    # temporarily restore the 1000-way ImageNet head so the
                    # pretrained weights load, then swap in a 2-way head
                    self.model.soft = None
                    classifier = list(self.model.classifier.children())
                    classifier.pop()
                    classifier.append(torch.nn.Linear(4096, 1000))
                    self.model.classifier = torch.nn.Sequential(*classifier)
                    self.model.load_state_dict(torch.load(args.pretrained_model))
                    classifier = list(self.model.classifier.children())
                    classifier.pop()
                    classifier.append(torch.nn.Linear(4096, 2))
                    self.model.classifier = torch.nn.Sequential(*classifier)
                    self.model.soft = nn.LogSoftmax()
                else:
                    # same trick for the resnet fc head
                    self.model.fc = nn.Linear(512, 1000)
                    self.model.load_state_dict(torch.load(args.pretrained_model))
                    self.model.fc = nn.Linear(512, 2)

        self.optimizer = optim.Adam(self.model.parameters(), weight_decay=float(args.weight_decay), lr=0.0001)
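# The weighted_sampling method above is standard inverse-class-frequency
# balancing. A compact, self-contained sketch of the same idea (names are
# illustrative, not from this codebase):
def make_balanced_sampler(labels):
    """Sampler that draws every class with roughly equal probability."""
    labels = torch.as_tensor(labels)
    counts = torch.bincount(labels)
    weights = (1.0 / counts.float())[labels]  # inverse class frequency per sample
    return torch.utils.data.sampler.WeightedRandomSampler(weights,
                                                          num_samples=len(labels),
                                                          replacement=True)
# e.g. make_balanced_sampler([0, 0, 0, 1]) draws class 1 about as often as
# class 0 despite the 3:1 imbalance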
                 norm=False, dropout=0.5, num_classes=datareader.num_class)
model.cuda()

''' ===================== Fine-tune model by ID classification ============ '''
n_iters = args.finetune_n_iters
lr = args.finetune_lr
batch_size = args.finetune_batch_size
show_iters = args.show_iters

# Criterion
criterion = nn.CrossEntropyLoss().cuda()

# Optimizer: give the pretrained base layers a 10x smaller learning rate
# than the freshly initialized layers
if hasattr(model, 'base'):
    base_param_ids = set(map(id, model.base.parameters()))
    new_params = [p for p in model.parameters() if id(p) not in base_param_ids]
    param_groups = [
        {'params': model.base.parameters(), 'lr_mult': 0.1},
        {'params': new_params, 'lr_mult': 1.0},
    ]
else:
    param_groups = model.parameters()
optimizer = torch.optim.SGD(param_groups, lr=lr, momentum=0.9,
                            weight_decay=5e-4, nesterov=True)
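# 'lr_mult' is not a key torch.optim.SGD interprets by itself; it is only
# stored in each param group and has to be consumed by the training loop. A
# minimal sketch of such an adjuster (the function name and decay schedule are
# assumptions, not this repository's code):
def adjust_lr(optimizer, base_lr, step, decay_every=40000, gamma=0.1):
    # step decay on the base LR, scaled per group by its lr_mult
    lr = base_lr * (gamma ** (step // decay_every))
    for group in optimizer.param_groups:
        group['lr'] = lr * group.get('lr_mult', 1.0)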
    mixup=True,
    freqmask=True,
    gain=True,
)
noisy_loader = DataLoader(
    dataset_noisy,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=1,
    pin_memory=True,
)
noisy_itr = cycle(noisy_loader)

# set optimizer and loss
optimizer = optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=LR[0])
scheduler = CosineLR(optimizer, step_size_min=LR[1],
                     t0=len(train_loader) * NUM_CYCLE, tmult=1)

# training
min_val_lwlrap = 0
trigger = 0
for epoch in range(NUM_EPOCH):
    # train for one epoch
    bce, lwlrap, bce_noisy, lwlrap_noisy = train(
        (train_loader, noisy_itr), model, optimizer, scheduler, epoch)
    # evaluate on validation set
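# CosineLR above is a project-local scheduler; judging by its t0/tmult
# arguments it mirrors PyTorch's built-in warm-restart cosine annealing, which
# could serve as a drop-in alternative (this argument mapping is an assumption,
# not something the snippet confirms):
# scheduler = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(
#     optimizer,
#     T_0=len(train_loader) * NUM_CYCLE,  # steps in the first cosine cycle
#     T_mult=1,                           # keep every cycle the same length
#     eta_min=LR[1],                      # annealing floor; the peak stays at LR[0]
# )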
valid_set = Dataset(os.path.realpath('data/images4cls/valid/'), train=True)
valid_loader = DataLoader(valid_set, batch_size=20, shuffle=True,
                          num_workers=8, pin_memory=True)

device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
model = Model()
model.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), weight_decay=5e-4)
# optimizer = torch.optim.SGD(
#     params=model.parameters(),
#     lr=0.1,
#     momentum=0.9,
#     weight_decay=5e-4,
#     nesterov=True
# )
lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer=optimizer,
                                                    milestones=[6, 12, 16],
                                                    gamma=0.2)

print("Start training")
start_time = time.time()
for epoch in range(epochs):
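# With Adam's default lr of 1e-3, the MultiStepLR above divides the learning
# rate by 5 at epochs 6, 12 and 16. A quick standalone check of that schedule
# (assuming scheduler.step() is called once per epoch):
_opt = torch.optim.Adam([torch.nn.Parameter(torch.zeros(1))])  # default lr=1e-3
_sched = torch.optim.lr_scheduler.MultiStepLR(_opt, milestones=[6, 12, 16], gamma=0.2)
for _epoch in range(18):
    if _epoch in (0, 6, 12, 16):
        print(_epoch, _opt.param_groups[0]['lr'])  # 1e-3, 2e-4, 4e-5, 8e-6
    _sched.step()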
def main():
    global args, best_result, output_directory, train_csv, test_csv

    sparsifier = None
    max_depth = args.max_depth if args.max_depth >= 0.0 else np.inf
    if args.sparsifier == UniformSampling.name:
        sparsifier = UniformSampling(num_samples=args.num_samples, max_depth=max_depth)
    elif args.sparsifier == SimulatedStereo.name:
        sparsifier = SimulatedStereo(num_samples=args.num_samples, max_depth=max_depth)

    # create results folder, if not already exists
    output_directory = utils.get_output_directory(args)
    if not os.path.exists(output_directory):
        os.makedirs(output_directory)
    train_csv = os.path.join(output_directory, 'train.csv')
    test_csv = os.path.join(output_directory, 'test.csv')
    best_txt = os.path.join(output_directory, 'best.txt')

    # define loss function (criterion) and optimizer
    if args.criterion == 'l2':
        criterion = criteria.MaskedMSELoss().cuda()
    elif args.criterion == 'l1':
        criterion = criteria.MaskedL1Loss().cuda()
    out_channels = 1

    # Data loading code
    print("=> creating data loaders ...")
    traindir = os.path.join('data', args.data, 'train')
    valdir = os.path.join('data', args.data, 'val')

    train_dataset = NYUDataset(traindir, type='train', modality=args.modality, sparsifier=sparsifier)
    train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=args.batch_size, shuffle=True,
                                               num_workers=args.workers, pin_memory=True, sampler=None)

    # set batch size to 1 for validation
    val_dataset = NYUDataset(valdir, type='val', modality=args.modality, sparsifier=sparsifier)
    val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=1, shuffle=False,
                                             num_workers=args.workers, pin_memory=True)
    print("=> data loaders created.")

    # evaluation mode
    if args.evaluate:
        best_model_filename = os.path.join(output_directory, 'model_best.pth.tar')
        assert os.path.isfile(best_model_filename), \
            "=> no best model found at '{}'".format(best_model_filename)
        print("=> loading best model '{}'".format(best_model_filename))
        checkpoint = torch.load(best_model_filename)
        args.start_epoch = checkpoint['epoch']
        best_result = checkpoint['best_result']
        model = checkpoint['model']
        print("=> loaded best model (epoch {})".format(checkpoint['epoch']))
        validate(val_loader, model, checkpoint['epoch'], write_to_file=False)
        return

    # optionally resume from a checkpoint
    elif args.resume:
        assert os.path.isfile(args.resume), \
            "=> no checkpoint found at '{}'".format(args.resume)
        print("=> loading checkpoint '{}'".format(args.resume))
        checkpoint = torch.load(args.resume)
        args.start_epoch = checkpoint['epoch'] + 1
        best_result = checkpoint['best_result']
        model = checkpoint['model']
        optimizer = checkpoint['optimizer']
        print("=> loaded checkpoint (epoch {})".format(checkpoint['epoch']))

    # create new model
    else:
        # define model
        print("=> creating Model ({}-{}) ...".format(args.arch, args.decoder))
        in_channels = len(args.modality)
        if args.arch == 'resnet50':
            model = ResNet(layers=50, decoder=args.decoder, in_channels=in_channels,
                           out_channels=out_channels, pretrained=args.pretrained)
        elif args.arch == 'resnet18':
            model = ResNet(layers=18, decoder=args.decoder, in_channels=in_channels,
                           out_channels=out_channels, pretrained=args.pretrained)
        print("=> model created.")
        optimizer = torch.optim.SGD(model.parameters(), args.lr,
                                    momentum=args.momentum, weight_decay=args.weight_decay)

        # create new csv files with only the header row
        with open(train_csv, 'w') as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
            writer.writeheader()
        with open(test_csv, 'w') as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
            writer.writeheader()

    # model = torch.nn.DataParallel(model).cuda()
    model = model.cuda()
    print(model)
    print("=> model transferred to GPU.")

    for epoch in range(args.start_epoch, args.epochs):
        utils.adjust_learning_rate(optimizer, epoch, args.lr)

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch)

        # evaluate on validation set
        result, img_merge = validate(val_loader, model, epoch)

        # remember best rmse and save checkpoint
        is_best = result.rmse < best_result.rmse
        if is_best:
            best_result = result
            with open(best_txt, 'w') as txtfile:
                txtfile.write(
                    "epoch={}\nmse={:.3f}\nrmse={:.3f}\nabsrel={:.3f}\nlg10={:.3f}\nmae={:.3f}\ndelta1={:.3f}\nt_gpu={:.4f}\n"
                    .format(epoch, result.mse, result.rmse, result.absrel,
                            result.lg10, result.mae, result.delta1, result.gpu_time))
            if img_merge is not None:
                img_filename = output_directory + '/comparison_best.png'
                utils.save_image(img_merge, img_filename)

        utils.save_checkpoint(
            {
                'args': args,
                'epoch': epoch,
                'arch': args.arch,
                'model': model,
                'best_result': best_result,
                'optimizer': optimizer,
            }, is_best, epoch, output_directory)
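# utils.save_checkpoint is not shown in this file; a minimal sketch of what a
# helper with this call shape typically does (an assumption, not the
# repository's actual implementation), consistent with the
# 'model_best.pth.tar' filename that the evaluate branch loads
# (requires: import os, shutil, torch):
def save_checkpoint(state, is_best, epoch, output_directory):
    checkpoint_filename = os.path.join(output_directory,
                                       'checkpoint-{}.pth.tar'.format(epoch))
    torch.save(state, checkpoint_filename)  # always keep the latest epoch
    if is_best:
        # copy rather than re-serialize, so 'best' stays byte-identical
        shutil.copyfile(checkpoint_filename,
                        os.path.join(output_directory, 'model_best.pth.tar'))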
def train(cfg_trn, cfg_vld):
    base_lr = cfg_trn['base_lr']
    batches_per_iter = cfg_trn['batches_per_iter']
    log_after = cfg_trn['log_after']
    checkpoint_after = cfg_trn['checkpoint_after']
    # val_after = cfg_vld['val_after']
    # val_labels = cfg_vld['annF']
    # val_output_name = cfg_vld['']
    # val_images_folder = cfg_vld['root']

    net = ResNet(ResNet_Spec[18])
    dataset = CocoDataset(cfg=cfg_trn)
    train_loader = DataLoader(dataset, batch_size=cfg_trn['batch_size'],
                              num_workers=cfg_trn['num_workers'], shuffle=True)

    optimizer = opt.Adam(net.parameters(), lr=cfg_trn['base_lr'], weight_decay=5e-4)

    num_iter = 0
    current_epoch = 0
    drop_after_epoch = [100, 200, 260]
    scheduler = opt.lr_scheduler.MultiStepLR(optimizer, milestones=drop_after_epoch, gamma=0.333)

    if cfg_trn['checkpoint_path']:
        checkpoint = torch.load(cfg_trn['checkpoint_path'])
        # if from_mobilenet:
        #     load_from_mobilenet(net, checkpoint)
        # else:
        #     load_state(net, checkpoint)
        # if not weights_only:
        #     optimizer.load_state_dict(checkpoint['optimizer'])
        #     scheduler.load_state_dict(checkpoint['scheduler'])
        #     num_iter = checkpoint['iter']
        #     current_epoch = checkpoint['current_epoch']

    net = DataParallel(net).cuda()
    net.train()
    for epochId in range(current_epoch, 280):
        scheduler.step(epoch=epochId)
        # heatmaps loss and paf loss, per stage
        total_losses = [0, 0] * (cfg_trn['num_hourglass_stages'] + 1)
        batch_per_iter_idx = 0
        for batch_data in train_loader:
            if batch_per_iter_idx == 0:
                optimizer.zero_grad()

            images = batch_data['image'].cuda()
            keypoint_masks = batch_data['keypoint_mask'].cuda()
            paf_masks = batch_data['paf_mask'].cuda()
            keypoint_maps = batch_data['keypoint_maps'].cuda()
            paf_maps = batch_data['paf_maps'].cuda()

            stages_output = net(images)

            losses = []
            for loss_idx in range(len(total_losses) // 2):
                losses.append(l2loss(stages_output[loss_idx * 2], keypoint_maps,
                                     keypoint_masks, images.shape[0]))
                losses.append(l2loss(stages_output[loss_idx * 2 + 1], paf_maps,
                                     paf_masks, images.shape[0]))
                total_losses[loss_idx * 2] += losses[-2].item() / batches_per_iter
                total_losses[loss_idx * 2 + 1] += losses[-1].item() / batches_per_iter

            loss = losses[0]
            for loss_idx in range(1, len(losses)):
                loss += losses[loss_idx]
            loss /= batches_per_iter
            loss.backward()
            batch_per_iter_idx += 1
            if batch_per_iter_idx == batches_per_iter:
                optimizer.step()
                batch_per_iter_idx = 0
                num_iter += 1
            else:
                continue

            if num_iter % log_after == 0:
                print('Iter: {}'.format(num_iter))
                for loss_idx in range(len(total_losses) // 2):
                    print('\n'.join(['stage{}_pafs_loss: {}',
                                     'stage{}_heatmaps_loss: {}']).format(
                        loss_idx + 1, total_losses[loss_idx * 2 + 1] / log_after,
                        loss_idx + 1, total_losses[loss_idx * 2] / log_after))
                for loss_idx in range(len(total_losses)):
                    total_losses[loss_idx] = 0
            if num_iter % checkpoint_after == 0:
                snapshot_name = '{}/checkpoint_iter_{}.pth'.format(checkpoints_folder, num_iter)
                torch.save({'state_dict': net.module.state_dict(),
                            'optimizer': optimizer.state_dict(),
                            'scheduler': scheduler.state_dict(),
                            'iter': num_iter,
                            'current_epoch': epochId}, snapshot_name)
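# The batches_per_iter logic above is gradient accumulation: per-batch losses
# are pre-scaled and their gradients summed, so one optimizer step sees the
# average gradient of batches_per_iter batches, emulating a larger effective
# batch size. A stripped-down sketch of the same pattern (all names here are
# illustrative):
def train_accumulated(model, loader, criterion, optimizer, accum_steps=4):
    optimizer.zero_grad()
    for step, (inputs, targets) in enumerate(loader):
        loss = criterion(model(inputs), targets) / accum_steps  # pre-scale so summed grads average
        loss.backward()  # grads accumulate in each param's .grad across batches
        if (step + 1) % accum_steps == 0:
            optimizer.step()  # one update per accum_steps batches
            optimizer.zero_grad()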