def main():
    """Train (or evaluate) an action-recognition network for the configured
    modality (rgb / rhythm / history / flow) and dataset split.

    Reads all configuration from the module-level argparse ``parser`` and
    updates the module-level ``best_prec1``.
    """
    global args, best_prec1
    args = parser.parse_args()
    # BUG FIX: message typo "whith" -> "with"
    print(args.modality + " network trained with the split " +
          str(args.split) + ".")

    # create model (may resume from args.start_epoch)
    print("Building model ... ")
    exits_model, model = build_model(int(args.start_epoch))
    if not exits_model:
        return
    else:
        print("Model %s is loaded. " % (args.arch))

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda()
    optimizer = torch.optim.SGD(model.parameters(),
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    # create directory where we store the models every args.save_freq epochs
    if not os.path.exists(args.resume):
        os.makedirs(args.resume)
    print("Saving everything to directory %s." % (args.resume))

    cudnn.benchmark = True

    # Data transforming: mean/std repeated once per stacked frame
    if args.modality == "rgb" or args.modality == "rhythm" or args.modality == "history":
        is_color = True
        scale_ratios = [1.0, 0.875, 0.75, 0.66]
        clip_mean = [0.485, 0.456, 0.406] * args.new_length
        # BUG FIX: R-channel std was 0.299; the ImageNet std is 0.229
        # (consistent with the other training scripts in this file).
        clip_std = [0.229, 0.224, 0.225] * args.new_length
    elif args.modality == "flow":
        is_color = False
        scale_ratios = [1.0, 0.875, 0.75]
        clip_mean = [0.5, 0.5] * args.new_length
        clip_std = [0.226, 0.226] * args.new_length
    else:
        # NOTE(review): execution falls through after this message and will
        # raise NameError on is_color below — kept to preserve behavior.
        print("No such modality. Only rgb and flow supported.")

    # inception_v3 expects 299x299 inputs; every other arch uses 224x224
    new_size = 299 if args.arch == 'rgb_inception_v3' else 224

    normalize = video_transforms.Normalize(mean=clip_mean, std=clip_std)
    train_transform = video_transforms.Compose([
        video_transforms.MultiScaleCrop((new_size, new_size), scale_ratios),
        video_transforms.RandomHorizontalFlip(),
        video_transforms.ToTensor(),
        normalize,
    ])
    val_transform = video_transforms.Compose([
        video_transforms.CenterCrop((new_size)),
        video_transforms.ToTensor(),
        normalize,
    ])

    # data loading: rhythm/history modalities reuse the rgb split files
    modality_ = "rgb" if (args.modality == "rhythm"
                          or args.modality == "history") else args.modality
    train_setting_file = "train_%s_split%d.txt" % (modality_, args.split)
    train_split_file = os.path.join(args.settings, args.dataset,
                                    train_setting_file)
    val_setting_file = "val_%s_split%d.txt" % (modality_, args.split)
    val_split_file = os.path.join(args.settings, args.dataset,
                                  val_setting_file)
    if not os.path.exists(train_split_file) or not os.path.exists(
            val_split_file):
        print("No split file exists in %s directory. "
              "Preprocess the dataset first" % (args.settings))

    train_dataset = datasets.__dict__['dataset'](
        root=args.data,
        source=train_split_file,
        phase="train",
        modality=args.modality,
        is_color=is_color,
        new_length=args.new_length,
        new_width=args.new_width,
        new_height=args.new_height,
        video_transform=train_transform)
    val_dataset = datasets.__dict__['dataset'](
        root=args.data,
        source=val_split_file,
        phase="val",
        modality=args.modality,
        is_color=is_color,
        new_length=args.new_length,
        new_width=args.new_width,
        new_height=args.new_height,
        video_transform=val_transform)

    print('{} samples found, {} train samples and {} test samples.'.format(
        len(val_dataset) + len(train_dataset), len(train_dataset),
        len(val_dataset)))

    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=args.workers,
                                               pin_memory=True)
    val_loader = torch.utils.data.DataLoader(val_dataset,
                                             batch_size=args.batch_size,
                                             shuffle=True,
                                             num_workers=args.workers,
                                             pin_memory=True)

    if args.evaluate:
        validate(val_loader, model, criterion)
        return

    for epoch in range(args.start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch)

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch)

        # evaluate on validation set only every save_freq epochs
        prec1 = 0.0
        if (epoch + 1) % args.save_freq == 0:
            prec1 = validate(val_loader, model, criterion)

        # remember best prec@1 and save checkpoint
        is_best = prec1 > best_prec1
        best_prec1 = max(prec1, best_prec1)

        if (epoch + 1) % args.save_freq == 0:
            checkpoint_name = "%03d_%s" % (
                epoch + 1, "checkpoint_" + args.modality + "_split_" +
                str(args.split) + ".pth.tar")
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'state_dict': model.state_dict(),
                    'best_prec1': best_prec1,
                    'optimizer': optimizer.state_dict(),
                }, is_best, checkpoint_name, args.resume)
def main():
    """Train or evaluate a two-stream (rgb/flow) action-recognition model.

    Configuration comes from the module-level argparse ``parser``; the
    module-level ``best_prec1`` tracks the best validation top-1 accuracy.
    """
    global args, best_prec1
    args = parser.parse_args()

    # create model
    print("Building model ... ")
    model = build_model()
    print("Model %s is loaded. " % (args.modality + "_" + args.arch))

    # directory where checkpoints will be written
    if not os.path.exists(args.resume):
        os.makedirs(args.resume)
    print("Saving everything to directory %s." % (args.resume))

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda()
    optimizer = torch.optim.SGD(model.parameters(),
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    cudnn.benchmark = True

    # Data transforming: ImageNet mean/std, repeated once per stacked frame
    clip_mean = [0.485, 0.456, 0.406] * args.new_length
    clip_std = [0.229, 0.224, 0.225] * args.new_length
    normalize = video_transforms.Normalize(mean=clip_mean, std=clip_std)
    if args.modality == "rgb":
        scale_ratios = [1.0, 0.875, 0.75, 0.66]
    elif args.modality == "flow":
        scale_ratios = [1.0, 0.875, 0.75]
    else:
        # NOTE(review): falls through with scale_ratios undefined — the
        # Compose below would then raise NameError; confirm intended.
        print("No such modality. Only rgb and flow supported.")

    train_transform = video_transforms.Compose([
        video_transforms.Scale((256)),
        video_transforms.MultiScaleCrop((224, 224), scale_ratios),
        video_transforms.RandomHorizontalFlip(),
        video_transforms.ToTensor(),
        normalize,
    ])
    val_transform = video_transforms.Compose([
        video_transforms.Scale((256)),
        video_transforms.CenterCrop((224)),
        video_transforms.ToTensor(),
        normalize,
    ])

    # data loading: split files live under <settings>/<dataset>/
    train_setting_file = "train_%s_split%d.txt" % (args.modality, args.split)
    train_split_file = os.path.join(args.settings, args.dataset,
                                    train_setting_file)
    val_setting_file = "val_%s_split%d.txt" % (args.modality, args.split)
    val_split_file = os.path.join(args.settings, args.dataset,
                                  val_setting_file)
    # NOTE(review): only warns when splits are missing; execution continues
    if not os.path.exists(train_split_file) or not os.path.exists(
            val_split_file):
        print(
            "No split file exists in %s directory. Preprocess the dataset first"
            % (args.settings))

    train_dataset = datasets.__dict__[args.dataset](
        args.data,
        train_split_file,
        "train",
        args.new_length,
        video_transform=train_transform)
    val_dataset = datasets.__dict__[args.dataset](
        args.data,
        val_split_file,
        "val",
        args.new_length,
        video_transform=val_transform)

    print('{} samples found, {} train samples and {} test samples.'.format(
        len(val_dataset) + len(train_dataset), len(train_dataset),
        len(val_dataset)))

    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=args.workers,
                                               pin_memory=True)
    # NOTE(review): validation loader is shuffled — harmless for accuracy
    # metrics but unusual; confirm intentional.
    val_loader = torch.utils.data.DataLoader(val_dataset,
                                             batch_size=args.batch_size,
                                             shuffle=True,
                                             num_workers=args.workers,
                                             pin_memory=True)

    # evaluation-only mode: validate once and exit
    if args.evaluate:
        validate(val_loader, model, criterion)
        return

    for epoch in range(args.start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch)

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch)

        # evaluate on validation set (every epoch in this variant)
        prec1 = validate(val_loader, model, criterion)

        # remember best prec@1 and save checkpoint
        is_best = prec1 > best_prec1
        best_prec1 = max(prec1, best_prec1)

        if (epoch + 1) % args.save_freq == 0:
            checkpoint_name = "%03d_%s" % (epoch + 1, "checkpoint.pth.tar")
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'state_dict': model.state_dict(),
                    'best_prec1': best_prec1,
                    'optimizer': optimizer.state_dict(),
                }, is_best, checkpoint_name, args.resume)
def main(args):
    """Run 5-fold cross-validation training of the dynamic-trajectory
    predictor on the JAAD optical-flow dataset.

    For each fold: load train/val/test loaders from pickled split files,
    train with Adam + MSE loss, keep the checkpoint with the best
    validation MSE@15, evaluate it on the test set, and persist weights,
    predictions, targets and a CSV of aggregate results.

    Args:
        args: namespace providing ``model_load_path`` (optional warm-start
            weights) and ``model_save_path`` (output directory prefix).
    """
    ############################################################################
    # Path to optical flow images
    img_root = './data/human-annotated/'
    # Path to training and testing files
    load_path = './data/'
    # CPU or GPU?
    device = torch.device("cuda")

    # Training settings (overridden per-fold below depending on `pretrained`)
    epochs = 30
    batch_size = 64
    learning_rate = 1e-5
    num_workers = 8
    pretrained = False
    weight_decay = 1e-2
    NUM_FLOW_FRAMES = 9

    model_load_path = args.model_load_path
    model_save_path = args.model_save_path

    # Transformers for training and validation
    transform_train = video_transforms.Compose([
        video_transforms.MultiScaleCrop((224, 224), [1.0]),
        video_transforms.ToTensor(),
    ])
    transform_val = video_transforms.Compose([
        video_transforms.Scale((224)),
        video_transforms.ToTensor(),
    ])
    ############################################################################

    print('################### Training settings ###################')
    print('epochs:', epochs, ' batch_size:', batch_size, ' learning_rate:',
          learning_rate, ' num_workers:', num_workers, ' model_load_path:',
          model_load_path, ' NUM_FLOW_FRAMES:', NUM_FLOW_FRAMES)

    results = pd.DataFrame()
    for fold in [1, 2, 3, 4, 5]:
        # pretrained warm-starts get a lower LR and fewer epochs
        if pretrained:
            learning_rate = 1e-6
            epochs = 30
        else:
            learning_rate = 1e-5
            epochs = 40
        print('Training on fold ' + str(fold))

        try:
            testset = LocationDatasetJAAD(filename='jaad_cv_test.pkl',
                                          root_dir=load_path,
                                          transform=transform_val,
                                          img_root=img_root,
                                          NUM_FLOW_FRAMES=NUM_FLOW_FRAMES)
            test_loader = torch.utils.data.DataLoader(testset,
                                                      batch_size=batch_size,
                                                      shuffle=False,
                                                      num_workers=num_workers)
            trainset = LocationDatasetJAAD(filename='jaad_cv_train_' +
                                           str(fold) + '.pkl',
                                           root_dir=load_path,
                                           transform=transform_train,
                                           img_root=img_root,
                                           NUM_FLOW_FRAMES=NUM_FLOW_FRAMES)
            train_loader = torch.utils.data.DataLoader(trainset,
                                                       batch_size=batch_size,
                                                       shuffle=True,
                                                       num_workers=num_workers)
            valset = LocationDatasetJAAD(filename='jaad_cv_val_' + str(fold) +
                                         '.pkl',
                                         root_dir=load_path,
                                         transform=transform_val,
                                         img_root=img_root,
                                         NUM_FLOW_FRAMES=NUM_FLOW_FRAMES)
            val_loader = torch.utils.data.DataLoader(valset,
                                                     batch_size=batch_size,
                                                     shuffle=False,
                                                     num_workers=num_workers)
        # BUG FIX: was a bare `except:` (would also swallow KeyboardInterrupt)
        except Exception:
            sys.exit(
                'ERROR: Could not load pkl data file. Check the jaad .pkl files are in the correct path.'
            )

        model = DynamicTrajectoryPredictor(NUM_FLOW_FRAMES).to(device)
        model = model.float()
        model = nn.DataParallel(model)

        if model_load_path is not None:
            print('loading model from', model_load_path)
            model.load_state_dict(torch.load(model_load_path))

        optimizer = optim.Adam(model.parameters(),
                               lr=learning_rate,
                               weight_decay=weight_decay)
        loss_function = torch.nn.MSELoss()

        best_FDE = np.inf
        best_MSE = np.inf
        best_model = copy.deepcopy(model)

        # Begin training
        for epoch in range(1, epochs + 1):
            # Drop learning rate to 1e-6 after 30 epochs.
            # BUG FIX: keyword was `weight_weight_decay=decay`, a TypeError
            # (and NameError) at runtime; it must be weight_decay=weight_decay.
            if epoch > 30:
                optimizer = optim.Adam(model.parameters(),
                                       lr=1e-6,
                                       weight_decay=weight_decay)

            train(model, device, train_loader, optimizer, epoch, loss_function)
            MSE_5, FDE_5, MSE_10, FDE_10, MSE_15, FDE_15, _, _ = test(
                model, device, val_loader, loss_function)

            # model selection on validation MSE at the 15-frame horizon
            if MSE_15 < best_MSE:
                best_MSE = MSE_15
                best_model = copy.deepcopy(model)
                best_FDE = FDE_15
                print(epoch)
                print('Best MSE:', round(best_MSE, 0))

        # final evaluation of the best checkpoint on the held-out test set
        test_mse_5, test_fde_5, test_mse_10, test_fde_10, test_mse_15, test_fde_15, all_outputs, all_targets = test(
            best_model, device, test_loader, loss_function)
        print('Test mse @ 15:', round(test_mse_15, 0))

        # Save the model
        torch.save(
            best_model.state_dict(),
            model_save_path + 'rn18_flow_css_' + str(NUM_FLOW_FRAMES) +
            'stack_fold_' + str(fold) + '_pretrained-' + str(pretrained) +
            '_disp.weights')

        # Save the predictions and the targets
        np.save(
            './predictions_rn18_flow_css_' + str(NUM_FLOW_FRAMES) +
            'stack_jaad_fold_' + str(fold) + 'pretrained-' + str(pretrained) +
            '_disp.npy', all_outputs)
        np.save(
            './targets_rn18_flow_css_' + str(NUM_FLOW_FRAMES) +
            'stack_jaad_fold_' + str(fold) + 'pretrained-' + str(pretrained) +
            '_disp.npy', all_targets)

        # Save the results
        result = {
            'NUM_FLOW_FRAMES': NUM_FLOW_FRAMES,
            'fold': fold,
            'val_mse': best_MSE,
            'val_fde': best_FDE,
            'test_mse_5': test_mse_5,
            'test_fde_5': test_fde_5,
            'test_mse_10': test_mse_10,
            'test_fde_10': test_fde_10,
            'test_mse_15': test_mse_15,
            'test_fde_15': test_fde_15,
            'pretrained': pretrained
        }
        # FIX: DataFrame.append was deprecated and removed in pandas 2.0;
        # pd.concat is the supported equivalent.
        results = pd.concat([results, pd.DataFrame([result])],
                            ignore_index=True)
        results.to_csv('./results_rn18_jaad.csv', index=False)
def main():
    """Build (or resume) a video-classification model, train it, and log
    metrics to TensorBoard.

    Architecture, dataset, split and hyper-parameters come from the
    module-level argparse ``parser``; several module-level globals
    (``model``, ``writer``, ``scheduler``, geometry values) are set here for
    use by ``train``/``validate``.
    """
    global args, best_prec1, model, writer, best_loss, length, width, height, input_size, scheduler

    args = parser.parse_args()
    # NOTE(review): the argparse option is spelled "contine" upstream;
    # renaming it would break the CLI, so the spelling is kept.
    training_continue = args.contine

    # Spatial scale is inferred from the architecture name
    # (112-pixel vs 224-pixel families).
    if '3D' in args.arch:
        if 'I3D' in args.arch or 'MFNET3D' in args.arch:
            if '112' in args.arch:
                scale = 0.5
            else:
                scale = 1
        else:
            if '224' in args.arch:
                scale = 1
            else:
                scale = 0.5
    elif 'r2plus1d' in args.arch:
        scale = 0.5
    else:
        scale = 1

    print('scale: %.1f' % (scale))

    input_size = int(224 * scale)
    width = int(340 * scale)
    height = int(256 * scale)

    saveLocation = "./checkpoint/" + args.dataset + "_" + args.arch + \
        "_split" + str(args.split)
    if not os.path.exists(saveLocation):
        os.makedirs(saveLocation)
    writer = SummaryWriter(saveLocation)

    # create model: evaluation-only, resume-from-checkpoint, or fresh build
    if args.evaluate:
        print("Building validation model ... ")
        model = build_model_validate()
        optimizer = AdamW(model.parameters(),
                          lr=args.lr,
                          weight_decay=args.weight_decay)
    elif training_continue:
        model, startEpoch, optimizer, best_prec1 = build_model_continue()
        for param_group in optimizer.param_groups:
            lr = param_group['lr']
        print(
            "Continuing with best precision: %.3f and start epoch %d and lr: %f"
            % (best_prec1, startEpoch, lr))
    else:
        print("Building model with ADAMW... ")
        model = build_model()
        optimizer = AdamW(model.parameters(),
                          lr=args.lr,
                          weight_decay=args.weight_decay)
        startEpoch = 0

    if HALF:
        model.half()  # convert to half precision
        for layer in model.modules():
            # keep BatchNorm in fp32 for numerical stability
            if isinstance(layer, nn.BatchNorm2d):
                layer.float()

    print("Model %s is loaded. " % (args.arch))

    # define loss functions (criterion) and LR scheduler
    criterion = nn.CrossEntropyLoss().cuda()
    criterion2 = nn.MSELoss().cuda()
    scheduler = lr_scheduler.ReduceLROnPlateau(optimizer,
                                               'min',
                                               patience=5,
                                               verbose=True)
    print("Saving everything to directory %s." % (saveLocation))

    # map dataset name to its extracted-frames directory
    if args.dataset == 'ucf101':
        dataset = './datasets/ucf101_frames'
    elif args.dataset == 'hmdb51':
        dataset = './datasets/hmdb51_frames'
    elif args.dataset == 'smtV2':
        dataset = './datasets/smtV2_frames'
    elif args.dataset == 'window':
        dataset = './datasets/window_frames'
    elif args.dataset == 'haa500_basketball':
        dataset = './datasets/haa500_basketball_frames'
    else:
        print("No convenient dataset entered, exiting....")
        return 0

    cudnn.benchmark = True
    modality = args.arch.split('_')[0]

    # clip length (frames per sample) for clip-based architectures
    if "3D" in args.arch or 'tsm' in args.arch or 'slowfast' in args.arch \
            or 'r2plus1d' in args.arch:
        if '64f' in args.arch:
            length = 64
        elif '32f' in args.arch:
            length = 32
        else:
            length = 16
    else:
        length = 1

    # Data transforming: per-architecture normalization statistics
    if modality == "rgb" or modality == "pose":
        is_color = True
        scale_ratios = [1.0, 0.875, 0.75, 0.66]
        if 'I3D' in args.arch:
            if 'resnet' in args.arch:
                clip_mean = [0.45, 0.45, 0.45] * args.num_seg * length
                clip_std = [0.225, 0.225, 0.225] * args.num_seg * length
            else:
                clip_mean = [0.5, 0.5, 0.5] * args.num_seg * length
                clip_std = [0.5, 0.5, 0.5] * args.num_seg * length
        elif 'MFNET3D' in args.arch:
            clip_mean = [0.48627451, 0.45882353, 0.40784314
                         ] * args.num_seg * length
            clip_std = [0.234, 0.234, 0.234] * args.num_seg * length
        elif "3D" in args.arch:
            # C3D-style nets take raw pixel-range inputs (mean-subtraction only)
            clip_mean = [114.7748, 107.7354, 99.4750] * args.num_seg * length
            clip_std = [1, 1, 1] * args.num_seg * length
        elif "r2plus1d" in args.arch:
            clip_mean = [0.43216, 0.394666, 0.37645] * args.num_seg * length
            clip_std = [0.22803, 0.22145, 0.216989] * args.num_seg * length
        elif "rep_flow" in args.arch:
            clip_mean = [0.5, 0.5, 0.5] * args.num_seg * length
            clip_std = [0.5, 0.5, 0.5] * args.num_seg * length
        elif "slowfast" in args.arch:
            clip_mean = [0.45, 0.45, 0.45] * args.num_seg * length
            clip_std = [0.225, 0.225, 0.225] * args.num_seg * length
        else:
            clip_mean = [0.485, 0.456, 0.406] * args.num_seg * length
            clip_std = [0.229, 0.224, 0.225] * args.num_seg * length
    # BUG FIX: removed an unreachable `elif modality == "pose":` branch —
    # the first branch above already matches "pose", so it could never run.
    elif modality == "flow":
        is_color = False
        scale_ratios = [1.0, 0.875, 0.75, 0.66]
        if 'I3D' in args.arch:
            clip_mean = [0.5, 0.5] * args.num_seg * length
            clip_std = [0.5, 0.5] * args.num_seg * length
        elif "3D" in args.arch:
            clip_mean = [127.5, 127.5] * args.num_seg * length
            clip_std = [1, 1] * args.num_seg * length
        else:
            clip_mean = [0.5, 0.5] * args.num_seg * length
            clip_std = [0.226, 0.226] * args.num_seg * length
    elif modality == "both":
        is_color = True
        scale_ratios = [1.0, 0.875, 0.75, 0.66]
        clip_mean = [0.485, 0.456, 0.406, 0.5, 0.5] * args.num_seg * length
        clip_std = [0.229, 0.224, 0.225, 0.226, 0.226] * args.num_seg * length
    else:
        print("No such modality. Only rgb and flow supported.")

    normalize = video_transforms.Normalize(mean=clip_mean, std=clip_std)

    # non-I3D 3D nets use ToTensor2 (no 0-1 rescaling, matching their
    # pixel-range normalization constants above)
    if "3D" in args.arch and not ('I3D' in args.arch):
        train_transform = video_transforms.Compose([
            video_transforms.MultiScaleCrop((input_size, input_size),
                                            scale_ratios),
            video_transforms.RandomHorizontalFlip(),
            video_transforms.ToTensor2(),
            normalize,
        ])
        val_transform = video_transforms.Compose([
            video_transforms.CenterCrop((input_size)),
            video_transforms.ToTensor2(),
            normalize,
        ])
    else:
        train_transform = video_transforms.Compose([
            video_transforms.MultiScaleCrop((input_size, input_size),
                                            scale_ratios),
            video_transforms.RandomHorizontalFlip(),
            video_transforms.ToTensor(),
            normalize,
        ])
        val_transform = video_transforms.Compose([
            video_transforms.CenterCrop((input_size)),
            video_transforms.ToTensor(),
            normalize,
        ])

    # data loading
    train_setting_file = "train_%s_split%d.txt" % (modality, args.split)
    train_split_file = os.path.join(args.settings, args.dataset,
                                    train_setting_file)
    val_setting_file = "val_%s_split%d.txt" % (modality, args.split)
    val_split_file = os.path.join(args.settings, args.dataset,
                                  val_setting_file)
    if not os.path.exists(train_split_file) or not os.path.exists(
            val_split_file):
        print(
            "No split file exists in %s directory. Preprocess the dataset first"
            % (args.settings))

    train_dataset = datasets.__dict__[args.dataset](
        root=dataset,
        source=train_split_file,
        phase="train",
        modality=modality,
        is_color=is_color,
        new_length=length,
        new_width=width,
        new_height=height,
        video_transform=train_transform,
        num_segments=args.num_seg)
    val_dataset = datasets.__dict__[args.dataset](
        root=dataset,
        source=val_split_file,
        phase="val",
        modality=modality,
        is_color=is_color,
        new_length=length,
        new_width=width,
        new_height=height,
        video_transform=val_transform,
        num_segments=args.num_seg)

    print('{} samples found, {} train samples and {} test samples.'.format(
        len(val_dataset) + len(train_dataset), len(train_dataset),
        len(val_dataset)))

    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=args.workers,
                                               pin_memory=True)
    val_loader = torch.utils.data.DataLoader(val_dataset,
                                             batch_size=args.batch_size,
                                             shuffle=False,
                                             num_workers=args.workers,
                                             pin_memory=True)

    if args.evaluate:
        prec1, prec3, _ = validate(val_loader, model, criterion, criterion2,
                                   modality)
        return

    for epoch in range(startEpoch, args.epochs):
        train(train_loader, model, criterion, criterion2, optimizer, epoch,
              modality)

        # evaluate on validation set every save_freq epochs
        prec1 = 0.0
        lossClassification = 0
        if (epoch + 1) % args.save_freq == 0:
            prec1, prec3, lossClassification = validate(
                val_loader, model, criterion, criterion2, modality)
            writer.add_scalar('data/top1_validation', prec1, epoch)
            writer.add_scalar('data/top3_validation', prec3, epoch)
            writer.add_scalar('data/classification_loss_validation',
                              lossClassification, epoch)
            scheduler.step(lossClassification)

        # remember best prec@1 and save checkpoint
        is_best = prec1 >= best_prec1
        best_prec1 = max(prec1, best_prec1)

        if (epoch + 1) % args.save_freq == 0:
            # BUG FIX: the original saved the identical checkpoint twice when
            # is_best was true; save it exactly once.
            if is_best:
                print("Model works well")
            checkpoint_name = "%03d_%s" % (epoch + 1, "checkpoint.pth.tar")
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'state_dict': model.state_dict(),
                    'best_prec1': best_prec1,
                    'best_loss': best_loss,
                    'optimizer': optimizer.state_dict(),
                }, is_best, checkpoint_name, saveLocation)

    writer.export_scalars_to_json("./all_scalars.json")
    writer.close()
def main(args):
    """Smoke-test the JAAD data pipeline: build fold-1 train/val/test
    loaders and print the flow-stack size of one test batch.

    Args:
        args: namespace providing ``model_load_path`` and
            ``model_save_path`` (only echoed here; no model is built).
    """
    ############################################################################
    # Path to optical flow images
    img_root = './data/human-annotated/'
    # Path to training and testing files
    load_path = './data/'
    # CPU or GPU?
    device = torch.device("cuda")

    # Training settings (printed for reference; no training happens here)
    epochs = 30
    batch_size = 64
    learning_rate = 1e-5
    num_workers = 8
    pretrained = False
    weight_decay = 1e-2
    NUM_FLOW_FRAMES = 9

    model_load_path = args.model_load_path
    model_save_path = args.model_save_path

    # Transformers for training and validation
    transform_train = video_transforms.Compose([
        video_transforms.MultiScaleCrop((224, 224), [1.0]),
        video_transforms.ToTensor(),
    ])
    transform_val = video_transforms.Compose([
        video_transforms.Scale((224)),
        video_transforms.ToTensor(),
    ])
    ############################################################################

    print('################### Training settings ###################')
    print('epochs:', epochs, ' batch_size:', batch_size, ' learning_rate:',
          learning_rate, ' num_workers:', num_workers, ' model_load_path:',
          model_load_path, ' NUM_FLOW_FRAMES:', NUM_FLOW_FRAMES)

    results = pd.DataFrame()

    testset = LocationDatasetJAAD(filename='jaad_cv_test.pkl',
                                  root_dir=load_path,
                                  transform=transform_val,
                                  img_root=img_root,
                                  NUM_FLOW_FRAMES=NUM_FLOW_FRAMES)
    test_loader = torch.utils.data.DataLoader(testset,
                                              batch_size=batch_size,
                                              shuffle=False,
                                              num_workers=num_workers)
    trainset = LocationDatasetJAAD(filename='jaad_cv_train_' + str(1) +
                                   '.pkl',
                                   root_dir=load_path,
                                   transform=transform_train,
                                   img_root=img_root,
                                   NUM_FLOW_FRAMES=NUM_FLOW_FRAMES)
    train_loader = torch.utils.data.DataLoader(trainset,
                                               batch_size=batch_size,
                                               shuffle=True,
                                               num_workers=num_workers)
    valset = LocationDatasetJAAD(filename='jaad_cv_val_' + str(1) + '.pkl',
                                 root_dir=load_path,
                                 transform=transform_val,
                                 img_root=img_root,
                                 NUM_FLOW_FRAMES=NUM_FLOW_FRAMES)
    val_loader = torch.utils.data.DataLoader(valset,
                                             batch_size=batch_size,
                                             shuffle=False,
                                             num_workers=num_workers)

    # BUG FIX: a DataLoader is not subscriptable — `test_loader['flow_stack']`
    # raised TypeError. Fetch one batch and inspect its 'flow_stack' entry.
    # NOTE(review): assumes batches are dicts with a 'flow_stack' tensor,
    # as the sibling JAAD training loop implies — confirm against the dataset.
    sample_batch = next(iter(test_loader))
    print('test_loader flow_stack size = ', sample_batch['flow_stack'].size())
def main():
    """Train a single-modality (rgb/flow) model whose splits come from
    CSV files; optionally warm-starts from a hardcoded checkpoint.
    """
    global args, best_prec1
    args = parser.parse_args()

    # create model
    print("Building model ... ")
    model = build_model()
    if torch.cuda.is_available():
        model = torch.nn.DataParallel(model)
    print("Model %s is loaded. " % (args.arch))

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda()
    optimizer = torch.optim.SGD(model.parameters(),
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    # directory where checkpoints will be written
    if not os.path.exists(args.resume):
        os.makedirs(args.resume)
    print("Saving everything to directory %s." % (args.resume))

    cudnn.benchmark = True

    # Data transforming: mean/std repeated once per stacked frame
    if args.modality == "rgb":
        is_color = True
        scale_ratios = [1.0, 0.875, 0.75, 0.66]
        clip_mean = [0.485, 0.456, 0.406] * args.new_length
        clip_std = [0.229, 0.224, 0.225] * args.new_length
    elif args.modality == "flow":
        is_color = False
        scale_ratios = [1.0, 0.875, 0.75]
        clip_mean = [0.5, 0.5] * args.new_length
        clip_std = [0.226, 0.226] * args.new_length
    else:
        # NOTE(review): falls through with is_color undefined -> NameError below
        print("No such modality. Only rgb and flow supported.")

    normalize = video_transforms.Normalize(mean=clip_mean, std=clip_std)
    train_transform = video_transforms.Compose([
        # video_transforms.Scale((256)),
        video_transforms.MultiScaleCrop((224, 224), scale_ratios),
        video_transforms.RandomHorizontalFlip(),
        video_transforms.ToTensor(),
        normalize,
    ])
    val_transform = video_transforms.Compose([
        # video_transforms.Scale((256)),
        video_transforms.CenterCrop((224)),
        video_transforms.ToTensor(),
        normalize,
    ])

    # data loading: this variant reads fixed CSV split files instead of the
    # txt splits used by the other scripts (original txt logic kept below).
    # train_setting_file = "train_%s_split%d.txt" % (args.modality, args.split)
    # train_split_file = os.path.join(args.settings, args.dataset, train_setting_file)
    # val_setting_file = "val_%s_split%d.txt" % (args.modality, args.split)
    # val_split_file = os.path.join(args.settings, args.dataset, val_setting_file)
    # if not os.path.exists(train_split_file) or not os.path.exists(val_split_file):
    #     print("No split file exists in %s directory. Preprocess the dataset first" % (args.settings))
    train_split_file = './datasets/settings/train_set_detail.csv'
    val_split_file = './datasets/settings/val_set_detail.csv'

    train_dataset = datasets.__dict__[args.dataset](
        root=args.data,  # need to change
        source=train_split_file,
        phase="train",
        modality=args.modality,
        is_color=is_color,
        new_length=args.new_length,
        new_width=args.new_width,
        new_height=args.new_height,
        video_transform=train_transform,
        name_pattern="frame%06d.jpg")  # frame000001
    val_dataset = datasets.__dict__[args.dataset](
        root=args.data,
        source=val_split_file,
        phase="val",
        modality=args.modality,
        is_color=is_color,
        new_length=args.new_length,
        new_width=args.new_width,
        new_height=args.new_height,
        video_transform=val_transform,
        name_pattern="frame%06d.jpg")

    print('{} samples found, {} train samples and {} test samples.'.format(
        len(val_dataset) + len(train_dataset), len(train_dataset),
        len(val_dataset)))

    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=args.workers,
                                               pin_memory=True)
    # NOTE(review): validation loader is shuffled — confirm intentional
    val_loader = torch.utils.data.DataLoader(val_dataset,
                                             batch_size=args.batch_size,
                                             shuffle=True,
                                             num_workers=args.workers,
                                             pin_memory=True)

    if args.evaluate:
        validate(val_loader, model, criterion)
        return

    # NOTE(review): unconditionally warm-starts from this hardcoded absolute
    # path, overwriting the freshly built model — machine-specific; consider
    # moving to a CLI argument.
    model_path = '/home/thl/Desktop/challeng/checkpoints/Mulity_100step_900epoch_batch80/model_best.pth.tar'
    params = torch.load(model_path)
    model.load_state_dict(params['state_dict'])

    for epoch in range(args.start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch)

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch)

        # evaluate on validation set only every save_freq epochs
        prec1 = 0.0
        if (epoch + 1) % args.save_freq == 0:
            prec1 = validate(val_loader, model, criterion)

        # remember best prec@1 and save checkpoint
        is_best = prec1 > best_prec1
        best_prec1 = max(prec1, best_prec1)

        if (epoch + 1) % args.save_freq == 0:
            checkpoint_name = "%03d_%s" % (epoch + 1, "checkpoint.pth.tar")
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'state_dict': model.state_dict(),
                    'best_prec1': best_prec1,
                    'optimizer': optimizer.state_dict(),
                }, is_best, checkpoint_name, args.resume)
def main():
    """Train/evaluate a segment-based model, mirroring console output to the
    module-level log file handle ``f_log``.

    Uses module-level ``args`` (parsed elsewhere) and updates ``best_prec1``.
    """
    global best_prec1

    # create model — presumably resumes from args.resume when a checkpoint
    # exists there (confirm in build_model)
    print("Building model ... ")
    print("Building model ... ", file=f_log)
    model = build_model(resume_path=args.resume)
    print("Model %s is loaded. " % (args.arch))
    print("Model %s is loaded. " % (args.arch), file=f_log)

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda()
    optimizer = torch.optim.SGD(model.parameters(),
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    cudnn.benchmark = True

    # Data transforming.
    # NOTE(review): unlike the sibling scripts, mean/std here are NOT
    # repeated per stacked frame (no `* new_length`) — presumably the
    # transform handles per-frame normalization; confirm.
    if args.modality == "rgb":
        is_color = True
        scale_ratios = [1.0, 0.875, 0.75, 0.66]
        clip_mean = [0.485, 0.456, 0.406]
        clip_std = [0.229, 0.224, 0.225]
    elif args.modality == "tvl1_flow" or args.modality == "lk_flow":
        is_color = False
        scale_ratios = [1.0, 0.875, 0.75]
        clip_mean = [0.5, 0.5]
        clip_std = [0.226, 0.226]
    else:
        # NOTE(review): falls through with is_color undefined -> NameError below
        print("No such modality. Only rgb and flow supported.")
        print("No such modality. Only rgb and flow supported.", file=f_log)

    normalize = video_transforms.Normalize(mean=clip_mean, std=clip_std)
    train_transform = video_transforms.Compose([
        # video_transforms.Scale((288)),
        video_transforms.MultiScaleCrop((256, 256), scale_ratios),
        video_transforms.RandomHorizontalFlip(),
        video_transforms.ToTensor(),
        normalize,
    ])
    val_transform = video_transforms.Compose([
        # video_transforms.Scale((288)),
        video_transforms.CenterCrop((256)),
        video_transforms.ToTensor(),
        normalize,
    ])

    # data loading: split files live under <settings>/<dataset>/
    train_setting_file = "train_%s_split%d.txt" % (args.modality, args.split)
    train_split_file = os.path.join(args.settings, args.dataset,
                                    train_setting_file)
    val_setting_file = "val_%s_split%d.txt" % (args.modality, args.split)
    val_split_file = os.path.join(args.settings, args.dataset,
                                  val_setting_file)
    # NOTE(review): only warns when splits are missing; execution continues
    if not os.path.exists(train_split_file) or not os.path.exists(val_split_file):
        print("No split file exists in %s directory. Preprocess the dataset first" % (args.settings))
        print("No split file exists in %s directory. Preprocess the dataset first" % (args.settings), file=f_log)

    train_dataset = datasets.__dict__[args.dataset](setting=train_split_file,
                                                    root=args.data,
                                                    train=True,
                                                    new_width=args.new_width,
                                                    new_height=args.new_height,
                                                    new_length=args.new_length,
                                                    target_width=args.new_width,
                                                    target_height=args.new_height,
                                                    modality=args.modality,
                                                    num_segments=args.num_segments,
                                                    transform=train_transform,
                                                    name_pattern='frame%06d.jpg')
    val_dataset = datasets.__dict__[args.dataset](setting=val_split_file,
                                                  root=args.data,
                                                  train=False,
                                                  new_width=args.new_width,
                                                  new_height=args.new_height,
                                                  new_length=args.new_length,
                                                  target_width=args.new_width,
                                                  target_height=args.new_height,
                                                  modality=args.modality,
                                                  num_segments=args.num_segments,
                                                  transform=val_transform,
                                                  name_pattern='frame%06d.jpg')

    print('{} samples found, {} train samples and {} test samples.'.format(len(val_dataset)+len(train_dataset), len(train_dataset), len(val_dataset)))
    print('{} samples found, {} train samples and {} test samples.'.format(len(val_dataset)+len(train_dataset), len(train_dataset), len(val_dataset)), file=f_log)

    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=args.workers)  # , pin_memory=True)
    # NOTE(review): validation loader is shuffled — confirm intentional
    val_loader = torch.utils.data.DataLoader(
        val_dataset,
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=args.workers)  # , pin_memory=True)

    if args.evaluate:
        validate(val_loader, model, criterion)
        return

    for epoch in range(args.start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch)

        # train for one epoch
        print("start epoch ", epoch)
        train(train_loader, model, criterion, optimizer, epoch)

        # evaluate on validation set only every save_freq epochs
        prec1 = 0.0
        if (epoch + 1) % args.save_freq == 0:
            prec1 = validate(val_loader, model, criterion)

        # remember best prec@1 and save checkpoint
        is_best = prec1 > best_prec1
        best_prec1 = max(prec1, best_prec1)

        if (epoch + 1) % args.save_freq == 0:
            checkpoint_name = "%03d_%s" % (epoch + 1, "checkpoint.pth.tar")
            save_checkpoint({
                'epoch': epoch + 1,
                'arch': args.arch,
                'state_dict': model.state_dict(),
                'best_prec1': best_prec1,
                'optimizer': optimizer.state_dict(),
            }, is_best, checkpoint_name, args.save_path)
def main():
    """Train an action-recognition network on one dataset split.

    Parses CLI args, builds the model/criterion/optimizer, prepares the
    train (and, when ``--es`` is set, validation) datasets and loaders,
    then runs the epoch loop with optional early stopping, saving
    checkpoints and per-epoch precision files under the run directory
    returned by ``logging(args)``.
    """
    global args, prec_list
    prec_list = []
    args = parser.parse_args()
    # NOTE(review): `logging` here is a project helper returning the run
    # directory, shadowing the stdlib module -- confirm.
    full_path = logging(args)
    print(args.modality + " network trained with the split " +
          str(args.split) + ".")

    # create model
    print("Building model ... ")
    exits_model, model = build_model(int(args.start_epoch),
                                     args.pretrain_weights)
    if not exits_model:
        return
    else:
        print("Model %s is loaded. " % (args.arch))

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda()
    optimizer = torch.optim.SGD(model.parameters(),
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    cudnn.benchmark = True

    # Data transforming: per-modality channel statistics and crop scales.
    if args.modality == "rgb" or args.modality == "rgb2":
        is_color = True
        scale_ratios = [1.0, 0.875, 0.75, 0.66]
        clip_mean = [0.485, 0.456, 0.406] * args.new_length
        clip_std = [0.229, 0.224, 0.225] * args.new_length
    elif args.modality == "flow" or args.modality == "rhythm":
        is_color = False
        scale_ratios = [1.0, 0.875, 0.75]
        clip_mean = [0.5, 0.5] * args.new_length
        clip_std = [0.226, 0.226] * args.new_length
    else:
        print("No such modality.\nOnly rgb and flow supported.")
        # FIX: abort here -- the original fell through and later crashed
        # with a NameError on is_color/clip_mean.
        return

    # NOTE(review): str.find(...) > 0 misses an arch named exactly
    # "inception_v3" (find returns 0); kept as-is since archs in use appear
    # to be prefixed (e.g. "rgb_inception_v3") -- confirm.
    new_size = 299 if args.arch.find("inception_v3") > 0 else 224

    normalize = video_transforms.Normalize(mean=clip_mean, std=clip_std)
    train_transform = video_transforms.Compose([
        # video_transforms.Scale((256)),
        video_transforms.MultiScaleCrop((new_size, new_size), scale_ratios),
        video_transforms.RandomHorizontalFlip(),
        video_transforms.ToTensor(),
        normalize,
    ])
    if args.es:
        val_transform = video_transforms.Compose([
            # video_transforms.Scale((256)),
            video_transforms.CenterCrop((new_size)),
            video_transforms.ToTensor(),
            normalize,
        ])

    # rhythm and rgb* variants read rgb frames; everything else reads flow.
    modality_ = "rgb" if (args.modality == "rhythm"
                          or args.modality[:3] == "rgb") else "flow"
    if args.modality == "rgb2":
        createNewDataset("train_split%d.txt", "new_train.txt", modality_)
        # createNewDataset("val_%s_split%d.txt", "new_val.txt", modality_)

    # data loading
    train_setting_file = ("new_train.txt" if args.modality == "rgb2"
                          else "train_split%d.txt" % (args.split))
    train_split_file = os.path.join(args.settings, args.dataset,
                                    train_setting_file)
    if not os.path.exists(train_split_file):  # or not os.path.exists(val_split_file):
        print("No split file exists in %s directory.\nPreprocess the dataset first"
              % (args.settings))

    extension = ".png" if args.dataset == "hmdb51" and args.modality == "rhythm" else ".jpg"
    direction_file = "direction.txt" if args.vr_approach == 3 else "direction_video.txt"
    direction_path = os.path.join(args.settings, args.dataset, direction_file)

    train_dataset = datasets.__dict__['dataset'](
        root=args.data,
        source=train_split_file,
        phase="train",
        modality=args.modality,
        is_color=is_color,
        new_length=args.new_length,
        new_width=args.new_width,
        new_height=args.new_height,
        video_transform=train_transform,
        approach_VR=args.vr_approach,
        extension=extension,
        direction_path=direction_path)

    if args.es:
        val_setting_file = "val_split%d.txt" % (args.split)
        val_split_file = os.path.join(args.settings, args.dataset,
                                      val_setting_file)
        if not os.path.exists(val_split_file):
            print("No split file exists in %s directory. Preprocess the dataset first"
                  % (args.settings))
        val_dataset = datasets.__dict__['dataset'](
            root=args.data,
            source=val_split_file,
            phase="val",
            modality=args.modality,
            is_color=is_color,
            new_length=args.new_length,
            new_width=args.new_width,
            new_height=args.new_height,
            video_transform=val_transform,
            approach_VR=args.vr_approach,
            extension=extension,
            direction_path=direction_path)
        print('{} samples found, {} train samples and {} validation samples.'.
              format(len(val_dataset) + len(train_dataset),
                     len(train_dataset), len(val_dataset)))
    else:
        print('{} train samples found.'.format(len(train_dataset)))

    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=args.workers,
                                               pin_memory=True)
    if args.es:
        val_loader = torch.utils.data.DataLoader(val_dataset,
                                                 batch_size=args.batch_size,
                                                 shuffle=True,
                                                 num_workers=args.workers,
                                                 pin_memory=True)
        early_stop = EarlyStopping(
            verbose=True,
            log_path=os.path.join(full_path, "early_stopping.json"))

    is_best = False
    for epoch in range(args.start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch)

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch)

        if args.es:
            # evaluate on validation set and update early-stopping state
            losses = validate(val_loader, model, criterion)
            is_best = early_stop(losses.avg, epoch)

        # Checkpoint periodically, and additionally whenever validation
        # loss improved (is_best).
        if (epoch + 1) % args.save_freq == 0 or is_best:
            checkpoint_name = "%03d_%s" % (epoch + 1,
                                           "checkpoint_" + args.modality +
                                           "_split_" + str(args.split) +
                                           ".pth.tar")
            es_val = float('inf') if not args.es else early_stop.val_loss_min
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'state_dict': model.state_dict(),
                    'optimizer': optimizer.state_dict(),
                    'val_loss_min': es_val
                }, is_best, checkpoint_name,
                os.path.join(full_path, "checkpoints"))
            prec_name = "%03d_%s" % (epoch + 1,
                                     "prec_split_" + str(args.split) + ".txt")
            save_precision(prec_name, os.path.join(full_path, "precision"))

        if args.es and early_stop.early_stop:
            break

    if not args.es:
        # Final model: without early stopping, always save the last epoch
        # as the best one.
        checkpoint_name = "%03d_%s" % (epoch + 1,
                                       "checkpoint_" + args.modality +
                                       "_split_" + str(args.split) +
                                       ".pth.tar")
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'arch': args.arch,
                'state_dict': model.state_dict(),
                'optimizer': optimizer.state_dict(),
                'val_loss_min': float('inf')
            }, True, checkpoint_name,
            os.path.join(full_path, "checkpoints"))
def main(args):
    """Train a segment-based RGB model with AdamW and ReduceLROnPlateau.

    Creates a timestamped save directory under ``args.savelocation``,
    builds the model/losses/optimizer/scheduler, sets up train/val
    datasets and loaders, then trains for ``args.epochs`` epochs,
    validating and checkpointing every ``args.save_freq`` epochs.
    """
    global best_prec1, best_loss

    # Spatial sizes scale together so the crop/resize geometry is preserved.
    input_size = int(224 * args.scale)
    width = int(340 * args.scale)
    height = int(256 * args.scale)

    if not os.path.exists(args.savelocation):
        os.makedirs(args.savelocation)
    # Unique per-run subdirectory keyed by wall-clock time.
    now = time.time()
    savelocation = os.path.join(args.savelocation, str(now))
    os.makedirs(savelocation)
    logging.basicConfig(filename=os.path.join(savelocation, "log.log"),
                        level=logging.INFO)

    model = build_model(args.arch, args.pre, args.num_seg, args.resume)
    optimizer = AdamW(model.parameters(),
                      lr=args.lr,
                      weight_decay=args.weight_decay)
    criterion = nn.CrossEntropyLoss().cuda()
    criterion2 = nn.MSELoss().cuda()
    scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, 'min',
                                               patience=5, verbose=True)

    cudnn.benchmark = True

    length = 64  # frames per clip
    scale_ratios = [1.0, 0.875, 0.75, 0.66]
    # Mean is in raw pixel units (0-255); std of 1 leaves values unscaled.
    clip_mean = [114.7748, 107.7354, 99.4750] * args.num_seg * length
    clip_std = [1, 1, 1] * args.num_seg * length
    normalize = video_transforms.Normalize(mean=clip_mean, std=clip_std)

    train_transform = video_transforms.Compose([
        video_transforms.MultiScaleCrop((input_size, input_size),
                                        scale_ratios),
        video_transforms.RandomHorizontalFlip(),
        video_transforms.ToTensor2(),
        normalize,
    ])
    val_transform = video_transforms.Compose([
        video_transforms.CenterCrop((input_size)),
        video_transforms.ToTensor2(),
        normalize,
    ])

    if not os.path.exists(args.trainlist) or not os.path.exists(args.vallist):
        print("No split file exists in %s directory.\nPreprocess the dataset first"
              % (args.datasetpath))
        # FIX: abort instead of crashing later inside dataset construction.
        return

    train_dataset = datasets.__dict__[args.dataset](
        root=args.datasetpath,
        source=args.trainlist,
        phase="train",
        modality="rgb",
        is_color=True,
        new_length=length,
        new_width=width,
        new_height=height,
        video_transform=train_transform,
        num_segments=args.num_seg)
    val_dataset = datasets.__dict__[args.dataset](
        root=args.datasetpath,
        source=args.vallist,
        phase="val",
        modality="rgb",
        is_color=True,
        new_length=length,
        new_width=width,
        new_height=height,
        video_transform=val_transform,
        num_segments=args.num_seg)

    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=args.workers,
                                               pin_memory=True)
    val_loader = torch.utils.data.DataLoader(val_dataset,
                                             batch_size=args.batch_size,
                                             shuffle=False,
                                             num_workers=args.workers,
                                             pin_memory=True)

    best_prec1 = 0
    for epoch in range(0, args.epochs):
        train(length, input_size, train_loader, model, criterion, criterion2,
              optimizer, epoch)

        # Validate and checkpoint only every save_freq epochs; the LR
        # scheduler therefore also steps only on those epochs.
        if (epoch + 1) % args.save_freq == 0:
            is_best = False
            prec1, prec3, lossClassification = validate(
                length, input_size, val_loader, model, criterion, criterion2)
            scheduler.step(lossClassification)
            if prec1 >= best_prec1:
                is_best = True
                best_prec1 = prec1

            checkpoint_name = "%03d_%s" % (epoch + 1, "checkpoint.pth.tar")
            text = "save checkpoint {}".format(checkpoint_name)
            print(text)
            logging.info(text)
            save_checkpoint(
                {
                    "epoch": epoch + 1,
                    "arch": args.arch,
                    "state_dict": model.state_dict(),
                    "prec1": prec1,
                    "optimizer": optimizer.state_dict()
                }, is_best, checkpoint_name, savelocation)
def main(args):
    """Train a DynamicTrajectoryPredictor on BDD-10k optical-flow crops.

    Loads the train/val/test pkl splits for the chosen detector, trains
    with Adam + MSE, keeps the model with the best validation MSE at the
    15-frame horizon, evaluates it on the test set, and saves weights,
    predictions, targets, and a CSV of results.
    """
    ############################################################################
    # Path to optical flow images
    if args.detector == 'yolo':
        img_root = './data/yolov3/'
    else:
        img_root = './data/faster-rcnn/'

    # Path to training and testing files
    load_path = './data/'
    # CPU or GPU?
    device = torch.device("cuda")
    # Model saving and loading
    model_save_path = './data/'
    model_load_path = './data/'

    # Training settings
    epochs = 15
    batch_size = 64
    learning_rate = 1e-5
    num_workers = 8
    weight_decay = 1e-2
    NUM_FLOW_FRAMES = 9
    training_proportion = 100  # How much of the dataset to use? 100 = 100percent

    # Transformers for training and validation
    transform_train = video_transforms.Compose([
        video_transforms.MultiScaleCrop((224, 224), [1.0]),
        video_transforms.ToTensor(),
    ])
    transform_val = video_transforms.Compose([
        video_transforms.Scale((224)),
        video_transforms.ToTensor(),
    ])
    ############################################################################

    print('################### Training settings ###################')
    print('epochs:', epochs, ' batch_size:', batch_size, ' learning_rate:',
          learning_rate, ' num_workers:', num_workers, ' NUM_FLOW_FRAMES:',
          NUM_FLOW_FRAMES)

    results = pd.DataFrame()

    print('Training model')
    print(args.detector + '_bdd10k_val.pkl')
    try:
        # Validation and test sets share the same pkl split file.
        testset = LocationDatasetBDD(filename='bdd10k_val_' + args.detector +
                                     '.pkl',
                                     root_dir=load_path,
                                     transform=transform_val,
                                     img_root=img_root,
                                     NUM_FLOW_FRAMES=NUM_FLOW_FRAMES)
        test_loader = torch.utils.data.DataLoader(testset,
                                                  batch_size=batch_size,
                                                  shuffle=False,
                                                  num_workers=num_workers)
        trainset = LocationDatasetBDD(filename='bdd10k_train_' +
                                      args.detector + '.pkl',
                                      root_dir=load_path,
                                      transform=transform_train,
                                      img_root=img_root,
                                      NUM_FLOW_FRAMES=NUM_FLOW_FRAMES,
                                      proportion=training_proportion)
        train_loader = torch.utils.data.DataLoader(trainset,
                                                   batch_size=batch_size,
                                                   shuffle=True,
                                                   num_workers=num_workers)
        valset = LocationDatasetBDD(filename='bdd10k_val_' + args.detector +
                                    '.pkl',
                                    root_dir=load_path,
                                    transform=transform_val,
                                    img_root=img_root,
                                    NUM_FLOW_FRAMES=NUM_FLOW_FRAMES)
        val_loader = torch.utils.data.DataLoader(valset,
                                                 batch_size=batch_size,
                                                 shuffle=False,
                                                 num_workers=num_workers)
    except Exception:
        # FIX: was a bare `except:`, which also swallowed SystemExit and
        # KeyboardInterrupt; Exception keeps the intended best-effort exit.
        sys.exit(
            'ERROR: Could not load pkl data file. Check the bdd .pkl files are in the correct path.'
        )

    model = DynamicTrajectoryPredictor(NUM_FLOW_FRAMES).to(device)
    model = model.float()
    model = nn.DataParallel(model)
    optimizer = optim.Adam(model.parameters(),
                           lr=learning_rate,
                           weight_decay=weight_decay)
    loss_function = torch.nn.MSELoss()

    best_FDE = np.inf
    best_MSE = np.inf
    best_model = copy.deepcopy(model)

    # Begin training
    for epoch in range(1, epochs + 1):
        train(model, device, train_loader, optimizer, epoch, loss_function)
        MSE_5, FDE_5, MSE_10, FDE_10, MSE_15, FDE_15, _, _ = test(
            model, device, val_loader, loss_function)
        # Model selection on validation MSE at the 15-frame horizon.
        if MSE_15 < best_MSE:
            best_MSE = MSE_15
            best_model = copy.deepcopy(model)
            best_FDE = FDE_15
            torch.save(
                best_model.state_dict(),
                model_save_path + args.detector + '_rn18_bdd10k_flow_css_' +
                str(NUM_FLOW_FRAMES) + 'stack_training_proportion_' +
                str(training_proportion) + '_shuffled_disp.weights')
        print(epoch)
        print('Best MSE:', round(best_MSE, 0))

    test_mse_5, test_fde_5, test_mse_10, test_fde_10, test_mse_15, test_fde_15, all_outputs, all_targets = test(
        best_model, device, test_loader, loss_function)
    print('Test mse @ 15:', round(test_mse_15, 0))

    # Save the model
    torch.save(
        best_model.state_dict(),
        model_save_path + args.detector + 'bdd10k_rn18_flow_css_' +
        str(NUM_FLOW_FRAMES) + 'stack_training_proportion_' +
        str(training_proportion) + '_shuffled_disp.weights')

    # Save the predictions and the targets
    np.save(
        './' + args.detector + '_predictions_rn18_flow_css_' +
        str(NUM_FLOW_FRAMES) + 'stack_bdd10k_training_proportion_' +
        str(training_proportion) + '_shuffled_disp.npy', all_outputs)
    np.save(
        './' + args.detector + '_targets_rn18_flow_css_' +
        str(NUM_FLOW_FRAMES) + 'stack_bdd10k__shuffled_disp.npy', all_targets)

    # Save the results
    result = {
        'NUM_FLOW_FRAMES': NUM_FLOW_FRAMES,
        'training_proportion': training_proportion,
        'val_mse': best_MSE,
        'val_fde': best_FDE,
        'test_mse_5': test_mse_5,
        'test_fde_5': test_fde_5,
        'test_mse_10': test_mse_10,
        'test_fde_10': test_fde_10,
        'test_mse_15': test_mse_15,
        'test_fde_15': test_fde_15
    }
    # FIX: DataFrame.append was removed in pandas 2.0; pd.concat is the
    # supported, equivalent replacement.
    results = pd.concat([results, pd.DataFrame([result])], ignore_index=True)
    results.to_csv('./' + args.detector + '_results_rn18_bdd10k.csv',
                   index=False)
# ---- evaluation configuration ----------------------------------------------
batch_size = 1
eval_freq = 1

# define transform
import video_transforms

# Spatial geometry: all sizes scale together from the 224/340/256 baseline.
scale = 1
input_size = int(224 * scale)
width = int(340 * scale)
height = int(256 * scale)
length = 32  # frames per clip

# Per-channel statistics repeated once per frame of the clip.
clip_mean = [0.5, 0.5, 0.5] * length
clip_std = [0.5, 0.5, 0.5] * length
scale_ratios = [1.0, 0.875, 0.75, 0.66]
normalize = video_transforms.Normalize(mean=clip_mean, std=clip_std)

# Training uses random multi-scale crops + horizontal flips; validation
# uses a deterministic center crop.
train_transform = video_transforms.Compose([
    video_transforms.MultiScaleCrop((input_size, input_size), scale_ratios),
    video_transforms.RandomHorizontalFlip(),
    video_transforms.ToTensor(),
    normalize,
])
val_transform = video_transforms.Compose([
    video_transforms.CenterCrop((input_size)),
    video_transforms.ToTensor(),
    normalize,
])

# define dataset and dataloader
train_split_file = "./datasets/settings/haa500_instruments/train_rgb_split1.txt"
val_split_file = "./datasets/settings/haa500_instruments/val_rgb_split1.txt"