def get_activations(files, data_type, model, batch_size, size, length, dims, device):
    """Compute pool_3 Inception activations for a collection of clips.

    Params:
    -- files      : list of video / frame file paths
    -- data_type  : 'video' or 'frame'; selects the dataset class
    -- model      : instance of the Inception model
    -- batch_size : number of samples fed through the model at once. If the
                    sample count is not a multiple of the batch size some
                    samples may be ignored (kept to match the original FID
                    implementation).
    -- size       : spatial size clips are resized to
    -- length     : clip length used by the dataset
    -- dims       : dimensionality of the Inception features
    -- device     : device the computation runs on

    Returns:
    -- numpy array of shape (num files, dims) holding the activations
       produced by feeding the clips to the model.
    """
    model.eval()

    if batch_size > len(files):
        print(('Warning: batch size is bigger than the data size. Setting batch size to data size'))
        batch_size = len(files)

    transform = torchvision.transforms.Compose([
        transforms_vid.ClipResize((size, size)),
        transforms_vid.ClipToTensor(),
        transforms_vid.ClipNormalize(mean=[114.7748, 107.7354, 99.4750],
                                     std=[1, 1, 1]),
    ])

    if data_type == 'video':
        ds = VideoDataset(files, length, transform)
    elif data_type == 'frame':
        ds = FrameDataset(files, length, transform)
    else:
        raise NotImplementedError

    dl = torch.utils.data.DataLoader(ds,
                                     batch_size=batch_size,
                                     drop_last=False,
                                     num_workers=cpu_count())

    pred_arr = torch.zeros(len(files), dims).to(device)
    offset = 0
    for clip_batch in tqdm(dl):
        clip_batch = clip_batch.to(device)
        with torch.no_grad():
            feats = model(clip_batch)

        # Collapse any remaining spatio-temporal extent down to one vector.
        if feats.size(2) != 1 or feats.size(3) != 1 or feats.size(4) != 1:
            feats = adaptive_avg_pool3d(feats, output_size=(1, 1, 1))
        feats = feats.squeeze(4).squeeze(3).squeeze(2)

        pred_arr[offset:offset + feats.shape[0]] = feats
        offset += feats.shape[0]

    return pred_arr.cpu().numpy()
def main():
    """Run preprocessing, build datasets and model, then train / validate / test."""
    config = get_args()
    config.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Optional one-off preprocessing steps.
    if config.preprocess_frames:
        preprocess.get_frames(config.train_vid, config.train_frames)
        preprocess.get_frames(config.test_vid, config.test_frames)
    if config.create_csv:
        train_speeds = preprocess.read_speed(config.train_speeds)
        preprocess.create_csv(config.train_frames, train_speeds, config.csv_path)

    # Fixed-size train / validation split of the frame dataset.
    full_set = FrameDataset(config.csv_path, config.train_frames)
    train_set, val_set = random_split(full_set, [16320, 4080])

    # Test set: image folder with deterministic preprocessing.
    test_transform = transforms.Compose([
        transforms.Resize((66, 220)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225]),
    ])
    test_set = datasets.ImageFolder(config.test_frames, transform=test_transform)

    # Model selection (any other name leaves `model` unbound, as before).
    if config.model == 'simpleCNN':
        model = models.simpleCNN().to(config.device)
    elif config.model == 'ResNet':
        model = models.ResNet().to(config.device)

    # Dispatch to exactly one stage.
    if config.train:
        runner.train(config, model, train_set)
    elif config.val:
        runner.validate(config, model, val_set)
    elif config.test:
        runner.test(config, model, test_set)
def predict_all():
    """Predict emotion classes for the validation split and write a submission CSV plus raw logits."""
    test_csv = "./notebooks/valid.csv.gz"
    model_name = 'vggresnet_ds'
    log_dir = f"/media/ngxbac/DATA/logs_omg/{model_name}/learnable_weight/"

    # Restore weights from this run's best checkpoint.
    model = DeepBranchResnet(
        n_class=7,
        pretrained=
        "/media/ngxbac/DATA/logs_emotiw_temporal/feature_extractor/vggresnet/checkpoints/best.pth"
    )
    state = torch.load(f"{log_dir}/checkpoints/best.pth")
    model.load_state_dict(state['model_state_dict'])
    model = model.to(device)

    # Deterministic evaluation loader over the test CSV.
    loader = DataLoader(
        dataset=FrameDataset(csv_file=test_csv, transform=valid_aug(224), mode='test'),
        batch_size=32,
        shuffle=False,
        num_workers=4,
    )

    pred = predict(model, loader)
    all_preds = np.argmax(pred, axis=1)

    # Build the submission frame from the input CSV and save predictions.
    submission = pd.read_csv(test_csv).copy()
    submission['EmotionMaxVote'] = all_preds.astype(int)
    os.makedirs("prediction", exist_ok=True)
    submission.to_csv(f'./prediction/{model_name}.csv',
                      index=False,
                      columns=['video', 'utterance', 'EmotionMaxVote'])
    np.save(f"./prediction/{model_name}.npy", pred)
def main(args, logger):
    """Train a 2D frame classifier, resuming from the latest checkpoint if one exists.

    Params:
    -- args   : parsed CLI namespace (seed, log, model_name, data paths,
                batch, workers, lr, epochs, interval, threshold, ...)
    -- logger : logger used for progress messages

    Side effects: writes TensorBoard logs under <args.log>/tensorboard and
    model checkpoints under <args.log>/models (via ``train``).
    """
    # Reproducibility.
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)
    np.random.seed(args.seed)

    feature_extract = args.feature_extract

    # One TensorBoard writer per phase. exist_ok avoids the exists()/makedirs
    # race the original check had.
    writer_dir = os.path.join(args.log, "tensorboard")
    os.makedirs(writer_dir, exist_ok=True)
    writers = {
        x: SummaryWriter(log_dir=os.path.join(writer_dir, x))
        for x in ['train', 'valid']
    }

    model, input_size = initialize_model(args.model_name, 1, feature_extract)
    logger.info(f"{torch.cuda.device_count()} GPUs are being used.")
    if torch.cuda.device_count() > 1:
        model = nn.DataParallel(model)
    model = model.to(device)
    cudnn.benchmark = True

    # Training-time augmentation.
    transform = transforms.Compose([
        transforms.RandomResizedCrop(input_size),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
    ])

    video_paths = {x: os.path.join(args.video_path, x) for x in ['train', 'valid']}
    frame_paths = {x: os.path.join(args.frame_path, x) for x in ['train', 'valid']}
    label_paths = {x: os.path.join(args.label_path, f"{x}.csv") for x in ['train', 'valid']}
    calc_paths = {x: os.path.join(args.log, f"calc/{x}") for x in ['train', 'valid']}
    datasets = {
        x: FrameDataset(video_paths[x], frame_paths[x], label_paths[x],
                        calc_paths[x], input_size, transform=transform)
        for x in ['train', 'valid']
    }
    dataloaders = {
        x: torch.utils.data.DataLoader(datasets[x],
                                       batch_size=args.batch,
                                       num_workers=args.workers,
                                       pin_memory=True,
                                       shuffle=True)
        for x in ['train', 'valid']
    }

    # When feature-extracting, optimize only the parameters left trainable by
    # initialize_model; otherwise optimize everything (and just log the names).
    params_to_update = model.parameters()
    logger.info("Params to learn:")
    if feature_extract:
        params_to_update = []
        for name, param in model.named_parameters():
            if param.requires_grad:  # idiomatic truth test (was `== True`)
                params_to_update.append(param)
                logger.info(name)
    else:
        for name, param in model.named_parameters():
            if param.requires_grad:
                logger.info(name)

    optimizer = optim.SGD(params_to_update, lr=args.lr, momentum=0.7)

    # Resume from the lexicographically latest checkpoint, if any exist.
    start_epoch = 0
    model_save_dir = os.path.join(args.log, 'models')
    if os.path.exists(model_save_dir) and len(os.listdir(model_save_dir)) > 0:
        logger.debug("Load pretrained model...")
        latest_path = os.path.join(model_save_dir,
                                   sorted(os.listdir(model_save_dir))[-1])
        checkpoint = torch.load(latest_path)
        model.load_state_dict(checkpoint['model_state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        start_epoch = checkpoint['epoch']

    criterion = nn.BCEWithLogitsLoss()
    train(model, dataloaders, criterion, optimizer, args.epochs,
          model_save_dir, args.interval, writers,
          start_epoch=start_epoch,
          is_inception=(args.model_name == "inception"),
          threshold=args.threshold)

    for x in ['train', 'valid']:
        writers[x].close()
def main(args, logger):
    """Sweep decision thresholds on the validation set and plot a precision-recall curve.

    Loads a trained single-logit model, runs it over the validation split with
    batch size 1, then for thresholds in [0, 1.04] computes precision/recall
    and saves the PR plot to <args.log>/pr.png.

    Params:
    -- args   : parsed CLI namespace (seed, model_name, model_path, paths, ...)
    -- logger : logger used for progress messages
    """
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)
    np.random.seed(args.seed)

    model, input_size = initialize_model(args.model_name, 1)
    logger.info(f"{torch.cuda.device_count()} GPUs are being used.")
    if torch.cuda.device_count() > 1:
        model = nn.DataParallel(model)
    model = model.to(device)
    cudnn.benchmark = True

    logger.debug("Load pretrained model...")
    checkpoint = torch.load(args.model_path)
    model.load_state_dict(checkpoint['model_state_dict'])

    # FIX: evaluation must be deterministic. The original applied random
    # augmentation (RandomResizedCrop + RandomHorizontalFlip) at validation
    # time, which makes the PR curve noisy and non-reproducible. Use the
    # standard deterministic eval pipeline instead.
    transform = transforms.Compose([
        transforms.Resize(input_size),
        transforms.CenterCrop(input_size),
        transforms.ToTensor(),
    ])
    video_path = os.path.join(args.video_path, 'valid')
    frame_path = os.path.join(args.frame_path, 'valid')
    label_path = os.path.join(args.label_path, "valid.csv")
    calc_path = os.path.join(args.log, "calc/valid")
    dataset = FrameDataset(video_path, frame_path, label_path, calc_path,
                           input_size, transform=transform)
    dataloader = torch.utils.data.DataLoader(dataset,
                                             batch_size=1,
                                             num_workers=args.workers,
                                             pin_memory=True,
                                             shuffle=False)

    # Collect raw logits and labels; one sample per step (batch_size=1).
    model.eval()
    labels = torch.zeros(len(dataloader.dataset))
    outputs = torch.zeros(len(dataloader.dataset))
    for i, (inputs, label, vnames) in enumerate(tqdm(dataloader)):
        inputs = inputs.to(device)
        with torch.no_grad():
            output = model(inputs)
        labels[i] = label.data
        outputs[i] = output.data

    thresholds = np.arange(0.0, 1.05, 0.01)
    precisions = np.zeros(len(thresholds))
    recalls = np.zeros(len(thresholds))
    for i, threshold in enumerate(thresholds):
        preds = torch.sigmoid(outputs) >= threshold
        tp = torch.sum((preds.data == 1.0) & (labels.data == 1.0))
        tn = torch.sum((preds.data == 0.0) & (labels.data == 0.0))
        fp = torch.sum((preds.data == 1.0) & (labels.data == 0.0))
        fn = torch.sum((preds.data == 0.0) & (labels.data == 1.0))
        assert tp + fp + tn + fn == len(dataset)

        precision = tp.double() / (tp.double() + fp.double())
        precision = precision.to('cpu').detach().numpy().copy()
        recall = tp.double() / (tp.double() + fn.double())
        recall = recall.to('cpu').detach().numpy().copy()
        # No positive predictions -> 0/0 = NaN; report precision 1.0 by convention.
        if np.isnan(precision):
            precision = 1.0
        precisions[i] = precision
        recalls[i] = recall

    print("-" * 20)
    print(precisions)
    print("-" * 20)
    print(recalls)

    plt.plot(recalls, precisions, marker='o')
    plt.xlabel("recall")
    plt.ylabel("precision")
    plt.xlim(0.0, 1.0)
    plt.ylim(0.0, 1.0)
    plt.grid(True)
    plt.savefig(os.path.join(args.log, "pr.png"))
def objective(trial):
    """Optuna objective: train a resnext frame classifier and return validation precision.

    Samples optimizer type, learning rate and decision threshold from the
    trial, trains for a fixed number of epochs, and returns the precision
    reported by ``train``.
    """
    name = 'optuna'
    video_path = "./data/"
    frame_path = "./2D/frame"
    label_path = "./label"
    log_path = os.path.join("./2D/results/", name)
    workers = 12
    model_name = "resnext"
    batch_size = 64
    seed = 31
    epochs = 10
    feature_extract = False

    if not os.path.exists(log_path):
        os.makedirs(log_path)
    logger = init_logger(f"{log_path}/result.log")

    # Fixed seeds so trials differ only in the sampled hyper-parameters.
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    np.random.seed(seed)

    model, input_size = initialize_model(model_name, 1, feature_extract)
    logger.info(f"{torch.cuda.device_count()} GPUs are being used.")
    if torch.cuda.device_count() > 1:
        model = nn.DataParallel(model)
    model = model.to(device)
    cudnn.benchmark = True

    # Hyper-parameters sampled by Optuna.
    optimizer_name = trial.suggest_categorical("optimizer", ["Adam", "RMSprop", "SGD"])
    lr = trial.suggest_loguniform("lr", 1e-4, 1e-2)
    optimizer = getattr(optim, optimizer_name)(model.parameters(), lr=lr)
    threshold = trial.suggest_uniform("threshold", 0.1, 0.9)

    transform = transforms.Compose([
        transforms.RandomResizedCrop(input_size),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
    ])

    phases = ['train', 'valid']
    video_paths = {x: os.path.join(video_path, x) for x in phases}
    frame_paths = {x: os.path.join(frame_path, x) for x in phases}
    label_paths = {x: os.path.join(label_path, f"{x}.csv") for x in phases}
    calc_paths = {x: os.path.join(log_path, f"calc/{x}") for x in phases}
    datasets = {
        x: FrameDataset(video_paths[x], frame_paths[x], label_paths[x],
                        calc_paths[x], input_size, transform=transform)
        for x in phases
    }
    dataloaders = {
        x: torch.utils.data.DataLoader(datasets[x],
                                       batch_size=batch_size,
                                       num_workers=workers,
                                       pin_memory=True,
                                       shuffle=True)
        for x in phases
    }

    criterion = nn.BCEWithLogitsLoss()
    return train(trial, model, dataloaders, criterion, optimizer, epochs, logger,
                 is_inception=(model_name == "inception"),
                 threshold=threshold)
def get_params_from_arg(args):
    """Assemble the tfutils-style params dict (save/load, model, train, validation).

    FIX: the original called ``.next()`` on ``enumerate`` objects, which is
    Python-2-only and raises AttributeError on Python 3 (the surrounding file
    uses f-strings, so it runs on 3.6+). All such calls now use the ``next()``
    builtin, which is equivalent on both versions.

    Params:
    -- args : parsed CLI namespace (gpu, batch_size, num_frames, num_units,
              frame_root, meta paths, pure_test/pure_train flags, ...)

    Returns:
    -- dict with save/load/model/train/loss/lr/optimizer/validation params.
    """
    # === Save & load params ===
    save_params, load_params = get_save_load_params_from_arg(args)

    # === Model params ===
    model_func_params = get_model_func_params(args)
    # Per-GPU graph nodes collected as build_output is called once per tower.
    vgg_emb_node, lstm_state_node, loss_node = [], [], []

    def build_output(inputs, train, **kwargs):
        # res = {'loss': sseLoss, 'lstm_state': new_state, 'vgg_emb': vgg_emb}
        res = main_model.build_output(inputs, train, **model_func_params)
        outputs, logged_cfg, _vgg_emb, _lstm_state, _loss = res
        vgg_emb_node.append(_vgg_emb)
        lstm_state_node.append(_lstm_state)
        loss_node.append(_loss)
        return outputs, logged_cfg

    model_params = {'func': build_output}
    multi_gpu = len(args.gpu.split(',')) - args.gpu_offset
    if multi_gpu > 1:
        model_params['num_gpus'] = multi_gpu
        model_params['devices'] = ['/gpu:%i' \
                % (idx + args.gpu_offset) \
                for idx in range(multi_gpu)]

    # === Train params ===
    if args.pure_test:
        train_params, loss_params = {}, {}
        learning_rate_params, optimizer_params = {}, {}
    else:
        # Data enumerator
        train_frame_dataset = FrameDataset(args.frame_root,
                                           args.meta_path,
                                           args.batch_size,
                                           args.num_frames,
                                           flip_frame=args.flip_frame,
                                           file_tmpl=args.file_tmpl,
                                           crop_size=args.crop_size,
                                           shuffle=args.shuffle)
        num_steps_per_epoch = train_frame_dataset.num_batch_per_epoch * args.num_frames
        train_frame_generator = train_frame_dataset.batch_of_frames_generator()
        # One-element lists are used as mutable cells shared with the closures.
        train_frame_enumerator = [enumerate(train_frame_generator)]

        # Data params (defining input placeholders)
        train_data_param = get_train_data_param_from_arg(args)

        prev_emb_np, prev_state_np = [], []
        start_step = []

        def train_loop(sess, train_targets, num_minibatches=1, **params):
            """Custom train loop: feed previous embedding/state back each step."""
            global_step_vars = [v for v in tf.global_variables() \
                    if 'global_step' in v.name]
            assert len(global_step_vars) == 1
            global_step = sess.run(global_step_vars[0])

            # Record the starting step
            if len(start_step) == 0:
                start_step.append(global_step)
            curr_global_step = global_step - start_step[0]

            # Update the data_loader for each epoch
            if curr_global_step % num_steps_per_epoch == 0:
                print("====== Epoch {} ======".format(
                    int(curr_global_step / num_steps_per_epoch)))
                if curr_global_step != 0:
                    train_frame_enumerator.pop()
                    train_frame_generator = train_frame_dataset.batch_of_frames_generator()
                    train_frame_enumerator.append(
                        enumerate(train_frame_generator))

            # Initialization of prev_emb & prev_state
            # at the beginning of each batch
            if curr_global_step % args.num_frames == 0:
                print("--- Batch {} ---".format(
                    int(curr_global_step / args.num_frames)))
                # Py3-compatible iteration (was train_frame_enumerator[0].next()).
                _, (image, index, step) = next(train_frame_enumerator[0])
                assert step == 0

                assert len(prev_state_np) <= 1
                if len(prev_state_np) == 1:
                    prev_state_np.pop()
                np.random.seed(6)  # Test my reimplementation
                prev_state_np.append(np.random.uniform(low=-0.5, high=0.5, \
                        size=(args.batch_size, 2*args.num_units)))

                assert len(prev_emb_np) <= 1
                if len(prev_emb_np) == 1:
                    prev_emb_np.pop()
                vgg_feed_dict = data.get_vgg_feeddict(image, index)
                # Run the VGG embedding on every tower and stack the results.
                prev_emb_list = []
                for vgg_emb_n in vgg_emb_node:
                    prev_emb_list.append(
                        sess.run(vgg_emb_n, feed_dict=vgg_feed_dict))
                prev_emb_np.append(np.vstack(prev_emb_list))

            # Normal train step
            # Get data from the enumerator
            _, (image, index, step) = next(train_frame_enumerator[0])
            assert curr_global_step % args.num_frames + 1 == step

            # Feed input data and run
            # TODO: Learning rate for adaptive learning
            feed_dict = data.get_feeddict(image, index, \
                    prev_emb_np[0], prev_state_np[0])
            sess_res = sess.run(train_targets + loss_node + vgg_emb_node + lstm_state_node,
                                feed_dict=feed_dict)
            # Tail of sess_res: per-tower losses, embeddings, lstm states.
            _, vgg_emb_list, lstm_state_list = sess_res[
                -3 * multi_gpu:-2 * multi_gpu], sess_res[
                    -2 * multi_gpu:-multi_gpu], sess_res[-multi_gpu:]
            # _ is the pred errors [bs]
            sess_res = [sess_res[0]]
            vgg_emb = np.vstack(vgg_emb_list)
            lstm_state = np.vstack(lstm_state_list)
            prev_emb_np[0], prev_state_np[0] = vgg_emb, lstm_state
            return sess_res

        train_params = {
            'validate_first': False,
            'data_params': train_data_param,
            'queue_params': None,
            'thres_loss': float('Inf'),
            'num_steps': float('Inf'),
            'train_loop': {
                'func': train_loop
            },
        }

        # === Loss, learning_rate & optimizer params ===
        loss_params, learning_rate_params, optimizer_params \
                = get_loss_lr_opt_params_from_arg(args)

    # === Validation params ===
    if args.pure_train:
        validation_params = {}
    else:
        val_data_param = get_valid_data_param_from_arg(args)
        val_targets = {'func': valid_get_pred_error_func}
        valid_frame_dataset = FrameDataset(args.frame_root,
                                           args.test_meta_path,
                                           1,
                                           None,
                                           flip_frame=args.flip_frame,
                                           file_tmpl=args.file_tmpl,
                                           crop_size=args.crop_size,
                                           shuffle=False)
        val_step_num = valid_frame_dataset.valid_num_step()
        valid_frame_generator = valid_frame_dataset.valid_single_frame_generator()
        valid_frame_enumerator = [enumerate(valid_frame_generator)]

        is_new_video = [True]
        prev_emb_np, prev_state_np = [], []

        def valid_loop(sess, target):
            """Custom validation loop.

            NOTE: only a batch size of 1 is supported for testing.
            NOTE: multi-gpu is not supported.
            """
            # Initialization of prev_emb & prev_state
            # at the beginning of each video
            if is_new_video[0]:
                _, (image, index, step,
                    is_new_video[0]) = next(valid_frame_enumerator[0])
                assert step == 0

                assert len(prev_state_np) <= 1
                if len(prev_state_np) == 1:
                    prev_state_np.pop()
                np.random.seed(6)  # Test my reimplementation
                prev_state_np.append(np.random.uniform(low=-0.5, high=0.5, \
                        size=(1, 2*args.num_units)))

                assert len(prev_emb_np) <= 1
                if len(prev_emb_np) == 1:
                    prev_emb_np.pop()
                vgg_feed_dict = data.get_vgg_feeddict(image, index,
                                                      name_prefix='VALID')
                prev_emb_np.append(
                    sess.run(vgg_emb_node[0], feed_dict=vgg_feed_dict))

            # Normal validation step
            # Get data from the enumerator
            _, (image, index, step,
                is_new_video[0]) = next(valid_frame_enumerator[0])

            # Feed input data and run
            feed_dict = data.get_feeddict(image, index, prev_emb_np[0],
                                          prev_state_np[0],
                                          name_prefix='VALID')
            sess_res = sess.run([target] + vgg_emb_node + lstm_state_node,
                                feed_dict=feed_dict)
            vgg_emb, lstm_state = sess_res[-2], sess_res[-1]
            sess_res = sess_res[0]
            prev_emb_np[0], prev_state_np[0] = vgg_emb, lstm_state
            return sess_res

        pred_error_val_param = {
            'data_params': val_data_param,
            'queue_params': None,
            'targets': val_targets,
            'num_steps': val_step_num,
            'agg_func': final_agg_emb,
            'online_agg_func': online_agg_emb,
            'valid_loop': {
                'func': valid_loop
            }
        }

        save_to_gfs = ['loss', 'index']
        save_params['save_to_gfs'] = save_to_gfs
        validation_params = {
            'pred_error': pred_error_val_param,
        }

    params = {
        'save_params': save_params,
        'load_params': load_params,
        'model_params': model_params,
        'train_params': train_params,
        'loss_params': loss_params,
        'learning_rate_params': learning_rate_params,
        'optimizer_params': optimizer_params,
        'log_device_placement': False,
        'validation_params': validation_params,
        'skip_check': True,
    }
    return params