Example #1
def get_activations(files, data_type, model, batch_size, size, length, dims, device):
	"""Calculates the activations of the pool_3 layer for all images.
	Params:
	-- files       : List of image file paths
	-- data_type   : Either 'video' or 'frame', selecting the dataset class
	-- model       : Instance of inception model
	-- batch_size  : Batch size of images for the model to process at once.
					 Make sure that the number of samples is a multiple of
					 the batch size, otherwise some samples are ignored. This
					 behavior is retained to match the original FID score
					 implementation.
	-- size        : Spatial size each frame is resized to
	-- length      : Number of frames per clip
	-- dims        : Dimensionality of features returned by Inception
	-- device      : Device to run calculations on
	Returns:
	-- A numpy array of dimension (num images, dims) that contains the
	   activations of the given tensor when feeding inception with the
	   query tensor.
	"""
	model.eval()

	if batch_size > len(files):
		print('Warning: batch size is bigger than the data size. Setting batch size to data size')
		batch_size = len(files)
	
	transform = torchvision.transforms.Compose([
		transforms_vid.ClipResize((size, size)),
		transforms_vid.ClipToTensor(),
		transforms_vid.ClipNormalize(mean=[114.7748, 107.7354, 99.4750], std=[1, 1, 1]),
	])

	if data_type == 'video':
		ds = VideoDataset(files, length, transform)
	elif data_type == 'frame':
		ds = FrameDataset(files, length, transform)
	else:
		raise NotImplementedError
	dl = torch.utils.data.DataLoader(ds, batch_size=batch_size, drop_last=False, num_workers=cpu_count())

	pred_arr = torch.zeros(len(files), dims).to(device)

	start_idx = 0

	for batch in tqdm(dl):

		batch = batch.to(device)

		with torch.no_grad():
			pred = model(batch)

		if pred.size(2) != 1 or pred.size(3) != 1 or pred.size(4) != 1:
			pred = adaptive_avg_pool3d(pred, output_size=(1, 1, 1))

		pred = pred.squeeze(4).squeeze(3).squeeze(2)
		pred_arr[start_idx:start_idx + pred.shape[0]] = pred
		start_idx = start_idx + pred.shape[0]

	pred_arr = pred_arr.cpu().numpy()

	return pred_arr
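
The pooled activations returned above are typically reduced to the Gaussian statistics that the FID formula consumes. A minimal sketch of that follow-up step, assuming only numpy and the (num images, dims) array returned by get_activations:

# Hypothetical follow-up step: Gaussian statistics for FID.
import numpy as np

def activation_statistics(act):
    # Feature-wise mean and covariance of the pooled activations.
    mu = np.mean(act, axis=0)
    sigma = np.cov(act, rowvar=False)
    return mu, sigma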
Example #2
def main():
    config = get_args()
    config.device = torch.device(
        "cuda" if torch.cuda.is_available() else "cpu")

    # preprocess
    if config.preprocess_frames:
        preprocess.get_frames(config.train_vid, config.train_frames)
        preprocess.get_frames(config.test_vid, config.test_frames)

    if config.create_csv:
        train_speeds = preprocess.read_speed(config.train_speeds)
        preprocess.create_csv(config.train_frames, train_speeds,
                              config.csv_path)

    # dataset creation
    dataset = FrameDataset(config.csv_path, config.train_frames)
    train_set, val_set = random_split(dataset, [16320, 4080])

    # test set creation
    transform = transforms.Compose([
        transforms.Resize((66, 220)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225]),
    ])
    test_set = datasets.ImageFolder(config.test_frames, transform=transform)

    # model selection
    if config.model == 'simpleCNN':
        model = models.simpleCNN().to(config.device)
    elif config.model == 'ResNet':
        model = models.ResNet().to(config.device)
    else:
        raise ValueError(f"Unknown model: {config.model}")

    # train/val/test
    if config.train:
        runner.train(config, model, train_set)
    elif config.val:
        runner.validate(config, model, val_set)
    elif config.test:
        runner.test(config, model, test_set)
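
The hardcoded sizes passed to random_split above must sum to len(dataset) exactly (16320 + 4080 = 20400, an 80/20 split). A minimal sketch of an equivalent split that derives the sizes instead of hardcoding them, assuming the same random_split import:

# Hypothetical variant of the split above, without hardcoded sizes.
n_val = len(dataset) // 5  # 20% validation
train_set, val_set = random_split(dataset, [len(dataset) - n_val, n_val])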
Example #3
def predict_all():
    test_csv = "./notebooks/valid.csv.gz"
    model_name = 'vggresnet_ds'
    log_dir = f"/media/ngxbac/DATA/logs_omg/{model_name}/learnable_weight/"

    model = DeepBranchResnet(
        n_class=7,
        pretrained="/media/ngxbac/DATA/logs_emotiw_temporal/feature_extractor/vggresnet/checkpoints/best.pth",
    )

    checkpoint_path = f"{log_dir}/checkpoints/best.pth"
    checkpoint = torch.load(checkpoint_path)
    model.load_state_dict(checkpoint['model_state_dict'])
    model = model.to(device)

    # Dataset
    dataset = FrameDataset(csv_file=test_csv,
                           transform=valid_aug(224),
                           mode='test')

    loader = DataLoader(
        dataset=dataset,
        batch_size=32,
        shuffle=False,
        num_workers=4,
    )

    pred = predict(model, loader)

    # pred = np.asarray(pred).mean(axis=0)
    all_preds = np.argmax(pred, axis=1)
    df = pd.read_csv(test_csv)
    submission = df.copy()
    submission['EmotionMaxVote'] = all_preds.astype(int)
    os.makedirs("prediction", exist_ok=True)
    submission.to_csv(f'./prediction/{model_name}.csv',
                      index=False,
                      columns=['video', 'utterance', 'EmotionMaxVote'])
    np.save(f"./prediction/{model_name}.npy", pred)
Example #4
def main(args, logger):

    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)
    np.random.seed(args.seed)

    feature_extract = args.feature_extract

    writer_dir = os.path.join(args.log, "tensorboard")
    # writer_dir = os.path.join('./2D/results/tensorboard/', args.name)
    os.makedirs(writer_dir, exist_ok=True)
    writers = {
        x: SummaryWriter(log_dir=os.path.join(writer_dir, x))
        for x in ['train', 'valid']
    }

    model, input_size = initialize_model(args.model_name, 1, feature_extract)
    logger.info(f"{torch.cuda.device_count()} GPUs are being used.")
    if torch.cuda.device_count() > 1:
        model = nn.DataParallel(model)
    model = model.to(device)
    cudnn.benchmark = True

    transform = transforms.Compose([
        transforms.RandomResizedCrop(input_size),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
    ])

    video_paths = {
        x: os.path.join(args.video_path, x)
        for x in ['train', 'valid']
    }
    frame_paths = {
        x: os.path.join(args.frame_path, x)
        for x in ['train', 'valid']
    }
    label_paths = {
        x: os.path.join(args.label_path, f"{x}.csv")
        for x in ['train', 'valid']
    }
    calc_paths = {
        x: os.path.join(args.log, f"calc/{x}")
        for x in ['train', 'valid']
    }

    datasets = {
        x: FrameDataset(video_paths[x],
                        frame_paths[x],
                        label_paths[x],
                        calc_paths[x],
                        input_size,
                        transform=transform)
        for x in ['train', 'valid']
    }

    dataloaders = {
        x: torch.utils.data.DataLoader(datasets[x],
                                       batch_size=args.batch,
                                       num_workers=args.workers,
                                       pin_memory=True,
                                       shuffle=True)
        for x in ['train', 'valid']
    }

    params_to_update = model.parameters()
    logger.info("Params to learn:")
    if feature_extract:
        params_to_update = []
        for name, param in model.named_parameters():
            if param.requires_grad:
                params_to_update.append(param)
                logger.info(name)
    else:
        for name, param in model.named_parameters():
            if param.requires_grad:
                logger.info(name)
    optimizer = optim.SGD(params_to_update, lr=args.lr, momentum=0.7)
    # optimizer = optim.Adam(params_to_update, lr=0.001, weight_decay=0)

    start_epoch = 0
    model_save_dir = os.path.join(args.log, 'models')
    if os.path.exists(model_save_dir) and len(os.listdir(model_save_dir)) > 0:
        logger.debug("Load pretrained model...")
        latest_path = os.path.join(model_save_dir,
                                   sorted(os.listdir(model_save_dir))[-1])
        checkpoint = torch.load(latest_path)
        model.load_state_dict(checkpoint['model_state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        start_epoch = checkpoint['epoch']

    criterion = nn.BCEWithLogitsLoss()

    train(model,
          dataloaders,
          criterion,
          optimizer,
          args.epochs,
          model_save_dir,
          args.interval,
          writers,
          start_epoch=start_epoch,
          is_inception=(args.model_name == "inception"),
          threshold=args.threshold)

    for x in ['train', 'valid']:
        writers[x].close()
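
The resume block above expects checkpoints carrying 'model_state_dict', 'optimizer_state_dict', and 'epoch' keys, and picks the latest file via sorted(os.listdir(...)). A minimal sketch of a matching save call; the zero-padded filename is an assumption, chosen so that lexicographic sorting orders epochs correctly:

# Hypothetical checkpoint writer matching the resume logic above.
import os
import torch

def save_checkpoint(model, optimizer, epoch, model_save_dir):
    os.makedirs(model_save_dir, exist_ok=True)
    torch.save({
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        'epoch': epoch,
    }, os.path.join(model_save_dir, f"epoch_{epoch:04d}.pth"))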
Example #5
def main(args, logger):

    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)
    np.random.seed(args.seed)

    model, input_size = initialize_model(args.model_name, 1)
    logger.info(f"{torch.cuda.device_count()} GPUs are being used.")
    if torch.cuda.device_count() > 1:
        model = nn.DataParallel(model)
    model = model.to(device)
    cudnn.benchmark = True
    logger.debug("Load pretrained model...")
    checkpoint = torch.load(args.model_path)
    model.load_state_dict(checkpoint['model_state_dict'])

    # Deterministic preprocessing for evaluation. (The original example reused
    # random training-style augmentation here, which makes the PR curve below
    # non-reproducible; Resize + CenterCrop is a deterministic substitute.)
    transform = transforms.Compose([
        transforms.Resize(input_size),
        transforms.CenterCrop(input_size),
        transforms.ToTensor(),
    ])

    video_path = os.path.join(args.video_path, 'valid')
    frame_path = os.path.join(args.frame_path, 'valid')
    label_path = os.path.join(args.label_path, "valid.csv")
    calc_path = os.path.join(args.log, "calc/valid")

    dataset = FrameDataset(video_path,
                           frame_path,
                           label_path,
                           calc_path,
                           input_size,
                           transform=transform)

    dataloader = torch.utils.data.DataLoader(dataset,
                                             batch_size=1,
                                             num_workers=args.workers,
                                             pin_memory=True,
                                             shuffle=False)

    model.eval()

    labels = torch.zeros(len(dataloader.dataset))
    outputs = torch.zeros(len(dataloader.dataset))
    for i, (inputs, label, vnames) in enumerate(tqdm(dataloader)):
        inputs = inputs.to(device)
        with torch.no_grad():
            output = model(inputs)
        labels[i] = label.data
        outputs[i] = output.data

    thresholds = np.arange(0.0, 1.05, 0.01)
    precisions = np.zeros(len(thresholds))
    recalls = np.zeros(len(thresholds))
    for i, threshold in enumerate(thresholds):
        preds = torch.sigmoid(outputs) >= threshold

        tp = torch.sum((preds.data == 1.0) & (labels.data == 1.0))
        tn = torch.sum((preds.data == 0.0) & (labels.data == 0.0))
        fp = torch.sum((preds.data == 1.0) & (labels.data == 0.0))
        fn = torch.sum((preds.data == 0.0) & (labels.data == 1.0))

        assert tp + fp + tn + fn == len(dataset)

        precision = tp.double() / (tp.double() + fp.double())
        precision = precision.to('cpu').detach().numpy().copy()
        recall = tp.double() / (tp.double() + fn.double())
        recall = recall.to('cpu').detach().numpy().copy()
        if np.isnan(precision):
            precision = 1.0
        precisions[i] = precision
        recalls[i] = recall

    print("-" * 20)
    print(precisions)
    print("-" * 20)
    print(recalls)

    plt.plot(recalls, precisions, marker='o')
    plt.xlabel("recall")
    plt.ylabel("precision")
    plt.xlim(0.0, 1.0)
    plt.ylim(0.0, 1.0)
    plt.grid(True)
    plt.savefig(os.path.join(args.log, "pr.png"))
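
The manual threshold sweep above can be cross-checked against scikit-learn, which computes the same precision-recall curve directly from scores. A minimal sketch, assuming scikit-learn is available and labels/outputs are the CPU tensors filled in the evaluation loop:

# Hypothetical cross-check using scikit-learn's built-in PR curve.
from sklearn.metrics import precision_recall_curve

scores = torch.sigmoid(outputs).numpy()
precision, recall, pr_thresholds = precision_recall_curve(labels.numpy(), scores)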
Example #6
def objective(trial):

    name = 'optuna'
    video_path = "./data/"
    frame_path = "./2D/frame"
    label_path = "./label"
    log_path = os.path.join("./2D/results/", name)
    workers = 12
    model_name = "resnext"
    batch_size = 64
    seed = 31
    epochs = 10
    feature_extract = False

    os.makedirs(log_path, exist_ok=True)
    logger = init_logger(f"{log_path}/result.log")

    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    np.random.seed(seed)

    model, input_size = initialize_model(model_name, 1, feature_extract)
    logger.info(f"{torch.cuda.device_count()} GPUs are being used.")
    if torch.cuda.device_count() > 1:
        model = nn.DataParallel(model)
    model = model.to(device)
    cudnn.benchmark = True

    optimizer_name = trial.suggest_categorical("optimizer",
                                               ["Adam", "RMSprop", "SGD"])
    lr = trial.suggest_loguniform("lr", 1e-4, 1e-2)
    optimizer = getattr(optim, optimizer_name)(model.parameters(), lr=lr)
    threshold = trial.suggest_uniform("threshold", 0.1, 0.9)

    transform = transforms.Compose([
        transforms.RandomResizedCrop(input_size),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
    ])

    video_paths = {x: os.path.join(video_path, x) for x in ['train', 'valid']}
    frame_paths = {x: os.path.join(frame_path, x) for x in ['train', 'valid']}
    label_paths = {
        x: os.path.join(label_path, f"{x}.csv")
        for x in ['train', 'valid']
    }
    calc_paths = {
        x: os.path.join(log_path, f"calc/{x}")
        for x in ['train', 'valid']
    }

    datasets = {
        x: FrameDataset(video_paths[x],
                        frame_paths[x],
                        label_paths[x],
                        calc_paths[x],
                        input_size,
                        transform=transform)
        for x in ['train', 'valid']
    }

    dataloaders = {
        x: torch.utils.data.DataLoader(datasets[x],
                                       batch_size=batch_size,
                                       num_workers=workers,
                                       pin_memory=True,
                                       shuffle=True)
        for x in ['train', 'valid']
    }

    criterion = nn.BCEWithLogitsLoss()

    precision = train(trial,
                      model,
                      dataloaders,
                      criterion,
                      optimizer,
                      epochs,
                      logger,
                      is_inception=(model_name == "inception"),
                      threshold=threshold)

    return precision
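
Since objective returns a precision to maximize, it would be driven by an Optuna study such as the following. A minimal sketch using the standard Optuna API; n_trials is an arbitrary choice:

# Hypothetical driver for the objective above.
import optuna

study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=50)
print(study.best_params, study.best_value)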
Example #7
def get_params_from_arg(args):

    # === Save & load params ===
    save_params, load_params = get_save_load_params_from_arg(args)

    # === Model params ===
    model_func_params = get_model_func_params(args)
    vgg_emb_node, lstm_state_node, loss_node = [], [], []

    def build_output(inputs, train, **kwargs):
        # res = {'loss': sseLoss, 'lstm_state': new_state, 'vgg_emb': vgg_emb}
        res = main_model.build_output(inputs, train, **model_func_params)
        outputs, logged_cfg, _vgg_emb, _lstm_state, _loss = res
        vgg_emb_node.append(_vgg_emb)
        lstm_state_node.append(_lstm_state)
        loss_node.append(_loss)
        return outputs, logged_cfg

    model_params = {'func': build_output}
    multi_gpu = len(args.gpu.split(',')) - args.gpu_offset
    if multi_gpu > 1:
        model_params['num_gpus'] = multi_gpu
        model_params['devices'] = ['/gpu:%i' \
                                   % (idx + args.gpu_offset) \
                                   for idx in range(multi_gpu)]

    # === Train params ===
    if args.pure_test:
        train_params, loss_params = {}, {}
        learning_rate_params, optimizer_params = {}, {}
    else:
        # Data enumerator
        train_frame_dataset = FrameDataset(args.frame_root,
                                           args.meta_path,
                                           args.batch_size,
                                           args.num_frames,
                                           flip_frame=args.flip_frame,
                                           file_tmpl=args.file_tmpl,
                                           crop_size=args.crop_size,
                                           shuffle=args.shuffle)
        num_steps_per_epoch = train_frame_dataset.num_batch_per_epoch * args.num_frames
        train_frame_generator = train_frame_dataset.batch_of_frames_generator()
        train_frame_enumerator = [enumerate(train_frame_generator)]

        # Data params (defining input placeholders)
        train_data_param = get_train_data_param_from_arg(args)

        prev_emb_np, prev_state_np = [], []
        start_step = []

        # Train_loop
        def train_loop(sess, train_targets, num_minibatches=1, **params):
            global_step_vars = [v for v in tf.global_variables() \
                                if 'global_step' in v.name]
            assert len(global_step_vars) == 1
            global_step = sess.run(global_step_vars[0])

            # Record the starting step
            if len(start_step) == 0:
                start_step.append(global_step)
            curr_global_step = global_step - start_step[0]

            # Update the data_loader for each epoch
            if curr_global_step % num_steps_per_epoch == 0:
                print("====== Epoch {} ======".format(
                    int(curr_global_step / num_steps_per_epoch)))
                if curr_global_step != 0:
                    train_frame_enumerator.pop()
                    train_frame_generator = \
                        train_frame_dataset.batch_of_frames_generator()
                    train_frame_enumerator.append(
                        enumerate(train_frame_generator))

            # Initialization of prev_emb & prev_state
            # at the beginning of each batch
            if curr_global_step % args.num_frames == 0:
                print("--- Batch {} ---".format(
                    int(curr_global_step / args.num_frames)))
                _, (image, index, step) = next(train_frame_enumerator[0])
                assert step == 0
                assert len(prev_state_np) <= 1
                if len(prev_state_np) == 1:
                    prev_state_np.pop()

                np.random.seed(6)  # Test my reimplementation
                prev_state_np.append(np.random.uniform(low=-0.5, high=0.5, \
                                    size=(args.batch_size, 2*args.num_units)))

                assert len(prev_emb_np) <= 1
                if len(prev_emb_np) == 1:
                    prev_emb_np.pop()
                vgg_feed_dict = data.get_vgg_feeddict(image, index)
                # Try multi-gpu
                # prev_emb_np.append(sess.run(vgg_emb_node[0], feed_dict=vgg_feed_dict))
                prev_emb_list = []
                for vgg_emb_n in vgg_emb_node:
                    prev_emb_list.append(
                        sess.run(vgg_emb_n, feed_dict=vgg_feed_dict))
                prev_emb_np.append(np.vstack(prev_emb_list))

            # Normal train step
            # Get data from the enumerator
            _, (image, index, step) = next(train_frame_enumerator[0])
            assert curr_global_step % args.num_frames + 1 == step
            # Feed input data and run
            # TODO: Learning rate for adaptive learning
            feed_dict = data.get_feeddict(image, index, \
                                          prev_emb_np[0], prev_state_np[0])
            sess_res = sess.run(train_targets + loss_node + vgg_emb_node +
                                lstm_state_node,
                                feed_dict=feed_dict)
            # The tail of sess_res holds, per GPU and in order: pred errors
            # [bs] (the loss nodes), vgg embeddings, and lstm states.
            vgg_emb_list = sess_res[-2 * multi_gpu:-multi_gpu]
            lstm_state_list = sess_res[-multi_gpu:]
            sess_res = [sess_res[0]]
            vgg_emb = np.vstack(vgg_emb_list)
            lstm_state = np.vstack(lstm_state_list)
            prev_emb_np[0], prev_state_np[0] = vgg_emb, lstm_state
            return sess_res

        train_params = {
            'validate_first': False,
            'data_params': train_data_param,
            'queue_params': None,
            'thres_loss': float('Inf'),
            'num_steps': float('Inf'),
            'train_loop': {
                'func': train_loop
            },
        }

        # === Loss, learning_rate & optimizer params ===
        loss_params, learning_rate_params, optimizer_params \
            = get_loss_lr_opt_params_from_arg(args)

    # === Validation params ===
    if args.pure_train:
        validation_params = {}
    else:
        val_data_param = get_valid_data_param_from_arg(args)
        val_targets = {'func': valid_get_pred_error_func}

        valid_frame_dataset = FrameDataset(args.frame_root,
                                           args.test_meta_path,
                                           1,
                                           None,
                                           flip_frame=args.flip_frame,
                                           file_tmpl=args.file_tmpl,
                                           crop_size=args.crop_size,
                                           shuffle=False)
        val_step_num = valid_frame_dataset.valid_num_step()
        valid_frame_generator = \
            valid_frame_dataset.valid_single_frame_generator()
        valid_frame_enumerator = [enumerate(valid_frame_generator)]

        # val_counter = [0]
        is_new_video = [True]
        prev_emb_np, prev_state_np = [], []

        def valid_loop(sess, target):
            # NOTE: only a batch size of 1 is supported for testing.
            # NOTE: multi-gpu is not supported
            # val_counter[0] += 1
            """ 
            # Only run testing for 1 epoch
            if val_counter[0] % val_step_num == 0:
                valid_frame_enumerator.pop()
                valid_frame_generator = valid_frame_dataset.valid_single_frame_generator()
                valid_frame_enumerator.append(enumerate(valid_frame_generator))
            """

            # Initialization of prev_emb & prev_state
            # at the beginning of each video
            if is_new_video[0]:
                _, (image, index, step,
                    is_new_video[0]) = next(valid_frame_enumerator[0])
                assert step == 0
                assert len(prev_state_np) <= 1
                if len(prev_state_np) == 1:
                    prev_state_np.pop()

                np.random.seed(6)  # Test my reimplementation
                prev_state_np.append(np.random.uniform(low=-0.5, high=0.5, \
                                    size=(1, 2*args.num_units)))

                assert len(prev_emb_np) <= 1
                if len(prev_emb_np) == 1:
                    prev_emb_np.pop()
                vgg_feed_dict = data.get_vgg_feeddict(image,
                                                      index,
                                                      name_prefix='VALID')
                prev_emb_np.append(
                    sess.run(vgg_emb_node[0], feed_dict=vgg_feed_dict))

            # Normal validation step
            # Get data from the enumerator
            _, (image, index, step,
                is_new_video[0]) = next(valid_frame_enumerator[0])
            # Feed input data and run
            feed_dict = data.get_feeddict(image,
                                          index,
                                          prev_emb_np[0],
                                          prev_state_np[0],
                                          name_prefix='VALID')
            sess_res = sess.run([target] + vgg_emb_node + lstm_state_node,
                                feed_dict=feed_dict)
            vgg_emb, lstm_state = sess_res[-2], sess_res[-1]
            sess_res = sess_res[0]
            prev_emb_np[0], prev_state_np[0] = vgg_emb, lstm_state
            return sess_res

        pred_error_val_param = {
            'data_params': val_data_param,
            'queue_params': None,
            'targets': val_targets,
            'num_steps': val_step_num,
            'agg_func': final_agg_emb,
            'online_agg_func': online_agg_emb,
            'valid_loop': {
                'func': valid_loop
            }
        }

        save_to_gfs = ['loss', 'index']
        save_params['save_to_gfs'] = save_to_gfs

        validation_params = {
            'pred_error': pred_error_val_param,
        }

    params = {
        'save_params': save_params,
        'load_params': load_params,
        'model_params': model_params,
        'train_params': train_params,
        'loss_params': loss_params,
        'learning_rate_params': learning_rate_params,
        'optimizer_params': optimizer_params,
        'log_device_placement': False,
        'validation_params': validation_params,
        'skip_check': True,
    }
    return params
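
The returned dict (save_params, load_params, model_params, train_params, loss_params, learning_rate_params, optimizer_params, validation_params, ...) is shaped like the keyword interface of a tfutils-style train_from_params call. A minimal sketch of how it would be consumed, assuming the tfutils framework this code appears to be written against:

# Hypothetical entry point, assuming a tfutils-style training harness.
from tfutils import base

params = get_params_from_arg(args)
base.train_from_params(**params)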