Example No. 1
def generate_C2D_model(opt):
    if opt.c2d_model_name == 'inception_v3':
        C, H, W = 3, 299, 299
        model = pretrainedmodels.inceptionv3(num_classes=1000, pretrained='imagenet')
        load_image_fn = utils.LoadTransformImage(model)

    elif opt.c2d_model_name == 'resnet152':
        C, H, W = 3, 224, 224
        model = pretrainedmodels.resnet152(num_classes=1000, pretrained='imagenet')
        load_image_fn = utils.LoadTransformImage(model)

    elif opt.c2d_model_name == 'inception_v4':
        C, H, W = 3, 299, 299
        model = pretrainedmodels.inceptionv4(num_classes=1000, pretrained='imagenet')
        load_image_fn = utils.LoadTransformImage(model)

    elif opt.c2d_model_name == 'inceptionresnetv2':
        C, H, W = 3, 299, 299
        model = pretrainedmodels.inceptionresnetv2(num_classes=1000, pretrained='imagenet')
        load_image_fn = utils.LoadTransformImage(model)

    else:
        raise ValueError("unsupported 2D model: %s" % opt.c2d_model_name)

    model.last_linear = utils.Identity()

    if not opt.no_cuda:
        model = model.to(opt.device)

    return load_image_fn, model, (C, H, W)
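A minimal usage sketch, assuming `opt` is an argparse-style namespace (only the three attributes the function reads are needed; the shape comment applies to resnet152):

from types import SimpleNamespace
import torch

# Hypothetical opt object; generate_C2D_model only reads these attributes.
opt = SimpleNamespace(c2d_model_name='resnet152', no_cuda=False,
                      device=torch.device('cuda'))
load_image_fn, model, (C, H, W) = generate_C2D_model(opt)
model.eval()  # freeze BatchNorm statistics before extracting features
with torch.no_grad():
    feats = model(torch.zeros(1, C, H, W, device=opt.device))
print(feats.shape)  # torch.Size([1, 2048]) for resnet152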
Example No. 2
def extract_feats(args):
    params = args
    if params['model'] == 'inception_v3':
        C, H, W = 3, 299, 299
        model = pretrainedmodels.inceptionv3(pretrained='imagenet')
        load_image_fn = utils.LoadTransformImage(model)

    elif params['model'] == 'resnet152':
        C, H, W = 3, 224, 224
        model = pretrainedmodels.resnet152(pretrained='imagenet')
        load_image_fn = utils.LoadTransformImage(model)

    elif params['model'] == 'inception_v4':
        C, H, W = 3, 299, 299
        model = pretrainedmodels.inceptionv4(num_classes=1000,
                                             pretrained='imagenet')
        load_image_fn = utils.LoadTransformImage(model)

    else:
        raise ValueError("doesn't support %s" % params['model'])

    model.last_linear = utils.Identity()
    model = nn.DataParallel(model)
    model = model.cuda()
    prepro_feats.extract_feats(params, model, load_image_fn)
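For reference, a sketch of what `load_image_fn` produces (assuming a local frame file; `LoadTransformImage` pairs `pretrainedmodels`' per-model preprocessing with image loading):

import pretrainedmodels
from pretrainedmodels import utils

model = pretrainedmodels.resnet152(pretrained='imagenet')
load_image_fn = utils.LoadTransformImage(model)
img = load_image_fn('frames/000001.jpg')  # hypothetical path
print(img.shape)  # torch.Size([3, 224, 224]), normalized for this model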
Example No. 3
def extract_image_feats(video_path):
    hasilPred.configure(text="Making prediction....")
    model = resnet152(pretrained='imagenet')
    model = model.cuda()
    model.last_linear = utils.Identity()
    model.eval()
    C, H, W = 3, 224, 224
    load_image_fn = utils.LoadTransformImage(model)
    dst = os.path.join(video_path.split('\\')[0], 'info')
    if os.path.exists(dst):
        print(" Menghapus Direktori: " + dst + "\\")
        shutil.rmtree(dst)
    os.makedirs(dst)
    with open(os.devnull, "w") as ffmpeg_log:
        command = 'ffmpeg -i ' + video_path + ' -vf scale=400:300 ' + '-qscale:v 2 ' + '{0}/%06d.jpg'.format(
            dst)
        subprocess.call(command,
                        shell=True,
                        stdout=ffmpeg_log,
                        stderr=ffmpeg_log)
    list_image = sorted(glob.glob(os.path.join(dst, '*.jpg')))
    samples = np.round(np.linspace(0, len(list_image) - 1, 80))
    list_image = [list_image[int(sample)] for sample in samples]
    images = torch.zeros((len(list_image), C, H, W))
    for i in range(len(list_image)):
        img = load_image_fn(list_image[i])
        images[i] = img
    with torch.no_grad():
        image_feats = model(images.cuda().squeeze())
    image_feats = image_feats.cpu().numpy()
    for file in os.listdir(dst):
        if file.endswith('.jpg'):
            os.remove(os.path.join(dst, file))

    return image_feats
Example No. 4
def extract_image_feats(video_path):
    print('extracting image features...')
    model = resnet152(pretrained='imagenet')
    model = model.cuda()
    # model = nn.DataParallel(model)
    model.last_linear = utils.Identity()
    model.eval()
    C, H, W = 3, 224, 224
    load_image_fn = utils.LoadTransformImage(model)
    dst = os.path.join(video_path.split('/')[0], 'info')
    with open(os.devnull, "w") as ffmpeg_log:
        command = 'ffmpeg -i ' + video_path + ' -vf scale=400:300 ' + '-qscale:v 2 '+ '{0}/%06d.jpg'.format(dst)
        subprocess.call(command, shell=True, stdout=ffmpeg_log, stderr=ffmpeg_log)
    image_list = sorted(glob.glob(os.path.join(dst, '*.jpg')))
    samples = np.round(np.linspace(0, len(image_list) - 1, 80))
    image_list = [image_list[int(sample)] for sample in samples]
    images = torch.zeros((len(image_list), C, H, W))
    for i in range(len(image_list)):
        img = load_image_fn(image_list[i])
        images[i] = img
    with torch.no_grad():
        image_feats = model(images.cuda().squeeze())
    image_feats = image_feats.cpu().numpy()
    for file in os.listdir(dst):
        if file.endswith('.jpg'):
            os.remove(os.path.join(dst, file))

    return image_feats
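The sampling line above is worth unpacking: `np.linspace` spreads 80 indices evenly across however many frames ffmpeg produced, so short and long videos both yield exactly 80 feature rows. A small numeric sketch:

import numpy as np

n_frames = 400  # hypothetical frame count
samples = np.round(np.linspace(0, n_frames - 1, 80)).astype(int)
print(samples[:5])   # [ 0  5 10 15 20]
print(len(samples))  # 80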
Example No. 5
def fix_frame_extract(frame_path, feats_path, frames_num, model, video_name):
    # load model
    C, H, W = 3, 224, 224
    if model == 'resnet152':
        model = pretrainedmodels.resnet152(pretrained='imagenet')
    elif model == 'vgg16':
        model = pretrainedmodels.vgg16(pretrained='imagenet')
    elif model == 'inception_v4':
        C, H, W = 3, 299, 299
        model = pretrainedmodels.inceptionv4(pretrained='imagenet')
    else:
        raise ValueError("doesn't support %s" % model)
    model.last_linear = utils.Identity()
    model = model.to(device)
    model.eval()
    load_image_fn = utils.LoadTransformImage(model)

    # load data
    img_list = sorted(frame_path.glob('*.jpg'))
    # get index
    samples_ix = np.linspace(0, len(img_list) - 1, frames_num).astype(int)
    img_list = [img_list[i] for i in samples_ix]
    # build tensor
    imgs = torch.zeros([len(img_list), C, H, W])
    for i in range(len(img_list)):
        img = load_image_fn(img_list[i])
        imgs[i] = img
    imgs = imgs.to(device)
    with torch.no_grad():
        feats = model(imgs)
    feats = feats.cpu().numpy()
    # save
    np.save(os.path.join(feats_path, video_name + ".npy"), feats)
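A hypothetical call (note that `frame_path` must be a `pathlib.Path`, since the function calls `.glob()` on it, and a module-level `device` is assumed to exist):

from pathlib import Path

fix_frame_extract(Path('frames/video0'), 'feats', frames_num=40,
                  model='resnet152', video_name='video0')
# writes feats/video0.npy with shape (40, 2048) for resnet152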
Example No. 6
def generate_2D_model(opt):
    if opt['model'] == 'inception_v3':
        C, H, W = 3, 299, 299
        model = pretrainedmodels.inceptionv3(pretrained='imagenet')
        load_image_fn = utils.LoadTransformImage(model)
    elif opt['model'] == 'vgg16':
        C, H, W = 3, 224, 224
        model = pretrainedmodels.vgg16(pretrained='imagenet')
        load_image_fn = utils.LoadTransformImage(model)
    elif opt['model'] == 'vgg19':
        C, H, W = 3, 224, 224
        model = pretrainedmodels.vgg19(pretrained='imagenet')
        load_image_fn = utils.LoadTransformImage(model)
    elif opt['model'] == 'resnet50':
        C, H, W = 3, 224, 224
        model = pretrainedmodels.resnet50(pretrained='imagenet')
        load_image_fn = utils.LoadTransformImage(model)
    elif opt['model'] == 'resnet101':
        C, H, W = 3, 224, 224
        model = pretrainedmodels.resnet101(pretrained='imagenet')
        load_image_fn = utils.LoadTransformImage(model)
    elif opt['model'] == 'resnet152':
        C, H, W = 3, 224, 224
        model = pretrainedmodels.resnet152(pretrained='imagenet')
        load_image_fn = utils.LoadTransformImage(model)
    elif opt['model'] == 'inception_v4':
        C, H, W = 3, 299, 299
        model = pretrainedmodels.inceptionv4(num_classes=1000,
                                             pretrained='imagenet')
        load_image_fn = utils.LoadTransformImage(model)
    elif opt['model'] == 'nasnet':
        C, H, W = 3, 331, 331
        model = pretrainedmodels.nasnetalarge(num_classes=1001,
                                              pretrained='imagenet+background')
        load_image_fn = utils.LoadTransformImage(model)
    else:
        raise ValueError("doesn't support %s" % opt['model'])

    model.last_linear = utils.Identity()
    model = nn.DataParallel(model)
    # if opt['saved_model'] != '':
    #     model.load_state_dict(torch.load(opt['saved_model']), strict=False)
    model = model.cuda()
    return model
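A design note worth flagging: unlike `generate_C2D_model` in Example No. 1, this helper returns only the `DataParallel`-wrapped model, so the caller has to reconstruct `load_image_fn` and the input size itself. A hypothetical call:

model = generate_2D_model({'model': 'resnet152'})
model.eval()
# feats = model(batch) now yields pooled features, e.g. (N, 2048) for resnet152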
Example No. 7
def extract_feats(frame_path, feats_path, interval, model, video_name):
    """
    extract feature from frames of one video
    :param video_name:
    :param model: name of model
    :param frame_path: path of frames
    :param feats_path: path to store results
    :param interval: (str) The interval when extract frames from videos
    :return: None
    """
    # load model
    C, H, W = 3, 224, 224
    if model == 'resnet152':
        model = pretrainedmodels.resnet152(pretrained='imagenet')
    elif model == 'vgg16':
        model = pretrainedmodels.vgg16(pretrained='imagenet')
    elif model == 'inception_v4':
        C, H, W = 3, 299, 299
        model = pretrainedmodels.inceptionv4(pretrained='imagenet')
    else:
        raise ValueError("doesn't support %s" % model)
    model.last_linear = utils.Identity()
    model = model.to(device)
    model.eval()
    load_image_fn = utils.LoadTransformImage(model)

    # load data
    img_list = sorted(frame_path.glob('*.jpg'))
    # get index
    samples_ix = np.arange(0, len(img_list), interval)
    img_list = [img_list[int(i)] for i in samples_ix]
    # build tensor
    imgs = torch.zeros([len(img_list), C, H, W])
    for i in range(len(img_list)):
        img = load_image_fn(img_list[i])
        imgs[i] = img
    imgs = imgs.to(device)
    with torch.no_grad():
        feats = model(imgs)
    feats = feats.cpu().numpy()
    # save
    np.save(os.path.join(feats_path, video_name + ".npy"), feats)
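Unlike Example No. 5, which always keeps `frames_num` frames, this sampler keeps every `interval`-th frame, so the number of feature rows grows with video length. A small sketch of the difference:

import numpy as np

print(np.arange(0, 10, 3))                         # [0 3 6 9] -> count varies with length
print(np.round(np.linspace(0, 9, 4)).astype(int))  # [0 3 6 9] -> count fixed at 4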
Example No. 8
def main(args):
    global C, H, W
    coco_labels = json.load(open(args.coco_labels))
    num_classes = coco_labels['num_classes']
    if args.model == 'inception_v3':
        C, H, W = 3, 299, 299
        model = pretrainedmodels.inceptionv3(pretrained='imagenet')

    elif args.model == 'resnet152':
        C, H, W = 3, 224, 224
        model = pretrainedmodels.resnet152(pretrained='imagenet')

    elif args.model == 'inception_v4':
        C, H, W = 3, 299, 299
        model = pretrainedmodels.inceptionv4(num_classes=1000,
                                             pretrained='imagenet')

    else:
        raise ValueError("doesn't support %s" % args.model)

    load_image_fn = utils.LoadTransformImage(model)
    dim_feats = model.last_linear.in_features
    model = MILModel(model, dim_feats, num_classes)
    model = model.cuda()
    dataset = CocoDataset(coco_labels)
    dataloader = DataLoader(dataset, batch_size=args.batch_size, shuffle=True)
    optimizer = optim.Adam(model.parameters(),
                           lr=args.learning_rate,
                           weight_decay=args.weight_decay)
    exp_lr_scheduler = optim.lr_scheduler.StepLR(
        optimizer,
        step_size=args.learning_rate_decay_every,
        gamma=args.learning_rate_decay_rate)

    crit = nn.MultiLabelSoftMarginLoss()
    if not os.path.isdir(args.checkpoint_path):
        os.mkdir(args.checkpoint_path)
    train(dataloader, model, crit, optimizer, exp_lr_scheduler, load_image_fn,
          args)
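The `dim_feats` line above relies on a convention of the `pretrainedmodels` package: every backbone exposes its classifier as `last_linear`, so its input width can be read before the layer is replaced. A quick sketch:

import pretrainedmodels

m = pretrainedmodels.resnet152(pretrained=None)  # weights not needed for this check
print(m.last_linear.in_features)  # 2048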
Example No. 9
def extract(params):  # function name assumed; the opening of this example is truncated in the source
    if params['model'] == 'resnet152':  # reconstructed branch, following the pattern of the other examples
        C, H, W = 3, 224, 224
        model = pretrainedmodels.resnet152(pretrained='imagenet')
    elif params['model'] == 'resnet34':
        C, H, W = 3, 224, 224
        model = pretrainedmodels.resnet34(pretrained='imagenet')
    elif params['model'] == 'inceptionresnetv2':
        C, H, W = 3, 299, 299
        model = pretrainedmodels.inceptionresnetv2(
            num_classes=1001, pretrained='imagenet+background')
    elif params['model'] == 'googlenet':
        C, H, W = 3, 224, 224
        model = googlenet(pretrained=True)
        print(model)
    else:
        print("doesn't support %s" % (params['model']))

    if params['model'] != 'googlenet':
        load_image_fn = utils.LoadTransformImage(model)
        model.last_linear = utils.Identity()
    else:
        load_image_fn = google_load()

    model = model.cuda()

    #summary(model, (C, H, W))
    if params['test_latency']:
        test_latency(params, model, load_image_fn, C, H, W)
    else:
        extract_feats(params, model, load_image_fn, C, H, W)
'''
python extract_image_feats_from_frames.py \
--frame_path "/home/yangbang/VideoCaptioning/MSRVTT/all_frames/" \
--feat_path "/home/yangbang/VideoCaptioning/MSRVTT/feats/" \
Example No. 10
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--video_dir',
        type=str,
        default='../msrvtt_2017/train-video',
        help='The video dir that one would like to extract audio file from')
    parser.add_argument('--output_dir',
                        type=str,
                        default='../msrvtt_2017/preprocessed',
                        help='The file output directory')
    parser.add_argument(
        '--output_channels',
        type=int,
        default=1,
        help='The number of output audio channels, default to 1')
    parser.add_argument(
        '--output_frequency',
        type=int,
        default=16000,
        help='The output audio frequency in Hz, default to 16000')
    parser.add_argument(
        '--band_width',
        type=int,
        default=160,
        help=
        'Bandwidth specified to sample the audio (unit in kbps), default to 160'
    )
    parser.add_argument(
        '--model',
        type=str,
        default='resnet152',
        help=
        'The pretrained model to use for extracting image features, default to resnet152'
    )
    parser.add_argument('--gpu',
                        type=str,
                        default='0',
                        help='The CUDA_VISIBLE_DEVICES argument, default to 0')
    parser.add_argument(
        '--n_frame_steps',
        type=int,
        default=80,
        help='The number of frames to extract from a single video')
    opt = parser.parse_args()
    opt = vars(opt)

    if not os.path.exists(opt['output_dir']):
        os.mkdir(opt['output_dir'])
    vToA(opt)
    split_audio(opt)
    print('cleaning up original .wav files...')
    out_dir = opt['output_dir']
    for file in os.listdir(out_dir):
        if file.endswith('.wav'):
            os.remove(os.path.join(out_dir, file))

    os.environ['CUDA_VISIBLE_DEVICES'] = opt['gpu']
    if opt['model'] == 'resnet152':
        C, H, W = 3, 224, 224
        model = pretrainedmodels.resnet152(pretrained='imagenet')
        load_image_fn = utils.LoadTransformImage(model)
    elif opt['model'] == 'inception_v3':
        C, H, W = 3, 299, 299
        model = pretrainedmodels.inceptionv3(pretrained='imagenet')
        load_image_fn = utils.LoadTransformImage(model)
    elif opt['model'] == 'vgg16':
        C, H, W = 3, 224, 224
        model = pretrainedmodels.vgg16(pretrained='imagenet')
        load_image_fn = utils.LoadTransformImage(model)
    else:
        raise ValueError('The image model is not supported: %s' % opt['model'])

    model.last_linear = utils.Identity()
    model = nn.DataParallel(model)

    model = model.cuda()
    extract_image_feats(opt, model, load_image_fn)
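One caveat for scripts structured like this `main`: `CUDA_VISIBLE_DEVICES` is only honoured if it is set before the process first initialises CUDA, which the code above respects by assigning it before any `.cuda()` call. A minimal sketch:

import os

os.environ['CUDA_VISIBLE_DEVICES'] = '1'  # must happen before CUDA is initialised

import torch
print(torch.cuda.device_count())  # reports only the devices left visible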