Example #1
def main(argv):
  parser = argparse.ArgumentParser()
  parser.add_argument(
    'video_list',
    help = 'Input video list. Put path to video file on each line.')
  parser.add_argument(
    'output_dir',
    help = 'Output directory.')
  parser.add_argument(
    '--sample_rate',
    type = float,
    default = 5.0,
    help = 'Number of frames sampled per second')
  parser.add_argument(
    '--model_def',
    default = '/auto/iris-00/rn/chensun/ThirdParty/caffe_models/vgg_16/VGG_ILSVRC_16_layers_deploy.prototxt',
    help = 'Model definition file (default VGG16)')
  parser.add_argument(
    '--pretrained_model',
    default = '/auto/iris-00/rn/chensun/ThirdParty/caffe_models/vgg_16/VGG_ILSVRC_16_layers.caffemodel',
    help = 'Model parameter file (default VGG16)')
  parser.add_argument(
    '--layers',
    default = 'fc6,fc7',
    help = 'Layers to be extracted, separated by commas')
  parser.add_argument(
    '--cpu',
    action = 'store_true',
    help = 'Use CPU if set')
  parser.add_argument(
    '--oversample',
    action = 'store_true',
    help = 'Oversample 10 patches per frame if set')
  args = parser.parse_args()
  if args.cpu:
    caffe.set_mode_cpu()
    print 'CPU mode'
  else:
    caffe.set_mode_gpu()
    print 'GPU mode'
  oversample = args.oversample
  # feature extraction
  extractor = FeatExtractor(args.model_def, args.pretrained_model, oversample=oversample)
  blobs = args.layers.split(',')
  with open(args.video_list) as f:
    videos = [l.rstrip() for l in f]
  for video_file in videos:
    frames = load_video(video_file, args.sample_rate)
    if len(frames) < 1: # failed to open the video
      continue
    start = time.time()
    feats = extractor.extract_batch(frames, blobs)
    print '%s feature extracted in %f seconds.' % (os.path.basename(video_file), time.time()-start)
    # save the features
    for blob in blobs:
      feats[blob] = np.array(feats[blob])
    save_matrix(feats, os.path.join(args.output_dir, '%s.mat' % os.path.basename(video_file).split('.')[0]))
  return
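These CLI snippets assume their imports (argparse, os, time, numpy as np, caffe) plus project-local helpers (FeatExtractor, load_video, save_matrix) that are not shown. Purely as a hypothetical sketch, an OpenCV-based load_video compatible with the calls above might look like the following; it returns an empty list on failure so the caller's len(frames) < 1 check still works. cv2 is an assumed dependency, and sample_rate is assumed positive here:

import cv2  # OpenCV; assumed, not confirmed by the snippet

def load_video(video_file, sample_rate):
  # Hypothetical sketch: decode a video and return frames sampled at
  # roughly `sample_rate` frames per second. Returns [] when the file
  # cannot be opened, matching the len(frames) < 1 check above.
  frames = []
  cap = cv2.VideoCapture(video_file)
  if not cap.isOpened():
    return frames
  fps = cap.get(cv2.CAP_PROP_FPS)
  if not fps or fps <= 0:
    return frames  # cannot determine the native frame rate
  step = max(int(round(fps / sample_rate)), 1)
  idx = 0
  while True:
    ok, frame = cap.read()
    if not ok:
      break
    if idx % step == 0:
      frames.append(frame)
    idx += 1
  cap.release()
  return frames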
Example #2
def main(argv):
    parser = argparse.ArgumentParser()
    parser.add_argument(
        'video_list',
        help='Input video list. Put path to video file on each line.')
    parser.add_argument('output_dir', help='Output directory.')
    parser.add_argument('--sample_rate',
                        type=float,
                        default=5.0,
                        help='Number of frames sampled per second')
    parser.add_argument(
        '--model_def',
        default=
        '/auto/iris-00/rn/chensun/ThirdParty/caffe_models/vgg_16/VGG_ILSVRC_16_layers_deploy.prototxt',
        help='Model definition file (default VGG16)')
    parser.add_argument(
        '--pretrained_model',
        default=
        '/auto/iris-00/rn/chensun/ThirdParty/caffe_models/vgg_16/VGG_ILSVRC_16_layers.caffemodel',
        help='Model parameter file (default VGG16)')
    parser.add_argument('--layers',
                        default='fc6,fc7',
                        help='Layers to be extracted, separated by commas')
    parser.add_argument('--cpu', action='store_true', help='Use CPU if set')
    parser.add_argument('--oversample',
                        action='store_true',
                        help='Oversample 10 patches per frame if set')
    args = parser.parse_args()
    if args.cpu:
        caffe.set_mode_cpu()
        print 'CPU mode'
    else:
        caffe.set_mode_gpu()
        print 'GPU mode'
    oversample = args.oversample
    extractor = FeatExtractor(args.model_def,
                              args.pretrained_model,
                              oversample=oversample)
    blobs = args.layers.split(',')
    with open(args.video_list) as f:
        videos = [l.rstrip() for l in f]
    for video_file in videos:
        frames = load_video(video_file, args.sample_rate)
        if len(frames) < 1:  # failed to open the video
            continue
        start = time.time()
        feats = extractor.extract_batch(frames, blobs)
        print '%s feature extracted in %f seconds.' % (
            os.path.basename(video_file), time.time() - start)
        # save the features
        for blob in blobs:
            feats[blob] = np.array(feats[blob])
        save_matrix(
            feats,
            os.path.join(args.output_dir, '%s.mat' %
                         os.path.basename(video_file).split('.')[0]))
    return
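Example #2 ends, like Example #1, by saving the per-blob feature dict with save_matrix. If the .mat extension means a MATLAB file, a stand-in could be a thin scipy.io.savemat wrapper; this is an assumption, since the real helper is project-local and not shown:

from scipy import io as sio  # assumed dependency

def save_matrix(feats, out_path):
    # Hypothetical sketch: persist the {blob_name: ndarray} dict as a
    # MATLAB .mat file, one variable per extracted layer.
    sio.savemat(out_path, feats)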
Example #3
def main(argv):
  parser = argparse.ArgumentParser()
  parser.add_argument(
    'video_list',
    help = 'Input video list. Put path to video file on each line.')
  parser.add_argument(
    'output_dir',
    help = 'Output directory.')
  parser.add_argument(
    '--sample_rate',
    type = float,
    default = 5.0,
    help = 'Number of frames sampled per second')
  parser.add_argument(
    '--crop_dim',
    type = int,
    default = 224,
    help = 'Crop dimension as defined in the prototxt file (width == height == 224 by default)')
  parser.add_argument(
    '--model_def',
    default = '/auto/iris-00/rn/chensun/ThirdParty/caffe_models/vgg_16/VGG_ILSVRC_16_layers_deploy.prototxt',
    help = 'Model definition file (default VGG16)')
  parser.add_argument(
    '--pretrained_model',
    default = '/auto/iris-00/rn/chensun/ThirdParty/caffe_models/vgg_16/VGG_ILSVRC_16_layers.caffemodel',
    help = 'Model parameter file (default VGG16)')
  parser.add_argument(
    '--layers',
    default = 'fc6,fc7',
    help = 'Layers to be extracted, separated by commas')
  parser.add_argument(
    '--cpu',
    action = 'store_true',
    help = 'Use CPU if set')
  parser.add_argument(
    '--oversample',
    action = 'store_true',
    help = 'Oversample 10 patches per frame if set')
  parser.add_argument(
    '--gpu_id',
    type = int,
    default = 0,
    help = 'The GPU ID to use.')
  args = parser.parse_args()
  if args.cpu:
    caffe.set_mode_cpu()
    print 'CPU mode'
  else:
    caffe.set_device(args.gpu_id)
    caffe.set_mode_gpu()
    print 'GPU mode'
  oversample = args.oversample
  extractor = FeatExtractor(args.model_def, args.pretrained_model, oversample=oversample, crop_dim=args.crop_dim)
  blobs = args.layers.split(',')
  with open(args.video_list) as f:
    videos = [l.rstrip() for l in f]

  print "batch_size=" + str(extractor.batch_size)
  if(args.sample_rate > 0):
    print "The input list is a video file list"
  elif(args.sample_rate == 0):
    print "The input list is tar of key frames list"

  for video_file in videos:
    if args.sample_rate > 0:
      frames = load_video(video_file, args.sample_rate)
    elif args.sample_rate == 0:
      try:
        frames = load_keyframes_targz(video_file)
      except Exception: # skip archives that fail to load
        continue
    else:
      continue # negative sample rates are not supported
    
    if len(frames) < 1: # failed to open the video
      continue
    start = time.time()
    feats = extractor.extract_batch(frames, blobs)
    print '%s feature extracted in %f seconds.' % (os.path.basename(video_file), time.time()-start)
    
    out_vid_feats = []
    # average pooling and normalization
    for blob in blobs:
      featpooling = np.mean(feats[blob], axis=0)
      l2norm = LA.norm(featpooling, 2)
      featpooling = featpooling / l2norm
      sign_pnorm = np.sign(featpooling) * np.sqrt(np.abs(featpooling)) # signed component-wise power norm, p = 1/2
      out_vid_feats.append(sign_pnorm)

    vid_feat = np.hstack(out_vid_feats) / len(out_vid_feats) # concatenate the layer outputs, scaled by the layer count

    # convert to libsvm format
    libsvm = []
    for idx, val in enumerate(vid_feat):
      if val != 0:
        libsvm.append("{}:{:.6f}".format(idx + 1, val))
    svmout = " ".join(libsvm)

    # write out as the bow file
    outfilename = os.path.join(args.output_dir, os.path.basename(video_file).split('.')[0] + ".bow")
    with open(outfilename, 'w') as f_out:
      f_out.write(svmout)
      f_out.write("\n")
    
    #write out as the gzip
    #outfilename = os.path.join(args.output_dir, os.path.basename(video_file).split('.')[0]+".gz")
    #f_out = gzip.open(outfilename, 'wb')
    #f_out.writelines(svmout)
    #f_out.close()
    
    # save the features
    #for blob in blobs:
    #  feats[blob] = np.array(feats[blob])
    #save_matrix(feats, os.path.join(args.output_dir, '%s.mat' % os.path.basename(video_file).split('.')[0]))
  return
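The post-processing in Example #3 (average-pool the per-frame features, L2-normalize, then apply signed square-root power normalization) can be exercised in isolation. A minimal self-contained sketch, with random data standing in for real Caffe activations:

import numpy as np
from numpy import linalg as LA

def pool_and_normalize(per_frame_feats):
  # Average-pool per-frame features into one vector, L2-normalize it,
  # then apply the signed square-root (power) normalization used above.
  pooled = np.mean(per_frame_feats, axis=0)
  pooled = pooled / LA.norm(pooled, 2)
  return np.sign(pooled) * np.sqrt(np.abs(pooled))

# Toy stand-in for one blob: 20 frames of 4096-dim fc activations.
demo = pool_and_normalize(np.random.randn(20, 4096))
print(demo.shape)  # -> (4096,)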
Example #4
def main():
    global device
    global data_parallel
    print("=> will save everthing to {}".format(args.output_dir))
    output_dir = Path(args.output_dir)
    output_dir.makedirs_p()

    # Data loading code
    train_transform = pose_transforms.Compose([
        pose_transforms.RandomHorizontalFlip(),
        pose_transforms.ArrayToTensor()
    ])
    valid_transform = pose_transforms.Compose(
        [pose_transforms.ArrayToTensor()])

    print("=> fetching sequences in '{}'".format(args.dataset_dir))
    dataset_dir = Path(args.dataset_dir)
    print("=> preparing train set")
    train_set = dataset()  # transform=train_transform
    print("=> preparing val set")
    val_set = pose_framework_KITTI(dataset_dir,
                                   args.test_sequences,
                                   transform=valid_transform,
                                   seed=args.seed,
                                   shuffle=False)
    train_loader = torch.utils.data.DataLoader(train_set,
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=args.workers,
                                               pin_memory=True)
    val_loader = torch.utils.data.DataLoader(val_set,
                                             batch_size=1,
                                             shuffle=False,
                                             num_workers=args.workers,
                                             pin_memory=True)

    # create model
    odometry_net = PoseExpNet().to(device)
    depth_net = DispNetS().to(device)
    feat_extractor = FeatExtractor().to(device)

    # init weights of model
    if args.odometry is None:
        odometry_net.init_weights()
    elif args.odometry:
        weights = torch.load(args.odometry)
        odometry_net.load_state_dict(weights)
    if args.depth is None:
        depth_net.init_weights()
    elif args.depth:
        weights = torch.load(args.depth)
        depth_net.load_state_dict(weights['state_dict'])

    feat_extractor.init_weights()

    cudnn.benchmark = True
    if args.cuda and args.gpu_id in range(2):
        os.environ["CUDA_VISIBLE_DEVICES"] = str(args.gpu_id)
    elif args.cuda:
        data_parallel = True
        odometry_net = torch.nn.DataParallel(odometry_net)
        depth_net = torch.nn.DataParallel(depth_net)
        feat_extractor = torch.nn.DataParallel(feat_extractor)

    optim_params = [{
        'params': odometry_net.parameters(),
        'lr': args.lr
    }, {
        'params': depth_net.parameters(),
        'lr': args.lr
    }, {
        'params': feat_extractor.parameters(),
        'lr': args.lr
    }]

    # optimizer = optim.SGD(model.parameters(), lr=args.lr, weight_decay=args.weight_decay, momentum=args.momentum)
    optimizer = optim.Adam(optim_params,
                           betas=(0.9, 0.999),
                           eps=1e-08,
                           weight_decay=args.weight_decay)
    print("=> validating before training")
    #validate(odometry_net, depth_net, val_loader, 0, output_dir, True)
    print("=> training & validating")
    #validate(odometry_net, depth_net, val_loader, 0, output_dir)
    for epoch in range(1, args.epochs + 1):
        train(odometry_net, depth_net, feat_extractor, train_loader, epoch,
              optimizer)
        validate(odometry_net, depth_net, feat_extractor, val_loader, epoch,
                 output_dir)
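Example #4 gives each module its own Adam parameter group at the same initial args.lr. Keeping the groups separate means their rates can later be tuned independently; a hypothetical helper (not in the snippet) that decays every group's rate each epoch could slot in just before the train call:

def adjust_learning_rate(optimizer, epoch, base_lr, decay=0.5, step=10):
    # Hypothetical helper: decay each parameter group's learning rate
    # by `decay` every `step` epochs; optimizer.param_groups is the
    # standard PyTorch hook for per-group settings.
    lr = base_lr * (decay ** (epoch // step))
    for group in optimizer.param_groups:
        group['lr'] = lr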