예제 #1
0
def do_predict(test_video_emd,
               test_video_list,
               cand_video_emd,
               cand_video_list,
               rel_index=None,
               n=5,
               output_dir=None,
               overwrite=0,
               no_imgnorm=False):
    """Score test videos against candidate videos and build the ranking.

    Args:
        test_video_emd: embeddings of the test videos (passed to cal_score).
        test_video_list: ids of the test videos.
        cand_video_emd: embeddings of the candidate videos.
        cand_video_list: ids of the candidate videos.
        rel_index: forwarded unchanged to score2result.
        n: forwarded unchanged to score2result
            (presumably the number of results kept per video -- confirm).
        output_dir: when not None, the score matrix and both id lists are
            saved to ``<output_dir>/pred_scores_matrix.pth.tar``.
        overwrite: forwarded to checkToSkip for the output file.
        no_imgnorm: selects the similarity measure, 'cosine' when true and
            'dot' otherwise.

    Returns:
        Whatever score2result returns for the computed scores.

    Note:
        When checkToSkip returns a true value for the output file the whole
        process terminates through ``sys.exit(0)``; nothing is returned.
    """
    # NOTE(review): 'dot' is presumably enough when the embeddings were
    # already normalised by the model -- confirm against the encoder.
    measure = 'cosine' if no_imgnorm else 'dot'
    scores = cal_score(test_video_emd, cand_video_emd, measure=measure)

    video2predrank = score2result(scores, test_video_list, cand_video_list,
                                  rel_index, n)

    # Nothing to persist: hand the ranking straight back.
    if output_dir is None:
        return video2predrank

    output_file = os.path.join(output_dir, 'pred_scores_matrix.pth.tar')
    if checkToSkip(output_file, overwrite):
        sys.exit(0)
    makedirsforfile(output_file)

    payload = {
        'scores': scores,
        'test_videos': test_video_list,
        'cand_videos': cand_video_list
    }
    torch.save(payload, output_file)
    print("write score matrix into: " + output_file)

    return video2predrank
예제 #2
0
def process(options, collection, feat_name):
    """Collect the per-video ``.npy`` features of a collection into one file.

    The video ids listed in ``split/train.csv``, ``split/val.csv`` and
    ``split/test.csv`` are read (one stripped line per id).  For each id the
    array ``feature/<id>/<id>-<feat_name>.npy`` is loaded and written as text
    to ``FeatureData/<feat_name>/id.feature.txt``:

    * a 1-D array yields one line ``<id> v1 v2 ...``;
    * a 2-D array yields one line per row, named ``<id>_<row index>``.

    The text file is then handed to ``text2bin`` and removed afterwards.

    Args:
        options: object providing ``overwrite`` and ``rootpath``.
        collection: name of the collection directory below ``rootpath``.
        feat_name: feature name, used in the input file names and as the
            output directory name.

    Raises:
        ValueError: if no 1-D or 2-D feature array was found, so the feature
            dimensionality is unknown.

    Note:
        Exits the process with status 0 when ``checkToSkip`` returns a true
        value for ``FeatureData/<feat_name>/feature.bin``.
    """
    overwrite = options.overwrite
    rootpath = options.rootpath

    feature_dir = os.path.join(rootpath, collection, 'feature')
    resdir = os.path.join(rootpath, collection, 'FeatureData', feat_name)
    split_dir = os.path.join(rootpath, collection, 'split')

    # Read the ids split by split, closing every file as soon as it is read.
    train_val_test_set = []
    for split_name in ('train.csv', 'val.csv', 'test.csv'):
        with open(os.path.join(split_dir, split_name)) as split_file:
            train_val_test_set.extend(line.strip() for line in split_file)

    target_feat_file = os.path.join(resdir, 'id.feature.txt')
    if checkToSkip(os.path.join(resdir, 'feature.bin'), overwrite):
        sys.exit(0)
    makedirsforfile(target_feat_file)

    dim = None  # feature dimensionality, taken from the arrays themselves
    print('Processing %s - %s' % (collection, feat_name))
    with open(target_feat_file, 'w') as fw_feat:
        progbar = Progbar(len(train_val_test_set))
        for d in train_val_test_set:
            feat_file = os.path.join(feature_dir, d,
                                     '%s-%s.npy' % (d, feat_name))
            feats = np.load(feat_file)
            if feats.ndim == 1:  # video level feature
                dim = feats.shape[0]
                fw_feat.write('%s %s\n' %
                              (d, ' '.join(['%.6f' % x for x in feats])))
            elif feats.ndim == 2:  # frame level feature
                dim = feats.shape[1]
                for i, frame_feat in enumerate(feats):
                    frame_id = d + '_' + str(i)
                    fw_feat.write(
                        '%s %s\n' %
                        (frame_id, ' '.join(['%.6f' % x for x in frame_feat])))
            # Arrays of any other rank are skipped without a message.
            progbar.add(1)

    if dim is None:
        raise ValueError(
            'no 1-D or 2-D feature found for %s - %s; nothing to convert' %
            (collection, feat_name))

    text2bin(dim, [target_feat_file], resdir, 1)
    # Remove the intermediate text file without going through a shell, so
    # paths containing spaces or shell metacharacters are handled correctly.
    if os.path.exists(target_feat_file):
        os.remove(target_feat_file)
예제 #3
0
def main():
    """Train a ReLearning model, validate it every epoch and launch evaluation.

    Steps performed:

    1. Parse the command-line options and derive the run directory
       ``opt.logger_name`` from them.
    2. Exit with status 0 when ``checkToSkip`` returns a true value for
       ``model_best.pth.tar`` or ``val_perf.txt`` in that directory.
    3. Build the training / validation / candidate loaders and the model.
    4. Train for at most ``--num_epochs`` epochs.  After each epoch the model
       is validated, a checkpoint is saved and the learning rate is decayed;
       training stops early once more than ten consecutive epochs brought no
       improvement.
    5. Write the best validation scores to ``val_perf.txt``.
    6. Fill in ``TEMPLATE_eval.sh``, save it as ``do_eval_<collection>.sh``
       in the current directory and execute it.
    """
    # Hyper Parameters
    parser = argparse.ArgumentParser()
    parser.add_argument("--rootpath",
                        default=ROOT_PATH,
                        type=str,
                        help="rootpath (default: %s)" % ROOT_PATH)
    parser.add_argument("--overwrite",
                        default=0,
                        type=int,
                        help="overwrite existing file (default: 0)")
    parser.add_argument('--collection',
                        default='track_1_shows',
                        type=str,
                        help='collection')

    parser.add_argument('--feature',
                        default='inception-pool3',
                        type=str,
                        help="video feature.")
    parser.add_argument('--embed_size',
                        default=1024,
                        type=int,
                        help='Dimensionality of the video embedding.')

    parser.add_argument('--loss',
                        default='mrl',
                        type=str,
                        help='loss function.')
    parser.add_argument("--cost_style",
                        default='sum',
                        type=str,
                        help="cost_style (sum|mean)")
    parser.add_argument('--max_violation',
                        action='store_true',
                        help='Use max instead of sum in the rank loss.')
    parser.add_argument('--margin',
                        default=0.2,
                        type=float,
                        help='Rank loss margin.')
    parser.add_argument('--grad_clip',
                        default=2.,
                        type=float,
                        help='Gradient clipping threshold.')
    parser.add_argument('--optimizer',
                        default='adam',
                        type=str,
                        help='optimizer. (adam|rmsprop)')
    parser.add_argument('--learning_rate',
                        default=.001,
                        type=float,
                        help='Initial learning rate.')
    parser.add_argument('--lr_decay',
                        default=0.99,
                        type=float,
                        help='learning rate decay after each epoch')

    parser.add_argument('--num_epochs',
                        default=50,
                        type=int,
                        help='Number of training epochs.')
    parser.add_argument('--batch_size',
                        default=32,
                        type=int,
                        help='Size of a training mini-batch.')
    parser.add_argument('--workers',
                        default=2,
                        type=int,
                        help='Number of data loader workers.')
    parser.add_argument('--log_step',
                        default=100,
                        type=int,
                        help='Number of steps to print and record the log.')

    parser.add_argument('--measure',
                        default='cosine',
                        help='Similarity measure used (cosine|order)')
    parser.add_argument('--no_imgnorm',
                        action='store_true',
                        help='Do not normalize the image embeddings.')
    parser.add_argument('--postfix', default='run_0', type=str, help='')

    # augmentation for frame-level features
    parser.add_argument(
        '--stride',
        default='1',
        type=str,
        help='stride=1 means no frame-level data augmentation (default: 1)')
    # augmentation for video-level features
    parser.add_argument(
        '--aug_prob',
        default=0.0,
        type=float,
        help=
        'aug_prob=0 means no frame-level data augmentation, aug_prob=0.5 means half of video use augmented features(default: 0.0)'
    )
    parser.add_argument(
        '--perturb_intensity',
        default=1.0,
        type=float,
        help='perturbation intensity, epsilon  in Eq.2 (default: 1.0)')
    parser.add_argument(
        '--perturb_prob',
        default=0.5,
        type=float,
        help='perturbation probability, p in Eq.2 (default: 0.5)')

    opt = parser.parse_args()
    print(json.dumps(vars(opt), indent=2))

    # The run directory encodes every setting that influences the result.
    visual_info = 'feature_%s_embed_size_%d_no_imgnorm_%s' % (
        opt.feature, opt.embed_size, opt.no_imgnorm)
    loss_info = '%s_%s_margin_%.1f_max_violation_%s_%s' % (
        opt.loss, opt.measure, opt.margin, opt.max_violation, opt.cost_style)
    optimizer_info = '%s_lr_%.5f_%.2f_bs_%d' % (
        opt.optimizer, opt.learning_rate, opt.lr_decay, opt.batch_size)
    data_argumentation_info = 'frame_stride_%s_video_prob_%.1f_perturb_intensity_%.5f_perturb_prob_%.2f' % (
        opt.stride, opt.aug_prob, opt.perturb_intensity, opt.perturb_prob)

    opt.logger_name = os.path.join(opt.rootpath, opt.collection, 'cv',
                                   'ReLearning', visual_info, loss_info,
                                   optimizer_info, data_argumentation_info,
                                   opt.postfix)
    if checkToSkip(os.path.join(opt.logger_name, 'model_best.pth.tar'),
                   opt.overwrite):
        sys.exit(0)
    if checkToSkip(os.path.join(opt.logger_name, 'val_perf.txt'),
                   opt.overwrite):
        sys.exit(0)
    makedirsforfile(os.path.join(opt.logger_name, 'model_best.pth.tar'))

    logging.basicConfig(format='%(asctime)s %(message)s', level=logging.INFO)
    tb_logger.configure(opt.logger_name, flush_secs=5)

    # reading data
    train_video_set_file = os.path.join(opt.rootpath, opt.collection, 'split',
                                        'train.csv')
    val_video_set_file = os.path.join(opt.rootpath, opt.collection, 'split',
                                      'val.csv')
    train_video_list = read_video_set(train_video_set_file)
    val_video_list = read_video_set(val_video_set_file)

    train_rootpath = os.path.join(opt.rootpath, opt.collection,
                                  'relevance_train.csv')
    val_rootpath = os.path.join(opt.rootpath, opt.collection,
                                'relevance_val.csv')
    val_video2gtrank = read_csv_to_dict(val_rootpath)

    # '--stride' is a '-'-separated list of integers, e.g. "2-3".
    stride_list = [int(s) for s in opt.stride.strip().split('-')]
    opt.sum_subs = sum(stride_list)
    if opt.aug_prob <= 0:
        # Without video-level augmentation the pooled feature is used instead.
        opt.feature = "avg-" + opt.feature + "-stride%s" % opt.stride

    video_feat_path = os.path.join(opt.rootpath, opt.collection, 'FeatureData',
                                   opt.feature)
    video_feats = BigFile(video_feat_path)
    opt.feature_dim = video_feats.ndims

    # Load data loaders
    if opt.sum_subs > 1:
        video2subvideo_path = os.path.join(video_feat_path,
                                           'video2subvideo.txt')
        video2subvideo = read_dict(video2subvideo_path)
        train_loader = data.get_video_da_loader(train_rootpath,
                                                video_feats,
                                                opt,
                                                opt.batch_size,
                                                True,
                                                opt.workers,
                                                video2subvideo,
                                                opt.sum_subs,
                                                feat_path=video_feat_path)
    else:
        train_loader = data.get_video_da_loader(train_rootpath,
                                                video_feats,
                                                opt,
                                                opt.batch_size,
                                                True,
                                                opt.workers,
                                                feat_path=video_feat_path)
    val_feat_loader = data.get_feat_loader(val_video_list, video_feats,
                                           opt.batch_size, False, 1)
    cand_feat_loader = data.get_feat_loader(train_video_list + val_video_list,
                                            video_feats, opt.batch_size, False,
                                            1)

    # Construct the model
    model = ReLearning(opt)

    # Train the Model
    best_rsum = 0
    # Start with empty lists so the final report still works when no epoch
    # ever beats the initial best_rsum of 0.
    best_hit_k_scores = []
    best_recall_k_scores = []
    no_impr_counter = 0
    lr_counter = 0

    # The context manager closes the history file even if training raises.
    with open(os.path.join(opt.logger_name, 'val_perf_hist.txt'),
              'w') as fout_val_perf_hist:
        for epoch in range(opt.num_epochs):

            # train for one epoch
            # (the doubled blanks reproduce the Python 2 "print a, b" output)
            print("\nEpoch:  %d" % (epoch + 1))
            print("learning rate:  %s" %
                  (get_learning_rate(model.optimizer), ))
            train(opt, train_loader, model, epoch)

            # evaluate on validation set
            rsum, hit_k_scores, recall_K_scores = validate(
                val_feat_loader,
                cand_feat_loader,
                model,
                val_video2gtrank,
                log_step=opt.log_step,
                opt=opt)

            # remember best R@ sum and save checkpoint
            is_best = rsum > best_rsum
            best_rsum = max(rsum, best_rsum)
            if is_best:
                best_hit_k_scores = hit_k_scores
                best_recall_k_scores = recall_K_scores
            print('current perf:  %s' % (rsum, ))
            print('best perf:  %s' % (best_rsum, ))
            print('current hit_top_k:  %s' %
                  ([round(x, 3) for x in hit_k_scores], ))
            print('current recall_top_k:  %s' %
                  ([round(x, 3) for x in recall_K_scores], ))
            fout_val_perf_hist.write("epoch_%d %f\n" % (epoch, rsum))
            fout_val_perf_hist.flush()

            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'model': model.state_dict(),
                    'best_rsum': best_rsum,
                    'opt': opt,
                    'Eiters': model.Eiters,
                },
                is_best,
                filename='checkpoint_epoch_%s.pth.tar' % epoch,
                prefix=opt.logger_name + '/')

            lr_counter += 1
            decay_learning_rate(opt, model.optimizer, opt.lr_decay)
            if is_best:
                no_impr_counter = 0
                continue

            # Early stop occurs once the validation performance has failed
            # to improve for more than ten consecutive epochs.
            no_impr_counter += 1
            if no_impr_counter > 10:
                print("Early stopping happened")
                break

            # When the validation performance did not improve, an extra
            # decay with factor 0.5 is applied, but only after the current
            # learning rate has been used for at least 3 epochs.
            if lr_counter > 2:
                decay_learning_rate(opt, model.optimizer, 0.5)
                lr_counter = 0

    # output val performance
    best_hit_rounded = [round(x, 3) for x in best_hit_k_scores]
    best_recall_rounded = [round(x, 3) for x in best_recall_k_scores]
    print(json.dumps(vars(opt), indent=2))
    print('\nbest performance on validation:')
    print('hit_top_k %s' % (best_hit_rounded, ))
    print('recall_top_k %s' % (best_recall_rounded, ))
    with open(os.path.join(opt.logger_name, 'val_perf.txt'), 'w') as fout:
        fout.write('best performance on validation:')
        fout.write('\nhit_top_k: ' + ", ".join(map(str, best_hit_rounded)))
        # The label used to be misspelled as "ecall_top_k".
        fout.write('\nrecall_top_k: ' +
                   ", ".join(map(str, best_recall_rounded)))

    # generate and run the shell script for test
    with open('TEMPLATE_eval.sh') as template_file:
        script = template_file.read()
    script = script.replace('@@@rootpath@@@', opt.rootpath)
    script = script.replace('@@@collection@@@', opt.collection)
    script = script.replace('@@@overwrite@@@', str(opt.overwrite))
    script = script.replace('@@@model_path@@@', opt.logger_name)

    runfile = 'do_eval_%s.sh' % opt.collection
    # Close the script explicitly so it is fully written before it is run.
    with open(runfile, 'w') as script_file:
        script_file.write(script + '\n')
    os.system('chmod +x %s' % runfile)
    os.system('./%s' % runfile)
예제 #4
0
파일: test.py 프로젝트: kiminh/cbvr
def main():
    """Rank candidate videos for the val or test split with a saved model.

    The checkpoint given by ``--checkpoint_path`` supplies both the model
    weights and the training options (``checkpoint['opt']``).  Results go to
    the checkpoint's directory with ``/cv/`` replaced by
    ``/results/<test_set>/``:

    * ``pred_video2rank.csv`` -- the predicted ranking;
    * ``pred_scores_matrix.pth.tar`` -- written by ``do_predict``;
    * ``perf.txt`` -- hit/recall scores, only for ``--test_set val``.

    The process exits with status 0 when ``checkToSkip`` returns a true value
    for ``pred_video2rank.csv``.
    """
    # Hyper Parameters
    parser = argparse.ArgumentParser()
    parser.add_argument("--rootpath", default=ROOT_PATH, type=str, help="rootpath (default: %s)" % ROOT_PATH)
    parser.add_argument('--collection', default='track_1_shows', type=str, help='collection')
    parser.add_argument('--checkpoint_path', default='', type=str, metavar='PATH', help='path to latest checkpoint (default: none)')
    parser.add_argument("--test_set", default="val", type=str, help="val or test")
    parser.add_argument('--batch_size', default=128, type=int, help='Size of a training mini-batch.')
    parser.add_argument("--overwrite", default=0, type=int,  help="overwrite existing file (default: 0)")

    opt = parser.parse_args()
    print(json.dumps(vars(opt), indent=2))

    assert opt.test_set in ['val', 'test']
    is_val = opt.test_set == 'val'

    output_dir = os.path.dirname(opt.checkpoint_path.replace('/cv/', '/results/%s/' % opt.test_set))
    output_file = os.path.join(output_dir, 'pred_video2rank.csv')
    if checkToSkip(output_file, opt.overwrite):
        sys.exit(0)
    makedirsforfile(output_file)

    # reading data
    split_dir = os.path.join(opt.rootpath, opt.collection, 'split')
    train_video_list = read_video_set(os.path.join(split_dir, 'train.csv'))
    val_video_list = read_video_set(os.path.join(split_dir, 'val.csv'))
    if not is_val:
        test_video_list = read_video_set(os.path.join(split_dir, 'test.csv'))

    # load the checkpoint together with the options it was trained with
    # NOTE(review): torch.load unpickles the file, so only checkpoints from a
    # trusted source should be passed in.
    print("=> loading checkpoint '{}'".format(opt.checkpoint_path))
    checkpoint = torch.load(opt.checkpoint_path)
    options = checkpoint['opt']

    # set feature reader
    video_feat_path = os.path.join(opt.rootpath, opt.collection, 'FeatureData', options.feature)
    video_feats = BigFile(video_feat_path)

    # Build the query and candidate loaders; the candidate pool always
    # contains the query split itself.
    if is_val:
        val_rootpath = os.path.join(opt.rootpath, opt.collection, 'relevance_val.csv')
        val_video2gtrank = read_csv_to_dict(val_rootpath)
        val_feat_loader = data.get_feat_loader(val_video_list, video_feats, opt.batch_size, False, 1)
        cand_feat_loader = data.get_feat_loader(train_video_list + val_video_list, video_feats, opt.batch_size, False, 1)
    else:
        val_feat_loader = data.get_feat_loader(test_video_list, video_feats, opt.batch_size, False, 1)
        cand_feat_loader = data.get_feat_loader(train_video_list + val_video_list + test_video_list, video_feats, opt.batch_size, False, 1)

    # Construct the model
    model = ReLearning(options)
    model.load_state_dict(checkpoint['model'])
    val_video_embs, val_video_ids_list = encode_data(model, val_feat_loader, options.log_step, logging.info)
    cand_video_embs, cand_video_ids_list = encode_data(model, cand_feat_loader, options.log_step, logging.info)

    # overwrite=1: the skip decision was already taken above for output_file.
    video2predrank = do_predict(val_video_embs, val_video_ids_list, cand_video_embs, cand_video_ids_list, output_dir=output_dir, overwrite=1, no_imgnorm=options.no_imgnorm)
    write_csv_video2rank(output_file, video2predrank)

    if is_val:
        hit_top_k = [5, 10, 20, 30]
        recall_top_k = [50, 100, 200, 300]
        hit_k_scores = hit_k_own(val_video2gtrank, video2predrank, top_k=hit_top_k)
        recall_K_scores = recall_k_own(val_video2gtrank, video2predrank, top_k=recall_top_k)
        hit_rounded = [round(x, 3) for x in hit_k_scores]
        recall_rounded = [round(x, 3) for x in recall_K_scores]

        # output val performance
        print('\nbest performance on validation:')
        print('hit_top_k %s' % (hit_rounded, ))
        print('recall_top_k %s' % (recall_rounded, ))
        with open(os.path.join(output_dir, 'perf.txt'), 'w') as fout:
            fout.write('best performance on validation:')
            fout.write('\nhit_top_k: ' + ", ".join(map(str, hit_rounded)))
            # The label used to be misspelled as "ecall_top_k".
            fout.write('\nrecall_top_k: ' + ", ".join(map(str, recall_rounded)))
예제 #5
0
def process(opt):
    """Pool frame-level features into video and sub-video level features.

    Frame ids of the feature set ``FeatureData/<feature>`` are expected to
    look like ``<video_id>_<frame number>``.  For every video the features of
    all its frames are pooled (mean for ``avg``, maximum for ``max``) into one
    vector; in addition one pooled vector named ``<video_id>_sub<k>`` is
    produced for every frame subset returned by
    ``Frame_Level_Augmenter(stride).get_aug_index``.

    Everything is written as text to ``id_feat.txt`` in
    ``FeatureData/<pooling_style>-<feature>-stride<stride>``, handed to
    ``text2bin`` and the text file is removed.  The mapping from each video to
    its feature names is stored as ``video2subvideo.txt`` (``str`` of a dict).

    Args:
        opt: object providing ``rootpath``, ``collection``, ``feature``,
            ``stride`` (a '-'-separated string of integers), ``overwrite``
            and ``pooling_style`` ('avg' or 'max').

    Raises:
        ValueError: if ``pooling_style`` is neither 'avg' nor 'max', or if
            the feature set contains no frames.

    Note:
        Exits the process with status 0 when ``checkToSkip`` returns a true
        value for ``feature.bin`` in the output directory.
    """
    rootpath = opt.rootpath
    collection = opt.collection
    feature = opt.feature
    stride = opt.stride
    overwrite = opt.overwrite
    pooling_style = opt.pooling_style

    feat_path = os.path.join(rootpath, collection, "FeatureData", feature)

    output_dir = os.path.join(
        rootpath, collection, "FeatureData",
        '%s-%s-stride%s' % (pooling_style, feature, stride))
    feat_combined_file = os.path.join(output_dir, "id_feat.txt")
    if checkToSkip(os.path.join(output_dir, "feature.bin"), overwrite):
        sys.exit(0)
    makedirsforfile(feat_combined_file)

    # Fail early with a clear message instead of a NameError deep in the loop.
    if pooling_style not in ('avg', 'max'):
        raise ValueError(
            "unsupported pooling_style %r (expected 'avg' or 'max')" %
            (pooling_style, ))

    def pool(vectors):
        # Collapse a list of frame vectors into a single vector.
        stacked = np.array(vectors)
        if pooling_style == 'avg':
            return stacked.mean(axis=0)
        return stacked.max(axis=0)

    print("Generate augmented frame-level features and operate mean pooling...")

    # Group the frame numbers by video; the frame number is the part after
    # the last underscore of the frame id.
    feat_data = BigFile(feat_path)
    video2fmnos = {}
    for frame_id in feat_data.names:
        video_id, _, fm_no = frame_id.strip().rpartition("_")
        video2fmnos.setdefault(video_id, []).append(int(fm_no))

    # Rebuild the frame ids in ascending frame order.
    # NOTE(review): this assumes the frame numbers are not zero-padded,
    # otherwise the rebuilt ids differ from the stored ones -- confirm.
    video2frames = {}
    for video_id, fmnos in video2fmnos.items():
        video2frames[video_id] = [
            video_id + "_" + str(fm_no) for fm_no in sorted(fmnos)
        ]

    stride = [int(s) for s in stride.strip().split('-')]
    f_auger = Frame_Level_Augmenter(stride)

    video2subvideo = {}
    feat_vec = None
    progbar = Progbar(len(video2frames))
    with open(feat_combined_file, 'w') as fout:
        for video, frame_ids in video2frames.items():
            # The first entry of every video is the video itself.
            names = [video]
            video2subvideo[video] = names

            # output the whole video level feature
            _, feats = feat_data.read(frame_ids)
            feat_vec = pool(feats)
            fout.write(video + " " + " ".join(map(str, feat_vec)) + '\n')

            # output the sub video level features
            # get_aug_index yields one list of frame positions per sub-video
            aug_index = f_auger.get_aug_index(len(frame_ids))
            for counter, sub_index in enumerate(aug_index):
                sub_frames = [frame_ids[idx] for idx in sub_index]
                _, sub_feats = feat_data.read(sub_frames)
                feat_vec = pool(sub_feats)

                sub_name = video + "_sub%d" % counter
                names.append(sub_name)
                fout.write(sub_name + " " + " ".join(map(str, feat_vec)) + '\n')
            progbar.add(1)

    with open(os.path.join(output_dir, "video2subvideo.txt"), 'w') as f:
        f.write(str(video2subvideo))

    if feat_vec is None:
        raise ValueError('no frame-level features found in %s' % feat_path)

    text2bin(len(feat_vec), [feat_combined_file], output_dir, 1)
    # Remove the intermediate text file without going through a shell, so
    # paths containing spaces or shell metacharacters are handled correctly.
    if os.path.exists(feat_combined_file):
        os.remove(feat_combined_file)