Python path_from_tid 예제들, utils.path_from_tid Python 예제들

예제 #1

0

파일 보기

파일: compute_orig_feats.py 프로젝트: chenchy/LargeScaleCoverSongId

def compute_one_clique(maindir,
                       cliques,
                       mu,
                       sd,
                       clique_id=0,
                       output="clique_vs_nonclique.pk"):
    """Computes the features for one clique, and N other tracks as
    non_cliques.

    Features are extracted for every track of ``cliques[clique_id]`` and for
    the first track of up to N other, distinct, randomly ordered cliques,
    where N is the number of tracks in the selected clique. Only the feature
    columns from index 450 onwards are kept. The standardized features are
    plotted with one horizontal line per track boundary, and the raw (not
    standardized) per-track matrices are pickled to ``output``.

    Parameters:
        maindir: root directory forwarded to ``utils.path_from_tid``.
        cliques: sequence of cliques, each one a sequence of track ids.
        mu, sd: forwarded untouched to ``standardize``.
        clique_id: index in ``cliques`` of the clique to analyze.
        output: path of the pickle file to write.

    Tracks for which ``utils.extract_feats`` returns None are skipped.
    """
    def second_half(tid):
        """Returns columns 450+ of the features of ``tid``, or None."""
        feats = utils.extract_feats(utils.path_from_tid(maindir, tid))
        # Identity test: "== None" compares element-wise on numpy arrays.
        if feats is None:
            return None
        # Floor division keeps the shape integral on Python 3 as well.
        # NOTE(review): presumably the features have 900 columns -- confirm.
        x = np.zeros((feats.shape[0], feats.shape[1] // 2))
        x[:] = feats[:, 450:]
        return x

    X = {"cliques": [], "non_cliques": []}
    for tid in cliques[clique_id]:
        x = second_half(tid)
        if x is not None:
            X["cliques"].append(x)

    N = len(cliques[clique_id])
    # Walk the other cliques in random order. Unlike drawing indices with
    # replacement, this never indexes past the end of ``cliques``, never
    # picks the same clique twice and always terminates, even when fewer
    # than N other cliques have usable features.
    candidates = [i for i in range(len(cliques)) if i != clique_id]
    np.random.shuffle(candidates)
    for idx in candidates:
        if len(X["non_cliques"]) >= N:
            break
        x = second_half(cliques[idx][0])
        if x is not None:
            X["non_cliques"].append(x)

    # Gather the standardized matrices and concatenate them once.
    blocks = [np.empty((0, 450))]
    bounds = []
    total_rows = 0
    for key in ("cliques", "non_cliques"):
        print(key)
        for x in X[key]:
            x = standardize(x, mu, sd)
            blocks.append(x)
            total_rows += x.shape[0]
            bounds.append(total_rows)  # cumulative row index of the track end
    feats = np.concatenate(blocks, axis=0)

    plt.imshow(feats, interpolation="nearest", aspect="auto")
    for bound in bounds:
        plt.axhline(bound, color="magenta", linewidth=2.0)
    plt.show()

    # Binary mode is what pickle expects; "with" closes the file on errors.
    with open(output, 'wb') as f:
        cPickle.dump(X, f)

예제 #2

0

파일 보기

파일: compute_orig_feats.py 프로젝트: gmcather/LargeScaleCoverSongId

def compute_one_clique(maindir, cliques, mu, sd, clique_id=0,
                       output="clique_vs_nonclique.pk"):
    """Computes the features for one clique, and N other tracks as
    non_cliques.

    Features are extracted for every track of ``cliques[clique_id]`` and for
    the first track of up to N other, distinct, randomly ordered cliques,
    where N is the number of tracks in the selected clique. Only the feature
    columns from index 450 onwards are kept. The standardized features are
    plotted with one horizontal line per track boundary, and the raw (not
    standardized) per-track matrices are pickled to ``output``.

    Parameters:
        maindir: root directory forwarded to ``utils.path_from_tid``.
        cliques: sequence of cliques, each one a sequence of track ids.
        mu, sd: forwarded untouched to ``standardize``.
        clique_id: index in ``cliques`` of the clique to analyze.
        output: path of the pickle file to write.

    Tracks for which ``utils.extract_feats`` returns None are skipped.
    """
    def load_tail(tid):
        """Returns columns 450+ of the features of ``tid``, or None."""
        path = utils.path_from_tid(maindir, tid)
        feats = utils.extract_feats(path)
        # "== None" on a numpy array is element-wise; test identity instead.
        if feats is None:
            return None
        # "//" keeps the column count an int on Python 3 too.
        # NOTE(review): presumably the features have 900 columns -- confirm.
        tail = np.zeros((feats.shape[0], feats.shape[1] // 2))
        tail[:] = feats[:, 450:]
        return tail

    X = dict()
    X["cliques"] = []
    X["non_cliques"] = []
    for tid in cliques[clique_id]:
        x = load_tail(tid)
        if x is None:
            continue
        X["cliques"].append(x)

    N = len(cliques[clique_id])
    # A shuffled list of the other clique indices replaces the unbounded
    # draw-with-replacement loop: no out-of-range index, no clique used
    # twice, and guaranteed termination when too few cliques are usable.
    others = [i for i in range(len(cliques)) if i != clique_id]
    np.random.shuffle(others)
    for idx in others:
        if len(X["non_cliques"]) >= N:
            break
        x = load_tail(cliques[idx][0])
        if x is None:
            continue
        X["non_cliques"].append(x)

    # Standardize every track, remembering where each one ends.
    parts = [np.empty((0, 450))]
    bounds = []
    last_row = 0
    for key in ("cliques", "non_cliques"):
        print(key)
        for x in X[key]:
            x = standardize(x, mu, sd)
            parts.append(x)
            last_row += x.shape[0]
            bounds.append(last_row)
    feats = np.concatenate(parts, axis=0)

    plt.imshow(feats, interpolation="nearest", aspect="auto")
    for bound in bounds:
        plt.axhline(bound, color="magenta", linewidth=2.0)
    plt.show()

    # Pickles are binary data; the context manager closes the file on errors.
    with open(output, 'wb') as f:
        cPickle.dump(X, f)

예제 #3

0

파일 보기

파일: cover_id_test.py 프로젝트: gmcather/LargeScaleCoverSongId

def compute_codes_orig_it(track_ids, maindir, clique_ids, start_idx, end_idx):
    """Computes the original features, based on Thierry and Ellis, 2012.
    Dimensionality reduction using PCA of 50, 100, and 200 components.

    Only the tracks in ``track_ids[start_idx:end_idx]`` are processed.

    Returns a tuple ``(codes, track_ids_slice, clique_ids_slice)`` where
    ``codes`` is a list with one ``(end_idx - start_idx, n_comp)`` array per
    PCA size. Rows of tracks whose features could not be extracted are left
    filled with NaN.
    """
    trainedpca = utils.load_pickle("models/pca_250Kexamples_900dim_nocovers.pkl")
    pca_components = [50, 100, 200]
    n_tracks = end_idx - start_idx

    # Init codes: NaN marks the rows that never get computed.
    codes = [np.full((n_tracks, n_comp), np.nan) for n_comp in pca_components]

    for i, tid in enumerate(track_ids[start_idx:end_idx]):
        path = utils.path_from_tid(maindir, tid)
        feats = utils.extract_feats(path)
        # Identity test: "== None" compares element-wise on numpy arrays.
        if feats is None:
            continue
        med = np.median(feats, axis=0)
        for pca_idx, n_comp in enumerate(pca_components):
            tmp = dan_tools.chromnorm(med.reshape(med.shape[0], 1)).squeeze()
            codes[pca_idx][i] = trainedpca.apply_newdata(tmp, ndims=n_comp)
        if i % 1000 == 0:
            logger.info("Computed %d of %d track(s)" % (i, n_tracks))
    return (codes, track_ids[start_idx:end_idx], clique_ids[start_idx:end_idx])

예제 #4

0

파일 보기

파일: compute_orig_feats.py 프로젝트: gmcather/LargeScaleCoverSongId

def compute_original_feats(maindir, tracks, cliques, output="originalfeats.pk"):
    """Computes the original features.

    For every track with extractable features, a float copy of the feature
    matrix is stored together with the index of the first clique containing
    the track (None when no clique contains it). Every 50 processed tracks
    the pending matrices and the index list are pickled to a numbered file
    under /Volumes/Audio/SummaryCovers/.

    NOTE(review): ``output`` is never used, the index list ``I`` keeps
    growing while ``X`` is emptied after each dump, and the matrices
    gathered after the last dump are never written -- confirm intent.
    """
    X = []
    I = []
    cnt = 0
    k = 0
    for tid in tracks:
        path = utils.path_from_tid(maindir, tid)
        feats = utils.extract_feats(path)
        # Identity test: "== None" compares element-wise on numpy arrays.
        if feats is None:
            continue
        x = np.zeros(feats.shape)
        x[:] = feats
        X.append(x)

        # Default to None so a track outside every clique no longer reuses
        # the previous track's index (or fails on an unbound name).
        idx = next((i for i, clique in enumerate(cliques) if tid in clique),
                   None)
        I.append(idx)

        if cnt % 50 == 0 and cnt != 0:
            print("---Computing features: %d of %d" % (cnt, len(tracks)))
            # "with" closes the file even when pickling fails.
            with open("/Volumes/Audio/SummaryCovers/originalfeats%d.pk" % k,
                      'wb') as f:
                cPickle.dump((X, I), f)
            k += 1
            X = []
        cnt += 1

예제 #5

0

파일 보기

파일: cover_id_test.py 프로젝트: chenchy/LargeScaleCoverSongId

def compute_codes_orig_it(track_ids, maindir, clique_ids, start_idx, end_idx):
    """Computes the original features, based on Thierry and Ellis, 2012.
    Dimensionality reduction using PCA of 50, 100, and 200 components.

    Processes ``track_ids[start_idx:end_idx]`` only and returns the tuple
    ``(codes, track_ids_slice, clique_ids_slice)``. ``codes`` holds one
    ``(end_idx - start_idx, n_comp)`` array per PCA size; the row of a track
    without extractable features stays NaN.
    """
    trainedpca = utils.load_pickle(
        "models/pca_250Kexamples_900dim_nocovers.pkl")
    pca_components = [50, 100, 200]
    total = end_idx - start_idx

    # Init codes with NaN so skipped tracks are recognizable afterwards.
    codes = [np.full((total, n_comp), np.nan) for n_comp in pca_components]

    for i, tid in enumerate(track_ids[start_idx:end_idx]):
        feats = utils.extract_feats(utils.path_from_tid(maindir, tid))
        # "== None" on a numpy array is element-wise; test identity instead.
        if feats is None:
            continue
        med = np.median(feats, axis=0)
        for pca_idx, n_comp in enumerate(pca_components):
            tmp = dan_tools.chromnorm(med.reshape(med.shape[0], 1)).squeeze()
            codes[pca_idx][i] = trainedpca.apply_newdata(tmp, ndims=n_comp)
        if i % 1000 == 0:
            logger.info("Computed %d of %d track(s)" % (i, total))
    return (codes, track_ids[start_idx:end_idx], clique_ids[start_idx:end_idx])

예제 #6

0

파일 보기

파일: cover_id_test.py 프로젝트: chenchy/LargeScaleCoverSongId

def compute_codes_it(track_ids,
                     maindir,
                     d,
                     clique_ids,
                     start_idx,
                     end_idx,
                     origcodes=None,
                     norm=False):
    """Computes the features based on Humphrey, Nieto and Bello, 2013.
    Dimensionality reduction using LDA of 50, 100, and 200 components.

    Processes ``track_ids[start_idx:end_idx]`` only. When ``origcodes`` is
    given, its i-th entry is used instead of extracting and transforming the
    features of the i-th track. With ``norm`` set, each code goes through
    ``dan_tools.chromnorm`` before PCA/LDA.

    Returns ``(codes, track_ids_slice, clique_ids_slice)``. ``codes`` has
    one array per LDA size, or a single K-column array when no LDA is set,
    K being parsed from the name ``d``. Rows of skipped tracks stay NaN.

    NOTE(review): ``lda`` and ``pca`` are not parameters; they are
    presumably module-level globals -- confirm.
    """
    fx = load_transform(d)
    K = int(d.split("_")[1].split("E")[1])
    n_tracks = end_idx - start_idx

    # Init codes
    if lda is not None:
        lda_components = [50, 100, 200]
        codes = [np.full((n_tracks, n_comp), np.nan)
                 for n_comp in lda_components]
    else:
        codes = [np.full((n_tracks, K), np.nan)]

    for i, tid in enumerate(track_ids[start_idx:end_idx]):
        if origcodes is None:
            path = utils.path_from_tid(maindir, tid)
            feats = utils.extract_feats(path)
            # Identity test: "== None" is element-wise on numpy arrays.
            if feats is None:
                continue
            code = np.median(fx(feats), axis=0)
        else:
            code = origcodes[i]
        if norm:
            code = dan_tools.chromnorm(code.reshape(code.shape[0],
                                                    1)).squeeze()
        if pca is not None:
            code = pca.transform(code)
        if lda is not None:
            for lda_idx, n_comp in enumerate(lda_components):
                tmp = lda[lda_idx].transform(code)
                codes[lda_idx][i] = dan_tools.chromnorm(
                    tmp.reshape(tmp.shape[0], 1)).squeeze()
        else:
            codes[0][i] = code
        if i % 1000 == 0:
            logger.info("Computed %d of %d track(s)" % (i, n_tracks))
    return (codes, track_ids[start_idx:end_idx], clique_ids[start_idx:end_idx])

예제 #7

0

파일 보기

파일: cover_id_test.py 프로젝트: gmcather/LargeScaleCoverSongId

def compute_codes_it(track_ids, maindir, d, clique_ids,
        start_idx, end_idx, origcodes=None, norm=False):
    """Computes the features based on Humphrey, Nieto and Bello, 2013.
    Dimensionality reduction using LDA of 50, 100, and 200 components.

    Only ``track_ids[start_idx:end_idx]`` is processed. If ``origcodes`` is
    provided, ``origcodes[i]`` replaces the extraction and transform of the
    i-th track. If ``norm`` is true, the code is passed through
    ``dan_tools.chromnorm`` before PCA/LDA are applied.

    Returns ``(codes, track_ids_slice, clique_ids_slice)``; ``codes`` is a
    list with one array per LDA size, or one K-column array without LDA
    (K is parsed from the name ``d``). Skipped tracks keep NaN rows.

    NOTE(review): ``lda`` and ``pca`` are not parameters; they are
    presumably module-level globals -- confirm.
    """
    fx = load_transform(d)
    K = int(d.split("_")[1].split("E")[1])
    total = end_idx - start_idx

    # Init codes
    if lda is not None:
        lda_components = [50, 100, 200]
        codes = [np.full((total, n_comp), np.nan) for n_comp in lda_components]
    else:
        codes = [np.full((total, K), np.nan)]

    for i, tid in enumerate(track_ids[start_idx:end_idx]):
        if origcodes is None:
            feats = utils.extract_feats(utils.path_from_tid(maindir, tid))
            # "== None" on a numpy array is element-wise; test identity.
            if feats is None:
                continue
            code = np.median(fx(feats), axis=0)
        else:
            code = origcodes[i]
        if norm:
            code = dan_tools.chromnorm(code.reshape(code.shape[0], 1)).squeeze()
        if pca is not None:
            code = pca.transform(code)
        if lda is not None:
            for lda_idx, n_comp in enumerate(lda_components):
                tmp = lda[lda_idx].transform(code)
                codes[lda_idx][i] = dan_tools.chromnorm(
                    tmp.reshape(tmp.shape[0], 1)).squeeze()
        else:
            codes[0][i] = code
        if i % 1000 == 0:
            logger.info("Computed %d of %d track(s)" % (i, total))
    return (codes, track_ids[start_idx:end_idx], clique_ids[start_idx:end_idx])

예제 #8

0

파일 보기

파일: compute_orig_feats.py 프로젝트: chenchy/LargeScaleCoverSongId

def compute_N_cliques(maindir, cliques, N=10, output="cliques.pk"):
    """Computes the features for N cliques.

    N distinct cliques are drawn at random. For each one, the features of
    its tracks are collected (tracks whose extraction returns None are
    skipped) and the resulting list of per-clique lists is pickled to
    ``output``.

    Raises ValueError when N exceeds the number of cliques.
    """
    # random.sample draws N distinct indices in one go and fails loudly when
    # N > len(cliques), where a redraw-until-unused loop would never end.
    clique_ids = random.sample(range(len(cliques)), N)

    X = []
    for clique_id in clique_ids:
        x = []
        for tid in cliques[clique_id]:
            feats = utils.extract_feats(utils.path_from_tid(maindir, tid))
            # Identity test: "== None" is element-wise on numpy arrays.
            if feats is not None:
                x.append(feats)
        X.append(x)

    # Binary mode is what pickle expects; "with" closes the file on errors.
    with open(output, 'wb') as f:
        cPickle.dump(X, f)

예제 #9

0

파일 보기

파일: compute_orig_feats.py 프로젝트: gmcather/LargeScaleCoverSongId

def compute_N_cliques(maindir, cliques, N=10, output="cliques.pk"):
    """Computes the features for N cliques.

    Picks N distinct cliques at random, gathers the features of every track
    in each of them (skipping tracks whose extraction returns None) and
    pickles the list of per-clique feature lists to ``output``.

    Raises ValueError when N exceeds the number of cliques.
    """
    # Sampling without replacement cannot loop forever when N is larger
    # than the number of cliques; it raises ValueError instead.
    chosen = random.sample(range(len(cliques)), N)

    X = []
    for clique_id in chosen:
        clique_feats = []
        for tid in cliques[clique_id]:
            path = utils.path_from_tid(maindir, tid)
            feats = utils.extract_feats(path)
            # "== None" on a numpy array is element-wise; test identity.
            if feats is None:
                continue
            clique_feats.append(feats)
        X.append(clique_feats)

    # Pickles are binary data; the context manager closes the file on errors.
    with open(output, 'wb') as f:
        cPickle.dump(X, f)

예제 #10

0

파일 보기

파일: compute_orig_feats.py 프로젝트: chenchy/LargeScaleCoverSongId

def compute_original_feats(maindir,
                           tracks,
                           cliques,
                           output="originalfeats.pk"):
    """Computes the original features.

    Each track with extractable features contributes a float copy of its
    feature matrix and the index of the first clique that contains it (None
    when the track is in no clique). Every 50 processed tracks, the pending
    matrices and the index list are pickled to a numbered file under
    /Volumes/Audio/SummaryCovers/.

    NOTE(review): ``output`` is never used, the index list ``I`` keeps
    growing while ``X`` is emptied after each dump, and the matrices
    gathered after the last dump are never written -- confirm intent.
    """
    X = []
    I = []
    cnt = 0
    k = 0
    for tid in tracks:
        feats = utils.extract_feats(utils.path_from_tid(maindir, tid))
        # "== None" on a numpy array is element-wise; test identity instead.
        if feats is None:
            continue
        x = np.zeros(feats.shape)
        x[:] = feats
        X.append(x)

        # Start from None for each track: otherwise a track that belongs to
        # no clique inherits the index found for an earlier track, or hits
        # an unbound name on the very first iteration.
        idx = None
        for i, clique in enumerate(cliques):
            if tid in clique:
                idx = i
                break
        I.append(idx)

        if cnt % 50 == 0 and cnt != 0:
            print("---Computing features: %d of %d" % (cnt, len(tracks)))
            # The context manager closes the file even if pickling fails.
            with open("/Volumes/Audio/SummaryCovers/originalfeats%d.pk" % k,
                      'wb') as f:
                cPickle.dump((X, I), f)
            k += 1
            X = []
        cnt += 1

예제 #11

0

파일 보기

파일: binary_task.py 프로젝트: chenchy/LargeScaleCoverSongId

def main():
    """Runs the SHS binary task from the command line.

    For every query triplet, features of the three tracks are extracted
    (optionally reduced with a pickled PCA model), and the query counts as
    correct when the first track is closer, in squared Euclidean distance,
    to the second track than to the third. Running and final accuracies are
    reported through ``logger``.
    """
    # Args parser
    parser = argparse.ArgumentParser(
        description="Evaluates the 500 binary queries from the SHS data set",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument("msd_dir",
                        action="store",
                        help="Million Song Dataset main directory")
    parser.add_argument("-dictfile",
                        action="store",
                        default="",
                        help="Pickle to the learned dictionary")
    parser.add_argument("-lda",
                        action="store",
                        nargs=2,
                        default=[None, 0],
                        help="LDA file and version",
                        metavar=('lda.pkl', 'n'))
    parser.add_argument("-pca", nargs=2, metavar=('f.pkl', 'n'),
                        default=("", 0),
                        help="pca model saved in a pickle file, "
                        "use n dimensions")
    # Parse
    args = parser.parse_args()

    # Track time
    start_time = time.time()

    maindir = args.msd_dir
    queriesf = "SHS/list_500queries.txt"
    shsf = "SHS/shs_dataset_train.txt"
    lda = args.lda[0]
    lda_n = int(args.lda[1])
    pcafile = args.pca[0]
    pcadim = int(args.pca[1])

    # sanity checks
    utils.assert_file(maindir)
    utils.assert_file(queriesf)
    utils.assert_file(shsf)
    # The empty default means "no PCA", so there is no file to check.
    if pcafile != "":
        utils.assert_file(pcafile)

    # read queries
    queries = read_query_file(queriesf)

    # load pca
    trainedpca = None
    if pcafile != "":
        # Pickles are binary data; "with" closes the file on errors.
        # NOTE: unpickling runs arbitrary code, only load trusted files.
        with open(pcafile, 'rb') as f:
            trainedpca = cPickle.load(f)
        assert pcadim > 0
        logger.info('trained pca loaded')

    # load lda
    if lda is not None:
        lda = utils.load_pickle(lda)

    # to keep stats
    results = []

    # iterate over queries
    logger.info("Starting the binary task...")

    # Get the dictionary transform
    td = load_transform(args.dictfile)

    for triplet in queries:
        # get features (real lists, so they can be indexed on Python 3 too)
        filenames = [utils.path_from_tid(maindir, tid) for tid in triplet]
        triplet_feats = [
            extract_feats(f, td=td, lda_file=lda, lda_n=lda_n)
            for f in filenames]
        # "None in list" would compare the numpy arrays with "==".
        if any(feat is None for feat in triplet_feats):
            continue

        # Apply pca if needed
        if trainedpca is not None:
            triplet_feats = [trainedpca.apply_newdata(feat, ndims=pcadim)
                             for feat in triplet_feats]
            # Check all three outputs rather than a randomly picked one.
            assert all(feat.shape[0] == pcadim for feat in triplet_feats)

        # Compute result
        res1 = triplet_feats[0] - triplet_feats[1]
        res1 = np.sum(res1 * res1)
        res2 = triplet_feats[0] - triplet_feats[2]
        res2 = np.sum(res2 * res2)
        results.append(1 if res1 < res2 else 0)

        # verbose
        if len(results) % 5 == 0:
            logger.info(' --- after %d queries, accuracy: %.1f %%' %
                        (len(results), 100. * np.mean(results)))
    # done; avoid taking the mean of an empty list when nothing was usable
    accuracy = 100. * np.mean(results) if results else float("nan")
    logger.info('After %d queries, accuracy: %.1f %%' %
                (len(results), accuracy))
    logger.info('Done! Took %.2f seconds' % (time.time() - start_time))

예제 #12

0

파일 보기

def compute_feats(track_ids,
                  maindir,
                  d,
                  lda_file=None,
                  lda_n=0,
                  codes=None,
                  ver=True,
                  pca="",
                  pca_n=0):
    """Computes the features using the dictionary d. If it doesn't exist,
     computes them using Thierry's method.

     The improved pipeline is composed of 11 steps:

        1.- Beat Synchronous Chroma
        2.- L2-Norm
        3.- Shingle (PATCH_LEN: 75 x 12)
        4.- 2D-FFT
        5.- L2-Norm
        6.- Log-Scale
        7.- Sparse Coding
        8.- Shrinkage
        9.- Median Aggregation
        10.- Dimensionality Reduction
        11.- L2-Norm

    Original method by Thierry doesn't include steps 5,6,7,8,11.

    Parameters:
        track_ids: track ids to process, one output row per id.
        maindir: root directory forwarded to ``utils.path_from_tid``.
        d: dictionary name/path, or "" to skip the dictionary transform.
        lda_file: indexable collection of models; ``lda_file[lda_n]`` is
            applied when not None.
        lda_n: 0, 1 or 2, selecting 50, 100 or 200 output components.
        codes: precomputed codes (array, one row per track); when None the
            codes are computed and saved under "results/".
        ver: log progress when true.
        pca: path of a pickled collection of PCA models, or "" for no PCA.
        pca_n: index of the PCA model to use.

    Returns an array with one row per track; rows of tracks whose features
    could not be extracted stay NaN.

    Raises ValueError if ``lda_file`` is given and ``lda_n`` is not 0, 1
    or 2.
     """
    if d != "":
        fx = load_transform(d)
        K = int(d.split("_")[1].split("E")[1])
    else:
        K = PATCH_LEN

    compute_codes = codes is None
    if compute_codes:
        codes = np.ones((len(track_ids), K)) * np.nan
    else:
        K = codes[0].shape[0]

    if lda_file is not None:
        # An unknown lda_n used to leave n_comp unbound; fail explicitly.
        lda_dims = {0: 50, 1: 100, 2: 200}
        if lda_n not in lda_dims:
            raise ValueError("lda_n must be 0, 1 or 2, got %r" % (lda_n,))
        n_comp = lda_dims[lda_n]
    else:
        n_comp = K

    # Decide once, before ``pca`` is rebound from a path to a model object.
    use_pca = pca != ""
    if use_pca:
        pca = utils.load_pickle(pca)
        pca = pca[pca_n]

    final_feats = np.ones((codes.shape[0], n_comp)) * np.nan
    for cnt, tid in enumerate(track_ids):
        if compute_codes:
            path = utils.path_from_tid(maindir, tid)

            # 1.- Beat Synchronous Chroma
            # 2.- L2-Norm
            # 3.- Shingle (PATCH_LEN: 75 x 12)
            # 4.- 2D-FFT
            feats = utils.extract_feats(path)
            # Identity test: "== None" is element-wise on numpy arrays.
            if feats is None:
                continue

            if d != "":
                # 5.- L2-Norm
                # 6.- Log-Scale
                # 7.- Sparse Coding
                # 8.- Shrinkage
                H = fx(feats)
            else:
                H = feats
            # 9.- Median Aggregation
            H = np.median(H, axis=0)
            codes[cnt] = H.copy()
        else:
            H = codes[cnt]

        if use_pca:
            H = pca.transform(H)

        # Apply LDA if needed
        if lda_file is not None:
            # 10.- Dimensionality Reduction
            H = lda_file[lda_n].transform(H)

        # 11.- L2-Norm
        final_feats[cnt] = dan_tools.chromnorm(H.reshape(H.shape[0],
                                                         1)).squeeze()

        if ver and cnt % 50 == 1:
            logger.info("----Computing features %.1f%%" %
                        (cnt / float(len(track_ids)) * 100))

    if d == "":
        d = "orig"  # For saving purposes

    # Save codes
    utils.create_dir("results")
    if compute_codes:
        utils.save_pickle(codes,
                          "results/codes-" + os.path.basename(d) + ".pk")

    logger.info("Features Computed")
    return final_feats

예제 #13

0

파일 보기

파일: cover_id_train.py 프로젝트: gmcather/LargeScaleCoverSongId

def compute_feats(track_ids, maindir, d, lda_file=None, lda_n=0, codes=None,
        ver=True, pca="", pca_n=0):
    """Computes the features using the dictionary d. If it doesn't exist,
     computes them using Thierry's method.

     The improved pipeline is composed of 11 steps:

        1.- Beat Synchronous Chroma
        2.- L2-Norm
        3.- Shingle (PATCH_LEN: 75 x 12)
        4.- 2D-FFT
        5.- L2-Norm
        6.- Log-Scale
        7.- Sparse Coding
        8.- Shrinkage
        9.- Median Aggregation
        10.- Dimensionality Reduction
        11.- L2-Norm

    Original method by Thierry doesn't include steps 5,6,7,8,11.

    Parameters:
        track_ids: track ids to process, one output row per id.
        maindir: root directory forwarded to ``utils.path_from_tid``.
        d: dictionary name/path, or "" to skip the dictionary transform.
        lda_file: indexable collection of models; ``lda_file[lda_n]`` is
            applied when not None.
        lda_n: 0, 1 or 2, selecting 50, 100 or 200 output components.
        codes: precomputed codes (array, one row per track); when None the
            codes are computed and saved under "results/".
        ver: log progress when true.
        pca: path of a pickled collection of PCA models, or "" for no PCA.
        pca_n: index of the PCA model to use.

    Returns an array with one row per track; rows of tracks whose features
    could not be extracted stay NaN.

    Raises ValueError if ``lda_file`` is given and ``lda_n`` is not 0, 1
    or 2.
     """
    if d != "":
        fx = load_transform(d)
        K = int(d.split("_")[1].split("E")[1])
    else:
        K = PATCH_LEN

    compute_codes = codes is None
    if compute_codes:
        codes = np.ones((len(track_ids), K)) * np.nan
    else:
        K = codes[0].shape[0]

    if lda_file is None:
        n_comp = K
    else:
        # Reject unknown versions instead of leaving n_comp unbound.
        n_comp_by_version = {0: 50, 1: 100, 2: 200}
        if lda_n not in n_comp_by_version:
            raise ValueError("lda_n must be 0, 1 or 2, got %r" % (lda_n,))
        n_comp = n_comp_by_version[lda_n]

    # Remember whether PCA was requested before ``pca`` stops being a path.
    apply_pca = pca != ""
    if apply_pca:
        pca = utils.load_pickle(pca)
        pca = pca[pca_n]

    final_feats = np.ones((codes.shape[0], n_comp)) * np.nan
    for cnt, tid in enumerate(track_ids):
        if compute_codes:
            path = utils.path_from_tid(maindir, tid)

            # 1.- Beat Synchronous Chroma
            # 2.- L2-Norm
            # 3.- Shingle (PATCH_LEN: 75 x 12)
            # 4.- 2D-FFT
            feats = utils.extract_feats(path)
            # "== None" on a numpy array is element-wise; test identity.
            if feats is None:
                continue

            # 5.- L2-Norm
            # 6.- Log-Scale
            # 7.- Sparse Coding
            # 8.- Shrinkage
            # (steps 5-8 only run when a dictionary was given)
            H = fx(feats) if d != "" else feats
            # 9.- Median Aggregation
            H = np.median(H, axis=0)
            codes[cnt] = H.copy()
        else:
            H = codes[cnt]

        if apply_pca:
            H = pca.transform(H)

        # Apply LDA if needed
        if lda_file is not None:
            # 10.- Dimensionality Reduction
            H = lda_file[lda_n].transform(H)

        # 11.- L2-Norm
        final_feats[cnt] = dan_tools.chromnorm(H.reshape(H.shape[0], 1)).squeeze()

        if ver and cnt % 50 == 1:
            logger.info("----Computing features %.1f%%" %
                        (cnt / float(len(track_ids)) * 100))

    if d == "":
        d = "orig" # For saving purposes

    # Save codes
    utils.create_dir("results")
    if compute_codes:
        utils.save_pickle(codes, "results/codes-" + os.path.basename(d) + ".pk")

    logger.info("Features Computed")
    return final_feats

예제 #14

0

파일 보기

파일: binary_task.py 프로젝트: urinieto/LargeScaleCoverSongId

def main():
    """Evaluates the SHS binary queries from the command line.

    Each query is a triplet of track ids. Features are extracted for the
    three tracks (and optionally projected with a pickled PCA model); the
    query is scored 1 when the squared Euclidean distance between the first
    and second track is smaller than between the first and third, else 0.
    Running and final accuracies are reported through ``logger``.
    """
    # Args parser
    parser = argparse.ArgumentParser(
        description="Evaluates the 500 binary queries from the SHS data set",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    parser.add_argument("msd_dir", action="store", help="Million Song Dataset main directory")
    parser.add_argument("-dictfile", action="store", default="", help="Pickle to the learned dictionary")
    parser.add_argument(
        "-lda", action="store", nargs=2, default=[None, 0], help="LDA file and version", metavar=("lda.pkl", "n")
    )
    parser.add_argument(
        "-pca",
        nargs=2,
        metavar=("f.pkl", "n"),
        default=("", 0),
        help="pca model saved in a pickle file, " "use n dimensions",
    )
    # Parse
    args = parser.parse_args()

    # Track time
    start_time = time.time()

    maindir = args.msd_dir
    queriesf = "SHS/list_500queries.txt"
    shsf = "SHS/shs_dataset_train.txt"
    lda = args.lda[0]
    lda_n = int(args.lda[1])
    pcafile = args.pca[0]
    pcadim = int(args.pca[1])

    # sanity checks
    utils.assert_file(maindir)
    utils.assert_file(queriesf)
    utils.assert_file(shsf)
    # "" is the "no PCA" default, so only check a path that was given.
    if pcafile != "":
        utils.assert_file(pcafile)

    # read queries
    queries = read_query_file(queriesf)

    # load pca
    trainedpca = None
    if pcafile != "":
        # Open in binary mode for pickle; "with" closes the file on errors.
        # NOTE: unpickling runs arbitrary code, only load trusted files.
        with open(pcafile, "rb") as f:
            trainedpca = cPickle.load(f)
        assert pcadim > 0
        logger.info("trained pca loaded")

    # load lda
    if lda is not None:
        lda = utils.load_pickle(lda)

    # to keep stats
    results = []

    # iterate over queries
    logger.info("Starting the binary task...")

    # Get the dictionary transform
    td = load_transform(args.dictfile)

    for triplet in queries:
        # get features as real lists: map() is lazy on Python 3 and the
        # results are indexed below
        filenames = [utils.path_from_tid(maindir, tid) for tid in triplet]
        triplet_feats = [extract_feats(f, td=td, lda_file=lda, lda_n=lda_n) for f in filenames]
        # An identity test avoids comparing numpy arrays to None with "==".
        if any(feat is None for feat in triplet_feats):
            continue

        # Apply pca if needed
        if trainedpca is not None:
            triplet_feats = [trainedpca.apply_newdata(feat, ndims=pcadim) for feat in triplet_feats]
            # Verify every projected vector, not a randomly chosen one.
            assert all(feat.shape[0] == pcadim for feat in triplet_feats)

        # Compute result
        diff_pos = triplet_feats[0] - triplet_feats[1]
        diff_neg = triplet_feats[0] - triplet_feats[2]
        res1 = np.sum(diff_pos * diff_pos)
        res2 = np.sum(diff_neg * diff_neg)
        results.append(1 if res1 < res2 else 0)

        # verbose
        if len(results) % 5 == 0:
            logger.info(" --- after %d queries, accuracy: %.1f %%" % (len(results), 100.0 * np.mean(results)))
    # done; do not take the mean of an empty list when no query was usable
    accuracy = 100.0 * np.mean(results) if results else float("nan")
    logger.info("After %d queries, accuracy: %.1f %%" % (len(results), accuracy))
    logger.info("Done! Took %.2f seconds" % (time.time() - start_time))