Exemplo n.º 1
0
def populate_gmms(sample_vids, GMM_OUT, k_gmm, sample_size=400000, PCA=False):
    """
    Fit one GMM per descriptor group on a random subset of IDTF lines and
    save the fitted models.

    sample_vids is forwarded, together with the module-level GMM_dir and the
    sorted line indices drawn here, to IDT_feature.list_descriptors_sampled.
    GMM_OUT is the np.savez target; the models are stored under the
    gmm_list key.
    k_gmm and PCA are passed through unchanged to gmm_model.
    sample_size is the maximum number of line indices to draw; it is capped
    at the count reported by total_IDTF_lines().

    Returns the list of gmms.
    """
    # Reference figures noted by the original author:
    #   total_lines = 2488317              -> sample size = 1500000
    #   total_lines (30 classes) = 686680  -> sample size = 400000
    available = total_IDTF_lines()
    print(available)

    # Draw distinct line indices and hand them over in ascending order.
    n_draw = min(available, sample_size)
    chosen = sorted(random.sample(xrange(available), n_draw))

    descriptors = IDT_feature.list_descriptors_sampled(
        GMM_dir, sample_vids, chosen)
    grouped = IDT_feature.bm_descriptors(descriptors)

    # One model per entry returned by bm_descriptors.
    gmm_list = []
    for bm in grouped:
        gmm_list.append(gmm_model(bm, k_gmm, PCA=PCA))
    print("gmm_list complete")

    np.savez(GMM_OUT, gmm_list=gmm_list)
    print("Save gmm_list.npz")
    return gmm_list
Exemplo n.º 2
0
def populate_gmms(PROJ_DIR,TMP_FEATURES,k_gmm,GMM_OUT,sample_size=1500000, PCA=False):
    """
    Fit one GMM per descriptor group on a random subset of the IDTF lines
    found under TMP_FEATURES, and save the fitted models.

    PROJ_DIR is accepted for interface compatibility but is not referenced
    by this function; the output location is determined by GMM_OUT alone.
    TMP_FEATURES is the directory that is scanned for files ending in
    '.features'.
    k_gmm and PCA are passed through unchanged to gmm_model.
    GMM_OUT is the np.savez target; the models are stored under the
    gmm_list key.
    sample_size is the maximum number of line indices to draw; it is capped
    at the count reported by total_IDTF_lines(TMP_FEATURES).

    Returns the list of gmms.
    """
    # Collect the feature files in whatever order os.listdir yields them.
    feature_files = []
    for entry in os.listdir(TMP_FEATURES):
        if entry.endswith('.features'):
            feature_files.append(entry)

    # The original author noted total_lines = 2488317 for their data set.
    available = total_IDTF_lines(TMP_FEATURES)
    print("Total IDTFs constructed " + str(available))

    # Draw distinct line indices and hand them over in ascending order.
    n_draw = min(available, sample_size)
    chosen = sorted(random.sample(xrange(available), n_draw))

    descriptors = IDT_feature.list_descriptors_sampled(
        TMP_FEATURES, feature_files, chosen)
    grouped = IDT_feature.bm_descriptors(descriptors)

    # One model per entry returned by bm_descriptors.
    gmm_list = []
    for bm in grouped:
        gmm_list.append(gmm_model(bm, k_gmm, PCA=PCA))

    np.savez(GMM_OUT, gmm_list=gmm_list)
    return gmm_list
Exemplo n.º 3
0
def populate_gmms(sample_vids, GMM_OUT, k_gmm, sample_size=1500000, PCA=False):
    """
    Build and persist one GMM for each descriptor group, trained on a random
    selection of IDTF lines.

    sample_vids, the module-level GMM_dir and the sorted indices drawn here
    are handed to IDT_feature.list_descriptors_sampled.
    GMM_OUT is where np.savez writes the models (under the gmm_list key).
    k_gmm and PCA go to gmm_model as-is.
    sample_size caps the number of indices drawn; it can never exceed the
    value returned by total_IDTF_lines().

    Returns the list of gmms.
    """
    # The original author noted total_lines = 2488317 for their data set.
    line_count = total_IDTF_lines()
    print(line_count)

    # Distinct indices in [0, line_count), delivered in ascending order.
    picked = random.sample(xrange(line_count), min(line_count, sample_size))
    picked.sort()

    sampled = IDT_feature.list_descriptors_sampled(GMM_dir, sample_vids, picked)

    # Fit a separate model for every entry produced by bm_descriptors.
    gmm_list = [
        gmm_model(group, k_gmm, PCA=PCA)
        for group in IDT_feature.bm_descriptors(sampled)
    ]

    np.savez(GMM_OUT, gmm_list=gmm_list)
    return gmm_list
Exemplo n.º 4
0
def populate_gmms(IDT_DIR, sample_vids, gmm_file, k_gmm, sample_size=1500000, PCA=False):
    """
    Fit one GMM per descriptor group using an equal per-video sample quota,
    and save both the sampled descriptors and the fitted models.

    IDT_DIR and sample_vids are forwarded to
    IDT_feature.list_descriptors_sampled along with the per-video quota.
    gmm_file is the output file to save the list of GMMs (np.savez, stored
    under the gmm_list key). The sampled descriptors are saved next to it,
    in the same directory, as 'bm_descriptors_<sample_size>' under the
    bm_list key.
    k_gmm and PCA are passed through unchanged to gmm_model.
    sample_size is the total number of IDTFs wanted across all videos; the
    per-video quota is ceil(sample_size / number of videos).

    Returns the list of gmms.

    Raises ValueError if sample_vids is empty.
    """
    nr_vids = len(sample_vids)
    if nr_vids == 0:
        # Fail early with a clear message instead of a bare ZeroDivisionError.
        raise ValueError("sample_vids must contain at least one video")

    # Force float division: under Python 2 an int/int division truncates
    # before np.ceil sees it, which silently rounds the quota down.
    nr_samples_pvid = int(np.ceil(sample_size / float(nr_vids)))

    sample_descriptors = IDT_feature.list_descriptors_sampled(IDT_DIR, sample_vids, nr_samples_pvid)
    bm_list = IDT_feature.bm_descriptors(sample_descriptors)

    # Save all sampled descriptors used for learning the GMMs.
    bm_file = os.path.join(os.path.dirname(gmm_file), 'bm_descriptors_%d' % (sample_size,))
    np.savez(bm_file, bm_list=bm_list)

    # NOTE(review): no square-rooting (root-SIFT style) is applied here; the
    # original author's note says it is "already done" -- presumably
    # upstream in IDT_feature. Confirm before re-adding it.

    # Construct gmm models for each of the different descriptor types.
    gmm_list = [gmm_model(bm, k_gmm, PCA=PCA) for bm in bm_list]
    np.savez(gmm_file, gmm_list=gmm_list)

    return gmm_list
Exemplo n.º 5
0
def populate_gmms(sample_vids, GMM_OUT, k_gmm, sample_size=256000, PCA=False,
                  total_lines=8081693):
    """
    Fit one GMM per descriptor group on a random subset of IDTF lines and
    save the fitted models.

    sample_vids is forwarded, together with the module-level GMM_dir and the
    sorted line indices drawn here, to IDT_feature.list_descriptors_sampled.
    GMM_OUT is the output file to save the list of GMMs (np.savez, stored
    under the gmm_list key).
    k_gmm and PCA are passed through unchanged to gmm_model.
    sample_size is the maximum number of line indices to draw; it is capped
    at total_lines.
    total_lines is the size of the index range to sample from. It defaults
    to the value that used to be hard-coded in the body, so existing calls
    behave as before; pass the real count when working with other data.

    Returns the list of gmms.
    """
    # Other values the original author used for total_lines:
    #   3000
    #   158638780-787-1323  (ucf101)
    # The count was previously also obtained via total_IDTF_lines(); the
    # caller can still do that and pass the result as total_lines.
    print('All lines: ', str(total_lines))

    # Draw distinct line indices and hand them over in ascending order.
    sample_size = min(total_lines, sample_size)
    sample_indices = random.sample(xrange(total_lines), sample_size)
    sample_indices.sort()

    sample_descriptors = IDT_feature.list_descriptors_sampled(
        GMM_dir, sample_vids, sample_indices)
    bm_list = IDT_feature.bm_descriptors(sample_descriptors)

    # Construct gmm models for each of the different descriptor types.
    gmm_list = [gmm_model(bm, k_gmm, PCA=PCA) for bm in bm_list]
    np.savez(GMM_OUT, gmm_list=gmm_list)

    return gmm_list
Exemplo n.º 6
0
def populate_gmms(IDT_DIR,
                  sample_vids,
                  gmm_file,
                  k_gmm,
                  sample_size=1500000,
                  PCA=False):
    """
    Fit one GMM per descriptor group using an equal per-video sample quota,
    and save both the sampled descriptors and the fitted models.

    IDT_DIR and sample_vids are forwarded to
    IDT_feature.list_descriptors_sampled along with the per-video quota.
    gmm_file is the output file to save the list of GMMs (np.savez, stored
    under the gmm_list key). The sampled descriptors are saved next to it,
    in the same directory, as 'bm_descriptors_<sample_size>' under the
    bm_list key.
    k_gmm and PCA are passed through unchanged to gmm_model.
    sample_size is the total number of IDTFs wanted across all videos; the
    per-video quota is ceil(sample_size / number of videos).

    Returns the list of gmms.

    Raises ValueError if sample_vids is empty.
    """
    nr_vids = len(sample_vids)
    if nr_vids == 0:
        # Fail early with a clear message instead of a bare ZeroDivisionError.
        raise ValueError("sample_vids must contain at least one video")

    # Force float division: under Python 2 an int/int division truncates
    # before np.ceil sees it, which silently rounds the quota down.
    nr_samples_pvid = int(np.ceil(sample_size / float(nr_vids)))

    sample_descriptors = IDT_feature.list_descriptors_sampled(
        IDT_DIR, sample_vids, nr_samples_pvid)
    bm_list = IDT_feature.bm_descriptors(sample_descriptors)

    # Save all sampled descriptors used for learning the GMMs.
    bm_file = os.path.join(os.path.dirname(gmm_file),
                           'bm_descriptors_%d' % (sample_size, ))
    np.savez(bm_file, bm_list=bm_list)

    # NOTE(review): no square-rooting (root-SIFT style) is applied here; the
    # original author's note says it is "already done" -- presumably
    # upstream in IDT_feature. Confirm before re-adding it.

    # Construct gmm models for each of the different descriptor types.
    gmm_list = [gmm_model(bm, k_gmm, PCA=PCA) for bm in bm_list]
    np.savez(gmm_file, gmm_list=gmm_list)

    return gmm_list