def populate_gmms(sample_vids, GMM_OUT, k_gmm, sample_size=400000, PCA=False):
    """
    Fit one GMM per descriptor type from a random subset of the IDTFs.

    sample_vids is handed to IDT_feature.list_descriptors_sampled together
    with the module-level GMM_dir and the sampled line indices.
    GMM_OUT is the output file; the GMMs are saved there as the gmm_list
    attribute.
    k_gmm and PCA are forwarded unchanged to gmm_model.
    sample_size is the number of IDTFs to draw; it is capped at the count
    reported by total_IDTF_lines().

    Returns the list of gmms.
    """
    # Sizes noted by the original author:
    #   total_lines = 2488317              -> sample size = 1500000
    #   total_lines (30 classes) = 686680  -> sample size = 400000
    available = total_IDTF_lines()
    print(available)

    # Distinct line indices, drawn without replacement, in ascending order.
    n_draw = min(available, sample_size)
    chosen = sorted(random.sample(xrange(available), n_draw))

    descriptors = IDT_feature.list_descriptors_sampled(
        GMM_dir, sample_vids, chosen)

    # Construct a gmm model for each of the different descriptor types.
    gmm_list = []
    for bm in IDT_feature.bm_descriptors(descriptors):
        gmm_list.append(gmm_model(bm, k_gmm, PCA=PCA))
    print("gmm_list complete")

    np.savez(GMM_OUT, gmm_list=gmm_list)
    print("Save gmm_list.npz")
    return gmm_list
def populate_gmms(PROJ_DIR, TMP_FEATURES, k_gmm, GMM_OUT, sample_size=1500000, PCA=False):
    """
    Fit one GMM per descriptor type from a random subset of the IDTFs found
    in the '.features' files of TMP_FEATURES.

    PROJ_DIR is the directory of the project (it is accepted for the caller's
    convenience but is not read by this function).
    TMP_FEATURES is the directory of the temporary .features files.
    k_gmm and PCA are forwarded unchanged to gmm_model.
    GMM_OUT is the output file; the GMMs are saved there as the gmm_list
    attribute.
    sample_size is the number of IDTFs to draw; it is capped at the count
    reported by total_IDTF_lines(TMP_FEATURES).

    Returns the list of gmms.
    """
    # Only the '.features' entries of the directory take part in the sampling.
    feature_files = []
    for entry in os.listdir(TMP_FEATURES):
        if entry.endswith('.features'):
            feature_files.append(entry)

    # The original author recorded total_lines = 2488317 for their data.
    available = total_IDTF_lines(TMP_FEATURES)
    # Same text the Python 2 statement `print "...", value` would emit.
    print("Total IDTFs constructed" + " " + str(available))

    # Distinct line indices, drawn without replacement, in ascending order.
    n_draw = min(available, sample_size)
    chosen = sorted(random.sample(xrange(available), n_draw))

    descriptors = IDT_feature.list_descriptors_sampled(
        TMP_FEATURES, feature_files, chosen)
    per_type = IDT_feature.bm_descriptors(descriptors)

    # Construct a gmm model for each of the different descriptor types.
    gmm_list = [gmm_model(descr, k_gmm, PCA=PCA) for descr in per_type]
    np.savez(GMM_OUT, gmm_list=gmm_list)
    return gmm_list
def populate_gmms(sample_vids, GMM_OUT, k_gmm, sample_size=1500000, PCA=False):
    """
    Fit one GMM per descriptor type from a random subset of the IDTFs.

    sample_vids is handed to IDT_feature.list_descriptors_sampled together
    with the module-level GMM_dir and the sampled line indices.
    GMM_OUT is the output file; the GMMs are saved there as the gmm_list
    attribute.
    k_gmm and PCA are forwarded unchanged to gmm_model.
    sample_size is the number of IDTFs to draw; it is capped at the count
    reported by total_IDTF_lines().

    Returns the list of gmms.
    """
    # The original author recorded total_lines = 2488317 for their data.
    line_count = total_IDTF_lines()
    print(line_count)

    # Sample distinct line indices, then order them ascending.
    picked = random.sample(xrange(line_count), min(line_count, sample_size))
    picked.sort()

    sampled = IDT_feature.list_descriptors_sampled(GMM_dir, sample_vids, picked)
    per_type = IDT_feature.bm_descriptors(sampled)

    # Construct a gmm model for each of the different descriptor types.
    gmm_list = []
    for descr in per_type:
        gmm_list.append(gmm_model(descr, k_gmm, PCA=PCA))

    np.savez(GMM_OUT, gmm_list=gmm_list)
    return gmm_list
def populate_gmms(IDT_DIR, sample_vids, gmm_file, k_gmm, sample_size=1500000, PCA=False):
    """
    Fit one GMM per descriptor type from IDTFs sampled per video.

    IDT_DIR and sample_vids are handed to
    IDT_feature.list_descriptors_sampled along with the per-video quota.
    gmm_file is the output file; the GMMs are saved there as the gmm_list
    attribute.
    k_gmm and PCA are forwarded unchanged to gmm_model.
    sample_size is the total number of IDTFs aimed for; the per-video quota
    is ceil(sample_size / number of videos).

    The sampled descriptors are also saved, as the bm_list attribute, to
    'bm_descriptors_<sample_size>' in the same directory as gmm_file.

    Raises ZeroDivisionError if sample_vids is empty.
    Returns the list of gmms.
    """
    nr_vids = len(sample_vids)
    # Force true division: under Python 2, int / int floors first, which made
    # the surrounding np.ceil a no-op and under-sampled whenever sample_size
    # was not a multiple of nr_vids.
    nr_samples_pvid = int(np.ceil(float(sample_size) / nr_vids))

    sample_descriptors = IDT_feature.list_descriptors_sampled(
        IDT_DIR, sample_vids, nr_samples_pvid)
    bm_list = IDT_feature.bm_descriptors(sample_descriptors)

    # Save all sampled descriptors used for learning the gmms.
    bm_file = os.path.join(os.path.dirname(gmm_file),
                           'bm_descriptors_%d' % (sample_size,))
    np.savez(bm_file, bm_list=bm_list)

    # NOTE: per the original author's remark, the RootSIFT-style square root
    # of the descriptors is "already done" before this point, so it is not
    # applied again here.

    # Construct gmm models for each of the different descriptor types.
    gmm_list = [gmm_model(bm, k_gmm, PCA=PCA) for bm in bm_list]
    np.savez(gmm_file, gmm_list=gmm_list)
    return gmm_list
def populate_gmms(sample_vids, GMM_OUT, k_gmm, sample_size=256000, PCA=False,
                  total_lines=8081693):
    """
    Fit one GMM per descriptor type from a random subset of the IDTFs.

    sample_vids is handed to IDT_feature.list_descriptors_sampled together
    with the module-level GMM_dir and the sampled line indices.
    GMM_OUT is the output file; the GMMs are saved there as the gmm_list
    attribute.
    k_gmm and PCA are forwarded unchanged to gmm_model.
    sample_size is the number of IDTFs to draw; it is capped at total_lines.
    total_lines is the number of IDTF lines to sample indices from. The
    default is the count the original author hard-coded in the body; pass the
    real count for any other data set (the original code had a commented-out
    call to total_IDTF_lines() for this -- confirm it is available before
    relying on it).

    Returns the list of gmms.
    """
    print('All lines: ', str(total_lines))

    # Distinct line indices, drawn without replacement, in ascending order.
    sample_size = min(total_lines, sample_size)
    sample_indices = sorted(random.sample(xrange(total_lines), sample_size))

    sample_descriptors = IDT_feature.list_descriptors_sampled(
        GMM_dir, sample_vids, sample_indices)
    bm_list = IDT_feature.bm_descriptors(sample_descriptors)

    # Construct gmm models for each of the different descriptor types.
    gmm_list = [gmm_model(bm, k_gmm, PCA=PCA) for bm in bm_list]
    np.savez(GMM_OUT, gmm_list=gmm_list)
    return gmm_list
def populate_gmms(IDT_DIR, sample_vids, gmm_file, k_gmm, sample_size=1500000, PCA=False):
    """
    Fit one GMM per descriptor type from IDTFs sampled per video.

    IDT_DIR and sample_vids are handed to
    IDT_feature.list_descriptors_sampled along with the per-video quota.
    gmm_file is the output file; the GMMs are saved there as the gmm_list
    attribute.
    k_gmm and PCA are forwarded unchanged to gmm_model.
    sample_size is the total number of IDTFs aimed for; each video
    contributes ceil(sample_size / number of videos) samples.

    As a side effect the sampled descriptors are written, as the bm_list
    attribute, to 'bm_descriptors_<sample_size>' next to gmm_file.

    Raises ZeroDivisionError if sample_vids is empty.
    Returns the list of gmms.
    """
    nr_vids = len(sample_vids)
    # Convert to float before dividing so the quotient is not floored by
    # Python 2 integer division; otherwise np.ceil receives an already
    # truncated value and the per-video quota comes out one too small
    # whenever sample_size is not a multiple of nr_vids.
    nr_samples_pvid = int(np.ceil(float(sample_size) / nr_vids))

    sample_descriptors = IDT_feature.list_descriptors_sampled(
        IDT_DIR, sample_vids, nr_samples_pvid)
    bm_list = IDT_feature.bm_descriptors(sample_descriptors)

    # Save all sampled descriptors used for learning the gmms.
    bm_dir = os.path.dirname(gmm_file)
    bm_file = os.path.join(bm_dir, 'bm_descriptors_%d' % (sample_size, ))
    np.savez(bm_file, bm_list=bm_list)

    # NOTE: the original author remarks that the square root (as in RootSIFT)
    # is "already done" to the descriptors, so no further normalisation is
    # applied here.

    # Construct gmm models for each of the different descriptor types.
    gmm_list = [gmm_model(bm, k_gmm, PCA=PCA) for bm in bm_list]
    np.savez(gmm_file, gmm_list=gmm_list)
    return gmm_list