def populate_gmms(sample_vids, GMM_OUT, k_gmm, sample_size=400000, PCA=False):
    """
    Build one GMM per descriptor group from a random subset of the IDTFs.

    sample_vids: passed through to IDT_feature.list_descriptors_sampled.
    GMM_OUT:     output file handed to np.savez; the models are stored
                 under the 'gmm_list' key.
    k_gmm:       forwarded to gmm_model for every descriptor group.
    sample_size: upper bound on how many IDTF lines are sampled; it is
                 capped at the count returned by total_IDTF_lines().
    PCA:         forwarded to gmm_model as its PCA keyword.

    Returns the list of GMMs (one entry per element of the list produced
    by IDT_feature.bm_descriptors).

    Figures noted by the original author: 2488317 total lines with a
    sample size of 1500000; 686680 total lines (30 classes) with a sample
    size of 400000.
    """
    line_count = total_IDTF_lines()
    print(line_count)

    # Never request more samples than there are lines to draw from.
    if line_count < sample_size:
        sample_size = line_count

    # Distinct line indices, handed on in ascending order.
    chosen_indices = sorted(random.sample(xrange(line_count), sample_size))

    # GMM_dir is a module-level name that is not defined inside this function.
    descriptors = IDT_feature.list_descriptors_sampled(
        GMM_dir, sample_vids, chosen_indices)
    descriptor_groups = IDT_feature.bm_descriptors(descriptors)

    # Fit a separate model for each descriptor group.
    gmm_list = []
    for group in descriptor_groups:
        gmm_list.append(gmm_model(group, k_gmm, PCA=PCA))
    print("gmm_list complete")

    np.savez(GMM_OUT, gmm_list=gmm_list)
    print("Save gmm_list.npz")
    return gmm_list
def populate_gmms(PROJ_DIR, TMP_FEATURES, k_gmm, GMM_OUT, sample_size=1500000, PCA=False):
    """
    Build one GMM per descriptor group from IDTFs sampled out of the
    '.features' files found in TMP_FEATURES.

    PROJ_DIR:     project directory; accepted for interface compatibility
                  but not read by this function.
    TMP_FEATURES: directory holding the temporary '.features' files.
    k_gmm:        forwarded to gmm_model for every descriptor group.
    GMM_OUT:      output file handed to np.savez; the models are stored
                  under the 'gmm_list' key.
    sample_size:  upper bound on how many IDTF lines are sampled; it is
                  capped at the count returned by
                  total_IDTF_lines(TMP_FEATURES).
    PCA:          forwarded to gmm_model as its PCA keyword.

    Returns the list of GMMs.
    """
    # Keep only the feature files, in the order os.listdir reports them.
    feature_files = []
    for entry in os.listdir(TMP_FEATURES):
        if entry.endswith('.features'):
            feature_files.append(entry)

    # The original author recorded 2488317 total lines for their data set.
    line_count = total_IDTF_lines(TMP_FEATURES)
    print(" ".join(("Total IDTFs constructed", str(line_count))))

    # Never request more samples than there are lines to draw from.
    if line_count < sample_size:
        sample_size = line_count

    # Distinct line indices, handed on in ascending order.
    chosen_indices = sorted(random.sample(xrange(line_count), sample_size))

    descriptors = IDT_feature.list_descriptors_sampled(
        TMP_FEATURES, feature_files, chosen_indices)
    descriptor_groups = IDT_feature.bm_descriptors(descriptors)

    # Fit a separate model for each descriptor group.
    gmm_list = []
    for group in descriptor_groups:
        gmm_list.append(gmm_model(group, k_gmm, PCA=PCA))

    np.savez(GMM_OUT, gmm_list=gmm_list)
    return gmm_list
def populate_gmms(sample_vids, GMM_OUT, k_gmm, sample_size=1500000, PCA=False):
    """
    Sample previously computed IDTFs at random and fit one GMM per
    descriptor group.

    sample_vids: passed through to IDT_feature.list_descriptors_sampled.
    GMM_OUT:     output file handed to np.savez; the models are stored
                 under the 'gmm_list' key.
    k_gmm:       forwarded to gmm_model for every descriptor group.
    sample_size: upper bound on how many IDTF lines are sampled; it is
                 capped at the count returned by total_IDTF_lines().
    PCA:         forwarded to gmm_model as its PCA keyword.

    Returns the list of GMMs.
    """
    # The original author recorded 2488317 total lines for their data set.
    available = total_IDTF_lines()
    print(available)

    # Draw distinct line indices (at most `available` of them) and order them.
    how_many = min(available, sample_size)
    picked = random.sample(xrange(available), how_many)
    picked = sorted(picked)

    # GMM_dir is a module-level name that is not defined inside this function.
    sampled = IDT_feature.list_descriptors_sampled(GMM_dir, sample_vids, picked)

    # One model per descriptor group returned by bm_descriptors.
    gmm_list = []
    for descriptor_block in IDT_feature.bm_descriptors(sampled):
        gmm_list.append(gmm_model(descriptor_block, k_gmm, PCA=PCA))

    np.savez(GMM_OUT, gmm_list=gmm_list)
    return gmm_list
def populate_gmms(IDT_DIR, sample_vids, gmm_file, k_gmm, sample_size=1500000, PCA=False):
    """
    Sample IDTFs evenly across videos and fit one GMM per descriptor group.

    IDT_DIR:     passed through to IDT_feature.list_descriptors_sampled.
    sample_vids: the videos to sample from; must be non-empty (an empty
                 sequence raises ZeroDivisionError).
    gmm_file:    output file handed to np.savez; the models are stored
                 under the 'gmm_list' key.
    k_gmm:       forwarded to gmm_model for every descriptor group.
    sample_size: total number of IDTFs wanted across all videos; each
                 video is asked for ceil(sample_size / number of videos).
    PCA:         forwarded to gmm_model as its PCA keyword.

    Side effect: the sampled descriptor groups are also saved, under the
    'bm_list' key, to 'bm_descriptors_<sample_size>' in the directory of
    gmm_file.

    Returns the list of GMMs.
    """
    nr_vids = len(sample_vids)

    # Force true division: with two ints Python 2 floors the quotient
    # before np.ceil sees it, so the per-video quota was never rounded up.
    nr_samples_pvid = int(np.ceil(sample_size / float(nr_vids)))

    sample_descriptors = IDT_feature.list_descriptors_sampled(
        IDT_DIR, sample_vids, nr_samples_pvid)
    bm_list = IDT_feature.bm_descriptors(sample_descriptors)

    # Keep every sampled descriptor on disk next to the GMM file so the
    # exact training data of the GMMs can be inspected later.
    bm_file = os.path.join(os.path.dirname(gmm_file),
                           'bm_descriptors_%d' % (sample_size,))
    np.savez(bm_file, bm_list=bm_list)

    # NOTE(review): the original author's note says the RootSIFT-style
    # square root is "already done", so no sqrt is applied here. This is
    # not verifiable from this function; confirm upstream.

    # Construct gmm models for each of the different descriptor types.
    gmm_list = [gmm_model(bm, k_gmm, PCA=PCA) for bm in bm_list]
    np.savez(gmm_file, gmm_list=gmm_list)
    return gmm_list
def populate_gmms(sample_vids, GMM_OUT, k_gmm, sample_size=256000, PCA=False,
                  total_lines=8081693):
    """
    Fit one GMM per descriptor group from a random subset of the IDTFs.

    sample_vids: passed through to IDT_feature.list_descriptors_sampled.
    GMM_OUT:     output file handed to np.savez; the models are stored
                 under the 'gmm_list' key.
    k_gmm:       forwarded to gmm_model for every descriptor group.
    sample_size: upper bound on how many IDTF lines are sampled; it is
                 capped at total_lines.
    PCA:         forwarded to gmm_model as its PCA keyword.
    total_lines: number of IDTF lines to draw indices from. It defaults
                 to 8081693, the value previously hard-coded in the body
                 (its provenance is not documented here). Pass the real
                 line count of your data set to sample it correctly.

    Returns the list of GMMs.
    """
    print('All lines: ', str(total_lines))

    # Never request more samples than there are lines to draw from.
    sample_size = min(total_lines, sample_size)

    # Distinct line indices, handed on in ascending order.
    sample_indices = sorted(random.sample(xrange(total_lines), sample_size))

    # GMM_dir is a module-level name that is not defined inside this function.
    sample_descriptors = IDT_feature.list_descriptors_sampled(
        GMM_dir, sample_vids, sample_indices)
    bm_list = IDT_feature.bm_descriptors(sample_descriptors)

    # Construct gmm models for each of the different descriptor types.
    gmm_list = [gmm_model(bm, k_gmm, PCA=PCA) for bm in bm_list]
    np.savez(GMM_OUT, gmm_list=gmm_list)
    return gmm_list
def processVideo(vid, IDT_DIR, FV_DIR, gmm_list):
    """
    Turn the stored IDTFs of one video into a Fisher Vector under FV_DIR.

    vid:      video file name; the text before its first '.' is used as
              the base name of both the input and the output file.
    IDT_DIR:  directory expected to contain '<base>.bin'.
    FV_DIR:   directory in which '<base>.fv' is handed to
              computeFV.create_fisher_vector as the output path.
    gmm_list: a list of gmms, forwarded to computeFV.create_fisher_vector.

    Returns True after a Fisher Vector has been computed. Returns False
    (after printing a message) when the '.bin' input is missing or when
    '<base>.fv.mat' already exists in FV_DIR.
    """
    base_name = vid.split('.')[0]

    # Nothing to do without the extracted features.
    idt_path = os.path.join(IDT_DIR, base_name + '.bin')
    if not os.path.exists(idt_path):
        print('%s IDT Feature does not exist!' % vid)
        return False

    # Skip videos whose result is already on disk.
    fv_path = os.path.join(FV_DIR, base_name + '.fv')
    if os.path.exists(fv_path + '.mat'):
        print('%s Fisher Vector exists, skip!' % vid)
        return False

    idt_points = IDT_feature.read_IDTF_file(idt_path)
    video_desc = IDT_feature.vid_descriptors(idt_points)
    computeFV.create_fisher_vector(gmm_list, video_desc, fv_path)
    return True
def processVideo(vid, IDT_DIR, FV_DIR, gmm_list):
    """
    Read the IDTFs of a single video and write its Fisher Vector to FV_DIR.

    vid:      video file name; only the part before the first '.' is
              used to name the input and output files.
    IDT_DIR:  directory expected to contain '<stem>.bin'.
    FV_DIR:   directory of the output; '<stem>.fv' is the path passed to
              computeFV.create_fisher_vector.
    gmm_list: a list of gmms, forwarded to computeFV.create_fisher_vector.

    Returns True when a Fisher Vector was computed, and False (with a
    printed message) when the input file is absent or '<stem>.fv.mat' is
    already present.
    """
    stem = vid.split('.')[0]
    source_path = os.path.join(IDT_DIR, '%s.bin' % stem)
    target_path = os.path.join(FV_DIR, '%s.fv' % stem)

    # Decide whether there is anything to do; the missing-input check
    # takes precedence over the already-done check.
    skip_reason = None
    if not os.path.exists(source_path):
        skip_reason = '%s IDT Feature does not exist!' % vid
    elif os.path.exists(target_path + '.mat'):
        skip_reason = '%s Fisher Vector exists, skip!' % vid

    if skip_reason is not None:
        print(skip_reason)
        return False

    features = IDT_feature.read_IDTF_file(source_path)
    computeFV.create_fisher_vector(
        gmm_list, IDT_feature.vid_descriptors(features), target_path)
    return True
def populate_gmms(IDT_DIR,
                  sample_vids,
                  gmm_file,
                  k_gmm,
                  sample_size=1500000,
                  PCA=False):
    """
    Sample IDTFs evenly across videos and fit one GMM per descriptor group.

    IDT_DIR:     passed through to IDT_feature.list_descriptors_sampled.
    sample_vids: the videos to sample from; must be non-empty (an empty
                 sequence raises ZeroDivisionError).
    gmm_file:    output file handed to np.savez; the models are stored
                 under the 'gmm_list' key.
    k_gmm:       forwarded to gmm_model for every descriptor group.
    sample_size: total number of IDTFs wanted across all videos; each
                 video is asked for ceil(sample_size / number of videos).
    PCA:         forwarded to gmm_model as its PCA keyword.

    Side effect: the sampled descriptor groups are also saved, under the
    'bm_list' key, to 'bm_descriptors_<sample_size>' in the directory of
    gmm_file.

    Returns the list of GMMs.
    """
    nr_vids = len(sample_vids)

    # Force true division: with two ints Python 2 floors the quotient
    # before np.ceil sees it, so the per-video quota was never rounded up.
    nr_samples_pvid = int(np.ceil(float(sample_size) / nr_vids))

    sample_descriptors = IDT_feature.list_descriptors_sampled(
        IDT_DIR, sample_vids, nr_samples_pvid)
    bm_list = IDT_feature.bm_descriptors(sample_descriptors)

    # Save all sampled descriptors used for learning the GMMs, next to
    # the GMM file itself.
    bm_file = os.path.join(
        os.path.dirname(gmm_file), 'bm_descriptors_%d' % (sample_size, ))
    np.savez(bm_file, bm_list=bm_list)

    # NOTE(review): the original author's note says the RootSIFT-style
    # square root is "already done", so no sqrt is applied here. This is
    # not verifiable from this function; confirm upstream.

    # Construct gmm models for each of the different descriptor types.
    gmm_list = [gmm_model(bm, k_gmm, PCA=PCA) for bm in bm_list]
    np.savez(gmm_file, gmm_list=gmm_list)
    return gmm_list
else: svm = classify_library.load_model('../data/models/svm_nopca.sav') gmm_list = np.load(gmm_list + ".npz")['gmm_list'] index_class = np.load(class_index)['index_class'] index_class = index_class[()] points = [] # a list of IDT features. frame_lim = frame_step for line in sys.stdin: if line[0] != '[': # avoid getting info message as data frame = int(line.split()[0]) if frame_lim <= frame: frame_lim = frame_lim + frame_step # print frame_lim<=frame if points != []: video_desc = IDT_feature.vid_descriptors(points) fish = computeFV.create_fisher_vector_unsaved( gmm_list, video_desc) fish = np.array(fish).reshape(1, -1) if args.no_pca: result = svm.predict(fish) else: fish_pca = pca.transform(fish) result = svm.predict(fish_pca) print '\n' + 'RESULT: ' + OKGREEN + BOLD + index_class[ result[0]] + ENDC + '\n' points = [] points.append(IDT_feature.IDTFeature(line))
"""
computes a Fisher vector given an input stream of IDTFs

Usage:
    stream_of_IDTFs | python computeFVstream.py fisher_path gmm_list
    ./DenseTrackStab video_file | python computeFVstream.py fisher_path gmm_list
"""
import sys  # required for sys.stdin below; it was missing (NameError at runtime)
import argparse
from tempfile import TemporaryFile

import numpy as np
from yael import ynumpy

import IDT_feature
import computeFV


def main():
    """
    Read IDTF lines from stdin and save the resulting Fisher Vector.

    Command-line arguments:
        fisher_path: file to save the output Fisher Vector.
        gmm_list:    file of the saved list of GMMs, given without the
                     '.npz' extension (it is appended here).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("fisher_path",
                        help="File to save the output Fisher Vector",
                        type=str)
    parser.add_argument("gmm_list",
                        help="File of saved list of GMMs",
                        type=str)
    args = parser.parse_args()

    gmm_list = np.load(args.gmm_list + ".npz")['gmm_list']

    # The input is a stream of IDTFs associated with a single video:
    # every stdin line becomes one IDT feature.
    points = [IDT_feature.IDTFeature(line) for line in sys.stdin]

    video_desc = IDT_feature.vid_descriptors(points)
    computeFV.create_fisher_vector(gmm_list, video_desc, args.fisher_path)


if __name__ == '__main__':
    main()