def get_bow_vec(config):
    """Write a placeholder ASR "bag-of-words" vector for every listed video.

    For each entry of the train/validation/test lists the pickled ASR word
    list is read and a ``(1, vocab_size)`` one-hot vector is saved with
    ``np.save`` under ``config.asr_bow_root_path``.

    NOTE: this is not a real word histogram. The single non-zero entry sits
    at index ``len(word_list) % vocab_size``, i.e. it only depends on the
    number of words in the transcript.

    :param config: configuration object providing the list filenames, the
        ASR input/output directories, the file suffixes and the vocabulary
        pickle filename.
    :return: None
    """
    # read and concatenate train/validation/test video lists
    all_video_label_list = utils.get_video_and_label_list(config.all_train_list_filename) + \
        utils.get_video_and_label_list(config.all_val_list_filename) + \
        utils.get_video_and_label_list(config.all_test_list_filename)

    if not os.path.exists(config.asr_bow_root_path):
        os.mkdir(config.asr_bow_root_path)

    # the vector length equals the vocabulary size
    vocab_book = utils.read_object_from_pkl(config.cmu_asr_vocabbook_filename)
    word_len = len(vocab_book)

    for now_video_label in all_video_label_list:
        vid_name = now_video_label[0]
        asr_filename = os.path.join(config.cmu_asr_root_path,
                                    vid_name + config.cmu_asr_file_format)
        asr_bow_filename = os.path.join(config.asr_bow_root_path,
                                        vid_name + config.asr_bow_file_format)

        # A video without a transcript must not abort the whole run.
        if not os.path.exists(asr_filename):
            print("File " + asr_filename + " does not exist")
            continue

        word_list = utils.read_object_from_pkl(asr_filename)
        asr_bow_vec = np.zeros((1, word_len))
        # placeholder encoding: one-hot at (word count mod vocabulary size)
        asr_bow_vec[0, len(word_list) % word_len] = 1
        np.save(asr_bow_filename, asr_bow_vec)
def get_cnn_fts(config, gps=('train',)):
    """Collect sub-sampled CNN frame features for the selected data splits.

    Feature files are looked up in ``<config.dataset_root_path>/vgg16_fts``.
    A file belongs to a video when its name contains ``<vid_name>_``; at
    most the first 200 matching files (in ``os.listdir`` order) are used per
    video. Each loaded array is flattened and every 6th value is kept.

    :param config: configuration object with the list filenames and
        ``dataset_root_path``.
    :param gps: iterable of split names; any of ``'train'``, ``'val'``,
        ``'test'``.
    :return: ``(xs, ys, vids)`` - feature matrix as ``np.array``, and the
        per-row label and video name lists.
    """
    all_video_label_list = []
    if 'train' in gps:
        all_video_label_list += utils.get_video_and_label_list(config.all_train_list_filename)
    if 'val' in gps:
        all_video_label_list += utils.get_video_and_label_list(config.all_val_list_filename)
    if 'test' in gps:
        all_video_label_list += utils.get_video_and_label_list(config.all_test_list_filename)
    print("get_surf_kps %d" % len(all_video_label_list))

    cnn_fts_path = os.path.join(config.dataset_root_path, "vgg16_fts")
    fnames = os.listdir(cnn_fts_path)

    xs = []
    ys = []
    vids = []
    for now_video_label in all_video_label_list:
        vid_name = now_video_label[0]
        label = now_video_label[1]
        tmp_name = vid_name + '_'
        # NOTE(review): substring match - a name such as "X<vid_name>_1.npy"
        # would match as well; confirm whether a prefix match was intended.
        files = [fname for fname in fnames if tmp_name in fname]
        for fname in files[:200]:
            # np.load never returns None, so no None check is needed here
            npy = np.load(os.path.join(cnn_fts_path, fname)).reshape(-1)[0::6]
            xs.append(npy)
            ys.append(label)
            vids.append(vid_name)
        # progress indicator: hundreds of feature rows collected so far
        print("FINISH %d" % (len(ys) // 100))
    return np.array(xs), ys, vids
def bow_surf(k):
    """Encode every video's SURF descriptors as a mean bag-of-words vector.

    Loads the cluster centres from ``kmeans_<k>_surf_clusters.npy``, assigns
    each descriptor of each stored keyframe to its nearest centre, builds
    one ``k``-bin count histogram per keyframe and saves the mean histogram
    (shape ``(1, k)``) to ``config.surf_bow_path``.

    Reads the module-level ``config``.

    :param k: number of cluster centres / histogram bins.
    :return: None
    """
    k_means_path = os.path.join(config.dataset_root_path,
                                "kmeans_" + str(k) + "_surf_clusters.npy")
    k_means_clusters = np.load(k_means_path)
    all_video_label_list = utils.get_video_and_label_list(config.all_train_list_filename) + \
        utils.get_video_and_label_list(config.all_test_list_filename) + \
        utils.get_video_and_label_list(config.all_val_list_filename)

    for now_video_label in all_video_label_list:
        vid_name = now_video_label[0]
        feat_path = os.path.join(config.surf_feat_path,
                                 vid_name + config.surf_feat_file_format)
        vid_kf = np.load(feat_path)
        surf_path = os.path.join(config.surf_bow_path,
                                 vid_name + config.surf_bow_file_format)
        print(len(vid_kf))

        # one histogram row per keyframe
        k_dim = np.zeros((len(vid_kf), k))
        for i, kf in enumerate(vid_kf):
            if kf is None:
                # a keyframe without descriptors keeps an all-zero histogram
                continue
            for j in range(kf.shape[0]):
                index = np.argmin(
                    np.linalg.norm(kf[j, :] - k_means_clusters, axis=1))
                k_dim[i][int(index)] += 1

        if len(vid_kf) > 0:
            k_dim_vid = np.mean(k_dim, axis=0)
        else:
            # the mean over zero rows would be NaN; store zeros instead
            k_dim_vid = np.zeros(k)
        k_dim_vid = k_dim_vid.reshape((1, k_dim_vid.shape[0]))
        np.save(surf_path, k_dim_vid)
def get_bow_vec(config):
    '''
    Generate ASR bag of words vector and save them.

    For every video of the train/validation/test lists whose ASR pickle
    exists, a ``(1, vocab_size)`` one-hot vector is saved with ``np.save``.
    The non-zero index is ``len(word_list) % vocab_size`` (a placeholder
    encoding, not a real word histogram). Missing ASR files are reported
    and skipped.

    :param config: configurations
    :return: None
    '''
    # read and concatenate train/validation/test video lists
    all_video_label_list = utils.get_video_and_label_list(config.all_train_list_filename) + \
        utils.get_video_and_label_list(config.all_val_list_filename) + \
        utils.get_video_and_label_list(config.all_test_list_filename)

    if not os.path.exists(config.asr_bow_root_path):
        os.mkdir(config.asr_bow_root_path)

    # the vector length equals the vocabulary size
    vocab_book = utils.read_object_from_pkl(config.cmu_asr_vocabbook_filename)
    word_len = len(vocab_book)

    for now_video_label in all_video_label_list:
        vid_name = now_video_label[0]
        asr_filename = os.path.join(config.cmu_asr_root_path,
                                    vid_name + config.cmu_asr_file_format)
        asr_bow_filename = os.path.join(config.asr_bow_root_path,
                                        vid_name + config.asr_bow_file_format)
        if os.path.exists(asr_filename):
            word_list = utils.read_object_from_pkl(asr_filename)
            asr_bow_vec = np.zeros((1, word_len))
            # we randomly set the Bag-of-Words representation vector
            # according to the number of words in ASR transcription file
            asr_bow_vec[0, len(word_list) % word_len] = 1
            np.save(asr_bow_filename, asr_bow_vec)
        else:
            # single-argument print(): same output on Python 2 and 3
            print("File " + asr_filename + " does not exist")
def asr_vectors(config):
    '''
    Generate ASR features.

    Loads the k-means model from ``models/kmeans_asr.pkl``, predicts the
    cluster of every row of each video's saved ASR bag-of-words array and
    stores the resulting cluster-count histogram (shape ``(1, n_bins)``)
    under ``config.asr_bow_feature_path``. Videos without a bag-of-words
    file are reported and skipped.

    :param config: configurations
    :return: None
    '''
    if not os.path.exists(config.asr_bow_feature_path):
        os.mkdir(config.asr_bow_feature_path)

    # read and concatenate train/validation/test video lists
    all_video_label_list = utils.get_video_and_label_list(config.all_train_list_filename) + \
        utils.get_video_and_label_list(config.all_val_list_filename) + \
        utils.get_video_and_label_list(config.all_test_list_filename)

    km = joblib.load('models/kmeans_asr.pkl')
    print("K-means model loaded success")

    for now_video_label in all_video_label_list:
        vid_name = now_video_label[0]
        asr_bow_filename = os.path.join(config.asr_bow_root_path,
                                        vid_name + config.asr_bow_file_format)
        asr_feature_full_fn = os.path.join(config.asr_bow_feature_path,
                                           vid_name + config.asr_bow_feature_format)
        if os.path.exists(asr_bow_filename):
            data = np.load(asr_bow_filename)
            pred = km.predict(data)
            # at least 200 bins - presumably the cluster count of the loaded
            # model; TODO confirm against the training code
            closest_counts = np.bincount(pred, minlength=200)
            closest_counts = closest_counts.reshape(1, closest_counts.shape[0])
            np.save(asr_feature_full_fn, closest_counts)
            print("{} feature vector done. ".format(vid_name))
        else:
            print("File: " + asr_bow_filename + " does not exist")
def video_downsample(config, ds_vid_len, ds_vid_frame_rate):
    """Down-sample every listed video with ffmpeg, one thread per video.

    Videos whose output file already exists are skipped. A new worker is
    only started while fewer than ``max_th`` threads are alive; the function
    returns after all started workers have finished.

    Relies on the module-level ``max_th`` and ``cmd_runner`` (presumably a
    helper that executes the given shell command - verify).

    :param config: configuration object with list filenames, the input and
        output video directories and the video file suffix.
    :param ds_vid_len: value passed to ffmpeg ``-t`` (formatted with ``%d``).
    :param ds_vid_frame_rate: value passed to ffmpeg ``-r`` (formatted with
        ``%d``).
    :return: None
    """
    import time  # local import: only needed for the throttle sleep below

    if not os.path.exists(config.ds_video_root_path):
        os.mkdir(config.ds_video_root_path)
    all_video_label_list = utils.get_video_and_label_list(config.all_train_list_filename) + \
        utils.get_video_and_label_list(config.all_test_list_filename) + \
        utils.get_video_and_label_list(config.all_val_list_filename)

    thread_pool = []
    for now_video_label in all_video_label_list:
        vid_name = now_video_label[0]
        vid_filename = os.path.join(config.video_root_path,
                                    vid_name + config.video_file_format)
        ds_vid_filename = os.path.join(config.ds_video_root_path,
                                       vid_name + config.video_file_format)
        if os.path.isfile(ds_vid_filename):
            continue
        assert(os.path.isfile(vid_filename))

        print("Down-sampling video :  %s" % vid_filename)
        ffmpeg_cmd = "ffmpeg -y -ss 0 -i %s -strict experimental -t %d -r %d %s" % (
            vid_filename, ds_vid_len, ds_vid_frame_rate, ds_vid_filename)
        print(ffmpeg_cmd)

        # Throttle: sleep while the pool is full instead of spinning on the
        # CPU, which would starve the workers.
        while len(threading.enumerate()) >= max_th:
            time.sleep(0.05)
        now_th = threading.Thread(target=cmd_runner, args=[ffmpeg_cmd])
        now_th.start()
        thread_pool.append(now_th)

    for th in thread_pool:
        th.join()
def extract_mfcc(config):
    """Placeholder for MFCC extraction.

    Currently it only reads the train, validation and test video lists and
    concatenates them; nothing is extracted, stored or returned.

    :param config: configuration object with the three list filenames.
    :return: None
    """
    train_entries = utils.get_video_and_label_list(config.all_train_list_filename)
    val_entries = utils.get_video_and_label_list(config.all_val_list_filename)
    test_entries = utils.get_video_and_label_list(config.all_test_list_filename)
    # combined train/validation/test list (not used any further yet)
    all_video_label_list = train_entries + val_entries + test_entries
def extract_audio(config):
    '''
    Extract the audio track from original mp4 videos using ffmpeg

    For every video of the train/validation/test lists whose audio file
    does not exist yet, ``ffmpeg`` is invoked through ``os.system`` to write
    a mono WAV file. Existing audio files are reported and left untouched.

    :param config: configurations
    :return: None
    '''
    if not os.path.exists(config.audio_root_path):
        os.mkdir(config.audio_root_path)

    # read and concatenate train/validation/test video lists
    all_video_label_list = utils.get_video_and_label_list(config.all_train_list_filename) + \
        utils.get_video_and_label_list(config.all_val_list_filename) + \
        utils.get_video_and_label_list(config.all_test_list_filename)

    # number of videos for which ffmpeg was invoked in this run
    count = 0
    # iterate over the video list and call system command for ffmpeg audio extraction
    for now_video_label in all_video_label_list:
        vid_name = now_video_label[0]
        vid_full_fn = os.path.join(config.video_root_path,
                                   vid_name + config.video_file_format)
        audio_full_fn = os.path.join(config.audio_root_path,
                                     vid_name + config.audio_file_format)
        if not os.path.exists(audio_full_fn):
            # call command line for audio track extraction
            command = "ffmpeg -y -i %s -ac 1 -f wav %s" % (vid_full_fn, audio_full_fn)
            os.system(command)
            count += 1
        else:
            print("File " + audio_full_fn + " already exist.")

    # NOTE: `count` is the number of extraction attempts; the wording of the
    # message is kept unchanged.
    print("In total number of " + str(len(all_video_label_list)) +
          " videos, there are " + str(count) + " files that has no audio.")
def extract_mfcc(config):
    '''
    Extract the mfcc feature from audio files.

    For every video of the train/validation/test lists whose audio file
    exists, the audio is loaded with ``librosa.load``, MFCCs are computed
    with ``librosa.feature.mfcc`` (library defaults) and saved with
    ``np.save`` under ``config.mfcc_root_path``. Missing audio files are
    reported and skipped.

    :param config: configurations
    :return: None
    '''
    if not os.path.exists(config.mfcc_root_path):
        os.mkdir(config.mfcc_root_path)

    # read and concatenate train/validation/test video lists
    all_video_label_list = utils.get_video_and_label_list(config.all_train_list_filename) + \
        utils.get_video_and_label_list(config.all_val_list_filename) + \
        utils.get_video_and_label_list(config.all_test_list_filename)

    for now_video_label in all_video_label_list:
        vid_name = now_video_label[0]
        audio_full_fn = os.path.join(config.audio_root_path,
                                     vid_name + config.audio_file_format)
        mfcc_full_fn = os.path.join(config.mfcc_root_path,
                                    vid_name + config.mfcc_file_format)
        if os.path.exists(audio_full_fn):
            y, sr = librosa.load(audio_full_fn)
            data = librosa.feature.mfcc(y=y, sr=sr)
            np.save(mfcc_full_fn, data)
        else:
            # single-argument print(): same output on Python 2 and 3
            print("File: " + audio_full_fn + " does not exist")
def get_mfcc_vecs(config, gps=('train',), stride=3):
    """Stack strided MFCC frames of the selected splits into one matrix.

    Files in ``config.mfcc_root_path`` are matched to videos by dropping the
    last 8 characters of the file name (presumably the MFCC file suffix -
    TODO confirm). Every ``stride``-th column of each matching array becomes
    one row of the result. The matrix is also saved in
    ``config.mfcc_root_path`` under the name ``'_'.join(gps)``.

    The files are read twice on purpose: once to size the output buffer and
    once to fill it, so only one file is held in memory besides the result.

    :param config: configuration object with list filenames and
        ``mfcc_root_path``.
    :param gps: iterable of split names; any of ``'train'``, ``'val'``,
        ``'test'``.
    :param stride: keep every ``stride``-th MFCC frame.
    :return: ``(valid_video_label_list, mfcc_vecs, vec_vid_names)`` - the
        list entries that have an MFCC file, the stacked frame matrix, and
        the video name of every row.
    :raises ValueError: if no MFCC file matches the selected videos.
    """
    print("GET NUMPY MFCC FEATURES FOR %s with stride %d" % (' and '.join(gps), stride))
    print("file path %s" % config.all_train_list_filename)

    all_video_label_list = []
    if 'train' in gps:
        all_video_label_list += utils.get_video_and_label_list(
            config.all_train_list_filename)
    if 'val' in gps:
        all_video_label_list += utils.get_video_and_label_list(
            config.all_val_list_filename)
    if 'test' in gps:
        all_video_label_list += utils.get_video_and_label_list(
            config.all_test_list_filename)

    vid_names = set([item[0] for item in all_video_label_list])
    # Only keep files that belong to one of the selected videos
    all_files = [item for item in os.listdir(config.mfcc_root_path)
                 if item[:-8] in vid_names]
    valid_audio_names = set([item[:-8] for item in all_files])
    valid_video_label_list = [
        x for x in all_video_label_list if x[0] in valid_audio_names
    ]
    print("........TOTAL NUMBER OF INTERESTED VIDEOS %d" % len(vid_names))
    print("........TOTAL NUMBER OF NUMPY FILES NEEDED TO BE INTERESTED IN %d" % len(all_files))
    print("........TOTAL NUMBER OF AUDIO FILES NEEDED TO BE INTERESTED IN %d" % len(valid_audio_names))
    print("........%d " % (len(valid_video_label_list)))

    if not all_files:
        # previously this surfaced as an unbound-variable error further down
        raise ValueError("no MFCC files found in %s for %s"
                         % (config.mfcc_root_path, ' and '.join(gps)))

    # pass 1: count the rows that remain after striding
    mfcc_vec_num = 0
    for mfcc_part_fn in all_files:
        npy = np.load(os.path.join(config.mfcc_root_path, mfcc_part_fn))
        # integer ceil-division: number of columns kept by [:, 0::stride]
        mfcc_vec_num += (npy.shape[1] + stride - 1) // stride
    print("Shape", npy.shape[0])
    mfcc_vecs = np.empty(shape=(mfcc_vec_num, npy.shape[0]))
    print("........TOTAL MFCC VEC NUMS: %d" % mfcc_vecs.shape[0])

    # pass 2: fill the buffer, remembering which video each row came from
    st = 0
    vec_vid_names = [None] * mfcc_vecs.shape[0]
    for mfcc_part_fn in all_files:
        npy = np.load(os.path.join(config.mfcc_root_path, mfcc_part_fn))[:, 0::stride]
        en = st + npy.shape[1]
        vec_vid_names[st:en] = [mfcc_part_fn[:-8]] * (en - st)
        mfcc_vecs[st:en, :] = npy.transpose()
        st = en
    print("----------------------------------")

    np.save(os.path.join(config.mfcc_root_path, '_'.join(gps)), mfcc_vecs)
    return valid_video_label_list, mfcc_vecs, vec_vid_names
def gen_idt_bow_feat(config, rev_mode=False, exceptions=None):
    """Generate bag-of-words files from raw IDT features, one thread per video.

    The k-means codebook is read from ``config.idt_codebook_filename``. For
    every listed video that is not excluded and has no bag-of-words file
    yet, ``idt_bow_runner`` is started in a thread. At most ``max_th``
    threads are alive at once; the function returns when all workers have
    finished.

    Relies on the module-level ``max_th`` and ``idt_bow_runner``.

    :param config: configuration object with list filenames, codebook
        filename, IDT directories and file suffixes.
    :param rev_mode: unused; kept for interface compatibility.
    :param exceptions: optional container of video names to skip. ``None``
        (the default) skips nothing.
    :return: None
    """
    import time  # local import: only needed for the throttle sleep below

    # The default None used to make `vid_name in exceptions` raise TypeError.
    if exceptions is None:
        exceptions = ()

    # load the k-means clustering centers from pickle file...
    kmeans = utils.read_object_from_pkl(config.idt_codebook_filename)
    num_centers = len(kmeans.cluster_centers_)

    # path for bow encoded features and raw IDT features
    idt_feat_root_path = config.idt_bow_root_path
    idt_raw_feat_root_path = config.idt_raw_root_path
    if not os.path.exists(idt_feat_root_path):
        os.mkdir(idt_feat_root_path)

    all_video_label_list = utils.get_video_and_label_list(config.all_train_list_filename) + \
        utils.get_video_and_label_list(config.all_test_list_filename) + \
        utils.get_video_and_label_list(config.all_val_list_filename)

    thread_pool = []
    for now_video_label in all_video_label_list:
        vid_name = now_video_label[0]
        if vid_name in exceptions:
            continue
        # the original improved dense trajectory file...
        vid_raw_idt_filename = os.path.join(
            idt_raw_feat_root_path, vid_name + config.idt_raw_file_format)
        # the bag-of-words representation file...
        vid_idt_filename = os.path.join(
            idt_feat_root_path, vid_name + config.idt_bow_file_format)
        print("from:  %s ---> to:  %s" % (vid_raw_idt_filename, vid_idt_filename))

        # if the bag-of-words representation file already existed, skip...
        if os.path.isfile(vid_idt_filename):
            continue

        # wait for a free slot; sleep instead of spinning on the CPU
        while len(threading.enumerate()) >= max_th:
            time.sleep(0.05)

        now_th = threading.Thread(target=idt_bow_runner, args=[
            vid_name, vid_raw_idt_filename, vid_idt_filename, num_centers,
            kmeans
        ])
        now_th.start()
        thread_pool.append(now_th)

    # wait all threads to be finished...
    for th in thread_pool:
        th.join()
def evaluate_ap(config):
    """Print the average precision of every event classifier and their mean.

    Ground truth is read from ``example_gt_and_pred/gt.lst``; the scores of
    each event from ``example_gt_and_pred/<event_id>_pred_score.lst``. A
    video is a positive for an event when its label equals the event id.

    :param config: configuration object providing ``event_id_name_dict``
        (event id -> event name).
    :return: None
    """
    # load the ground-truth file list
    gt_list_fn = "example_gt_and_pred/gt.lst"
    test_video_label_list = utils.get_video_and_label_list(gt_list_fn)

    ap_sum = 0
    # .items() works on both Python 2 and 3 (iteritems is Python-2-only)
    for event_id, event_name in config.event_id_name_dict.items():
        print("Evaluating the average precision (AP) with classifier  %s  name:  %s ..."
              % (event_id, event_name))
        # load the outputted prediction score files to calculate the average precision
        event_pred_score_fn = os.path.join("example_gt_and_pred",
                                           event_id + "_pred_score.lst")
        y_score = utils.read_score_list_from_file(event_pred_score_fn)
        y_gt = [1 if now_video_label[1] == event_id else 0
                for now_video_label in test_video_label_list]

        # the number of ground-truths and the number of prediction scores should be same
        assert (len(y_gt) == len(y_score))

        # compute once, use for both the running sum and the report
        ap = average_precision_score(y_gt, y_score)
        ap_sum += ap
        print("Average precision:  %s" % ap)

    # average over the actual number of events instead of a hard-coded 3
    num_events = len(config.event_id_name_dict)
    mean_ap = ap_sum / float(num_events) if num_events else 0.0
    print("Finish evaluating the average precision (AP) metric on all classifiers... %s"
          % mean_ap)
def surf_histogram_builder(config, model, gps_out):
    """Build max+mean pooled SURF cluster histograms for the selected splits.

    For every listed video, all files in
    ``<config.dataset_root_path>/surf_feature`` whose name contains
    ``<vid_name>_`` are loaded. The key points of each file are assigned to
    clusters with ``kmeans_test(model, ...)`` and counted into one histogram
    per file. The per-file histograms are pooled with max and mean and
    concatenated into a row of length ``2 * model.n_clusters``.

    Videos without any matching file keep an all-zero row.

    :param config: configuration object with list filenames and
        ``dataset_root_path``.
    :param model: clustering model exposing ``n_clusters``.
    :param gps_out: container of split names; any of ``'train'``, ``'val'``,
        ``'test'``.
    :return: ``(histx, histy)`` - feature matrix with one row per listed
        video, and the list of labels in the same order.
    """
    all_video_label_list = []
    if 'train' in gps_out:
        all_video_label_list += utils.get_video_and_label_list(
            config.all_train_list_filename)
    if 'val' in gps_out:
        all_video_label_list += utils.get_video_and_label_list(
            config.all_val_list_filename)
    if 'test' in gps_out:
        all_video_label_list += utils.get_video_and_label_list(
            config.all_test_list_filename)

    surf_kps_root_path = os.path.join(config.dataset_root_path, "surf_feature")
    print("READ FROM %s %s %d" % (surf_kps_root_path, '_'.join(gps_out),
                                  len(all_video_label_list)))
    fnames = os.listdir(surf_kps_root_path)

    # zeros, not empty: rows of skipped videos must not contain garbage
    histx = np.zeros(shape=(len(all_video_label_list), model.n_clusters * 2))
    histy = []
    for cnt, now_video_label in enumerate(all_video_label_list):
        prefix = now_video_label[0] + '_'
        matched_fnames = [x for x in fnames if prefix in x]
        histy.append(now_video_label[1])
        if not matched_fnames:
            continue

        # Counts are accumulated with +=, so the buffer has to start at zero
        # (np.empty returns uninitialised memory).
        hist_tmp = np.zeros(shape=(len(matched_fnames), model.n_clusters))
        for cnt_frame, fname in enumerate(matched_fnames):
            tmpxx = np.load(os.path.join(surf_kps_root_path, fname))
            if len(tmpxx) > 0:
                for yy in kmeans_test(model, tmpxx):
                    hist_tmp[cnt_frame][yy] += 1

        max_pool = np.max(hist_tmp, axis=0)
        avg_pool = np.mean(hist_tmp, axis=0)
        histx[cnt] = np.concatenate([max_pool, avg_pool], axis=0)
        print("deal with %dth file " % cnt)
    return histx, histy
def get_surf_kps(config, gps=('train',)):
    """Stack all SURF key-point arrays of the selected splits into one matrix.

    Files in ``<config.dataset_root_path>/surf_feature`` are taken in sorted
    order. A file is used when its name without the last 8 characters
    equals ``<vid_name>_`` for a selected video and the stored array is
    2-D. The files are read twice: once to size the output, once to fill it.

    :param config: configuration object with list filenames and
        ``dataset_root_path``.
    :param gps: iterable of split names; any of ``'train'``, ``'val'``,
        ``'test'``.
    :return: array of shape ``(total_key_points, descriptor_dim)``.
    :raises ValueError: if no usable key-point file is found.
    """
    all_video_label_list = []
    if 'train' in gps:
        all_video_label_list += utils.get_video_and_label_list(
            config.all_train_list_filename)
    if 'val' in gps:
        all_video_label_list += utils.get_video_and_label_list(
            config.all_val_list_filename)
    if 'test' in gps:
        all_video_label_list += utils.get_video_and_label_list(
            config.all_test_list_filename)
    print("get_surf_kps %d" % len(all_video_label_list))

    surf_kps_root_path = os.path.join(config.dataset_root_path, "surf_feature")
    print("surf_kps_root_path %s" % surf_kps_root_path)
    video_set = set([x[0] + '_' for x in all_video_label_list])
    fnames = sorted(os.listdir(surf_kps_root_path))

    # pass 1: count rows and remember which files are usable, so that the
    # second pass fills exactly what was counted
    surf_kps_num = 0
    npyshape = None
    usable_fnames = []
    for fname in fnames:
        if fname[:-8] not in video_set:
            continue
        npy = np.load(os.path.join(surf_kps_root_path, fname))
        if len(npy.shape) != 2:
            continue
        surf_kps_num += npy.shape[0]
        npyshape = npy.shape
        usable_fnames.append(fname)
        del npy
    print("NPY shape should be ", npyshape, surf_kps_num)

    if npyshape is None:
        raise ValueError("no usable SURF key point files found in %s"
                         % surf_kps_root_path)

    # pass 2: copy every array into its slice of the output
    surf_kps = np.empty(shape=(surf_kps_num, npyshape[1]))
    st = 0
    for fname in usable_fnames:
        npy = np.load(os.path.join(surf_kps_root_path, fname))
        en = st + npy.shape[0]
        surf_kps[st:en] = npy
        # advance the write offset; without this every file overwrote row 0
        st = en
        del npy
    return surf_kps
def extract_surf():
    """Extract SURF descriptors from every 5th frame of each down-sampled video.

    For each listed video the descriptors of the sampled frames are
    collected in a list (an entry is ``None`` when a frame yields no
    descriptors) and saved with ``np.save`` under ``config.surf_feat_path``.

    Reads the module-level ``config``. Uses the OpenCV 2.x ``cv2.SURF`` API.

    :return: None
    """
    all_video_label_list = utils.get_video_and_label_list(config.all_train_list_filename) + \
        utils.get_video_and_label_list(config.all_test_list_filename) + \
        utils.get_video_and_label_list(config.all_val_list_filename)

    # One detector for all frames. Creating it up front also makes a missing
    # SURF implementation fail immediately instead of once per frame.
    surf = cv2.SURF(400)

    for now_video_label in all_video_label_list:
        vid_name = now_video_label[0]
        ds_file = os.path.join(config.ds_video_root_path,
                               vid_name + config.ds_video_file_format)
        ds_file_video = cv2.VideoCapture(ds_file)
        key_frame = []
        c = 0
        while (ds_file_video.isOpened()):
            ret, frame = ds_file_video.read(0)
            if ret != True:
                break
            if (c % 5 == 0):
                frame = frame.astype(np.uint8)
                try:
                    keypoints, descriptors = surf.detectAndCompute(frame, None)
                except Exception:
                    # detection failed for this frame: report it and go on
                    print("No", vid_name)
                else:
                    key_frame.append(descriptors)
                    if descriptors is None:
                        # frame without key points; the None entry is kept
                        print("No", vid_name)
                    else:
                        print(c, len(keypoints), descriptors.shape)
            c = c + 1
        ds_file_video.release()
        np.save(
            os.path.join(config.surf_feat_path,
                         vid_name + config.surf_feat_file_format), key_frame)

# NOTE(review): the original text ended here with an unterminated ''' (start
# of a commented-out region that is not part of this snippet); it is omitted.
def vlad_surf(k):
    """Encode every video's SURF descriptors as a mean VLAD vector.

    For each keyframe the residuals between the descriptors and their
    nearest cluster centre are summed per centre, flattened and
    L2-normalised. The mean over all keyframes (shape ``(1, k * dim)``) is
    saved under ``config.surf_vlad_path``.

    Reads the module-level ``config``.

    :param k: number of cluster centres stored in
        ``kmeans_<k>_surf_clusters.npy``.
    :return: None
    """
    k_means_path = os.path.join(config.dataset_root_path,
                                "kmeans_" + str(k) + "_surf_clusters.npy")
    k_means_clusters = np.load(k_means_path)
    # descriptor length taken from the codebook instead of a hard-coded 128
    feat_dim = k_means_clusters.shape[1]

    all_video_label_list = utils.get_video_and_label_list(config.all_train_list_filename) + \
        utils.get_video_and_label_list(config.all_test_list_filename) + \
        utils.get_video_and_label_list(config.all_val_list_filename)

    for now_video_label in all_video_label_list:
        vid_name = now_video_label[0]
        vid_kf = os.path.join(config.surf_feat_path,
                              vid_name + config.surf_feat_file_format)
        vid_kf = np.load(vid_kf)
        surf_path = os.path.join(config.surf_vlad_path,
                                 vid_name + config.surf_vlad_file_format)
        print(len(vid_kf))

        vlad_rows = []
        for kf in vid_kf:
            k_dim = np.zeros((k, feat_dim))
            if kf is not None:
                for j in range(kf.shape[0]):
                    index = np.argmin(
                        np.linalg.norm(kf[j, :] - k_means_clusters, axis=1))
                    diff = kf[j, :] - k_means_clusters[index]
                    k_dim[int(index)] = k_dim[int(index)] + diff
            k_dim = k_dim.reshape((1, -1))
            norm = np.linalg.norm(k_dim)
            if norm > 0:
                # skip all-zero vectors: dividing by 0 would give NaN
                k_dim = k_dim / norm
            vlad_rows.append(k_dim)

        if vlad_rows:
            k_dim_vid = np.mean(np.vstack(vlad_rows), axis=0)
        else:
            # No keyframes: store zeros. Previously the vector of the
            # preceding video was silently reused here.
            k_dim_vid = np.zeros(k * feat_dim)
        k_dim_vid = k_dim_vid.reshape((1, k_dim_vid.shape[0]))
        np.save(surf_path, k_dim_vid)
def get_idt_data(config, gps):
    """Load the saved IDT bag-of-words vector of every video in the given splits.

    :param config: configuration object with the list filenames and
        ``idt_bow_full_path``.
    :param gps: container of split names; any of ``'train'``, ``'val'``,
        ``'test'``.
    :return: ``(res, ys)`` - array of shape ``(n_videos, 256)`` with one
        loaded vector per row, and the list of labels in the same order.
    """
    # split name -> config attribute holding the list file, in load order
    split_attrs = (
        ('train', 'all_train_list_filename'),
        ('val', 'all_val_list_filename'),
        ('test', 'all_test_list_filename'),
    )
    entries = []
    for split, attr_name in split_attrs:
        if split in gps:
            entries += utils.get_video_and_label_list(getattr(config, attr_name))

    bow_dir = config.idt_bow_full_path
    res = np.zeros(shape=(len(entries), 256))
    ys = []
    for row, entry in enumerate(entries):
        ys.append(entry[1])
        res[row] = np.load(os.path.join(bow_dir, entry[0]) + '.npy')
    return res, ys
def feat_ext(config, exceptions):
    """Extract raw improved-dense-trajectory features, one thread per video.

    For every listed video that is not in ``exceptions`` the command
    ``./DenseTrackStab <avi> -W 15 -s 6 -t 6 | gzip > <out>`` is handed to
    ``cmd_runner`` in a new thread. At most ``max_th`` threads are alive at
    once; the function returns when all workers have finished.

    Relies on the module-level ``max_th`` and ``cmd_runner`` (presumably a
    helper that executes the given shell command - verify).

    :param config: configuration object with list filenames,
        ``avi_video_root_path`` and ``idt_raw_root_path``.
    :param exceptions: container of video names to skip.
    :return: None
    """
    idt_raw_feat_root_path = config.idt_raw_root_path
    if not os.path.exists(idt_raw_feat_root_path):
        os.mkdir(idt_raw_feat_root_path)
    st = time.time()

    # get the combined training and testing video list
    all_video_label_list = utils.get_video_and_label_list(config.all_train_list_filename) + \
        utils.get_video_and_label_list(config.all_test_list_filename) + \
        utils.get_video_and_label_list(config.all_val_list_filename)

    thread_pool = []
    for now_video_label in all_video_label_list:
        vid_name = now_video_label[0]
        if vid_name in exceptions:
            continue
        vid_filename = os.path.join(config.avi_video_root_path, vid_name + '.avi')
        vid_idt_filename = os.path.join(config.idt_raw_root_path, vid_name)
        cmd = "./DenseTrackStab %s -W 15 -s 6 -t 6 | gzip > %s" % (
            vid_filename, vid_idt_filename)

        # wait for a free slot; sleep instead of spinning on the CPU
        while len(threading.enumerate()) >= max_th:
            time.sleep(0.05)

        # second value: minutes elapsed since the start
        print(
            "Append another fine to the thread pool %s from %s to %s" %
            (vid_name, vid_filename, vid_idt_filename),
            (time.time() - st) * 1.0 / 60)
        now_th = threading.Thread(target=cmd_runner, args=[cmd])
        now_th.start()
        thread_pool.append(now_th)

    for th in thread_pool:
        th.join()
    print("Finishing extracting improved DenseTrajecotory features...")
def kmeans_surf(k):
    """Fit a k-means codebook on a 2% random sample of the SURF descriptors.

    For every keyframe of every train/validation video,
    ``ceil(0.02 * n_descriptors)`` rows are drawn with ``random.sample``.
    The sampled rows are clustered with ``KMeans(n_clusters=k,
    random_state=0)`` and the cluster centres are saved as
    ``kmeans_<k>_surf_clusters.npy`` in ``config.dataset_root_path``.

    Reads the module-level ``config``.

    :param k: number of clusters.
    :return: None
    :raises ValueError: if no descriptors are available.
    """
    all_video_label_list = utils.get_video_and_label_list(config.all_train_list_filename) + \
        utils.get_video_and_label_list(config.all_val_list_filename)

    # Collect all samples and stack once at the end; stacking after every
    # video re-copied the whole matrix each time.
    sampled = []
    for now_video_label in all_video_label_list:
        vid_name = now_video_label[0]
        print(vid_name)
        vid_kf = os.path.join(config.surf_feat_path,
                              vid_name + config.surf_feat_file_format)
        vid_kf = np.load(vid_kf)
        for each in vid_kf:
            if each is None:
                # keyframe without descriptors
                continue
            n_keep = int(np.ceil(0.02 * each.shape[0]))
            rows = random.sample(range(0, each.shape[0]), n_keep)
            sampled.append(each[rows, ])

    if not sampled:
        raise ValueError("no SURF descriptors available for k-means")

    kmeans_input = np.vstack(sampled)
    kmeans = KMeans(n_clusters=k, random_state=0).fit(kmeans_input)
    k_means_clusters = kmeans.cluster_centers_
    k_means_path = os.path.join(config.dataset_root_path,
                                "kmeans_" + str(k) + "_surf_clusters.npy")
    np.save(k_means_path, k_means_clusters)
def image_extraction(config):
    """Extract key frames (3 per second) from every down-sampled video.

    The output pattern is derived from the down-sampled video path by
    replacing ``down_samp_video`` with ``surf_images`` and ``.mp4`` with
    ``_\\%04d.jpg``. Videos without a down-sampled file are skipped. One
    ffmpeg command per video is handed to ``cmd_runner`` in a thread, with
    at most ``max_th`` threads alive at once.

    Relies on the module-level ``max_th`` and ``cmd_runner`` (presumably a
    helper that executes the given shell command - verify).

    :param config: configuration object with list filenames, video
        directories and the video file suffix.
    :return: None
    """
    import time  # local import: only needed for the throttle sleep below

    if not os.path.exists(config.ds_video_root_path):
        os.mkdir(config.ds_video_root_path)
    all_video_label_list = utils.get_video_and_label_list(config.all_train_list_filename) + \
        utils.get_video_and_label_list(config.all_test_list_filename) + \
        utils.get_video_and_label_list(config.all_val_list_filename)

    thread_pool = []
    cnt = 0
    skip = 0
    for now_video_label in all_video_label_list:
        cnt += 1
        vid_name = now_video_label[0]
        vid_filename = os.path.join(config.video_root_path,
                                    vid_name + config.video_file_format)
        ds_vid_filename = os.path.join(config.ds_video_root_path,
                                       vid_name + config.video_file_format)
        keyframe_filename = ds_vid_filename.replace('down_samp_video',
                                                    'surf_images')
        # '\\%' is the same two characters as the former '\%', written
        # without the invalid escape sequence.
        keyframe_filename = keyframe_filename.replace('.mp4', '_\\%04d.jpg')
        if not os.path.isfile(ds_vid_filename):
            skip += 1
            continue

        print("Extract key frame for video :  %s  to  %s"
              % (vid_filename, keyframe_filename))
        ffmpeg_cmd = "ffmpeg -ss 0 -i %s -vf fps=3 -loglevel error %s -hide_banner" % (
            ds_vid_filename, keyframe_filename)

        # wait for a free slot; sleep instead of spinning on the CPU
        while len(threading.enumerate()) >= max_th:
            time.sleep(0.05)
        now_th = threading.Thread(target=cmd_runner, args=[ffmpeg_cmd])
        now_th.start()
        thread_pool.append(now_th)

    print("Finish %d files Skip %d files " % (cnt - skip, skip))
    for th in thread_pool:
        th.join()
def extract_audio(config):
    """Extract a mono WAV audio track from every listed video with ffmpeg.

    The output directory ``config.audio_root_path`` is created when missing.
    ffmpeg is invoked through ``os.system`` once per entry of the combined
    train/validation/test lists; existing outputs are overwritten (``-y``).

    :param config: configuration object with list filenames, video/audio
        directories and file suffixes.
    :return: None
    """
    audio_dir_exists = os.path.exists(config.audio_root_path)
    if not audio_dir_exists:
        os.mkdir(config.audio_root_path)

    # combined train/validation/test list
    train_entries = utils.get_video_and_label_list(config.all_train_list_filename)
    val_entries = utils.get_video_and_label_list(config.all_val_list_filename)
    test_entries = utils.get_video_and_label_list(config.all_test_list_filename)

    for entry in train_entries + val_entries + test_entries:
        name = entry[0]
        source_video = os.path.join(config.video_root_path,
                                    name + config.video_file_format)
        target_audio = os.path.join(config.audio_root_path,
                                    name + config.audio_file_format)
        # one ffmpeg call per video
        os.system("ffmpeg -y -i %s -ac 1 -f wav %s" % (source_video, target_audio))
def wrap_mfcc_data(config, tags=('train',)):
    """Load the 200-dimensional per-video MFCC vectors of the selected splits.

    Vectors are read from ``<config.dataset_root_path>/pos_mfcc/<vid>.npy``.
    The label is 0 unless the label string contains ``'1'``, ``'2'`` or
    ``'3'``; when several digits occur the last check that matches wins.
    Videos without a feature file are left out of both outputs.

    :param config: configuration object with list filenames and
        ``dataset_root_path``.
    :param tags: iterable of split names; any of ``'train'``, ``'val'``,
        ``'test'``.
    :return: ``(x, y)`` - feature matrix of shape ``(len(y), 200)`` and the
        list of integer labels.
    """
    all_video_label_list = []
    if 'train' in tags:
        all_video_label_list += utils.get_video_and_label_list(
            config.all_train_list_filename)
    if 'val' in tags:
        all_video_label_list += utils.get_video_and_label_list(
            config.all_val_list_filename)
    if 'test' in tags:
        all_video_label_list += utils.get_video_and_label_list(
            config.all_test_list_filename)
    print("TOTAL NUMBER OF DOCS %d " % len(all_video_label_list))

    root_path = os.path.join(config.dataset_root_path, 'pos_mfcc')
    x = np.zeros(shape=(len(all_video_label_list), 200))
    y = []
    # number of listed videos per label digit (counted even when the
    # feature file is missing)
    cnt1 = 0
    cnt2 = 0
    cnt3 = 0
    for now_video_label in all_video_label_list:
        vid_name = now_video_label[0]
        label = 0
        if '1' in now_video_label[1]:
            label = 1
            cnt1 += 1
        if '2' in now_video_label[1]:
            label = 2
            cnt2 += 1
        if '3' in now_video_label[1]:
            label = 3
            cnt3 += 1
        fpath = os.path.join(root_path, vid_name + '.npy')
        if os.path.exists(fpath):
            x[len(y), :] = np.load(fpath)
            y.append(label)

    # Drop the unused trailing rows so x and y always have the same length,
    # also when some feature files are missing.
    x = x[:len(y)]
    print(x.shape, len(y), cnt1, cnt2, cnt3)
    return x, y
def test(k):
    """Score the test videos with the three pickled SVMs and write the results.

    Loads ``svm1_cnn.pkl``, ``svm2_cnn.pkl`` and ``svm3_cnn.pkl`` from
    ``config.cluster_classifiers``, stacks the CNN feature of every test
    video that has a feature file, writes the matching ``<name> <label>``
    lines to ``gt_cnn.lst`` and the decision values to ``P001_cnn.lst``,
    ``P002_cnn.lst`` and ``P003_cnn.lst`` in ``config.score``.

    Reads the module-level ``config``.

    :param k: length of one CNN feature vector.
    :return: None
    """
    with open(os.path.join(config.cluster_classifiers, 'svm1_cnn.pkl'), 'rb') as f1:
        svm_clf_1 = pickle.load(f1)
    with open(os.path.join(config.cluster_classifiers, 'svm2_cnn.pkl'), 'rb') as f2:
        svm_clf_2 = pickle.load(f2)
    with open(os.path.join(config.cluster_classifiers, 'svm3_cnn.pkl'), 'rb') as f3:
        svm_clf_3 = pickle.load(f3)

    # The empty (0, k) float block keeps dtype and width as before and makes
    # the stack well defined even when no feature file exists.
    feature_rows = [np.zeros((0, k))]

    # text mode: the lines written are str, not bytes
    with open(os.path.join(config.score, 'gt_cnn.lst'), 'w') as f:
        val_video_label_list = utils.get_video_and_label_list(
            config.all_test_list_filename)
        for now_video_label in val_video_label_list:
            vid_name = now_video_label[0]
            cnn_feature_file = os.path.join(
                config.cnn_feat_path, vid_name + config.cnn_feat_file_format)
            if os.path.isfile(cnn_feature_file):
                feature_rows.append(np.load(cnn_feature_file))
                # ground-truth line for every video that is scored
                f.write(now_video_label[0] + " " + now_video_label[1])
                f.write("\n")

    # stack once instead of growing the matrix per video
    X = np.vstack(feature_rows)

    svm_predicted_1 = svm_clf_1.decision_function(X)
    svm_predicted_2 = svm_clf_2.decision_function(X)
    svm_predicted_3 = svm_clf_3.decision_function(X)
    np.savetxt(os.path.join(config.score, "P001_cnn.lst"), svm_predicted_1)
    np.savetxt(os.path.join(config.score, "P002_cnn.lst"), svm_predicted_2)
    np.savetxt(os.path.join(config.score, "P003_cnn.lst"), svm_predicted_3)
def train_kmeans(config):
    '''
    Train K-means from the bag-of-words vectors.

    All rows of the saved ASR bag-of-words arrays of the
    train/validation/test videos are stacked and clustered with
    ``KMeans(n_clusters=200)``. The fitted model is dumped to
    ``models/kmeans_asr.pkl``. Videos without a bag-of-words file are
    reported and skipped.

    :param config: configurations
    :return: None
    '''
    if not os.path.exists("models"):
        os.mkdir("models")

    all_video_label_list = utils.get_video_and_label_list(config.all_train_list_filename) + \
        utils.get_video_and_label_list(config.all_val_list_filename) + \
        utils.get_video_and_label_list(config.all_test_list_filename)

    list_of_data = []
    for now_video_label in all_video_label_list:
        vid_name = now_video_label[0]
        asr_bow_filename = os.path.join(config.asr_bow_root_path,
                                        vid_name + config.asr_bow_file_format)
        if os.path.exists(asr_bow_filename):
            # add every row of the saved array as one training sample
            list_of_data.extend(np.load(asr_bow_filename))
        else:
            print("File: " + asr_bow_filename + " does not exist")

    array_of_data = np.array(list_of_data)
    print(array_of_data.shape)
    print(type(array_of_data))

    print("Going to do k-means")
    data_kmean = KMeans(n_clusters=200).fit(array_of_data)
    print("K-means")
    print(type(data_kmean))
    # labels_ holds the cluster index of every training sample
    sample_labels = data_kmean.labels_
    print("%s %s" % (sample_labels, type(sample_labels)))
    joblib.dump(data_kmean, 'models/kmeans_asr.pkl')
def soundnet_bow(config, conv, size, k):
    """Fit a k-means codebook on SoundNet features and write BoW histograms.

    Step 1: the ``arr_0`` arrays of all train/validation videos that have a
    SoundNet file are reshaped to ``(-1, last_dim)``, stacked and clustered
    with ``KMeans(n_clusters=k, random_state=0)``; the centres are saved as
    ``kmeans_<k><conv>_sn_clusters.npy`` in ``config.cluster_classifiers``.

    Step 2: for every train/validation/test video a ``(1, k)`` histogram of
    nearest-centre assignments is saved under ``config.soundnet_fea_bow``;
    videos without a SoundNet file get an all-zero histogram.

    :param config: configuration object with list filenames, SoundNet
        directories and file suffixes.
    :param conv: string inserted between video name and file suffix.
    :param size: unused; kept for interface compatibility.
    :param k: number of clusters / histogram bins.
    :return: None
    :raises ValueError: if no SoundNet file exists for train/validation.
    """
    train_val_list = utils.get_video_and_label_list(config.all_train_list_filename) + \
        utils.get_video_and_label_list(config.all_val_list_filename)

    # Gather all feature blocks and concatenate once; growing the matrix per
    # video re-copied it every time.
    feature_blocks = []
    for now_video_label in train_val_list:
        vid_name = now_video_label[0]
        sn_filename = os.path.join(
            config.soundnet_root_path,
            vid_name + conv + config.soundnet_file_format)
        if os.path.isfile(sn_filename):
            sn = np.load(sn_filename)["arr_0"]
            feature_blocks.append(sn.reshape(-1, sn.shape[-1]))

    if not feature_blocks:
        raise ValueError("no SoundNet feature files found for k-means training")
    sn_vec = np.concatenate(feature_blocks, axis=0)

    kmeans = KMeans(n_clusters=k, random_state=0).fit(sn_vec)
    k_means_clusters = kmeans.cluster_centers_
    k_means_path = os.path.join(
        config.cluster_classifiers,
        "kmeans_" + str(k) + "" + str(conv) + "_sn_clusters.npy")
    np.save(k_means_path, k_means_clusters)

    # The centres just saved are used directly; reloading them from disk
    # returned the same array.
    all_video_label_list = utils.get_video_and_label_list(config.all_train_list_filename) + \
        utils.get_video_and_label_list(config.all_val_list_filename) + \
        utils.get_video_and_label_list(config.all_test_list_filename)
    for now_video_label in all_video_label_list:
        vid_name = now_video_label[0]
        soundnet_filename = os.path.join(
            config.soundnet_root_path,
            vid_name + conv + config.soundnet_file_format)
        fea = os.path.join(config.soundnet_fea_bow,
                           vid_name + conv + config.soundnet_fea_file_format)
        k_dim = np.zeros((1, k))
        if os.path.isfile(soundnet_filename):
            sn = np.load(soundnet_filename)["arr_0"]
            sn = sn.reshape(-1, sn.shape[-1])
            for j in range(sn.shape[0]):
                index = np.argmin(
                    np.linalg.norm(sn[j, :] - k_means_clusters, axis=1))
                k_dim[0][int(index)] += 1
        # videos without features are saved as an all-zero histogram
        np.save(fea, k_dim)
truth_y = [ 3 if '3' in yy else 2 if '2' in yy else 1 if '1' in yy else 0 for xx, yy in name_with_labels ] assert (resx.shape[0] == len(truth_y)) return resx, truth_y, id2name, name2id trainxx, trainyy, _, _ = wrap_svc_data(train_y, train_name_with_labels, train_name) print(trainxx.shape, len(trainyy), trainyy[:20]) valxx, valyy, id2name, name2id = wrap_svc_data(val_y, val_name_with_labels, val_name) val_all = utils.get_video_and_label_list(config.all_val_list_filename) val_all = [x for x, y in val_all] ''' trainxx = np.load('./trainxx.npy') trainyy = np.load('./trainyy.npy') valxx = np.load('./valxx.npy') valyy = np.load('./valyy.npy') ''' print("TRAINING SVM BEGINS") clf, scl = svcclf.train(trainxx, trainyy, True) print("TESTING SVM BEGINS") y_pred, y_proba = svcclf.test(clf, valxx, scl) full_y_pred = [0] * len(val_all) print("TOTAL NUMBER OF VALID VALIDATION DATA %d / %d" % (len(name2id), len(val_all)))
def train(k):
    """Train three one-vs-rest linear SVMs on CNN features and pickle them.

    The CNN features of all train/validation videos that have a feature
    file are stacked into one matrix. One ``LinearSVC`` is fitted per event
    label (``P001``, ``P002``, ``P003``; the third with ``C=100``) and
    saved as ``svm1_cnn.pkl``, ``svm2_cnn.pkl`` and ``svm3_cnn.pkl`` in
    ``config.cluster_classifiers``.

    Reads the module-level ``config``.

    :param k: length of one CNN feature vector.
    :return: None
    """
    # The empty (0, k) float block keeps dtype and width as before; the
    # rows are stacked once at the end instead of after every video.
    feature_rows = [np.zeros((0, k))]
    Y_1 = []
    Y_2 = []
    Y_3 = []
    train_video_label_list = utils.get_video_and_label_list(
        config.all_train_list_filename) + utils.get_video_and_label_list(
            config.all_val_list_filename)

    for now_video_label in train_video_label_list:
        vid_name = now_video_label[0]
        vid_label = now_video_label[1]
        cnn_feature_file = os.path.join(config.cnn_feat_path,
                                        vid_name + config.cnn_feat_file_format)
        if os.path.isfile(cnn_feature_file):
            feature_rows.append(np.load(cnn_feature_file))
            # binary target per event: 1 for the event's videos, else 0
            Y_1.append(1 if vid_label == "P001" else 0)
            Y_2.append(1 if vid_label == "P002" else 0)
            Y_3.append(1 if vid_label == "P003" else 0)

    X = np.vstack(feature_rows)

    svm_clf_1 = LinearSVC()
    svm_clf_1.fit(X, Y_1)
    svm_clf_2 = LinearSVC()
    svm_clf_2.fit(X, Y_2)
    svm_clf_3 = LinearSVC(C=100)
    svm_clf_3.fit(X, Y_3)

    with open(os.path.join(config.cluster_classifiers, 'svm1_cnn.pkl'), 'wb') as f1:
        pickle.dump(svm_clf_1, f1)
    with open(os.path.join(config.cluster_classifiers, 'svm2_cnn.pkl'), 'wb') as f2:
        pickle.dump(svm_clf_2, f2)
    with open(os.path.join(config.cluster_classifiers, 'svm3_cnn.pkl'), 'wb') as f3:
        pickle.dump(svm_clf_3, f3)
from keras.applications.imagenet_utils import preprocess_input import os import sys sys.path.append("../") import utils import configs.hw2_config as config import pdb import scipy.misc if __name__ == '__main__': shape = (224, 224) mod = VGG19(weights='imagenet') model = Model(inputs=mod.input,outputs=mod.layers[-1].output) all_video_label_list = utils.get_video_and_label_list(config.all_train_list_filename) + \ utils.get_video_and_label_list(config.all_test_list_filename) + \ utils.get_video_and_label_list(config.all_val_list_filename) for now_video_label in all_video_label_list: vid_name = now_video_label[0] ds_file=os.path.join(config.ds_video_root_path,vid_name+config.ds_video_file_format) cnn_file=os.path.join(config.cnn_feat_path,vid_name+config.cnn_feat_file_format) if(os.path.exists(cnn_file)): continue #print(vid_name) clip = VideoFileClip(ds_file) #frames = [idx for idx, x in enumerate(clip.iter_frames()) if idx % 5 == 0] #pdb.set_trace() frames = [scipy.misc.imresize(x, shape) for idx, x in enumerate(clip.iter_frames()) if idx % 50 == 0] #print(len(frames))