Example #1
def get_bow_vec(config):

    #   read and concatenate train/validation/test video lists
    all_video_label_list = utils.get_video_and_label_list(config.all_train_list_filename) + \
                           utils.get_video_and_label_list(config.all_val_list_filename) + \
                           utils.get_video_and_label_list(config.all_test_list_filename)

    #   the number of bag-of-words centers
    if not os.path.exists(config.asr_bow_root_path):
        os.mkdir(config.asr_bow_root_path)

    vocab_book = utils.read_object_from_pkl(config.cmu_asr_vocabbook_filename)

    word_len = len(vocab_book)
    for now_video_label in all_video_label_list:
        vid_name = now_video_label[0]
        asr_filename = os.path.join(config.cmu_asr_root_path,
                                    vid_name + config.cmu_asr_file_format)
        asr_bow_filename = os.path.join(config.asr_bow_root_path,
                                        vid_name + config.asr_bow_file_format)

        word_list = utils.read_object_from_pkl(asr_filename)

        asr_bow_vec = np.zeros((1, word_len))

        #   set a single one-hot entry indexed by the transcription's word
        #   count modulo the vocabulary size (a crude placeholder, not a real bag-of-words)
        asr_bow_vec[0, len(word_list) % word_len] = 1

        np.save(asr_bow_filename, asr_bow_vec)
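For contrast with the placeholder above, a genuine bag-of-words vector would count vocabulary hits per word. A minimal sketch, assuming vocab_book behaves like a word-to-index mapping (the pickled structure is not shown in this snippet):

def get_real_bow_vec(word_list, vocab_book):
    #   count how often each vocabulary word occurs in the transcription
    bow = np.zeros((1, len(vocab_book)))
    for word in word_list:
        idx = vocab_book.get(word)  # None for out-of-vocabulary words
        if idx is not None:
            bow[0, idx] += 1
    return bow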
Example #2
def get_cnn_fts(config, gps = ['train']):
    all_video_label_list = []
    if 'train' in gps:
        all_video_label_list += utils.get_video_and_label_list(config.all_train_list_filename)
    if 'val' in gps:
        all_video_label_list += utils.get_video_and_label_list(config.all_val_list_filename)
    if 'test' in gps:
        all_video_label_list += utils.get_video_and_label_list(config.all_test_list_filename)
    print("get_surf_kps %d"%len(all_video_label_list))
    
    cnn_fts_path = os.path.join(config.dataset_root_path, "vgg16_fts")
    fnames = os.listdir(cnn_fts_path)
    xs = []
    ys = []
    vids = []
    for now_video_label in all_video_label_list:
        vid_name = now_video_label[0]
        label = now_video_label[1]

        tmp_name = vid_name +'_'
        files = [fname for fname in fnames if tmp_name in fname]
        for fname in files[:200]:
            npy = np.load(os.path.join(cnn_fts_path, fname))
            if npy is None or npy.size == 0:
                continue
            #   flatten and keep every 6th value to shrink the feature
            npy = npy.reshape(-1)[0::6]
            xs.append(npy)
            ys.append(label)
            vids.append(vid_name)
        print("FINISH %d"%(len(ys) / 100))
    return np.array(xs), ys, vids 
Example #3
def bow_surf(k):

    k_means_path = os.path.join(config.dataset_root_path,
                                "kmeans_" + str(k) + "_surf_clusters.npy")
    k_means_clusters = np.load(k_means_path)

    all_video_label_list = utils.get_video_and_label_list(config.all_train_list_filename) + \
                              utils.get_video_and_label_list(config.all_test_list_filename) + \
                              utils.get_video_and_label_list(config.all_val_list_filename)

    for now_video_label in all_video_label_list:
        vid_name = now_video_label[0]
        vid_kf = os.path.join(config.surf_feat_path,
                              vid_name + config.surf_feat_file_format)
        vid_kf = np.load(vid_kf)
        k_dim = np.zeros((len(vid_kf), k))
        surf_path = os.path.join(config.surf_bow_path,
                                 vid_name + config.surf_bow_file_format)
        print(len(vid_kf))
        i = 0
        for kf in vid_kf:
            for j in range(kf.shape[0]):
                index = np.argmin(
                    np.linalg.norm(kf[j, :] - k_means_clusters, axis=1))
                k_dim[i][int(index)] = k_dim[i][int(index)] + 1
            i = i + 1

        k_dim_vid = np.mean(k_dim, axis=0)
        k_dim_vid = k_dim_vid.reshape((1, k_dim_vid.shape[0]))
        np.save(surf_path, k_dim_vid)
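The inner double loop assigns descriptors to their nearest cluster one at a time. The same assignment can be vectorized per keyframe; a sketch using scipy's cdist, with kf of shape (n_desc, d) and k_means_clusters of shape (k, d):

from scipy.spatial.distance import cdist

def assign_histogram(kf, k_means_clusters, k):
    #   distance matrix (n_desc, k) -> nearest center per descriptor -> count histogram
    idx = np.argmin(cdist(kf, k_means_clusters), axis=1)
    return np.bincount(idx, minlength=k)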
Example #4
def get_bow_vec(config):
    '''
    Generate ASR bag of words vector and save them.
    :param config: configurations
    :return:
    '''
    #   read and concatenate train/validation/test video lists
    all_video_label_list = utils.get_video_and_label_list(config.all_train_list_filename) + \
                           utils.get_video_and_label_list(config.all_val_list_filename) + \
                           utils.get_video_and_label_list(config.all_test_list_filename)

    #   the number of bag-of-words centers
    if not os.path.exists(config.asr_bow_root_path):
        os.mkdir(config.asr_bow_root_path)

    vocab_book = utils.read_object_from_pkl(config.cmu_asr_vocabbook_filename)
    word_len = len(vocab_book)

    for now_video_label in all_video_label_list:
        vid_name = now_video_label[0]
        asr_filename = os.path.join(config.cmu_asr_root_path,vid_name+config.cmu_asr_file_format)
        asr_bow_filename = os.path.join(config.asr_bow_root_path,vid_name+config.asr_bow_file_format)

        if os.path.exists(asr_filename):
            word_list = utils.read_object_from_pkl(asr_filename)
            asr_bow_vec = np.zeros((1, word_len))
            #   set a single one-hot entry indexed by the transcription's word
            #   count modulo the vocabulary size (a crude placeholder, not a real bag-of-words)
            asr_bow_vec[0, len(word_list) % word_len] = 1

            np.save(asr_bow_filename, asr_bow_vec)
        else:
            print "File " + asr_filename + " does not exist"
Example #5
def asr_vectors(config):
    '''
    Generate ASR features.
    :param config:
    :return:
    '''
    if not os.path.exists(config.asr_bow_feature_path):
        os.mkdir(config.asr_bow_feature_path)

    #   read and concatenate train/validation/test video lists
    all_video_label_list = utils.get_video_and_label_list(config.all_train_list_filename) + \
                           utils.get_video_and_label_list(config.all_val_list_filename) + \
                           utils.get_video_and_label_list(config.all_test_list_filename)

    km = joblib.load('models/kmeans_asr.pkl')
    print "K-means model loaded success"

    for now_video_label in all_video_label_list:
        # print now_video_label
        vid_name = now_video_label[0]

        asr_bow_filename = os.path.join(config.asr_bow_root_path, vid_name + config.asr_bow_file_format)
        asr_feature_full_fn = os.path.join(config.asr_bow_feature_path, vid_name + config.asr_bow_feature_format)

        if os.path.exists(asr_bow_filename):
            data = np.load(asr_bow_filename)
            pred = km.predict(data)

            closest_counts = np.bincount(pred, minlength=200)
            closest_counts = closest_counts.reshape(1, closest_counts.shape[0])
            np.save(asr_feature_full_fn, closest_counts)
            print "{} feature vector done. ".format(vid_name)
        else:
            print "File: " + asr_bow_filename + " does not exist"
Example #6
def video_downsample(config,ds_vid_len,ds_vid_frame_rate):
    if not os.path.exists(config.ds_video_root_path):
        os.mkdir(config.ds_video_root_path)

    all_video_label_list = utils.get_video_and_label_list(config.all_train_list_filename) + \
                           utils.get_video_and_label_list(config.all_test_list_filename) + \
                           utils.get_video_and_label_list(config.all_val_list_filename)

    thread_pool=[]

    for now_video_label in all_video_label_list:

        vid_name=now_video_label[0]
        vid_filename=os.path.join(config.video_root_path,vid_name+config.video_file_format)
        ds_vid_filename=os.path.join(config.ds_video_root_path,vid_name+config.video_file_format)

        if os.path.isfile(ds_vid_filename):
            continue

        assert(os.path.isfile(vid_filename))
        print "Down-sampling video : ",vid_filename

        ffmpeg_cmd="ffmpeg -y -ss 0 -i %s -strict experimental -t %d -r %d %s"%(vid_filename,ds_vid_len,ds_vid_frame_rate,ds_vid_filename)
        print ffmpeg_cmd

        while len(threading.enumerate())>=max_th:
            pass

        now_th=threading.Thread(target=cmd_runner,args=[ffmpeg_cmd])
        now_th.start()
        thread_pool.append(now_th)

    for th in thread_pool:
        th.join()
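This example (and Examples #11, #18, and #20) throttles threads by spinning on len(threading.enumerate()) >= max_th, which burns a CPU core while waiting. A sketch of the same throttling with concurrent.futures, which blocks without busy-waiting (ffmpeg_cmds stands in for the command strings built above):

from concurrent.futures import ThreadPoolExecutor

with ThreadPoolExecutor(max_workers=max_th) as pool:
    for cmd in ffmpeg_cmds:      # at most max_th commands run concurrently
        pool.submit(cmd_runner, cmd)
# leaving the with-block joins all outstanding commands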
Example #7
def extract_mfcc(config):

    #   read and concatenate train/validation/test video lists
    all_video_label_list = utils.get_video_and_label_list(config.all_train_list_filename) + \
                           utils.get_video_and_label_list(config.all_val_list_filename) + \
                           utils.get_video_and_label_list(config.all_test_list_filename)
Example #8
def extract_audio(config):
    '''
    Extract the audio track from original mp4 videos using ffmpeg
    :param config: configurations
    :return:
    '''
    if not os.path.exists(config.audio_root_path):
        os.mkdir(config.audio_root_path)

    #   read and concatenate train/validation/test video lists
    all_video_label_list=utils.get_video_and_label_list(config.all_train_list_filename)+\
                         utils.get_video_and_label_list(config.all_val_list_filename)+\
                         utils.get_video_and_label_list(config.all_test_list_filename)

    count = 0
    #   iterate over the video list and call system command for ffmpeg audio extraction
    for now_video_label in all_video_label_list:
        # print now_video_label
        vid_name = now_video_label[0]
        vid_full_fn = os.path.join(config.video_root_path, vid_name+config.video_file_format)
        audio_full_fn = os.path.join(config.audio_root_path, vid_name+config.audio_file_format)

        if not os.path.exists(audio_full_fn):
            #   call command line for audio track extraction
            command = "ffmpeg -y -i %s -ac 1 -f wav %s" % (vid_full_fn, audio_full_fn)
            os.system(command)
            count += 1
        else:
            print "File " + audio_full_fn + " already exist."

    print "In total number of " + str(len(all_video_label_list)) + " videos, there are " + str(count) + " files that has no audio."
Example #9
def extract_mfcc(config):
    '''
    Extract the mfcc feature from audio files.
    :param config:
    :return:
    '''

    if not os.path.exists(config.mfcc_root_path):
        os.mkdir(config.mfcc_root_path)

    #   read and concatenate train/validation/test video lists
    all_video_label_list = utils.get_video_and_label_list(config.all_train_list_filename) + \
                           utils.get_video_and_label_list(config.all_val_list_filename) + \
                           utils.get_video_and_label_list(config.all_test_list_filename)

    #   extract mfcc features with librosa and store them as numpy files
    for now_video_label in all_video_label_list:
        vid_name = now_video_label[0]
        audio_full_fn = os.path.join(config.audio_root_path, vid_name+config.audio_file_format)
        mfcc_full_fn = os.path.join(config.mfcc_root_path, vid_name+config.mfcc_file_format)

        if os.path.exists(audio_full_fn):
            y, sr = librosa.load(audio_full_fn)
            data = librosa.feature.mfcc(y=y, sr=sr)
            np.save(mfcc_full_fn, data)
        else:
            print "File: "+audio_full_fn+" does not exist"
Example #10
def get_mfcc_vecs(config, gps=['train'], stride=3):
    #   read and concatenate train/validation/test video lists
    print("GET NUMPY MFCC FEATURES FOR %s with stride %d" %
          (' and '.join(gps), stride))
    print("file path %s" % config.all_train_list_filename)
    all_video_label_list = []
    if 'train' in gps:
        all_video_label_list += utils.get_video_and_label_list(
            config.all_train_list_filename)
    if 'val' in gps:
        all_video_label_list += utils.get_video_and_label_list(
            config.all_val_list_filename)
    if 'test' in gps:
        all_video_label_list += utils.get_video_and_label_list(
            config.all_test_list_filename)

    vid_names = set([item[0] for item in all_video_label_list])
    all_files = os.listdir(config.mfcc_root_path)
    # keep only the .npy files whose video name (filename minus its 8-char suffix) appears in the requested lists
    all_files = [item for item in all_files if item[:-8] in vid_names]
    valid_audio_names = set(
        [item[:-8] for item in all_files if item[:-8] in vid_names])
    valid_video_label_list = [
        x for x in all_video_label_list if x[0] in valid_audio_names
    ]
    print("........TOTAL NUMBER OF INTERESTED VIDEOS %d" % len(vid_names))
    print("........TOTAL NUMBER OF NUMPY FILES NEEDED TO BE INTERESTED IN %d" %
          len(all_files))

    print("........TOTAL NUMBER OF AUDIO FILES NEEDED TO BE INTERESTED IN %d" %
          len(valid_audio_names))
    print("........%d " % (len(valid_video_label_list)))
    mfcc_vec_num = 0
    for mfcc_part_fn in all_files:
        npy = np.load(os.path.join(config.mfcc_root_path, mfcc_part_fn))
        mfcc_vec_num += int((npy.shape[1] + stride - 1) / stride)
    print("Shape", npy.shape[0])
    mfcc_vecs = np.empty(shape=(mfcc_vec_num, npy.shape[0]))
    print("........TOTAL MFCC VEC NUMS: %d" % mfcc_vecs.shape[0])

    st = 0

    vec_vid_names = [None] * mfcc_vecs.shape[0]
    for mfcc_part_fn in all_files:
        npy = np.load(os.path.join(config.mfcc_root_path,
                                   mfcc_part_fn))[:, 0::stride]
        en = st + npy.shape[1]
        vec_vid_names[st:en] = [mfcc_part_fn[:-8]] * (en - st)
        mfcc_vecs[st:en, :] = npy.transpose()
        st = en

    print("----------------------------------")
    np.save(os.path.join(config.mfcc_root_path, '_'.join(gps)), mfcc_vecs)

    return valid_video_label_list, mfcc_vecs, vec_vid_names
Example #11
def gen_idt_bow_feat(config, rev_mode=False, exceptions=None):

    # load the k-means clustering centers from pickle file...
    kmeans = utils.read_object_from_pkl(config.idt_codebook_filename)
    num_centers = len(kmeans.cluster_centers_)

    # path for bow encoded features and raw IDT features
    idt_feat_root_path = config.idt_bow_root_path
    idt_raw_feat_root_path = config.idt_raw_root_path

    if not os.path.exists(idt_feat_root_path):
        os.mkdir(idt_feat_root_path)


    all_video_label_list = utils.get_video_and_label_list(config.all_train_list_filename) + \
                           utils.get_video_and_label_list(config.all_test_list_filename) + \
                           utils.get_video_and_label_list(config.all_val_list_filename)

    # generate bag-of-words representations for videos IDT features using multiple threads handling...
    thread_pool = []
    for now_video_label in all_video_label_list:
        vid_name = now_video_label[0]
        if exceptions and vid_name in exceptions:
            continue
        #   the original improved dense trajectory file...
        vid_raw_idt_filename = os.path.join(
            idt_raw_feat_root_path, vid_name + config.idt_raw_file_format)

        #   the bag-of-words representation file...
        vid_idt_filename = os.path.join(idt_feat_root_path,
                                        vid_name + config.idt_bow_file_format)

        print "from: ", vid_raw_idt_filename, "---> to: ", vid_idt_filename

        #   if the bag-of-words representation file already existed, skip...
        if os.path.isfile(vid_idt_filename):
            continue

        # block starting new threads, if current thread_pool is full
        while len(threading.enumerate()) >= max_th:
            pass

        # initiate a new thread for bag-of-words representation generation...
        now_th = threading.Thread(target=idt_bow_runner,
                                  args=[
                                      vid_name, vid_raw_idt_filename,
                                      vid_idt_filename, num_centers, kmeans
                                  ])
        now_th.start()
        thread_pool.append(now_th)

    # wait all threads to be finished...
    for th in thread_pool:
        th.join()
Example #12
def evaluate_ap(config):

    #   load the ground-truth file list
    gt_list_fn = "example_gt_and_pred/gt.lst"
    test_video_label_list = utils.get_video_and_label_list(gt_list_fn)
    val = 0
    for event_id, event_name in config.event_id_name_dict.iteritems():
        print "Evaluating the average precision (AP) with classifier ", event_id, " name: ", event_name, "..."

        #   load the outputted prediction score files to calculate the average precision
        event_pred_score_fn = os.path.join("example_gt_and_pred",
                                           event_id + "_pred_score.lst")
        y_score = utils.read_score_list_from_file(event_pred_score_fn)

        y_gt = []
        for now_video_label in test_video_label_list:
            vid_gt_label = now_video_label[1]
            if vid_gt_label == event_id:
                y_gt.append(1)
            else:
                y_gt.append(0)

        #   the number of ground-truths and the number of prediction scores should be same
        assert (len(y_gt) == len(y_score))
        val += average_precision_score(y_gt, y_score)
        print "Average precision: ", average_precision_score(y_gt, y_score)

    print "Finish evaluating the average precision (AP) metric on all classifiers...", val / 3.0
Example #13
def surf_histogram_builder(config, model, gps_out):
    all_video_label_list = []
    if 'train' in gps_out:
        all_video_label_list += utils.get_video_and_label_list(
            config.all_train_list_filename)
    if 'val' in gps_out:
        all_video_label_list += utils.get_video_and_label_list(
            config.all_val_list_filename)
    if 'test' in gps_out:
        all_video_label_list += utils.get_video_and_label_list(
            config.all_test_list_filename)

    surf_kps_root_path = os.path.join(config.dataset_root_path, "surf_feature")
    print("READ FROM %s %s %d" %
          (surf_kps_root_path, '_'.join(gps_out), len(all_video_label_list)))
    fnames = os.listdir(surf_kps_root_path)
    histx = np.empty(shape=(len(all_video_label_list), model.n_clusters * 2))
    histy = []
    cnt = 0
    for now_video_label in all_video_label_list:
        prefix = now_video_label[0] + '_'
        matched_fnames = [x for x in fnames if prefix in x]
        tmpx = []
        tmpy = []
        histy.append(now_video_label[1])
        hist_tmp = np.zeros(shape=(len(matched_fnames), model.n_clusters))  # zeros: counts are accumulated below
        cnt_frame = 0
        for fname in matched_fnames:
            tmpxx = np.load(os.path.join(surf_kps_root_path, fname))
            tmpx.append(tmpxx)
            if len(tmpxx) > 0:
                tmpyy = kmeans_test(model, tmpxx)
                for yy in tmpyy:
                    hist_tmp[cnt_frame][yy] += 1
            cnt_frame += 1
        if len(tmpx) > 0:
            tmpx = np.concatenate(tmpx, axis=0)
        if hist_tmp is None or len(hist_tmp) == 0:
            cnt += 1
            continue
        max_pool = np.max(hist_tmp, axis=0)
        avg_pool = np.mean(hist_tmp, axis=0)
        histx[cnt] = np.concatenate([max_pool, avg_pool], axis=0)
        print("deal with %dth file " % cnt)
        cnt += 1
    return histx, histy
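The per-video descriptor concatenates max- and mean-pooled frame histograms, giving 2 * n_clusters dimensions. A toy check with k = 3:

hist_tmp = np.array([[1., 0., 2.],
                     [3., 1., 0.]])   # two frame histograms
video_desc = np.concatenate([hist_tmp.max(axis=0), hist_tmp.mean(axis=0)])
print(video_desc)                     # -> [3. 1. 2. 2. 0.5 1.]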
Example #14
def get_surf_kps(config, gps=['train']):
    all_video_label_list = []
    if 'train' in gps:
        all_video_label_list += utils.get_video_and_label_list(
            config.all_train_list_filename)
    if 'val' in gps:
        all_video_label_list += utils.get_video_and_label_list(
            config.all_val_list_filename)
    if 'test' in gps:
        all_video_label_list += utils.get_video_and_label_list(
            config.all_test_list_filename)
    print("get_surf_kps %d" % len(all_video_label_list))

    surf_kps_root_path = os.path.join(config.dataset_root_path, "surf_feature")
    print("surf_kps_root_path %s" % surf_kps_root_path)
    video_set = set([x[0] + '_' for x in all_video_label_list])
    fnames = sorted(os.listdir(surf_kps_root_path))
    surf_kps_num = 0
    npy = None
    rnd = 0
    npyshape = None
    for fname in fnames:
        if fname[:-8] in video_set:
            npy = np.load(os.path.join(surf_kps_root_path, fname))
            if npy is None or len(npy.shape) != 2:
                continue
            # npy = npy[rnd::100]
            # rnd += (rnd + 1) % 100
            surf_kps_num += npy.shape[0]
            npyshape = npy.shape
            del npy
    print("NPY shape should be ", npyshape, surf_kps_num)
    surf_kps = np.empty(shape=(surf_kps_num, npyshape[1]))

    st = 0
    for fname in fnames:
        if fname[:-8] in video_set:
            npy = np.load(os.path.join(surf_kps_root_path, fname))
            if npy is None or len(npy.shape) != 2:
                continue
            en = st + npy.shape[0]
            surf_kps[st:en] = npy
            st = en   # advance the write offset into surf_kps
            del npy
    return surf_kps
Example #15
def extract_surf():
    k = 256
    all_video_label_list = utils.get_video_and_label_list(config.all_train_list_filename) + \
                              utils.get_video_and_label_list(config.all_test_list_filename) + \
                              utils.get_video_and_label_list(config.all_val_list_filename)

    for now_video_label in all_video_label_list:
        vid_name = now_video_label[0]
        #print(vid_name)
        ds_file = os.path.join(config.ds_video_root_path,
                               vid_name + config.ds_video_file_format)
        ds_file_video = cv2.VideoCapture(ds_file)

        key_frame = []

        length = int(ds_file_video.get(cv2.cv.CV_CAP_PROP_FRAME_COUNT))
        #print(length)
        c = 0
        while (ds_file_video.isOpened()):

            ret, frame = ds_file_video.read(0)

            if ret != True:
                break
            if (c % 5 == 0):
                frame = frame.astype(np.uint8)
                try:
                    surf = cv2.SURF(400)
                    keypoints, descriptors = surf.detectAndCompute(frame, None)
                    key_frame.append(descriptors)
                    print(c, len(keypoints), descriptors.shape)
                except:
                    print("No", vid_name)
            c = c + 1
        ds_file_video.release()
        #print(frame)
        #print(frame.shape)
        #print(len(key_frame))

        #print(c)
        np.save(
            os.path.join(config.surf_feat_path,
                         vid_name + config.surf_feat_file_format), key_frame)
Example #16
def vlad_surf(k):

    k_means_path = os.path.join(config.dataset_root_path,
                                "kmeans_" + str(k) + "_surf_clusters.npy")
    k_means_clusters = np.load(k_means_path)

    all_video_label_list = utils.get_video_and_label_list(config.all_train_list_filename) + \
                           utils.get_video_and_label_list(config.all_test_list_filename) + \
                           utils.get_video_and_label_list(config.all_val_list_filename)

    for now_video_label in all_video_label_list:
        vid_name = now_video_label[0]
        vid_kf = os.path.join(config.surf_feat_path,
                              vid_name + config.surf_feat_file_format)
        vid_kf = np.load(vid_kf)
        surf_path = os.path.join(config.surf_vlad_path,
                                 vid_name + config.surf_vlad_file_format)
        print(len(vid_kf))
        i = 0
        for kf in vid_kf:
            #   accumulate residuals to the nearest cluster center (VLAD encoding)
            k_dim = np.zeros((k, 128))
            for j in range(kf.shape[0]):
                index = np.argmin(
                    np.linalg.norm(kf[j, :] - k_means_clusters, axis=1))
                diff = kf[j, :] - k_means_clusters[index]
                k_dim[int(index)] = k_dim[int(index)] + diff

            k_dim = k_dim.flatten()
            k_dim = k_dim.reshape((1, k_dim.shape[0]))

            norm = np.linalg.norm(k_dim)
            k_dim = k_dim / norm

            if i == 0:
                k_dim_vid = k_dim
            else:
                k_dim_vid = np.vstack((k_dim_vid, k_dim))

            i = i + 1

        k_dim_vid = np.mean(k_dim_vid, axis=0)
        k_dim_vid = k_dim_vid.reshape((1, k_dim_vid.shape[0]))
        np.save(surf_path, k_dim_vid)
Example #17
def get_idt_data(config, gps):
    all_video_label_list = []
    if 'train' in gps:
        all_video_label_list += utils.get_video_and_label_list(config.all_train_list_filename)
    if 'val' in gps:
        all_video_label_list += utils.get_video_and_label_list(config.all_val_list_filename)
    if 'test' in gps:
        all_video_label_list += utils.get_video_and_label_list(config.all_test_list_filename)
    idt_full_path = config.idt_bow_full_path
    res = np.zeros(shape=(len(all_video_label_list), 256))
    cnt = 0
    ys = []
    for now_video_label in all_video_label_list:
        vid_name = now_video_label[0]
        ys.append(now_video_label[1])
        fname = os.path.join(idt_full_path, vid_name) + '.npy'
        res[cnt] = np.load(fname)
        cnt += 1
    return res, ys
Example #18
def feat_ext(config, exceptions):
    avi_root_path = config.avi_video_root_path
    idt_raw_feat_root_path = config.idt_raw_root_path
    if not os.path.exists(idt_raw_feat_root_path):
        os.mkdir(idt_raw_feat_root_path)

    st = time.time()

    # get the combined training and testing video list
    all_video_label_list = utils.get_video_and_label_list(config.all_train_list_filename) + \
                           utils.get_video_and_label_list(config.all_test_list_filename) + \
                           utils.get_video_and_label_list(config.all_val_list_filename)

    #TODO: extracting improved DenseTrajectory feature for med videos

    # Given video's filename: video_filename (in .avi format) and the improved dense trajectory feature filename: vid_idt_filename,
    # using the following command for iDT feature extraction...
    # Think about how to accelerate the feature extraction process ?
    thread_pool = []
    for now_video_label in all_video_label_list:
        vid_name = now_video_label[0]
        if vid_name in exceptions:
            continue
        vid_filename = os.path.join(config.avi_video_root_path,
                                    vid_name + '.avi')
        vid_idt_filename = os.path.join(config.idt_raw_root_path, vid_name)
        cmd = "./DenseTrackStab  %s -W 15 -s 6 -t 6 | gzip > %s" % (
            vid_filename, vid_idt_filename)
        while len(threading.enumerate()) >= max_th:
            pass

        print(
            "Append another file to the thread pool %s from %s to %s" %
            (vid_name, vid_filename, vid_idt_filename),
            (time.time() - st) * 1.0 / 60)
        now_th = threading.Thread(target=cmd_runner, args=[cmd])
        now_th.start()
        thread_pool.append(now_th)

    for th in thread_pool:
        th.join()
    print "Finishing extracting improved DenseTrajecotory features..."
Example #19
def kmeans_surf(k):

    all_video_label_list = utils.get_video_and_label_list(config.all_train_list_filename) + \
                           utils.get_video_and_label_list(config.all_val_list_filename)
    i = 0

    for now_video_label in all_video_label_list:
        vid_name = now_video_label[0]
        print(vid_name)
        vid_kf = os.path.join(config.surf_feat_path,
                              vid_name + config.surf_feat_file_format)
        vid_kf = np.load(vid_kf)

        new_vid_kf = []

        for each in vid_kf:

            index = int(np.ceil(0.02 * each.shape[0]))

            start = 0
            end = each.shape[0]

            rows = random.sample(range(start, end), index)
            each = each[rows, ]
            new_vid_kf.append(each)

        vid_kf = new_vid_kf

        if i == 0:
            new = np.vstack(vid_kf)
            i = 1
        else:
            temp = np.vstack(vid_kf)
            new = np.vstack((new, temp))

    kmeans_input = new
    kmeans = KMeans(n_clusters=k, random_state=0).fit(kmeans_input)
    k_means_clusters = kmeans.cluster_centers_
    k_means_path = os.path.join(config.dataset_root_path,
                                "kmeans_" + str(k) + "_surf_clusters.npy")
    np.save(k_means_path, k_means_clusters)
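Stacking even 2% of every frame's SURF descriptors can produce millions of rows across the whole list. If the full-batch fit above runs out of memory, MiniBatchKMeans is a near drop-in sketch that fits the same codebook incrementally:

from sklearn.cluster import MiniBatchKMeans

mbk = MiniBatchKMeans(n_clusters=k, random_state=0, batch_size=10000)
mbk.fit(kmeans_input)                 # same stacked descriptor matrix as above
np.save(k_means_path, mbk.cluster_centers_)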
Example #20
def image_extraction(config):
    if not os.path.exists(config.ds_video_root_path):
        os.mkdir(config.ds_video_root_path)

    all_video_label_list = utils.get_video_and_label_list(config.all_train_list_filename) + \
                           utils.get_video_and_label_list(config.all_test_list_filename) + \
                           utils.get_video_and_label_list(config.all_val_list_filename)

    thread_pool = []
    cnt = 0
    skip = 0
    for now_video_label in all_video_label_list:
        cnt += 1
        vid_name = now_video_label[0]
        vid_filename = os.path.join(config.video_root_path,
                                    vid_name + config.video_file_format)
        ds_vid_filename = os.path.join(config.ds_video_root_path,
                                       vid_name + config.video_file_format)
        keyframe_filename = ds_vid_filename.replace('down_samp_video',
                                                    'surf_images')
        keyframe_filename = keyframe_filename.replace('.mp4', '_\%04d.jpg')
        if not os.path.isfile(ds_vid_filename):
            skip += 1
            continue

        print "Extract key frame for video : ", vid_filename, " to ", keyframe_filename

        ffmpeg_cmd = "ffmpeg -ss 0 -i %s -vf fps=3 -loglevel error %s -hide_banner" % (
            ds_vid_filename, keyframe_filename)
        # print ffmpeg_cmd

        while len(threading.enumerate()) >= max_th:
            pass

        now_th = threading.Thread(target=cmd_runner, args=[ffmpeg_cmd])
        now_th.start()
        thread_pool.append(now_th)
        print("Finish %d files Skip %d files " % (cnt - skip, skip))
    for th in thread_pool:
        th.join()
Example #21
def extract_audio(config):
    if not os.path.exists(config.audio_root_path):
        os.mkdir(config.audio_root_path)

    #   read and concatenate train/validation/test video lists
    all_video_label_list=utils.get_video_and_label_list(config.all_train_list_filename)+\
                         utils.get_video_and_label_list(config.all_val_list_filename)+\
                         utils.get_video_and_label_list(config.all_test_list_filename)

    #   iterate over the video list and call system command for ffmpeg audio extraction
    for now_video_label in all_video_label_list:
        vid_name = now_video_label[0]
        vid_full_fn = os.path.join(config.video_root_path,
                                   vid_name + config.video_file_format)
        audio_full_fn = os.path.join(config.audio_root_path,
                                     vid_name + config.audio_file_format)

        #   call command line for audio track extraction
        command = "ffmpeg -y -i %s -ac 1 -f wav %s" % (vid_full_fn,
                                                       audio_full_fn)
        os.system(command)
Example #22
def wrap_mfcc_data(config, tags=['train']):
    all_video_label_list = []
    if 'train' in tags:
        all_video_label_list += utils.get_video_and_label_list(
            config.all_train_list_filename)
    if 'val' in tags:
        all_video_label_list += utils.get_video_and_label_list(
            config.all_val_list_filename)
    if 'test' in tags:
        all_video_label_list += utils.get_video_and_label_list(
            config.all_test_list_filename)
    print("TOTAL NUMBER OF DOCS %d " % len(all_video_label_list))
    root_path = os.path.join(config.dataset_root_path, 'pos_mfcc')
    vid_names = set([item[0] for item in all_video_label_list])
    all_files = os.listdir(root_path)
    x = np.zeros(shape=(len(all_video_label_list), 200))
    y = []
    cnt1 = 0
    cnt2 = 0
    cnt3 = 0
    for now_video_label in all_video_label_list:
        vid_name = now_video_label[0]
        label = 0
        if '1' in now_video_label[1]:
            label = 1
            cnt1 += 1
        if '2' in now_video_label[1]:
            label = 2
            cnt2 += 1
        if '3' in now_video_label[1]:
            label = 3
            cnt3 += 1
        fpath = os.path.join(root_path, vid_name + '.npy')
        if os.path.exists(fpath):
            subx = np.load(os.path.join(root_path, vid_name + '.npy'))
            x[len(y), :] = subx
        y.append(label)
    print(x.shape, len(y), cnt1, cnt2, cnt3)
    return x, y
Example #23
def test(k):

    with open(os.path.join(config.cluster_classifiers, 'svm1_cnn.pkl'),
              'rb') as f1:
        svm_clf_1 = pickle.load(f1)

    with open(os.path.join(config.cluster_classifiers, 'svm2_cnn.pkl'),
              'rb') as f2:
        svm_clf_2 = pickle.load(f2)

    with open(os.path.join(config.cluster_classifiers, 'svm3_cnn.pkl'),
              'rb') as f3:
        svm_clf_3 = pickle.load(f3)

    X = np.zeros((1, k))

    Y_1 = []
    Y_2 = []
    Y_3 = []

    with open(os.path.join(config.score, 'gt_cnn.lst'), 'wb') as f:
        val_video_label_list = utils.get_video_and_label_list(
            config.all_test_list_filename)
        i = 0
        for now_video_label in val_video_label_list:
            vid_name = now_video_label[0]
            vid_label = now_video_label[1]

            cnn_feature_file = os.path.join(
                config.cnn_feat_path, vid_name + config.cnn_feat_file_format)
            if os.path.isfile(cnn_feature_file):
                #print(i,vid_name)
                cnn_feature = np.load(cnn_feature_file)
                X = np.vstack([X, cnn_feature])
                i = i + 1
                f.write(now_video_label[0] + " " + now_video_label[1])
                f.write("\n")
    X = X[1:]   # drop the all-zeros initialization row
    #X = X.astype(np.float)
    Y_1 = np.array(Y_1)
    Y_2 = np.array(Y_2)
    Y_3 = np.array(Y_3)

    svm_predicted_1 = svm_clf_1.decision_function(X)
    svm_predicted_2 = svm_clf_2.decision_function(X)
    svm_predicted_3 = svm_clf_3.decision_function(X)

    np.savetxt(os.path.join(config.score, "P001_cnn.lst"), svm_predicted_1)
    np.savetxt(os.path.join(config.score, "P002_cnn.lst"), svm_predicted_2)
    np.savetxt(os.path.join(config.score, "P003_cnn.lst"), svm_predicted_3)
Example #24
def train_kmeans(config):
    '''
    Train K-means from the bag-of-words vectors.
    :param config: configurations
    :return:
    '''
    if not os.path.exists("models"):
        os.mkdir("models")

    all_video_label_list = utils.get_video_and_label_list(config.all_train_list_filename) + \
                           utils.get_video_and_label_list(config.all_val_list_filename) + \
                           utils.get_video_and_label_list(config.all_test_list_filename)

    #   the number of bag-of-words centers
    list_of_data = []
    for now_video_label in all_video_label_list:
        vid_name = now_video_label[0]
        asr_bow_filename = os.path.join(config.asr_bow_root_path, vid_name + config.asr_bow_file_format)

        if os.path.exists(asr_bow_filename):
            data = np.load(asr_bow_filename)
            for i in range(data.shape[0]):
                list_of_data.append(data[i])
        else:
            print "File: " + asr_bow_filename + " does not exist"

    array_of_data = np.array(list_of_data)
    print array_of_data.shape
    print type(array_of_data)

    print "Going to do k-means"
    data_kmean = KMeans(n_clusters=200).fit(array_of_data)
    print "K-means"
    print type(data_kmean)

    labels = data_kmean.labels_   # per-sample cluster assignments (not the centers)
    print labels, type(labels)
    joblib.dump(data_kmean, 'models/kmeans_asr.pkl')
Example #25
def soundnet_bow(config, conv, size, k):

    all_video_label_list = utils.get_video_and_label_list(config.all_train_list_filename) + \
             utils.get_video_and_label_list(config.all_val_list_filename)

    i = 1

    for now_video_label in all_video_label_list:

        vid_name = now_video_label[0]
        sn_filename = os.path.join(
            config.soundnet_root_path,
            vid_name + conv + config.soundnet_file_format)

        if os.path.isfile(sn_filename):

            sn = np.load(sn_filename)
            sn = sn["arr_0"]
            sn = sn.reshape(-1, sn.shape[-1])

            sn = sn.T
            #index = int(np.floor(0.20*sn.shape[1]))

            #start = 0
            #end = sn.shape[1]

            #cols = random.sample(range(start, end), index)
            #sn = sn[:,cols]

            if i == 1:
                sn_vec = sn.T
            else:
                sn_vec = np.concatenate((sn_vec, sn.T), axis=0)
            i = i + 1

    kmeans = KMeans(n_clusters=k, random_state=0).fit(sn_vec)
    k_means_clusters = kmeans.cluster_centers_
    k_means_path = os.path.join(
        config.cluster_classifiers,
        "kmeans_" + str(k) + "" + str(conv) + "_sn_clusters.npy")

    np.save(k_means_path, k_means_clusters)

    all_video_label_list = utils.get_video_and_label_list(config.all_train_list_filename) + \
                              utils.get_video_and_label_list(config.all_val_list_filename) + \
                              utils.get_video_and_label_list(config.all_test_list_filename)

    k_means_path = os.path.join(
        config.cluster_classifiers,
        "kmeans_" + str(k) + "" + str(conv) + "_sn_clusters.npy")

    k_means_clusters = np.load(k_means_path)

    for now_video_label in all_video_label_list:

        vid_name = now_video_label[0]
        soundnet_filename = os.path.join(
            config.soundnet_root_path,
            vid_name + conv + config.soundnet_file_format)
        fea = os.path.join(config.soundnet_fea_bow,
                           vid_name + conv + config.soundnet_fea_file_format)
        if os.path.isfile(soundnet_filename):
            sn = np.load(soundnet_filename)
            sn = sn["arr_0"]
            sn = sn.reshape(-1, sn.shape[-1])
            k_dim = np.zeros((1, k))

            for j in range(sn.shape[0]):

                index = np.argmin(
                    np.linalg.norm(sn[j, :] - k_means_clusters, axis=1))

                k_dim[0][int(index)] = k_dim[0][int(index)] + 1

            np.save(fea, k_dim)

        else:
            np.save(fea, np.zeros((1, k)))
Example #26
    truth_y = [
        3 if '3' in yy else 2 if '2' in yy else 1 if '1' in yy else 0
        for xx, yy in name_with_labels
    ]
    assert (resx.shape[0] == len(truth_y))
    return resx, truth_y, id2name, name2id


trainxx, trainyy, _, _ = wrap_svc_data(train_y, train_name_with_labels,
                                       train_name)
print(trainxx.shape, len(trainyy), trainyy[:20])
valxx, valyy, id2name, name2id = wrap_svc_data(val_y, val_name_with_labels,
                                               val_name)

val_all = utils.get_video_and_label_list(config.all_val_list_filename)
val_all = [x for x, y in val_all]
'''
trainxx = np.load('./trainxx.npy')
trainyy = np.load('./trainyy.npy')

valxx = np.load('./valxx.npy')
valyy = np.load('./valyy.npy')
'''
print("TRAINING SVM BEGINS")
clf, scl = svcclf.train(trainxx, trainyy, True)
print("TESTING SVM BEGINS")
y_pred, y_proba = svcclf.test(clf, valxx, scl)
full_y_pred = [0] * len(val_all)
print("TOTAL NUMBER OF VALID VALIDATION DATA %d / %d" %
      (len(name2id), len(val_all)))
Example #27
def train(k):

    X = np.zeros((1, k))

    Y_1 = []
    Y_2 = []
    Y_3 = []

    train_video_label_list = utils.get_video_and_label_list(
        config.all_train_list_filename) + utils.get_video_and_label_list(
            config.all_val_list_filename)

    for now_video_label in train_video_label_list:

        vid_name = now_video_label[0]
        vid_label = now_video_label[1]
        cnn_feature_file = os.path.join(config.cnn_feat_path,
                                        vid_name + config.cnn_feat_file_format)

        if os.path.isfile(cnn_feature_file):

            cnn_feature = np.load(cnn_feature_file)
            X = np.vstack([X, cnn_feature])

            if vid_label == "P001":
                Y_1.extend([1])
            else:
                Y_1.extend([0])

            if vid_label == "P002":
                Y_2.extend([1])
            else:
                Y_2.extend([0])

            if vid_label == "P003":
                Y_3.extend([1])
            else:
                Y_3.extend([0])

    X = X[1:]   # drop the all-zeros initialization row

    tuned_parameters = {'C': [0.001, 0.01, 0.1, 1.0, 10.0, 100.0]}

    svm_clf_1 = LinearSVC()
    svm_clf_1.fit(X, Y_1)

    svm_clf_2 = LinearSVC()
    svm_clf_2.fit(X, Y_2)

    svm_clf_3 = LinearSVC(C=100)
    svm_clf_3.fit(X, Y_3)

    with open(os.path.join(config.cluster_classifiers, 'svm1_cnn.pkl'),
              'wb') as f1:
        pickle.dump(svm_clf_1, f1)

    with open(os.path.join(config.cluster_classifiers, 'svm2_cnn.pkl'),
              'wb') as f2:
        pickle.dump(svm_clf_2, f2)

    with open(os.path.join(config.cluster_classifiers, 'svm3_cnn.pkl'),
              'wb') as f3:
        pickle.dump(svm_clf_3, f3)
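tuned_parameters is defined above but never used; presumably a cross-validated search over C was intended. A hedged sketch of how that could be wired in for one of the classifiers:

from sklearn.model_selection import GridSearchCV

search = GridSearchCV(LinearSVC(), param_grid=tuned_parameters, cv=3)
search.fit(X, Y_1)                    # pick C for the P001-vs-rest classifier
svm_clf_1 = search.best_estimator_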
from keras.applications.vgg19 import VGG19
from keras.applications.imagenet_utils import preprocess_input
from keras.models import Model
from moviepy.editor import VideoFileClip
import os
import sys
sys.path.append("../")
import utils
import configs.hw2_config as config
import pdb
import scipy.misc

if __name__ == '__main__':

    shape = (224, 224)
    mod = VGG19(weights='imagenet')
    model = Model(inputs=mod.input, outputs=mod.layers[-1].output)

    all_video_label_list = utils.get_video_and_label_list(config.all_train_list_filename) + \
                           utils.get_video_and_label_list(config.all_test_list_filename) + \
                           utils.get_video_and_label_list(config.all_val_list_filename)

    for now_video_label in all_video_label_list:
        vid_name = now_video_label[0]
        ds_file = os.path.join(config.ds_video_root_path, vid_name + config.ds_video_file_format)
        cnn_file = os.path.join(config.cnn_feat_path, vid_name + config.cnn_feat_file_format)
        if os.path.exists(cnn_file):
            continue
        #print(vid_name)
        clip = VideoFileClip(ds_file)
        #frames = [idx for idx, x in enumerate(clip.iter_frames()) if idx % 5 == 0]
        #pdb.set_trace()
        #   sample one frame in every 50 and resize to the VGG input resolution
        frames = [scipy.misc.imresize(x, shape) for idx, x in enumerate(clip.iter_frames()) if idx % 50 == 0]
        #print(len(frames))