示例#1
0
def controleren(b, e):
    """Check h5 files against their source videos by comparing frame counts.

    The h5 files listed in ``P.CHALEARN30_ALL_DATA`` are sliced with ``[b:e]``.
    For each one, the number of keys in the file minus one is compared with
    the ``@nb_frames`` value that ffprobe reports for the first video (from
    the train, test and val splits) whose path contains the h5 base name.

    Args:
        b: start index of the slice of h5 files to check.
        e: end index (exclusive) of the slice of h5 files to check.

    Returns:
        List of video paths whose frame count differs from the h5 file.
        h5 files without any matching video are reported on stdout and
        skipped; they are not part of the returned list.
    """
    def get_frames_mp4(bn):
        # First video whose path contains the base name wins.
        for v in all_videos:
            if bn in v:
                return int(skvideo.io.ffprobe(v)['video']['@nb_frames']), v
        # No match: hand back an explicit pair so the caller can unpack it.
        return None, None

    def get_frames_h5(p):
        with h5py.File(p, 'r') as mf:
            # NOTE(review): presumably one key is not a frame, hence the -1;
            # confirm against the code that writes these files.
            frs = len(mf.keys()) - 1
        return frs

    still_todo = []

    all_videos = DU.get_all_videos('train') + DU.get_all_videos('test') + DU.get_all_videos('val')
    all_h5 = os.listdir(P.CHALEARN30_ALL_DATA)

    for i, h5 in enumerate(all_h5[b:e]):
        print(i)  # progress indicator
        base_name = h5.split('.h5')[0]
        h5_path = os.path.join(P.CHALEARN30_ALL_DATA, h5)

        h5_frames = get_frames_h5(h5_path)
        mp4_frames, v_path = get_frames_mp4(base_name)

        if v_path is None:
            # Nothing to compare against; keep checking the remaining files.
            print('no video found for %s' % (h5_path))
            continue

        if h5_frames != mp4_frames:
            still_todo.append(v_path)
            print('ohno, %s' % (v_path))

    print('still todo', len(still_todo))
    return still_todo
示例#2
0
def get_left_off_index():
    """Group training videos that have an h5 file only in the cluster folder.

    Every file name present in the cluster folder but absent from the main
    folder is reduced to its base name (the text before ``.h5``). The index
    of the first training video whose path contains that base name is put
    into one of five buckets by index: below 2000, below 3000, below 4000,
    below 5000 and below 6000. Indices of 6000 and above are ignored.

    Returns:
        Tuple of five lists of video paths, one per bucket, each ordered by
        ascending index in the training list.
    """
    p_main = '/scratch/users/gabras/data/chalearn30/all_data'  # probably all complete
    p_cluster = '/scratch/users/gabras/data/chalearn30/chalearn30/all_data'
    main_list = os.listdir(p_main)
    cluster_list = os.listdir(p_cluster)
    only_on_cluster = set(cluster_list) - set(main_list)

    train_videos = DU.get_all_videos('train')

    # Exclusive upper bound of each bucket; buckets[k] belongs to upper_bounds[k].
    upper_bounds = (2000, 3000, 4000, 5000, 6000)
    buckets = [[] for _ in upper_bounds]

    def slot_of(index):
        # Position of the first bucket whose bound exceeds the index, if any.
        for slot, bound in enumerate(upper_bounds):
            if index < bound:
                return slot
        return None

    for file_name in only_on_cluster:
        stem = file_name.split('.h5')[0]
        # Only the first matching video counts for each file.
        first_hit = next(
            (idx for idx, video in enumerate(train_videos) if stem in video),
            None,
        )
        if first_hit is None:
            continue
        slot = slot_of(first_hit)
        if slot is not None:
            buckets[slot].append(first_hit)

    # return ones that have already been converted
    train_array = np.array(train_videos)
    return tuple(list(train_array[sorted(bucket)]) for bucket in buckets)
示例#3
0
def move_cluster_completeness():
    """Move complete h5 files from the cluster folder into the main folder.

    For every file name that exists in the cluster folder but not in the main
    folder, the number of keys in the h5 file minus one is compared with the
    number of frames of the first training video whose path contains the h5
    base name (length of the first axis of ``DU.mp4_to_arr``). When both
    counts agree the file is moved to the main folder; otherwise the mismatch
    is printed.

    The whole run stops as soon as a file has no matching video or the
    matching video yields zero frames. ``OSError`` and ``RuntimeError`` raised
    while handling a file are printed and the loop continues with the next
    file.
    """
    p_main = '/scratch/users/gabras/data/chalearn30/all_data'  # probably all complete
    p_cluster = '/scratch/users/gabras/data/chalearn30/chalearn30/all_data'
    main_list = os.listdir(p_main)
    cluster_list = os.listdir(p_cluster)
    diff = set(cluster_list) - set(main_list)

    completed_which_moved = 0

    # get all the mp4s
    all_train = DU.get_all_videos('train')

    for cnt, n in enumerate(diff, 1):
        print(cnt)  # progress indicator
        h5 = os.path.join(p_cluster, n)
        try:
            # The context manager closes the handle even when reading the
            # keys fails, so a broken file does not leak an open handle.
            with h5py.File(h5, 'r') as mf:
                # NOTE(review): presumably one key is not a frame, hence the
                # -1; confirm against the code that writes these files.
                frames_h5 = len(mf.keys()) - 1
            print('opened and closed')

            n_base = n.split('.h5')[0]
            frames_mp4 = 0

            # find matching mp4
            for v in all_train:
                if n_base in v:
                    frames_mp4 = DU.mp4_to_arr(v).shape[0]
                    break

            if frames_mp4 == 0:
                # No usable reference video: abort the whole run.
                print('ohboi')
                return

            if frames_h5 == frames_mp4:
                shutil.move(src=h5, dst=os.path.join(p_main, n))
                completed_which_moved += 1
                print(completed_which_moved, cnt)
            else:
                print('frames should be %d, but are %d' % (frames_mp4, frames_h5))
        except (OSError, RuntimeError) as e:
            print(h5, 'failed', e)
示例#4
0
def parallel_convert_mod(which, b, e, func, number_processes=20):
    """Apply ``func`` in parallel to the not-yet-converted videos in ``[b:e]``.

    The videos of split ``which`` are sliced with ``[b:e]``. When ``b`` is
    1000, 2000, 3000, 4000 or 5000, the videos that ``get_left_off_index``
    returns for that range are removed from the slice first. Removing them
    goes through a set, so the processing order is not the list order.

    Args:
        which: split name passed to ``DU.get_all_videos``.
        b: start index of the slice of videos.
        e: end index (exclusive) of the slice of videos.
        func: callable invoked once per remaining video path; it has to be
            usable with ``multiprocessing.Pool.map``.
        number_processes: number of worker processes in the pool.
    """
    all_videos = DU.get_all_videos(which)
    a12, a23, a34, a45, a56 = get_left_off_index()

    all_videos = all_videos[b:e]
    # Start index of the slice -> videos of that range to leave out.
    skip_by_start = {1000: a12, 2000: a23, 3000: a34, 4000: a45, 5000: a56}
    already_done = skip_by_start.get(b)
    if already_done is not None:
        all_videos = list(set(all_videos) - set(already_done))

    pool = Pool(processes=number_processes)
    try:
        # func is only ever called with a video path, never without arguments.
        pool.map(func, all_videos)
    finally:
        # Shut the workers down so they do not outlive this call.
        pool.close()
        pool.join()
示例#5
0
def get_missing():
    """Collect training videos whose h5 file exists only in the cluster folder.

    Each file name found in the cluster folder but not in the main folder is
    reduced to its base name (the text before ``.h5``). Every training video
    whose path contains that base name is collected, so one file can
    contribute several videos.

    Returns:
        List of matching video paths, grouped per file name.
    """
    p_main = '/scratch/users/gabras/data/chalearn30/all_data'  # probably all complete
    p_cluster = '/scratch/users/gabras/data/chalearn30/chalearn30/all_data'
    main_list = os.listdir(p_main)
    cluster_list = os.listdir(p_cluster)
    only_on_cluster = set(cluster_list) - set(main_list)

    train_videos = DU.get_all_videos('train')

    stems = (file_name.split('.h5')[0] for file_name in only_on_cluster)
    return [
        video
        for stem in stems
        for video in train_videos
        if stem in video
    ]
示例#6
0
def normal_convert(which, b, e):
    """Run ``convert`` one after another on the videos of ``which`` in ``[b:e]``.

    Args:
        which: split name passed to ``DU.get_all_videos``.
        b: start index of the slice of videos.
        e: end index (exclusive) of the slice of videos.
    """
    selected = DU.get_all_videos(which)[b:e]
    for path in selected:
        convert(path)
示例#7
0
def parallel_convert(which, b, e, func, number_processes=20):
    """Apply ``func`` in parallel to the videos of split ``which`` in ``[b:e]``.

    Args:
        which: split name passed to ``DU.get_all_videos``.
        b: start index of the slice of videos.
        e: end index (exclusive) of the slice of videos.
        func: callable invoked once per video path; it has to be usable with
            ``multiprocessing.Pool.map``.
        number_processes: number of worker processes in the pool.
    """
    all_videos = DU.get_all_videos(which)[b:e]

    pool = Pool(processes=number_processes)
    try:
        # func is only ever called with a video path, never without arguments.
        pool.map(func, all_videos)
    finally:
        # Shut the workers down so they do not outlive this call.
        pool.close()
        pool.join()