Example #1
def _802_uniform_sample_frames_for_i3d_test_video_level():
    video_names_splits_path = Pth(
        'EPIC-Kitchens/annotations/video_names_splits.pkl')
    frame_relative_pathes_dict_tr_path = Pth(
        'EPIC-Kitchens/annotations/frame_relative_pathes_dict_tr.pkl')
    frame_relative_pathes_dict_te_path = Pth(
        'EPIC-Kitchens/annotations/frame_relative_pathes_dict_te.pkl')
    sampled_frames_relative_pathes = Pth(
        'EPIC-Kitchens/annotations/frame_relative_pathes_uniform_sample.pkl')

    (video_names_tr, video_names_te) = utils.pkl_load(video_names_splits_path)
    frame_relative_pathes_dict_tr = utils.pkl_load(
        frame_relative_pathes_dict_tr_path)
    frame_relative_pathes_dict_te = utils.pkl_load(
        frame_relative_pathes_dict_te_path)

    n_timesteps = 64
    n_frames_per_segment = 8
    n_frames_per_video = n_timesteps * n_frames_per_segment

    sampled_frames_tr = __uniform_sample_frames_per_video_for_i3d(
        video_names_tr, frame_relative_pathes_dict_tr, n_frames_per_segment,
        n_frames_per_video)
    sampled_frames_te = __uniform_sample_frames_per_video_for_i3d(
        video_names_te, frame_relative_pathes_dict_te, n_frames_per_segment,
        n_frames_per_video)

    data = (sampled_frames_tr, sampled_frames_te)
    utils.pkl_dump(data, sampled_frames_relative_pathes)
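
The helper __uniform_sample_frames_per_video_for_i3d is only referenced above; a minimal sketch of such uniform segment sampling, with a hypothetical name and under the assumption that every video has at least one full segment of frames, could look like this:

import numpy as np

def uniform_sample_frames_per_video_for_i3d_sketch(video_names, frames_dict,
                                                    n_frames_per_segment,
                                                    n_frames_per_video):
    # pick n_segments uniformly spaced segment start points per video and take
    # n_frames_per_segment successive frames from each start point
    n_segments = int(n_frames_per_video / n_frames_per_segment)
    sampled = dict()
    for video_name in video_names:
        frames = np.array(frames_dict[video_name])
        n = len(frames)
        starts = np.linspace(0, max(n - n_frames_per_segment, 0), n_segments)
        starts = starts.astype(np.int32)
        idx = np.hstack(
            [np.arange(s, s + n_frames_per_segment) for s in starts])
        idx = np.clip(idx, 0, n - 1)  # guard against very short videos
        sampled[video_name] = frames[idx]
    return sampled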
Example #2
def _08_prepare_annotation_frames_per_video_dict_multi_label():
    """
    Get the list of frames for each video, with at most 600 and at least 100 frames per video.
    These frames will be used to extract features for each video.
    """

    min_frames_per_video = 100
    max_frames_per_video = 100

    root_path = c.data_root_path
    annot_tr_text_path = '%s/Charades/annotation/Charades_v1_train.csv' % (
        root_path)
    annot_te_text_path = '%s/Charades/annotation/Charades_v1_test.csv' % (
        root_path)
    annotation_path = '%s/Charades/annotation/frames_dict_multi_label.pkl' % (
        root_path)

    video_frames_dict_tr = __get_frame_names_from_csv_file(
        annot_tr_text_path, min_frames_per_video, max_frames_per_video)
    video_frames_dict_te = __get_frame_names_from_csv_file(
        annot_te_text_path, min_frames_per_video, max_frames_per_video)

    utils.pkl_dump((video_frames_dict_tr, video_frames_dict_te),
                   annotation_path,
                   is_highest=True)
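
The helper __get_frame_names_from_csv_file is not shown above; the following is a hedged sketch (hypothetical name, extra frames_root argument, and an assumed '<frames_root>/<video_id>/' frame layout) of how a per-video frame list with minimum and maximum counts might be built from the Charades CSV:

import csv
import os
import numpy as np

def get_frame_names_from_csv_file_sketch(csv_path, frames_root, min_frames,
                                         max_frames):
    video_frames_dict = dict()
    with open(csv_path) as f:
        for row in csv.DictReader(f):
            video_id = row['id']
            frame_names = sorted(os.listdir('%s/%s' % (frames_root, video_id)))
            n = len(frame_names)
            if min_frames is not None and n < min_frames:
                continue  # skip videos that are too short
            if max_frames is not None and n > max_frames:
                # uniformly sub-sample down to max_frames frames
                idx = np.linspace(0, n - 1, max_frames).astype(np.int32)
                frame_names = list(np.array(frame_names)[idx])
            video_frames_dict[video_id] = np.array(frame_names)
    return video_frames_dict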
Example #3
def _800_prepare_video_frames_path_dict():
    frame_relative_pathes_dict_path = Pth(
        'EPIC-Kitchens/annotations/frame_relative_pathes_dict.pkl')
    video_names_splits_path = Pth(
        'EPIC-Kitchens/annotations/video_names_splits.pkl')
    imgs_root_path = Pth('EPIC-Kitchens/frames_rgb_resized/train')

    (video_names_tr, video_names_te) = utils.pkl_load(video_names_splits_path)
    video_names = np.hstack((video_names_tr, video_names_te))

    frame_relative_pathes_dict = {}
    n_videos = len(video_names)
    for idx, video_id in enumerate(video_names):
        utils.print_counter(idx, n_videos)

        person_id = video_id.split('_')[0]
        video_frames_root_path = '%s/%s/%s' % (imgs_root_path, person_id,
                                               video_id)
        video_frames_names = utils.file_names(video_frames_root_path,
                                              is_nat_sort=True)
        video_frames_names = np.array(video_frames_names)
        video_frames_relative_pathes = np.array([
            '%s/%s/%s' % (person_id, video_id, n) for n in video_frames_names
        ])
        frame_relative_pathes_dict[video_id] = video_frames_relative_pathes

    utils.pkl_dump(frame_relative_pathes_dict, frame_relative_pathes_dict_path)
Example #4
def _12_prepare_annotation_frames_per_video_dict_multi_label_all_frames():
    """
    Get list of frames from each video. All frames for each video.
    """

    n_frames_per_video = None
    root_path = c.data_root_path
    annot_tr_text_path = '%s/Charades/annotation/Charades_v1_train.csv' % (
        root_path)
    annot_te_text_path = '%s/Charades/annotation/Charades_v1_test.csv' % (
        root_path)
    annotation_path = '%s/Charades/annotation/frames_dict_multi_label_all_frames.pkl' % (
        root_path)

    video_frames_dict_tr = __get_frame_names_from_csv_file(annot_tr_text_path,
                                                           n_frames_per_video,
                                                           n_frames_per_video,
                                                           sampling=False)
    video_frames_dict_te = __get_frame_names_from_csv_file(annot_te_text_path,
                                                           n_frames_per_video,
                                                           n_frames_per_video,
                                                           sampling=False)

    utils.pkl_dump((video_frames_dict_tr, video_frames_dict_te),
                   annotation_path,
                   is_highest=True)
Example #5
def _13_prepare_annotation_frames_per_video_dict_untrimmed_multi_label_for_i3d(
        n_frames_per_video):
    """
    Sample frames from the frame list of each video.
    Uniformly sample sequences of frames from each video. Each sequence consists of 8 successive frames.
    n_frames_per_video = 1024 || 512 || 256
    """
    # root_path = c.DATA_ROOT_PATH
    root_path = c.data_root_path
    annot_tr_text_path = '%s/Charades/annotation/Charades_v1_train.csv' % (
        root_path)  #'./data/Charades/annotation/Charades_v1_train.csv'
    annot_te_text_path = '%s/Charades/annotation/Charades_v1_test.csv' % (
        root_path)
    annotation_path = '%s/Charades/annotation/frames_dict_untrimmed_multi_label_i3d_%d_frames.pkl' % (
        root_path, n_frames_per_video)

    # sampling: every 8 successive frames form one video segment
    video_frames_dict_tr = __get_frame_names_untrimmed_from_csv_file_for_i3d(
        annot_tr_text_path, n_frames_per_video)
    video_frames_dict_te = __get_frame_names_untrimmed_from_csv_file_for_i3d(
        annot_te_text_path, n_frames_per_video)

    utils.pkl_dump((video_frames_dict_tr, video_frames_dict_te),
                   annotation_path,
                   is_highest=True)
Example #6
def _14_prepare_annotation_frames_per_video_dict_untrimmed_multi_label_for_resnet_ordered(
):
    """
    Get the list of frames for each video, with at most 600 and at least 96 frames per video.
    These frames will be used to extract features for each video.
    """

    # if required frames per video are 128, there are 51/6 out of 7986/1864 videos in training/testing splits that don't satisfy this
    n_frames_per_video = 32
    root_path = c.data_root_path
    annot_tr_text_path = '%s/Charades/annotation/Charades_v1_train.csv' % (
        root_path)
    annot_te_text_path = '%s/Charades/annotation/Charades_v1_test.csv' % (
        root_path)
    annotation_path = '%s/Charades/annotation/frames_dict_untrimmed_multi_label_resnet_ordered_%d_frames.pkl' % (
        root_path, n_frames_per_video)

    video_frames_dict_tr = __get_frame_names_untrimmed_from_csv_file_for_ordered(
        annot_tr_text_path, n_frames_per_video, is_resnet=True)
    video_frames_dict_te = __get_frame_names_untrimmed_from_csv_file_for_ordered(
        annot_te_text_path, n_frames_per_video, is_resnet=True)

    utils.pkl_dump((video_frames_dict_tr, video_frames_dict_te),
                   annotation_path,
                   is_highest=True)
Example #7
def _02_prepare_annotation_frame_dict(is_training=True):
    root_path = c.data_root_path
    annot_tr_text_path = '%s/Charades/annotation/Charades_v1_train.csv' % (
        root_path)
    annot_te_text_path = '%s/Charades/annotation/Charades_v1_test.csv' % (
        root_path)
    annotation_pkl_tr_path = '%s/Charades/annotation/frames_dict_tr.pkl' % (
        root_path)
    annotation_pkl_te_path = '%s/Charades/annotation/frames_dict_te.pkl' % (
        root_path)

    annot_text_path = annot_tr_text_path if is_training else annot_te_text_path
    annotation_pkl_path = annotation_pkl_tr_path if is_training else annotation_pkl_te_path
    annotation_dict = {}
    n_actions = N_CLASSES

    frames_per_instance = []

    # add empty list for each action in the annotation dictionary
    for idx_action in range(n_actions):
        action_num = idx_action + 1  # saved action indices are also 1-based
        annotation_dict[action_num] = []  # collects the frame paths for each action class

    with open(annot_text_path) as f:
        reader = csv.DictReader(f)  # read each row as a dict
        for row in reader:
            # action_strings has the format: c092 11.90 21.20;c147 0.00 12.60
            action_strings = row['actions']  # actions with their durations
            action_strings_splits = action_strings.split(';')
            video_id = row['id']  # videos in Charades are identified by their id
            if len(action_strings) == 0:
                print('... no action for video %s' % (video_id))
                continue
            for action_st in action_strings_splits:
                action_splits = action_st.split(' ')
                action_idx = int(action_splits[0][1:])  # index of the action
                action_num = action_idx + 1
                action_start = action_splits[1]
                action_end = action_splits[2]

                # add frames
                # 1. get the relative paths of the frames within the given action interval
                frames_relative_path = __get_frames_relative_pathes_in_given_duration(
                    video_id, action_start, action_end)
                annotation_dict[action_num].append(frames_relative_path)

                # accumulate counter
                n_frames_per_instance = len(
                    frames_relative_path)  # number of frames in this action instance
                frames_per_instance.append(n_frames_per_instance)

    # save annotation
    utils.pkl_dump(annotation_dict, annotation_pkl_path, is_highest=True)
    print(frames_per_instance)
    print(len(frames_per_instance))
    print(np.sum(frames_per_instance))
    print(np.average(frames_per_instance))
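
The helper __get_frames_relative_pathes_in_given_duration is not shown; a minimal sketch of the start/end-seconds-to-frame-paths conversion it presumably performs, assuming frames were extracted at a fixed rate (the fps value below is an assumption) and named '<video_id>-%06d.jpg' inside a folder per video:

def get_frames_relative_pathes_in_given_duration_sketch(video_id, action_start,
                                                        action_end, fps=24.0):
    # map the action interval in seconds to 1-based frame indices
    idx_start = int(float(action_start) * fps) + 1
    idx_end = int(float(action_end) * fps) + 1
    return ['%s/%s-%06d.jpg' % (video_id, video_id, idx)
            for idx in range(idx_start, idx_end + 1)]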
Example #8
def _703_prepare_data_splits():
    """
    Split the videos of each person into training and test sets.
    :return:
    """

    annot_dict_path = Pth(
        'EPIC-Kitchens/annotations/EPIC_train_action_labels_dict.pkl')
    annot_idxes_many_shots_path = Pth(
        'EPIC-Kitchens/annotations/annot_idxes_many_shots_noun_verb.pkl')
    video_names_splits_path = Pth(
        'EPIC-Kitchens/annotations/video_names_splits.pkl')

    annot_idxes_many_shots = utils.pkl_load(annot_idxes_many_shots_path)
    annot_dict = utils.pkl_load(annot_dict_path)

    # split_ratio
    split_ratio = 0.8
    person_videos_dict = {}

    # first loop to collect all unique video ids
    for annot_id in annot_idxes_many_shots:
        annot_line = annot_dict[annot_id]
        person_id = annot_line[0]
        video_id = annot_line[1]
        if person_id not in person_videos_dict:
            person_videos_dict[person_id] = []

        person_videos_dict[person_id].append(video_id)

    for person_id in person_videos_dict:
        video_names = natsort.natsorted(
            np.unique(person_videos_dict[person_id]))
        person_videos_dict[person_id] = video_names

    # now that we have collected the persons and their videos, see how many videos each split gets
    video_names_tr = []
    video_names_te = []

    for person_id in person_videos_dict:
        v_names = person_videos_dict[person_id]
        idx = int(len(v_names) * split_ratio)
        v_names_tr = v_names[:idx]
        v_names_te = v_names[idx:]
        video_names_tr += v_names_tr
        video_names_te += v_names_te

    video_names_tr = np.array(video_names_tr)
    video_names_te = np.array(video_names_te)

    print(len(video_names_tr) + len(video_names_te))
    print(len(video_names_tr))
    print(len(video_names_te))

    # save video names
    utils.pkl_dump((video_names_tr, video_names_te), video_names_splits_path)
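
A tiny illustration of the per-person split above: with five videos for one person and split_ratio = 0.8, the first four videos go to training and the last one to testing.

v_names = ['P01_01', 'P01_02', 'P01_03', 'P01_04', 'P01_05']  # toy example
idx = int(len(v_names) * 0.8)  # 4
print(v_names[:idx])  # ['P01_01', 'P01_02', 'P01_03', 'P01_04']
print(v_names[idx:])  # ['P01_05']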
Example #9
def _02_prepare_annotation_frame_dict(is_training=True):
    root_path = c.data_root_path
    annot_tr_text_path = '%s/Charades/annotation/Charades_v1_train.csv' % (
        root_path)
    annot_te_text_path = '%s/Charades/annotation/Charades_v1_test.csv' % (
        root_path)
    annotation_pkl_tr_path = '%s/Charades/annotation/frames_dict_tr.pkl' % (
        root_path)
    annotation_pkl_te_path = '%s/Charades/annotation/frames_dict_te.pkl' % (
        root_path)

    annot_text_path = annot_tr_text_path if is_training else annot_te_text_path
    annotation_pkl_path = annotation_pkl_tr_path if is_training else annotation_pkl_te_path
    annotation_dict = {}
    n_actions = N_CLASSES

    frames_per_instance = []

    # add empty list for each action in the annotation dictionary
    for idx_action in range(n_actions):
        action_num = idx_action + 1
        annotation_dict[action_num] = []

    with open(annot_text_path) as f:
        reader = csv.DictReader(f)
        for row in reader:
            action_strings = row['actions']
            action_strings_splits = action_strings.split(';')
            video_id = row['id']
            if len(action_strings) == 0:
                print('... no action for video %s' % (video_id))
                continue
            for action_st in action_strings_splits:
                action_splits = action_st.split(' ')
                action_idx = int(action_splits[0][1:])
                action_num = action_idx + 1
                action_start = action_splits[1]
                action_end = action_splits[2]

                # add frames
                frames_relative_path = __get_frames_relative_pathes_in_given_duration(
                    video_id, action_start, action_end)
                annotation_dict[action_num].append(frames_relative_path)

                # accumulate counter
                n_frames_per_instance = len(frames_relative_path)
                frames_per_instance.append(n_frames_per_instance)

    # save annotation
    utils.pkl_dump(annotation_dict, annotation_pkl_path, is_highest=True)
    print(frames_per_instance)
    print(len(frames_per_instance))
    print(np.sum(frames_per_instance))
    print(np.average(frames_per_instance))
Example #10
def _06_prepare_video_annotation_multi_label():
    root_path = '.'
    video_annotation_path = '%s/Charades/annotation/video_annotation.pkl' % (
        root_path)
    video_annotation_multi_label_path = '%s/Charades/annotation/video_annotation_multi_label.pkl' % (
        root_path)

    (video_id_tr, y_tr, video_id_te,
     y_te) = utils.pkl_load(video_annotation_path)

    video_ids_tr = np.unique(video_id_tr)
    video_ids_te = np.unique(video_id_te)

    n_tr = len(video_ids_tr)
    n_te = len(video_ids_te)
    n_classes = N_CLASSES

    video_gt_dict_tr = dict()
    video_gt_dict_te = dict()

    for id in video_ids_tr:
        video_gt_dict_tr[id] = []

    for id in video_ids_te:
        video_gt_dict_te[id] = []
    """
    zip() 函数用于将可迭代的对象作为参数,将对象中对应的元素打包成一个个元组,然后返回由这些元组组成的列表。

    如果各个迭代器的元素个数不一致,则返回列表长度与最短的对象相同,利用 * 号操作符,可以将元组解压为列表。
    """
    for i, j in zip(video_id_tr, y_tr):
        video_gt_dict_tr[i].append(j)

    for i, j in zip(video_id_te, y_te):
        video_gt_dict_te[i].append(j)

    # binarize labels of videos
    y_multi_label_tr = np.zeros((n_tr, n_classes), dtype=np.int)
    y_multi_label_te = np.zeros((n_te, n_classes), dtype=np.int)

    for idx_video, video_name in enumerate(video_ids_tr):
        idx_class = np.add(video_gt_dict_tr[video_name], -1)
        y_multi_label_tr[idx_video][idx_class] = 1
        _ = 10

    for idx_video, video_name in enumerate(video_ids_te):
        idx_class = np.add(video_gt_dict_te[video_name], -1)
        y_multi_label_te[idx_video][idx_class] = 1
        _ = 10

    data = (video_ids_tr, y_multi_label_tr, video_ids_te, y_multi_label_te)
    utils.pkl_dump(data, video_annotation_multi_label_path)
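
A small self-contained illustration of the multi-hot binarization done above: for a video whose 1-based labels are [2, 5] and n_classes = 6, the indexing trick produces the row [0, 1, 0, 0, 1, 0].

import numpy as np

labels = [2, 5]  # 1-based class ids of one toy video
y = np.zeros((6,), dtype=int)
y[np.add(labels, -1)] = 1  # same shift-by-one indexing as above
print(y)  # [0 1 0 0 1 0]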
Example #11
def __save_centroids(root_model, model_name, epoch_num):
    centroids_root_path = Pth('EPIC-Kitchens/node_features/%s', (model_name, ))
    centroids_path = '%s/%03d.pkl' % (centroids_root_path, epoch_num)

    if not os.path.exists(centroids_root_path):
        os.mkdir(centroids_root_path)

    session = K.get_session()
    t_centroids = root_model.get_layer(
        'node_embedding').output  # (1, 20, 1024)
    centroids_embedding = t_centroids.eval(session=session)  # (1, 20, 1024)
    centroids_embedding = np.squeeze(centroids_embedding, axis=0)

    utils.pkl_dump(centroids_embedding, centroids_path)
Example #12
def _103_prepare_video_info():
    video_info_path = Pth('Breakfast/annotation/video_info.pkl')
    annot_activities_path = Pth('Breakfast/annotation/annot_activities.pkl')
    (video_relative_pathes_tr, _, video_relative_pathes_te,
     _) = utils.pkl_load(annot_activities_path)

    video_relative_pathes = np.hstack(
        (video_relative_pathes_tr, video_relative_pathes_te))
    n_videos = len(video_relative_pathes)

    video_info = dict()
    fps, n_frames, duration = [], [], []

    # loop on the videos
    for idx_video, video_relative_path in enumerate(video_relative_pathes):

        utils.print_counter(idx_video, n_videos, 100)

        video_path = Pth('Breakfast/videos/%s', (video_relative_path, ))
        video_id = __video_relative_path_to_video_id(video_relative_path)

        try:
            v_fps, v_n_frames, v_duration = video_utils.get_video_info(
                video_path)
        except:
            print(video_relative_path)
            continue

        fps.append(v_fps)
        n_frames.append(v_n_frames)
        duration.append(v_duration)
        video_info[video_id] = {
            'duration': v_duration,
            'fps': v_fps,
            'n_frames': v_n_frames
        }

    print(np.mean(fps), np.std(fps), np.min(fps), np.max(fps))
    print(np.mean(duration), np.std(duration), np.min(duration),
          np.max(duration))
    print(np.mean(n_frames), np.std(n_frames), np.min(n_frames),
          np.max(n_frames))

    # 15.0 0.0 15.0 15.0
    # 140.30865654205607 121.76493338896255 12.4 649.67
    # 2105.308995327103 1826.5189539717755 187 9746

    utils.pkl_dump(video_info, video_info_path)
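
The helper video_utils.get_video_info is not shown; a hedged OpenCV-based sketch of what it might return (fps, number of frames, duration in seconds) is:

import cv2

def get_video_info_sketch(video_path):
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        raise IOError('cannot open video: %s' % video_path)
    fps = cap.get(cv2.CAP_PROP_FPS)
    n_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    cap.release()
    duration = n_frames / fps if fps > 0 else 0.0
    return fps, n_frames, duration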
Example #13
def _06_prepare_video_annotation_multi_label():
    root_path = '.'
    video_annotation_path = '%s/Charades/annotation/video_annotation.pkl' % (
        root_path)
    video_annotation_multi_label_path = '%s/Charades/annotation/video_annotation_multi_label.pkl' % (
        root_path)

    (video_id_tr, y_tr, video_id_te,
     y_te) = utils.pkl_load(video_annotation_path)

    video_ids_tr = np.unique(video_id_tr)
    video_ids_te = np.unique(video_id_te)

    n_tr = len(video_ids_tr)
    n_te = len(video_ids_te)
    n_classes = N_CLASSES

    video_gt_dict_tr = dict()
    video_gt_dict_te = dict()

    for id in video_ids_tr:
        video_gt_dict_tr[id] = []

    for id in video_ids_te:
        video_gt_dict_te[id] = []

    for i, j in zip(video_id_tr, y_tr):
        video_gt_dict_tr[i].append(j)

    for i, j in zip(video_id_te, y_te):
        video_gt_dict_te[i].append(j)

    # binarize labels of videos
    y_multi_label_tr = np.zeros((n_tr, n_classes), dtype=np.int)
    y_multi_label_te = np.zeros((n_te, n_classes), dtype=np.int)

    for idx_video, video_name in enumerate(video_ids_tr):
        idx_class = np.add(video_gt_dict_tr[video_name], -1)
        y_multi_label_tr[idx_video][idx_class] = 1
        _ = 10

    for idx_video, video_name in enumerate(video_ids_te):
        idx_class = np.add(video_gt_dict_te[video_name], -1)
        y_multi_label_te[idx_video][idx_class] = 1
        _ = 10

    data = (video_ids_tr, y_multi_label_tr, video_ids_te, y_multi_label_te)
    utils.pkl_dump(data, video_annotation_multi_label_path)
Example #14
def _13_prepare_annotation_frames_per_video_dict_untrimmed_multi_label_for_i3d():
    """
    Uniformly sample sequences of frames from each video. Each sequence consists of 8 successive frames.
    """
    # n_frames_per_video = 1024
    # n_frames_per_video = 128
    n_frames_per_video = 256
    root_path = c.data_root_path
    annot_tr_text_path = '%s/Charades/annotation/Charades_v1_train.csv' % (root_path)
    annot_te_text_path = '%s/Charades/annotation/Charades_v1_test.csv' % (root_path)
    annotation_path = '%s/Charades/annotation/frames_dict_untrimmed_multi_label_i3d_%d_frames.pkl' % (root_path, n_frames_per_video)

    video_frames_dict_tr = __get_frame_names_untrimmed_from_csv_file_for_i3d(annot_tr_text_path, n_frames_per_video)
    video_frames_dict_te = __get_frame_names_untrimmed_from_csv_file_for_i3d(annot_te_text_path, n_frames_per_video)

    utils.pkl_dump((video_frames_dict_tr, video_frames_dict_te), annotation_path, is_highest=True)
Example #15
def _01_prepare_annotation_class_names():
    root_path = c.data_root_path
    annot_text_path = '%s/Charades/annotation/Charades_v1_classes.txt' % (root_path)
    annot_pkl_path = '%s/Charades/annotation/class_names.pkl' % (root_path)

    class_names = utils.txt_load(annot_text_path)

    class_ids = [int(n[1:5]) for n in class_names]
    for i_1, i_2 in zip(class_ids, np.arange(N_CLASSES)):
        assert i_1 == i_2

    class_names = [n[5:] for n in class_names]
    class_names = np.array(class_names)

    utils.pkl_dump(class_names, annot_pkl_path, is_highest=True)
    _ = 10
Example #16
def _105_prepare_action_gt_timestamped():
    """
    Get ground truth of unit-actions with their timestamps.
    :return:
    """
    root_path = c.DATA_ROOT_PATH
    video_ids_path = Pth('Breakfast/annotation/video_ids_split.pkl')
    unit_actions_path = Pth('Breakfast/annotation/unit_actions_list.pkl')
    gt_actions_path = Pth(
        'Breakfast/annotation/gt_unit_actions_timestamped.pkl')

    (video_ids_tr, video_ids_te) = utils.pkl_load(video_ids_path)
    unit_actions = utils.pkl_load(unit_actions_path)

    video_pathes_tr = [
        '%s/Breakfast/videos/%s' % (
            root_path,
            __video_video_id_to_video_relative_path(id, False),
        ) for id in video_ids_tr
    ]
    video_pathes_te = [
        '%s/Breakfast/videos/%s' % (
            root_path,
            __video_video_id_to_video_relative_path(id, False),
        ) for id in video_ids_te
    ]

    gt_actions_te = __get_gt_actions_timestamped(video_pathes_te, unit_actions)
    gt_actions_tr = __get_gt_actions_timestamped(video_pathes_tr, unit_actions)

    gt_actions_tr = np.array(gt_actions_tr)
    gt_actions_te = np.array(gt_actions_te)

    l_tr = [len(i) for i in gt_actions_tr]
    l_te = [len(i) for i in gt_actions_te]
    print('mean, std, min, max for number of nodes in each video [tr/te]')
    print(np.mean(l_tr), np.std(l_tr), np.min(l_tr), np.max(l_tr))
    print(np.mean(l_te), np.std(l_te), np.min(l_te), np.max(l_te))

    print(gt_actions_tr.shape)
    print(gt_actions_te.shape)

    utils.pkl_dump(
        ((video_ids_tr, gt_actions_tr), (video_ids_te, gt_actions_te)),
        gt_actions_path)
Example #17
def _501_generate_centroids(n_centroids, n_dims):
    c1_path = Pth(
        'Breakfast/features_centroids/features_random_%d_centroids.pkl',
        (n_centroids, ))
    c2_path = Pth(
        'Breakfast/features_centroids/features_sobol_%d_centroids.pkl',
        (n_centroids, ))

    # centroids as random vectors
    c1 = np.random.rand(n_centroids, n_dims)

    # centroids as sobol sequence
    c2 = sobol.sobol_generate(n_dims, n_centroids)
    c2 = np.array(c2)

    # save centroids
    utils.pkl_dump(c1, c1_path)
    utils.pkl_dump(c2, c2_path)
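
If the sobol helper module is not available, an equivalent low-discrepancy sequence in [0, 1) can be drawn with SciPy's quasi-Monte Carlo engine (scipy >= 1.7); this is an alternative sketch, not the module used above:

import numpy as np
from scipy.stats import qmc

def sobol_generate_scipy(n_dims, n_points, seed=0):
    sampler = qmc.Sobol(d=n_dims, scramble=False, seed=seed)
    return np.array(sampler.random(n_points))  # shape (n_points, n_dims)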
Example #18
def _602_generate_nodes(n_nodes, n_dims):

    n1_path = Pth('EPIC-Kitchens/features_centroid/features_random_%d.pkl', (n_nodes,))
    n2_path = Pth('EPIC-Kitchens/features_centroid/features_sobol_%d.pkl', (n_nodes,))

    # nodes as random vectors
    n1 = np.random.rand(n_nodes, n_dims)

    # nodes as sobol sequence
    n2 = sobol.sobol_generate(n_dims, n_nodes)
    n2 = np.array(n2)

    print(n1.shape)
    print(n2.shape)

    # save nodes
    utils.pkl_dump(n1, n1_path)
    utils.pkl_dump(n2, n2_path)
Example #19
def _202_spit_video_frames_relative_pathes():
    video_names_splits_path = Pth('EPIC-Kitchens/annotation/video_names_splits.pkl')
    frame_relative_pathes_dict_path = Pth('EPIC-Kitchens/annotation/frame_relative_pathes_dict.pkl')
    frame_relative_pathes_dict_tr_path = Pth('EPIC-Kitchens/annotation/frame_relative_pathes_dict_tr.pkl')
    frame_relative_pathes_dict_te_path = Pth('EPIC-Kitchens/annotation/frame_relative_pathes_dict_te.pkl')

    (video_names_tr, video_names_te) = utils.pkl_load(video_names_splits_path)
    frames_dict = utils.pkl_load(frame_relative_pathes_dict_path)

    dict_tr = dict()
    dict_te = dict()

    for v_name in video_names_tr:
        dict_tr[v_name] = frames_dict[v_name]

    for v_name in video_names_te:
        dict_te[v_name] = frames_dict[v_name]

    utils.pkl_dump(dict_tr, frame_relative_pathes_dict_tr_path)
    utils.pkl_dump(dict_te, frame_relative_pathes_dict_te_path)
Example #20
def _01_get_nodes_over_epochs():
    """
    Get centroids of the model.
    :return:
    """

    n_centroids = 128
    n_epochs = 100
    model_name = 'classifier_19.02.21-01:00:30'
    model_root_path = Pth('Breakfast/models/%s', (model_name,))
    centroids_path = Pth('Breakfast/features_centroids/features_random_%d_centroids.pkl', (n_centroids,))
    nodes_root_path = Pth('Breakfast/qualitative_results/node_embedding_%s' % (model_name,))

    v_input_nodes = utils.pkl_load(centroids_path)

    model = None
    t_input_nodes = None
    t_node_embedding = None
    keras_session = K.get_session()

    for idx_epoch in range(n_epochs):

        utils.print_counter(idx_epoch, n_epochs)

        epoch_num = idx_epoch + 1
        weight_path = '%s/%03d.pkl' % (model_root_path, epoch_num)

        if epoch_num == 1:
            model = __load_model(model_name, epoch_num)
            t_input_nodes = model.get_layer('input_n').input
            t_node_embedding = model.get_layer('node_embedding').output
        else:
            model.load_weights(weight_path)

        v_node_embedding, = keras_session.run([t_node_embedding], {t_input_nodes: v_input_nodes})  # (1, 128, 1024)
        v_node_embedding = np.squeeze(v_node_embedding, axis=0)  # (128, 1024)
        path = '%s/%02d.pkl' % (nodes_root_path, epoch_num)
        utils.pkl_dump(v_node_embedding, path)

    pass
Example #21
def _04_get_activation_values():
    # load data
    n_timesteps = 64
    n_centroids = 128

    model_name = 'classifier_19.02.21-01:00:30'
    features_path = Pth('Breakfast/features/features_i3d_mixed_5c_%d_frames.h5', (n_timesteps * 8,))
    centroids_path = Pth('Breakfast/features_centroids/features_random_%d_centroids.pkl', (n_centroids,))
    attention_values_path = Pth('Breakfast/qualitative_results/node_attention_%s.pkl', (model_name,))

    v_input_n = utils.pkl_load(centroids_path)
    (x_tr, x_te) = utils.h5_load_multi(features_path, ['x_tr', 'x_te'])

    epoch_num = 133
    model = __load_model(model_name, epoch_num)

    t_input_n = model.get_layer('input_n').input
    t_input_x = model.get_layer('input_x').input
    t_node_attention = model.get_layer('node_attention').output  # (None, 7, 7, 64, 100)
    keras_session = K.get_session()

    batch_size = 40
    att_tr = __get_tensor_values(batch_size, keras_session, t_node_attention, t_input_n, t_input_x, v_input_n, x_tr)  # (None, 1, 1, 64, 128)
    att_te = __get_tensor_values(batch_size, keras_session, t_node_attention, t_input_n, t_input_x, v_input_n, x_te)  # (None, 1, 1, 64, 128)

    att_tr = np.squeeze(att_tr, axis=1)  # (None, 1, 64, 128)
    att_tr = np.squeeze(att_tr, axis=1)  # (None, 64, 128)
    att_te = np.squeeze(att_te, axis=1)  # (None, 1, 64, 128)
    att_te = np.squeeze(att_te, axis=1)  # (None, 64, 128)

    print('finally')
    print(x_tr.shape)
    print(x_te.shape)

    print(att_tr.shape)
    print(att_te.shape)

    utils.pkl_dump((att_tr, att_te), attention_values_path)
Example #22
def _204_sample_frames_non_local():
    """
    Uniformly sample sequences of frames from each video. Each sequence consists of 8 successive frames.
    """

    n_frames_per_video = 512
    model_type = 'non_local'

    annot_activities_path = Pth('Breakfast/annotation/annot_activities.pkl')
    frames_annot_path = Pth(
        'Breakfast/annotation/annot_frames_non_local_%d.pkl',
        (n_frames_per_video, ))

    (video_relative_pathes_tr, _, video_relative_pathes_te,
     _) = utils.pkl_load(annot_activities_path)

    video_frames_dict_tr = __sample_frames(video_relative_pathes_tr,
                                           n_frames_per_video, model_type)
    video_frames_dict_te = __sample_frames(video_relative_pathes_te,
                                           n_frames_per_video, model_type)

    utils.pkl_dump((video_frames_dict_tr, video_frames_dict_te),
                   frames_annot_path)
Example #23
def _106_prepare_action_graph_vector():
    """
    Each video is labeled with a set of actions, and we construct a graph from these actions.
    A node represents one action, while a link represents the relationship between two nodes.
    For a video, a link is only drawn between two nodes if these two nodes are neighbours.
    :return:
    """

    gt_actions_path = Pth(
        'Breakfast/annotation/gt_unit_actions_timestamped.pkl')
    action_graph_vectors_path = Pth(
        'Breakfast/annotation/action_graph_vectors.pkl')
    action_graph_matrices_path = Pth(
        'Breakfast/annotation/action_graph_matrices.pkl')
    (video_ids_tr,
     gt_actions_tr), (video_ids_te,
                      gt_actions_te) = utils.pkl_load(gt_actions_path)

    graph_matrices_tr = __get_action_graph_matrices(video_ids_tr,
                                                    gt_actions_tr)
    graph_matrices_te = __get_action_graph_matrices(video_ids_te,
                                                    gt_actions_te)

    graph_vectors_tr = __get_action_graph_vectors(video_ids_tr, gt_actions_tr)
    graph_vectors_te = __get_action_graph_vectors(video_ids_te, gt_actions_te)

    print(graph_matrices_tr.shape)
    print(graph_matrices_te.shape)
    print(graph_vectors_tr.shape)
    print(graph_vectors_te.shape)

    # save the graph data
    utils.pkl_dump((graph_matrices_tr, graph_matrices_te),
                   action_graph_matrices_path)
    utils.pkl_dump((graph_vectors_tr, graph_vectors_te),
                   action_graph_vectors_path)
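
The helpers __get_action_graph_matrices and __get_action_graph_vectors are not shown; a hedged sketch of the per-video adjacency matrix implied by the docstring (a link between two actions only if they are temporal neighbours, with zero-based action ids assumed) is:

import numpy as np

def action_graph_matrix_sketch(action_ids_in_order, n_classes):
    graph = np.zeros((n_classes, n_classes), dtype=np.float32)
    for a1, a2 in zip(action_ids_in_order[:-1], action_ids_in_order[1:]):
        graph[a1, a2] = 1.0
        graph[a2, a1] = 1.0  # undirected link between neighbouring actions
    return graph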
Example #24
def _203_sample_frames_resnet():
    """
    Get the list of frames for each video, with at most 600 and at least 96 frames per video.
    These frames will be used to extract features for each video.
    """

    # if required frames per video are 128, there are 51/6 out of 7986/1864 videos in training/testing splits that don't satisfy this
    n_frames_per_video = 64
    model_type = 'resnet'

    annot_activities_path = Pth('Breakfast/annotation/annot_activities.pkl')
    frames_annot_path = Pth('Breakfast/annotation/annot_frames_resnet_%d.pkl',
                            (n_frames_per_video, ))

    (video_relative_pathes_tr, _, video_relative_pathes_te,
     _) = utils.pkl_load(annot_activities_path)

    video_frames_dict_tr = __sample_frames(video_relative_pathes_tr,
                                           n_frames_per_video, model_type)
    video_frames_dict_te = __sample_frames(video_relative_pathes_te,
                                           n_frames_per_video, model_type)

    utils.pkl_dump((video_frames_dict_tr, video_frames_dict_te),
                   frames_annot_path)
Example #25
def _704_prepare_many_shots_noun_verb_action_ids():
    """
    Prepare two dicts of nouns and verbs to convert from id to many_shot id. All ids are zero-indexed.
    71 noun classes
    26 verb classes
    xx actions
    :return:
    """

    annot_dict_path = Pth(
        'EPIC-Kitchens/annotations/EPIC_train_action_labels_dict.pkl')
    annot_idxes_many_shots_path = Pth(
        'EPIC-Kitchens/annotations/annot_idxes_many_shots_noun_verb.pkl')
    noun_ids_many_shots_dict_path = Pth(
        'EPIC-Kitchens/annotations/noun_ids_many_shots_dict.pkl')
    verb_ids_many_shots_dict_path = Pth(
        'EPIC-Kitchens/annotations/verb_ids_many_shots_dict.pkl')
    actn_ids_many_shots_dict_path = Pth(
        'EPIC-Kitchens/annotations/actn_ids_many_shots_dict.pkl')
    actn_ids_many_shots_list_path = Pth(
        'EPIC-Kitchens/annotations/EPIC_many_shot_actions.csv')

    annot_idxes_many_shots = utils.pkl_load(annot_idxes_many_shots_path)
    annot_dict = utils.pkl_load(annot_dict_path)

    # get all verb_ids, noun_ids
    noun_ids = [
        annot_dict[annot_id][10] for annot_id in annot_idxes_many_shots
    ]
    verb_ids = [annot_dict[annot_id][8] for annot_id in annot_idxes_many_shots]
    actn_ids = __get_action_ids_from_annotation(actn_ids_many_shots_list_path)

    noun_ids = np.sort(np.unique(noun_ids))
    verb_ids = np.sort(np.unique(verb_ids))

    n_nouns = len(noun_ids)
    n_verbs = len(verb_ids)
    n_actns = len(actn_ids)

    # these dictionaries get the id of many_shot (noun or verb) given the original (noun or verb)
    many_shot_noun_ids_dict = dict(zip(noun_ids, np.arange(n_nouns)))
    many_shot_verb_ids_dict = dict(zip(verb_ids, np.arange(n_verbs)))
    many_shot_actn_ids_dict = dict(zip(actn_ids, np.arange(n_actns)))

    utils.pkl_dump(many_shot_noun_ids_dict, noun_ids_many_shots_dict_path)
    utils.pkl_dump(many_shot_verb_ids_dict, verb_ids_many_shots_dict_path)
    utils.pkl_dump(many_shot_actn_ids_dict, actn_ids_many_shots_dict_path)
Example #26
def extract_features_i3d_charades(n_frames_in, n_frames_out):
    """
    Extract features from i3d-model
    n_frames_in = 8 * n_frames_out
    n_frames_in = 1024,512,256
    n_frames_out = 128,64,32
    """

    # n_frames_in = 1024
    # n_frames_out = 128
    n_splits_per_video = 2

    root_path = '../data'
    root_Charades_path = '/home/r/renpengzhen/Datasets/Charades'
    frames_annot_path = '%s/Charades/annotation/frames_dict_untrimmed_multi_label_i3d_%d_frames.pkl' % (
        root_path, n_frames_in)  # path of the sampled frame names
    # model_path = '/home/r/renpengzhen/PyTorch/timeception-master/model/i3d_kinetics_model_rgb.pth'  # path of the pretrained model
    model_path = '%s/Charades/baseline_models/i3d/rgb_charades.pt' % (
        root_path)  # path of the pretrained model
    frames_root_path = '%s/Charades_v1_rgb' % (root_Charades_path)  # root directory of all video frames
    # features_root_path = '%s/Charades/features_i3d_charades_rgb_mixed_5c_untrimmed_%d_frames' % (root_path, n_frames_out)  # directory for the features extracted with i3d
    features_root_path = '%s/Charades/features_i3d_pytorch_charades_rgb_mixed_5c_%df' % (
        root_path, n_frames_out)  # directory for the features extracted with i3d

    # load the sampled-frames dict for training and test sets: video name -> list of frame names,
    # e.g. ('AXIW1', array(['AXIW1-000001.jpg', 'AXIW1-000002.jpg', 'AXIW1-000003.jpg', ..., 'AXIW1-000768.jpg', 'AXIW1-000769.jpg', 'AXIW1-000770.jpg'], dtype='<U16'))
    (video_frames_dict_tr, video_frames_dict_te) = utils.pkl_load(
        frames_annot_path)
    video_frames_dict = dict()  # empty dict for all video frames
    video_frames_dict.update(video_frames_dict_tr)
    video_frames_dict.update(video_frames_dict_te)
    video_names = list(video_frames_dict.keys())  # video names
    n_videos = len(video_names)  # total number of videos
    del video_frames_dict_tr
    del video_frames_dict_te

    n_threads = 8  # number of reader threads
    n_frames_per_segment = 8  # frames per segment; these 8 frames are successive, as sampled
    assert n_frames_per_segment * n_frames_out == n_frames_in

    if not os.path.exists(features_root_path):
        os.makedirs(features_root_path)

    t1 = time.time()
    print('extracting training features')
    print('start time: %s' % utils.timestamp())

    # reader for getting video frames
    video_reader_tr = image_utils.AsyncVideoReaderCharadesForI3DTorchModel(
        n_threads=n_threads)

    # async reader: load the images of the first video; we will read the first group of videos
    video_group_frames = __get_video_frame_pathes(
        video_names[0], frames_root_path,
        video_frames_dict)  # all frame paths of the first video, as a numpy array
    video_reader_tr.load_video_frames_in_batch(video_group_frames)

    # load the model
    model = i3d_torch_charades_utils.load_model_i3d_charades_rgb_for_testing(
        model_path)

    # run one forward pass to print the model's input/output details
    print('input_size=(3, 8, 224, 224)')
    print(torchsummary.summary(model, input_size=(3, 8, 224, 224)))

    # loop on the list of videos, processing the whole dataset
    for idx_video in range(n_videos):
        video_num = idx_video + 1
        video_name = video_names[idx_video]
        begin_num = 0
        end_num = n_videos

        if begin_num is not None and end_num is not None:
            if video_num <= begin_num or video_num > end_num:
                continue

        # wait until the image_batch is loaded
        t1 = time.time()
        while video_reader_tr.is_busy():
            time.sleep(0.1)
        t2 = time.time()
        duration_waited = t2 - t1

        print('... video %04d, %04d, waited: %.02f' %
              (video_num, n_videos, duration_waited))

        # get the frames
        frames = video_reader_tr.get_images(
        )  # (G*T*N, 224, 224, 3), the cropped frames of the current video

        # pre-load for the next video group, notice that we take into account the number of instances
        if video_num < n_videos:
            next_video_frames = __get_video_frame_pathes(
                video_names[idx_video + 1], frames_root_path,
                video_frames_dict)
            video_reader_tr.load_video_frames_in_batch(next_video_frames)

        if len(frames) != n_frames_in:
            raise ValueError('... ... wrong n frames: %s' % (video_name))

        # reshape so that one dimension carries the frames per segment, while the other dimension represents the batch size
        frames = np.reshape(frames,
                            (n_frames_out, n_frames_per_segment, 224, 224,
                             3))  # (T, 8, 224, 224, 3); T is the number of segments (super-frames)

        # transpose to have the channel_first (G*T, 8, 224, 224, 3) => (T, 3, 8, 224, 224)
        frames = np.transpose(frames, (0, 4, 1, 2, 3))

        # prepare input variable
        with torch.no_grad():
            # extract features
            input_var = torch.from_numpy(
                frames).cuda()  #(T, 3, 8, 224, 224),T=128,64,32
            output_var = model(
                input_var)  # extract features, torch.Size([128, 1024, 1, 7, 7])
            output_var = output_var.cpu()
            features = output_var.data.numpy()  # (T, 1024, 1, 7, 7)
            # don't forget to clean up variables
            del input_var
            del output_var

        # transpose to have the channel_last
        features = np.transpose(features,
                                (0, 2, 3, 4, 1))  # (T, 1, 7, 7, 1024)

        # reshape to have the features for each video in a separate dimension
        features = np.squeeze(features, axis=1)  # (T, 7, 7, 1024),T=128,64,32

        # path to save the features
        video_features_path = '%s/%s.pkl' % (features_root_path,
                                             video_name)  # path where the features will be saved
        if os.path.exists(video_features_path):
            print('... features for video already exist: %s.pkl' %
                  (video_name))
            continue

        # save features
        utils.pkl_dump(features, video_features_path, is_highest=True)

    t2 = time.time()
    print('... finish extracting features in %d seconds' % (t2 - t1))
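
The helper __get_video_frame_pathes used above is not shown; a hedged sketch, assuming the sampled frame names just need to be joined with the frames root directory and the video folder, is:

import numpy as np

def get_video_frame_pathes_sketch(video_name, frames_root_path,
                                  video_frames_dict):
    frame_names = video_frames_dict[video_name]
    frame_pathes = ['%s/%s/%s' % (frames_root_path, video_name, n)
                    for n in frame_names]
    return np.array(frame_pathes)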
Example #27
def _03_prepare_annotation_frame_list():
    """
    Convert the annotation dict to list. Also, create list for ground truth.
    """

    n_frames_per_sample = 20
    n_classes = N_CLASSES

    root_path = c.data_root_path
    annotation_dict_tr_path = '%s/Charades/annotation/frames_dict_tr.pkl' % (
        root_path)
    annotation_dict_te_path = '%s/Charades/annotation/frames_dict_te.pkl' % (
        root_path)
    annotation_list_path = '%s/Charades/annotation/frames_list_%d_frames.pkl' % (
        root_path, n_frames_per_sample)

    annotation_dict_tr = utils.pkl_load(annotation_dict_tr_path)
    annotation_dict_te = utils.pkl_load(annotation_dict_te_path)

    x_tr = []
    x_te = []
    y_tr = []
    y_te = []

    class_nums = range(1, n_classes + 1)
    for class_num in class_nums:
        print('... %d/%d' % (class_num, n_classes))
        class_annot_tr = annotation_dict_tr[class_num]
        class_annot_te = annotation_dict_te[class_num]

        for sample_tr in class_annot_tr:
            n_f = len(sample_tr)
            if n_f == 0:
                print('zero frames in tr sample')
                continue
            if n_f < n_frames_per_sample:
                idx = np.random.randint(low=0,
                                        high=n_f,
                                        size=(n_frames_per_sample, ))
            else:
                idx = np.random.choice(n_f, n_frames_per_sample)
            sample_frame_pathes = np.array(sample_tr)[idx]
            x_tr.append(sample_frame_pathes)
            y_tr.append(class_num)

        for sample_te in class_annot_te:
            n_f = len(sample_te)
            if n_f == 0:
                print('zero frames in te sample')
                continue
            if n_f < n_frames_per_sample:
                idx = np.random.randint(low=0,
                                        high=n_f,
                                        size=(n_frames_per_sample, ))
            else:
                idx = np.random.choice(n_f, n_frames_per_sample)
            sample_frame_pathes = np.array(sample_te)[idx]
            x_te.append(sample_frame_pathes)
            y_te.append(class_num)

    x_tr = np.array(x_tr)
    x_te = np.array(x_te)
    y_tr = np.array(y_tr)
    y_te = np.array(y_te)

    print(x_tr.shape)
    print(y_tr.shape)
    print(x_te.shape)
    print(y_te.shape)
    data = (x_tr, y_tr, x_te, y_te)

    utils.pkl_dump(data, annotation_list_path, is_highest=True)
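
Note that np.random.choice samples with replacement by default, so the else-branch above can still repeat frames even when enough are available; a hedged variant of that index-sampling step which avoids repeats when possible:

import numpy as np

def sample_frame_indices(n_f, n_frames_per_sample):
    if n_f < n_frames_per_sample:
        # too few frames: sample with replacement, as in the code above
        return np.random.randint(low=0, high=n_f, size=(n_frames_per_sample,))
    # enough frames: sample without replacement to avoid duplicates
    return np.random.choice(n_f, n_frames_per_sample, replace=False)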
Example #28
def extract_features_i3d_charades():
    """
    Extract features from i3d-model
    """

    n_frames_in = 1024
    n_frames_out = 128
    n_splits_per_video = 2

    root_path = '/content/'
    frames_annot_path = '%s/charades/annotation/frames_dict_untrimmed_multi_label_i3d_%d_frames.pkl' % (root_path, n_frames_in)
    model_path = '%s/charades/baseline_models/i3d/rgb_charades.pt' % (root_path)
    frames_root_path = '%s/charades/frames/Charades_v1_rgb' % (root_path)
    features_root_path = '/local-ssd/nhussein/Charades/features_i3d_charades_rgb_mixed_5c_untrimmed_%d_frames' % (n_frames_out)

    (video_frames_dict_tr, video_frames_dict_te) = utils.pkl_load(frames_annot_path)
    video_frames_dict = dict()
    video_frames_dict.update(video_frames_dict_tr)
    video_frames_dict.update(video_frames_dict_te)
    video_names = list(video_frames_dict.keys())
    n_videos = len(video_names)
    del video_frames_dict_tr
    del video_frames_dict_te

    n_threads = 8
    n_frames_per_segment = 8
    assert n_frames_per_segment * n_frames_out == n_frames_in

    if not is_local_machine and not os.path.exists(features_root_path):
        print('Sorry, path does not exist: %s' % (features_root_path))
        return

    t1 = time.time()
    print('extracting training features')
    print('start time: %s' % utils.timestamp())

    # reader for getting video frames
    video_reader_tr = image_utils.AsyncVideoReaderCharadesForI3DTorchModel(n_threads=n_threads)

    # async reader: load the images of the first video; we will read the first group of videos
    video_group_frames = __get_video_frame_pathes(video_names[0], frames_root_path, video_frames_dict)
    video_reader_tr.load_video_frames_in_batch(video_group_frames)

    # load the model
    model = i3d_factory.load_model_i3d_charades_rgb_for_testing(model_path)
    print(torchsummary.summary(model, input_size=(3, 8, 224, 224)))

    # import torchsummary
    # print torchsummary.summary(model, (8, 3, 224, 224))
    return

    # loop on list of videos
    for idx_video in range(n_videos):

        video_num = idx_video + 1
        video_name = video_names[idx_video]

        if begin_num is not None and end_num is not None:
            if video_num <= begin_num or video_num > end_num:
                continue

        # wait until the image_batch is loaded
        t1 = time.time()
        while video_reader_tr.is_busy():
            time.sleep(0.1)
        t2 = time.time()
        duration_waited = t2 - t1

        print('... video %04d, %04d, waited: %.02f' % (video_num, n_videos, duration_waited))

        # get the frames
        frames = video_reader_tr.get_images()  # (G*T*N, 224, 224, 3)

        # pre-load for the next video group, notice that we take into account the number of instances
        if video_num < n_videos:
            next_video_frames = __get_video_frame_pathes(video_names[idx_video + 1], frames_root_path, video_frames_dict)
            video_reader_tr.load_video_frames_in_batch(next_video_frames)

        if len(frames) != n_frames_in:
            raise ValueError('... ... wrong n frames: %s' % (video_name))

        # reshape so that one dimension carries the frames per segment, while the other dimension represents the batch size
        frames = np.reshape(frames, (n_frames_out, n_frames_per_segment, 224, 224, 3))  # (T, 8, 224, 224, 3)

        # transpose to have the channel_first (G*T, 8, 224, 224, 3) => (T, 3, 8, 224, 224)
        frames = np.transpose(frames, (0, 4, 1, 2, 3))

        # prepare input variable
        with torch.no_grad():
            # extract features
            input_var = torch.from_numpy(frames).cuda()
            output_var = model(input_var)
            output_var = output_var.cpu()
            features = output_var.data.numpy()  # (T, 1024, 1, 7, 7)
            # don't forget to clean up variables
            del input_var
            del output_var

        # transpose to have the channel_last
        features = np.transpose(features, (0, 2, 3, 4, 1))  # (T, 1, 7, 7, 1024)

        # reshape to have the features for each video in a separate dimension
        features = np.squeeze(features, axis=1)  # (T, 7, 7, 1024)

        # path to save the features
        video_features_path = '%s/%s.pkl' % (features_root_path, video_name)
        # if os.path.exists(video_features_path):
        #     print ('... features for video already exist: %s.pkl' % (video_name))
        #     continue

        # save features
        utils.pkl_dump(features, video_features_path, is_highest=True)

    t2 = time.time()
    print('... finish extracting features in %d seconds' % (t2 - t1))
Example #29
def __extract_features_rgb(begin_num=None, end_num=None):
    root_path = c.DATA_ROOT_PATH  # './data'

    # this file is generated by charades.py
    annotation_path = '%s/Charades/annotation/frames_dict_trimmed_multi_label_i3d_160_frames.pkl' % (
        root_path)  # Charades annotation path
    features_root_path = '%s/Charades/features_i3d_charades_rgb_mixed_5c_trimmed_20_frames' % (
        root_path)  # path to save the features
    video_frames_root_path = '%s/Charades/frames/Charades_v1_rgb' % (
        root_path)  # path of the video frames
    model_path = '%s/Charades/baseline_models/i3d/rgb_charades.pt' % (
        root_path)  # pretrained model path
    feature_name = 'Mixed_5c'  # which layer's features to save

    # 1. load the video annotations
    (video_frames_dict_tr,
     video_frames_dict_te) = utils.pkl_load(annotation_path)
    video_frames_dict = dict()
    video_frames_dict.update(video_frames_dict_tr)
    video_frames_dict.update(video_frames_dict_te)
    video_names = list(video_frames_dict.keys())

    n_videos = len(video_names)
    frame_count = 0

    if not os.path.exists(features_root_path):
        print('Sorry, path does not exist: %s' % (features_root_path))
        return

    t1 = time.time()
    print('extracting training features')
    print('start time: %s' % utils.timestamp())

    # async reader: load the images of the first video
    # ======================================== the reader below is not implemented ========================================#
    img_reader = image_utils.AsyncImageReaderCharadesForI3DTorchModel(
        n_threads=20)  # load the images
    img_reader.load_imgs_in_batch(
        __get_video_frame_pathes(video_names[0], video_frames_root_path,
                                 video_frames_dict))

    # load the model
    model = __load_i3d_model_rgb(model_path)
    torchsummary.summary(model, input_size=(3, 160, 224, 224))

    # loop on list of videos
    for idx_video in range(n_videos):
        video_num = idx_video + 1

        if begin_num is not None and end_num is not None:
            if video_num <= begin_num or video_num > end_num:
                continue

        video_name = video_names[idx_video]

        # wait until the image_batch is loaded
        t1 = time.time()
        while img_reader.is_busy(
        ):  # is_busy() stays True while img_reader.load_imgs_in_batch above has not finished loading the images
            time.sleep(0.1)
        t2 = time.time()
        duration_waited = t2 - t1
        print('...... video %d/%d: %s, waited: %d' %
              (video_num, n_videos, video_name, duration_waited))

        # get the video frames
        video_frames = img_reader.get_images()

        # pre-load for the next video
        if video_num < n_videos:
            next_video_name = video_names[idx_video + 1]
            img_reader.load_imgs_in_batch(
                __get_video_frame_pathes(next_video_name,
                                         video_frames_root_path,
                                         video_frames_dict))

        video_features_path = '%s/%s.pkl' % (features_root_path, video_name)
        # if os.path.exists(video_features_path):
        #     print ('... features for video already exist: %s.pkl' % (video_name))
        #     continue

        # Charades videos here have a fixed number of 160 frames
        if len(video_frames) != 160:
            print('... wrong n frames: %d' % (video_num))
            continue

        # transpose to have the channel_first (160, 224, 224, 3) => (3, 160, 224, 224)
        video_frames = np.transpose(video_frames, (3, 0, 1, 2))

        # add one dimension to represent the batch size
        video_frames = np.expand_dims(video_frames, axis=0)  # (N,C,L,H,W)

        # prepare input variable
        with torch.no_grad():
            # extract features
            input_var = torch.from_numpy(video_frames).cuda()  # move the video to the GPU
            output_var = model(input_var)
            output_var = output_var.cpu()
            features = output_var.data.numpy()  # (1, 1024, 20, 7, 7)

            # don't forget to clean up variables
            # these two variables must be freed after each video's features are extracted, otherwise it will error
            del input_var
            del output_var

        # squeeze to remove the dimension of the batch_size
        features = features[0]  # (1024, 20, 7, 7)

        # transpose to have the channel_last
        features = np.transpose(
            features,
            (1, 2, 3,
             0))  # (20, 7, 7, 1024) = (T, H, W, C); this step is not needed if PyTorch is also used downstream

        # path to save the features
        utils.pkl_dump(features, video_features_path, is_highest=True)  # save the features

        # increment counts
        frame_count += len(video_frames)

    t2 = time.time()
    print('finish extracting %d features in %d seconds' %
          (frame_count, t2 - t1))
    print('end time: %s' % utils.timestamp())
Example #30
def _06_get_graph_edges():
    # load data
    n_timesteps = 64
    n_centroids = 128
    is_max_layer = True

    model_name = 'classifier_19.02.21-01:00:30'
    features_path = Pth('Breakfast/features/features_i3d_mixed_5c_%d_frames.h5', (n_timesteps * 8,))
    centroids_path = Pth('Breakfast/features_centroids/features_random_%d_centroids.pkl', (n_centroids,))

    if is_max_layer:
        edge_values_path = Pth('Breakfast/qualitative_results/graph_edges_max_%s.h5', (model_name,))
        edge_pooled_values_path = Pth('Breakfast/qualitative_results/graph_edges_max_reduced_%s.pkl', (model_name,))
        layer_name = 'pool_t_1'
        n_timesteps = 21
        n_nodes = 10
    else:
        edge_values_path = Pth('Breakfast/qualitative_results/graph_edges_relu_%s.h5', (model_name,))
        edge_pooled_values_path = Pth('Breakfast/qualitative_results/graph_edges_relu_reduced_%s.pkl', (model_name,))
        layer_name = 'leaky_re_lu_3'
        n_timesteps = 64
        n_nodes = 32

    v_input_n = utils.pkl_load(centroids_path)
    (x_tr, x_te) = utils.h5_load_multi(features_path, ['x_tr', 'x_te'])

    epoch_num = 133
    batch_size = 40
    model = __load_model(model_name, epoch_num)

    t_input_n = model.get_layer('input_n').input
    t_input_x = model.get_layer('input_x').input
    t_activations = model.get_layer(layer_name).output  # (None * 64, 32, 1, 1, 1024)
    keras_session = K.get_session()

    # 1357 train, 355 test
    vals_tr = __get_tensor_values(batch_size, keras_session, t_activations, t_input_n, t_input_x, v_input_n, x_tr)  # (None*64, 32, 1, 1, 1024)
    vals_te = __get_tensor_values(batch_size, keras_session, t_activations, t_input_n, t_input_x, v_input_n, x_te)  # (None*64, 32, 1, 1, 1024)

    vals_tr = np.squeeze(vals_tr, axis=2)
    vals_tr = np.squeeze(vals_tr, axis=2)

    vals_te = np.squeeze(vals_te, axis=2)
    vals_te = np.squeeze(vals_te, axis=2)

    n_tr = 1357
    n_te = 355
    if is_max_layer:
        vals_tr = np.reshape(vals_tr, (n_tr, n_nodes, n_timesteps, 1024))  # (None, timesteps, nodes, feat_size), (1357, 10, 21, 1024)
        vals_te = np.reshape(vals_te, (n_te, n_nodes, n_timesteps, 1024))  # (None, timesteps, nodes, feat_size), (355, 10, 21, 1024)
    else:
        vals_tr = np.reshape(vals_tr, (n_tr, n_timesteps, n_nodes, 1024))  # (None, timesteps, nodes, feat_size), (1357, 64, 32, 1024)
        vals_te = np.reshape(vals_te, (n_te, n_timesteps, n_nodes, 1024))  # (None, timesteps, nodes, feat_size), (355, 64, 32, 1024)

    print('finally')
    print(x_tr.shape)
    print(x_te.shape)

    print(vals_tr.shape)
    print(vals_te.shape)

    utils.h5_dump_multi((vals_tr, vals_te), ['x_tr', 'x_te'], edge_values_path)

    vals_tr = np.mean(vals_tr, axis=3)
    vals_te = np.mean(vals_te, axis=3)
    utils.pkl_dump((vals_tr, vals_te), edge_pooled_values_path)