Example #1
    def __init__(self, n_timesteps, n_timesteps_total, featurenet_type, x_heavy_path, is_random_tr=True, is_random_te=False, is_shuffle_tr=True, is_shuffle_te=False):
        """
        :param n_timesteps:  How many timesteps per video.
        :param is_random_tr: Sample random or uniform frames.
        :param is_random_te: Sample random or uniform frames.
        :param is_shuffle_tr: To shuffle data or not.
        :param is_shuffle_te: To shuffle data or not.
        """

        self.__is_random_tr = is_random_tr
        self.__is_random_te = is_random_te
        self.__is_shuffle_tr = is_shuffle_tr
        self.__is_shuffle_te = is_shuffle_te
        self.__n_timesteps = n_timesteps
        self.__n_timesteps_total = n_timesteps_total

        n_frames_per_segment = utils.get_model_n_frames_per_segment(featurenet_type)
        n_frames = n_timesteps_total * n_frames_per_segment

        gt_activities_path = Pth('Breakfast/annotation/gt_activities.pkl')
        frames_annot_path = Pth('Breakfast/annotation/annot_frames_%s_%d.pkl', (featurenet_type, n_frames,))

        (self.__video_ids_tr, self.__y_tr, self.__video_ids_te, self.__y_te) = utils.pkl_load(gt_activities_path)

        (x_heavy_tr, x_heavy_te) = utils.h5_load_multi(x_heavy_path, ['x_tr', 'x_te'])  # (B, C, T, H, W)
        self.__x_heavy_tr = x_heavy_tr
        self.__x_heavy_te = x_heavy_te

        # select middle frame from each snippet
        (frames_dict_tr, frames_dict_te) = utils.pkl_load(frames_annot_path)
        frames_dict_tr = self.__select_middle_frame(frames_dict_tr, n_frames_per_segment)
        frames_dict_te = self.__select_middle_frame(frames_dict_te, n_frames_per_segment)
        self.__frames_dict_tr = frames_dict_tr
        self.__frames_dict_te = frames_dict_te
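
The private helper __select_middle_frame used above is not shown in this snippet. Below is a minimal sketch of what it plausibly does, assuming the annotation dict maps each video id to a flat list of frame names grouped into consecutive segments of n_frames_per_segment frames; the body is an assumption, not the original implementation.

# Hypothetical sketch of __select_middle_frame; the logic here is assumed.
def __select_middle_frame(frames_dict, n_frames_per_segment):
    middle = n_frames_per_segment // 2
    result = dict()
    for video_id, frames in frames_dict.items():
        # keep only the middle frame of every segment
        result[video_id] = frames[middle::n_frames_per_segment]
    return result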
Example #2
    def __init__(self, features_path, n_timesteps, n_timesteps_total, is_random_tr=True, is_random_te=False, dataset_type=None):

        if dataset_type == const.DATASET_TYPES.breakfast:
            gt_activities_path = Pth('Breakfast/annotation/gt_activities.pkl')
            (_, self.__y_tr, _, self.__y_te) = utils.pkl_load(gt_activities_path)
        elif dataset_type == const.DATASET_TYPES.charades:
            gt_activities_path = Pth('Charades/annotation/video_annotation.pkl')
            (_, self.__y_tr, _, self.__y_te) = utils.pkl_load(gt_activities_path)
            self.__y_tr = self.__y_tr.astype(np.float32)
            self.__y_te = self.__y_te.astype(np.float32)
        else:
            raise Exception('Unknown Dataset Type: %s' % (dataset_type))

        (self.__x_tr, self.__x_te) = utils.h5_load_multi(features_path, ['x_tr', 'x_te'])

        self.__feature_root_path = features_path
        self.__n_timesteps_total = n_timesteps_total
        self.__n_timesteps = n_timesteps

        self.__is_random_tr = is_random_tr
        self.__is_random_te = is_random_te
Example #3
    def __init__(self, features_path, n_timesteps, n_timesteps_total, dataset_type=None):

        if dataset_type == const.DATASET_TYPES.breakfast:
            gt_activities_path = Pth('Breakfast/annotation/gt_activities.pkl')
            (_, self.__y_tr, _, self.__y_te) = utils.pkl_load(gt_activities_path)
        elif dataset_type == const.DATASET_TYPES.charades:
            gt_activities_path = Pth('Charades/annotation/video_annotation.pkl')
            (_, self.__y_tr, _, self.__y_te) = utils.pkl_load(gt_activities_path)
            self.__y_tr = self.__y_tr.astype(np.float32)
            self.__y_te = self.__y_te.astype(np.float32)
        else:
            raise Exception('Unknown Dataset Type: %s' % (dataset_type))

        (x_tr, x_te) = utils.h5_load_multi(features_path, ['x_tr', 'x_te'])
        step = n_timesteps_total / float(n_timesteps)
        idxes = np.arange(0, n_timesteps_total, step, dtype=np.float32).astype(np.int32)
        x_tr = x_tr[:, :, idxes]
        x_te = x_te[:, :, idxes]

        self.__x_tr = x_tr.astype(np.float32)
        self.__x_te = x_te.astype(np.float32)
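
The index arithmetic above uniformly samples n_timesteps out of n_timesteps_total. A standalone check with small numbers, grounded directly in the two lines above:

import numpy as np

# Uniform sampling check: with 64 total timesteps and 8 kept,
# step is 8.0 and every 8th timestep is selected.
n_timesteps_total, n_timesteps = 64, 8
step = n_timesteps_total / float(n_timesteps)
idxes = np.arange(0, n_timesteps_total, step, dtype=np.float32).astype(np.int32)
print(idxes)  # [ 0  8 16 24 32 40 48 56]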
Example #4
def _04_get_activation_values():
    # load data
    n_timesteps = 64
    n_centroids = 128

    model_name = 'classifier_19.02.21-01:00:30'
    features_path = Pth('Breakfast/features/features_i3d_mixed_5c_%d_frames.h5', (n_timesteps * 8,))
    centroids_path = Pth('Breakfast/features_centroids/features_random_%d_centroids.pkl', (n_centroids,))
    attention_values_path = Pth('Breakfast/qualitative_results/node_attention_%s.pkl', (model_name,))

    v_input_n = utils.pkl_load(centroids_path)
    (x_tr, x_te) = utils.h5_load_multi(features_path, ['x_tr', 'x_te'])

    epoch_num = 133
    model = __load_model(model_name, epoch_num)

    t_input_n = model.get_layer('input_n').input
    t_input_x = model.get_layer('input_x').input
    t_node_attention = model.get_layer('node_attention').output  # (None, 1, 1, 64, 128)
    keras_session = K.get_session()

    batch_size = 40
    att_tr = __get_tensor_values(batch_size, keras_session, t_node_attention, t_input_n, t_input_x, v_input_n, x_tr)  # (None, 1, 1, 64, 128)
    att_te = __get_tensor_values(batch_size, keras_session, t_node_attention, t_input_n, t_input_x, v_input_n, x_te)  # (None, 1, 1, 64, 128)

    att_tr = np.squeeze(att_tr, axis=1)  # (None, 1, 64, 128)
    att_tr = np.squeeze(att_tr, axis=1)  # (None, 64, 128)
    att_te = np.squeeze(att_te, axis=1)  # (None, 1, 64, 128)
    att_te = np.squeeze(att_te, axis=1)  # (None, 64, 128)

    print('finally')
    print(x_tr.shape)
    print(x_te.shape)

    print(att_tr.shape)
    print(att_te.shape)

    utils.pkl_dump((att_tr, att_te), attention_values_path)
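
__get_tensor_values is used here and in later examples but never defined in the snippets. A plausible sketch, assuming it simply evaluates the target tensor batch by batch in the given Keras/TF session; the body and signature details are assumptions:

import numpy as np

# Hypothetical sketch of __get_tensor_values: evaluate t_output over x in
# mini-batches, feeding the centroids v_input_n alongside each batch, and
# concatenate the per-batch results. Not the original implementation.
def __get_tensor_values(batch_size, session, t_output, t_input_n, t_input_x, v_input_n, x):
    values = []
    for start in range(0, len(x), batch_size):
        batch_x = x[start:start + batch_size]
        feed_dict = {t_input_n: v_input_n, t_input_x: batch_x}
        values.append(session.run(t_output, feed_dict=feed_dict))
    return np.concatenate(values, axis=0)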
Example #5
def train_model_on_pickled_features():
    """
    Train model.
    """

    model_type = 'i3d_rgb'
    feature_type = 'mixed_5c'
    is_spatial_pooling = False
    is_resume_training = False

    n_timesteps = 64
    batch_size_tr = 16
    batch_size_te = 40
    n_centroids = 128
    n_epochs = 100
    n_classes = N_CLASSES
    n_gpus = 1

    model_name = 'classifier_%s' % (utils.timestamp())
    model_weight_path = ''
    model_root_path = Pth('Breakfast/models/')
    gt_activities_path = Pth('Breakfast/annotation/gt_activities.pkl')
    centroids_path = Pth(
        'Breakfast/features_centroids/features_random_%d_centroids.pkl',
        (n_centroids, ))
    features_path = Pth(
        'Breakfast/features/features_i3d_mixed_5c_%d_frames_max_pool.h5',
        (n_timesteps * 8, )) if is_spatial_pooling else Pth(
            'Breakfast/features/features_i3d_mixed_5c_%d_frames.h5',
            (n_timesteps * 8, ))

    centroids = utils.pkl_load(centroids_path)
    (video_ids_tr, y_tr), (video_ids_te,
                           y_te) = utils.pkl_load(gt_activities_path)

    n_feat_maps, feat_map_side_dim = __get_model_feat_maps_info(
        model_type, feature_type)
    feat_map_side_dim = 1 if is_spatial_pooling else feat_map_side_dim
    input_shape = (None, n_timesteps, feat_map_side_dim, feat_map_side_dim,
                   n_feat_maps)

    print('--- start time')
    print(datetime.datetime.now())

    # building the model
    print('... building model %s' % (model_name))
    t1 = time.time()

    # root_model, model = __load_model_mlp_classifier_action_vlad(n_classes, input_shape, n_gpus=n_gpus, is_load_weights=is_resume_training, weight_path=model_weight_path)
    # root_model, model = __load_model_mlp_classifier_timeception(n_classes, input_shape, n_gpus=n_gpus, is_load_weights=is_resume_training, weight_path=model_weight_path)
    root_model, model = __load_model_mlp_classifier_video_graph(
        centroids,
        n_classes,
        input_shape,
        n_gpus=n_gpus,
        is_load_weights=is_resume_training,
        weight_path=model_weight_path)

    t2 = time.time()
    duration = t2 - t1
    print(root_model.summary(line_length=130, positions=None, print_fn=None))
    print('... model built, duration (sec): %d' % (duration))

    # load data
    print('... loading data: %s' % (features_path))
    print('... centroids: %s' % (centroids_path))
    t1 = time.time()
    (x_tr, x_te) = utils.h5_load_multi(features_path, ['x_tr', 'x_te'])
    t2 = time.time()
    duration = t2 - t1
    print('... data loaded: %d' % (duration))

    n_tr = len(x_tr)
    n_te = len(x_te)
    n_batch_tr = __calc_num_batches(n_tr, batch_size_tr)
    n_batch_te = __calc_num_batches(n_te, batch_size_te)
    print('... [tr]: n, n_batch, batch_size, n_gpus: %d, %d, %d, %d' %
          (n_tr, n_batch_tr, batch_size_tr, n_gpus))
    print('... [te]: n, n_batch, batch_size, n_gpus: %d, %d, %d, %d' %
          (n_te, n_batch_te, batch_size_te, n_gpus))

    save_callback = keras_utils.ModelSaveCallback(model, model_name,
                                                  model_root_path)
    model.fit(x_tr,
              y_tr,
              epochs=n_epochs,
              batch_size=batch_size_tr,
              validation_split=0.0,
              validation_data=(x_te, y_te),
              shuffle=True,
              callbacks=[save_callback],
              verbose=2)
    print('--- finish time')
    print(datetime.datetime.now())
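
__calc_num_batches is not shown in this snippet; a minimal sketch, assuming it is plain ceiling division of sample count by batch size:

import math

# Assumed implementation of __calc_num_batches: ceiling division,
# e.g. 1357 samples with batch size 16 -> 85 batches.
def __calc_num_batches(n_samples, batch_size):
    return int(math.ceil(n_samples / float(batch_size)))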
Example #6
def train_model_on_pickled_features():
    """
    Train model.
    """

    annotation_type = 'noun'
    annot_path = Pth(
        'EPIC-Kitchens/annotation/annot_video_level_many_shots.pkl')
    (y_tr, y_te), n_classes = __load_annotation(annot_path, annotation_type)

    model_type = 'i3d_rgb'
    feature_type = 'mixed_5c'
    n_nodes = 128
    n_timesteps = 64
    n_frames_per_segment = 8
    n_frames_per_video = n_timesteps * n_frames_per_segment
    batch_size_tr = 20
    batch_size_te = 30
    n_epochs = 500
    epoch_offset = 0
    model_name = 'classifier_%s' % (utils.timestamp())
    model_root_path = Pth('EPIC-Kitchens/models')

    features_path = Pth(
        'EPIC-Kitchens/features/features_i3d_mixed_5c_%d_frames.h5',
        (n_frames_per_video, ))
    nodes_path = Pth('EPIC-Kitchens/features_centroids/features_random_%d.pkl',
                     (n_nodes, ))
    n_channels, side_dim = utils.get_model_feat_maps_info(
        model_type, feature_type)
    input_shape = (None, n_timesteps, side_dim, side_dim, n_channels)
    nodes = utils.pkl_load(nodes_path)

    print('--- start time')
    print(datetime.datetime.now())

    # building the model
    print('... building model %s' % (model_name))
    t1 = time.time()
    model = __load_model_videograph(nodes, n_classes, input_shape)
    t2 = time.time()
    duration = t2 - t1
    print(model.summary(line_length=130, positions=None, print_fn=None))
    print('... model built, duration (sec): %d' % (duration))

    # load data
    print('... loading data: %s' % (features_path))
    t1 = time.time()
    # features are extracted using datasets.Epic_Kitchens.i3d_keras_epic_kitchens()
    # we use the out-of-the-box i3d (pre-trained on kinetics, NOT fine-tuned on epic-kitchens) with the last conv feature 7*7*1024 'mixed_5c'
    # to get better performance, you need to randomly sample new frames and extract their features every epoch
    # please use this function for random sampling, instead of uniform sampling: Epic_Kitchens.__random_sample_frames_per_video_for_i3d()
    # then extract their features, as done in: Epic_Kitchens._901_extract_features_i3d()
    # then train on the extracted features; do this every epoch. It is computationally heavy, but you cannot avoid random sampling if you want better results
    # results improve further if you replace I3D with a 2D/3D CNN that was fine-tuned on Epic-Kitchens beforehand (a sketch of the per-epoch loop follows this example)
    (x_tr, x_te) = utils.h5_load_multi(features_path, ['x_tr', 'x_te'])
    t2 = time.time()

    duration = t2 - t1
    print('... data loaded: %d' % (duration))

    n_tr = len(x_tr)
    n_te = len(x_te)
    n_batch_tr = utils.calc_num_batches(n_tr, batch_size_tr)
    n_batch_te = utils.calc_num_batches(n_te, batch_size_te)
    print('... [tr]: n, n_batch, batch_size: %d, %d, %d' %
          (n_tr, n_batch_tr, batch_size_tr))
    print('... [te]: n, n_batch, batch_size: %d, %d, %d' %
          (n_te, n_batch_te, batch_size_te))
    print(x_tr.shape)
    print(x_te.shape)
    print(y_tr.shape)
    print(y_te.shape)

    save_callback = keras_utils.ModelSaveCallback(model, model_name,
                                                  epoch_offset,
                                                  model_root_path)
    score_callback = keras_utils.MapScoreCallback(model, None, None, x_te,
                                                  y_te, batch_size_te,
                                                  n_classes)
    model_callbacks = [save_callback, score_callback]
    model.fit(x_tr,
              y_tr,
              epochs=n_epochs,
              batch_size=batch_size_tr,
              validation_split=0.0,
              validation_data=(x_te, y_te),
              shuffle=True,
              callbacks=model_callbacks,
              verbose=2)

    print('--- finish time')
    print(datetime.datetime.now())
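
The comments in this example recommend re-sampling frames and re-extracting features every epoch. A hedged outline of that loop follows; the two Epic_Kitchens helpers are taken verbatim from the comments above, and their exact signatures are assumptions:

# Per-epoch random sampling, as the comments above describe.
# Helper names come from the comments; signatures are assumed.
for epoch in range(n_epochs):
    Epic_Kitchens.__random_sample_frames_per_video_for_i3d()  # re-sample frames
    Epic_Kitchens._901_extract_features_i3d()  # re-extract their features
    (x_tr, x_te) = utils.h5_load_multi(features_path, ['x_tr', 'x_te'])
    model.fit(x_tr, y_tr, epochs=1, batch_size=batch_size_tr,
              validation_data=(x_te, y_te), shuffle=True, verbose=2)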
Example #7
def train_model_videograph():
    """
    Train model.
    """

    annotation_type = 'noun'
    annot_path = Pth(
        'EPIC-Kitchens/annotations/annot_video_level_many_shots.pkl')
    (y_tr, y_te), n_classes = __load_annotation(annot_path, annotation_type)

    model_type = 'i3d_rgb'
    feature_type = 'mixed_5c'
    n_nodes = 128
    n_timesteps = 64
    n_frames_per_segment = 8
    n_frames_per_video = n_timesteps * n_frames_per_segment
    batch_size_tr = 20
    batch_size_te = 30
    n_epochs = 500
    epoch_offset = 0
    model_name = 'classifier_%s' % (utils.timestamp())
    model_root_path = Pth('EPIC-Kitchens/models')

    nodes_path = Pth('EPIC-Kitchens/features/nodes_random_%d.pkl', (n_nodes, ))
    features_path = Pth(
        'EPIC-Kitchens/features/features_i3d_mixed_5c_%d_frames.h5',
        (n_frames_per_video, ))
    n_channels, side_dim = utils.get_model_feat_maps_info(
        model_type, feature_type)
    input_shape = (None, n_timesteps, side_dim, side_dim, n_channels)

    # either load nodes, or generate them on the fly; but remember to save them, as you need them at test time (see the sketch after this example)
    # nodes = utils.pkl_load(nodes_path)
    nodes = utils.generate_centroids(n_nodes, n_channels)

    print('--- start time')
    print(datetime.datetime.now())

    # building the model
    print('... building model %s' % (model_name))
    t1 = time.time()
    model = __load_model_videograph(nodes, n_classes, input_shape)
    t2 = time.time()
    duration = t2 - t1
    print(model.summary(line_length=130, positions=None, print_fn=None))
    print('... model built, duration (sec): %d' % (duration))

    # load data
    print('... loading data: %s' % (features_path))
    t1 = time.time()
    # features are extracted using datasets.epic_kitchens.i3d_keras_epic_kitchens()
    # we use out-of-box i3d (pre-trained on kinetics, NOT fine-tuned on epic-kitchens) with last conv feature 7*7*1024 'mixed_5c'
    (x_tr, x_te) = utils.h5_load_multi(features_path, ['x_tr', 'x_te'])
    t2 = time.time()

    duration = t2 - t1
    print('... data loaded: %d' % (duration))

    n_tr = len(x_tr)
    n_te = len(x_te)
    n_batch_tr = utils.calc_num_batches(n_tr, batch_size_tr)
    n_batch_te = utils.calc_num_batches(n_te, batch_size_te)
    print('... [tr]: n, n_batch, batch_size: %d, %d, %d' %
          (n_tr, n_batch_tr, batch_size_tr))
    print('... [te]: n, n_batch, batch_size: %d, %d, %d' %
          (n_te, n_batch_te, batch_size_te))
    print(x_tr.shape)
    print(x_te.shape)
    print(y_tr.shape)
    print(y_te.shape)

    save_callback = keras_utils.ModelSaveCallback(model, model_name,
                                                  epoch_offset,
                                                  model_root_path)
    score_callback = keras_utils.MapScoreCallback(model, None, None, x_te,
                                                  y_te, batch_size_te,
                                                  n_classes)
    model_callbacks = [save_callback, score_callback]
    model.fit(x_tr,
              y_tr,
              epochs=n_epochs,
              batch_size=batch_size_tr,
              validation_split=0.0,
              validation_data=(x_te, y_te),
              shuffle=True,
              callbacks=model_callbacks,
              verbose=2)

    print('--- finish time')
    print(datetime.datetime.now())
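
As the comment above warns, nodes generated on the fly must be saved so the same centroids can be reloaded at test time. Using the nodes_path already defined in this example:

# persist the generated centroids; test-time code can then reload them
# with: nodes = utils.pkl_load(nodes_path)
utils.pkl_dump(nodes, nodes_path)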
Example #8
def _07_visualize_graph_edges():
    # load data
    n_timesteps = 64
    is_max_layer = True

    model_name = 'classifier_19.02.21-01:00:30'
    features_path = Pth('Breakfast/features/features_i3d_mixed_5c_%d_frames.h5', (n_timesteps * 8,))
    gt_activities_path = Pth('Breakfast/annotation/gt_activities.pkl')
    frames_annot_path = Pth('Breakfast/annotation/annot_frames_i3d_%d.pkl', (512,))
    class_names_path = Pth('Breakfast/annotation/activities_list.pkl')

    if is_max_layer:
        edge_values_path = Pth('Breakfast/qualitative_results/graph_edges_max_%s.h5', (model_name,))
        n_timesteps = 21
        n_nodes = 10
    else:
        edge_values_path = Pth('Breakfast/qualitative_results/graph_edges_relu_%s.h5', (model_name,))
        n_timesteps = 64
        n_nodes = 32

    n_classes = ds_breakfast.N_CLASSES_ACTIVITIES
    frames_annot = utils.pkl_load(frames_annot_path)
    class_names = utils.pkl_load(class_names_path)
    (video_ids_tr, y_tr), (video_ids_te, y_te) = utils.pkl_load(gt_activities_path)
    y_tr = utils.debinarize_label(y_tr)
    y_te = utils.debinarize_label(y_te)

    if is_max_layer:
        # (1357, 10, 21, 1024)
        # (355, 10, 21, 1024)
        (x_tr, x_te) = utils.h5_load_multi(edge_values_path, ['x_tr', 'x_te'])

        x_tr = np.transpose(x_tr, (0, 2, 1, 3))  # (1357, 21, 10, 1024)
        x_te = np.transpose(x_te, (0, 2, 1, 3))  # (355, 21, 10, 1024)
    else:
        # (1357, 64, 32, 1024)
        # (355, 64, 32, 1024)
        (x_tr, x_te) = utils.pkl_load(edge_values_path)

    x_original = x_tr
    y = y_tr

    assert n_timesteps == x_original.shape[1]
    assert n_nodes == x_original.shape[2]

    # pool over time
    x = np.mean(x_original, axis=1)  # (None, N, C)

    padding = 3
    node_ids = np.arange(n_nodes)

    x_sum_mean = np.mean(np.sum(x, axis=2), axis=0)
    min_node_value = min(x_sum_mean)
    max_node_value = max(x_sum_mean)

    def _scale_val(val):
        val = 1 / val
        val = pow(val, 1.2)
        return val

    # loop on classes of the dataset
    for idx_class in range(n_classes):

        class_num = idx_class + 1
        class_name = class_names[idx_class]
        idx_samples = np.where(y == idx_class)[0]
        x_class = x[idx_samples]  # (None, N, C)

        # pool over samples
        x_class = np.mean(x_class, axis=0)  # (N, C)
        graph = nx.Graph()

        node_values = np.sum(x_class, axis=1)

        # add the items as nodes to the graph
        for node_id in node_ids:
            if not graph.has_node(node_id):
                graph.add_node(node_id)

        max_edge_val = 0.0
        min_edge_val = 10000
        for idx_node in range(n_nodes):
            for idx_col in range(idx_node - padding, idx_node + padding + 1):
                for idx_row in range(idx_node - padding, idx_node + padding + 1):
                    if idx_col < 0 or idx_col >= n_nodes:
                        continue
                    if idx_row < 0 or idx_row >= n_nodes:
                        continue
                    if idx_row == idx_col:
                        continue
                    val = distance.euclidean(x_class[idx_row], x_class[idx_col])
                    val = _scale_val(val)
                    min_edge_val = min(min_edge_val, val)
                    max_edge_val = max(max_edge_val, val)

        for idx_node in range(n_nodes):
            for idx_col in range(idx_node - padding, idx_node + padding + 1):
                for idx_row in range(idx_node - padding, idx_node + padding + 1):

                    if idx_col < 0 or idx_col >= n_nodes:
                        continue
                    if idx_row < 0 or idx_row >= n_nodes:
                        continue
                    if idx_row == idx_col:
                        continue

                    # this value represents edges between nodes in local window of size 7
                    val = distance.euclidean(x_class[idx_row], x_class[idx_col])
                    val = _scale_val(val)
                    id_1 = idx_col
                    id_2 = idx_row

                    # add edge if it does not exist; otherwise average the old values with the current one
                    if not graph.has_edge(id_1, id_2):
                        graph.add_edge(id_1, id_2, vals=[val], val=val)
                    else:
                        vals = [val] + graph.get_edge_data(id_1, id_2)['vals']
                        val = np.average(vals)
                        graph[id_1][id_2]['vals'] = vals
                        graph[id_1][id_2]['val'] = val

        # now plot this graph
        g_edges = graph.edges
        g_nodes = graph.nodes

        # embed the graph
        # g_embedding = __async_tsne_embedding(x_class)
        # g_embedding = nx.random_layout(graph)
        # g_embedding = nx.spectral_layout(graph, weight='val') # spectral embedding with matrix laplacian
        # g_embedding = nx.kamada_kawai_layout(graph, weight='val', scale=10, dim=2)  # optimal distance between nodes
        g_embedding = nx.spring_layout(graph, weight='val', iterations=1000, scale=10, dim=2, seed=101)

        # plot graph
        __plot_embedded_graph(graph, g_embedding, g_edges, node_values, class_num, class_name, min_node_value, max_node_value, min_edge_val, max_edge_val, n_nodes)
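
The _scale_val transform above turns a euclidean distance d into an edge weight (1/d)^1.2, so nearby node pairs get heavier edges. A quick numeric check:

# Smaller distances map to larger edge weights:
# d=0.5 -> ~2.30, d=1.0 -> 1.0, d=2.0 -> ~0.44
for d in (0.5, 1.0, 2.0):
    print(d, pow(1.0 / d, 1.2))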
Example #9
def _06_get_graph_edges():
    # load data
    n_timesteps = 64
    n_centroids = 128
    is_max_layer = True

    model_name = 'classifier_19.02.21-01:00:30'
    features_path = Pth('Breakfast/features/features_i3d_mixed_5c_%d_frames.h5', (n_timesteps * 8,))
    centroids_path = Pth('Breakfast/features_centroids/features_random_%d_centroids.pkl', (n_centroids,))

    if is_max_layer:
        edge_values_path = Pth('Breakfast/qualitative_results/graph_edges_max_%s.h5', (model_name,))
        edge_pooled_values_path = Pth('Breakfast/qualitative_results/graph_edges_max_reduced_%s.pkl', (model_name,))
        layer_name = 'pool_t_1'
        n_timesteps = 21
        n_nodes = 10
    else:
        edge_values_path = Pth('Breakfast/qualitative_results/graph_edges_relu_%s.h5', (model_name,))
        edge_pooled_values_path = Pth('Breakfast/qualitative_results/graph_edges_relu_reduced_%s.pkl', (model_name,))
        layer_name = 'leaky_re_lu_3'
        n_timesteps = 64
        n_nodes = 32

    v_input_n = utils.pkl_load(centroids_path)
    (x_tr, x_te) = utils.h5_load_multi(features_path, ['x_tr', 'x_te'])

    epoch_num = 133
    batch_size = 40
    model = __load_model(model_name, epoch_num)

    t_input_n = model.get_layer('input_n').input
    t_input_x = model.get_layer('input_x').input
    t_activations = model.get_layer(layer_name).output  # (None * 64, 32, 1, 1, 1024)
    keras_session = K.get_session()

    # 1357 train, 355 test
    vals_tr = __get_tensor_values(batch_size, keras_session, t_activations, t_input_n, t_input_x, v_input_n, x_tr)  # (None*64, 32, 1, 1, 1024)
    vals_te = __get_tensor_values(batch_size, keras_session, t_activations, t_input_n, t_input_x, v_input_n, x_te)  # (None*64, 32, 1, 1, 1024)

    vals_tr = np.squeeze(vals_tr, axis=2)
    vals_tr = np.squeeze(vals_tr, axis=2)

    vals_te = np.squeeze(vals_te, axis=2)
    vals_te = np.squeeze(vals_te, axis=2)

    n_tr = 1357
    n_te = 355
    if is_max_layer:
        vals_tr = np.reshape(vals_tr, (n_tr, n_nodes, n_timesteps, 1024))  # (None, nodes, timesteps, feat_size), (1357, 10, 21, 1024)
        vals_te = np.reshape(vals_te, (n_te, n_nodes, n_timesteps, 1024))  # (None, nodes, timesteps, feat_size), (355, 10, 21, 1024)
    else:
        vals_tr = np.reshape(vals_tr, (n_tr, n_timesteps, n_nodes, 1024))  # (None, timesteps, nodes, feat_size), (1357, 64, 32, 1024)
        vals_te = np.reshape(vals_te, (n_te, n_timesteps, n_nodes, 1024))  # (None, timesteps, nodes, feat_size), (355, 64, 32, 1024)

    print('finally')
    print(x_tr.shape)
    print(x_te.shape)

    print(vals_tr.shape)
    print(vals_te.shape)

    utils.h5_dump_multi((vals_tr, vals_te), ['x_tr', 'x_te'], edge_values_path)

    vals_tr = np.mean(vals_tr, axis=3)
    vals_te = np.mean(vals_te, axis=3)
    utils.pkl_dump((vals_tr, vals_te), edge_pooled_values_path)
Example #10
def analysis():

    ###### Annotation labels ###########################################################################################################################
    path_anno = 'Hico/features/h5/anno_hico.pkl'
    num_class = 600
    metric_fn = pytorch_utils.METRIC_FUNCTIONS.ap_hico  # note: overridden below by ap_hico_all
    annot_path = Pth(path_anno)

    print('... loading data')

    (img_names_tr, y_tr, _, img_names_te, y_te,
     y_te_mask) = utils.pkl_load(annot_path)
    y_tr = y_tr.astype(np.float32)
    y_te = y_te.astype(np.float32)

    metric_fn = pytorch_utils.METRIC_FUNCTIONS.ap_hico_all
    ###### Annotation labels ###########################################################################################################################

    ##### Load interaction categories #########################################################################################################
    print('...Loading categories...')
    classes = sio.loadmat('../../where-is-interaction/main/data/anno.mat')
    nouns = classes['objects']
    verbs = classes['verbs']
    cats = classes['super_category']

    verblist = []
    for v in verbs:
        verblist.append(np.squeeze(v[0][0]))

    objlist = []
    for o in nouns:
        objlist.append(np.squeeze(o[0][0]))

    verblist = np.array(verblist)
    objlist = np.array(objlist)

    verblist = np.squeeze(verblist)
    objlist = np.squeeze(objlist)
    ##### Load interaction categories #########################################################################################################

    ##### Load alpha values for analysis #########################################################################################################
    print('...Loading alpha values...')
    alpha_path = '/var/scratch/mkilicka/code/context-driven-interactions/submission/data/hico/results/gumbel_softmax_hard_gating.h5'
    (y_te_pred, alphas) = utils.h5_load_multi(
        alpha_path, ['y_pred_te', 'alphas'])  # (B, 600), (B, M, N)

    C = y_te_pred.shape[1]
    alphas = alphas.max(2)  # (B, M)

    ##### Generating alpha values per class statistics #########################################################################################################
    print('...Computing class-level alphas...')

    # per interaction
    output = np.zeros((C, 4), dtype=np.float32)

    y_te_ = np.transpose(y_te)
    for i in range(C):
        index = np.where(y_te_[i] == 1)[0]

        alphas_per_class = alphas[index].mean(0)
        output[i] = alphas_per_class

    # per object
    unique_objects = np.unique(objlist)
    output_object = np.zeros((len(unique_objects), 4), dtype=np.float32)

    for i in range(len(unique_objects)):

        inter_classes = np.where(objlist == unique_objects[i])[0]

        for j in inter_classes:
            index = np.where(y_te_[j] == 1)[0]
            alphas_per_class = alphas[index].mean(0)
            output_object[i] += alphas_per_class

        output_object[i] = output_object[i] / len(inter_classes)

    # per verb
    unique_verbs = np.unique(verblist)
    output_verb = np.zeros((len(unique_verbs), 4), dtype=np.float32)

    for i in range(len(unique_verbs)):

        inter_classes = np.where(verblist == unique_verbs[i])[0]

        for j in inter_classes:
            index = np.where(y_te_[j] == 1)[0]
            alphas_per_class = alphas[index].mean(0)
            output_verb[i] += alphas_per_class

        output_verb[i] = output_verb[i] / len(inter_classes)

    ##### Generating alpha values per class statistics #########################################################################################################

    ##### Export alpha values to csv file for interaction #########################################################################################################
    print('...Exporting alphas...')

    import csv
    classfile = open('./analysis/per_class_alpha_analysis.csv', 'w')

    classfile.write('verb\tobject\tlvis\tlocal_scene\tdeformation\tpart\n')

    for i in range(C):

        text = verblist[i] + '\t' + objlist[i] + '\t' + str(
            output[i, 0]) + '\t' + str(output[i, 1]) + '\t' + str(
                output[i, 2]) + '\t' + str(output[i, 3]) + '\n'
        classfile.write(text)

    classfile.close()
    ##### Export alpha values to csv file for interactions #########################################################################################################

    ##### Export alpha values to csv file for objects #########################################################################################################
    print('...Exporting alphas...')

    classfile = open('./analysis/per_class_alpha_analysis_object.csv', 'w')

    classfile.write('object\tlvis\tlocal_scene\tdeformation\tpart\n')

    for i in range(len(unique_objects)):

        text = unique_objects[i] + '\t' + str(output_object[
            i, 0]) + '\t' + str(output_object[i, 1]) + '\t' + str(
                output_object[i, 2]) + '\t' + str(output_object[i, 3]) + '\n'
        classfile.write(text)

    classfile.close()
    ##### Export alpha values to csv file for objects #########################################################################################################

    ##### Export alpha values to csv file for verbs #########################################################################################################
    print('...Exporting alphas...')

    classfile = open('./analysis/per_class_alpha_analysis_verb.csv', 'w')

    classfile.write('verb\tlvis\tlocal_scene\tdeformation\tpart\n')

    for i in range(len(unique_verbs)):

        text = unique_verbs[i] + '\t' + str(output_verb[i, 0]) + '\t' + str(
            output_verb[i, 1]) + '\t' + str(output_verb[i, 2]) + '\t' + str(
                output_verb[i, 3]) + '\n'
        classfile.write(text)

    classfile.close()
    ##### Export alpha values to csv file for verbs #########################################################################################################

    ##### Export selected objects to create a heatmap #########################################################################################################

    query_objects = [
        'dining_table', 'oven', 'refrigerator', 'motorcycle', 'horse', 'car',
        'snowboard', 'skis', 'skateboard', 'bowl', 'orange', 'donut'
    ]
    contexts = ['objects', 'local scene', 'deformation', 'part appearance']
    query_objects = np.array(query_objects)
    heatmap = np.zeros((query_objects.shape[0], 4), dtype=np.float32)

    for i in range(query_objects.shape[0]):
        index = np.where(unique_objects == query_objects[i])
        temp = output_object[index]
        heatmap[i] = temp

    print(heatmap)
    sio.savemat('./analysis/exp3_heatmap_object.mat', {
        'heatmap': heatmap,
        'objects': query_objects,
        'contexts': contexts
    })

    ##### Export selected objects to create a heatmap #########################################################################################################

    query_verbs = [
        'eat_at', 'clean', 'cook', 'race', 'row', 'drive', 'throw', 'stand_on',
        'jump', 'cut_with', 'brush_with', 'eat'
    ]
    contexts = ['objects', 'local scene', 'deformation', 'part appearance']
    query_verbs = np.array(query_verbs)
    heatmap = np.zeros((query_verbs.shape[0], 4), dtype=np.float32)

    for i in range(query_verbs.shape[0]):
        index = np.where(unique_verbs == query_verbs[i])
        temp = output_verb[index]
        heatmap[i] = temp

    print(heatmap)
    sio.savemat('./analysis/exp3_heatmap_verb.mat', {
        'heatmap': heatmap,
        'objects': query_verbs,
        'contexts': contexts
    })
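
The three exports above build tab-separated rows by hand even though csv is imported. An equivalent for the per-interaction file using csv.writer, with the same columns and delimiter; shown as an alternative, not the original code:

import csv

# per-interaction export via csv.writer instead of manual concatenation
with open('./analysis/per_class_alpha_analysis.csv', 'w') as f:
    writer = csv.writer(f, delimiter='\t')
    writer.writerow(['verb', 'object', 'lvis', 'local_scene', 'deformation', 'part'])
    for i in range(C):
        writer.writerow([verblist[i], objlist[i]] + [str(v) for v in output[i]])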
Example #11
def train_human_object_multiple_context_gating(soft_flag=True,
                                               backbone='rcnn'):

    n_epochs = 100
    batch_size_tr = 32
    batch_size_te = 32
    n_classes = N_CLASSES

    if backbone == 'rcnn':
        print('Using backbone rcnn')
        feature_path_interaction = Pth(
            'Hico/features/h5/features_base_subject_object.h5')
        n_channels, n_regions, channel_side_dim = 4096, 12, 1
        (x_tr, x_te) = utils.h5_load_multi(feature_path_interaction,
                                           ['x_tr', 'x_te'])
        x_tr = np.swapaxes(x_tr, 1, 2)
        x_te = np.swapaxes(x_te, 1, 2)
    elif backbone == 'pairatt':
        print('Using backbone pairatt')
        feature_path_interaction = Pth('Hico/features/h5/features_pairattn.h5')
        n_channels, n_regions, channel_side_dim = 4096, 3, 1
        (x_tr, x_te) = utils.h5_load_multi(feature_path_interaction,
                                           ['x_tr', 'x_te'])

    # Deformation context features
    feature_path_c3 = Pth('Hico/features/h5/deformation.h5')
    x_cs_shape = [(512, 1, 1, 1)]

    # LVIS object context features
    feature_path_c1 = Pth('Hico/features/h5/lvis.h5')
    x_cs_shape = [(1300, 1, 1, 1)]

    # Local-scene context features
    feature_path_c2 = Pth('Hico/features/h5/local_scene.h5')
    x_cs_shape = [(2048, 1, 1, 1)]

    # Stuff context features (path not used below)
    feature_path_context = Pth('Hico/features/h5/stuff.h5')
    x_cs_shape = [(649, 1, 1, 1)]

    # Part-state context features (overwrites the stuff path; not used below)
    feature_path_context = Pth('Hico/features/h5/part_states.h5')
    x_cs_shape = [(1032, 1, 1, 1)]

    # Local-pose context features
    feature_path_c4 = Pth('Hico/features/h5/local_pose.h5')
    x_cs_shape = [(4096, 1, 1, 1)]

    # only this final assignment takes effect: the four contexts actually used (c1..c4)
    x_cs_shape = [(1300, 1, 1, 1), (2048, 1, 1, 1), (512, 1, 1, 1),
                  (4096, 1, 1, 1)]

    # Annotation of the image
    annot_path = Pth('Hico/features/h5/anno_hico.pkl')
    model_name = 'classifier_%s' % (utils.timestamp())
    input_shape = (n_channels, n_regions, channel_side_dim, channel_side_dim)

    print('--- start time')
    print(datetime.datetime.now())

    print('... loading data')
    t1 = time.time()

    (img_names_tr, y_tr, y_tr_mask, img_names_te, y_te,
     y_te_mask) = utils.pkl_load(annot_path)
    y_tr = y_tr.astype(np.float32)
    y_te = y_te.astype(np.float32)

    y_tr_mask = y_tr_mask.astype(np.float32)
    y_te_mask = y_te_mask.astype(np.float32)

    print('... context features')
    (x_tr_c1, x_te_c1) = utils.h5_load_multi(feature_path_c1, ['x_tr', 'x_te'])
    #x_tr_c1 = expand_feats(x_tr_c1)
    #x_te_c1 = expand_feats(x_te_c1)

    (x_tr_c2, x_te_c2) = utils.h5_load_multi(feature_path_c2, ['x_tr', 'x_te'])
    x_tr_c2 = expand_feats(x_tr_c2)
    x_te_c2 = expand_feats(x_te_c2)

    (x_tr_c3, x_te_c3) = utils.h5_load_multi(feature_path_c3, ['x_tr', 'x_te'])
    x_tr_c3 = expand_feats(x_tr_c3)
    x_te_c3 = expand_feats(x_te_c3)

    (x_tr_c4, x_te_c4) = utils.h5_load_multi(feature_path_c4, ['x_tr', 'x_te'])
    x_tr_c4 = expand_feats(x_tr_c4)
    x_te_c4 = expand_feats(x_te_c4)

    print('train_set_shape_interaction: ', x_tr.shape)
    print('test_set_shape_interaction: ', x_te.shape)

    print('train_set_shape_context-1: ', x_tr_c1.shape)
    print('test_set_shape_context-1: ', x_te_c1.shape)

    print('train_set_shape_context-2: ', x_tr_c2.shape)
    print('test_set_shape_context-2: ', x_te_c2.shape)

    print('train_set_shape_context-3: ', x_tr_c3.shape)
    print('test_set_shape_context-3: ', x_te_c3.shape)

    print('train_set_shape_context-4: ', x_tr_c4.shape)
    print('test_set_shape_context-4: ', x_te_c4.shape)

    t2 = time.time()
    duration = t2 - t1
    print('... loading data, duration (sec): %d' % (duration))

    # building the model
    print('... building model %s' % (model_name))
    t1 = time.time()
    # only the soft-gating branch is shown in this snippet; with soft_flag=False, model would be left undefined
    if soft_flag:
        print('Training soft fusion model')
        model = ClassifierContextLateFusionMultiSoftGate(
            n_classes, input_shape, x_cs_shape)

    t2 = time.time()
    duration = t2 - t1
    model = model.cuda()
    input_sizes = [input_shape] + list(x_cs_shape)
    #pytorch_utils.model_summary_multi_input(model, input_sizes=input_sizes, batch_size=-1, device='cuda')
    print('... model built, duration (sec): %d' % (duration))

    # callbacks
    callbacks = []

    print(
        'Interaction_feat: %s, Context_feat-1: %s, Context_feat-2: %s, Context_feat-3: %s\n'
        % (feature_path_interaction, feature_path_c1, feature_path_c2,
           feature_path_c3))

    # start training
    pytorch_utils.train_model_custom_metric_mask(
        model,
        model._optimizer,
        model._loss_fn,
        model._metric_fn, [x_tr, x_tr_c1, x_tr_c2, x_tr_c3, x_tr_c4],
        y_tr,
        y_tr_mask, [x_te, x_te_c1, x_te_c2, x_te_c3, x_te_c4],
        y_te,
        y_te_mask,
        n_epochs,
        batch_size_tr,
        batch_size_te,
        callbacks=callbacks)

    print('--- finish time')
    print(datetime.datetime.now())
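
expand_feats, applied to the context features above, is not defined in the snippet. A hypothetical sketch, assuming it pads flat (B, C) features with singleton spatial dims to match the (C, 1, 1, 1) per-sample shapes listed in x_cs_shape:

import numpy as np

# Assumed behavior of expand_feats: append singleton axes until the
# array matches the 5D (B, C, 1, 1, 1) layout the model expects.
def expand_feats(x):
    while x.ndim < 5:
        x = np.expand_dims(x, axis=-1)
    return x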
Example #12
print('Result of multi-head gating exp: %02.02f' % (acc_te))

###### multi-head gating inference loop (for alphas) ##########################################################################################################################

backbone = 'rcnn'
ablation = False

if backbone == 'rcnn':
    feature_path_interaction = Pth(
        'Hico/features/h5/features_base_subject_object.h5')
    n_channels, n_regions, channel_side_dim = 4096, 12, 1
    (x_tr, x_te) = utils.h5_load_multi(feature_path_interaction,
                                       ['x_tr', 'x_te'])
    x_te = np.swapaxes(x_te, 1, 2)
    if not ablation:
        path_model = '/var/scratch/mkilicka/data/hico/models_finetuned/late_hard_gating_for_hico/model.pt'
        path_save = '/var/scratch/mkilicka/code/context-driven-interactions/submission/data/hico/results/gumbel_softmax_hard_gating.h5'

    else:
        path_model = '/var/scratch/mkilicka/data/hico/models_finetuned/late_hard_ablated_gating_for_hico/model.pt'
        path_save = '/var/scratch/mkilicka/code/context-driven-interactions/submission/data/hico/results/gumbel_softmax_hard_ablated_gating.h5'

print('backbone:', backbone)
input_shape = (n_channels, n_regions, channel_side_dim, channel_side_dim)

feature_path_c1 = Pth('Hico/features/h5/lvis.h5')
feature_path_c2 = Pth('Hico/features/h5/local_scene.h5')
feature_path_c3 = Pth('Hico/features/h5/deformation.h5')