Example #1
    def __init__(self,
                 batch_size,
                 n_classes,
                 feature_dim,
                 feature_name,
                 is_training,
                 is_shuffle=True):
        """
        Initialization
        """

        self.batch_size = batch_size
        self.is_training = is_training
        self.n_classes = n_classes
        self.feature_dim = feature_dim
        self.feature_name = feature_name
        # self.is_shuffle = is_shuffle
        self.dataset_name = 'charades'

        # load annotation
        root_path = './data/charades'
        annotation_path = '%s/annotation/video_annotation_py3.pkl' % (
            root_path)
        if self.is_training:
            (video_names, y, _, _) = utils.pkl_load(annotation_path)
            print(video_names)
        else:
            (_, _, video_names, y) = utils.pkl_load(annotation_path)

        # in case of single label classification, debinarize the labels
        if config.cfg.MODEL.CLASSIFICATION_TYPE == 'sl':
            y = utils.debinarize_label(y)

        # in any case, make sure target is float
        y = y.astype(np.float32)

        # convert relative video names to feature paths under root_path
        feats_path = np.array([
            '%s/%s/%s.pkl' % (root_path, feature_name, p) for p in video_names
        ])

        n_samples = len(y)
        self.n_samples = n_samples
        self.n_batches = utils.calc_num_batches(n_samples, batch_size)
        self.feats_path = feats_path
        self.y = y
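
All six examples on this page lean on utils.calc_num_batches to size their loops. The helper itself is not shown here; a minimal sketch, assuming it is plain ceiling division so a trailing partial batch still counts:

import math

def calc_num_batches(n_samples, batch_size):
    # ceiling division: 7811 samples at batch_size 32 -> 245 batches
    # (244 full batches plus one partial batch of 3)
    return int(math.ceil(n_samples / float(batch_size)))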
Example #2
def __get_tensor_values(batch_size, keras_session, tensor, t_input_n, t_input_x, v_input_n, x):
    n_items = len(x)
    n_batches = utils.calc_num_batches(n_items, batch_size)

    data = None
    for idx_batch in range(n_batches):
        utils.print_counter(idx_batch + 1, n_batches)
        idx_b = idx_batch * batch_size
        idx_e = (idx_batch + 1) * batch_size
        v_input_x = x[idx_b:idx_e]
        print(v_input_x.shape)
        values, = keras_session.run([tensor], {t_input_x: v_input_x, t_input_n: v_input_n})  # (None, 1, 1, 64, 128)
        print(values.shape)
        print()
        data = values if data is None else np.vstack((data, values))

    data = np.array(data)
    return data
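
Note that on the final iteration idx_e can exceed len(x); NumPy slicing clamps the upper bound, so the last batch is simply smaller rather than an error. A quick illustration:

import numpy as np

x = np.zeros((10, 4))     # 10 items with batch_size = 4
print(x[8:12].shape)      # (2, 4): the slice is clamped to the array length

Also, calling np.vstack inside the loop copies the accumulated data on every iteration; collecting the per-batch arrays in a list and stacking once after the loop is cheaper for large datasets.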
Example #3
    def __init__(self, batch_size, n_classes, feature_dim, feature_name, is_training, is_shuffle=True):
        """
        Initialization
        """

        self.batch_size = batch_size #32
        self.is_training = is_training #True
        self.n_classes = n_classes #157
        self.feature_dim = feature_dim #(1024, 32, 7, 7)
        self.feature_name = feature_name #'features_i3d_pytorch_charades_rgb_mixed_5c_32f'
        self.is_shuffle = is_shuffle
        self.dataset_name = 'Charades'

        # load annotation
        root_path = './data/Charades'
        annotation_path = '%s/annotation/video_annotation.pkl' % (root_path)  # video annotation: ./data/Charades/annotation/video_annotation.pkl

        if self.is_training:
            (video_names, y, _, _) = utils.pkl_load(annotation_path) #video_names [b'001YG' b'004QE' b'00HFP' ... b'ZZDBH' b'ZZN85' b'ZZXQF'],y.shape=(7811, 157)
        else:
            (_, _, video_names, y) = utils.pkl_load(annotation_path) #y.shape = (1814,157)

        # in case of single-label classification, debinarize the labels
        if config.cfg.MODEL.CLASSIFICATION_TYPE == 'sl':
            y = utils.debinarize_label(y)

        # in any case, make sure target is float
        y = y.astype(np.float32)

        # convert video names to feature paths under root_path, where the I3D-extracted features are stored
        feats_path = np.array(['%s/%s/%s.pkl' % (root_path, feature_name, p.astype(str)) for p in video_names])  # original version: './data/Charades/features_i3d_pytorch_charades_rgb_mixed_5c_32f/'

        n_samples = len(y)
        self.n_samples = n_samples
        self.n_batches = utils.calc_num_batches(n_samples, batch_size)  # compute the number of batches
        self.feats_path = feats_path  # paths where the features are stored
        self.y = y

        # shuffle the data
        if self.is_shuffle:
            self.__shuffle()
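
The private __shuffle method referenced above is not included in this example. A plausible sketch, assuming it draws a single permutation and applies it to both arrays so feats_path[i] keeps matching y[i]:

import numpy as np

def __shuffle(self):
    # hypothetical implementation: one shared permutation
    # keeps features and labels aligned after shuffling
    idx = np.random.permutation(self.n_samples)
    self.feats_path = self.feats_path[idx]
    self.y = self.y[idx]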
Example #4
def train_model_on_pickled_features():
    """
    Train model.
    """

    annotation_type = 'noun'
    annot_path = Pth(
        'EPIC-Kitchens/annotation/annot_video_level_many_shots.pkl')
    (y_tr, y_te), n_classes = __load_annotation(annot_path, annotation_type)

    model_type = 'i3d_rgb'
    feature_type = 'mixed_5c'
    n_nodes = 128
    n_timesteps = 64
    n_frames_per_segment = 8
    n_frames_per_video = n_timesteps * n_frames_per_segment
    batch_size_tr = 20
    batch_size_te = 30
    n_epochs = 500
    epoch_offset = 0
    model_name = 'classifier_%s' % (utils.timestamp())
    model_root_path = Pth('EPIC-Kitchens/models')

    features_path = Pth(
        'EPIC-Kitchens/features/features_i3d_mixed_5c_%d_frames.h5',
        (n_frames_per_video, ))
    nodes_path = Pth('EPIC-Kitchens/features_centroids/features_random_%d.pkl',
                     (n_nodes, ))
    n_channels, side_dim = utils.get_model_feat_maps_info(
        model_type, feature_type)
    input_shape = (None, n_timesteps, side_dim, side_dim, n_channels)
    nodes = utils.pkl_load(nodes_path)

    print('--- start time')
    print(datetime.datetime.now())

    # building the model
    print('... building model %s' % (model_name))
    t1 = time.time()
    model = __load_model_videograph(nodes, n_classes, input_shape)
    t2 = time.time()
    duration = t2 - t1
    print(model.summary(line_length=130, positions=None, print_fn=None))
    print('... model built, duration (sec): %d' % (duration))

    # load data
    print('... loading data: %s' % (features_path))
    t1 = time.time()
    # features are extracted using datasets.Epic_Kitchens.i3d_keras_epic_kitchens()
    # we use out-of-the-box i3d (pre-trained on kinetics, NOT fine-tuned on epic-kitchens) with the last conv feature 7*7*1024 'mixed_5c'
    # to get better performance, you need to randomly sample new frames and extract their features every epoch
    # please use this function for random sampling, instead of uniform sampling: Epic_Kitchens.__random_sample_frames_per_video_for_i3d()
    # then extract their features, as done in: Epic_Kitchens._901_extract_features_i3d()
    # do this every epoch; it is computationally heavy, but you cannot avoid random sampling if you want better results
    # results improve further if you replace I3D with a 2D/3D CNN previously fine-tuned on Epic-Kitchens
    (x_tr, x_te) = utils.h5_load_multi(features_path, ['x_tr', 'x_te'])
    t2 = time.time()

    duration = t2 - t1
    print('... data loaded: %d' % (duration))

    n_tr = len(x_tr)
    n_te = len(x_te)
    n_batch_tr = utils.calc_num_batches(n_tr, batch_size_tr)
    n_batch_te = utils.calc_num_batches(n_te, batch_size_te)
    print('... [tr]: n, n_batch, batch_size: %d, %d, %d' %
          (n_tr, n_batch_tr, batch_size_tr))
    print('... [te]: n, n_batch, batch_size: %d, %d, %d' %
          (n_te, n_batch_te, batch_size_te))
    print(x_tr.shape)
    print(x_te.shape)
    print(y_tr.shape)
    print(y_te.shape)

    save_callback = keras_utils.ModelSaveCallback(model, model_name,
                                                  epoch_offset,
                                                  model_root_path)
    score_callback = keras_utils.MapScoreCallback(model, None, None, x_te,
                                                  y_te, batch_size_te,
                                                  n_classes)
    model_callbacks = [save_callback, score_callback]
    model.fit(x_tr,
              y_tr,
              epochs=n_epochs,
              batch_size=batch_size_tr,
              validation_split=0.0,
              validation_data=(x_te, y_te),
              shuffle=True,
              callbacks=model_callbacks,
              verbose=2)

    print('--- finish time')
    print(datetime.datetime.now())
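
The comment block in this example recommends re-sampling frames randomly every epoch via Epic_Kitchens.__random_sample_frames_per_video_for_i3d(), which is not shown on this page. A hedged sketch of the idea (the function name below is illustrative), assuming each of the n_timesteps segments contributes one random window of n_frames_per_segment consecutive frames:

import numpy as np

def random_sample_frame_indices(n_video_frames, n_timesteps=64, n_frames_per_segment=8):
    # split the video into n_timesteps equal segments; inside each segment,
    # take n_frames_per_segment consecutive frames at a random offset
    segment_len = n_video_frames // n_timesteps
    indices = []
    for i in range(n_timesteps):
        start = i * segment_len
        max_offset = max(segment_len - n_frames_per_segment, 0)
        offset = np.random.randint(0, max_offset + 1)
        indices.extend(range(start + offset, start + offset + n_frames_per_segment))
    # clamp in case a short video makes the last window run past the end
    return np.minimum(np.array(indices), n_video_frames - 1)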
Example #5
def train_model_videograph():
    """
    Train model.
    """

    annotation_type = 'noun'
    annot_path = Pth(
        'EPIC-Kitchens/annotations/annot_video_level_many_shots.pkl')
    (y_tr, y_te), n_classes = __load_annotation(annot_path, annotation_type)

    model_type = 'i3d_rgb'
    feature_type = 'mixed_5c'
    n_nodes = 128
    n_timesteps = 64
    n_frames_per_segment = 8
    n_frames_per_video = n_timesteps * n_frames_per_segment
    batch_size_tr = 20
    batch_size_te = 30
    n_epochs = 500
    epoch_offset = 0
    model_name = 'classifier_%s' % (utils.timestamp())
    model_root_path = Pth('EPIC-Kitchens/models')

    nodes_path = Pth('EPIC-Kitchens/features/nodes_random_%d.pkl', (n_nodes, ))
    features_path = Pth(
        'EPIC-Kitchens/features/features_i3d_mixed_5c_%d_frames.h5',
        (n_frames_per_video, ))
    n_channels, side_dim = utils.get_model_feat_maps_info(
        model_type, feature_type)
    input_shape = (None, n_timesteps, side_dim, side_dim, n_channels)

    # either load nodes or generate them on the fly, but remember to save them, as you need them at test time
    # nodes = utils.pkl_load(nodes_path)
    nodes = utils.generate_centroids(n_nodes, n_channels)

    print('--- start time')
    print(datetime.datetime.now())

    # building the model
    print('... building model %s' % (model_name))
    t1 = time.time()
    model = __load_model_videograph(nodes, n_classes, input_shape)
    t2 = time.time()
    duration = t2 - t1
    print(model.summary(line_length=130, positions=None, print_fn=None))
    print('... model built, duration (sec): %d' % (duration))

    # load data
    print('... loading data: %s' % (features_path))
    t1 = time.time()
    # features are extracted using datasets.epic_kitchens.i3d_keras_epic_kitchens()
    # we use out-of-the-box i3d (pre-trained on kinetics, NOT fine-tuned on epic-kitchens) with the last conv feature 7*7*1024 'mixed_5c'
    (x_tr, x_te) = utils.h5_load_multi(features_path, ['x_tr', 'x_te'])
    t2 = time.time()

    duration = t2 - t1
    print('... data loaded: %d' % (duration))

    n_tr = len(x_tr)
    n_te = len(x_te)
    n_batch_tr = utils.calc_num_batches(n_tr, batch_size_tr)
    n_batch_te = utils.calc_num_batches(n_te, batch_size_te)
    print('... [tr]: n, n_batch, batch_size: %d, %d, %d' %
          (n_tr, n_batch_tr, batch_size_tr))
    print('... [te]: n, n_batch, batch_size: %d, %d, %d' %
          (n_te, n_batch_te, batch_size_te))
    print(x_tr.shape)
    print(x_te.shape)
    print(y_tr.shape)
    print(y_te.shape)

    save_callback = keras_utils.ModelSaveCallback(model, model_name,
                                                  epoch_offset,
                                                  model_root_path)
    score_callback = keras_utils.MapScoreCallback(model, None, None, x_te,
                                                  y_te, batch_size_te,
                                                  n_classes)
    model_callbacks = [save_callback, score_callback]
    model.fit(x_tr,
              y_tr,
              epochs=n_epochs,
              batch_size=batch_size_tr,
              validation_split=0.0,
              validation_data=(x_te, y_te),
              shuffle=True,
              callbacks=model_callbacks,
              verbose=2)

    print('--- finish time')
    print(datetime.datetime.now())
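
Because the nodes in this example come from utils.generate_centroids rather than disk, they must be persisted or test-time evaluation will see different centroids. A minimal sketch using the standard pickle module, assuming nodes_path resolves to an ordinary file path:

import pickle

# save the generated nodes so utils.pkl_load(nodes_path) can restore
# the exact same centroids at test time
with open(nodes_path, 'wb') as f:
    pickle.dump(nodes, f)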
Example #6
def train_model_on_pickled_features():
    """
    Train model.
    """

    model_type = 'i3d_rgb'
    feature_type = 'mixed_5c'

    n_centroids = 128
    n_timesteps = 64
    is_spatial_pooling = True
    is_resume_training = False

    batch_size_tr = 12
    batch_size_te = 30
    n_epochs = 100
    n_classes = N_CLASSES
    n_gpus = 1

    model_name = 'classifier_%s' % (utils.timestamp())
    model_weight_path = ''
    features_path = Pth('Breakfast/features/features_i3d_mixed_5c_%d_frames_max_pool.h5', (n_timesteps * 8,)) if is_spatial_pooling else Pth('Breakfast/features/features_i3d_mixed_5c_%d_frames.h5', (n_timesteps * 8,))
    centroids_path = Pth('Breakfast/features_centroids/features_random_%d_centroids.pkl', (n_centroids,))
    gt_actions_path = Pth('Breakfast/annotation/gt_unit_actions.pkl')
    (_, y_tr), (_, y_te) = utils.pkl_load(gt_actions_path)
    centroids = utils.pkl_load(centroids_path)

    n_feat_maps, feat_map_side_dim = utils.get_model_feat_maps_info(model_type, feature_type)
    feat_map_side_dim = 1 if is_spatial_pooling else feat_map_side_dim
    input_shape = (None, n_timesteps, feat_map_side_dim, feat_map_side_dim, n_feat_maps)

    print('--- start time')
    print(datetime.datetime.now())

    # building the model
    print('... building model %s' % (model_name))
    t1 = time.time()
    root_model, model = __load_model_mlp_classifier_video_graph(centroids, n_classes, input_shape, n_gpus=n_gpus, is_load_weights=is_resume_training, weight_path=model_weight_path)
    t2 = time.time()
    duration = t2 - t1
    print(root_model.summary(line_length=130, positions=None, print_fn=None))
    print('... model built, duration (sec): %d' % (duration))

    # load data
    print('... loading data: %s' % (features_path))
    print('... centroids: %s' % (centroids_path))
    t1 = time.time()
    (x_tr, x_te) = utils.h5_load_multi(features_path, ['x_tr', 'x_te'])
    t2 = time.time()
    duration = t2 - t1
    print('... data loaded: %d' % (duration))
    print(x_tr.shape)
    print(y_tr.shape)
    print(x_te.shape)
    print(y_te.shape)

    n_tr = len(x_tr)
    n_te = len(x_te)
    n_batch_tr = utils.calc_num_batches(n_tr, batch_size_tr)
    n_batch_te = utils.calc_num_batches(n_te, batch_size_te)
    print('... [tr]: n, n_batch, batch_size, n_gpus: %d, %d, %d, %d' % (n_tr, n_batch_tr, batch_size_tr, n_gpus))
    print('... [te]: n, n_batch, batch_size, n_gpus: %d, %d, %d, %d' % (n_te, n_batch_te, batch_size_te, n_gpus))

    score_callback = ScoreCallback(model, None, None, x_te, y_te, batch_size_te)
    callbacks = [score_callback]
    model.fit(x_tr, y_tr, epochs=n_epochs, batch_size=batch_size_tr, validation_split=0.0, validation_data=(x_te, y_te), shuffle=True, callbacks=callbacks, verbose=2)
    print('--- finish time')
    print(datetime.datetime.now())
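
When is_spatial_pooling is True, this example loads the *_max_pool.h5 features and sets feat_map_side_dim to 1. A hedged sketch of how such pooled features could be derived from the raw mixed_5c maps, assuming they are stored as (n_videos, n_timesteps, side, side, channels):

import numpy as np

def spatial_max_pool(x):
    # global max over the two spatial axes:
    # (n, t, 7, 7, 1024) -> (n, t, 1, 1, 1024)
    return x.max(axis=(2, 3), keepdims=True)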