Example #1
def calculate_surf_weightedsum(s_path, W_path):
    # read each per-video SURF feature file under s_path and write its
    # weighted-sum descriptor to W_path
    nr = Npyfilereader(s_path)
    nr.validate(W_path)
    video_num = len(nr.npy_paths)
    for i in range(video_num):
        name, contents = nr.read_npys()
        ws = Weightedsum(name, contents, W_path)
        # if ws.pre_processing() == -1:
        #     print(name)
        #     problem_path.append(name)
        #     continue
        ws.ws_descriptor_gen(5)
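For intuition, the weighted-sum descriptor these examples build is a linear temporal pooling: a (dim, T) weight matrix maps T per-frame features to a fixed-size (dim, D) descriptor. A minimal NumPy sketch of that operation (the helper name and shapes here are illustrative assumptions, not the project's API):

import numpy as np

def weighted_sum_descriptor(frame_features, trans_matrix):
    # frame_features: (T, D) per-frame features; trans_matrix: (dim, T) temporal weights.
    # Each weight row pools the T frames into one D-dim vector, so the output
    # is (dim, D) regardless of video length.
    return trans_matrix @ frame_features

feats = np.random.rand(8, 4)    # toy data: 8 frames of 4-dim features
weights = np.random.rand(2, 8)  # dim = 2 weight rows
print(weighted_sum_descriptor(feats, weights).shape)  # (2, 4)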
Example #2
def input_gen_mul(rgb_feature_path,
                  u_feature_path,
                  v_feature_path,
                  video_names,
                  video_labels,
                  steps=5,
                  dim=2,
                  train=False):
    # 1776 is the max video length
    rgb_path = [
        os.path.join(rgb_feature_path, n + '.npy') for n in video_names
    ][:]
    u_path = [os.path.join(u_feature_path, n + '.npy') for n in video_names][:]
    all_rgb = []
    all_u = []
    all_label = []
    tran_m = Weightedsum('tran_m', [],
                         None).transformation_matrix_gen(dim, 1776)
    tran_m = preprocessing.normalize(tran_m, axis=1)
    # tran_m = None
    for rp, up, name, l in zip(rgb_path, u_path, video_names[:],
                               video_labels[:]):
        if rp != "/home/boy2/ucf101/ucf101_dataset/features/mulNet_feature/mulNet/rgb/o/v_StillRings_g21_c05.npy":
            rgb = np.load(rp)
            rgb = np.transpose(rgb)
            u = np.load(up)
            u = np.transpose(u)
            if len(rgb) > len(u) and len(rgb) - 1 == len(u):
                rgb = rgb[:-1]

            # # Time Varying Mean Vectors
            # copy_rgb = rgb.copy()
            # copy_u = u.copy()
            # copy_v = v.copy()
            # for r in range(len(rgb)):
            #     rgb[r] = np.sum(copy_rgb[:r + 1, :], axis=0) / (r + 1)
            #     rgb[r] = rgb[r] / np.linalg.norm(rgb[r])
            #     u[r] = np.sum(copy_u[:r + 1, :], axis=0) / (r + 1)
            #     u[r] = u[r] / np.linalg.norm(u[r])
            #     v[r] = np.sum(copy_v[:r + 1, :], axis=0) / (r + 1)
            #     v[r] = v[r] / np.linalg.norm(v[r])

            _rgb = Weightedsum(name, rgb[:],
                               None).ws_descriptor_gen(dim, False, tran_m)
            _u = Weightedsum(name, u[:],
                             None).ws_descriptor_gen(dim, False, tran_m)

            all_rgb.append(_rgb / len(rgb))
            all_u.append(_u / len(u))
            all_label.append(l)
    return all_rgb, all_u, all_label
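The row-wise normalize call above gives every temporal weight row unit L2 norm, which (together with the division by video length when appending) keeps descriptors from videos of different lengths on a comparable scale. A quick standalone check with illustrative sizes:

import numpy as np
from sklearn import preprocessing

w = np.random.rand(2, 1776)                       # (dim, max video length)
w = preprocessing.normalize(w, axis=1)            # default norm is 'l2'
print(np.allclose(np.linalg.norm(w, axis=1), 1))  # True: every row has unit length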
Example #3
def feature_gen(rp, name, seed, dim):
    rgb = np.load(rp)

    if len(seed.shape) == 1:
        tran_m_rgb = Weightedsum('tran_m', [],
                                 None).transformation_matrix_gen_norm(
                                     dim, len(rgb), seed)
        tran_m_rgb /= len(rgb)  # note: the l2 row-normalization below makes this scaling redundant
        tran_m_rgb = preprocessing.normalize(tran_m_rgb, axis=1, norm='l2')
    else:
        tran_m_rgb = seed

    _rgb = Weightedsum(name, rgb[:],
                       None).ws_descriptor_gen(dim, False, tran_m_rgb)
    return _rgb
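A hypothetical call, assuming the project's Weightedsum class is importable: with a 2-D seed, feature_gen uses it directly as the transformation matrix, so its shape must be (dim, T) for a T-frame feature file.

import numpy as np

np.save('/tmp/toy.npy', np.random.rand(8, 4))  # toy data: 8 frames, 4-dim features
tran_m = np.random.rand(2, 8)                  # (dim, T) weights, used as-is
desc = feature_gen('/tmp/toy.npy', 'toy', tran_m, dim=2)  # expected shape: (2, 4)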
Example #4
def input_gen_all(rgb_feature_path,
                  u_feature_path,
                  v_feature_path,
                  video_names,
                  video_labels,
                  steps=5,
                  dim=2,
                  train=False):
    rgb_path = [
        os.path.join(rgb_feature_path, n + '.npy') for n in video_names
    ][:]
    u_path = [os.path.join(u_feature_path, n + '.npy') for n in video_names][:]
    v_path = [os.path.join(v_feature_path, n + '.npy') for n in video_names][:]
    all_rgb = []
    all_u = []
    all_v = []
    all_label = []
    for rp, up, vp, name, l in zip(rgb_path, u_path, v_path, video_names[:],
                                   video_labels[:]):
        rgb = np.load(rp)
        u = np.load(up)
        v = np.load(vp)
        if train is False:
            selected_frame_nums = np.random.randint(
                steps,
                len(rgb) - steps,
                size=num_samples_per_testing_video,
                dtype=np.int32)
        else:
            selected_frame_nums = np.random.randint(
                steps,
                len(rgb) - steps,
                size=num_samples_per_training_video,
                dtype=np.int32)
        for index in selected_frame_nums:
            # note: `index` is only used by the commented-out per-frame variant;
            # as written, the same whole-video descriptor is recomputed per sample
            _rgb = Weightedsum(name, rgb[:],
                               None).ws_descriptor_gen(dim, False, None)
            # _rgb = rgb[index]
            _u = Weightedsum(name, u[:],
                             None).ws_descriptor_gen(dim, False, None)
            _v = Weightedsum(name, v[:],
                             None).ws_descriptor_gen(dim, False, None)
            all_rgb.append(_rgb)
            all_u.append(_u)
            all_v.append(_v)
            all_label.append(l)
    return all_rgb, all_u, all_v, all_label
Example #5
def input_gen_all(rgb_feature_path,
                  selected_frames_path,
                  video_names,
                  video_labels,
                  seed,
                  steps=5,
                  dim=2,
                  train=False):
    video_names = np.array(video_names)
    video_labels = video_labels[:num_train_data]
    video_names = video_names[:num_train_data]

    # video_labels = video_labels[num_train]
    # video_names = video_names[num_train]
    # 1776 the max len video
    rgb_path = [
        os.path.join(rgb_feature_path, n + '.npy') for n in video_names
    ]
    selected_frames = [
        os.path.join(selected_frames_path, n + '.mat') for n in video_names
    ]
    all_rgb = []
    all_label = []

    for rp, sf, name, l in zip(rgb_path, selected_frames, video_names,
                               video_labels):
        rgb = np.load(rp)

        if len(seed.shape) == 1:
            tran_m_rgb = Weightedsum('tran_m', [],
                                     None).transformation_matrix_gen_norm(
                                         dim, len(rgb), seed)
            tran_m_rgb = preprocessing.normalize(tran_m_rgb, axis=1, norm='l2')
        else:
            tran_m_rgb = seed

        _rgb = Weightedsum(name, rgb[:],
                           None).ws_descriptor_gen(dim, False, tran_m_rgb)

        all_rgb.append(_rgb)
        all_label.append(l)

    return all_rgb, all_label
Example #6
def calculate_ws_on_rawdata(frame_path, store_path, dim):
    fl = Frameloader(frame_path)
    fl.validate(store_path)
    while len(fl.frame_parent_paths) != 0:
        name = fl.get_current_video_name()
        frames = np.array(fl.load_frames(mode='color'))
        ws = Weightedsum(name, frames, store_path)
        if dim == 0:
            ws.mean_descriptor_gen()
        else:
            ws.ws_on_raw_data(dim)
Example #7
def calculate_weightedsum(frame_features_path, store_path, dim, flip=False):
    nr = Npyfilereader(frame_features_path)
    nr.validate(store_path)
    video_num = len(nr.npy_paths)
    for i in range(video_num):
        name, contents = nr.read_npys()
        # contents = contents[0::5]
        if flip:
            contents = np.flip(contents, axis=0)
        ws = Weightedsum(name, contents, store_path)
        if dim == 0:
            ws.mean_descriptor_gen()
        else:
            ws.ws_descriptor_gen(dim)
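The flip option reverses the frame order before pooling; since the weight rows are not symmetric in time, forward and reversed versions of a video generally yield different descriptors. A quick illustration with a stand-in weight matrix (sizes illustrative):

import numpy as np

frames = np.arange(12, dtype=float).reshape(6, 2)  # 6 frames, 2-dim features
w = np.random.rand(2, 6)                           # stand-in (dim, T) weights
print(np.allclose(w @ frames, w @ np.flip(frames, axis=0)))  # False in general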
Example #8
def calculate_weightedsum_fixed_len(train_data, dim, clip_len, flip=False):
    trans_m = None
    ws_des = []
    for data in train_data:
        if flip:
            data = np.flip(data, axis=0)
        ws = Weightedsum(None, data, None)
        if dim == 0:
            ws.mean_descriptor_gen()
        else:
            if trans_m is None:
                trans_m = ws.transformation_matrix_gen(dim, clip_len)
            ws_des.append(
                ws.ws_descriptor_gen(dim, save=False, trans_matrix=trans_m))
    return np.array(ws_des)
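A hypothetical usage sketch: because one transformation matrix of shape (dim, clip_len) is built once and reused, every clip in train_data must be exactly clip_len frames long.

import numpy as np

clips = [np.random.rand(16, 128) for _ in range(3)]  # 3 clips, 16 frames each
descs = calculate_weightedsum_fixed_len(clips, dim=2, clip_len=16)
print(descs.shape)                                   # expected: (3, 2, 128)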
Example #9
def ws_flows(flow1_path, flow2_path, save_path1, save_path2, dim):
    # flow 1
    nr1 = Npyfilereader(flow1_path)
    nr1.validate(save_path1)
    # flow 2
    nr2 = Npyfilereader(flow2_path)
    nr2.validate(save_path2)

    video_num = len(nr1.npy_paths)
    for i in range(video_num):
        name1, contents1 = nr1.read_npys()
        name2, contents2 = nr2.read_npys()
        ws1 = Weightedsum(name1, contents1, save_path1)
        ws2 = Weightedsum(name2, contents2, save_path2)
        if dim == 0:
            ws1.mean_descriptor_gen()
            ws2.mean_descriptor_gen()
        else:
            trans_m = ws1.transformation_matrix_gen(dim, ws1.frame_features.shape[0])
            ws1.ws_descriptor_gen(dim, trans_matrix=trans_m)
            ws2.ws_descriptor_gen(dim, trans_matrix=trans_m)
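Because ws_flows builds one transformation matrix from flow 1 and reuses it for flow 2, the two flow streams must contain the same number of frames. A standalone guard one might add (illustrative, not in the original):

import numpy as np

def check_flow_pair(contents1, contents2):
    # trans_m is shaped (dim, len(contents1)), so it only multiplies cleanly
    # against contents2 if the frame counts match
    if contents1.shape[0] != contents2.shape[0]:
        raise ValueError("flow u/v frame counts must match to share one transformation matrix")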
Example #10
def input_gen_all(rgb_feature_path,
                  u_feature_path,
                  v_feature_path,
                  video_names,
                  video_labels,
                  seed,
                  steps=5,
                  dim=2,
                  train=False):
    video_names = np.array(video_names)
    video_labels = video_labels[:]
    video_names = video_names[:]

    # 1776 the max len video
    rgb_path = [
        os.path.join(rgb_feature_path, n + '.npy') for n in video_names
    ]
    u_path = [os.path.join(u_feature_path, n + '.npy') for n in video_names]
    v_path = [os.path.join(v_feature_path, n + '.npy') for n in video_names]

    # with concurrent.futures.ProcessPoolExecutor(max_workers=8) as executor:
    #     all_rgb = list(executor.map(feature_gen, rgb_path, video_names, itertools.repeat(seed, len(rgb_path)), itertools.repeat(dim, len(rgb_path))))
    #     all_u = list(executor.map(feature_gen, u_path, video_names, itertools.repeat(seed, len(u_path)), itertools.repeat(dim, len(u_path))))
    #     all_v = list(executor.map(feature_gen, v_path, video_names, itertools.repeat(seed, len(v_path)), itertools.repeat(dim, len(v_path))))

    all_rgb, all_u, all_v = list(), list(), list()
    for rp, up, vp, name, l in zip(rgb_path, u_path, v_path, video_names,
                                   video_labels):
        rgb = np.load(rp)
        u = np.load(up)
        v = np.load(vp)

        if len(seed.shape) == 1:
            tran_m_rgb = Weightedsum('tran_m', [],
                                     None).transformation_matrix_gen_norm(
                                         dim, len(rgb), seed)
            tran_m_rgb /= len(rgb)  # note: the l2 row-normalization below makes this scaling redundant
            tran_m_rgb = preprocessing.normalize(tran_m_rgb, axis=1, norm='l2')
            # tran_m_flow = Weightedsum('tran_m', [], None).transformation_matrix_gen_norm(dims, len(u), seed)
            # tran_m_flow = preprocessing.normalize(tran_m_flow, axis=1, norm='l2')
        else:
            tran_m_rgb = seed
            # tran_m_flow = seed

        # m = np.min(rgb)
        # rgb += -m
        # # rgb = rgb / np.max(rgb)
        # m = np.min(u)
        # u += -m
        # # u = u / np.max(u)
        # m = np.min(v)
        # v += -m
        # # v = v / np.max(v)

        # select_frame = np.load(sf)
        # if select_frame.size != 0:
        #     select_frame[0].sort()
        #     if select_frame[0][-1] == len(rgb) + 1:
        #         select_frame[0] = select_frame[0][:-1]
        #     rgb = rgb[np.array(select_frame[1]) - 1]
        #     u = u[np.array(select_frame[1]) - 1]
        #     v = v[np.array(select_frame[2]) - 1]

        # while len(u) < tran_m.shape[-1]:
        #     rgb = np.concatenate((rgb, rgb))
        #     u = np.concatenate((u, u))
        #     v = np.concatenate((v, v))
        # rgb = rgb[:tran_m.shape[-1]]
        # u = u[:tran_m.shape[-1]]
        # v = v[:tran_m.shape[-1]]

        # # Time Varying Mean Vectors
        # copy_rgb = rgb.copy()
        # copy_u = u.copy()
        # copy_v = v.copy()
        # for r in range(len(rgb)):
        #     rgb[r] = np.sum(copy_rgb[:r + 1, :], axis=0) / (r + 1)
        #     rgb[r] = rgb[r] / np.linalg.norm(rgb[r])
        #     u[r] = np.sum(copy_u[:r + 1, :], axis=0) / (r + 1)
        #     u[r] = u[r] / np.linalg.norm(u[r])
        #     v[r] = np.sum(copy_v[:r + 1, :], axis=0) / (r + 1)
        #     v[r] = v[r] / np.linalg.norm(v[r])

        # tran_m_rgb = Weightedsum(None, None, None).attension_weights_gen(dims, len(rgb), seed)
        # tran_m_flow = Weightedsum(None, None, None).attension_weights_gen(dims, len(u), seed)

        _rgb = Weightedsum(name, rgb[:],
                           None).ws_descriptor_gen(dim, False, tran_m_rgb)
        _u = Weightedsum(name, u[:],
                         None).ws_descriptor_gen(dim, False, tran_m_rgb)
        _v = Weightedsum(name, v[:],
                         None).ws_descriptor_gen(dim, False, tran_m_rgb)

        # # find x for Ax=0
        # _rgb = np.linalg.solve(np.transpose(_rgb), np.zeros(len(_rgb)))
        # _u = np.linalg.solve(np.transpose(_u), np.zeros(len(_u)))
        # _v = np.linalg.solve(np.transpose(_v), np.zeros(len(_v)))

        # # length of mapping on subspace defined by tran_m
        # _rgb = np.matmul(_rgb, tran_m)
        # _u = np.matmul(_u, tran_m)
        # _v = np.matmul(_v, tran_m)
        #
        # _rgb = Weightedsum(name, np.transpose(_rgb)[:], None).ws_descriptor_gen(dim, False, tran_m1)
        # _u = Weightedsum(name, np.transpose(_u)[:], None).ws_descriptor_gen(dim, False, tran_m1)
        # _v = Weightedsum(name, np.transpose(_v), None).ws_descriptor_gen(dim, False, tran_m1)

        all_rgb.append(_rgb)
        all_u.append(_u)
        all_v.append(_v)

    return all_rgb, all_u, all_v, video_labels
Example #11
def input_gen(rgb_feature_path,
              u_feature_path,
              v_feature_path,
              video_names,
              video_labels,
              ws_feature_path,
              train=True,
              steps=5,
              dim=2):
    if not os.path.exists(ws_feature_path):
        # remove_dirctories(ws_feature_path)
        os.mkdir(ws_feature_path)

    writer = tf.python_io.TFRecordWriter(
        os.path.join(ws_feature_path, 'rgb_flow_labels.tfrecord'))
    rgb_path = [
        os.path.join(rgb_feature_path, n + '.npy') for n in video_names
    ][:]
    u_path = [os.path.join(u_feature_path, n + '.npy') for n in video_names][:]
    v_path = [os.path.join(v_feature_path, n + '.npy') for n in video_names][:]

    for rp, up, vp, name, l in zip(rgb_path, u_path, v_path, video_names[:],
                                   video_labels[:]):
        rgb = np.load(rp)
        u = np.load(up)
        v = np.load(vp)
        if train is False:
            selected_frame_nums = np.random.randint(
                steps,
                len(rgb) - steps,
                size=num_samples_per_testing_video,
                dtype=np.int32)
        else:
            selected_frame_nums = np.random.randint(
                steps,
                len(rgb) - steps,
                size=num_samples_per_training_video,
                dtype=np.int32)
        for index in selected_frame_nums:
            if steps == 0:
                _rgb = rgb[index]
                _flow = []
                for i in range(len(u)):
                    _flow.append(u[i])
                    _flow.append(v[i])
            else:
                _rgb = rgb[index - steps:index + steps]
                _flow = []
                for i in range(index - steps, index + steps):
                    _flow.append(u[i])
                    _flow.append(v[i])
            # _rgb = np.transpose(np.reshape(rgb[index], newshape=(1, len(rgb[index]), 1)))
            # _rgb = Weightedsum(name, _rgb, None).ws_descriptor_gen(dim, False, None)
            # _u = u[index - steps: index + steps]
            # _v = v[index - steps: index + steps]
            _flow = Weightedsum(name, np.array(_flow),
                                None).ws_descriptor_gen(dim, False, None)
            # _u = Weightedsum(name, _u, None).ws_descriptor_gen(dim, False, None)
            # _v = Weightedsum(name, _v, None).ws_descriptor_gen(dim, False, None)
            # t = preprocessing.normalize(np.concatenate((_rgb, _flow), axis=0), axis=1)
            # _rgb = t[:2048]
            # _flow = t[2048:]
            # note: the (2048, 1) reshape assumes _rgb is a single frame
            # feature, i.e. the steps == 0 branch above
            _rgb = preprocessing.normalize(np.reshape(_rgb,
                                                      newshape=(2048, 1)),
                                           axis=0)
            _flow = preprocessing.normalize(_flow, axis=0)

            feature = {
                'rgb': _bytes_feature(_rgb.astype(np.float32).tobytes()),
                'flow': _bytes_feature(_flow.astype(np.float32).tobytes()),
                'labels': _int64_feature(l)
            }
            # print(feature)
            # Create an example protocol buffer
            example = tf.train.Example(features=tf.train.Features(
                feature=feature))
            # Serialize to string and write on the file
            writer.write(example.SerializeToString())
    writer.close()
    return len(rgb_path)
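For reference, a minimal sketch of reading these records back with the matching TF 1.x API (the feature names mirror the dict written above; the file path is illustrative):

import tensorflow as tf

def parse_fn(serialized):
    parsed = tf.parse_single_example(serialized, features={
        'rgb': tf.FixedLenFeature([], tf.string),
        'flow': tf.FixedLenFeature([], tf.string),
        'labels': tf.FixedLenFeature([], tf.int64),
    })
    # the descriptors were written as raw float32 bytes, so decode them back
    rgb = tf.decode_raw(parsed['rgb'], tf.float32)
    flow = tf.decode_raw(parsed['flow'], tf.float32)
    return rgb, flow, parsed['labels']

dataset = tf.data.TFRecordDataset('rgb_flow_labels.tfrecord').map(parse_fn)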
Example #12
def input_gen_all_level(rgb_feature_path,
                        u_feature_path,
                        v_feature_path,
                        selected_frames_path,
                        video_names,
                        video_labels,
                        seed,
                        steps=5,
                        dim=2,
                        train=False):
    video_names = np.array(video_names)
    video_labels = video_labels[:num_train_data]
    video_names = video_names[:num_train_data]

    # video_labels = video_labels[num_train]
    # video_names = video_names[num_train]
    # 1776 the max len video
    rgb_path = [
        os.path.join(rgb_feature_path, n + '.npy') for n in video_names
    ]
    u_path = [os.path.join(u_feature_path, n + '.npy') for n in video_names]
    v_path = [os.path.join(v_feature_path, n + '.npy') for n in video_names]
    selected_frames = [
        os.path.join(selected_frames_path, n + '.mat') for n in video_names
    ]
    all_rgb = []
    all_u = []
    all_v = []
    all_label = []

    conv_depth = 1
    first_c = 4
    second_c = 4

    for rp, up, vp, sf, name, l in zip(rgb_path, u_path, v_path,
                                       selected_frames, video_names,
                                       video_labels):
        rgb = np.load(rp)
        u = np.load(up)
        v = np.load(vp)

        for i in range(conv_depth):
            rgb = np.array_split(rgb, steps)
            u = np.array_split(u, steps)
            v = np.array_split(v, steps)

        rgb = np.array(rgb)
        u = np.array(u)
        v = np.array(v)

        rgb_temp = []
        u_temp = []
        v_temp = []

        for _r, _u, _v in zip(rgb, u, v):
            if len(seed.shape) == 1:
                tran_m_rgb = Weightedsum('tran_m', [],
                                         None).transformation_matrix_gen_norm(
                                             dim, len(_r), seed)
                tran_m_rgb = preprocessing.normalize(tran_m_rgb,
                                                     axis=1,
                                                     norm='l2')
                tran_m_flow = Weightedsum('tran_m', [],
                                          None).transformation_matrix_gen_norm(
                                              dim, len(_u), seed)
                tran_m_flow = preprocessing.normalize(tran_m_flow,
                                                      axis=1,
                                                      norm='l2')
            else:
                tran_m_rgb = seed
                tran_m_flow = seed

            _rgb = Weightedsum(name, _r,
                               None).ws_descriptor_gen(dim, False,
                                                       tran_m_rgb[:first_c])
            _u = Weightedsum(name, _u,
                             None).ws_descriptor_gen(dim, False,
                                                     tran_m_flow[:first_c])
            _v = Weightedsum(name, _v,
                             None).ws_descriptor_gen(dim, False,
                                                     tran_m_flow[:first_c])

            rgb_temp.append(_rgb)
            u_temp.append(_u)
            v_temp.append(_v)

        rgb = np.array(rgb_temp)
        u = np.array(u_temp)
        v = np.array(v_temp)

        rgb_temp = []
        u_temp = []
        v_temp = []

        for i in range(np.shape(rgb)[-1]):
            if len(seed.shape) == 1:
                tran_m_rgb = Weightedsum('tran_m', [],
                                         None).transformation_matrix_gen_norm(
                                             dim, steps, seed)
                tran_m_rgb = preprocessing.normalize(tran_m_rgb,
                                                     axis=1,
                                                     norm='l2')
                tran_m_flow = Weightedsum('tran_m', [],
                                          None).transformation_matrix_gen_norm(
                                              dim, steps, seed)
                tran_m_flow = preprocessing.normalize(tran_m_flow,
                                                      axis=1,
                                                      norm='l2')
            else:
                tran_m_rgb = seed
                tran_m_flow = seed

            _rgb = Weightedsum(name, rgb[:, :, i],
                               None).ws_descriptor_gen(dim, False,
                                                       tran_m_rgb[:second_c])
            _u = Weightedsum(name, u[:, :, i],
                             None).ws_descriptor_gen(dim, False,
                                                     tran_m_flow[:second_c])
            _v = Weightedsum(name, v[:, :, i],
                             None).ws_descriptor_gen(dim, False,
                                                     tran_m_flow[:second_c])

            rgb_temp.append(_rgb)
            u_temp.append(_u)
            v_temp.append(_v)

        rgb = np.array(rgb_temp)
        u = np.array(u_temp)
        v = np.array(v_temp)

        rgb = np.swapaxes(rgb, 1, 0)
        u = np.swapaxes(u, 1, 0)
        v = np.swapaxes(v, 1, 0)

        rgb = np.reshape(
            rgb, [np.shape(rgb)[0],
                  np.shape(rgb)[1] * np.shape(rgb)[-1]])
        u = np.reshape(u, [np.shape(u)[0], np.shape(u)[1] * np.shape(u)[-1]])
        v = np.reshape(v, [np.shape(v)[0], np.shape(v)[1] * np.shape(v)[-1]])

        all_rgb.append(rgb)
        all_u.append(u)
        all_v.append(v)
        all_label.append(l)

    return all_rgb, all_u, all_v, all_label
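For intuition, a pure-NumPy shape trace of the two-level pooling above, with random matrices standing in for the Weightedsum transforms (all sizes illustrative):

import numpy as np

T, D, steps, first_c, second_c = 64, 16, 5, 4, 4
rng = np.random.default_rng(0)
frames = rng.normal(size=(T, D))

# level 1: pool each of `steps` temporal chunks down to a (first_c, D) descriptor
chunks = np.array_split(frames, steps)
level1 = np.array([rng.normal(size=(first_c, len(c))) @ c for c in chunks])
print(level1.shape)  # (steps, first_c, D)

# level 2: pool across chunks, one feature column at a time
w2 = rng.normal(size=(second_c, steps))
level2 = np.array([w2 @ level1[:, :, i] for i in range(D)])  # (D, second_c, first_c)
level2 = np.swapaxes(level2, 1, 0)                           # (second_c, D, first_c)
print(level2.reshape(second_c, D * first_c).shape)           # (4, 64), i.e. (second_c, D * first_c)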
Example #13
def input_gen_all(rgb_feature_path,
                  u_feature_path,
                  v_feature_path,
                  selected_frames_path,
                  video_names,
                  video_labels,
                  seed,
                  steps=5,
                  dim=2,
                  train=False):
    video_names = np.array(video_names)
    video_labels = video_labels[:num_train_data]
    video_names = video_names[:num_train_data]

    # video_labels = video_labels[num_train]
    # video_names = video_names[num_train]
    # 1776 the max len video
    rgb_path = [
        os.path.join(rgb_feature_path, n + '.npy') for n in video_names
    ]
    u_path = [os.path.join(u_feature_path, n + '.npy') for n in video_names]
    v_path = [os.path.join(v_feature_path, n + '.npy') for n in video_names]
    selected_frames = [
        os.path.join(selected_frames_path, n + '.mat') for n in video_names
    ]
    all_rgb = []
    all_u = []
    all_v = []
    all_label = []

    for rp, up, vp, sf, name, l in zip(rgb_path, u_path, v_path,
                                       selected_frames, video_names,
                                       video_labels):
        rgb = np.load(rp)
        u = np.load(up)
        v = np.load(vp)

        rgb = gen_subvideo(rgb, steps)
        u = gen_subvideo(u, steps)
        v = gen_subvideo(v, steps)

        if len(seed.shape) == 1:
            tran_m_rgb = Weightedsum('tran_m', [],
                                     None).transformation_matrix_gen_norm(
                                         dim, len(rgb), seed)
            tran_m_rgb = preprocessing.normalize(tran_m_rgb, axis=1, norm='l2')
            tran_m_flow = Weightedsum('tran_m', [],
                                      None).transformation_matrix_gen_norm(
                                          dim, len(u), seed)
            tran_m_flow = preprocessing.normalize(tran_m_flow,
                                                  axis=1,
                                                  norm='l2')
        else:
            tran_m_rgb = seed
            tran_m_flow = seed

        _rgb = Weightedsum(name, rgb[:],
                           None).ws_descriptor_gen(dim, False, tran_m_rgb)
        _u = Weightedsum(name, u[:],
                         None).ws_descriptor_gen(dim, False, tran_m_flow)
        _v = Weightedsum(name, v[:],
                         None).ws_descriptor_gen(dim, False, tran_m_flow)

        all_rgb.append(_rgb)
        all_u.append(_u)
        all_v.append(_v)
        all_label.append(l)

    return np.array(all_rgb), np.array(all_u), np.array(all_v), np.array(
        all_label)
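gen_subvideo itself is not shown in these snippets; a plausible stand-in, judging from the np.array_split usage in input_gen_all_level above, splits the video into `steps` chunks and mean-pools each one (an assumption, not the original helper):

import numpy as np

def gen_subvideo(frames, steps):
    # split (T, D) frame features into `steps` chunks and mean-pool each,
    # yielding a fixed (steps, D) summary regardless of T
    return np.array([chunk.mean(axis=0) for chunk in np.array_split(frames, steps)])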
Example #14
def main(rgb_path_list, u_path_list, v_path_list, train_sp, test_sp, train_test_splits, dim, tMatrices_list,
         dataset='ucf'):
    # generate train/test splits for the chosen dataset
    if dataset == 'hmdb':
        tts = TrainTestSampleGen(ucf_path='', hmdb_path=train_test_splits)
    else:
        tts = TrainTestSampleGen(ucf_path=train_test_splits, hmdb_path='')

    def write_split(videos, labels, save_path):
        # generate weighted-sum video descriptors and serialize them to a TFRecord file
        writer = tf.python_io.TFRecordWriter(os.path.join(save_path, "rgb_flow_labels.tfrecord"))
        for video, label in zip(videos, labels):
            # rgb, flow u and flow v feature containers
            features = []
            # each video has multiple frame-feature files, generated by randomly
            # cropping the frames; note: `tm` itself is unused, only the number of
            # matrices matters, and a fresh random seed is drawn on every pass
            for tm in tMatrices_list:
                seed = np.random.randint(1, 100)
                for rgb_p, u_p, v_p in zip(rgb_path_list, u_path_list, v_path_list):
                    rgb_ff = np.load(os.path.join(rgb_p, video + '.npy'))
                    u_ff = np.load(os.path.join(u_p, video + '.npy'))
                    v_ff = np.load(os.path.join(v_p, video + '.npy'))
                    tran_m_rgb = Weightedsum(None, None, None).attension_weights_gen(dim, len(rgb_ff), seed)
                    tran_m_flow = Weightedsum(None, None, None).attension_weights_gen(dim, len(u_ff), seed)
                    f = np.stack(
                        (
                            preprocessing.normalize(
                                Weightedsum(video, rgb_ff, None).ws_descriptor_gen(dim, False, tran_m_rgb),
                                axis=0, norm='max'),
                            preprocessing.normalize(
                                Weightedsum(video, u_ff, None).ws_descriptor_gen(dim, False, tran_m_flow),
                                axis=0, norm='max'),
                            preprocessing.normalize(
                                Weightedsum(video, v_ff, None).ws_descriptor_gen(dim, False, tran_m_flow),
                                axis=0, norm='max')
                        ), axis=0)
                    # swap axes so the 3 modalities (rgb/u/v) end up on the last axis
                    features.append(np.swapaxes(f, 0, -1))

            features = np.reshape(features, [len(rgb_path_list) * len(tMatrices_list) * dim, 2048, 3])
            feature = {
                'features': _bytes_feature(features.astype(np.float32).tobytes()),
                'label': _int64_feature(label)
            }
            # create an example protocol buffer, serialize it, and write it to the file
            example = tf.train.Example(features=tf.train.Features(feature=feature))
            writer.write(example.SerializeToString())
        writer.close()

    # there are 3 train/test splits for each of ucf101 and hmdb51
    encoder = preprocessing.LabelEncoder()
    for split_num in range(1):
        # (for a quick test, slice e.g. the first 10 entries of each list below)
        train_videos = tts.train_data_label[split_num]['data']
        train_labels = encoder.fit_transform(tts.train_data_label[split_num]['label'])
        test_videos = tts.test_data_label[split_num]['data']
        test_labels = encoder.fit_transform(tts.test_data_label[split_num]['label'])

        # generate and write video descriptors for training, then test videos
        write_split(train_videos, train_labels, train_sp)
        print("Training data generation done!")

        write_split(test_videos, test_labels, test_sp)
        print("Testing data generation done!")
Example #15
    #                    ucf_resNet_crop_flow_ws_save_path_2_v2])

    # keep only the first of the three crop-variant feature paths
    rgb_feature_list = [[ucf_resNet_crop_save_path_v1, ucf_resNet_crop_save_path_v3, ucf_resNet_crop_save_path_v4][0]]
    u_feature_list = [[ucf_resNet_flow_crop_save_path_1_v1, ucf_resNet_flow_crop_save_path_1_v3,
                      ucf_resNet_flow_crop_save_path_1_v4][0]]
    v_feature_list = [[ucf_resNet_flow_crop_save_path_2_v1, ucf_resNet_flow_crop_save_path_2_v3,
                      ucf_resNet_flow_crop_save_path_2_v4][0]]

    best_acc = 0
    acc_list = []
    for i in range(1):
        # the max length without selection flow is 1776, with selection flow is 1150
        seed_1 = np.random.randint(1, 100)
        seed_2 = np.random.randint(1, 100)
        print("The seed 1 for trans_matrix is:", seed_1, "The seed 2 for trans_matrix is:", seed_2)
        tran_m_1 = Weightedsum('tran_m', [], None).transformation_matrix_gen(dims, 1776, seed_1)
        tran_m_1_norm = preprocessing.normalize(tran_m_1, axis=1)
        tran_m_2 = Weightedsum('tran_m', [], None).transformation_matrix_gen(dims, 1776, seed_2)
        tran_m_2_norm = preprocessing.normalize(tran_m_2, axis=1)

        tMatrices = [tran_m_1_norm, tran_m_2_norm]

        main(rgb_feature_list, u_feature_list, v_feature_list, train_tfRecord_save_path, test_tfRecord_save_path,
             ucf_train_test_splits_save_path, dims, tMatrices)
        accuracy = classify(
            os.path.join(train_tfRecord_save_path, "rgb_flow_labels.tfrecord"),
            os.path.join(test_tfRecord_save_path, "rgb_flow_labels.tfrecord"),
            num_train_data * num_samples_per_training_video,
            num_test_data * num_samples_per_testing_video,
            num_samples_per_testing_video,
            (len(rgb_feature_list)*len(tMatrices)*dims, 2048, 3), dims)