Example No. 1
def re_rank_with_embeddings(result_list, embeddings_matrix,
                            figure_identifiers):
    """Re-rank an initial result list using embedding-based similarity.

    Args:
      result_list: (dictionary) the result list to be re-ranked.
      embeddings_matrix: (numpy array) the embedding matrix for re-ranking.
      figure_identifiers: (list) the identifiers of the figures in the embedding matrix.

    Returns:
      (dict) The re-ranked result list, sorted by combined score per query.
    """
    re_ranker = KnnSearcher(embeddings_matrix, figure_identifiers, 0)
    re_ranked_result_list = re_ranker.re_rank_result_list(result_list)
    re_ranked_result_list = utils.min_max_norm(re_ranked_result_list)
    initial_result_list = utils.min_max_norm(result_list)

    final_result_list = {}
    for figure_test_id in initial_result_list:
        single_result_list = {}

        for res_id in initial_result_list[figure_test_id]:
            single_result_list[res_id] = initial_result_list[figure_test_id][res_id] + \
                                         re_ranked_result_list[figure_test_id][res_id]

        sorted_result = [(k, single_result_list[k]) for k in sorted(
            single_result_list, key=single_result_list.get, reverse=True)]
        final_result_list[figure_test_id] = sorted_result
    return final_result_list
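All of the examples on this page exercise a min_max_norm helper whose implementation is not shown. As a point of reference, here is a minimal sketch of the dict-of-dicts variant that Example No. 1 appears to rely on (assumption: utils.min_max_norm rescales each per-query score dict to the range [0, 1]):

def min_max_norm(result_list):
    """Hypothetical sketch: rescale every per-query score dict to [0, 1]."""
    normed = {}
    for query_id, scores in result_list.items():
        lo, hi = min(scores.values()), max(scores.values())
        span = (hi - lo) or 1.0  # guard against a constant score list
        normed[query_id] = {doc_id: (s - lo) / span
                            for doc_id, s in scores.items()}
    return normed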
Example No. 2

    def reprocess(self, batch, cut_list):
        ids = [batch[ind]["id"] for ind in cut_list]
        texts = [batch[ind]["text"] for ind in cut_list]
        mel_targets = [batch[ind]["mel_target"] for ind in cut_list]
        Ds = [batch[ind]["D"] for ind in cut_list]
        f0s = [
            min_max_norm(batch[ind]["f0"],
                         min_val=hparams.f0_min,
                         max_val=hparams.f0_max) for ind in cut_list
        ]
        energies = [
            min_max_norm(batch[ind]["energy"],
                         min_val=hparams.energy_min,
                         max_val=hparams.energy_max) for ind in cut_list
        ]
        for text, D, id_ in zip(texts, Ds, ids):
            if len(text) != len(D):
                print('Text and duration lengths must match:')
                print('text: ', sequence_to_text(text))
                print(text, text.shape, D, D.shape, id_)
        length_text = np.array([text.shape[0] for text in texts])
        length_mel = np.array([mel.shape[0] for mel in mel_targets])

        texts = pad_1D(texts)
        Ds = pad_1D(Ds)
        mel_targets = pad_2D(mel_targets)
        f0s = pad_1D(f0s)
        energies = pad_1D(energies)
        log_Ds = np.log(Ds + hparams.log_offset)

        out = {
            "id": ids,
            "text": texts,
            "mel_target": mel_targets,
            "D": Ds,
            "log_D": log_Ds,
            "f0": f0s,
            "energy": energies,
            "src_len": length_text,
            "mel_len": length_mel
        }

        return out
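Example No. 2 passes explicit min_val/max_val bounds taken from hparams instead of letting the data define its own range. A plausible sketch of that array variant (assumptions: inputs are NumPy arrays, and values are clipped into the supplied range before scaling):

import numpy as np

def min_max_norm(x, min_val=None, max_val=None):
    # Hypothetical sketch: fall back to the data's own range when no bounds are given.
    lo = x.min() if min_val is None else min_val
    hi = x.max() if max_val is None else max_val
    return np.clip((x - lo) / (hi - lo), 0.0, 1.0)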
Example No. 3
def parent_key_from_parent_graph(df, key_list, g_parent, g_train, time):
    """
    calculate sets of top-k keywords from the parent graph based on centrality measures
    :param df: dataset in dataframe
    :param key_list: keyword list with integer ids
    :param g_parent: parent graph (previous year of training period)
    :param g_train: train graph (currently unused here)
    :param time: time information
    :return: three top-k keyword sets, ranked by authority score, article score, and degree
    """
    list_range = time[5]
    parent_node_feature = build_feature_set(df, key_list, g_parent, "parent")

    parent_node_feature['degree'] = parent_node_feature.apply(lambda row: len(g_parent[row['node_index']]), axis=1)
    parent_node_feature['degree'] = ut.min_max_norm(parent_node_feature['degree'])

    parent_aut = parent_node_feature.sort_values('term_aut', ascending=False)
    parent_aut = parent_aut.reset_index()
    parent_aut = set(parent_aut['node_index'][0:list_range])

    parent_art = parent_node_feature.sort_values('term_art', ascending=False)
    parent_art = parent_art.reset_index()
    parent_art = set(parent_art['node_index'][0:list_range])

    parent_deg = parent_node_feature.sort_values('degree', ascending=False)
    parent_deg = parent_deg.reset_index()
    parent_deg = set(parent_deg['node_index'][0:list_range])

    return parent_aut, parent_art, parent_deg
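The three sort_values/reset_index/slice passes above each extract the top-k rows of a single column. An equivalent, more compact idiom using pandas' standard nlargest (a sketch; tie-breaking may differ slightly from a full descending sort):

def top_k_nodes(node_feature, column, k):
    # Return the node indices of the k largest values in `column` as a set.
    return set(node_feature.nlargest(k, column)['node_index'])

parent_aut = top_k_nodes(parent_node_feature, 'term_aut', list_range)
parent_art = top_k_nodes(parent_node_feature, 'term_art', list_range)
parent_deg = top_k_nodes(parent_node_feature, 'degree', list_range)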
Example No. 4
    def calc_init_score(self):
        mean = np.average(self.x_train, axis=0)
        std = np.std(self.x_train, axis=0)
        zero_ids = np.where(std == 0)[0]
        if len(zero_ids) > 0:
            logger.warning("x_train has feature(s) with zero std; substituting 1 to avoid division by zero.")
            std[zero_ids] = 1

        init_score = np.array([np.average(((x - mean) / std) ** 2) for x in self.x_train])
        # init_score = np.array([np.average((x - mean) / std) for x in self.x_train])
        init_score = utils.min_max_norm(init_score)
        return init_score
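The list comprehension in Example No. 4 computes each sample's mean squared z-score across features. Broadcasting lets the same quantity be computed in one vectorized expression; the line below is a drop-in replacement for the loop:

init_score = np.mean(((self.x_train - mean) / std) ** 2, axis=1)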
Example No. 5
    if gpu_id != -1:
        model = model.cuda(gpu_id)

    model.load_state_dict(torch.load(gcn_model_path))
    model.eval()
    vid2ans = {}
    for step, data in enumerate(train_loader):
        (feat, adj, labeled_index_in_the_graph,
         labeled_index), pred, vid = data
        feat, adj, pred = Variable(feat), Variable(adj), Variable(pred)

        if gpu_id != -1:
            feat = feat.cuda(gpu_id)
            adj = adj.cuda(gpu_id)
            pred = pred.cuda(gpu_id)

        output = model(feat, adj).data.cpu().numpy().flatten()
        labeled_index_in_the_graph = np.array(
            labeled_index_in_the_graph).flatten()
        labeled_index = np.array(labeled_index).flatten()
        sample_index = get_sample_index(labeled_index, pred)
        new_pred = pred.data.cpu().numpy().flatten().copy()
        if "Normal" not in vid[0]:
            new_pred[sample_index] = output
        vid2ans[vid[0]] = min_max_norm(new_pred)

    for v in vid2ans:
        output = vid2ans[v]
        output_txt = path.join(output_folder, "%s.txt" % v)
        np.savetxt(output_txt, output, fmt='%.18f', delimiter='\n')
        print "Done: %s" % v
Example No. 6
    def _preprocess(paths, save_path, length, img_size, segm_part_colors):
        video_path, depth_path, segm_path, info_path = paths
        for p in paths:
            if not p.exists():
                print('Sample not found; skipping: {}'.format(p.parents[0]))
                return
        
        # read all videos
        color_video = dataio.read_video(video_path)
        depth_video = dataio.read_depth_mat(depth_path)
        segm_video = dataio.read_segm_mat(segm_path)
        joints = dataio.read_joints_mat(info_path) # (n_frames, n_points, 2)

        # output path
        id_string = video_path.name[0:-4] # without extension
        name = "{}_{}".format(video_path.parents[1].name, id_string)

        # compute mean center points of human bbox from joints to crop video frames
        # TODO: make pose video
        center_points = []
        for f_joints in joints:
            bottom_left = np.amin(f_joints, axis=0)
            top_right = np.amax(f_joints, axis=0)
            center = (top_right + bottom_left) / 2.0  # midpoint of the joint bbox
            center_points.append(center)
        center_point = np.array(center_points).mean(axis=0)
        
        T, H, W, C = color_video.shape
        cx = int(center_point[0])
        if cx + H // 2 > W:
            lx = W - H
        elif cx - H // 2 < 0:
            lx = 0
        else:
            lx = cx - H // 2

        ### color
        # crop
        color_video = color_video[:, :, lx:lx+H]

        #resize
        color_video = [imresize(img, (img_size, img_size)) for img in color_video]
        color_video = np.stack(color_video)

        # save
        dataio.save_video_as_images(color_video, save_path/name/'color')
        dataio.write_video(color_video, save_path/'color'/(name+".mp4"))
        
        ### depth
        # crop
        depth_video = depth_video[:, :, lx:lx+H]
        
        # resize
        depth_video = [imresize(img, (img_size, img_size), 'nearest') for img in depth_video]
        depth_video = np.stack(depth_video)

        # limit value range of depth video
        fg_values = depth_video[depth_video != 10000000000.0]  # mask out the background sentinel
        depth_video_vis = np.clip(depth_video, fg_values.min(), fg_values.max())
        depth_video_vis = utils.min_max_norm(depth_video_vis) * 255.
        depth_video_vis = depth_video_vis.astype(np.uint8)
        depth_video_np = np.clip(depth_video, 0, 15.0)

        # save
        np.save(save_path/name/'depth', depth_video_np)
        dataio.write_video(depth_video_vis, save_path/'depth'/(name+".mp4"))

        ### semantic segmentation
        # crop
        segm_video  = segm_video[:, :, lx:lx+H]

        # resize
        segm_video = [imresize(img, (img_size, img_size), 'nearest') for img in segm_video]
        segm_video = np.stack(segm_video)

        # give region color to segmentation video
        T, H, W = segm_video.shape
        N = len(segm_part_colors)
        segm_video_vis = np.zeros((T, H, W, 3), dtype=np.uint8)
        for i in range(N):
            indices = segm_video == i
            segm_video_vis[indices] = segm_part_colors[i]

        segm_video_np = np.eye(N)[segm_video]
        np.save(save_path/name/'segm', segm_video_np)
        dataio.write_video(segm_video_vis, save_path/'segm'/(name+".mp4"))

        return [name, T]
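The left-edge computation in Example No. 6 implements a centered square crop clamped to the frame. The same boundary handling as a small self-contained helper (a sketch; like the code above, it assumes H <= W):

def square_crop_left(cx, crop, width):
    # Left edge of a crop-wide window centered at cx, clamped to [0, width - crop].
    return int(max(0, min(cx - crop // 2, width - crop)))

With it, the branching above reduces to lx = square_crop_left(cx, H, W).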
Example No. 7
def dynamic_graph_feature_set(df, key_list, train_data, g_parent, g_train, g_train_static, time):
    """
    calculates node feature dict and train data dict (feature set for non-connected node pairs) for training period
    :param df: dataset in dataframe
    :param key_list: keyword list with integer ids
    :param train_data: train data dict (empty feature set for non-connected node pairs)
    :param g_parent: parent graph
    :param g_train: dynamic training graph (dict of per-period training graphs)
    :param g_train_static: static training graph
    :param time: time information
    :return: calculated dict of node features and train data
    """
    nt_score = [25, 5, 3, 1, 0]
    ts_train = time[1]
    ts_test = time[2]
    it_index = time[4]
    list_range = time[5]
    node_feature = {}
    year_score = {}
    x1 = range(ts_test - ts_train + 1)
    year_in = np.power(x1, 2)
    max_year_weight = sum(range(1, ts_test - ts_train + 1, 1))
    parent_keys_aut, parent_keys_art, parent_keys_deg = parent_key_from_parent_graph(df, key_list, g_parent, g_train,
                                                                                     time)

    for t in range(ts_train, ts_test, it_index):
        ########### node feature dataframe #############################################################################
        node_feature[t] = build_feature_set(df, key_list, g_train[t], "train")
        node_feature[t]['degree'] = node_feature[t].apply(lambda row:
                                                          len(g_train[t][row['node_index']]), axis=1)
        node_feature[t]['citation'] = node_feature[t].apply(lambda row: feature_citation(g_parent,
                                                                                         g_train, row, t, ts_train),
                                                            axis=1)
        year_score[t] = year_in[t - ts_train + 1]

        ######------calculate sets of grandparents, parents, children, guests-------------------------------------------
        p1_aut = parent_keys_aut.intersection(set(g_train[t].nodes()))
        p2_aut, ch_aut, guest_aut, inc_aut = feature_node_type(p1_aut, g_train[t])
        # print(len(p1),len(p2),len(ch),len(guest))
        # parent_keys = p1.union(p2)
        parent_keys_aut = node_feature[t].sort_values('term_aut', ascending=False)
        parent_keys_aut = parent_keys_aut.reset_index()
        parent_keys_aut = set(parent_keys_aut['node_index'][0:list_range])

        p1_art = parent_keys_art.intersection(set(g_train[t].nodes()))
        p2_art, ch_art, guest_art, inc_art = feature_node_type(p1_art, g_train[t])
        # parent_keys_art = p1_art.union(p2_art)
        parent_keys_art = node_feature[t].sort_values('term_art', ascending=False)
        parent_keys_art = parent_keys_art.reset_index()
        parent_keys_art = set(parent_keys_art['node_index'][0:list_range])

        p1_deg = parent_keys_deg.intersection(set(g_train[t].nodes()))
        p2_deg, ch_deg, guest_deg, inc_deg = feature_node_type(p1_deg, g_train[t])
        # parent_keys_deg = p1_deg.union(p2_deg)
        parent_keys_deg = node_feature[t].sort_values('degree', ascending=False)
        parent_keys_deg = parent_keys_deg.reset_index()
        parent_keys_deg = set(parent_keys_deg['node_index'][0:list_range])

        #####-----------------------------------------------------------------------------------------------------------
        node_feature[t]['degrees'] = ut.min_max_norm(node_feature[t]['degree'])
        #  partition, d_c = feature_partition(g_train[t], node_feature[t])
        #  node_feature[t]['partition_id'] = node_feature[t].apply(lambda row:
        #                                                          partition[row['node_index']], axis=1)
        #  node_feature[t]['partition_cnt'] = node_feature[t].apply(lambda row:
        #                                                           d_c[partition[row['node_index']]]
        #                                                           [row['node_index']], axis=1)

        # node_feature[t]['y_weight'] = node_feature[t].apply(lambda row:
        #                                                     feature_y_weight(g_train_static,
        #                                                                      row['node_index'],
        #                                                                      t,
        #                                                                      year_score,
        #                                                                      max_year_weight), axis=1)
        # node_feature[t]['node_type_aut'] = node_feature[t].apply(lambda row:
        #                                                          min((nt_score[0] if row['node_index'] in p1_aut
        #                                                          else nt_score[1] if row['node_index'] in p2_aut
        #                                                          else nt_score[2] if row['node_index'] in ch_aut
        #                                                          else nt_score[3] if row['node_index'] in guest_aut
        #                                                          else nt_score[4])+
        #                                                              inc_aut[row['node_index']],25), axis=1)
        #
        # node_feature[t]['node_type_art'] = node_feature[t].apply(lambda row:
        #                                                          min((nt_score[0] if row['node_index'] in p1_art
        #                                                          else nt_score[1] if row['node_index'] in p2_art
        #                                                          else nt_score[2] if row['node_index'] in ch_art
        #                                                          else nt_score[3] if row['node_index'] in guest_art
        #                                                          else nt_score[4])+
        #                                                              inc_art[row['node_index']],25), axis=1)
        # node_feature[t]['node_type_deg'] = node_feature[t].apply(lambda row:
        #                                                          min((nt_score[0] if row['node_index'] in p1_deg
        #                                                          else nt_score[1] if row['node_index'] in p2_deg
        #                                                          else nt_score[2] if row['node_index'] in ch_deg
        #                                                          else nt_score[3] if row['node_index'] in guest_deg
        #                                                          else nt_score[4])+inc_deg[row['node_index']],25), axis=1)

        node_feature[t]['node_type_aut'] = node_feature[t].apply(lambda row:
                                                                 nt_score[0] if row['node_index'] in p1_aut
                                                                 else nt_score[1] if row['node_index'] in p2_aut
                                                                 else nt_score[2] if row['node_index'] in ch_aut
                                                                 else nt_score[3] if row['node_index'] in guest_aut
                                                                 else nt_score[4], axis=1)

        node_feature[t]['node_type_art'] = node_feature[t].apply(lambda row:
                                                                 nt_score[0] if row['node_index'] in p1_art
                                                                 else nt_score[1] if row['node_index'] in p2_art
                                                                 else nt_score[2] if row['node_index'] in ch_art
                                                                 else nt_score[3] if row['node_index'] in guest_art
                                                                 else nt_score[4], axis=1)

        node_feature[t]['node_type_deg'] = node_feature[t].apply(lambda row:
                                                                 nt_score[0] if row['node_index'] in p1_deg
                                                                 else nt_score[1] if row['node_index'] in p2_deg
                                                                 else nt_score[2] if row['node_index'] in ch_deg
                                                                 else nt_score[3] if row['node_index'] in guest_deg
                                                                 else nt_score[4], axis=1)

        ######----------------------------------------------------------------------------------------------------------
        #############################  dataframe non-connected node pair features ######################################
        train_data[t] = train_data_frame_dynamic(train_data[t], node_feature[t], g_train[t])
    return node_feature, train_data
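The three node_type columns at the end of the loop repeat one if/elif chain per centrality flavor. A compact equivalent (a sketch; node_type_score is a hypothetical helper, not part of the original module):

def node_type_score(node, p1, p2, ch, guest, nt_score):
    # First matching group wins, mirroring the conditional chain in the lambdas above.
    for score, group in zip(nt_score, (p1, p2, ch, guest)):
        if node in group:
            return score
    return nt_score[-1]  # nodes in none of the groups get the default score

for col, groups in (('node_type_aut', (p1_aut, p2_aut, ch_aut, guest_aut)),
                    ('node_type_art', (p1_art, p2_art, ch_art, guest_art)),
                    ('node_type_deg', (p1_deg, p2_deg, ch_deg, guest_deg))):
    node_feature[t][col] = node_feature[t]['node_index'].map(
        lambda n, g=groups: node_type_score(n, *g, nt_score=nt_score))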