def confidence_calibration_check(round_idx, respondent_idx, d_metric_dict, n_neighbors):
    """
    For every test point, compute the proportion of its n_neighbors nearest
    neighbors (in the learned metric space) whose observed label is positive,
    and store it alongside the weighted prediction for calibration analysis.

    :param round_idx: integer, ranging from 1 to 10
    :param respondent_idx: id of respondents, ranging from the 1 to 35
    :param d_metric_dict: dict to save learned distance metric, indexed by
        the respondent_id and the round_idx
    :param n_neighbors: the number of nearest neighbors defining each
        point's neighborhood
    :return: DataFrame indexed like y_test with columns
        ['weighted_y_test', 'y_test', 'neighbor reci prop']
    """
    _, X_test, _, y_test, _ = load_dataset(onehot=True)
    X_test = preprocessing_data_with_unit_var(X_test)
    d_metric = d_metric_dict['R%d_W%d' % (respondent_idx, round_idx)]
    weighted_y_test = pd.DataFrame(
        weighted_prediction_round(round_idx)[respondent_idx - 1].values,
        index=y_test.index,
        columns=['weighted_y_test'])
    weighted_y_test['y_test'] = y_test
    # Placeholder values; every row is overwritten in the loop below.
    weighted_y_test['neighbor reci prop'] = y_test
    X = X_test.values  # features never change; hoist out of the loop
    for idx in X_test.index:
        diff = X - X_test.loc[idx].values
        # Squared metric distance d_i = diff_i^T M diff_i for every point.
        # einsum yields just the needed diagonal instead of building the
        # full N x N product and discarding all off-diagonal entries.
        dist = pd.Series(np.einsum('ij,ij->i', diff @ d_metric, diff),
                         index=X_test.index)
        # n_neighbors-th smallest distance (position 0 is the point itself,
        # at distance 0), i.e. the radius of the neighborhood hypersphere.
        max_d = np.sort(dist.values)[n_neighbors]
        neighbor_list = [
            my_idx for my_idx in dist[dist <= max_d].index if my_idx != idx
        ]
        # BUGFIX: the original wrote running partial averages into each
        # *neighbor's* row inside the accumulation loop; the proportion of
        # positive-labelled neighbors belongs to the center point idx and
        # must be written once, after the sum is complete.
        reci = weighted_y_test.loc[neighbor_list, 'y_test'].sum()
        weighted_y_test.loc[idx, 'neighbor reci prop'] = reci / n_neighbors
    return weighted_y_test
def similarity_fig_from_metric_by_nca_part(round_idx, respondent_idx, n_neighbors, d_metric_dict):
    """
    similar as nca, only here the neighborhood is defined as the minimal
    hypersphere that contains the n_neighbors similar points of x_i

    :param round_idx: integer, ranging from 1 to 10
    :param respondent_idx: id of respondents, ranging from the 1 to 35
    :param n_neighbors: the number of similar points to define the neighborhood
    :param d_metric_dict: dict to save learned distance metric,
        indexed by the respondent_id and the round_idx
    :return: a similarity connection graph and the similarity edge list
    """
    _, X_test, _, y_test, _ = load_dataset(onehot=True)
    X_test = preprocessing_data_with_unit_var(X_test)
    d_metric = d_metric_dict['R%d_W%d' % (respondent_idx, round_idx)]
    weighted_y_test = pd.DataFrame(weighted_prediction_quantile(
        weighted_prediction_round(round_idx))[respondent_idx - 1].values,
        index=y_test.index)
    similarity_edges = []
    X = X_test.values  # features never change; hoist out of the loop
    for idx in X_test.index:
        diff = X - X_test.loc[idx].values
        # Squared metric distance per point; einsum computes only the
        # diagonal instead of the full N x N product it was taken from.
        dist = pd.Series(np.einsum('ij,ij->i', diff @ d_metric, diff),
                         index=X_test.index)
        same_class_mask = (
            weighted_y_test == weighted_y_test.loc[idx]).values.ravel()
        same_class_dist = dist[same_class_mask]
        # Radius = distance of the n_neighbors-th closest same-class point,
        # capped when fewer same-class points exist.
        max_d = np.sort(same_class_dist.values)[min(
            len(same_class_dist) - 1, n_neighbors)]
        # Connect idx to everything inside the hypersphere except itself.
        neighbor_list = [
            my_idx for my_idx in dist[dist <= max_d].index if my_idx != idx
        ]
        similarity_edges.extend((idx, item) for item in neighbor_list)
    fig, edges = similarity_fig_from_weighted_prediction(
        round_idx=round_idx,
        respondent_idx=respondent_idx,
        similarity_edge_list=similarity_edges)
    return fig, edges
def similarity_fig_from_metric_by_nca(round_idx, respondent_idx, d_metric_dict):
    """
    nca -> neighborhood component analysis
    First is to define the neighborhood of data point x_i in the transformed
    space, it is the minimal hypersphere which centers at x_i, and its radius
    is equal to the farthest point which is similar with x_i. This function
    is to build similarity edges between x_i and all the data points within
    its neighborhood.

    :param round_idx: integer, ranging from 1 to 10
    :param respondent_idx: id of respondents, ranging from the 1 to 35
    :param d_metric_dict: dict to save learned distance metric,
        indexed by the respondent_id and the round_idx
    :return: a similarity connection graph and the similarity edge list
    """
    _, X_test, _, y_test, _ = load_dataset(onehot=True)
    X_test = preprocessing_data_with_unit_var(X_test)
    d_metric = d_metric_dict['R%d_W%d' % (respondent_idx, round_idx)]
    weighted_y_test = pd.DataFrame(weighted_prediction_quantile(
        weighted_prediction_round(round_idx))[respondent_idx - 1].values,
        index=y_test.index)
    similarity_edges = []
    X = X_test.values  # features never change; hoist out of the loop
    for idx in X_test.index:
        diff = X - X_test.loc[idx].values
        # Squared metric distance per point; einsum computes only the
        # diagonal instead of the full N x N product it was taken from.
        dist = pd.Series(np.einsum('ij,ij->i', diff @ d_metric, diff),
                         index=X_test.index)
        # Radius = distance to the farthest point predicted in the same
        # class as idx.
        same_class_mask = (
            weighted_y_test == weighted_y_test.loc[idx]).values.ravel()
        max_d = dist[same_class_mask].max()
        # Connect idx to everything inside the hypersphere except itself.
        neighbor_list = [
            my_idx for my_idx in dist[dist <= max_d].index if my_idx != idx
        ]
        similarity_edges.extend((idx, item) for item in neighbor_list)
    fig, edges = similarity_fig_from_weighted_prediction(
        round_idx=round_idx,
        respondent_idx=respondent_idx,
        similarity_edge_list=similarity_edges)
    return fig, edges
def cal_distance_with_df(d_metric_dict):
    """
    Precompute the pairwise squared metric distances between all test points
    for every (respondent, round) metric and persist them to disk.

    :param d_metric_dict: dict of learned distance metrics, indexed by
        'R<respondent>_W<round>'
    :return: None; side effect is writing ../new_data/lmnn_distance.npy,
        a pickled dict mapping each key to an N x N distance DataFrame
        (column idx holds the distances of every point to point idx).
    """
    _, X_test, _, y_test, _ = load_dataset(onehot=True)
    X_test = preprocessing_data_with_unit_var(X_test)
    X = X_test.values  # features never change; hoist out of all loops
    distance_dict = dict()
    for round_idx in range(1, 11):
        for respondent_idx in range(1, 36):
            key = 'R%d_W%d' % (respondent_idx, round_idx)
            d_metric = d_metric_dict[key]
            distance_df = pd.DataFrame(np.zeros((len(X_test), len(X_test))),
                                       index=X_test.index,
                                       columns=X_test.index)
            for idx in X_test.index:
                diff = X - X_test.loc[idx].values
                # Squared metric distance d_i = diff_i^T M diff_i; einsum
                # yields just the diagonal instead of building the full
                # N x N product and discarding the rest.
                distance_df[idx] = np.einsum('ij,ij->i', diff @ d_metric,
                                             diff)
            distance_dict[key] = distance_df
            print(respondent_idx, round_idx)  # progress indicator
    # np.save pickles the dict (allow_pickle defaults to True).
    np.save(r'../new_data/lmnn_distance.npy', distance_dict)
    return None
def similarity_edges_from_metric_by_knn(round_idx, respondent_idx, n_neighbors, d_metric_dict):
    """
    Build a k-nearest-neighbor adjacency mapping in the learned metric space:
    each test point is mapped to the points inside the minimal hypersphere
    containing its n_neighbors closest points.

    :param round_idx: integer, ranging from 1 to 10
    :param respondent_idx: id of respondents, ranging from the 1 to 35
    :param n_neighbors: the number of nearest neighbors defining each
        point's neighborhood
    :param d_metric_dict: dict to save learned distance metric,
        indexed by the respondent_id and the round_idx
    :return: dict mapping each test index to its list of neighbor indices
    """
    _, X_test, _, y_test, _ = load_dataset(onehot=True)
    X_test = preprocessing_data_with_unit_var(X_test)
    d_metric = d_metric_dict['R%d_W%d' % (respondent_idx, round_idx)]
    similarity_edges = {}
    X = X_test.values  # features never change; hoist out of the loop
    for idx in X_test.index:
        diff = X - X_test.loc[idx].values
        # Squared metric distance per point; einsum computes only the
        # diagonal instead of the full N x N product it was taken from.
        dist = pd.Series(np.einsum('ij,ij->i', diff @ d_metric, diff),
                         index=X_test.index)
        # n_neighbors-th smallest distance (position 0 is the point itself).
        max_d = np.sort(dist.values)[n_neighbors]
        similarity_edges[idx] = [
            my_idx for my_idx in dist[dist <= max_d].index if my_idx != idx
        ]
    return similarity_edges
def similarity_fig_from_metric_by_thres(round_idx, respondent_idx, d_metric_dict, d_thres):
    """
    Basically to build the similarity edges between node_i and node_j if
    there d^2(node_i, node_j) within the transformed feature space is less
    than the threshold value, d_thres.

    :param round_idx: integer, ranging from 1 to 10
    :param respondent_idx: id of respondents, ranging from the 1 to 35
    :param d_metric_dict: dict to save learned distance metric,
        indexed by the respondent_id and the round_idx
    :param d_thres: threshold value to determine whether the similarity
        edges should be built
    :return: a similarity connection graph and the similarity edge list
    """
    _, X_test, _, y_test, _ = load_dataset(onehot=True)
    X_test = preprocessing_data_with_unit_var(X_test)
    d_metric = d_metric_dict['R%d_W%d' % (respondent_idx, round_idx)]
    similarity_edges = []
    X = X_test.values  # features never change; hoist out of the loop
    for idx in X_test.index:
        diff = X - X_test.loc[idx].values
        # Squared metric distance per point; einsum computes only the
        # diagonal instead of the full N x N product it was taken from.
        dist = pd.Series(np.einsum('ij,ij->i', diff @ d_metric, diff),
                         index=X_test.index)
        # Connect idx to every point within the fixed threshold, minus itself.
        neighbor_list = [
            my_idx for my_idx in dist[dist <= d_thres].index if my_idx != idx
        ]
        similarity_edges.extend((idx, item) for item in neighbor_list)
    fig, edges = similarity_fig_from_weighted_prediction(
        round_idx=round_idx,
        respondent_idx=respondent_idx,
        similarity_edge_list=similarity_edges)
    return fig, edges