示例#1
0
文件: fil_.py 项目: Chen-Cai-OSU/Esme
def edgefeat(g, norm=False, fil='ricci'):
    """
    Build a symmetric matrix of edge values for a connected graph.

    Wrapper for edge_probability, ricciCurvature and Jaccard computation.
    Nodes are relabelled to consecutive integers first, so row/column ``i``
    of the result refers to node ``i`` of the relabelled graph.

    :param g: connected networkx graph
    :param norm: whether to divide the matrix by its largest absolute entry
        (skipped when the matrix is all zeros)
    :param fil: edge_p/ricci/jaccard; any other value prints a notice and
        yields an all-zero matrix
    :return: gp, a dense numpy array of shape (n_node, n_node)
    :raises AssertionError: if g is not connected or the result is not symmetric
    """
    g = nx.convert_node_labels_to_integers(g)
    assert nx.is_connected(g)
    n_node = len(g)
    gp = np.zeros((n_node, n_node))
    not_implemented_msg = 'Have not implemented fil %s. Treat as all zeros' % fil

    try:
        if fil == 'edge_p':
            # nx.adj_matrix was only an alias of nx.adjacency_matrix.
            adj_m = nx.adjacency_matrix(g).todense()  # dense matrix
            smoothed = np.array(smoother(adj_m, h=0.3))
            # Keep smoothed values on existing edges only; np.asarray drops
            # the np.matrix wrapper so every branch returns a plain ndarray.
            gp = np.asarray(np.multiply(adj_m, smoothed))
        elif fil == 'ricci':
            g = ricciCurvature(g, alpha=0.5, weight='weight')
            ricci_dict = nx.get_edge_attributes(g, 'ricciCurvature')
            for (u, v), curvature in ricci_dict.items():
                gp[u, v] = curvature
            # Each undirected edge was written once; mirror it.
            gp = gp + gp.T
        elif fil == 'jaccard':
            # Restrict to g.edges(): jaccard can also be defined on non-edges.
            for u, v, jac in nx.jaccard_coefficient(g, g.edges()):
                gp[u, v] = jac
            gp = gp + gp.T
        else:
            print(not_implemented_msg)
    except AssertionError:
        # Best-effort fallback kept from the original: an AssertionError
        # raised by one of the helpers above results in an all-zero matrix.
        print(not_implemented_msg)
        gp = np.zeros((n_node, n_node))

    assert (gp == gp.T).all()
    if norm:
        # The builtin max() over a 2-D array compares whole rows and raises
        # ValueError, so reduce with numpy instead.
        scale = float(np.abs(gp).max())
        if scale > 0:
            gp = gp / scale
    return gp
示例#2
0
文件: fil_.py 项目: Chen-Cai-OSU/Esme
def nodefeat(g, fil, norm=False, **kwargs):
    """
    Compute one scalar feature per node of a connected graph.

    :param g: connected networkx graph. For 'hop' and 'ricci' the values are
        looked up by the integers 0..n_node-1, so the nodes must carry
        exactly those labels.
    :param fil: deg, cc, random, hop, fiedler or ricci
    :param norm: whether to divide the feature by its largest absolute value
        (skipped when the feature is all zeros)
    :param kwargs: ``base`` (int), the source node; required when fil == 'hop'
    :return: node feature (np.array of shape (n_node, 1))
    :raises AssertionError: if g is not connected, ``base`` is not an int, or
        the computed feature does not have shape (n_node, 1)
    :raises KeyError: if fil == 'hop' and ``base`` is missing
    :raises ValueError: if fil is not one of the supported names
    """
    # g = nx.random_geometric_graph(100, 0.2)
    assert nx.is_connected(g)
    n_node = len(g)

    if fil == 'deg':
        feat = np.array(list(dict(nx.degree(g)).values()))
    elif fil == 'cc':
        feat = np.array(list(nx.closeness_centrality(g).values()))
    elif fil == 'random':
        feat = np.random.random((n_node, 1))
    elif fil == 'hop':
        base = kwargs['base']
        assert isinstance(base, int)
        # Shortest-path length from ``base`` to every node, keyed by node.
        length = nx.single_source_dijkstra_path_length(g, base)
        feat = np.array([length[i] for i in range(n_node)])
    elif fil == 'fiedler':
        feat = fiedler_vector(g, normalized=False)  # np.ndarray
    elif fil == 'ricci':
        g = ricciCurvature(g, alpha=0.5, weight='weight')
        ricci_dict = nx.get_node_attributes(g, 'ricciCurvature')
        feat = np.array([ricci_dict[i] for i in range(n_node)])
    else:
        # ValueError is a subclass of Exception, so existing handlers still match.
        raise ValueError('No such filtration: %s' % fil)

    feat = feat.reshape(n_node, 1)
    assert feat.shape == (n_node, 1)

    # normalize
    if norm:
        scale = float(np.abs(feat).max())
        if scale > 0:
            feat = feat / scale
    return feat
示例#3
0
def function_basis(g,
                   allowed,
                   norm_flag='no',
                   recomputation_flag=False,
                   transformation_flag=True):
    """
    Attach filtration-function values to every node of a connected graph.

    :param g: connected nx graph
    :param allowed: names of the features to compute. Names acted on here are
        'ricciCurvature', 'deg', 'hop', 'cc', 'fiedler', 'label', 'cc_min'
        and 'ricciCurvature_min'.
    :param norm_flag: 'yes' or 'no'; whether ricciCurvature, deg, hop and the
        aggregated degree attribute are divided by their largest absolute value
    :param recomputation_flag: when False, features whose name is already an
        attribute of the first node are skipped. 'label' is never skipped this
        way, because it is an input attribute rather than a computed feature.
    :param transformation_flag: if apply linear/nonlinear transformation of
        filtration function (not used inside this function)
    :return: the graph with the requested attributes stored on its nodes, or
        None when g has fewer than 3 nodes
    :raises AssertionError: if g is not connected
    :raises ValueError: if norm_flag is neither 'yes' nor 'no' and a
        normalised feature (other than ricciCurvature) is requested
    """

    assert nx.is_connected(g)
    if len(g) < 3: return

    # To save recomputation, look at the existing features first and only
    # compute the new ones. The keys must be flattened into a set: wrapping
    # the keys object in a list made every membership test fail.
    if not recomputation_flag:
        first_node = next(iter(g.nodes()))
        existing_features = set(g.nodes[first_node]) - {'label'}
        allowed = [
            feature for feature in allowed if feature not in existing_features
        ]

    def norm(g_, key, flag=norm_flag):
        """Return the divisor for attribute ``key``: 1, or max |value| + 1e-6."""
        if flag == 'no':
            return 1
        if flag == 'yes':
            values = list(nx.get_node_attributes(g_, key).values())
            return np.max(np.abs(values)) + 1e-6
        raise ValueError('norm flag has to be yes or no')

    # ricci
    g_ricci = g
    if 'ricciCurvature' in allowed:
        try:
            g_ricci = ricciCurvature(g, alpha=0.5, weight='weight')
            # Compare as lists: a keys/node view never equals a list.
            assert list(g_ricci.nodes()) == list(g.nodes())
            # Read the curvature from the graph that actually carries it.
            ricci_norm = norm(g_ricci, 'ricciCurvature', norm_flag)
            for n in g_ricci.nodes():
                g_ricci.nodes[n]['ricciCurvature'] /= ricci_norm
        except Exception:
            # Deliberate best-effort: fall back to zero curvature everywhere.
            print('RicciCurvature Error for graph, set 0 for all nodes')
            for n in g_ricci.nodes():
                g_ricci.nodes[n]['ricciCurvature'] = 0

    # degree
    if 'deg' in allowed:
        for n, degree in dict(nx.degree(g_ricci)).items():
            g_ricci.nodes[n]['deg'] = degree
        deg_norm = float(norm(g_ricci, 'deg', norm_flag))
        for n in g_ricci.nodes():
            g_ricci.nodes[n]['deg'] /= deg_norm

    # hop
    if 'hop' in allowed:
        # All-pairs distances as an integer array.
        distance = np.array(nx.floyd_warshall_numpy(g)).astype(int)
        if norm_flag == 'no': hop_norm = 1
        elif norm_flag == 'yes': hop_norm = np.max(distance)
        else: raise ValueError('norm flag has to be yes or no')
        # If g_ricci has non consecutive nodes, the position of a node in the
        # node iteration order is its row index in the distance matrix.
        for n_idx, n in enumerate(g_ricci.nodes()):
            g_ricci.nodes[n]['hop'] = distance[n_idx][:] / float(hop_norm)

    # closeness_centrality
    if 'cc' in allowed:
        cc = nx.closeness_centrality(g)  # dict
        cc_min = min(cc.values())  # hoisted: was recomputed for every node
        for n in g_ricci.nodes():
            # Inverse of the centrality relative to the smallest one.
            g_ricci.nodes[n]['cc'] = 1.0 / (cc[n] / cc_min)

    # fiedler
    if 'fiedler' in allowed:
        fiedler = fiedler_vector(g, normalized=False)  # np.ndarray
        assert max(fiedler) > 0
        fiedler = fiedler / max(np.abs(fiedler))
        assert max(np.abs(fiedler)) == 1
        for n_idx, n in enumerate(g_ricci.nodes()):
            g_ricci.nodes[n]['fiedler'] = fiedler[n_idx]

    any_node = next(iter(g_ricci.nodes()))
    if 'label' not in g_ricci.nodes[any_node]:
        for n in g_ricci.nodes():
            g_ricci.nodes[n]['label'] = 0  # add dummy
    else:  # contains label key
        label_norm = 40
        # ``graph`` is not defined in this function; read it from the module
        # globals when present so that a missing global is not a NameError.
        if globals().get('graph') == 'dd_test': label_norm = 90
        # NOTE(review): labels are rescaled on every call, so calling this
        # twice on the same graph divides them twice - confirm intended.
        for n in g_ricci.nodes():
            g_ricci.nodes[n]['label'] /= float(label_norm)

    if 'deg' in allowed:
        for n in g_ricci.nodes():
            attribute_mean(g_ricci, n, key='deg', cutoff=1, iteration=0)

        # better normalization, used to include 1_0_deg_std/ deleted now:
        # presumably attribute_mean stores '1_0_deg_sum' on each node - verify.
        if norm_flag == 'yes':
            for attr in ['1_0_deg_sum']:
                norm_ = float(norm(g_ricci, attr, norm_flag))
                for n in g_ricci.nodes():
                    g_ricci.nodes[n][attr] = g_ricci.nodes[n][attr] / norm_

    if 'label' in allowed:
        for n in g_ricci.nodes():
            attribute_mean(g_ricci, n, key='label', cutoff=1, iteration=0)
        for n in g_ricci.nodes():
            attribute_mean(g_ricci, n, key='label', cutoff=1, iteration=1)

    if 'cc_min' in allowed:
        for n in g_ricci.nodes():
            attribute_mean(g_ricci, n, key='cc')

    if 'ricciCurvature_min' in allowed:
        for n in g_ricci.nodes():
            attribute_mean(g_ricci, n, key='ricciCurvature')

    return g_ricci
示例#4
0
def nodefeat(g, fil, norm=False, **kwargs):
    """
    Compute one scalar feature per node of a connected graph.

    :param g: connected networkx graph. For 'hop', 'ricci' and 'ricci_w' the
        values are looked up by the integers 0..n_node-1, so the nodes must
        carry exactly those labels. The '*_w' variants read the edge
        attribute 'dist'.
    :param fil: deg, cc, cc_w, random, hop, fiedler, fiedler_w, fiedler_s,
        ricci, ricci_w, or 'hks_<t>' with a float t
    :param norm: whether to divide the feature by its largest absolute value
        (skipped when the feature is all zeros)
    :param kwargs: ``base`` (int), the source node; required when fil == 'hop'
    :return: node feature (np.array of shape (n_node, 1))
    :raises AssertionError: if g is not connected, ``base`` is not an int, an
        'hks' name lacks the '_' separator, or the feature has the wrong shape
    :raises KeyError: if fil == 'hop' and ``base`` is missing
    :raises ValueError: if fil is not one of the supported names
    """
    # g = nx.random_geometric_graph(100, 0.2)
    t0 = time.time()
    assert nx.is_connected(g)
    n_node = len(g)

    def degree_column():
        """Node degrees as an (n_node, 1) array."""
        return np.array(list(dict(nx.degree(g)).values())).reshape(n_node, 1)

    if fil == 'deg':
        feat = degree_column()
    elif fil == 'cc':
        feat = np.array(list(nx.closeness_centrality(g).values()))
        feat = feat.reshape(n_node, 1)
    elif fil == 'cc_w':
        feat = np.array(
            list(nx.closeness_centrality(g, distance='dist').values()))
        feat = feat.reshape(n_node, 1)
    elif fil == 'random':
        feat = np.random.random((n_node, 1))
    elif fil == 'hop':
        base = kwargs['base']
        assert isinstance(base, int)
        # Shortest-path length from ``base`` to every node, keyed by node.
        length = nx.single_source_dijkstra_path_length(g, base)
        feat = np.array([length[i] for i in range(n_node)]).reshape(n_node, 1)

    elif fil == 'fiedler':
        if len(g.edges) == 2 * n_node:
            # HACK: fiedler is very slow when n_edges == 2 * n_node, so the
            # degree is returned instead.
            feat = degree_column()
        else:
            feat = fiedler_vector(g, normalized=False)  # np.ndarray
            feat = feat.reshape(n_node, 1)

    elif fil == 'fiedler_w':
        # Perturb the weights on a copy: changing g itself would shift the
        # caller's 'dist' values by 1e-6 on every call.
        weighted = g.copy()
        for _, _, data in weighted.edges(data=True):
            if 'dist' in data:
                data['dist'] += 1e-6
        print(f'bottleneck graph {len(g)}/{len(g.edges())}')
        print('-' * 50)
        feat = fiedler_vector(weighted,
                              normalized=False,
                              weight='dist',
                              method='tracemin_lu')  # np.ndarray
        print('after true fiedler')
        feat = feat.reshape(n_node, 1)

    elif fil == 'fiedler_s':
        feat = fiedler_vector(g, normalized=False)  # np.ndarray
        feat = feat.reshape(n_node, 1)
        feat = np.multiply(feat, feat)  # element-wise square

    elif fil in ('ricci', 'ricci_w'):
        weight = 'weight' if fil == 'ricci' else 'dist'
        try:
            g = ricciCurvature(g, alpha=0.5, weight=weight)
            ricci_dict = nx.get_node_attributes(g, 'ricciCurvature')
            ricci_list = [ricci_dict[i] for i in range(n_node)]
            feat = np.array(ricci_list).reshape((n_node, 1))
        except Exception:
            # Deliberate best-effort fallback to random features, e.g. on
            # cvxpy.error.SolverError: Solver 'ECOS' failed. Try another solver.
            feat = np.random.random((n_node, 1))

    elif fil[:3] == 'hks':
        assert fil[3] == '_'
        t = float(fil[4:])
        from Esme.dgms.hks import hks
        feat = hks(g, t)

    else:
        # ValueError is a subclass of Exception, so existing handlers still match.
        raise ValueError('No such filtration: %s' % fil)
    assert feat.shape == (n_node, 1)

    # normalize
    if norm:
        scale = float(np.abs(feat).max())
        if scale > 0:
            feat = feat / scale

    # Report unusually slow computations.
    elapsed = time.time() - t0
    if elapsed > 3:
        from Esme.helper.time import precision_format
        print(
            f'nodefeat takes {precision_format(elapsed, 2)} for g {len(g)}/{len(g.edges)}'
        )
    return feat
示例#5
0
# Ad-hoc debugging driver: builds a small weighted graph, prints its per-node
# Ricci curvature and exits.
if __name__ == '__main__':

    # a bad example for fiedler
    # Edge list in "u v {attribute dict}" form; every edge carries a 'dist' weight.
    lines = [
        "0 1 {'dist': 1.3296398}", "0 2 {'dist': 0.9401972}",
        "0 3 {'dist': 0.94019735}", "1 2 {'dist': 0.94019735}",
        "1 3 {'dist': 0.9401972}", "1 4 {'dist': 0.9402065}",
        "3 4 {'dist': 1.3296462}", "3 5 {'dist': 0.9402065}",
        "4 5 {'dist': 0.9401972}"
    ]
    g = nx.parse_edgelist(lines, nodetype=int)
    print(g.edges(data=True))
    # Alternative features tried on the same graph (currently disabled):
    # v_weight = fiedler_vector(g, normalized=False, weight='dist')  # np.ndarray
    # v_weight = list(nx.closeness_centrality(g, distance='dist').values())

    # Node curvature using 'dist' as the edge weight, listed for nodes 0..n-1.
    g = ricciCurvature(g, alpha=0.5, weight='dist')
    ricci_dict = nx.get_node_attributes(g, 'ricciCurvature')
    v_weight = [ricci_dict[i] for i in range(len(g))]

    print(v_weight)
    sys.exit()

    # NOTE(review): everything below is unreachable because of the sys.exit()
    # above; it looks like leftover scratch code for a random weighted tree.
    # g = nx.circulant_graph(10, offsets=[1]*10)
    w_name = 'weightd'
    random.seed(43)
    g = nx.random_tree(20, seed=42)
    # Give every tree edge a random weight stored under w_name.
    for u, v in g.edges():
        g[u][v][w_name] = random.random()
        # print(g[u][v])
    print(g.edges)