Example #1
 def test_effective_size_undirected(self):
     G = self.G.copy()
     nx.set_edge_attributes(G, 1, "weight")
     effective_size = nx.effective_size(G, weight="weight")
     assert almost_equal(effective_size["G"], 4.67, places=2)
     assert almost_equal(effective_size["A"], 2.50, places=2)
     assert almost_equal(effective_size["C"], 1, places=2)
Example #2
 def test_effective_size_undirected(self):
     G = self.G.copy()
     nx.set_edge_attributes(G, 1, 'weight')
     effective_size = nx.effective_size(G, weight='weight')
     assert_almost_equal(round(effective_size['G'], 2), 4.67)
     assert_almost_equal(round(effective_size['A'], 2), 2.50)
     assert_almost_equal(round(effective_size['C'], 2), 1)
Example #3
 def test_effective_size_weighted_undirected(self):
     G = self.G.copy()
     nx.set_edge_attributes(G, self.G_weights, 'weight')
     effective_size = nx.effective_size(G, weight='weight')
     assert_almost_equal(round(effective_size['G'], 2), 5.47)
     assert_almost_equal(round(effective_size['A'], 2), 2.47)
     assert_almost_equal(round(effective_size['C'], 2), 1)
Example #4
 def test_effective_size_weighted_directed(self):
     D = self.D.copy()
     nx.set_edge_attributes(D, self.D_weights, 'weight')
     effective_size = nx.effective_size(D, weight='weight')
     assert_almost_equal(round(effective_size[0], 3), 1.567)
     assert_almost_equal(round(effective_size[1], 3), 1.083)
     assert_almost_equal(round(effective_size[2], 3), 1)
Example #5
 def test_effective_size_undirected(self):
     G = self.G.copy()
     nx.set_edge_attributes(G, 1, 'weight')
     effective_size = nx.effective_size(G, weight='weight')
     assert almost_equal(effective_size['G'], 4.67, places=2)
     assert almost_equal(effective_size['A'], 2.50, places=2)
     assert almost_equal(effective_size['C'], 1, places=2)
Example #6
 def test_effective_size_undirected(self):
     G = self.G.copy()
     nx.set_edge_attributes(G, 1, "weight")
     effective_size = nx.effective_size(G, weight="weight")
     assert effective_size["G"] == pytest.approx(4.67, abs=1e-2)
     assert effective_size["A"] == pytest.approx(2.50, abs=1e-2)
     assert effective_size["C"] == pytest.approx(1, abs=1e-2)
Example #7
 def test_effective_size_weighted_directed(self):
     D = self.D.copy()
     nx.set_edge_attributes(D, self.D_weights, 'weight')
     effective_size = nx.effective_size(D, weight='weight')
     assert_almost_equal(round(effective_size[0], 3), 1.567)
     assert_almost_equal(round(effective_size[1], 3), 1.083)
     assert_almost_equal(round(effective_size[2], 3), 1)
Example #8
 def test_effective_size_weighted_directed(self):
     D = self.D.copy()
     nx.set_edge_attributes(D, self.D_weights, "weight")
     effective_size = nx.effective_size(D, weight="weight")
     assert effective_size[0] == pytest.approx(1.567, abs=1e-3)
     assert effective_size[1] == pytest.approx(1.083, abs=1e-3)
     assert effective_size[2] == pytest.approx(1, abs=1e-3)
Example #9
 def test_effective_size_weighted_directed(self):
     D = self.D.copy()
     nx.set_edge_attributes(D, self.D_weights, "weight")
     effective_size = nx.effective_size(D, weight="weight")
     assert almost_equal(effective_size[0], 1.567, places=3)
     assert almost_equal(effective_size[1], 1.083, places=3)
     assert almost_equal(effective_size[2], 1, places=3)
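The weighted directed variants above rely on self.D and self.D_weights fixtures that are not shown. A stand-alone sketch with a hypothetical graph and weight dict (the edges and weights here are illustrative, not the fixture values), showing the NetworkX 2.x+ argument order of set_edge_attributes, values first and attribute name second:

import networkx as nx

# Hypothetical stand-ins for the self.D / self.D_weights fixtures.
D = nx.DiGraph([(0, 1), (0, 2), (1, 0), (2, 1)])
D_weights = {(0, 1): 2.0, (0, 2): 4.0, (1, 0): 1.0, (2, 1): 3.0}

nx.set_edge_attributes(D, D_weights, 'weight')
effective_size = nx.effective_size(D, weight='weight')
print(effective_size)  # dict mapping each node to its effective size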
Example #10
def calculate_networks_indicators(graph):
    """计算基本网络指标"""
    degree_centrality = nx.degree_centrality(graph)
    nodes = list(degree_centrality.keys())
    betweenness_centrality = nx.betweenness_centrality(graph, weight='weight')
    network_indicators = pd.DataFrame({
        'nodes': nodes,
        'degree_centrality': [degree_centrality[node] for node in nodes],
        'betweenness_centrality': [betweenness_centrality[node] for node in nodes],
    })

    network_indicators['local_reaching_centrality'] = [
        nx.local_reaching_centrality(graph, node, weight='weight')
        for node in nodes
    ]
    constraint = nx.constraint(graph, weight='weight')
    network_indicators['constraint'] = [constraint[node] for node in nodes]
    effective_size = nx.effective_size(graph, weight='weight')
    network_indicators['effective_size'] = [
        effective_size[node] for node in nodes
    ]
    triangles = nx.triangles(graph)
    network_indicators['triangles'] = [triangles[node] for node in nodes]
    clustering = nx.clustering(graph, weight='weight')
    network_indicators['clustering'] = [clustering[node] for node in nodes]

    weight_dict = dict(nx.degree(graph, weight='weight'))
    degree_dict = dict(nx.degree(graph))
    average_weight_dict = {
        node: (weight_dict[node] / degree_dict[node]
               if degree_dict[node] != 0 else 0)
        for node in weight_dict
    }
    network_indicators['tie_strength'] = [
        average_weight_dict[node] for node in nodes
    ]
    network_indicators['number_of_node'] = nx.number_of_nodes(graph)
    network_indicators['density'] = nx.density(graph)
    # maximum clique size (nx.graph_clique_number was removed in NetworkX 3.0)
    clique_number = max(len(c) for c in nx.find_cliques(graph))
    if clique_number >= 3:
        network_indicators['cliques'] = clique_number
    else:
        network_indicators['cliques'] = 0
    network_indicators['efficiency'] = nx.global_efficiency(graph)
    network_indicators['isolates'] = nx.number_of_isolates(graph)

    network_indicators = network_indicators[[
        'nodes', 'degree_centrality', 'betweenness_centrality',
        'local_reaching_centrality', 'constraint', 'effective_size',
        'triangles', 'clustering', 'tie_strength', 'number_of_node', 'density',
        'cliques', 'efficiency', 'isolates'
    ]]
    return network_indicators
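A minimal usage sketch for calculate_networks_indicators, assuming the imports its body relies on (networkx as nx, pandas as pd) and a small weighted toy graph; the node names are arbitrary:

import networkx as nx
import pandas as pd

graph = nx.Graph()
graph.add_weighted_edges_from([('a', 'b', 1.0), ('b', 'c', 2.0), ('a', 'c', 0.5)])

indicators = calculate_networks_indicators(graph)
print(indicators[['nodes', 'effective_size', 'constraint']])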
Example #11
 def __get_effective_size(graph: nx.Graph):
     # Keep only nodes with a positive effective size, ranked in
     # descending order.
     effective_size = {
         node: value
         for node, value in nx.effective_size(graph).items() if value > 0
     }
     return dict(
         sorted(effective_size.items(), key=lambda item: item[1], reverse=True))
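Note that the value > 0 filter above also drops isolated nodes: their effective size is nan, and any comparison with nan is False. A sketch of the same filtering and ranking logic, outside the class:

import networkx as nx

g = nx.Graph([('a', 'b'), ('b', 'c')])
g.add_node('d')  # isolate: its effective size is nan

es = nx.effective_size(g)
positive = {k: v for k, v in es.items() if v > 0}  # 'd' is filtered out
ranked = dict(sorted(positive.items(), key=lambda kv: kv[1], reverse=True))
print(ranked)  # {'b': 2.0, 'a': 1.0, 'c': 1.0}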
Example #12
def effective_size(graph, nodes, year, indicator_type):
    effective_size = nx.effective_size(graph, weight='weight')
    data = pd.DataFrame({
        'nodes': nodes,
        'effective_size': [effective_size[node] for node in nodes],
    })
    if indicator_type == '三年期':  # three-year window
        excel_path = ('../data/生成数据/04关系矩阵_中间指标/三年期/' + str(year) + '-' +
                      str(year + 2) + '年竞争关系矩阵')
    else:  # five-year window
        excel_path = ('../data/生成数据/04关系矩阵_中间指标/五年期/' + str(year) + '-' +
                      str(year + 4) + '年竞争关系矩阵')
    if not os.path.exists(excel_path):
        os.makedirs(excel_path)
    data.to_excel(excel_writer=excel_path + '/effective_size指标.xlsx',
                  index=False)
    print('effective_size for year ' + str(year) + ' computed!')
Example #13
def calculate_key_inventor_hole_label(base_data_array,
                                      inventor_patents_rows_dict,
                                      key_inventor):
    """
    计算关键研发者结构洞指数与中心度
    :param base_data_array:原始数据
    :param inventor_patents_count_dict: 用上一步的结果,用于获取所有节点
    :return:
    """

    inventor_data_rows = inventor_patents_rows_dict[key_inventor]
    # Collect all nodes in the key inventor's ego network
    network_node_list = get_key_inventor_partner(base_data_array,
                                                 inventor_patents_rows_dict,
                                                 key_inventor)
    # Initialize an undirected network
    key_inventor_network = nx.Graph()
    # Add the nodes to the network
    key_inventor_network.add_nodes_from(network_node_list)
    # Add the co-invention edges to the network
    for row_index in inventor_data_rows:
        row_data = base_data_array[row_index]
        # Read the applicant (inventor) column
        inventor_value = row_data[const.INVENTOR_COL]
        if inventor_value:
            inventor_value_list = inventor_value.split(",")
            if inventor_value_list and len(inventor_value_list) > 1:
                # Enumerate every 2-inventor combination in inventor_value_list
                for inventor_partner_array in itertools.combinations(
                        inventor_value_list, 2):
                    key_inventor_network.add_edge(inventor_partner_array[0],
                                                  inventor_partner_array[1])

    # Get the degree of the key inventor's node
    key_inventor_degree = key_inventor_network.degree(key_inventor)
    if key_inventor_degree == 0:
        hole_effi = const.ALONE_NODE_HOLE_EFFI
    else:
        hole_effi = nx.effective_size(
            key_inventor_network)[key_inventor] / key_inventor_degree
    return [hole_effi, key_inventor_degree]
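The ratio computed at the end, effective size divided by degree, is Burt's efficiency measure. A minimal sketch of the same calculation on a hypothetical three-person network, with a placeholder constant standing in for const.ALONE_NODE_HOLE_EFFI:

import itertools
import networkx as nx

ALONE_NODE_HOLE_EFFI = 0.0  # assumed placeholder for the const module's value

g = nx.Graph()
g.add_edges_from(itertools.combinations(['ego', 'p1', 'p2'], 2))

degree = g.degree('ego')
hole_effi = (nx.effective_size(g)['ego'] / degree
             if degree else ALONE_NODE_HOLE_EFFI)
print(hole_effi)  # 0.5: effective size 1.0 over degree 2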
Example #14
def parse_all_metrics(api, edge_df, user_id, directory=None, long=False):
    '''
    Will get all Tier 3 metrics for a user_id

    Parameters
    ----------
    api : Tweepy API hook
    edge_df : Edgelist of Pandas DataFrame
    user_id : User ID string
    directory : Directory to look for data
        The default is None.
    long : Whether to get metrics that take a long time. The default is False.

    Returns
    -------
    Feature Data Frame

    '''
    import pandas as pd
    import twitter_col
    import json, io, gzip, os
    import time
    import progressbar
    import networkx as nx
    from collections import Counter
    import community
    import numpy as np

    #    user_id = '1919751'
    G = nx.from_pandas_edgelist(edge_df,
                                'from',
                                'to',
                                edge_attr=['type'],
                                create_using=nx.DiGraph())
    #    G=nx.gnp_random_graph(100, 0.4, seed=None, directed=True)
    G2 = G.to_undirected()

    # nx.connected_component_subgraphs was removed in NetworkX 2.4
    largest_component = G2.subgraph(max(nx.connected_components(G2), key=len))

    print("Nodes in largest compo:", len(largest_component.nodes))

    data = {
        "user_id": [],
        "scrape_date": [],
        "num_nodes": [],
        "num_links": [],
        "density": [],
        "isolates": [],
        "dyad_isolates": [],
        "triad_isolates": [],
        "compo_over_4": [],
        #            "average_shortest_path_length": [],
        "clustering_coefficient": [],
        "transitivity": [],
        #            "network_diameter": [],
        "reciprocity": [],
        "graph_degree_centrality": [],
        "graph_betweenness_centrality": [],
        "mean_eigen_centrality": [],
        "simmelian_ties": [],
        "triad_003": [],
        "triad_012": [],
        "triad_102": [],
        "triad_021D": [],
        "triad_021U": [],
        "triad_021C": [],
        "triad_111D": [],
        "triad_111U": [],
        "triad_030T": [],
        "triad_030C": [],
        "triad_201": [],
        "triad_120D": [],
        "triad_120U": [],
        "triad_120C": [],
        "triad_210": [],
        "triad_300": [],
        "num_louvaine_groups": [],
        "size_largest_louvaine_group": [],
        "ego_effective_size": []
    }

    if long:
        data.pop("graph_betweenness_centrality")
        data.pop("ego_effective_size")
        data.pop("simmelian_ties")

    data['user_id'].append(user_id)
    data['scrape_date'].append(time.strftime('%Y%m%d-%H%M%S'))
    data['num_nodes'].append(nx.number_of_nodes(G))
    data['num_links'].append(nx.number_of_edges(G))
    data['density'].append(nx.density(G))

    compo_sizes = [
        len(c)
        for c in sorted(nx.connected_components(G2), key=len, reverse=True)
    ]
    compo_freq = Counter(compo_sizes)

    #    print('isolates')
    data['isolates'].append(compo_freq[1])
    #    print('triad_islolates')
    data['triad_isolates'].append(compo_freq[3])
    data['dyad_isolates'].append(compo_freq[2])
    data['compo_over_4'].append(len([x for x in compo_sizes if x > 3]))
    #    print('shortest path')
    #    data['average_shortest_path_length'].append(nx.average_shortest_path_length(largest_component))
    #    print('clustering_coefficient')
    data['clustering_coefficient'].append(nx.average_clustering(G2))
    #    print('transitivity')
    data['transitivity'].append(nx.transitivity(G))
    #    print('diameter')
    #    data['network_diameter'].append(nx.diameter(largest_component))
    #    print('reciprocity')
    data['reciprocity'].append(nx.reciprocity(G))
    #    print('effective size')
    if not long:
        if user_id in list(G.nodes):
            ef = nx.effective_size(G, nodes=[user_id])
            data['ego_effective_size'].append(ef[user_id])
        else:
            data['ego_effective_size'].append(0)

#    print('degree')
    data['graph_degree_centrality'].append(graph_centrality(G, kind='degree'))
    #    print('betweenness')
    if not long:
        data['graph_betweenness_centrality'].append(
            graph_centrality(largest_component, kind='betweenness'))
#    print('eigen_centrality')
    try:
        eig = list(nx.eigenvector_centrality_numpy(G).values())
        data['mean_eigen_centrality'].append(np.mean(eig))
    except Exception:
        data['mean_eigen_centrality'].append(0)

#    print('simmelian')
    if not long:
        data['simmelian_ties'].append(get_simmelian_ties(G, sparse=True))
    #    print('census')
    census = nx.triadic_census(G)

    data['triad_003'].append(census['003'])
    data['triad_012'].append(census['012'])
    data['triad_102'].append(census['102'])
    data['triad_021D'].append(census['021D'])
    data['triad_021U'].append(census['021U'])
    data['triad_021C'].append(census['021C'])
    data['triad_111D'].append(census['111D'])
    data['triad_111U'].append(census['111U'])
    data['triad_030T'].append(census['030T'])
    data['triad_030C'].append(census['030C'])
    data['triad_201'].append(census['201'])
    data['triad_120D'].append(census['120D'])
    data['triad_120U'].append(census['120U'])
    data['triad_120C'].append(census['120C'])
    data['triad_210'].append(census['210'])
    data['triad_300'].append(census['300'])

    partition = community.best_partition(G2)
    p_df = pd.DataFrame.from_dict(partition, orient='index')
    #    print('louvaine')
    data['num_louvaine_groups'].append(len(set(partition.values())))
    data['size_largest_louvaine_group'].append(p_df[0].value_counts().max())

    df = pd.DataFrame(data)
    return df
Example #15
 def test_effective_size_undirected_borgatti(self):
     effective_size = nx.effective_size(self.G)
     assert_almost_equal(round(effective_size['G'], 2), 4.67)
     assert_almost_equal(round(effective_size['A'], 2), 2.50)
     assert_almost_equal(round(effective_size['C'], 2), 1)
Example #16
 def test_effective_size_undirected_borgatti(self):
     effective_size = nx.effective_size(self.G)
     assert effective_size["G"] == pytest.approx(4.67, abs=1e-2)
     assert effective_size["A"] == pytest.approx(2.50, abs=1e-2)
     assert effective_size["C"] == pytest.approx(1, abs=1e-2)
Example #17
 def test_effective_size_directed(self):
     effective_size = nx.effective_size(self.D)
     assert_almost_equal(round(effective_size[0], 3), 1.167)
     assert_almost_equal(round(effective_size[1], 3), 1.167)
     assert_almost_equal(round(effective_size[2], 3), 1)
Example #18
 def test_effective_size_borgatti_isolated(self):
     G = self.G.copy()
     G.add_node(1)
     effective_size = nx.effective_size(G)
     assert_true(math.isnan(effective_size[1]))
Example #19
 def test_effective_size_isolated(self):
     G = self.G.copy()
     G.add_node(1)
     nx.set_edge_attributes(G, self.G_weights, 'weight')
     effective_size = nx.effective_size(G, weight='weight')
     assert_true(math.isnan(effective_size[1]))
Example #20
 def test_effective_size_undirected_borgatti(self):
     effective_size = nx.effective_size(self.G)
     assert_almost_equal(round(effective_size['G'], 2), 4.67)
     assert_almost_equal(round(effective_size['A'], 2), 2.50)
     assert_almost_equal(round(effective_size['C'], 2), 1)
Example #21
 def test_effective_size_directed(self):
     effective_size = nx.effective_size(self.D)
     assert_almost_equal(round(effective_size[0], 3), 1.167)
     assert_almost_equal(round(effective_size[1], 3), 1.167)
     assert_almost_equal(round(effective_size[2], 3), 1)
Example #22
 def test_effective_size_directed(self):
     effective_size = nx.effective_size(self.D)
     assert almost_equal(effective_size[0], 1.167, places=3)
     assert almost_equal(effective_size[1], 1.167, places=3)
     assert almost_equal(effective_size[2], 1, places=3)
Example #23
 def test_effective_size_borgatti_isolated(self):
     G = self.G.copy()
     G.add_node(1)
     effective_size = nx.effective_size(G)
     assert_true(math.isnan(effective_size[1]))
Example #24
 def test_effective_size_isolated(self):
     G = self.G.copy()
     G.add_node(1)
     nx.set_edge_attributes(G, self.G_weights, 'weight')
     effective_size = nx.effective_size(G, weight='weight')
     assert_true(math.isnan(effective_size[1]))
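The isolate tests above expect nan: an isolated node has no contacts, so its effective size is undefined. A self-contained sketch of the same behavior:

import math
import networkx as nx

G = nx.Graph([('a', 'b')])
G.add_node('c')  # isolated node with no edges

assert math.isnan(nx.effective_size(G)['c'])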
Example #25
 def test_effective_size_directed(self):
     effective_size = nx.effective_size(self.D)
     assert effective_size[0] == pytest.approx(1.167, abs=1e-3)
     assert effective_size[1] == pytest.approx(1.167, abs=1e-3)
     assert effective_size[2] == pytest.approx(1, abs=1e-3)
Example #26
def features_part2(info):
    """
    third set of features.
    """
    G = info['G']
    n = info['num_nodes']
    num_units = info['num_units']
    edges = info['edges']
    nedges = len(edges)

    H = G.to_undirected()

    res = dict()
    cc = nx.closeness_centrality(G)
    res['closeness_centrality'] = cc[n - 1]
    res['closeness_centrality_mean'] = np.mean(list(cc.values()))

    bc = nx.betweenness_centrality(G)
    res['betweenness_centrality_mean'] = np.mean(list(bc.values()))

    cfcc = nx.current_flow_closeness_centrality(H)
    res['current_flow_closeness_centrality_mean'] = np.mean(list(
        cfcc.values()))

    cfbc = nx.current_flow_betweenness_centrality(H)
    res['current_flow_betweenness_centrality_mean'] = np.mean(
        list(cfbc.values()))

    soc = nx.second_order_centrality(H)
    res['second_order_centrality_mean'] = np.mean(list(soc.values())) / n

    cbc = nx.communicability_betweenness_centrality(H)
    res['communicability_betweenness_centrality_mean'] = np.mean(
        list(cbc.values()))

    comm = nx.communicability(H)
    res['communicability'] = np.log(comm[0][n - 1])
    res['communicability_start_mean'] = np.log(np.mean(list(comm[0].values())))
    res['communicability_end_mean'] = np.log(
        np.mean(list(comm[n - 1].values())))

    res['radius'] = nx.radius(H)
    res['diameter'] = nx.diameter(H)
    res['local_efficiency'] = nx.local_efficiency(H)
    res['global_efficiency'] = nx.global_efficiency(H)
    res['efficiency'] = nx.efficiency(H, 0, n - 1)

    pgr = nx.pagerank(G)  # pagerank_numpy was removed in NetworkX 3.0
    res['page_rank'] = pgr[n - 1]
    res['page_rank_mean'] = np.mean(list(pgr.values()))

    cnstr = nx.constraint(G)
    res['constraint_mean'] = np.mean(list(cnstr.values())[:-1])

    effsize = nx.effective_size(G)
    res['effective_size_mean'] = np.mean(list(effsize.values())[:-1])

    cv = np.array(list(nx.closeness_vitality(H).values()))
    cv[cv < 0] = 0
    res['closeness_vitality_mean'] = np.mean(cv) / n

    res['wiener_index'] = nx.wiener_index(H) / (n * (n - 1) / 2)

    A = nx.to_numpy_array(G)
    expA = expm(A)
    res['expA'] = np.log(expA[0, n - 1])
    res['expA_mean'] = np.log(np.mean(expA[np.triu_indices(n)]))

    return res
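A hedged usage sketch for features_part2, assuming the module-level imports its body implies (numpy as np, networkx as nx, expm from scipy.linalg) and the info dict layout read at the top of the function; a small complete digraph keeps every centrality routine it calls well-defined:

import networkx as nx
import numpy as np
from scipy.linalg import expm  # needed by features_part2 for the matrix exponential

G = nx.complete_graph(6, create_using=nx.DiGraph())
info = {
    'G': G,
    'num_nodes': G.number_of_nodes(),
    'num_units': 1,  # read but not otherwise used here
    'edges': list(G.edges()),
}

res = features_part2(info)
print(res['effective_size_mean'], res['constraint_mean'])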
Example #27
 def test_effective_size_undirected_borgatti(self):
     effective_size = nx.effective_size(self.G)
     assert almost_equal(effective_size["G"], 4.67, places=2)
     assert almost_equal(effective_size["A"], 2.50, places=2)
     assert almost_equal(effective_size["C"], 1, places=2)