Example #1
def graph_characteristics(graphName):
    '''
    Return all the characteristics of a graph that we present in the paper.
    '''
    storedFolder = roles.graph_folder(graphName)

    inGraph = next(
        graph_analysis.IO.load_data("../Data/Graphs/" + storedFolder + "/" +
                                    graphName + ".GT.graph"))
    groupTaxa, blackList = roles.graph_node_clusters(graphName,
                                                     inGraph,
                                                     metric="default")
    res = []

    headers = [
        "Graph Name", "\#Nodes", "\#Edges", "Edge density", "Clustering Coef.",
        "Diameter", "Role Taxonomy", "\#Clusters"
    ]
    res.append(paper_graph_name(graphName))
    res.append(inGraph.num_vertices())
    res.append(inGraph.num_edges())
    res.append(
        round(inGraph.num_edges() / (2 * float(inGraph.num_vertices())), 2))
    res.append(round(clustering.global_clustering(inGraph)[0], 3))
    res.append(topology.pseudo_diameter(inGraph)[0])
    res.append(graphRoles[graphName])
    res.append(len(set(groupTaxa)))
    return res, headers
Example #2
def examine_graph(graph: Graph,
                  experiment: str,
                  graphname: str,
                  real: bool,
                  directed: bool = True) -> Properties:
    vertices = graph.num_vertices()
    edges = graph.num_edges()
    total_degrees = graph.get_total_degrees(np.arange(vertices))
    min_degree = np.min(total_degrees)
    max_degree = np.max(total_degrees)
    avg_degree = vertex_average(graph, "total")[0]
    largest_component = extract_largest_component(
        graph, directed=False).num_vertices()
    num_islands = np.sum(total_degrees == 0)
    cc = global_clustering(graph)[0]
    # _degrees, _counts = np.unique(total_degrees, return_counts=True)
    # log_degrees = np.log(_degrees)
    # log_counts = np.log(_counts)
    # regressor = LinearRegression()
    # regressor.fit(log_degrees.reshape(-1, 1), log_counts)
    # exponent = regressor.coef_[0]
    result = powerlaw.Fit(total_degrees,
                          xmin=1,
                          discrete=True,
                          xmax=max_degree)
    exponent = -result.alpha
    percentile = np.percentile(total_degrees, 95)
    # print("Exponent for this graph is: ", exponent)
    # print("Using powerlaw package: e = {} xmin = {} xmax = {}".format(
    #     exponent2, result.xmin, result.xmax))
    # print("degrees: {}\ncounts: {}".format(_degrees[:20], _counts[:20]))
    return Properties(experiment, graphname, real, vertices, edges, min_degree,
                      max_degree, avg_degree, largest_component, num_islands,
                      cc, directed, exponent, percentile)
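A note on the power-law fit above: the commented-out block estimates the exponent with a log-log linear regression, while the live code delegates to the powerlaw package. Below is a minimal, hedged sketch of the same powerlaw.Fit call on a synthetic degree sequence; it is standalone and not tied to examine_graph.

# Hedged sketch: discrete power-law fit on a synthetic degree sequence,
# mirroring the powerlaw.Fit call in examine_graph above.
import numpy as np
import powerlaw

degrees = np.random.zipf(a=2.5, size=10000)  # stand-in for a real degree array
fit = powerlaw.Fit(degrees, xmin=1, discrete=True)
print("alpha =", fit.alpha, "xmin =", fit.xmin, "KS =", fit.power_law.KS())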
Example #3
def metrics(file, use_cache=True):
    # use cache or recompute
    cache = os.path.splitext(file)[0] + ".json"
    if use_cache and os.path.isfile(cache):
        print('using cached metrics for', os.path.basename(file))
        with open(cache, "r") as fp:
            return json.load(fp)
    print('computing metrics for', os.path.basename(file))

    # read file
    g = load_graph(file)
    degrees = list(g.degree_property_map("out"))
    with open(file) as f:
        metalines = [next(f) for x in range(13)]

    # gather data
    metrics = {}
    metrics['file'] = os.path.basename(file)
    metrics['edges'] = int(metalines[5].split()[-1])
    metrics['rounds'] = int(metalines[1].split()[-1])
    metrics['max_degree'] = max(degrees)
    metrics['avg_degree'] = mean(degrees)
    metrics['min_degree'] = min(degrees)
    metrics['local_clustering'] = mean(local_clustering(g).get_array())
    metrics['global_clustering'] = global_clustering(g)[0]
    metrics['pseudo_diameter'] = int(pseudo_diameter(g)[0])
    fit = powerlaw.Fit(degrees, discrete=True, verbose=False)
    metrics['exponent'] = fit.alpha
    metrics['KS'] = fit.power_law.KS()
    metrics['x_min'] = fit.xmin

    with open(cache, "w") as fp:
        json.dump(metrics, fp)

    return metrics
Example #4
def calculate_measures(g, tmp_measures=None, measure_list=['BC', 'T', 'E']):
    if tmp_measures is None:
        tmp_measures = dict((k, []) for k in measure_list)

    tmp_measures['BC'].append(np.mean(gtc.betweenness(g)[0].get_array()))
    tmp_measures['T'].append(gtclust.global_clustering(g)[0])
    tmp_measures['E'].append(
        np.mean(gtc.closeness(g, harmonic=True).get_array()))

    return tmp_measures
Example #5
def calculate_measures(g, tmp_measures=None, measure_list=['BC', 'T', 'E']):
    if tmp_measures is None:
        tmp_measures = dict((k, []) for k in measure_list)

    tmp_measures['BC'].append(np.mean(gtc.betweenness(g)[0].get_array()))
    tmp_measures['T'].append(gtclust.global_clustering(g)[0])
    tmp_measures['E'].append(
        np.mean(gtc.closeness(g, harmonic=True).get_array()))

    return tmp_measures
Example #6
def f_global_clustering(U,
                        stats,
                        options={
                            'features': [],
                            'skip_features': []
                        }):
    """"""

    if 'global_clustering' not in options['features'] or (
            'skip_features' in options
            and 'global_clustering' in options['skip_features']):
        log.debug('Skipping global_clustering')
        return

    stats['global_clustering'] = global_clustering(U)[0]
    log.debug('done global_clustering')
Example #7
def global_clustering_binary_undirected(g):
    '''
    Returns the undirected global clustering coefficient.

    This corresponds to the ratio of undirected triangles to the number of
    undirected triads.

    Parameters
    ----------
    g : :class:`~nngt.Graph`
        Graph to analyze.

    References
    ----------
    .. [gt-global-clustering] :gtdoc:`clustering.global_clustering`
    '''
    # use undirected graph view, filter parallel edges
    u = GraphView(g.graph, directed=False)
    u = GraphView(u, efilt=label_parallel_edges(u).fa == 0)

    return gtc.global_clustering(u, weight=None)[0]
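Every example on this page indexes global_clustering(...)[0] because graph-tool returns a (coefficient, standard deviation) tuple. A small standalone sketch on an assumed toy graph makes the triangles-to-triads ratio from the docstring above concrete:

# Hedged sketch: one triangle (0-1-2) plus a pendant edge (2-3).
# 3 closed triplets / 5 connected triples gives a coefficient of 0.6.
from graph_tool import Graph
from graph_tool.clustering import global_clustering

g = Graph(directed=False)
g.add_edge_list([(0, 1), (1, 2), (0, 2), (2, 3)])
coeff, std = global_clustering(g)  # (coefficient, standard deviation)
print(coeff)  # 0.6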
Example #8
def global_clustering_coeff(g):
    return global_clustering(g)[0]
Example #9
def get_descriptors(network, short_name, nx_network, already_calculated=False):
    def _prefixToTitle(prefix):
        if prefix == 'a':
            return "Artists"
        elif prefix == 't':
            return "Tags"
        elif prefix == 'u':
            return 'Users'

    filename = "cache/{}.pickle".format(short_name)
    if os.path.isfile(filename):
        result = pickle.load(open(filename, 'rb'))
        return result

    result = {}
    prefix1, prefix2 = short_name[0], short_name[1]
    t1 = _prefixToTitle(prefix1)
    t2 = _prefixToTitle(prefix2)
    result['name'] = short_name
    result['title_dd1'] = PLOT_TITLES[short_name].format(t1, "")
    result['title_dd2'] = PLOT_TITLES[short_name].format(t2, "")
    result['title_dd1_acum'] = PLOT_TITLES[short_name].format(
        t1, " Cumulative")
    result['title_dd2_acum'] = PLOT_TITLES[short_name].format(
        t2, " Cumulative")
    result['title_wd'] = PLOT_TITLES['wd'].format("", t1, t2)
    result['title_wd_acum'] = PLOT_TITLES['wd'].format("Cumulative ", t1, t2)
    result['title_cd'] = PLOT_TITLES['cd'].format(t1, t2)
    result['title_sp'] = PLOT_TITLES['sp'].format(t1, t2)
    result['filename_dd'] = '{}_dd'.format(short_name)  # degree input dist
    result['filename_ddl'] = '{}_dd_log'.format(
        short_name)  # degree dist (log)
    result['filename_dd1'] = '{}_{}_dd'.format(short_name[0],
                                               short_name)  # degree input dist
    result['filename_dd2'] = '{}_{}_dd'.format(short_name[1],
                                               short_name)  # degree input dist
    result['filename_dd1l'] = '{}_{}_dd_log'.format(
        short_name[0], short_name)  # degree input dist
    result['filename_dd2l'] = '{}_{}_dd_log'.format(
        short_name[1], short_name)  # degree input dist
    result['filename_dd1_acum'] = '{}_{}_dd_acum'.format(
        short_name[0], short_name)  # degree input dist
    result['filename_dd2_acum'] = '{}_{}_dd_acum'.format(
        short_name[1], short_name)  # degree input dist
    result['filename_wd'] = '{}_wd'.format(short_name)  # weight distribution
    result['filename_wdl'] = '{}_wd_log'.format(
        short_name)  # weight distribution
    result['filename_wd_acum'] = '{}_wd_acum'.format(
        short_name)  # weight distribution
    result['filename_sp'] = '{}_sp'.format(short_name)  # shortest path
    result['filename_cd'] = '{}_cd'.format(short_name)  # components
    result['filename_cdl'] = '{}_cd_log'.format(short_name)  #

    nodes = network.get_vertices()
    edges = network.get_edges()
    result['num_nodes'] = {}
    result['num_nodes']['total'] = nodes.shape[0]

    result['num_edges'] = edges.shape[0]

    result['degree'] = {"total": {}, "prefix1": {}, "prefix2": {}}
    result['degree']["total"]['max'] = network.get_out_degrees(nodes).max()
    result['degree']["total"]['min'] = network.get_out_degrees(nodes).min()
    result['degree']["total"]['avg'] = network.get_out_degrees(nodes).mean()
    result['degree']["total"]["counts"], result['degree']["total"][
        "bins"] = st.vertex_hist(network, "out")

    nodes1, nodes2 = [], []
    for node in nodes:
        if prefix1 in network.vp['id'][node]:
            nodes1.append(node)
        elif prefix2 in network.vp['id'][node]:
            nodes2.append(node)

    result['num_nodes']['prefix1'] = len(nodes1)
    result['degree']["prefix1"]['max'] = network.get_out_degrees(nodes1).max()
    result['degree']["prefix1"]['min'] = network.get_out_degrees(nodes1).min()
    result['degree']["prefix1"]['avg'] = network.get_out_degrees(nodes1).mean()
    result['degree']["prefix1"]["counts"], result['degree']["prefix1"][
        "bins"] = np.histogram(
            network.get_out_degrees(nodes1),
            bins=15)  # result['degree']["total"]["bins"].shape[0]
    result['degree']["prefix1"]["d"] = network.get_out_degrees(
        nodes1)  # result['degree']["total"]["bins"].shape[0]
    if prefix1 == prefix2:
        nodes2 = nodes1
    result['num_nodes']['prefix2'] = len(nodes2)
    result['degree']["prefix2"]['max'] = network.get_out_degrees(nodes2).max()
    result['degree']["prefix2"]['min'] = network.get_out_degrees(nodes2).min()
    result['degree']["prefix2"]['avg'] = network.get_out_degrees(nodes2).mean()
    result['degree']["prefix2"]["counts"], result['degree']["prefix2"][
        "bins"] = np.histogram(network.get_out_degrees(nodes2), bins=15)
    result['degree']["prefix2"]["d"] = network.get_out_degrees(
        nodes2)  # result['degree']["total"]["bins"].shape[0]

    result['weights'] = {}
    weights = []

    for v1, v2 in nx_network.edges():
        weight = nx_network.get_edge_data(v1, v2)['weight']
        weights.append(weight)

    # result['weights']['counts'], result['weights']['bins'] = np.histogram(weights, bins=8)
    result['weights']['d'] = weights

    # estimated diameter and longest path
    d, (v1, v2) = top.pseudo_diameter(network)
    result['diameter'] = d
    d_path = "{}-{}".format(network.vp['id'][v1], network.vp['id'][v2])
    result['diameter_path'] = d_path

    result['clustering'] = clu.global_clustering(network)

    if not already_calculated:
        net2 = gt.Graph(network)  # undirected version
        net2.set_directed(False)
        result['sp'] = {}
        result['sp']['counts'], result['sp']['bins'] = shortest_paths(net2)
        # connected components

        _, c2 = top.label_components(net2)

        result['components'] = {}
        result['components']['num'] = len(c2)
        result['components']['bins'] = range(len(c2))
        result['components']['counts'] = c2

    pickle.dump(result, open(filename, "wb"))
    return result
Example #10
    def evaluate_sampling(self, full_graph: Graph, sampled_graph: Graph,
                          full_partition: BlockState,
                          sampled_graph_partition: BlockState,
                          block_mapping: Dict[int, int],
                          vertex_mapping: Dict[int, int],
                          assignment: np.ndarray):
        """Evaluates the goodness of the samples.

        Parameters
        ----------
        full_graph : Graph
            the full, unsampled Graph object
        sampled_graph : Graph
            the sampled graph
        full_partition : Partition
            the partitioning results on the full graph
        sampled_graph_partition : Partition
            the partitioning results on the sampled graph
        block_mapping : Dict[int, int]
            the mapping of blocks from the full graph to the sampled graph
        vertex_mapping : Dict[int, int]
            the mapping of vertices from the full graph to the sampled graph
        assignment : np.ndarray[int]
            the true vertex-to-community mapping
        """
        #####
        # General
        #####
        self.sampled_graph_num_vertices = sampled_graph.num_vertices()
        self.sampled_graph_num_edges = sampled_graph.num_edges()
        self.blocks_retained = sampled_graph_partition.get_B(
        ) / full_partition.get_B()
        # pseudo_diameter returns a tuple: (diameter, (start_vertex, end_vertex))
        self.sampled_graph_diameter = pseudo_diameter(sampled_graph)[0]
        self.full_graph_diameter = pseudo_diameter(full_graph)[0]
        for vertex in sampled_graph.vertices():
            if (vertex.in_degree() + vertex.out_degree()) == 0:
                self.sampled_graph_island_vertices += 1
        self.sampled_graph_largest_component = extract_largest_component(
            sampled_graph, directed=False).num_vertices()
        self.full_graph_largest_component = extract_largest_component(
            full_graph, directed=False).num_vertices()

        ######
        # Expansion quality (http://portal.acm.org/citation.cfm?doid=1772690.1772762)
        ######
        # Expansion factor = Neighbors of sample / size of sample
        # Maximum expansion factor = (size of graph - size of sample) / size of sample
        # Expansion quality = Neighbors of sample / (size of graph - size of sample)
        # Expansion quality = 1 means sample is at most 1 edge away from entire graph
        sampled_graph_vertices = set(vertex_mapping.keys())
        neighbors = set()
        for vertex in sampled_graph_vertices:
            for neighbor in full_graph.get_out_neighbors(vertex):
                neighbors.add(neighbor)
        neighbors = neighbors - sampled_graph_vertices
        self.expansion_quality = len(neighbors) / (
            full_graph.num_vertices() - sampled_graph.num_vertices())

        ######
        # Clustering coefficient
        ######
        self.sampled_graph_clustering_coefficient = global_clustering(
            sampled_graph)[0]
        self.full_graph_clustering_coefficient = global_clustering(
            full_graph)[0]

        ######
        # Info on communities
        ######
        self.get_community_details(
            assignment,
            full_partition.get_blocks().get_array(),
            sampled_graph_partition.get_blocks().get_array(), vertex_mapping)

        if np.unique(
                assignment
        ).size == 1:  # Cannot compute below metrics if no true partition is provided
            return

        #####
        # % difference in ratio of within-block to between-block edges
        #####
        sample_assignment = assignment[np.fromiter(vertex_mapping.keys(),
                                                   dtype=np.int32)]
        true_sampled_graph_partition = partition_from_truth(
            sampled_graph, sample_assignment)
        sampled_graph_blockmatrix = true_sampled_graph_partition.get_matrix()
        self.sampled_graph_edge_ratio = sampled_graph_blockmatrix.diagonal(
        ).sum() / sampled_graph_blockmatrix.sum()
        true_full_partition = partition_from_truth(full_graph, assignment)
        full_blockmatrix = true_full_partition.get_matrix()
        self.graph_edge_ratio = full_blockmatrix.diagonal().sum(
        ) / full_blockmatrix.sum()

        #####
        # Normalized difference from ideal-block membership
        #####
        membership_size = max(np.max(assignment),
                              np.max(sample_assignment)) + 1
        full_graph_membership_nums = np.zeros(membership_size)
        for block_membership in assignment:
            full_graph_membership_nums[block_membership] += 1
        sampled_graph_membership_nums = np.zeros(membership_size)
        for block_membership in sample_assignment:
            sampled_graph_membership_nums[block_membership] += 1
        ideal_block_membership_nums = full_graph_membership_nums * \
            (sampled_graph.num_vertices() / full_graph.num_vertices())
        difference_from_ideal_block_membership_nums = np.abs(
            ideal_block_membership_nums - sampled_graph_membership_nums)
        self.difference_from_ideal_sample = np.sum(
            difference_from_ideal_block_membership_nums /
            sampled_graph.num_vertices())
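The expansion-quality comments above reduce to a single ratio: out-of-sample neighbors of the sample divided by the number of vertices outside the sample. A standalone, hedged sketch of just that ratio on a toy lattice (not the BlockState pipeline used here):

# Hedged sketch of the expansion-quality ratio described in the comments above.
from graph_tool.generation import lattice

full_graph = lattice([10, 10])   # 100-vertex grid standing in for the full graph
sample = set(range(10))          # hypothetical sampled vertex ids
neighbors = set()
for v in sample:
    neighbors.update(int(u) for u in full_graph.get_out_neighbors(v))
neighbors -= sample
expansion_quality = len(neighbors) / (full_graph.num_vertices() - len(sample))
print(expansion_quality)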
Example #11
def _get_clustering_coefficient(G):
    '''Return the clustering coefficient :math:`C(G)`.'''
    return global_clustering(G)[0]