Example #1
def get_centrality(graph, method, topk=None):

    if method == "edge_betweeness_centrality":
        output = nx.edge_betweenness_centrality(graph)
    elif method == "betweenness_centrality":
        output = nx.betweenness_centrality(graph)
    elif method == "closeness_centrality":
        output = nx.closeness_centrality(graph)
    elif method == "eigenvector_centrality":
        output = nx.eigenvector_centrality(graph)
    elif method == "in_degree_centrality":
        output = nx.in_degree_centrality(graph)
    elif method == "out_degree_centrality":
        output = nx.out_degree_centrality(graph)
    elif method == "pagerank":
        output = pagerank(graph)
    else:
        return
    print(len(output))
    output = np.array(create_array(output))
    mean = round(np.mean(output), 4)
    if topk:
        arg_sorted_results = np.argsort(output)[::-1][:topk]
    else:
        arg_sorted_results = np.argsort(output)[::-1]

    return output, arg_sorted_results, mean
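
A minimal usage sketch (not from the original source), assuming `pagerank` was imported from networkx and that `create_array` simply flattens the centrality dict into a list of scores:

# Hypothetical usage; the create_array stand-in and the toy graph are assumptions.
import networkx as nx
import numpy as np
from networkx import pagerank

def create_array(centrality_dict):
    # Plausible stand-in: scores in sorted-node order.
    return [centrality_dict[node] for node in sorted(centrality_dict)]

g = nx.gnp_random_graph(50, 0.1, directed=True, seed=1)
scores, ranked_idx, mean_score = get_centrality(g, "pagerank", topk=5)
print(ranked_idx, mean_score)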

Example #2
def undirected_page_rank(topic,
                         corpus,
                         n_docs=100,
                         sim="TF-IDF",
                         threshold=0.4,
                         max_iter=50,
                         damping=0.15,
                         use_priors=False,
                         weighted=False):

    if sim not in ("TF", "TF-IDF", "BM25"):
        raise ValueError(
            "Invalid similarity criterion: please use 'TF', 'TF-IDF' or 'BM25'"
        )

    use_idf = sim != "TF"

    graph = build_graph(corpus, use_idf=use_idf, threshold=threshold)

    priors = get_priors(graph, topic, sim) if use_priors else None
    weight = "weight" if weighted else None

    # "Undirected graphs will be converted to a directed graph with two directed edges for each undirected edge"
    page_rank = pagerank(graph,
                         max_iter=max_iter,
                         alpha=1 - damping,
                         personalization=priors,
                         weight=weight)

    return Counter(page_rank).most_common(n_docs)
Example #3
 def eval_rank_graph(self, graph, algorithm="pagerank"):
     # should return a list of tuples (result,score) sorted in
     # reversed order (ie highest score first)
     if algorithm == "pagerank":
         ranked = pagerank(graph)
     elif algorithm == "hits":
         ranked = hits(graph, max_iter=10000)[1]  # 0: hubs, 1: authorities
     elif algorithm == "indegree":
         ranked = graph.in_degree()
     else:
         self.log.error(
             "Unknown ranking algorithm %s specified" % algorithm)
     sortedrank = sorted(
         iter(list(ranked.items())), key=itemgetter(1), reverse=True)
     return sortedrank
Example #5
# The imports and the helper's opening were cut off in the original snippet;
# the def line below is an assumption (`replacements`: list of (pattern, repl) pairs).
import operator
import re

import networkx as nx
from networkx import pagerank
from textblob import TextBlob

def clean_text(text, replacements):
	for r in replacements:
		text = re.sub(r[0], r[1], text)
	return text

def preprocess(raw):
	blob = TextBlob(raw)
	return blob.noun_phrases

	# tokens = nltk.word_tokenize(raw)
	# return tokens

def create_graph(tokens, dist):
	# Link each token to the tokens that follow it within a window of `dist`.
	graph = nx.Graph()

	for index, token in enumerate(tokens):
		for i in range(1, dist):  # skip i = 0 so tokens are not linked to themselves
			try:
				graph.add_edge(token, tokens[index + i])
			except IndexError:
				pass

	return graph

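# TextRank-style keyword extraction: read the text, take its noun phrases,
# link phrases that co-occur within a window, and rank them with PageRank.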
with open('input.txt', 'r') as f:
	raw = f.read()

tokens = preprocess(raw)
graph = create_graph(tokens, 5)
pr = pagerank(graph)
print(sorted(pr.items(), key=operator.itemgetter(1), reverse=True)[:10])
Example #6
    # Small toy graph for sanity checks (immediately replaced by the Facebook
    # edge list below).
    G = nx.Graph()
    G.add_edge(0, 1)
    G.add_edge(0, 2)
    G.add_edge(1, 2)
    G.add_edge(1, 3)
    G.add_edge(3, 4)
    G.add_edge(3, 5)
    G.add_edge(3, 6)
    G.add_edge(4, 5)
    G.add_edge(5, 6)
    G = load_dataset("facebook_large\\musae_facebook_edges.csv")
    import time

    print("vectorized pagerank")
    start = time.time()
    v = vectorized_pagerank(G, dense=False)
    end = time.time()
    print("Time: ", end - start)
    print(v)
    # v = eig_pagerank(G)

    # v = pagerank_numpy(G)
    # print(v)
    N = len(G)
    print("networkx pagerank")
    start = time.time()
    v = pagerank(G)
    end = time.time()
    print("Time: ", end - start)
    print(v[0], v[1], v[2], "...", v[N - 3], v[N - 2], v[N - 1])
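
`vectorized_pagerank` and `load_dataset` are not shown in this snippet. As a rough reference for what the vectorized variant might look like, here is a sketch of sparse power-iteration PageRank (an assumption, not the original helper; dangling-node mass is simply dropped for brevity, unlike networkx, which redistributes it):

import numpy as np
import scipy.sparse as sp
import networkx as nx

def vectorized_pagerank_sketch(G, alpha=0.85, tol=1e-10, max_iter=100):
    nodes = list(G)
    n = len(nodes)
    A = nx.to_scipy_sparse_array(G, nodelist=nodes, format="csr")
    out_deg = np.asarray(A.sum(axis=1)).ravel()
    out_deg[out_deg == 0] = 1.0               # avoid division by zero for isolated nodes
    P = sp.diags(1.0 / out_deg) @ A           # row-stochastic transition matrix
    v = np.full(n, 1.0 / n)
    for _ in range(max_iter):
        v_new = alpha * (P.T @ v) + (1.0 - alpha) / n
        if np.abs(v_new - v).sum() < tol:
            v = v_new
            break
        v = v_new
    return dict(zip(nodes, v))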
Example #7
    # Aggregate gradients over the batch, pick the k features with the largest
    # absolute influence, and record the sign of each selected feature's score.
    score = grad.sum(dim=0)
    _, indexs = th.topk(score.abs(), k)
    signs = th.zeros(data.features.shape[1])
    for i in indexs:
        signs[i] = score[i].sign()
    return signs, indexs


assert args.train + args.test + args.validation <= 1
NumTrain = int(data.size * args.train)
NumTest = int(data.size * args.test)
NumVal = int(data.size * args.validation)


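# data.g.to_networkx() suggests a DGL-style graph object; convert it to a
# plain networkx Graph and rank its nodes by PageRank and betweenness
# centrality to build the baseline node selections below.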
nxg = nx.Graph(data.g.to_networkx())
page = pagerank(nxg)
between = betweenness(nxg)
PAGERANK = sorted([(page[i], i) for i in range(data.size)], reverse=True)
BETWEEN = sorted([(between[i], i) for i in range(data.size)], reverse=True)
Important_score = getScore(args.steps, data)
Important_list = sorted([(Important_score[i], i) for i in range(data.size)],
                        reverse=True)
bar, Baseline_Degree, Baseline_Random = getThrehold(data.g, data.size,
                                                    args.threshold,
                                                    args.num_node)
Baseline_Pagerank = getIndex(data.g, PAGERANK, bar, args.num_node)
Baseline_Between = getIndex(data.g, BETWEEN, bar, args.num_node)
RWCS = getIndex(data.g, Important_list, bar, args.num_node)
GC_RWCS = getScoreGreedy(args.steps, data, bar, args.num_node, args.beta)
model, optimizer = init_model()
idx_train, idx_val, idx_test = split_data(data, NumTrain, NumTest, NumVal)
Example #8
        [it[1] for it in sorted(
            average_degree_connectivity(nx_g).items(), reverse=True
        )]
    ))

    nx_dh = np.array(degree_histogram(nx_g)) / nx_g.number_of_nodes()
    nx_cdh = np.flip(
        np.flip(np.array(degree_histogram(nx_g)) / nx_g.number_of_nodes(), 0).cumsum(), 0
    )

    nx_dc = np.array(
            [it[1] for it in sorted(nx_degree_centrality(nx_g).items())]
    )

    nx_pr = np.array([val for val in pagerank(nx_g).values()])
    nx_hc = np.array(
        [val for val in nx_harmonic_centrality(nx_g).values()]
    ) / nx_g.number_of_nodes()

    nx_bc = np.array(
        [val for val in nx_betweenness_centrality(nx_g).values()]
    )

    write_gml(nx_g, './graph.gml')
    gt_g = gt.load_graph('./graph.gml')

    gt_apl = avg_path_length(gt_g)
    gt_ad = avg_degree(gt_g)
    gt_gcc = gb_clus_coef(gt_g)
    gt_lcc = lcl_clus_coef(gt_g)

def partition_featurize_graph_fpdwl(G, k=100, dims=64, wl_steps=1,
                                    distribution_offset=0, distribution_exponent=0):
    """
    Partition+Anchor a graph using Fluid communities+Pagerank and produce node features using Degree+WL
    (Hence fpdwl)
    -----------
    Parameters:
    G : NetworkX graph
    k : number of blocks in partition
    dims : dimension of feature space
    wl_steps : number of Weisfeiler-Lehman aggregations to carry out
    -------
    Returns:
    p : dict with keys=node labels and values=probabilities on nodes
    partition : list of sets containing node labels
    node_subset : list of anchor node labels
    dists : distances between anchors
    features : degree+WL based node features
    """
    pr = pagerank(G)
    # Partition graph via Fluid communities
    partition = list(asyn_fluidc(G, k))

    # Create anchors via PageRank
    anchors = []
    for p in partition:
        part_pr = {}
        for s in p:
            part_pr[s] = pr[s]
        anchors.append(max(part_pr, key=part_pr.get))
    anchors = sorted(anchors) # Fix an ordering on anchors

    # Featurize using degrees and Weisfeiler-Lehman
    degrees = dict(nx.degree(G))
    # One-hot encoding of degrees
    for key in degrees.keys():
        deg = degrees[key]
        feat = np.zeros(dims)
        if deg < dims:
            feat[deg] += 1  # create one-hot encoding
        degrees[key] = feat  # replace scalar degree with one-hot vector
    for i in range(wl_steps):
        degrees = wl_label(G,degrees)
    # Rename, obtain sorted node names and features
    features = degrees
    a,b = list(zip(*sorted(features.items())))
    nodes = list(a)
    features = np.array(b)

    # Obtain probability vector
    p = np.array([(G.degree(n)+distribution_offset)**distribution_exponent for n in nodes])
    p = p/np.sum(p)

    # Rename anything else
    node_subset = anchors
    node_subset_idx = [nodes.index(v) for v in node_subset] #indices of anchor nodes in node list

    return nodes, features, p, partition, node_subset, node_subset_idx
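
`wl_label` is not defined in this snippet. A minimal sketch of what one Weisfeiler-Lehman-style aggregation step could look like on these vector features (an assumption about the helper, not the original implementation):

import numpy as np
import networkx as nx

def wl_label_sketch(G, feats):
    # One aggregation step: each node's new vector is its own vector plus the
    # sum of its neighbours' vectors, renormalized to unit length.
    new_feats = {}
    for v in G.nodes():
        agg = np.array(feats[v], dtype=float)
        for u in G.neighbors(v):
            agg = agg + feats[u]
        norm = np.linalg.norm(agg)
        new_feats[v] = agg / norm if norm > 0 else agg
    return new_feats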