Example #1
def load_train_test_graphs(dataset, recache_input):
    raw_mat_path = 'data/{}.npz'.format(dataset)
    train_graph_path = 'data/{}/train_graph.pkl'.format(dataset)
    test_graph_path = 'data/{}/test_graph.pkl'.format(dataset)

    if recache_input:
        print('loading sparse matrix from {}'.format(raw_mat_path))
        m = load_sparse_csr(raw_mat_path)

        print('splitting train and test...')
        train_m, test_m = split_train_test(
            m,
            weights=[0.9, 0.1])

        print('converting to nx.DiGraph')
        train_g = nx.from_scipy_sparse_matrix(train_m, create_using=nx.DiGraph(), edge_attribute='sign')
        test_g = nx.from_scipy_sparse_matrix(test_m, create_using=nx.DiGraph(), edge_attribute='sign')
                
        print('saving train and test graphs...')
        nx.write_gpickle(train_g, train_graph_path)
        nx.write_gpickle(test_g, test_graph_path)
    else:
        print('loading train and test graphs...')
        train_g = nx.read_gpickle(train_graph_path)
        test_g = nx.read_gpickle(test_graph_path)
    return train_g, test_g
Example #2
    def test_from_scipy_sparse_matrix_parallel_edges(self):
        """Tests that the :func:`networkx.from_scipy_sparse_matrix` function
        interprets integer weights as the number of parallel edges when
        creating a multigraph.

        """
        A = sparse.csr_matrix([[1, 1], [1, 2]])
        # First, with a simple graph, each integer entry in the adjacency
        # matrix is interpreted as the weight of a single edge in the graph.
        expected = nx.DiGraph()
        edges = [(0, 0), (0, 1), (1, 0)]
        expected.add_weighted_edges_from([(u, v, 1) for (u, v) in edges])
        expected.add_edge(1, 1, weight=2)
        actual = nx.from_scipy_sparse_matrix(A, parallel_edges=True,
                                             create_using=nx.DiGraph())
        assert_graphs_equal(actual, expected)
        actual = nx.from_scipy_sparse_matrix(A, parallel_edges=False,
                                             create_using=nx.DiGraph())
        assert_graphs_equal(actual, expected)
        # Now each integer entry in the adjacency matrix is interpreted as the
        # number of parallel edges in the graph if the appropriate keyword
        # argument is specified.
        edges = [(0, 0), (0, 1), (1, 0), (1, 1), (1, 1)]
        expected = nx.MultiDiGraph()
        expected.add_weighted_edges_from([(u, v, 1) for (u, v) in edges])
        actual = nx.from_scipy_sparse_matrix(A, parallel_edges=True,
                                             create_using=nx.MultiDiGraph())
        assert_graphs_equal(actual, expected)
        expected = nx.MultiDiGraph()
        expected.add_edges_from(set(edges), weight=1)
        # The sole self-loop (edge 0) on vertex 1 should have weight 2.
        expected[1][1][0]['weight'] = 2
        actual = nx.from_scipy_sparse_matrix(A, parallel_edges=False,
                                             create_using=nx.MultiDiGraph())
        assert_graphs_equal(actual, expected)
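A minimal sketch (not part of the test above) of the parallel_edges behaviour being tested; the matrix and variable names are hypothetical, and it assumes networkx and scipy are importable:

import networkx as nx
from scipy import sparse

A = sparse.csr_matrix([[0, 3], [0, 0]])
# As a simple digraph, the integer entry stays a single edge with weight 3.
G_simple = nx.from_scipy_sparse_matrix(A, create_using=nx.DiGraph())
print(G_simple[0][1]['weight'])  # 3
# As a multidigraph with parallel_edges=True, the entry becomes 3 parallel edges of weight 1 each.
G_multi = nx.from_scipy_sparse_matrix(A, parallel_edges=True, create_using=nx.MultiDiGraph())
print(G_multi.number_of_edges(0, 1))  # 3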
Example #3
def submatrix_pull_via_networkx(matrix, node_array, directed=True):

    if directed:
        graph = nx.from_scipy_sparse_matrix(matrix, create_using=nx.DiGraph())
    else:
        graph = nx.from_scipy_sparse_matrix(matrix, create_using=nx.Graph())

    sub_graph = graph.subgraph(list(node_array))

    sub_matrix = nx.to_scipy_sparse_matrix(sub_graph, dtype=np.float64, format="csr")

    return sub_matrix
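A minimal usage sketch for the helper above (hypothetical matrix; assumes numpy, scipy, and networkx are imported as in the snippet):

import numpy as np
from scipy.sparse import csr_matrix

m = csr_matrix(np.array([[0, 1, 0],
                         [0, 0, 2],
                         [3, 0, 0]]))
sub = submatrix_pull_via_networkx(m, np.array([0, 2]), directed=True)
print(sub.toarray())  # 2x2 submatrix over nodes {0, 2}; only the 2 -> 0 edge (weight 3) survives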
Example #4
    def configuration_model(self, return_copy=False):
        """ Reads AdjMatrixSequence Object and returns an edge randomized version.
            Result is written to txt file.
        """
        if self.is_directed:
            nx_creator = nx.DiGraph()
        else:
            nx_creator = nx.Graph()

        if return_copy:
            x = self[:]
        else:
            x = self

        # t_edges=[]
        for i in range(len(self)):
            print("configuration model: ", i)
            graphlet = nx.from_scipy_sparse_matrix(x[i], create_using=nx_creator)
            graphlet = gwh.randomize_network(graphlet)
            x[i] = nx.to_scipy_sparse_matrix(graphlet, dtype="int")
            # for u,v in graphlet.edges():
            #    t_edges.append((u,v,i))

        # gwh.write_array(t_edges,"Configuration_model.txt")

        if return_copy:
            return x
        else:
            return
Example #5
def learnStructure(dataP, dataS, Pp, Ps, TAN= True):
    tempMatrix = [[0 for i in range(len(dataP))] for j in range(len(dataP))]
    for i in range(len(dataP)):
        for j in range(i+1, len(dataP)):
            temp = 0.0
            if np.corrcoef(dataP[i], dataP[j])[0][1] != 1.0:
                temp += Pp * math.log(1-((np.corrcoef(dataP[i], dataP[j])[0][1])**2))
            if np.corrcoef(dataS[i], dataS[j])[0][1] != 1.0:
                temp += Ps * math.log(1-((np.corrcoef(dataS[i], dataS[j])[0][1])**2))
            temp *= (0.5)
            tempMatrix[i][j] = temp
            #tempMatrix[j][i] = temp
    MaxG = nx.DiGraph()
    if TAN:
        G = nx.from_scipy_sparse_matrix(minimum_spanning_tree(csr_matrix(tempMatrix)))
        adjList = G.adj
        i = 0
        notReturnable = {}
        MaxG = getDirectedTree(adjList, notReturnable, MaxG, i)
    else:
        G = nx.Graph(np.asmatrix(tempMatrix))
        adjList = sorted([(u,v,d['weight']) for (u,v,d) in G.edges(data=True)], key=lambda x:x[2])
        i = 2
        MaxG = getDirectedGraph(adjList, MaxG, i)
    return MaxG
Example #6
    def identity_conversion(self, G, A, create_using):
        GG = nx.from_scipy_sparse_matrix(A, create_using=create_using)
        self.assert_equal(G, GG)

        GW = nx.to_networkx_graph(A, create_using=create_using)
        self.assert_equal(G, GW)

        GI = create_using.__class__(A)
        self.assert_equal(G, GI)

        ACSR = A.tocsr()
        GI = create_using.__class__(ACSR)
        self.assert_equal(G, GI)

        ACOO = A.tocoo()
        GI = create_using.__class__(ACOO)
        self.assert_equal(G, GI)

        ACSC = A.tocsc()
        GI = create_using.__class__(ACSC)
        self.assert_equal(G, GI)

        AD = A.todense()
        GI = create_using.__class__(AD)
        self.assert_equal(G, GI)

        AA = A.toarray()
        GI = create_using.__class__(AA)
        self.assert_equal(G, GI)
Example #7
def community(document):
	sentences = sent_tokenize(document) 
	bow_matrix = CountVectorizer(stop_words = 'english').fit_transform(sentences)
	normalized = TfidfTransformer().fit_transform(bow_matrix)
	similarity_graph = normalized * normalized.T
	nx_graph = nx.from_scipy_sparse_matrix(similarity_graph)
	sub_graphs = []
    #n gives the number of sub graphs
	edge_wts = list(nx_graph.edges(data=True))
	edge_wts.sort(key=lambda x: x[2]['weight'], reverse=True)
	k = 10 #number of sentence in summary
	G = nx.Graph()
	for i in nx_graph.nodes():
		G.add_node(i)
	for u, v, d in edge_wts:
		G.add_edge(u, v, **d)
		sub_graphs = list(nx.connected_component_subgraphs(G))
		# print sub_graphs
		n = len(sub_graphs)
		if n == k:	break
	inSummary = [0 for i in range(len(sentences))]

	n = len(sub_graphs)
	for i in range(n):
		sen = [sentences[j] for j in (sub_graphs[i].nodes())]
		arr = [j for j in (sub_graphs[i].nodes())]
		scores = textrank(sen)
		# print (scores)
		# print (arr)
		for j in range(len(arr)):
			inSummary[arr[j]] = scores[j];
	# print inSummary
	summ = [(sentences[i],inSummary[i]) for i in range(len(inSummary)) ]
	# print summ[0]
	return summ
Example #8
def compute_clusters_statistic(test_statistic, proximity_matrix, verbose=False):
    """Given a test statistic for each unit and a boolean proximity
    matrix among units, compute the cluster statistic using the
    connected components graph algorithm. It works for sparse
    proximity matrices as well.

    Returns the clusters and their associated cluster statistic.
    """
    # Build a graph from the proximity matrix:
    if issparse(proximity_matrix):
        graph = from_scipy_sparse_matrix(proximity_matrix)
    else:
        graph = from_numpy_matrix(proximity_matrix)

    # Compute connected components:
    clusters = connected_components(graph)
    if verbose: print("Nr. of clusters: %s. Clusters sizes: %s" % (len(clusters), np.array([len(cl) for cl in clusters])))
    # Compute the cluster statistic:
    cluster_statistic = np.zeros(len(clusters))
    for i, cluster in enumerate(clusters):
        cluster_statistic[i] = test_statistic[cluster].sum()

    # final cleanup to prepare easy-to-use results:
    idx = np.argsort(cluster_statistic)[::-1]
    clusters = np.array([np.array(cl, dtype=np.int) for cl in clusters], dtype=np.object)[idx]
    if clusters[0].dtype == np.object: # THIS FIXES A NUMPY BUG (OR FEATURE?)
        # The bug: it seems not possible to create ndarray of type
        # np.object from arrays all of the *same* length and desired
        # dtype, i.e. dtype!=np.object. In this case the desired dtype
        # is automatically changed into np.object. Example:
        # array([array([1], dtype=int)], dtype=object)
        clusters = clusters.astype(np.int)

    cluster_statistic = cluster_statistic[idx]
    return clusters, cluster_statistic
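A small usage sketch for the function above, with made-up data; it assumes the names used by the snippet (from_numpy_matrix, an older connected_components that returns a list, issparse, numpy) are already in scope:

import numpy as np

test_statistic = np.array([2.0, 3.0, 0.5, 1.5])
proximity_matrix = np.array([[0, 1, 0, 0],
                             [1, 0, 0, 0],
                             [0, 0, 0, 1],
                             [0, 0, 1, 0]], dtype=bool)
clusters, cluster_statistic = compute_clusters_statistic(test_statistic, proximity_matrix)
# clusters come back sorted by decreasing statistic: {0, 1} (sum 5.0) before {2, 3} (sum 2.0)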
Example #9
def textrank(document):
    pst = PunktSentenceTokenizer()
    sentences = pst.tokenize(document)

    # Bag of Words
    from sklearn.feature_extraction.text import CountVectorizer
    cv = CountVectorizer()
    bow_matrix = cv.fit_transform(sentences)

    from sklearn.feature_extraction.text import TfidfTransformer
    normalized_matrix = TfidfTransformer().fit_transform(bow_matrix)

    ## mirrored matrix where the rows and columns correspond to 
    ## sentences, and the elements describe how similar the
    ## sentences are. score 1 means sentences are exactly the same.
    similarity_graph = normalized_matrix * normalized_matrix.T
    similarity_graph.toarray()

    # PageRank
    import networkx as nx
    nx_graph = nx.from_scipy_sparse_matrix(similarity_graph)

    ## mapping of sentence indices to scores. use them to associate
    ## back to the original sentences and sort them
    scores = nx.pagerank(nx_graph)
    ranked = sorted(((scores[i], s) for i,s in enumerate(sentences)), reverse=True)
    print(ranked[0][1])
Example #10
def textrank(sentences):
    bow_matrix = CountVectorizer().fit_transform(sentences)
    normalized = TfidfTransformer().fit_transform(bow_matrix)
    similarity_graph = normalized * normalized.T
    nx_graph = nx.from_scipy_sparse_matrix(similarity_graph)
    scores = nx.pagerank(nx_graph)
    return sorted(((scores[i], i, s) for i, s in enumerate(sentences)), reverse=True)
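A minimal usage sketch for the function above (hypothetical sentences; assumes CountVectorizer, TfidfTransformer, and networkx are imported as in the snippet):

sentences = ["Cats are small mammals.",
             "Dogs are loyal companions.",
             "Cats and dogs are common pets."]
for score, idx, sentence in textrank(sentences)[:2]:
    print(round(score, 3), idx, sentence)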
Example #11
    def plot2d(self, title=None, domain=[-1, 1], codomain=[-1, 1], predict=True):
        f, ax = plt.subplots()

        x1 = np.linspace(*domain, 100)
        x2 = np.linspace(*codomain, 100)

        n_samples, n_features = self.X_.shape
        G = nx.from_scipy_sparse_matrix(self.A_)
        pos = {i: self.X_[i] for i in range(n_samples)}
        cm_sc = ListedColormap(["#AAAAAA", "#FF0000", "#0000FF"])

        if title is not None:
            ax.set_title(title)

        ax.set_xlabel("$x_1$")
        ax.set_ylabel("$x_2$")
        ax.set_xlim(domain)
        ax.set_ylim(codomain)

        nx.draw_networkx_nodes(G, pos, ax=ax, node_size=25, node_color=self.y_, cmap=cm_sc)

        if predict:
            xx1, xx2 = np.meshgrid(x1, x2)
            xfull = np.c_[xx1.ravel(), xx2.ravel()]
            z = self.predict(xfull).reshape(100, 100)

            levels = np.array([-1, 0, 1])
            cm_cs = plt.cm.RdYlBu

            if self.params["gamma_i"] != 0.0:
                nx.draw_networkx_edges(G, pos, ax=ax, edge_color="#AAAAAA")

            ax.contourf(xx1, xx2, z, levels, cmap=cm_cs, alpha=0.25)

        return (f, ax)
Example #12
def format_out_relations(relations, out_):
    """Format relations in the format determined by the parameter out_.

    Parameters
    ----------
    relations: scipy.sparse matrix
        the relations expressed in a sparse way.
    out_: optional, ['sparse', 'network', 'sp_relations', 'list']
        the desired output format.

    Returns
    -------
    relations: decided format
        the relations expressed in the decided format.

    """

    if out_ == 'sparse':
        relations_o = relations
    elif out_ == 'network':
        relations_o = nx.from_scipy_sparse_matrix(relations)
    elif out_ == 'sp_relations':
        relations_o = RegionDistances(relations)
    elif out_ == 'list':
        relations_o = []
        for i in range(relations.shape[0]):
            relations_o.append(list(relations.getrow(i).nonzero()[0]))
    return relations_o
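A short usage sketch for the output formats listed in the docstring (hypothetical matrix; the 'sp_relations' branch additionally needs the surrounding package's RegionDistances class):

from scipy.sparse import csr_matrix

relations = csr_matrix([[0, 1], [1, 0]])
as_graph = format_out_relations(relations, 'network')   # networkx Graph with the single edge 0 - 1
as_sparse = format_out_relations(relations, 'sparse')   # returned unchanged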
Example #13
def classify_samples(data, labels, unmarked_idxs,
                     sample_size, n_runs, n_clusters):
    unmarked_point_probs = {}
    all_idxs = list(range(len(unmarked_idxs)))
    random.shuffle(all_idxs)
    keep_raw_idxs = sorted(all_idxs[:sample_size])
    delete_raw_idxs = sorted(all_idxs[sample_size:])
    keep_idxs, delete_idxs = (unmarked_idxs[keep_raw_idxs],
                              unmarked_idxs[delete_raw_idxs])

    bagging_graph = nx.from_scipy_sparse_matrix(data)
    bagging_graph.remove_nodes_from(delete_idxs)
    bagging_adj_matrix = nx.to_scipy_sparse_matrix(bagging_graph)
    bagging_labels = np.delete(labels, delete_idxs, 0)
    bagging_unmarked_idxs = np.where(
        bagging_labels[:, 0] == -1)[0]

    clf = TransductiveClassifier(n_runs, n_clusters)
    clf.fit(bagging_adj_matrix, bagging_labels)
    assert len(keep_idxs) == len(bagging_unmarked_idxs)
    for i, idx in enumerate(keep_idxs):
        unmarked_point_probs[idx] = clf.transduction_[
            bagging_unmarked_idxs[i]]

    return unmarked_point_probs
Example #14
def plot_subgraph_links(sparse_m, query, degree=0, layout="std", graph=None):

    cond = np.where(query)[0]

    if graph is None:
        graph = nx.from_scipy_sparse_matrix(sparse_m)

    if degree == 0:
        sub1 = cond
        node_color = "r"
    elif degree == 1:
        sub1 = list(set(cond) | set(
            compute_sub_adj(sparse_m, cond)))
 #       print(sub1)
        node_color = [("r" if (n in cond) else "b") for n in sub1]
 #       print(node_color)
    elif degree == 2:
        sub0 = set(cond) | set(compute_sub_adj(sparse_m, cond))
        sub1 = list(sub0 | set(compute_sub_adj(sparse_m, list(sub0))))
        node_color = [("r" if (n in cond) else "b" if (
            n in sub0) else "y") for n in sub1]

    renderer[layout](
        graph.subgraph(sub1),
        nodelist=list(sub1),
        node_color=node_color,
        alpha=0.5,
        labels={n: str(n) for n in sub1})
Example #15
def draw_adjacency_graph (A,
    node_color=[], 
    size=10,
    layout='graphviz', 
    prog = 'neato',
    node_size=80):

    graph = nx.from_scipy_sparse_matrix(A)

    plt.figure(figsize=(size,size))
    plt.grid(False)
    plt.axis('off')

    if layout == 'graphviz':
        pos = nx.graphviz_layout(graph, prog = prog)
    else:
        pos = nx.spring_layout(graph)

    if not node_color:
        node_color='gray'
    nx.draw_networkx_nodes(graph, pos,
                           node_color = node_color, 
                           alpha = 0.6, 
                           node_size = node_size, 
                           cmap = plt.get_cmap('autumn'))
    nx.draw_networkx_edges(graph, pos, alpha = 0.5)
    plt.show()
Example #16
def cover(socp_data, N):
    """stacks the socp data and partitions it into N
    local dicts describing constraints R <= s"""
    if not settings.paths['graclus']:
        raise Exception(
            "Please provide a path to graclus: settings.paths['graclus'] = PATH.")

    n = socp_data['c'].shape[0]

    # form the Laplacian and use graclus to partition
    L = form_laplacian(socp_data)
    graph = nx.from_scipy_sparse_matrix(L)

    d = nx.convert.to_dict_of_lists(graph)

    edgepath = "graclus.edgelist"
    with open(edgepath, "w") as f:
        f.write("%d %d\n" % (graph.number_of_nodes(), graph.number_of_edges()))
        for k, v in d.items():
            f.write("%d %s\n" %
                    (k + 1, ' '.join(map(lambda x: str(x + 1), v))))

    import subprocess
    outpath = "graclus.edgelist.part.%d" % N
    proc = subprocess.Popen([settings.paths['graclus'], edgepath, str(N)])
    proc.wait()

    lines = open(outpath, "r").readlines()

    part_vert = []
    for l in lines:
        part_vert.append(int(l.strip()))

    return part_vert[n:]
Example #17
def draw_adjacency_graph(adjacency_matrix,
                         node_color=None,
                         size=10,
                         layout='graphviz',
                         prog='neato',
                         node_size=80,
                         colormap='autumn'):
    """draw_adjacency_graph."""
    graph = nx.from_scipy_sparse_matrix(adjacency_matrix)

    plt.figure(figsize=(size, size))
    plt.grid(False)
    plt.axis('off')

    if layout == 'graphviz':
        pos = nx.graphviz_layout(graph, prog=prog)
    else:
        pos = nx.spring_layout(graph)

    if node_color is None or len(node_color) == 0:
        node_color = 'gray'
    nx.draw_networkx_nodes(graph, pos,
                           node_color=node_color,
                           alpha=0.6,
                           node_size=node_size,
                           cmap=plt.get_cmap(colormap))
    nx.draw_networkx_edges(graph, pos, alpha=0.5)
    plt.show()
Example #18
def find_min_spanning_tree(A):
	"""
		Input:
			A : Adjacency matrix in scipy.sparse format.
		Output:
			T : Minimum spanning tree.
			run_time : Total runtime to find minimum spanning tree 

	"""
	# Record start time.
	start = time.time()

	# Check if graph is pre-processed, if yes then don't process it again.
	if os.path.exists('../Data/dcg_graph.json'):
		with open('../Data/dcg_graph.json') as data:
			d = json.load(data)
		G = json_graph.node_link_graph(d)

	# If graph is not preprocessed then convert it to a Graph and save it to a JSON file.
	else:
		G = from_scipy_sparse_matrix(A)
		data = json_graph.node_link_data(G)
		with open('../Data/dcg_graph.json', 'w') as outfile:
			json.dump(data, outfile)

	# Find MST.
	T = minimum_spanning_tree(G)

	#Record total Runtime
	run_time = time.time()-start
	return T, run_time
Example #19
    def get_key_sentences(self, n=5):
        '''
        Uses a simple implementation of TextRank to extract the top N sentences
        from a document.

        Sources:
        - Original paper: http://acl.ldc.upenn.edu/acl2004/emnlp/pdf/Mihalcea.pdf
        - Super useful blog post: http://joshbohde.com/blog/document-summarization
        - Wikipedia: http://en.wikipedia.org/wiki/Automatic_summarization#Unsupervised_keyphrase_extraction:_TextRank
        '''
        # Tokenize the document into sentences. More NLP preprocesing should also happen here. 
        sentence_tokenizer = PunktSentenceTokenizer()
        sentences = sentence_tokenizer.tokenize(self.doc)

        # Calculate word counts and TFIDF vectors
        word_counts = CountVectorizer(min_df=0).fit_transform(sentences)
        normalized = TfidfTransformer().fit_transform(word_counts) 

        # Normalized graph * its transpose yields a sentence-level similarity matrix
        similarity_graph = normalized * normalized.T
     
        nx_graph = nx.from_scipy_sparse_matrix(similarity_graph)
        scores = nx.pagerank(nx_graph)
        return sorted(((scores[i], s) for i, s in enumerate(sentences)),
                      reverse=True)[:n]
Example #20
def test_graph_degree():
    "Graph: Graph Degree"
    A = rand_dm(25, 0.5)
    deg = graph_degree(A.data)
    G = nx.from_scipy_sparse_matrix(A.data)
    nx_deg = G.degree()
    nx_deg = array([nx_deg[k] for k in range(25)])
    assert_equal((deg - nx_deg).all(), 0)
Example #21
 def __test_save_and_load_graph_npz(self, x):
     '''Test save and load a Networkx DiGraph in npz format with np-array wrapping.'''
     out_file = tempfile.TemporaryFile()
     np.savez(out_file, x=np.array([nx.to_scipy_sparse_matrix(x)]))
     out_file.seek(0) # Only needed here to simulate closing & reopening file
     x2 = np.load(out_file)
     y = nx.from_scipy_sparse_matrix(x2['x'][0], create_using=nx.DiGraph())
     assert_equal(x.nodes(), y.nodes(), 'Saving and loading did not restore the original object')
     assert_equal(x.edges(), y.edges(), 'Saving and loading did not restore the original object')
Example #22
def text_rank4(content):
    sents = list(cut_sentence(content))
    vect = TfidfVectorizer(min_df=1,tokenizer=Tokenize)
    tfidf = vect.fit_transform(sents)
    tfidf_graph = tfidf*tfidf.T
    nx_graph = nx.from_scipy_sparse_matrix(tfidf_graph)
    scores = nx.pagerank(nx_graph)
    res = sorted(((scores[i],i) for i,s in enumerate(sents)), reverse=True)
    top_n_summary = [sents[i] for _,i in sorted(res[:3])]
    print('text_rank4', u'。 '.join(top_n_summary).replace('\r', '').replace('\n', '') + u'。')
Example #23
 def to_networkx(self, directed=None):
   '''Converts this Graph object to a networkx-compatible object.
   Requires the networkx library.'''
   import networkx as nx
   directed = directed if directed is not None else self.is_directed()
   cls = nx.DiGraph if directed else nx.Graph
   adj = self.matrix()
   if ss.issparse(adj):
     return nx.from_scipy_sparse_matrix(adj, create_using=cls())
   return nx.from_numpy_matrix(adj, create_using=cls())
Example #24
def make_json_graph(msm, request):
    c = float(request.get_argument('cutoff'))
    e = str(request.get_argument('resize'))
    t = sparse.csr_matrix(msm.transmat_.copy())
    t.data[t.data < c] = 0.0
    t.eliminate_zeros()
    G = nx.from_scipy_sparse_matrix(t, create_using=nx.DiGraph())
    metric = resize[e](G, msm, t)
    nx.set_node_attributes(G, 'size', metric)
    G.remove_nodes_from(list(nx.isolates(G)))
    return json_graph.node_link_data(G)
Example #25
    def test_symmetric(self):
        """Tests that a symmetric matrix has edges added only once to an
        undirected multigraph when using
        :func:`networkx.from_scipy_sparse_matrix`.

        """
        A = sparse.csr_matrix([[0, 1], [1, 0]])
        G = nx.from_scipy_sparse_matrix(A, create_using=nx.MultiGraph())
        expected = nx.MultiGraph()
        expected.add_edge(0, 1, weight=1)
        assert_graphs_equal(G, expected)
Example #26
 def __init__(self, adj_matrix=None, file_name=None):
     if adj_matrix is None:
         self.adj_list, self.adj_matrix, self.station_lookup, self.index_lookup, \
         self.num_stations = self.__readInput(file_name)
         self.initializeGraph()
     else :
         self.adj_matrix=adj_matrix
         self.adj_list = self.adj_matrix_to_list(self.adj_matrix)
         self.graph_obj = nx.from_scipy_sparse_matrix(csr_matrix(adj_matrix))
         self.num_stations = len(adj_matrix)
         self.testing = True
Example #27
def cover(socp_data, N):
    """stacks the socp data and partitions it into N
    local dicts describing constraints R <= s"""
    n = socp_data['c'].shape[0]

    # form the Laplacian and use pymetis to partition
    L = form_laplacian(socp_data)
    graph = nx.from_scipy_sparse_matrix(L)
    cuts, part_vert = pm.part_graph(N, graph)

    return part_vert[n:]
Example #28
def remove_small_components(full_adj_matrix, labels, min_nodes):
    ## get rid of components with fewer than min_nodes nodes
    g = nx.from_scipy_sparse_matrix(full_adj_matrix)
    cpt_nodes = nx.connected_components(g)
    nodes = []
    for cpt in cpt_nodes:
        if len(cpt) >= min_nodes:
            nodes.extend(cpt)
    subgraph = g.subgraph(nodes)
    return (nx.to_scipy_sparse_matrix(subgraph, format="csc"),
            labels[subgraph.nodes()])
Example #29
	def textrank(self, document):
	    sentence_tokenizer = PunktSentenceTokenizer()
	    sentences = sentence_tokenizer.tokenize(document)

	    bow_matrix = CountVectorizer().fit_transform(sentences)
	    normalized = TfidfTransformer().fit_transform(bow_matrix)

	    similarity_graph = normalized * normalized.T

	    nx_graph = nx.from_scipy_sparse_matrix(similarity_graph)
	    scores = nx.pagerank(nx_graph)
	    return sorted(((scores[i],s) for i,s in enumerate(sentences)), reverse=True)
Example #30
def rank_sentences(sentences):
    simple_sent = [preprocess(s) for s in sentences]

    sim_mat = TfidfVectorizer().fit_transform(simple_sent)
    nx_graph = nx.from_scipy_sparse_matrix(sim_mat * sim_mat.T)
    scores = nx.pagerank(nx_graph)

    results = ((scores[i], i, s) for i,s in enumerate(sentences))

    results = sorted(results, key=lambda result: result[1])
    results = sorted(results, key=lambda result: result[0], reverse=True)

    return results
Example #31
File: seed.py  Project: jpgard/ffn
    def refresh_seeds(self, n_trees=1) -> np.ndarray:
        """Fetch a new set of tip seeds from the current canvas."""
        new_seeds = list()
        logging.info("TipTracerSeedPolicy skeletonizing and extracting seeds")
        # Transform logits to probabilities, apply threshold, and skeletonize to
        # extract the locations of leaf nodes ("tips")
        c_t = expit(np.squeeze(self.canvas.seed))
        c_t = np.nan_to_num(c_t)
        c_t = (c_t >= self.skeletonization_threshold).astype(np.uint8)
        s_t = morphology.skeletonize(c_t)
        self._check_save_skeleton(s_t)
        g_t, c_t, _ = skeleton_to_csgraph(s_t)
        g_t = nx.from_scipy_sparse_matrix(g_t)
        # Get connected components and extract leaf nodes, sorting from large to small.
        subgraphs = sorted(nx.connected_components(g_t), key=len, reverse=True)
        for subgraph_nodes in subgraphs[:n_trees]:

            leaf_node_ids = [
                node_id for node_id, node_degree in g_t.degree(subgraph_nodes)
                if node_degree == 1
            ]
            # Produce a nested list of [y, x] coordinates of leaf nodes in this subgraph.
            leaf_node_yx = c_t[leaf_node_ids, :].astype(int).tolist()
            new_seeds.extend(leaf_node_yx)

        # Add z-coordinate to new_seeds and append to list of seed coords.
        new_seeds = np.hstack((np.zeros((len(new_seeds), 1),
                                        dtype=int), new_seeds,
                               np.full((len(new_seeds), 1),
                                       self.idx,
                                       dtype=int)))

        # Compute the unique union of existing coords and new seeds (do not re-seed in
        # locations which have already been seeded.

        coord_update = np.vstack((self.coords, new_seeds))
        coord_update = np.unique(coord_update, axis=0)
        coord_update = coord_update[np.argsort(coord_update[:, 3])]
        self.coords = coord_update
Example #32
    def gerarGrafoNx(nome):
        """
        Parameters
        ----------
        nome: str
            Name of the .mtx file that contains the matrix of the graph
            used to initialize an object of the NetworkX
            library class
        Returns
        -------
        grafo
            NetworkX object representing a graph
        """
        matriz = mmread(nome + ".mtx")

        simetrica = ReducaoLarguraBanda.ehSimetrica(matriz)
        if (not simetrica):
            matriz += matriz.transpose()
        # print(matriz)

        grafo = nx.from_scipy_sparse_matrix(matriz)
        return grafo, simetrica
Example #33
def init_setup():
    data = Dataset(root='/tmp/', name=args.dataset, setting='nettack')
    injecting_nodes(data)

    adj, features, labels = data.adj, data.features, data.labels

    StaticGraph.graph = nx.from_scipy_sparse_matrix(adj)
    dict_of_lists = nx.to_dict_of_lists(StaticGraph.graph)

    idx_train, idx_val, idx_test = data.idx_train, data.idx_val, data.idx_test
    device = torch.device('cuda') if args.ctx == 'gpu' else 'cpu'

    # gray box setting
    adj, features, labels = preprocess(adj,
                                       features,
                                       labels,
                                       preprocess_adj=False,
                                       sparse=True,
                                       device=device)
    # Setup victim model
    victim_model = GCN(nfeat=features.shape[1],
                       nclass=labels.max().item() + 1,
                       nhid=16,
                       dropout=0.5,
                       weight_decay=5e-4,
                       device=device)

    victim_model = victim_model.to(device)
    victim_model.fit(features, adj, labels, idx_train, idx_val)
    setattr(victim_model, 'norm_tool',
            GraphNormTool(normalize=True, gm='gcn', device=device))

    output = victim_model.predict(features, adj)
    loss_test = F.nll_loss(output[idx_test], labels[idx_test])
    acc_test = accuracy(output[idx_test], labels[idx_test])
    print("Test set results:", "loss= {:.4f}".format(loss_test.item()),
          "accuracy= {:.4f}".format(acc_test.item()))

    return features, labels, idx_train, idx_val, idx_test, victim_model, dict_of_lists, adj
Example #34
def textrank_text_summarizer(documents,
                             num_sentences=2,
                             feature_type='frequency'):

    # assumes `documents` is an iterable of already-normalized sentence strings
    sentences = norm_sentences = list(documents)
    vec, dt_matrix = build_feature_matrix(norm_sentences, feature_type='tfidf')
    similarity_matrix = (dt_matrix * dt_matrix.T)

    similarity_graph = networkx.from_scipy_sparse_matrix(similarity_matrix)
    scores = networkx.pagerank(similarity_graph)

    ranked_sentences = sorted(
        ((score, index) for index, score in scores.items()), reverse=True)

    top_sentence_indices = [
        ranked_sentences[index][1] for index in range(num_sentences)
    ]
    top_sentence_indices.sort()
    s = ''
    for index in top_sentence_indices:
        s = s + ' ' + sentences[index]
        print(sentences[index])
    return s
Example #35
    def graph_from_scipy(x: ScipyGraph, **props) -> NetworkXGraph:
        from ..python.types import dtype_casting

        aprops = ScipyGraph.Type.compute_abstract_properties(
            x, {
                "is_directed", "edge_type", "edge_dtype", "node_type",
                "node_dtype"
            })

        nx_graph = nx.from_scipy_sparse_matrix(
            x.value,
            create_using=nx.DiGraph if aprops["is_directed"] else nx.Graph,
            edge_attribute="weight",
        )

        if aprops["edge_type"] == "set":
            # Remove weight attribute
            for _, _, attr in nx_graph.edges(data=True):
                del attr["weight"]
        else:
            caster = dtype_casting[aprops["edge_dtype"]]
            for _, _, attr in nx_graph.edges(data=True):
                attr["weight"] = caster(attr["weight"])

        is_sequential_node_list = (x.node_list == np.arange(len(
            x.node_list))).all()
        if not is_sequential_node_list:
            pos2id = dict(enumerate(x.node_list))
            nx.relabel_nodes(nx_graph, pos2id, False)

        if x.node_vals is not None:
            caster = dtype_casting[aprops["node_dtype"]]
            node_weights = {
                idx: caster(val)
                for idx, val in zip(x.node_list, x.node_vals)
            }
            nx.set_node_attributes(nx_graph, node_weights, name="weight")

        return NetworkXGraph(nx_graph, aprops=aprops)
Example #36
def subgraph_extraction_labeling(ind,
                                 A,
                                 h=1,
                                 max_nodes_per_hop=None,
                                 node_information=None):
    # extract the h-hop enclosing subgraph around link 'ind'
    dist = 0
    nodes = set([ind[0], ind[1]])
    visited = set([ind[0], ind[1]])
    fringe = set([ind[0], ind[1]])
    nodes_dist = [0, 0]
    for dist in range(1, h + 1):
        fringe = neighbors(fringe, A)
        fringe = fringe - visited
        visited = visited.union(fringe)
        if max_nodes_per_hop is not None:
            if max_nodes_per_hop < len(fringe):
                fringe = random.sample(fringe, max_nodes_per_hop)
        if len(fringe) == 0:
            break
        nodes = nodes.union(fringe)
        nodes_dist += [dist] * len(fringe)
    # move target nodes to top
    nodes.remove(ind[0])
    nodes.remove(ind[1])
    nodes = [ind[0], ind[1]] + list(nodes)
    subgraph = A[nodes, :][:, nodes]
    # apply node-labeling
    labels = node_label(subgraph)
    # get node features
    features = None
    if node_information is not None:
        features = node_information[nodes]
    # construct nx graph
    g = nx.from_scipy_sparse_matrix(subgraph)
    # remove link between target nodes
    if g.has_edge(0, 1):
        g.remove_edge(0, 1)
    return g, labels.tolist(), features
Example #37
 def load_data_GraphSaint(self):
     temp_data = self.load_m()
     train_data = self.process_graph_data(*temp_data)
     adj_full, adj_train, feat_full, class_arr, role = train_data
     adj_full = adj_full.astype(np.int32)
     # adj_train = adj_train.astype(np.int32)
     # adj_full_norm = adj_norm(adj_full)
     self._num_classes = class_arr.shape[1]
     # adj = _coo_scipy2torch(adj_full_norm.tocoo())
     print("create graph")
     t = time.time()
     graph = nx.from_scipy_sparse_matrix(adj_full)
     #convert the graph to the LOL format
     undirected_graph = lol.LolGraph(directed=False, weighted=False)
     undirected_graph.convert(list(graph.edges))
     self._g = undirected_graph
     print("took", time.time() - t)
     # nx.write_edgelist(self._g, "amazon.edgelist")
     # self._labels = torch.tensor(np.argwhere(class_arr==1).T[1])
     self._labels = torch.tensor(class_arr)
     self._X = torch.tensor(feat_full).to(dtype=torch.float)
     self.in_features = feat_full.shape[1]
Example #38
def order_points(points):
    """
    https://stackoverflow.com/questions/37742358/sorting-points-to-form-a-continuous-line
    """
    
    clf = NearestNeighbors(n_neighbors=2).fit(points)  # calc nearest neighbour
    G = clf.kneighbors_graph() #create sparse matrix
    T = nx.from_scipy_sparse_matrix(G) #construct graph from sparse matrix
    # order paths
    paths = [list(nx.dfs_preorder_nodes(T, i)) for i in range(len(points))]
    mindist = np.inf
    minidx = 0
    for i in range(len(points)):
        p = paths[i]           # order of nodes
        ordered = points[p]    # ordered nodes
        # find cost of that order by the sum of euclidean distances between points (i) and (i+1)
        cost = (((ordered[:-1] - ordered[1:])**2).sum(1)).sum()
        if cost < mindist:
            mindist = cost
            minidx = i
    
    return paths[minidx]
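A minimal usage sketch for order_points (hypothetical points; assumes numpy, networkx, and sklearn's NearestNeighbors are imported as in the snippet):

import numpy as np

points = np.array([[0.0, 0.0], [2.0, 0.1], [1.0, 0.0], [3.0, 0.2]])
order = order_points(points)
print(points[order])  # points reordered so consecutive rows form a continuous line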
Example #39
def summarize(text):
    print("Summary...")
    sentences_token = sent_tokenize(text)

    # Feature Extraction
    vectorizer = CountVectorizer(min_df=1, decode_error='replace')
    sent_bow = vectorizer.fit_transform(sentences_token)
    transformer = TfidfTransformer(norm='l2', smooth_idf=True, use_idf=True)
    sent_tfidf = transformer.fit_transform(sent_bow)

    similarity_graph = sent_tfidf * sent_tfidf.T

    nx_graph = nx.from_scipy_sparse_matrix(similarity_graph)
    scores = nx.pagerank(nx_graph)
    text_rank_graph = sorted(
        ((scores[i], s) for i, s in enumerate(sentences_token)), reverse=True)
    #      print(scores)
    number_of_sents = int(0.4 * len(text_rank_graph))
    del text_rank_graph[number_of_sents:]
    summary = ' '.join(word for _, word in text_rank_graph)

    return summary
Example #40
    def make_ConformationalNetwork(self):

        neigh = NearestNeighbors(radius=1, metric='chebyshev')
        neigh.fit(self.ijk_centers)
        net_centers = nx.from_scipy_sparse_matrix(
            neigh.radius_neighbors_graph())
        del (neigh)

        net_rotations = nx.Graph()
        net_rotations.add_nodes_from(range(self.num_rotations))
        for ii in range(self.num_rotations):
            neighs = hp.get_all_neighbours(self.nside, ii, nest=False)
            neighs[neighs == -1] = 0
            net_rotations.add_edges_from(
                zip(np.full(neighs.shape[0], ii), neighs))
        del (neighs)

        net = nx.cartesian_product(net_centers, net_rotations)

        del (net_rotations, net_centers)

        return net
Example #41
def get_highest_pagerank_scores(fileid, n=5):

    with open(fileid, encoding="utf-8") as f:
        text = f.read()
        sentences = re.findall(r'.*?\n', text[0:10000], flags=re.DOTALL)

    vectorizer = CountVectorizer()
    matrix = vectorizer.fit_transform(sentences)

    transformer = TfidfTransformer()
    normalized = transformer.fit_transform(matrix)

    similarity_graph = normalized * normalized.T
    nx_graph = nx.from_scipy_sparse_matrix(similarity_graph)

    scores = nx.pagerank(nx_graph)

    index_scores = scores.items()
    sorted_scores = sorted(index_scores, key=lambda x: x[1], reverse=True)

    for index, score in sorted_scores[:n]:
        print(score, sentences[index])
Example #42
def calculate_comment_tree_hirsch(comment_tree):
    comment_tree_nx = nx.from_scipy_sparse_matrix(comment_tree,
                                                  create_using=nx.Graph())

    if len(comment_tree_nx) == 0:
        comment_tree_hirsch = 0.0
    else:
        node_to_depth = nx.shortest_path_length(comment_tree_nx, 0)

        depth_to_nodecount = collections.defaultdict(int)

        for k, v in node_to_depth.items():
            depth_to_nodecount[v] += 1

        comment_tree_hirsch = max(node_to_depth.values())
        while True:
            if depth_to_nodecount[comment_tree_hirsch] >= comment_tree_hirsch:
                break
            else:
                comment_tree_hirsch -= 1

    return comment_tree_hirsch
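A small usage sketch for the h-index-style depth measure above (hypothetical comment tree; assumes numpy, scipy, networkx, and collections are imported as in the snippet):

import numpy as np
from scipy.sparse import csr_matrix

# root 0 with children 1 and 2; node 1 has one child, node 3
comment_tree = csr_matrix(np.array([[0, 1, 1, 0],
                                    [1, 0, 0, 1],
                                    [1, 0, 0, 0],
                                    [0, 1, 0, 0]]))
print(calculate_comment_tree_hirsch(comment_tree))  # 1: depth 1 has two nodes, depth 2 only one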
Example #43
    def generate_summary(self, sents):

        cv = CountVectorizer(ngram_range=(2, 2))
        bow_matrix = cv.fit_transform(sents)

        normalized = TfidfTransformer().fit_transform(bow_matrix)

        similarity_graph = normalized * normalized.T

        nx_graph = nx.from_scipy_sparse_matrix(similarity_graph)
        # print "graph built"
        scores = nx.pagerank(nx_graph)
        text_rank_graph = sorted(((scores[i], s) for i, s in enumerate(sents)),
                                 reverse=False)
        # print text_rank_graph
        number_of_nodes = int(0.3 * len(text_rank_graph))

        if number_of_nodes < 3:
            number_of_nodes = 3

        del text_rank_graph[number_of_nodes:]
        summaries = {}
        removed_sentences = []
        for _, sentence in text_rank_graph:

            for index, document in enumerate(self.documents):
                if sentence in document:
                    found = True
                    if index in summaries:
                        sentences = summaries[index]
                        sentences.append(sentence.strip())
                        summaries[index] = sentences
                    else:
                        summaries[index] = [sentence.strip()]

        # summary = ' '.join(sentence.strip() for _,sentence in text_rank_graph)
        # print summary
        return summaries, removed_sentences
Example #44
def draw_clustered_mlp(weights_path,
                       clustering_result,
                       n_clusters=4,
                       is_first_square=True,
                       ax=None):
    """Draw MLP with its spectral clustering."""

    weights = load_weights(weights_path)
    layer_widths = extract_layer_widths(weights)
    if 'cnn' in str(
            weights_path).lower():  # if cnn, omit input layer and fc layers
        is_first_square = False
        cnn_params = CNN_VGG_MODEL_PARAMS if 'vgg' in str(
            weights_path).lower() else CNN_MODEL_PARAMS
        n_conv_layers = len(cnn_params['conv'])
        weights = weights[1:n_conv_layers]
        layer_widths = layer_widths[1:n_conv_layers + 1]

    labels, metrics = clustering_result

    G = nx.from_scipy_sparse_matrix(weights_to_graph(weights))

    pos = set_nodes_positions(G.nodes, layer_widths, labels, is_first_square)

    color_mapper = get_color_mapper(n_clusters)

    color_map = [color_mapper[label] for label in labels]

    if ax is None:
        _, ax = plt.subplots(1)

    with warnings.catch_warnings():
        warnings.simplefilter('ignore')
        nx.draw(G, pos=pos, node_color=color_map, width=0, node_size=10, ax=ax)

    draw_metrics(metrics, ax)

    return ax, labels, metrics
Example #45
def load_graph(path, name, is_weighted):
    """
    Data loader assuming the format is a text file with columns of : target source (e.g. 1 2) or target source weight
    (e.g. 1 2 0.34). If you have a different format, you may want to create your own data loader.
    :param path: The path to the edgelist file
    :param name: The name of the dataset
    :param is_weighted: True if the graph is weighted, False otherwise.
    :return: A Directed networkx graph with an attribute of "weight" for each edge.
    """
    if name == "Yelp":
        with open(os.path.join(path, "yelp_data.p"), 'rb') as f:
            G = pickle.load(f)
        G = add_weights(G)
    elif name == "Youtube" or name == "Flickr":
        inputFile = os.path.join(path, "{}.mat".format(name))
        features_struct = scipy.io.loadmat(inputFile)
        data = scipy.sparse.csr_matrix(features_struct["network"])
        G = nx.from_scipy_sparse_matrix(data)
        # no need to add weights, already has
    else:
        if is_weighted:
            G = nx.read_weighted_edgelist(os.path.join(path, name + ".txt"),
                                          create_using=nx.DiGraph(),
                                          delimiter=",")
            if G.number_of_nodes() == 0:
                G = nx.read_weighted_edgelist(os.path.join(
                    path, name + ".txt"),
                                              create_using=nx.DiGraph())
        else:
            G = nx.read_edgelist(os.path.join(path, name + ".txt"),
                                 create_using=nx.DiGraph(),
                                 delimiter=",")
            if G.number_of_nodes() == 0:
                G = nx.read_edgelist(os.path.join(path, name + ".txt"),
                                     create_using=nx.DiGraph())
            # put weights equal to 1
            G = add_weights(G)
    return G
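A short sketch of the plain-edgelist branch described in the docstring, using a hypothetical comma-delimited file:

# data/toy.txt contains lines such as:
#   1,2,0.34
#   2,3,1.0
G = load_graph("data", "toy", is_weighted=True)
print(G["1"]["2"]["weight"])  # 0.34 (read_weighted_edgelist keeps node ids as strings)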
Example #46
    def summarize(document):
        sentences = sent_tokenize(document)
        bow_matrix = CountVectorizer(
            stop_words='english').fit_transform(sentences)
        normalized = TfidfTransformer().fit_transform(bow_matrix)
        similarity_graph = normalized * normalized.T
        nx_graph = nx.from_scipy_sparse_matrix(similarity_graph)
        sub_graphs = []
        #n gives the number of sub graphs
        edge_wts = list(nx_graph.edges(data=True))
        edge_wts.sort(key=lambda x: x[2]['weight'], reverse=True)
        k = 10  #number of sentence in summary
        G = nx.Graph()
        for i in nx_graph.nodes():
            G.add_node(i)
        for u, v, d in edge_wts:
            G.add_edge(u, v, **d)
            sub_graphs = list(nx.connected_component_subgraphs(G))
            # print sub_graphs
            n = len(sub_graphs)
            if n == k: break
        inSummary = [0 for i in range(len(sentences))]

        n = len(sub_graphs)
        for i in range(n):
            sen = [sentences[j] for j in (sub_graphs[i].nodes())]
            arr = [j for j in (sub_graphs[i].nodes())]
            scores = CommunitySummarizer.textrank(sen)
            # print (scores)
            # print (arr)
            for j in range(len(arr)):
                inSummary[arr[j]] = scores[j]
        # print inSummary
        summ = [
            sentences[i] for i in range(len(inSummary)) if inSummary[i] >= 1
        ]
        # print len(summ)
        return summ
Example #47
def get_n2v_embedding(graph, binary):

    ## construct the embedding and return the binary..
    #./node2vec -i:graph/karate.edgelist -o:emb/karate.emb -l:3 -d:24 -p:0.3 -dr -v

    ## get the graph..
    G = nx.from_scipy_sparse_matrix(graph, edge_attribute='weight')
    for e in G.edges():
        if e[0] == e[1]:
            G.remove_edge(e[0], e[0])

    if not os.path.exists("tmp"):
        os.makedirs("tmp")

    tmp_graph = "tmp/tmpgraph.edges"
    out_graph = "tmp/tmpgraph.emb"

    number_of_nodes = len(G.nodes())
    number_of_edges = len(G.edges())

    print("Graph has {} edges and {} nodes.".format(number_of_edges,
                                                    number_of_nodes))
    ## n e + for loop..
    f = open(tmp_graph, "w+")
    #f.write(str(number_of_nodes)+" "+str(number_of_edges)+"\n")
    for e in G.edges(data=True):
        f.write(str(e[0]) + " " + str(e[1]) + " " + str(e[2]['weight']) + "\n")
    f.close()

    print("Starting graphlet counts..")
    call([
        binary, "-i:" + tmp_graph, "-o:" + out_graph, "-l:3", "-d:128",
        "-p:0.3", "-dr", "-v"
    ])
    matf = np.loadtxt(out_graph, delimiter=" ", skiprows=1)
    call(["rm", "-rf", "tmp"])
    print("Finished n2v:", matf.shape)
    return matf
Example #48
def draw_clustered_net_imagenet(clustering_results, n_clusters=10):

    fig, ax = plt.subplots(figsize=(20, 30))

    fig.suptitle(clustering_results['network'])

    conv_connections = clustering_results['conv_connections']
    layer_widths = [cc[0]['weights'].shape[0] for cc in conv_connections[1:]]
    dense_sizes = get_dense_sizes(conv_connections)
    layer_widths.extend(list(dense_sizes.values()))

    labels = clustering_results['labels']
    adj_mat = connections_to_graph_imagenet(conv_connections)
    G = nx.from_scipy_sparse_matrix(adj_mat)
    pos = set_nodes_positions(G.nodes,
                              layer_widths,
                              labels,
                              is_first_square=False,
                              dx=2,
                              dy=2,
                              jitter=0)

    color_mapper = get_color_mapper(n_clusters)
    color_map = [color_mapper[label] for label in labels]

    with warnings.catch_warnings():
        warnings.simplefilter('ignore')
        nx.draw(G, pos=pos, node_color=color_map, width=0, node_size=4, ax=ax)

    metrics = {
        k: clustering_results[k]
        for k in [
            'ncut', 'ave_in_out', 'n_samples', 'mean', 'stdev', 'z_score',
            'percentile'
        ]
    }

    draw_metrics(metrics, ax)
Example #49
def text_rank(sentence_list, alpha=0.85):
    corpus = []
    for sentence in sentence_list:
        document = ' '.join(sentence)
        corpus.append(document)
    count_vec = CountVectorizer()
    # count the occurrences of each word
    X = count_vec.fit_transform(corpus)
    # instantiate the transformer
    transformer = TfidfTransformer()
    # print(transformer)
    # turn the term-frequency matrix X into TF-IDF values
    tf_idf_vec = transformer.fit_transform(X)
    similarity = nx.from_scipy_sparse_matrix(tf_idf_vec * tf_idf_vec.T)

    scores = nx.pagerank(similarity, alpha=alpha)

    vectors = []
    tf_idf_vec = tf_idf_vec.toarray()
    scores_val = list(scores.values())
    for i in range(len(scores_val)):
        vectors.append(tf_idf_vec[i] * scores_val[i])
    return np.array(vectors)
Example #50
def to_networkx(G, directed=True):
    """Convert Scipy sparse matrix to networkx graph to

    Parameters
    ----------
    G : Scipy sparse matrix
        a Scipy sparse matrix
    directed : bool, optional
        whether to convert to a directed graph, by default True;
        if None, checks whether the graph is directed and converts it to the proper type

    Returns
    -------
    networkx graph
        a networkx graph
    """
    if directed is None:
        directed = is_directed(G)
    if directed:
        create_using = nx.DiGraph
    else:
        create_using = nx.Graph
    return nx.from_scipy_sparse_matrix(G, create_using=create_using)
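A minimal sketch of the converter above, with a hypothetical CSR adjacency matrix:

import networkx as nx
import scipy.sparse as sp

A = sp.csr_matrix([[0, 1], [0, 0]])
G = to_networkx(A, directed=True)   # nx.DiGraph with the single edge 0 -> 1
H = to_networkx(A, directed=False)  # nx.Graph; the entry becomes an undirected edge 0 - 1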
Example #51
    def _score_generator(self, sentences, sentence_vectors):
        sentence_count = len(sentences)
        similarity_matrix = dok_matrix((sentence_count, sentence_count),
                                       dtype=np.float32)
        for i in range(len(sentences)):
            for j in range(len(sentences)):
                if i != j and len(sentence_vectors) > i:
                    value = cosine_similarity(
                        sentence_vectors[i].reshape(1, 100),
                        sentence_vectors[j].reshape(1, 100))[0, 0]
                    similarity_matrix[i, j] = value

        # Before proceeding further, let’s convert the similarity matrix sim_mat into a graph. The nodes of this graph will
        # represent the sentences and the edges will represent the similarity scores between the sentences. On this graph,
        # we will apply the PageRank algorithm to arrive at the sentence rankings.
        try:
            nx_graph = nx.from_scipy_sparse_matrix(similarity_matrix)
            scores = nx.pagerank(nx_graph, max_iter=200)
        except Exception as e:
            log.getLogger().error(str(e))
            return []

        return scores
Example #52
def get_text_summarization_text_rank(text, num_sentences=3, feature_type='tfidf'):
    # parse and normalize document
    normalized_sentences = normalize_document(text, lemmatize=False, expand_cont=False, remove_special_char=False,
                                              remove_stop_words=False, lower_case=False)
    # construct weighted document term matrix
    vec, dt_matrix = build_feature_matrix(normalized_sentences, feature_type=feature_type)
    # construct the document similarity matrix
    similarity_matrix = (dt_matrix * dt_matrix.T)
    # build the similarity graph
    similarity_graph = networkx.from_scipy_sparse_matrix(similarity_matrix)
    # compute pagerank scores for all the sentences
    scores = networkx.pagerank(similarity_graph)
    # rank sentences based on their scores
    ranked_sentences = sorted(((score, index) for index, score in scores.items()), reverse=True)
    # get the top sentence indices for our summary
    top_sentence_indices = [ranked_sentences[index][1] for index in range(num_sentences)]
    top_sentence_indices.sort()
    # construct the document summary
    summary_sentences = []
    for index in top_sentence_indices:
        summary_sentences.append(normalized_sentences[index])

    return summary_sentences
Example #53
def textRank(document):
    sentence_tokenizer = PunktSentenceTokenizer()
    sentences = sentence_tokenizer.tokenize(document)

    bow_matrix = CountVectorizer().fit_transform(sentences)
    normalized = TfidfTransformer().fit_transform(bow_matrix)

    similarity_graph = normalized * normalized.T

    nx_graph = nx.from_scipy_sparse_matrix(similarity_graph)
    scores = nx.pagerank(nx_graph)
    text_rank_graph = sorted(((scores[i], s) for i, s in enumerate(sentences)),
                             reverse=True)
    number_of_nodes = int(0.25 * len(text_rank_graph))

    if number_of_nodes < 3:
        number_of_nodes = 3

    del text_rank_graph[number_of_nodes:]

    summary = ' '.join(word for _, word in text_rank_graph)

    return summary
Example #54
    def __init__(self, samples: pd.DataFrame, samples_features: pd.DataFrame):
        self.samples = samples
        sorted_names = np.sort(self.samples[self.df_col].unique())
        samples_features = samples_features.sort_index()
        self.names = {name: i for i, name in enumerate(sorted_names)}
        self.rnames = {v: k for k, v in self.names.items()}
        self.empirical_single_marginals = Counter()
        self.empirical_pair_marginals = Counter()
        self.adj_matrix = dok_matrix((len(self.names), len(self.names)))

        self.process_samples()

        for (label1, label2), count in self.empirical_pair_marginals.items():
            self.adj_matrix[label1, label2] = self.adj_matrix[label2,
                                                              label1] = count

        for label, count in self.empirical_single_marginals.items():
            self.adj_matrix[label, label] = count

        self.nx_graph: nx.Graph = nx.from_scipy_sparse_matrix(self.adj_matrix)
        nx.relabel_nodes(self.nx_graph, self.rnames)
        self.dgl_graph = dgl.DGLGraph(self.nx_graph)
        self.graph_features = samples_features.values
Example #55
def save_gephi_graph(output_dir,A,y,k,multi_label=False):
    import networkx as nx

    labels=[]
    if(multi_label):
        nY= [" ".join(row) for row in y]
        labels = dict(zip(range(len(y)), nY))
    else:
        y = [str(i) for i in y]
        labels = dict(zip(range(len(y)), y))

    print(labels)

    G = nx.from_scipy_sparse_matrix(A)
    # print(G.edges())
    # G=G.to_directed()
    # print(G.edges())

    nx.set_node_attributes(G, labels, 'labels')
    print("Writing gephi")
    nx.write_gexf(G, output_dir+'graph_knn_'+str(k)+'.gexf')

    return
Example #56
    def preprocess(self, adj, features, graph=None):

        if self.normalize_features:
            features = self._normalize_features(features)

        if graph is None:
            graph = nx.from_scipy_sparse_matrix(adj, create_using=nx.DiGraph)

        (self.batch_adj, self.batch_features, self.batch_labels,
         self.cluster_member,
         self.mapper) = partition_graph(adj,
                                        features,
                                        self.labels,
                                        graph,
                                        n_cluster=self.n_cluster)

        if self.normalize_rate is not None:
            self.batch_adj = self._normalize_adj(self.batch_adj,
                                                 self.normalize_rate)

        with self.device:
            self.batch_adj, self.batch_features = self._to_tensor(
                [self.batch_adj, self.batch_features])
Example #57
def load_data(dataset_str):
    if dataset_str == 'blog':
        G, adj, features = graph_reader(
            './data/BlogCatalog-dataset/data/edges.csv')
    elif dataset_str == 'flickr':
        G, adj, features = graph_reader('./data/Flickr-dataset/data/edges.csv')
    elif dataset_str in ['cora', 'citeseer']:
        G, adj, features = load_cc(dataset_str)
    elif dataset_str == 'wiki':
        import scipy.io as sio
        A = sio.loadmat('./data/POS.mat')['network']
        G = nx.from_scipy_sparse_matrix(A)
        adj = nx.adjacency_matrix(G)
        features = None
    elif 'dblp' in dataset_str:
        G, adj, edge_labels = read_dblp_small(
            './data/dblp-small/net_co_author.txt')
        features = None
    else:
        assert False
    n_nodes = adj.shape[0]

    return G, adj, features
Example #58
 def summarize(self, text, num=320):
     # split the text into sentences
     if type(text) == str:
         sentences = cut_sentence(text)
     elif type(text) == list:
         sentences = text
     else:
         raise RuntimeError("text type must be list or str")
     # TF-IDF similarity
     matrix = tdidf_sim(sentences)
     matrix_norm = TfidfTransformer().fit_transform(matrix)
     # build the similarity matrix
     tfidf_sim = nx.from_scipy_sparse_matrix(matrix_norm * matrix_norm.T)
     # nx.pagerank
     sens_scores = nx.pagerank(tfidf_sim)
     # sort sentences by score
     sen_rank = sorted(sens_scores.items(),
                       key=lambda x: x[1],
                       reverse=True)
     # keep at most top-k to avoid going out of range
     topk = min(len(sentences), num)
     # return (score, original sentence) pairs
     return [(sr[1], sentences[sr[0]]) for sr in sen_rank][0:topk]
Example #59
    def __init__(self,
                 metric_space,
                 cover_list,
                 clusterer,
                 prune=True,
                 backend='networkx'):
        # build metric space as distance matrix
        self.partition_node_map = {}
        self.N = metric_space.shape[0]
        self.cover = self.build_cover(cover_list)
        self.node_row_matrix = self.build_topological_model(
            metric_space, self.cover, clusterer)
        self.adjacency_matrix = self.node_row_matrix.dot(
            self.node_row_matrix.T)
        self.cooccurence_matrix = self.node_row_matrix.T.dot(
            self.node_row_matrix)
        if prune:
            pruned_node_set = self._prune(self.adjacency_matrix)
            self.raw_node_row_matrix = self.node_row_matrix
            self.raw_adjacency_matrix = self.adjacency_matrix
            self.raw_cooccurence_matrix = self.cooccurence_matrix
            partition_keys = sorted(self.partition_node_map.keys())
            re_index = 0
            new_partition_node_map = {}
            for node in partition_keys:
                if node in pruned_node_set:
                    new_partition_node_map[re_index] = self.partition_node_map[
                        node]
                    re_index += 1
            self.partition_node_map = new_partition_node_map
            self.node_row_matrix = self.node_row_matrix[pruned_node_set, :]
            self.adjacency_matrix = self.node_row_matrix.dot(
                self.node_row_matrix.T)
            self.cooccurence_matrix = self.node_row_matrix.T.dot(
                self.node_row_matrix)

        self.graph = nx.from_scipy_sparse_matrix(self.adjacency_matrix)
Example #60
def compute(tribes, adj_matrix, conv, precision):
    import networkx as nx

    spectra = []
    pbar = progressbar.ProgressBar()

    for tribe in pbar(tribes):
        tribe_ids = conv.indices(tribe)
        adj_submat = adj_matrix[np.ix_(tribe_ids, tribe_ids)]
        G = nx.from_scipy_sparse_matrix(adj_submat, create_using=nx.DiGraph)

        # Find the largest connected component of the graph
        largest = max(nx.strongly_connected_components(G), key=len)

        if len(largest) <= 2:  # Needs at least a certain size...
            spectra.append([])
        else:
            # Adjacency matrix of the tribe's strong component
            tribe_strong_adj_submat = nx.to_numpy_array(G.subgraph(largest),
                                                        dtype='int8')

            # Make a diagonal matrix of inverses of outdegrees in the tribe
            diag_outdegree_inverses = np.diagflat(
                np.power(
                    np.sum(tribe_strong_adj_submat, axis=1).astype(float), -1))

            # The transition probability matrix
            tr_prob = diag_outdegree_inverses @ tribe_strong_adj_submat

            # Find the eigenvalues
            eig = scipy.linalg.eig(tr_prob)[0]

            # Order the non-zero eigenvalues and round to desired precision
            spectrum = np.unique(np.round(eig[np.nonzero(eig)], precision))
            spectra.append(spectrum)

    return spectra