def initialize_Markov_chain(connection, config):
    """
    Initialize an empty Markov chain over (relationship type, node label) states.

    The state space is the Cartesian product of all distinct relationship types
    and node labels found in the graph; states pair a relationship with a node
    label because the graph is a multigraph, so a node label alone does not
    identify a state.

    :param connection: ipython-cypher connection string
        (eg: http://username:password@host:7474/db/data)
    :param config: ipython-cypher configuration named tuple
    :return: tuple (state_space, quad_to_matrix_index) where state_space is a
        list of (relationship_type, node_label) tuples and quad_to_matrix_index
        maps each concatenated state pair (a 4-tuple state1 + state2) to its
        (row, column) position in a transition matrix over state_space
    """
    relationship_types = cypher.run("MATCH ()-[r]-() RETURN DISTINCT type(r)", conn=connection, config=config)
    relationship_types = [item[0] for item in relationship_types]
    # NOTE(review): labels(n)[1] reads the *second* label of each node —
    # presumably the first is a generic base label; confirm against the schema.
    node_labels = cypher.run("MATCH (n) RETURN DISTINCT labels(n)[1]", conn=connection, config=config)
    node_labels = [item[0] for item in node_labels]
    # Markov chain states = (relationship_type, node_label) since this is a multigraph
    state_space = [(relationship_type, node_label)
                   for relationship_type in relationship_types
                   for node_label in node_labels]
    # Map each flattened state pair to its matrix coordinates. enumerate gives
    # the indices directly; the previous version called list.index() for every
    # pair, which made this loop O(n^3) instead of O(n^2).
    quad_to_matrix_index = {
        state1 + state2: (i, j)
        for i, state1 in enumerate(state_space)
        for j, state2 in enumerate(state_space)
    }
    return state_space, quad_to_matrix_index
def EigenVectorCentrality(self):
    # NOTE(review): despite the method name, this runs an iterative
    # PageRank-style rank propagation over DOGUMGUNU ("birthday")
    # relationships, not a true eigenvector-centrality computation.
    # NOTE(review): cypher.run is called without conn=..., so the library's
    # default connection is used — confirm that is intended.
    print("DOGUM GUNU PAGERANK CENTRALITY")
    # 10 rounds: each round, a random ~10% of users add +1 rank to every user
    # reachable via up to 10 DOGUMGUNU hops (the returned result is unused).
    resultDogum = cypher.run(
        "UNWIND range(1,10) AS round MATCH (n:User) WHERE rand() < 0.1 MATCH (n:User)-[:DOGUMGUNU*..10]->(m:User) SET m.rank = coalesce(m.rank,0) + 1"
    )
    # Read back the accumulated ranks, highest first, and print them.
    resultDogumRank = cypher.run(
        "MATCH (n:User) WHERE n.rank is not null return id(n), n.rank order by n.rank desc"
    )
    print(resultDogumRank)
    # The same query pattern applies to the other relationship types:
    '''
def ClosenessCentrality(self):
    """Print closeness centrality of every user for each relationship type.

    For each relationship type, sums 1/shortest-path-distance from each user
    to every other user and prints the result ordered by descending score.
    """
    for heading, rel_type in (("DOGUM GUNU", "DOGUMGUNU"),
                              ("TESEKKUR", "TESEKKUR"),
                              ("TAKDIR", "TAKDIR")):
        print("%s CLOSENESS CENTRALITY" % heading)
        result = cypher.run(
            "MATCH (a:User), (b:User) WHERE a<>b WITH length(shortestPath((a)-[:%s*]-(b))) AS dist, a, b RETURN DISTINCT id(a), sum(1.0/dist) AS closenessCentrality ORDER BY closenessCentrality DESC" % rel_type
        )
        print(result)
def DegreeCentrality(self):
    """Print degree centrality of every user for each relationship type.

    Counts, per user, the incident relationships of the given type and prints
    the result ordered by descending degree.
    """
    for heading, rel_type in (("DOGUM GUNU", "DOGUMGUNU"),
                              ("TESEKKUR", "TESEKKUR"),
                              ("TAKDIR", "TAKDIR")):
        print("%s DEGREE CENTRALITY" % heading)
        result = cypher.run(
            "MATCH (n:User)-[r:%s]-(m:User) return n.name,n.uID,n.departmanName,count(r) as DegreeScore order by DegreeScore desc" % rel_type
        )
        print(result)
def BetweennessCentrality(self):
    """Print betweenness centrality of every user for each relationship type.

    Counts, per node, how many all-shortest-paths between user pairs pass
    through it (endpoints excluded via nodes(p)[1..-1]) and prints the result
    ordered by descending count.
    """
    for heading, rel_type in (("DOGUM GUNU", "DOGUMGUNU"),
                              ("TESEKKUR", "TESEKKUR"),
                              ("TAKDIR", "TAKDIR")):
        print("%s BETWEENNESS CENTRALITY" % heading)
        result = cypher.run(
            "MATCH p=allShortestPaths((source:User)-[:%s*]-(target:User)) UNWIND nodes(p)[1..-1] as n RETURN id(n), count(*) as betweenness order by betweenness desc" % rel_type
        )
        print(result)
def drawGraph(self):
    """Fetch all DOGUMGUNU paths between users and draw them with networkx."""
    print("GRAPH")
    result_set = cypher.run("MATCH p= (:User)-[:DOGUMGUNU]->(:User) RETURN p")
    # Alternative relationship types that can be visualised instead:
    # result_set = cypher.run("MATCH p= (:User)-[:TESEKKUR]->(:User) RETURN p")
    # result_set = cypher.run("MATCH p= (:User)-[:TAKDIR]->(:User) RETURN p")
    nx.draw(result_set.get_graph())
def run_cypher_query(self, query):
    """
    Run a Cypher query over this object's connection.

    :param query: Cypher query string
    :return: networkx graph built from the query's result set
        (the previous docstring claimed a cypher ResultSet was returned,
        but the result is converted via get_graph())
    """
    data = cypher.run(query, conn=self.connection)
    return data.get_graph()
def test_get_graph():
    """Smoke-test get_graph(): the shortest-path query must yield a non-empty MultiDiGraph."""
    query = ("MATCH path=allShortestPaths((s:omim_disease)-[*1..%d]-(t:disont_disease)) "
             "WHERE s.name='%s' AND t.name='%s' "
             "RETURN path" % (4, 'OMIM:137920', 'DOID:11476'))
    result_set = cypher.run(query, conn=connection, config=config)
    graph = get_graph(result_set, directed=True)
    # Exact-type check on purpose: a plain DiGraph would indicate a regression.
    if type(graph) is not nx.classes.MultiDiGraph:
        raise Exception("A networkx graph was not returned")
    if graph.number_of_nodes() < 1:
        raise Exception("An empty graph was returned")
def expected_graph_distance(omim, doid, max_path_len=4, directed=True, connection=connection, defaults=defaults):
    """
    Estimate the expected random-walk path length between an OMIM source and a DOID target.

    Extracts from neo4j the subgraph of all shortest paths (up to max_path_len
    hops) connecting the two nodes, treats it as a uniform Markov chain (all
    outgoing edges equally likely), and computes the expected number of steps
    for a random walker to go source->target and target->source.

    :param omim: Input OMIM ID (eg: 'OMIM:1234'), source
    :param doid: Input DOID ID (eg: 'DOID:1234'), target
    :param max_path_len: maximum path length to consider (default=4)
    :param directed: treat the Markov chain as directed or undirected (default=True (directed))
    :param connection: ipython-cypher connection string
    :param defaults: ipython-cypher configurations named tuple
    :return: pair of floats (E(source->target), E(target->source)); a value of
        float("inf") means no path was found within the step horizon
    """
    if directed:
        query = "MATCH path=allShortestPaths((s:omim_disease)-[*1..%d]->(t:disont_disease)) " \
                "WHERE s.name='%s' AND t.name='%s' " \
                "RETURN path" % (max_path_len, omim, doid)
    else:
        # Undirected variant of the same query (note -[]- instead of -[]->)
        query = "MATCH path=allShortestPaths((s:omim_disease)-[*1..%d]-(t:disont_disease)) " \
                "WHERE s.name='%s' AND t.name='%s' " \
                "RETURN path" % (max_path_len, omim, doid)
    res = cypher.run(query, conn=connection, config=defaults)
    graph = get_graph(res, directed=directed)  # Note: I may want to make this directed, but sometimes this means no path from OMIM
    mat = nx.to_numpy_matrix(graph)  # adjacency matrix of the subgraph
    basis = [i[1] for i in list(graph.nodes(data='names'))]  # node ID's in matrix row/column order
    doid_index = basis.index(doid)  # position of the target
    omim_index = basis.index(omim)  # position of the source
    #print(omim)  # diagnostics
    if directed:  # if directed, then add an absorbing sink node just after the target, make sure we can pass over it
        sink_column = np.zeros((mat.shape[0], 1))
        sink_column[doid_index] = 1  # connect doid to sink node
        sink_row = np.zeros((1, mat.shape[0] + 1))
        sink_row[0, -1] = 1  # make sink node go to itself (absorbing)
        mat = np.vstack([np.append(mat, sink_column, 1), sink_row])  # append the sink row and column
        row_sums = mat.sum(axis=1)
        zero_indicies = np.where(row_sums == 0)[0]  # even after this, some nodes may not have out-going arrows
        for index in zero_indicies:
            mat[index, index] = 1  # put a self loop in, so we don't get division by zero
        row_sums = mat.sum(axis=1)
        mat_norm = mat / row_sums  # row normalize into a stochastic matrix
    else:
        row_sums = mat.sum(axis=1)
        mat_norm = mat / row_sums
    # Expected steps ~ sum_i i * P(at target after i steps), truncated at 15
    # steps. NOTE(review): this equals the expected hitting time only when the
    # target feeds an absorbing sink (the directed case above); in the
    # undirected case it is a heuristic — confirm intent.
    exp_o_to_d = np.sum([float(i) * LA.matrix_power(mat_norm, i)[omim_index, doid_index] for i in range(15)])
    exp_d_to_o = np.sum([float(i) * LA.matrix_power(mat_norm, i)[doid_index, omim_index] for i in range(15)])
    if exp_o_to_d == 0:
        exp_o_to_d = float("inf")  # no such path
    if exp_d_to_o == 0:
        exp_d_to_o = float("inf")  # no such path
    return (exp_o_to_d, exp_d_to_o)  # (E(source->target), E(target->source))
def calculatePageRankCentralityUsingNetworkX(session):
    """
    Calculate PageRank over the Employee 'appreciation' graph with networkx and chart it.

    :param session: Neo4j session (unused here; data is fetched via ipython-cypher)
    """
    # Fixed: the original used Python 2 print statements, which are syntax
    # errors under Python 3 and inconsistent with the rest of the file.
    print("page rank using networkx:")
    results = cypher.run(
        "MATCH (a:Employee)-[r:appreciation]-(b:Employee) RETURN id(a), id(b),r",
        conn="http://*****:*****@localhost:7474/db/data")
    g = results.get_graph()
    data = nx.pagerank_numpy(g)
    print(data)
    # give chart a number and draw chart according to the data
    plt.figure(6)
    drawChart(data, "Page Rank using NetworkX")
def calculateEigenvectorCentrality(session):
    """
    Calculate eigenvector centrality of the whole graph with networkx and chart it.

    :param session: Neo4j session (unused here; data is fetched via ipython-cypher)
    """
    # Fixed: the original used a Python 2 print statement, which is a syntax
    # error under Python 3 and inconsistent with the rest of the file.
    print("Eigenvector centrality:")
    # get the data from neo4j
    results = cypher.run("MATCH p = ()-[]-() RETURN p",
                         conn="http://*****:*****@localhost:7474/db/data")
    g = results.get_graph()
    # calculate the eigenvector centrality using networkx
    dictionary_centrality = nx.eigenvector_centrality_numpy(g)
    # give chart a number and draw chart according to the data
    plt.figure(4)
    drawChart(dictionary_centrality, "Eigenvector Centrality")
def return_subgraph_paths_of_type(session, omim, doid, relationship_list, debug=False):
    """
    Extract the subgraph of paths whose relationships match relationship_list in order.

    :param session: neo4j session
    :param omim: source OMIM ID (eg: OMIM:1234)
    :param doid: target disease ID (eg: 'DOID:1235')
    :param relationship_list: list of relationships (must be valid neo4j
        relationship types); if this is a list of lists, the subgraph
        consisting of all valid paths is returned
    :param debug: if True, return the generated cypher query string instead
    :return: networkx graph (or the query string when debug is True)
    """
    nested = any(isinstance(el, list) for el in relationship_list)
    if not nested:
        # Single relationship sequence: one MATCH over the whole path.
        hops = "-()-".join("[:%s]" % rel for rel in relationship_list)
        query = "MATCH path=(s:disease)-%s-(t:disease) " % hops
        query += "WHERE s.name='%s' and t.name='%s' " % (omim, doid)
        query += "RETURN path"
        if debug:
            return query
    else:
        # List of lists: one OPTIONAL MATCH per sequence, collected together.
        parts = ["MATCH (s:disease{name:'%s'}) " % omim]
        for idx, rel_list in enumerate(relationship_list):
            hops = "-()-".join("[:%s]" % rel for rel in rel_list)
            parts.append("OPTIONAL MATCH path%d=(s)-%s-(t:disease) WHERE t.name='%s' " % (idx, hops, doid))
        parts.append("RETURN " + "+".join("collect(path%d)" % idx
                                          for idx in range(len(relationship_list))))
        query = "".join(parts)
        if debug:
            return query
    graph = get_graph(cypher.run(query, conn=connection, config=defaults))
    return graph
# Demo script: fetch LINK edges from a local Neo4j instance, draw the graph,
# and keep the matplotlib window alive.
import cypher
import matplotlib.pyplot as plt

# Fetch every pair of nodes joined by a LINK relationship
# (connection credentials are masked in this copy).
data = cypher.run("MATCH (a)-[l:LINK]-(b) RETURN a, b, l",
                  conn="http://*****:*****@localhost:7474/")
data.get_graph()  # build the networkx graph from the result set
data.draw()  # render it via the ResultSet's matplotlib helper

import time

plt.show()
# Keep the process (and hence the plot window) alive indefinitely.
while True:
    time.sleep(0.5)
def get_neighbor_count():
    """Return a cypher ResultSet mapping each username to its neighbour count."""
    query = "MATCH (n)--(m) RETURN n.username, count(m) as neighbors"
    return cypher.run(query, conn="http://*****:*****@localhost:7474")
def get_neo4_graph():
    """Fetch every path in the database and return it as a networkx graph."""
    return cypher.run("MATCH p = ()-[]-() RETURN p",
                      conn="http://*****:*****@localhost:7474").get_graph()
# Script: count co-purchase (WITH) relationships for product '1'.
# NOTE(review): sys and itertools are imported but unused here; the py2neo
# Graph instance is created but never used after construction — confirm both
# are needed.
import sys
import itertools
from py2neo import *
import cypher

graph = Graph()  # connects to the default local Neo4j instance

query = """
MATCH r = (p1:Product)-[:WITH]->(p2:Product)
WHERE p1.id = '1'
RETURN count(r)
"""
# cypher.run is called without conn=..., so the library default is used.
results = cypher.run(query)
# df = results.get_dataframe()
# df.head()
for d in results:
    print(d)