def testGetAllEdgeIndices(self):
    """
    Check that getAllEdgeIndices returns one row per edge and that every row
    refers to a pair of vertices joined by an edge, first for the default
    graph and then for a directed graph (DictGraph(False)).
    """
    def checkEdgeIndices(graph):
        #Every row of edgeIndices must point at an existing edge of value 1
        edgeIndices = graph.getAllEdgeIndices()
        keys = graph.getAllVertexIds()

        self.assertEqual(edgeIndices.shape[0], graph.getNumEdges())

        for i in range(edgeIndices.shape[0]):
            #Cast both entries to int so that they can index the list of ids
            source = keys[int(edgeIndices[i, 0])]
            target = keys[int(edgeIndices[i, 1])]
            self.assertEqual(graph.getEdge(source, target), 1)

    graph = DictGraph()
    graph.addEdge("a", "b")
    graph.addEdge("a", "c")
    graph.addEdge("a", "d")
    graph.addEdge("d", "e")
    checkEdgeIndices(graph)

    #Directed graph with an edge in both directions between "a" and "b"
    graph = DictGraph(False)
    graph.addEdge("a", "b")
    graph.addEdge("b", "a")
    graph.addEdge("a", "c")
    graph.addEdge("a", "d")
    graph.addEdge("d", "e")
    checkEdgeIndices(graph)
def testRemoveVertex(self):
    """
    Remove vertices one at a time and verify that the vertex and the edges
    listed for it no longer exist, then compare the remaining vertex ids and
    edges. Run once on the default graph and once on a directed graph.
    """
    graph = DictGraph()
    for source, target in [(0, 1), (0, 2), (0, 3), (1, 2), (2, 3), (3, 4)]:
        graph.addEdge(source, target)

    #Each entry is (vertex to remove, edges which must be gone afterwards)
    removals = [
        (4, [(3, 4)]),
        (3, [(2, 3), (0, 3)]),
        (2, [(1, 2), (0, 2)]),
    ]

    for vertexId, removedEdges in removals:
        graph.removeVertex(vertexId)
        self.assertFalse(graph.vertexExists(vertexId))

        for source, target in removedEdges:
            self.assertFalse(graph.edgeExists(source, target))

    self.assertTrue(graph.getAllVertexIds() == [0, 1])
    self.assertTrue(graph.getAllEdges() == [(0, 1)])

    #Try directed graph
    graph = DictGraph(False)
    for source, target in [(0, 1), (1, 0), (0, 3), (1, 2), (2, 3), (3, 4)]:
        graph.addEdge(source, target)

    removals = [
        (0, [(0, 1), (0, 3), (1, 0)]),
        (2, [(1, 2), (2, 3)]),
    ]

    for vertexId, removedEdges in removals:
        graph.removeVertex(vertexId)
        self.assertFalse(graph.vertexExists(vertexId))

        for source, target in removedEdges:
            self.assertFalse(graph.edgeExists(source, target))

    self.assertTrue(graph.getAllVertexIds() == [1, 3, 4])
    self.assertTrue(graph.getAllEdges() == [(3, 4)])
def testGetWeightMatrix(self):
    """
    Compare getWeightMatrix against getEdge entry by entry: a 1 in the
    matrix must correspond to an edge of value 1 and any other entry to a
    missing edge (getEdge returning None). Checked for the default graph
    and for a directed graph.
    """
    def checkWeightMatrix(graph):
        W = graph.getWeightMatrix()
        #Read the ids from the graph being checked; the test assumes W is
        #indexed in the order returned by getAllVertexIds
        keys = graph.getAllVertexIds()

        for i, rowKey in enumerate(keys):
            for j, colKey in enumerate(keys):
                if W[i, j] == 1:
                    self.assertEqual(graph.getEdge(rowKey, colKey), 1)
                else:
                    self.assertIsNone(graph.getEdge(rowKey, colKey))

    graph = DictGraph()
    graph.addEdge("a", "b")
    graph.addEdge("a", "c")
    graph.addEdge("a", "d")
    graph.addEdge("d", "e")
    checkWeightMatrix(graph)

    #Try a directed graph
    graph = DictGraph(False)
    graph.addEdge("a", "b")
    graph.addEdge("a", "c")
    graph.addEdge("a", "d")
    graph.addEdge("d", "e")
    checkWeightMatrix(graph)
def testGetAllVertexIds(self):
    """
    Vertices created implicitly by addEdge and explicitly by setVertex
    should all be returned by getAllVertexIds.
    """
    dictGraph = DictGraph(True)
    dictGraph.addEdge(1, 2, 12)
    dictGraph.addEdge(1, 3, 18)
    dictGraph.setVertex(5, 12)

    #NOTE(review): this also pins the order of the returned ids
    self.assertEqual(dictGraph.getAllVertexIds(), [1, 2, 3, 5])
def testSetVertices(self):
    """
    Store several vertex values with setVertices and check that the ids and
    the values read back through getAllVertexIds/getVertices are unchanged.
    """
    graph = DictGraph()

    vertexIndices = [1, 2, 3]
    vertices = ["a", "b", "c"]
    graph.setVertices(vertexIndices, vertices)

    vertexIndices2 = graph.getAllVertexIds()
    vertices2 = graph.getVertices(vertexIndices2)

    self.assertEqual(vertexIndices, vertexIndices2)
    self.assertEqual(vertices, vertices2)
def testDijkstrasAlgorithm(self):
    """
    Compare the output of dijkstrasAlgorithm with hand computed distances on
    a small tree, a graph with an isolated vertex, a ring, and a tree whose
    vertex ids are strings.
    """
    #Expected distances for the tree 0-1, 1-2, 1-3, 2-4, one row per source
    treeDistances = [
        [0, 1, 2, 2, 3],
        [1, 0, 1, 1, 2],
        [2, 1, 0, 2, 1],
        [2, 1, 2, 0, 3],
        [3, 2, 1, 3, 0],
    ]

    graph = DictGraph()
    for source, target in [(0, 1), (1, 2), (1, 3), (2, 4)]:
        graph.addEdge(source, target, 1)
    graph.setVertex(4, 1)

    for source, expected in enumerate(treeDistances):
        distances = graph.dijkstrasAlgorithm(source)
        self.assertTrue((distances == numpy.array(expected)).all())

    #Test a graph which has an isolated node
    graph = DictGraph()
    graph.setVertex(5, 1)
    for source, target in [(0, 1), (1, 2), (1, 3)]:
        graph.addEdge(source, target, 1)

    distances = graph.dijkstrasAlgorithm(0)
    self.assertTrue((distances == numpy.array([0, 1, 2, 2, numpy.inf])).all())

    #Test a graph in a ring
    graph = DictGraph()
    for source, target in [(0, 1), (1, 2), (2, 3), (3, 4), (4, 0)]:
        graph.addEdge(source, target, 1)

    distances = graph.dijkstrasAlgorithm(0)
    self.assertTrue((distances == numpy.array([0, 1, 2, 2, 1])).all())

    #Try case in which vertex ids are not numbers
    graph = DictGraph()
    for source, target in [("a", "b"), ("b", "c"), ("b", "d"), ("c", "e")]:
        graph.addEdge(source, target, 1)

    #Reorder the results with the argsort of the ids before comparing
    inds = Util.argsort(graph.getAllVertexIds())

    for source, expected in zip(["a", "b", "c", "d", "e"], treeDistances):
        distances = graph.dijkstrasAlgorithm(source)[inds]
        self.assertTrue((distances == numpy.array(expected)).all())
def testDegreeSequence(self):
    """
    Check degreeSequence on a graph with an isolated vertex and a self edge.
    The degrees are matched to vertex ids through getAllVertexIds so the
    comparison does not depend on the vertex order.
    """
    graph = DictGraph()
    graph.setVertex("a", 10)
    graph["b", "c"] = 1
    graph["b", "d"] = 1
    graph["d", "e"] = 1
    graph["e", "e"] = 1

    #The expected values count the self edge on "e" twice
    expectedDegrees = {"a": 0, "b": 2, "c": 1, "d": 2, "e": 3}

    #Compute the degree sequence once rather than once per vertex
    degrees = graph.degreeSequence()
    degreeDict = {}

    for i, vertexId in enumerate(graph.getAllVertexIds()):
        degreeDict[vertexId] = degrees[i]

    self.assertEqual(degreeDict, expectedDegrees)
def testAdjacencyList(self):
    """
    Check that every neighbour reported by adjacencyList corresponds to an
    existing edge and that the reported weight equals the stored edge value.
    """
    graph = DictGraph()
    graph.addEdge("a", "b", 1)
    graph.addEdge("b", "c", 1)
    graph.addEdge("b", "d", 1)
    graph.addEdge("c", "e", 1)
    #"f" has no edges
    graph.setVertex("f", 1)

    neighbourIndices, neighbourWeights = graph.adjacencyList()
    vertexIds = graph.getAllVertexIds()

    for i, neighbours in enumerate(neighbourIndices):
        for k, j in enumerate(neighbours):
            #i and j are positions in vertexIds, k is the position in row i
            self.assertTrue(graph.edgeExists(vertexIds[i], vertexIds[j]))
            self.assertEqual(graph[vertexIds[i], vertexIds[j]], neighbourWeights[i][k])
def testGetSparseWeightMatrix(self):
    """
    Compare getSparseWeightMatrix against getEdge entry by entry. Edges
    whose value is not a number are expected to appear as 1, numeric edges
    as their value, and zero entries must correspond to missing edges.
    Checked for the default graph and for a directed graph.
    """
    def checkSparseWeightMatrix(graph):
        W = graph.getSparseWeightMatrix()
        #Read the ids from the graph being checked; the test assumes W is
        #indexed in the order returned by getAllVertexIds
        keys = graph.getAllVertexIds()

        for i, rowKey in enumerate(keys):
            for j, colKey in enumerate(keys):
                edge = graph.getEdge(rowKey, colKey)

                if graph.edgeExists(rowKey, colKey) and not isinstance(edge, numbers.Number):
                    self.assertEqual(1, W[i, j])
                elif W[i, j] != 0:
                    self.assertEqual(edge, W[i, j])
                else:
                    self.assertIsNone(edge)

    graph = DictGraph()
    graph.addEdge("a", "b")
    graph.addEdge("a", "c")
    graph.addEdge("a", "d", "blah")
    graph.addEdge("d", "e", -1.1)
    graph.addEdge("c", "b", 2)
    checkSparseWeightMatrix(graph)

    #Try a directed graph
    graph = DictGraph(False)
    graph.addEdge("a", "b")
    graph.addEdge("a", "c", "test")
    graph.addEdge("a", "d")
    graph.addEdge("d", "e")
    graph.addEdge("c", "a", 0.1)
    checkSparseWeightMatrix(graph)
def __init__(self, minGraphSize=500, maxGraphSize=None, dayStep=30):
    """
    Load the Cit-HepTh citation data and build the graph stored in self.graph.

    The edge file and the date file are read from the "cluster/" folder of
    the data directory. Each paper found in the date file gets as vertex
    value the number of days between 1 Jan 1990 and its date; papers it is
    linked to by graph.neighbours which have no value, or a later one, get
    the same value. The symmetrised sparse graph is then restricted to the
    first component returned by findConnectedComponents.

    :param minGraphSize: stored as self.minGraphSize (not used here).

    :param maxGraphSize: stored as self.maxGraphSize (not used here).

    :param dayStep: stored as self.dayStep (not used here).
    """
    dataDir = PathDefaults.getDataDir() + "cluster/"
    edgesFilename = dataDir + "Cit-HepTh.txt"
    dateFilename = dataDir + "Cit-HepTh-dates.txt"

    #Note the IDs are integers but can start with zero so they are kept as strings
    edges = []

    with open(edgesFilename, 'r') as edgesFile:
        #Skip the first 4 lines (presumably the file header)
        for _ in range(4):
            edgesFile.readline()

        for line in edgesFile:
            vertex1, _, vertex2 = line.partition("\t")
            edges.append([vertex1.strip(), vertex2.strip()])

    logging.info("Loaded edge file " + str(edgesFilename) + " with " + str(len(edges)) + " edges")

    #Keep an edge graph
    graph = DictGraph(False)
    graph.addEdges(edges)
    logging.info("Created directed citation graph with " + str(graph.getNumEdges()) + " edges and " + str(graph.getNumVertices()) + " vertices")

    #Read in the dates articles appear. The vertex value of a paper is the
    #number of days since startDate. We keep all papers with a known date
    #and those directly cited by them.
    startDate = datetime.date(1990, 1, 1)
    numLines = 0
    subgraphIds = []

    with open(dateFilename, 'r') as dateFile:
        #Skip the first line (presumably a header)
        dateFile.readline()

        for line in dateFile:
            vertexId, _, dateStr = line.partition("\t")
            vertexId = vertexId.strip()
            inputDate = datetime.datetime.strptime(dateStr.strip(), "%Y-%m-%d").date()

            if graph.vertexExists(vertexId):
                days = (inputDate - startDate).days
                graph.vertices[vertexId] = days
                subgraphIds.append(vertexId)

                #If a paper cites another, it must have been written before
                #the citing paper - enforce this rule.
                for neighbour in graph.neighbours(vertexId):
                    neighbourDays = graph.getVertex(neighbour)

                    if neighbourDays is None:
                        graph.setVertex(neighbour, days)
                        subgraphIds.append(neighbour)
                    elif days < neighbourDays:
                        graph.setVertex(neighbour, days)

            #Count every line of the date file, whether or not it was used
            numLines += 1

    subgraphIds = set(subgraphIds)
    graph = graph.subgraph(list(subgraphIds))
    logging.debug(graph)
    logging.info("Loaded date file " + str(dateFilename) + " with " + str(len(subgraphIds)) + " dates and " + str(numLines) + " lines")

    #Make the weight matrix symmetric by adding its transpose
    W = graph.getSparseWeightMatrix()
    W = W + W.T

    #One column of vertex values, one row per vertex
    vList = VertexList(W.shape[0], 1)
    vList.setVertices(numpy.array([graph.getVertices(graph.getAllVertexIds())]).T)

    #Note: we have 16 self edges and some two-way citations so this graph has fewer edges than the directed one
    self.graph = SparseGraph(vList, W=W)
    logging.debug(self.graph)

    #Now pick the max component
    #NOTE(review): assumes findConnectedComponents returns the largest component first - confirm
    components = self.graph.findConnectedComponents()
    self.graph = self.graph.subgraph(components[0])
    logging.debug("Largest component graph: " + str(self.graph))

    self.minGraphSize = minGraphSize
    self.maxGraphSize = maxGraphSize
    self.dayStep = dayStep
def testGetIterator(self):
    """
    Check the matrices produced by CitationIterGenerator.getIterator.

    Every matrix must be symmetric and must contain the previous one as its
    leading block. The size and number of non zeros of the final matrix are
    then compared with values recomputed directly from the data files.
    """
    generator = CitationIterGenerator()
    iterator = generator.getIterator()

    #The builtin next() works for Python 2 and Python 3 style iterators
    lastW = next(iterator)

    for W in iterator:
        self.assertEqual((W-W.T).getnnz(), 0)
        self.assertEqual((lastW - W[0:lastW.shape[0], 0:lastW.shape[0]]).getnnz(), 0)
        lastW = W

    #lastW is the final matrix, even if the iterator produced only one
    numVertices = lastW.shape[0]

    #Now compute the vertexIds manually:
    dataDir = PathDefaults.getDataDir() + "cluster/"
    edgesFilename = dataDir + "Cit-HepTh.txt"
    dateFilename = dataDir + "Cit-HepTh-dates.txt"

    #We can't load in numbers using numpy since some may start with zero,
    #so "1" is prefixed to each id before converting it to an integer
    edges = []

    with open(edgesFilename, 'r') as edgesFile:
        #Skip the first 4 lines (presumably the file header)
        for _ in range(4):
            edgesFile.readline()

        for line in edgesFile:
            vertex1, _, vertex2 = line.partition("\t")
            edges.append([int("1" + vertex1.strip()), int("1" + vertex2.strip())])

    #The builtin int replaces the removed numpy.int alias
    edges = numpy.array(edges, int)

    #Check file read correctly
    self.assertTrue((edges[0, :] == numpy.array([11001, 19304045])).all())
    self.assertTrue((edges[1, :] == numpy.array([11001, 19308122])).all())
    self.assertTrue((edges[9, :] == numpy.array([11001, 19503124])).all())

    vertexIds1 = numpy.unique(edges)
    logging.info("Number of graph vertices: " + str(vertexIds1.shape[0]))

    vertexIds2 = []

    with open(dateFilename, 'r') as dateFile:
        #Skip the first line (presumably a header)
        dateFile.readline()

        for line in dateFile:
            vertexId = line.partition("\t")[0].strip()
            vertexIds2.append(int("1" + vertexId))

    #Check file read correctly
    vertexIds2 = numpy.array(vertexIds2, int)
    self.assertTrue((vertexIds2[0:10] == numpy.array([19203201, 19203202, 19203203, 19203204, 19203205, 19203206, 19203207, 19203208, 19203209, 19203210], int)).all())
    vertexIds2 = numpy.unique(numpy.array(vertexIds2, int))

    graph = DictGraph(False)
    graph.addEdges(edges)

    #Find the set of vertices with known citation
    vertices = []
    vertexId2Set = set(vertexIds2.tolist())

    for i in graph.getAllVertexIds():
        #NOTE(review): i is a vertex id here, not a loop counter - confirm this is intended
        Util.printIteration(i, 50000, edges.shape[0])

        if i in vertexId2Set:
            vertices.append(i)
            vertices.extend(graph.neighbours(i))

    logging.debug("Number of final vertices: " + str(numVertices))

    uniqueVertices = numpy.unique(numpy.array(vertices))
    numVertices2 = uniqueVertices.shape[0]
    self.assertEqual(numVertices, numVertices2)

    #Now compare the weight matrices using the undirected graph
    #Note the order of vertices is different from the iterator
    graph = DictGraph()
    graph.addEdges(edges)
    subgraph = graph.subgraph(uniqueVertices)
    W2 = subgraph.getSparseWeightMatrix()

    self.assertEqual(lastW.getnnz(), W2.getnnz())