def vectorStatistics(self, graph, treeStats=False, eigenStats=True):
    """
    Find a series of statistics for the given input graph which can be
    represented as vector values.

    :param graph: the graph to analyse; checked to be an AbstractMatrixGraph.

    :param treeStats: if True, also compute the distributions of tree sizes
        and tree depths.

    :param eigenStats: if True and the graph has at least one vertex,
        compute eigenvalue statistics of the symmetrised weight matrix.

    :return: a dict with the keys "inDegreeDist", "outDegreeDist",
        "hopCount", "triangleDist", "maxEigVector" and "eigenDist". The key
        "componentsDist" is only present for an undirected graph with at
        least one component, and "treeSizesDist"/"treeDepthsDist" are only
        present when treeStats is True.
    """
    Parameter.checkClass(graph, AbstractMatrixGraph)
    Parameter.checkBoolean(treeStats)

    statsDict = {}

    statsDict["inDegreeDist"] = graph.inDegreeDistribution()
    statsDict["outDegreeDist"] = graph.degreeDistribution()

    logging.debug("Computing hop counts")
    P = graph.findAllDistances(False)
    statsDict["hopCount"] = graph.hopCount(P)

    logging.debug("Computing triangle count")
    if graph.getNumVertices() != 0:
        statsDict["triangleDist"] = numpy.bincount(graph.triangleSequence())
    else:
        statsDict["triangleDist"] = numpy.array([])

    # Get the distribution of component sizes
    logging.debug("Finding distribution of component sizes")
    if graph.isUndirected():
        components = graph.findConnectedComponents()

        if len(components) != 0:
            # The builtin int replaces the numpy.int alias, which no longer
            # exists in NumPy >= 1.24 (it was only ever an alias of int).
            componentSizes = numpy.array([len(c) for c in components], int)
            statsDict["componentsDist"] = numpy.bincount(componentSizes)

    if graph.getNumVertices() != 0 and eigenStats:
        logging.debug("Computing eigenvalues/vectors")
        W = graph.getWeightMatrix()
        # Make sure weight matrix is symmetric
        W = (W + W.T) / 2
        # NOTE(review): W is symmetric here, so numpy.linalg.eigh would
        # guarantee real eigenvalues; eig is kept to preserve existing output.
        eigenDistribution, V = numpy.linalg.eig(W)
        i = numpy.argmax(eigenDistribution)
        # Eigenvector belonging to the largest eigenvalue
        statsDict["maxEigVector"] = V[:, i]
        # Strictly positive eigenvalues in decreasing order
        positiveEigs = eigenDistribution[eigenDistribution > 0]
        statsDict["eigenDist"] = numpy.flipud(numpy.sort(positiveEigs))
        gc.collect()
    else:
        statsDict["maxEigVector"] = numpy.array([])
        statsDict["eigenDist"] = numpy.array([])

    if treeStats:
        logging.debug("Computing statistics on trees")
        trees = graph.findTrees()
        statsDict["treeSizesDist"] = numpy.bincount([len(x) for x in trees])
        treeDepths = [GraphUtils.treeDepth(graph.subgraph(x)) for x in trees]
        statsDict["treeDepthsDist"] = numpy.bincount(treeDepths)

    return statsDict
def testTreeDepth(self):
    """
    Check GraphUtils.treeDepth on two small hand-built graphs.

    Uses assertEqual rather than the deprecated assertEquals alias, which
    was removed from unittest in Python 3.12.
    """
    numFeatures = 1

    # Four vertices with edges 0-1, 0-2 and 2-3.
    # NOTE(review): the second SparseGraph argument is False - presumably
    # this makes the graph directed; confirm against SparseGraph.
    numVertices = 4
    vList = VertexList(numVertices, numFeatures)
    graph = SparseGraph(vList, False)
    graph.addEdge(0, 1)
    graph.addEdge(0, 2)
    graph.addEdge(2, 3)
    self.assertEqual(GraphUtils.treeDepth(graph), 2)

    # Same graph extended by one vertex and the edge 3-4, which lengthens
    # the chain starting at vertex 0 by one.
    numVertices = 5
    vList = VertexList(numVertices, numFeatures)
    graph = SparseGraph(vList, False)
    graph.addEdge(0, 1)
    graph.addEdge(0, 2)
    graph.addEdge(2, 3)
    graph.addEdge(3, 4)
    self.assertEqual(GraphUtils.treeDepth(graph), 3)
def vectorStatistics(self, graph, treeStats=False, eigenStats=True):
    """
    Find a series of statistics for the given input graph which can be
    represented as vector values.

    :param graph: the graph to analyse; checked to be an AbstractMatrixGraph.

    :param treeStats: if True, also compute the distributions of tree sizes
        and tree depths.

    :param eigenStats: if True and the graph has at least one vertex,
        compute eigenvalue statistics of the symmetrised weight matrix.

    :return: a dict with the keys "inDegreeDist", "outDegreeDist",
        "hopCount", "triangleDist", "maxEigVector" and "eigenDist". The key
        "componentsDist" is only present for an undirected graph with at
        least one component, and "treeSizesDist"/"treeDepthsDist" are only
        present when treeStats is True.
    """
    Parameter.checkClass(graph, AbstractMatrixGraph)
    Parameter.checkBoolean(treeStats)

    statsDict = {}

    statsDict["inDegreeDist"] = graph.inDegreeDistribution()
    statsDict["outDegreeDist"] = graph.degreeDistribution()

    logging.debug("Computing hop counts")
    P = graph.findAllDistances(False)
    statsDict["hopCount"] = graph.hopCount(P)

    logging.debug("Computing triangle count")
    if graph.getNumVertices() != 0:
        statsDict["triangleDist"] = numpy.bincount(graph.triangleSequence())
    else:
        statsDict["triangleDist"] = numpy.array([])

    # Get the distribution of component sizes
    logging.debug("Finding distribution of component sizes")
    if graph.isUndirected():
        components = graph.findConnectedComponents()

        if len(components) != 0:
            # The builtin int replaces the numpy.int alias, which no longer
            # exists in NumPy >= 1.24 (it was only ever an alias of int).
            componentSizes = numpy.array([len(c) for c in components], int)
            statsDict["componentsDist"] = numpy.bincount(componentSizes)

    if graph.getNumVertices() != 0 and eigenStats:
        logging.debug("Computing eigenvalues/vectors")
        W = graph.getWeightMatrix()
        # Make sure weight matrix is symmetric
        W = (W + W.T)/2
        # NOTE(review): W is symmetric here, so numpy.linalg.eigh would
        # guarantee real eigenvalues; eig is kept to preserve existing output.
        eigenDistribution, V = numpy.linalg.eig(W)
        i = numpy.argmax(eigenDistribution)
        # Eigenvector belonging to the largest eigenvalue
        statsDict["maxEigVector"] = V[:, i]
        # Strictly positive eigenvalues in decreasing order
        positiveEigs = eigenDistribution[eigenDistribution > 0]
        statsDict["eigenDist"] = numpy.flipud(numpy.sort(positiveEigs))
        gc.collect()
    else:
        statsDict["maxEigVector"] = numpy.array([])
        statsDict["eigenDist"] = numpy.array([])

    if treeStats:
        logging.debug("Computing statistics on trees")
        trees = graph.findTrees()
        statsDict["treeSizesDist"] = numpy.bincount([len(x) for x in trees])
        treeDepths = [GraphUtils.treeDepth(graph.subgraph(x)) for x in trees]
        statsDict["treeDepthsDist"] = numpy.bincount(treeDepths)

    return statsDict
def scalarStatistics(self, graph, slowStats=True, treeStats=False):
    """
    Find a series of statistics for the given input graph which can be
    represented as scalar values. Return results as a vector.

    :param graph: the graph to analyse; checked to be an AbstractSingleGraph.

    :param slowStats: if True, also compute the statistics derived from the
        all-pairs distance matrix (diameter, effective diameter, geodesic
        distances) and the power law fit.

    :param treeStats: if True, also compute the tree based statistics.

    :return: an array of length self.numStats indexed by the self.*Index
        attributes. Entries which were not computed keep the value -1.
    """
    Parameter.checkClass(graph, AbstractSingleGraph)
    Parameter.checkBoolean(slowStats)
    Parameter.checkBoolean(treeStats)

    # Every statistic starts at -1, meaning "not computed"
    statsArray = numpy.ones(self.numStats)*-1
    statsArray[self.numVerticesIndex] = graph.getNumVertices()
    statsArray[self.numEdgesIndex] = graph.getNumEdges()
    statsArray[self.numDirEdgesIndex] = graph.getNumDirEdges()
    statsArray[self.densityIndex] = graph.density()

    if graph.isUndirected():
        logging.debug("Finding connected components")
        subComponents = graph.findConnectedComponents()
        logging.debug("Done")

        componentSizes = [len(c) for c in subComponents]
        statsArray[self.numComponentsIndex] = len(subComponents)
        statsArray[self.numNonSingletonComponentsIndex] = len([s for s in componentSizes if s > 1])
        statsArray[self.numTriOrMoreComponentsIndex] = len([s for s in componentSizes if s > 2])

        logging.debug("Studying max component")
        if len(subComponents) != 0:
            # The first two components are treated as the largest and second
            # largest - assumes findConnectedComponents returns them in
            # decreasing order of size; TODO confirm.
            maxCompGraph = graph.subgraph(list(subComponents[0]))
            statsArray[self.maxComponentSizeIndex] = componentSizes[0]

            if len(subComponents) >= 2:
                statsArray[self.secondComponentSizeIndex] = componentSizes[1]

            statsArray[self.maxComponentEdgesIndex] = maxCompGraph.getNumEdges()
            statsArray[self.meanComponentSizeIndex] = sum(componentSizes)/float(len(subComponents))
            statsArray[self.maxCompMeanDegreeIndex] = numpy.mean(maxCompGraph.outDegreeSequence())
        else:
            statsArray[self.maxComponentSizeIndex] = 0
            statsArray[self.maxComponentEdgesIndex] = 0
            statsArray[self.meanComponentSizeIndex] = 0
            statsArray[self.geodesicDistMaxCompIndex] = 0

    # numpy.mean of an empty sequence is nan, so an empty graph reports 0
    if graph.getNumVertices() != 0:
        statsArray[self.meanDegreeIndex] = numpy.mean(graph.outDegreeSequence())
    else:
        statsArray[self.meanDegreeIndex] = 0

    if slowStats:
        if self.useFloydWarshall:
            logging.debug("Running Floyd-Warshall")
            P = graph.floydWarshall(False)
        else:
            logging.debug("Running Dijkstra's algorithm")
            P = graph.findAllDistances(False)

        # P is shared by all distance based statistics so it is computed once
        statsArray[self.diameterIndex] = graph.diameter(P=P)
        statsArray[self.effectiveDiameterIndex] = graph.effectiveDiameter(self.q, P=P)
        statsArray[self.powerLawIndex] = graph.fitPowerLaw()[0]
        statsArray[self.geodesicDistanceIndex] = graph.geodesicDistance(P=P)
        statsArray[self.harmonicGeoDistanceIndex] = graph.harmonicGeodesicDistance(P=P)

        # subComponents only exists for undirected graphs; the first test
        # short-circuits before it is read for directed ones
        if graph.isUndirected() and len(subComponents) != 0:
            statsArray[self.geodesicDistMaxCompIndex] = graph.geodesicDistance(P=P, vertexInds=list(subComponents[0]))

    if treeStats:
        logging.debug("Computing statistics on trees")
        trees = graph.findTrees()
        treeSizes = [len(x) for x in trees]
        statsArray[self.numTreesIndex] = len(trees)
        statsArray[self.numNonSingletonTreesIndex] = len([s for s in treeSizes if s > 1])

        treeDepths = [GraphUtils.treeDepth(graph.subgraph(list(x))) for x in trees]

        if len(trees) != 0:
            statsArray[self.meanTreeSizeIndex] = numpy.mean(treeSizes)
            statsArray[self.meanTreeDepthIndex] = numpy.mean(treeDepths)

            # Reuse the depths computed above instead of rebuilding the
            # subgraphs of the first and second tree
            statsArray[self.maxTreeSizeIndex] = treeSizes[0]
            statsArray[self.maxTreeDepthIndex] = treeDepths[0]

            if len(trees) >= 2:
                statsArray[self.secondTreeSizeIndex] = treeSizes[1]
                statsArray[self.secondTreeDepthIndex] = treeDepths[1]
        else:
            # Avoid nan (and a RuntimeWarning) from the mean of an empty
            # list; 0 matches the empty-graph value used for the mean degree
            statsArray[self.meanTreeSizeIndex] = 0
            statsArray[self.meanTreeDepthIndex] = 0

    return statsArray
def scalarStatistics(self, graph, slowStats=True, treeStats=False):
    """
    Find a series of statistics for the given input graph which can be
    represented as scalar values. Return results as a vector.

    :param graph: the graph to analyse; checked to be an AbstractSingleGraph.

    :param slowStats: if True, also compute the statistics derived from the
        all-pairs distance matrix (diameter, effective diameter, geodesic
        distances) and the power law fit.

    :param treeStats: if True, also compute the tree based statistics.

    :return: an array of length self.numStats indexed by the self.*Index
        attributes. Entries which were not computed keep the value -1.
    """
    Parameter.checkClass(graph, AbstractSingleGraph)
    Parameter.checkBoolean(slowStats)
    Parameter.checkBoolean(treeStats)

    # Every statistic starts at -1, meaning "not computed"
    statsArray = numpy.ones(self.numStats) * -1
    statsArray[self.numVerticesIndex] = graph.getNumVertices()
    statsArray[self.numEdgesIndex] = graph.getNumEdges()
    statsArray[self.numDirEdgesIndex] = graph.getNumDirEdges()
    statsArray[self.densityIndex] = graph.density()

    if graph.isUndirected():
        subComponents = graph.findConnectedComponents()

        componentSizes = [len(c) for c in subComponents]
        statsArray[self.numComponentsIndex] = len(subComponents)
        statsArray[self.numNonSingletonComponentsIndex] = len(
            [s for s in componentSizes if s > 1])
        statsArray[self.numTriOrMoreComponentsIndex] = len(
            [s for s in componentSizes if s > 2])

        if len(subComponents) != 0:
            # The first two components are treated as the largest and second
            # largest - assumes findConnectedComponents returns them in
            # decreasing order of size; TODO confirm.
            maxCompGraph = graph.subgraph(list(subComponents[0]))
            statsArray[self.maxComponentSizeIndex] = componentSizes[0]

            if len(subComponents) >= 2:
                statsArray[self.secondComponentSizeIndex] = componentSizes[1]

            statsArray[self.maxComponentEdgesIndex] = maxCompGraph.getNumEdges()
            statsArray[self.meanComponentSizeIndex] = (
                sum(componentSizes) / float(len(subComponents)))
            statsArray[self.maxCompMeanDegreeIndex] = numpy.mean(
                maxCompGraph.outDegreeSequence())
        else:
            statsArray[self.maxComponentSizeIndex] = 0
            statsArray[self.maxComponentEdgesIndex] = 0
            statsArray[self.meanComponentSizeIndex] = 0
            statsArray[self.geodesicDistMaxCompIndex] = 0

    # numpy.mean of an empty sequence is nan, so an empty graph reports 0
    if graph.getNumVertices() != 0:
        statsArray[self.meanDegreeIndex] = numpy.mean(
            graph.outDegreeSequence())
    else:
        statsArray[self.meanDegreeIndex] = 0

    if slowStats:
        if self.useFloydWarshall:
            logging.debug("Running Floyd-Warshall")
            P = graph.floydWarshall(False)
        else:
            logging.debug("Running Dijkstra's algorithm")
            P = graph.findAllDistances(False)

        # P is shared by all distance based statistics so it is computed once
        statsArray[self.diameterIndex] = graph.diameter(P=P)
        statsArray[self.effectiveDiameterIndex] = graph.effectiveDiameter(
            self.q, P=P)
        statsArray[self.powerLawIndex] = graph.fitPowerLaw()[0]
        statsArray[self.geodesicDistanceIndex] = graph.geodesicDistance(P=P)
        statsArray[self.harmonicGeoDistanceIndex] = (
            graph.harmonicGeodesicDistance(P=P))

        # subComponents only exists for undirected graphs; the first test
        # short-circuits before it is read for directed ones
        if graph.isUndirected() and len(subComponents) != 0:
            statsArray[self.geodesicDistMaxCompIndex] = graph.geodesicDistance(
                P=P, vertexInds=list(subComponents[0]))

    if treeStats:
        logging.debug("Computing statistics on trees")
        trees = graph.findTrees()
        treeSizes = [len(x) for x in trees]
        statsArray[self.numTreesIndex] = len(trees)
        statsArray[self.numNonSingletonTreesIndex] = len(
            [s for s in treeSizes if s > 1])

        treeDepths = [
            GraphUtils.treeDepth(graph.subgraph(list(x))) for x in trees
        ]

        if len(trees) != 0:
            statsArray[self.meanTreeSizeIndex] = numpy.mean(treeSizes)
            statsArray[self.meanTreeDepthIndex] = numpy.mean(treeDepths)

            # Reuse the depths computed above instead of rebuilding the
            # subgraphs of the first and second tree
            statsArray[self.maxTreeSizeIndex] = treeSizes[0]
            statsArray[self.maxTreeDepthIndex] = treeDepths[0]

            if len(trees) >= 2:
                statsArray[self.secondTreeSizeIndex] = treeSizes[1]
                statsArray[self.secondTreeDepthIndex] = treeDepths[1]
        else:
            # Avoid nan (and a RuntimeWarning) from the mean of an empty
            # list; 0 matches the empty-graph value used for the mean degree
            statsArray[self.meanTreeSizeIndex] = 0
            statsArray[self.meanTreeDepthIndex] = 0

    return statsArray