def setSelfEdges(self, selfEdges):
    """
    Store the flag that says whether self edges are allowed.

    The value is validated with Parameter.checkBoolean before it is stored.

    :param selfEdges: whether to allow self edges
    :type selfEdges: :class:`bool`
    """
    # Validation runs first, so the attribute is only assigned after the check.
    Parameter.checkBoolean(selfEdges)
    self.selfEdges = selfEdges
def diameter(self, useWeights=False, P=None):
    """
    Find the diameter of the graph, i.e. the longest shortest path. If
    useWeights is True then the weights in the adjacency matrix are used,
    provided P is not given.

    :param useWeights: Whether to use edge weights to compute a diameter.
    :type useWeights: :class:`bool`

    :param P: An optional nxn matrix whose ijth entry is the shortest path from i to j.
    :type P: :class:`ndarray`

    :returns: The diameter of this graph: 0 when the graph has no edges, otherwise an int if useWeights is False and a float if it is True.

    :raises ValueError: if P is given but is not an ndarray of shape (numVertices, numVertices).
    """
    Parameter.checkBoolean(useWeights)

    # Compare with None by identity: "P != None" on an ndarray is evaluated
    # elementwise and cannot be used as a condition.
    if P is not None:
        numVertices = self.getNumVertices()
        if not isinstance(P, numpy.ndarray) or P.shape != (numVertices, numVertices):
            # P may be an arbitrary object here, so read its shape defensively
            # and convert it to str before concatenating.
            logging.debug("P.shape = " + str(getattr(P, "shape", None)) + " W.shape = " + str(self.W.shape))
            raise ValueError("P must be array of same size as weight matrix of graph")

    if self.getNumEdges() == 0:
        return 0

    if P is None:
        P = self.floydWarshall(useWeights)

    # Infinite entries mark unreachable pairs and are excluded from the maximum.
    # Boolean indexing already builds a new array, so P itself is never modified.
    longestPath = numpy.max(P[P != float('inf')])

    if useWeights:
        return float(longestPath)
    return int(longestPath)
def setSampleReplace(self, sampleReplace):
    """
    Store the flag that decides whether sampling is done with replacement.

    The value is validated with Parameter.checkBoolean before it is stored.

    :param sampleReplace: A boolean to decide whether to sample with replacement.
    :type sampleReplace: :class:`bool`
    """
    # Validation runs first, so the attribute is only assigned after the check.
    Parameter.checkBoolean(sampleReplace)
    self.sampleReplace = sampleReplace
def sequenceVectorStats(self, graph, subgraphIndices, treeStats=False, eigenStats=True):
    """
    Compute vector statistics for a sequence of subgraphs of a single graph.

    :param graph: the graph the subgraphs are taken from; must be an AbstractMatrixGraph.

    :param subgraphIndices: a list in which each element is a list of vertex indices defining one subgraph.

    :param treeStats: forwarded to vectorStatistics.
    :type treeStats: :class:`bool`

    :param eigenStats: forwarded to vectorStatistics.

    :returns: a list with one dict of vector statistics per element of subgraphIndices, in the same order.
    """
    Parameter.checkClass(graph, AbstractMatrixGraph)
    # All index lists are validated up front, before any subgraph is built.
    for inds in subgraphIndices:
        Parameter.checkList(inds, Parameter.checkInt, [0, graph.getNumVertices()])
    Parameter.checkBoolean(treeStats)

    numGraphs = len(subgraphIndices)
    statsDictList = []

    for i, vertexInds in enumerate(subgraphIndices):
        Util.printIteration(i, self.vectorPrintStep, numGraphs)
        subgraph = graph.subgraph(vertexInds)
        subgraphStats = self.vectorStatistics(subgraph, treeStats, eigenStats)
        statsDictList.append(subgraphStats)

    return statsDictList
def sequenceScalarStats(self, graph, subgraphIndices, slowStats=True, treeStats=False):
    """
    Compute scalar statistics for a sequence of subgraphs of a single graph.

    :param graph: the graph the subgraphs are taken from; must be an AbstractMatrixGraph.

    :param subgraphIndices: a list in which each element is a list of vertex indices defining one subgraph.

    :param slowStats: forwarded to scalarStatistics.
    :type slowStats: :class:`bool`

    :param treeStats: forwarded to scalarStatistics.
    :type treeStats: :class:`bool`

    :returns: an array of shape (len(subgraphIndices), self.numStats); row i holds the statistics of the ith subgraph.
    """
    Parameter.checkClass(graph, AbstractMatrixGraph)
    # All index lists are validated up front, before any subgraph is built.
    for inds in subgraphIndices:
        Parameter.checkList(inds, Parameter.checkInt, [0, graph.getNumVertices()])
    Parameter.checkBoolean(slowStats)
    Parameter.checkBoolean(treeStats)

    numGraphs = len(subgraphIndices)
    statsMatrix = numpy.zeros((numGraphs, self.numStats))

    for i, vertexInds in enumerate(subgraphIndices):
        Util.printIteration(i, self.printStep, numGraphs)
        subgraph = graph.subgraph(vertexInds)
        statsMatrix[i, :] = self.scalarStatistics(subgraph, slowStats, treeStats)

    return statsMatrix
def vectorStatistics(self, graph, treeStats=False, eigenStats=True):
    """
    Find a series of statistics for the given input graph which can be
    represented as vector values.

    :param graph: the graph to analyse; must be an AbstractMatrixGraph.

    :param treeStats: whether to add the tree size and tree depth distributions.
    :type treeStats: :class:`bool`

    :param eigenStats: whether to compute the eigenvalue based entries.

    :returns: a dict with keys "inDegreeDist", "outDegreeDist", "hopCount", "triangleDist", "maxEigVector" and "eigenDist"; "componentsDist" is present only for an undirected graph with at least one component, and "treeSizesDist" and "treeDepthsDist" only when treeStats is True.
    """
    Parameter.checkClass(graph, AbstractMatrixGraph)
    Parameter.checkBoolean(treeStats)

    statsDict = {}
    statsDict["inDegreeDist"] = graph.inDegreeDistribution()
    statsDict["outDegreeDist"] = graph.degreeDistribution()

    logging.debug("Computing hop counts")
    P = graph.findAllDistances(False)
    statsDict["hopCount"] = graph.hopCount(P)

    logging.debug("Computing triangle count")
    if graph.getNumVertices() != 0:
        statsDict["triangleDist"] = numpy.bincount(graph.triangleSequence())
    else:
        statsDict["triangleDist"] = numpy.array([])

    #Get the distribution of component sizes
    logging.debug("Finding distribution of component sizes")
    if graph.isUndirected():
        components = graph.findConnectedComponents()
        if len(components) != 0:
            # The builtin int replaces the numpy.int alias, which no longer
            # exists in current NumPy releases.
            componentSizes = numpy.array([len(c) for c in components], int)
            statsDict["componentsDist"] = numpy.bincount(componentSizes)

    #Make sure weight matrix is symmetric
    if graph.getNumVertices() != 0 and eigenStats:
        logging.debug("Computing eigenvalues/vectors")
        W = graph.getWeightMatrix()
        W = (W + W.T) / 2
        eigenDistribution, V = numpy.linalg.eig(W)
        i = numpy.argmax(eigenDistribution)
        statsDict["maxEigVector"] = V[:, i]
        # Only the positive eigenvalues are kept, sorted in descending order.
        statsDict["eigenDist"] = numpy.flipud(numpy.sort(eigenDistribution[eigenDistribution > 0]))
        gc.collect()
    else:
        statsDict["maxEigVector"] = numpy.array([])
        statsDict["eigenDist"] = numpy.array([])

    if treeStats:
        logging.debug("Computing statistics on trees")
        trees = graph.findTrees()
        statsDict["treeSizesDist"] = numpy.bincount([len(x) for x in trees])
        treeDepths = [GraphUtils.treeDepth((graph.subgraph(x))) for x in trees]
        statsDict["treeDepthsDist"] = numpy.bincount(treeDepths)

    return statsDict
def checkBoolean(self):
    """
    Check that Parameter.checkBoolean accepts True and False and raises
    ValueError for the ints 0 and 1 and for a string.
    """
    for accepted in (True, False):
        Parameter.checkBoolean(accepted)

    for rejected in (0, 1, "s"):
        self.assertRaises(ValueError, Parameter.checkBoolean, rejected)
def vectorStatistics(self, graph, treeStats=False, eigenStats=True):
    """
    Find a series of statistics for the given input graph which can be
    represented as vector values.

    :param graph: the graph to analyse; must be an AbstractMatrixGraph.

    :param treeStats: whether to add the tree size and tree depth distributions.
    :type treeStats: :class:`bool`

    :param eigenStats: whether to compute the eigenvalue based entries.

    :returns: a dict with keys "inDegreeDist", "outDegreeDist", "hopCount", "triangleDist", "maxEigVector" and "eigenDist"; "componentsDist" is present only for an undirected graph with at least one component, and "treeSizesDist" and "treeDepthsDist" only when treeStats is True.
    """
    Parameter.checkClass(graph, AbstractMatrixGraph)
    Parameter.checkBoolean(treeStats)

    statsDict = {}
    statsDict["inDegreeDist"] = graph.inDegreeDistribution()
    statsDict["outDegreeDist"] = graph.degreeDistribution()

    logging.debug("Computing hop counts")
    P = graph.findAllDistances(False)
    statsDict["hopCount"] = graph.hopCount(P)

    logging.debug("Computing triangle count")
    if graph.getNumVertices() != 0:
        statsDict["triangleDist"] = numpy.bincount(graph.triangleSequence())
    else:
        statsDict["triangleDist"] = numpy.array([])

    #Get the distribution of component sizes
    logging.debug("Finding distribution of component sizes")
    if graph.isUndirected():
        components = graph.findConnectedComponents()
        if len(components) != 0:
            # The builtin int replaces the numpy.int alias, which no longer
            # exists in current NumPy releases.
            componentSizes = numpy.array([len(c) for c in components], int)
            statsDict["componentsDist"] = numpy.bincount(componentSizes)

    #Make sure weight matrix is symmetric
    if graph.getNumVertices() != 0 and eigenStats:
        logging.debug("Computing eigenvalues/vectors")
        W = graph.getWeightMatrix()
        W = (W + W.T) / 2
        eigenDistribution, V = numpy.linalg.eig(W)
        i = numpy.argmax(eigenDistribution)
        statsDict["maxEigVector"] = V[:, i]
        # Only the positive eigenvalues are kept, sorted in descending order.
        statsDict["eigenDist"] = numpy.flipud(numpy.sort(eigenDistribution[eigenDistribution > 0]))
        gc.collect()
    else:
        statsDict["maxEigVector"] = numpy.array([])
        statsDict["eigenDist"] = numpy.array([])

    if treeStats:
        logging.debug("Computing statistics on trees")
        trees = graph.findTrees()
        statsDict["treeSizesDist"] = numpy.bincount([len(x) for x in trees])
        treeDepths = [GraphUtils.treeDepth((graph.subgraph(x))) for x in trees]
        statsDict["treeDepthsDist"] = numpy.bincount(treeDepths)

    return statsDict
def __init__(self, vertices, undirected=True, W=None, sizeHint=1000):
    """
    Create a PySparseGraph with a given AbstractVertexList or number of
    vertices, and specify whether it is directed. One can optionally pass in a
    sparse matrix W which is used as the weight matrix of the graph. Different
    kinds of sparse matrix can impact the speed of various operations. The
    currently supported sparse matrix types are: ll_mat.

    :param vertices: the initial set of vertices as a AbstractVertexList object, or an int to specify the number of vertices in which case vertices are stored in a GeneralVertexList.

    :param undirected: a boolean variable to indicate if the graph is undirected.
    :type undirected: :class:`bool`

    :param W: a square sparse matrix of the same size as the number of vertices, or None to create the default one.

    :param sizeHint: the expected number of edges in the graph for efficient memory usage.
    :type sizeHint: :class:`int`

    :raises ValueError: if vertices is neither an AbstractVertexList nor an int, or if W is given but is not an ll_mat of matching shape.
    """
    Parameter.checkBoolean(undirected)

    if isinstance(vertices, AbstractVertexList):
        self.vList = vertices
    elif isinstance(vertices, int):
        self.vList = GeneralVertexList(vertices)
    else:
        raise ValueError("Invalid vList parameter: " + str(vertices))

    numVertices = len(self.vList)

    # Identity checks against None avoid calling the comparison operators of
    # the matrix object.
    if W is not None and not (isinstance(W, spmatrix.LLMatType) and W.shape == (numVertices, numVertices)):
        raise ValueError("Input argument W must be None or spmatrix.ll_mat of size " + str(numVertices))

    self.undirected = undirected

    if W is None:
        #Should use ll_mat_sym for undirected graphs but it has several unimplemented methods
        self.W = spmatrix.ll_mat(numVertices, numVertices, sizeHint)
    else:
        self.W = W
        #The next line is for error checking mainly
        self.setWeightMatrix(W)
def sequenceVectorStats(self, graph, subgraphIndices, treeStats=False, eigenStats=True):
    """
    Compute vector statistics for a sequence of subgraphs of a single graph.

    :param graph: the graph the subgraphs are taken from; must be an AbstractMatrixGraph.

    :param subgraphIndices: a list in which each element is a list of vertex indices defining one subgraph.

    :param treeStats: forwarded to vectorStatistics.
    :type treeStats: :class:`bool`

    :param eigenStats: forwarded to vectorStatistics.

    :returns: a list with one dict of vector statistics per element of subgraphIndices, in the same order.
    """
    Parameter.checkClass(graph, AbstractMatrixGraph)
    # All index lists are validated up front, before any subgraph is built.
    for inds in subgraphIndices:
        Parameter.checkList(inds, Parameter.checkInt, [0, graph.getNumVertices()])
    Parameter.checkBoolean(treeStats)

    numGraphs = len(subgraphIndices)
    statsDictList = []

    for i, vertexInds in enumerate(subgraphIndices):
        Util.printIteration(i, self.vectorPrintStep, numGraphs)
        subgraph = graph.subgraph(vertexInds)
        subgraphStats = self.vectorStatistics(subgraph, treeStats, eigenStats)
        statsDictList.append(subgraphStats)

    return statsDictList
def meanSeqScalarStats(self, graphList, slowStats=True, treeStats=False):
    """
    Average the scalar statistics of several graphs over their subgraph sequences.

    All graphs must be the same size and computed from the same distribution,
    and the number of subgraphs must be the same for each of them.

    :param graphList: a list of tuples (graph, subgraphIndices).

    :param slowStats: forwarded to sequenceScalarStats.
    :type slowStats: :class:`bool`

    :param treeStats: forwarded to sequenceScalarStats.
    :type treeStats: :class:`bool`

    :returns: -1 if graphList is empty, otherwise a tuple (mean, std) taken over the graphs; each row of the two arrays corresponds to one subgraph.
    """
    Parameter.checkBoolean(slowStats)
    Parameter.checkBoolean(treeStats)

    numGraphs = len(graphList)
    if numGraphs == 0:
        return -1

    # The number of subgraphs is read from the first entry only.
    numSubgraphs = len(graphList[0][1])
    statsMatrix = numpy.zeros((numSubgraphs, self.numStats, numGraphs))

    for i, (graph, subgraphIndices) in enumerate(graphList):
        statsMatrix[:, :, i] = self.sequenceScalarStats(graph, subgraphIndices, slowStats, treeStats)

    # Axis 2 indexes the graphs, so both reductions are taken across graphs.
    meanStats = numpy.mean(statsMatrix, 2)
    stdStats = numpy.std(statsMatrix, 2)
    return meanStats, stdStats
def sequenceScalarStats(self, graph, subgraphIndices, slowStats=True, treeStats=False):
    """
    Compute scalar statistics for a sequence of subgraphs of a single graph.

    :param graph: the graph the subgraphs are taken from; must be an AbstractMatrixGraph.

    :param subgraphIndices: a list in which each element is a list of vertex indices defining one subgraph.

    :param slowStats: forwarded to scalarStatistics.
    :type slowStats: :class:`bool`

    :param treeStats: forwarded to scalarStatistics.
    :type treeStats: :class:`bool`

    :returns: an array of shape (len(subgraphIndices), self.numStats); row i holds the statistics of the ith subgraph.
    """
    Parameter.checkClass(graph, AbstractMatrixGraph)
    # All index lists are validated up front, before any subgraph is built.
    for inds in subgraphIndices:
        Parameter.checkList(inds, Parameter.checkInt, [0, graph.getNumVertices()])
    Parameter.checkBoolean(slowStats)
    Parameter.checkBoolean(treeStats)

    numGraphs = len(subgraphIndices)
    statsMatrix = numpy.zeros((numGraphs, self.numStats))

    for i, vertexInds in enumerate(subgraphIndices):
        Util.printIteration(i, self.printStep, numGraphs)
        logging.debug("Subgraph size: " + str(len(vertexInds)))
        subgraph = graph.subgraph(vertexInds)
        statsMatrix[i, :] = self.scalarStatistics(subgraph, slowStats, treeStats)

    return statsMatrix
def __init__(self, vertices, undirected=True, W=None, dtype=float):
    """
    Create a DenseGraph with a given AbstractVertexList or number of
    vertices, and specify whether it is directed. One can optionally pass in a
    numpy array W which is used as the weight matrix of the graph.

    :param vertices: the initial set of vertices as a AbstractVertexList object, or an int to specify the number of vertices in which case vertices are stored in a GeneralVertexList.

    :param undirected: a boolean variable to indicate if the graph is undirected.
    :type undirected: :class:`bool`

    :param W: a numpy array of the same size as vertices, or None to create the default one.

    :param dtype: the data type of the weight matrix if W is not specified e.g numpy.int8.

    :raises ValueError: if vertices is neither an AbstractVertexList nor an int, or if W is given but is not an ndarray of matching shape.
    """
    # The default dtype is the builtin float: the numpy.float alias no longer
    # exists in current NumPy releases and would fail when this def is evaluated.
    Parameter.checkBoolean(undirected)

    if isinstance(vertices, AbstractVertexList):
        self.vList = vertices
    elif isinstance(vertices, int):
        self.vList = GeneralVertexList(vertices)
    else:
        raise ValueError("Invalid vList parameter: " + str(vertices))

    numVertices = len(self.vList)

    # Compare with None by identity: "W != None" on an ndarray is evaluated
    # elementwise and cannot be used as a condition.
    if W is not None and not (isinstance(W, numpy.ndarray) and W.shape == (numVertices, numVertices)):
        raise ValueError("Input argument W must be None or numpy array of size " + str(numVertices))

    self.undirected = undirected

    if W is None:
        self.W = numpy.zeros((numVertices, numVertices), dtype=dtype)
    else:
        self.W = W
        #The next line is for error checking mainly
        self.setWeightMatrix(W)
def meanSeqScalarStats(self, graphList, slowStats=True, treeStats=False):
    """
    Average the scalar statistics of several graphs over their subgraph sequences.

    All graphs must be the same size and computed from the same distribution,
    and the number of subgraphs must be the same for each of them.

    :param graphList: a list of tuples (graph, subgraphIndices).

    :param slowStats: forwarded to sequenceScalarStats.
    :type slowStats: :class:`bool`

    :param treeStats: forwarded to sequenceScalarStats.
    :type treeStats: :class:`bool`

    :returns: -1 if graphList is empty, otherwise a tuple (mean, std) taken over the graphs; each row of the two arrays corresponds to one subgraph.
    """
    Parameter.checkBoolean(slowStats)
    Parameter.checkBoolean(treeStats)

    numGraphs = len(graphList)
    if numGraphs == 0:
        return -1

    # The number of subgraphs is read from the first entry only.
    numSubgraphs = len(graphList[0][1])
    statsMatrix = numpy.zeros((numSubgraphs, self.numStats, numGraphs))

    for i, (graph, subgraphIndices) in enumerate(graphList):
        statsMatrix[:, :, i] = self.sequenceScalarStats(graph, subgraphIndices, slowStats, treeStats)

    # Axis 2 indexes the graphs, so both reductions are taken across graphs.
    meanStats = numpy.mean(statsMatrix, 2)
    stdStats = numpy.std(statsMatrix, 2)
    return meanStats, stdStats
def __init__(self, vertices, undirected=True, dtype=float):
    """
    Create a sparse graph using sppy csarray with a given AbstractVertexList,
    and specify whether directed.

    :param vertices: the initial set of vertices as a AbstractVertexList object, or an int to specify the number of vertices in which case vertices are stored in a GeneralVertexList.

    :param undirected: a boolean variable to indicate if the graph is undirected.
    :type undirected: :class:`bool`

    :param dtype: the data type for the weight matrix, e.g numpy.int8.

    :raises ValueError: if vertices is neither an AbstractVertexList nor an int.
    """
    # The default dtype is the builtin float: the numpy.float alias no longer
    # exists in current NumPy releases and would fail when this def is evaluated.
    Parameter.checkBoolean(undirected)

    if isinstance(vertices, AbstractVertexList):
        self.vList = vertices
    elif isinstance(vertices, int):
        self.vList = GeneralVertexList(vertices)
    else:
        raise ValueError("Invalid vList parameter: " + str(vertices))

    numVertices = self.vList.getNumVertices()
    self.W = sppy.csarray((numVertices, numVertices), dtype)
    self.undirected = undirected
def __init__(self, vertices, undirected=True, W=None, sizeHint=1000):
    """
    Create a PySparseGraph with a given AbstractVertexList or number of
    vertices, and specify whether it is directed. One can optionally pass in a
    sparse matrix W which is used as the weight matrix of the graph. Different
    kinds of sparse matrix can impact the speed of various operations. The
    currently supported sparse matrix types are: ll_mat.

    :param vertices: the initial set of vertices as a AbstractVertexList object, or an int to specify the number of vertices in which case vertices are stored in a GeneralVertexList.

    :param undirected: a boolean variable to indicate if the graph is undirected.
    :type undirected: :class:`bool`

    :param W: a square sparse matrix of the same size as the number of vertices, or None to create the default one.

    :param sizeHint: the expected number of edges in the graph for efficient memory usage.
    :type sizeHint: :class:`int`

    :raises ValueError: if vertices is neither an AbstractVertexList nor an int, or if W is given but is not an ll_mat of matching shape.
    """
    Parameter.checkBoolean(undirected)

    if isinstance(vertices, AbstractVertexList):
        self.vList = vertices
    elif isinstance(vertices, int):
        self.vList = GeneralVertexList(vertices)
    else:
        raise ValueError("Invalid vList parameter: " + str(vertices))

    numVertices = len(self.vList)

    # Identity checks against None avoid calling the comparison operators of
    # the matrix object.
    if W is not None and not (isinstance(W, spmatrix.LLMatType) and W.shape == (numVertices, numVertices)):
        raise ValueError("Input argument W must be None or spmatrix.ll_mat of size " + str(numVertices))

    self.undirected = undirected

    if W is None:
        #Should use ll_mat_sym for undirected graphs but it has several unimplemented methods
        self.W = spmatrix.ll_mat(numVertices, numVertices, sizeHint)
    else:
        self.W = W
        #The next line is for error checking mainly
        self.setWeightMatrix(W)
def __init__(self, vertices, undirected=True, dtype=float):
    """
    Create a sparse graph using sppy csarray with a given AbstractVertexList,
    and specify whether directed.

    :param vertices: the initial set of vertices as a AbstractVertexList object, or an int to specify the number of vertices in which case vertices are stored in a GeneralVertexList.

    :param undirected: a boolean variable to indicate if the graph is undirected.
    :type undirected: :class:`bool`

    :param dtype: the data type for the weight matrix, e.g numpy.int8.

    :raises ValueError: if vertices is neither an AbstractVertexList nor an int.
    """
    # The default dtype is the builtin float: the numpy.float alias no longer
    # exists in current NumPy releases and would fail when this def is evaluated.
    Parameter.checkBoolean(undirected)

    if isinstance(vertices, AbstractVertexList):
        self.vList = vertices
    elif isinstance(vertices, int):
        self.vList = GeneralVertexList(vertices)
    else:
        raise ValueError("Invalid vList parameter: " + str(vertices))

    numVertices = self.vList.getNumVertices()
    self.W = sppy.csarray((numVertices, numVertices), dtype)
    self.undirected = undirected
def __init__(self, vertices, undirected=True, W=None, dtype=float):
    """
    Create a DenseGraph with a given AbstractVertexList or number of
    vertices, and specify whether it is directed. One can optionally pass in a
    numpy array W which is used as the weight matrix of the graph.

    :param vertices: the initial set of vertices as a AbstractVertexList object, or an int to specify the number of vertices in which case vertices are stored in a GeneralVertexList.

    :param undirected: a boolean variable to indicate if the graph is undirected.
    :type undirected: :class:`bool`

    :param W: a numpy array of the same size as vertices, or None to create the default one.

    :param dtype: the data type of the weight matrix if W is not specified e.g numpy.int8.

    :raises ValueError: if vertices is neither an AbstractVertexList nor an int, or if W is given but is not an ndarray of matching shape.
    """
    # The default dtype is the builtin float: the numpy.float alias no longer
    # exists in current NumPy releases and would fail when this def is evaluated.
    Parameter.checkBoolean(undirected)

    if isinstance(vertices, AbstractVertexList):
        self.vList = vertices
    elif isinstance(vertices, int):
        self.vList = GeneralVertexList(vertices)
    else:
        raise ValueError("Invalid vList parameter: " + str(vertices))

    numVertices = len(self.vList)

    # Compare with None by identity: "W != None" on an ndarray is evaluated
    # elementwise and cannot be used as a condition.
    if W is not None and not (isinstance(W, numpy.ndarray) and W.shape == (numVertices, numVertices)):
        raise ValueError("Input argument W must be None or numpy array of size " + str(numVertices))

    self.undirected = undirected

    if W is None:
        self.W = numpy.zeros((numVertices, numVertices), dtype=dtype)
    else:
        self.W = W
        #The next line is for error checking mainly
        self.setWeightMatrix(W)
def setIsLeafNode(self, leafNode):
    """
    Store the leaf node flag.

    The value is validated with Parameter.checkBoolean before it is stored.

    :param leafNode: the new value of the leafNode attribute.
    :type leafNode: :class:`bool`
    """
    # Validation runs first, so the attribute is only assigned after the check.
    Parameter.checkBoolean(leafNode)
    self.leafNode = leafNode
def setPure(self, pure):
    """
    Store the pure flag.

    The value is validated with Parameter.checkBoolean before it is stored.

    :param pure: the new value of the pure attribute.
    :type pure: :class:`bool`
    """
    # Validation runs first, so the attribute is only assigned after the check.
    Parameter.checkBoolean(pure)
    self.pure = pure
def scalarStatistics(self, graph, slowStats=True, treeStats=False):
    """
    Compute a series of scalar statistics for the given graph and return them
    as a vector.

    :param graph: the graph to analyse; must be an AbstractSingleGraph.

    :param slowStats: whether to compute the statistics that need all-pairs distances (diameter, effective diameter, power law fit and geodesic distances).
    :type slowStats: :class:`bool`

    :param treeStats: whether to compute the statistics on trees.
    :type treeStats: :class:`bool`

    :returns: an array of length self.numStats; every entry that is not computed keeps the value -1.
    """
    Parameter.checkClass(graph, AbstractSingleGraph)
    Parameter.checkBoolean(slowStats)
    Parameter.checkBoolean(treeStats)

    # Every slot starts at -1 so that statistics which are skipped stay recognisable.
    statsArray = -numpy.ones(self.numStats)
    statsArray[self.numVerticesIndex] = graph.getNumVertices()
    statsArray[self.numEdgesIndex] = graph.getNumEdges()
    statsArray[self.numDirEdgesIndex] = graph.getNumDirEdges()
    statsArray[self.densityIndex] = graph.density()

    # Component statistics are only computed for undirected graphs.
    if graph.isUndirected():
        subComponents = graph.findConnectedComponents()
        numComponents = len(subComponents)
        statsArray[self.numComponentsIndex] = numComponents
        statsArray[self.numNonSingletonComponentsIndex] = sum(1 for c in subComponents if len(c) > 1)
        statsArray[self.numTriOrMoreComponentsIndex] = sum(1 for c in subComponents if len(c) > 2)

        if numComponents == 0:
            statsArray[self.maxComponentSizeIndex] = 0
            statsArray[self.maxComponentEdgesIndex] = 0
            statsArray[self.meanComponentSizeIndex] = 0
            statsArray[self.geodesicDistMaxCompIndex] = 0
        else:
            # The first component is treated as the largest one
            # (presumably findConnectedComponents sorts by size - confirm).
            firstComponent = subComponents[0]
            maxCompGraph = graph.subgraph(list(firstComponent))
            statsArray[self.maxComponentSizeIndex] = len(firstComponent)

            if numComponents >= 2:
                statsArray[self.secondComponentSizeIndex] = len(subComponents[1])

            statsArray[self.maxComponentEdgesIndex] = maxCompGraph.getNumEdges()
            totalSize = sum(len(x) for x in subComponents)
            statsArray[self.meanComponentSizeIndex] = totalSize / float(statsArray[self.numComponentsIndex])
            statsArray[self.maxCompMeanDegreeIndex] = numpy.mean(maxCompGraph.outDegreeSequence())

    if graph.getNumVertices() == 0:
        statsArray[self.meanDegreeIndex] = 0
    else:
        statsArray[self.meanDegreeIndex] = numpy.mean(graph.outDegreeSequence())

    if slowStats:
        # The unweighted distance matrix P is computed once and shared by the
        # distance based statistics below.
        if self.useFloydWarshall:
            logging.debug("Running Floyd-Warshall")
            P = graph.floydWarshall(False)
        else:
            logging.debug("Running Dijkstra's algorithm")
            P = graph.findAllDistances(False)

        statsArray[self.diameterIndex] = graph.diameter(P=P)
        statsArray[self.effectiveDiameterIndex] = graph.effectiveDiameter(self.q, P=P)
        statsArray[self.powerLawIndex] = graph.fitPowerLaw()[0]
        statsArray[self.geodesicDistanceIndex] = graph.geodesicDistance(P=P)
        statsArray[self.harmonicGeoDistanceIndex] = graph.harmonicGeodesicDistance(P=P)

        # subComponents only exists for undirected graphs, hence the first test.
        if graph.isUndirected() and len(subComponents) != 0:
            maxCompInds = list(subComponents[0])
            statsArray[self.geodesicDistMaxCompIndex] = graph.geodesicDistance(P=P, vertexInds=maxCompInds)

    if treeStats:
        logging.debug("Computing statistics on trees")
        trees = graph.findTrees()
        statsArray[self.numTreesIndex] = len(trees)
        statsArray[self.numNonSingletonTreesIndex] = sum(1 for c in trees if len(c) > 1)
        statsArray[self.meanTreeSizeIndex] = numpy.mean([len(x) for x in trees])

        treeDepths = []
        for x in trees:
            treeDepths.append(GraphUtils.treeDepth(graph.subgraph(list(x))))
        statsArray[self.meanTreeDepthIndex] = numpy.mean(treeDepths)

        if len(trees) != 0:
            maxTreeGraph = graph.subgraph(trees[0])
            statsArray[self.maxTreeSizeIndex] = len(trees[0])
            statsArray[self.maxTreeDepthIndex] = GraphUtils.treeDepth(maxTreeGraph)

            if len(trees) >= 2:
                secondTreeGraph = graph.subgraph(trees[1])
                statsArray[self.secondTreeSizeIndex] = len(trees[1])
                statsArray[self.secondTreeDepthIndex] = GraphUtils.treeDepth(secondTreeGraph)

    return statsArray
def __init__(self, vertices, undirected=True, W=None, dtype=float, frmt="csr"):
    """
    Create a SparseGraph with a given AbstractVertexList or number of
    vertices, and specify whether it is directed. One can optionally pass in a
    sparse matrix W which is used as the weight matrix of the graph. Different
    kinds of sparse matrix can impact the speed of various operations. The
    currently supported sparse matrix types are: lil_matrix, csr_matrix,
    csc_matrix and dok_matrix. The default sparse matrix is csr_matrix.

    :param vertices: the initial set of vertices as a AbstractVertexList object, or an int to specify the number of vertices in which case vertices are stored in a GeneralVertexList.

    :param undirected: a boolean variable to indicate if the graph is undirected.
    :type undirected: :class:`bool`

    :param W: a square sparse matrix of the same size as the number of vertices, or None to create the default one.

    :param dtype: the data type of the sparse matrix if W is not specified.

    :param frmt: the format of the sparse matrix: lil, csr or csc if W is not specified

    :raises ValueError: if vertices is neither an AbstractVertexList nor an int, if W is given but is not a sparse matrix of matching shape, or if frmt is not one of "lil", "csr" and "csc".
    """
    # The default dtype is the builtin float: the numpy.float alias no longer
    # exists in current NumPy releases and would fail when this def is evaluated.
    Parameter.checkBoolean(undirected)

    if isinstance(vertices, AbstractVertexList):
        self.vList = vertices
    elif isinstance(vertices, int):
        self.vList = GeneralVertexList(vertices)
    else:
        raise ValueError("Invalid vList parameter: " + str(vertices))

    numVertices = self.vList.getNumVertices()

    # Compare with None by identity: comparison operators on a sparse matrix
    # do not produce a plain boolean.
    if W is not None and not (sparse.issparse(W) and W.shape == (numVertices, numVertices)):
        raise ValueError("Input argument W must be None or sparse matrix of size " + str(numVertices))

    self.undirected = undirected

    # The format is validated even when W is supplied.
    if frmt == "lil":
        matrix = sparse.lil_matrix
    elif frmt == "csr":
        matrix = sparse.csr_matrix
    elif frmt == "csc":
        matrix = sparse.csc_matrix
    else:
        # str() keeps this a ValueError even when frmt is not a string.
        raise ValueError("Invalid sparse matrix format: " + str(frmt))

    #Terrible hack alert: can't create a zero size sparse matrix, so we settle
    #for one of size 1. Better is to create a new class.
    if W is None:
        if numVertices == 0:
            self.W = matrix((1, 1), dtype=dtype)
        else:
            self.W = matrix((numVertices, numVertices), dtype=dtype)
    else:
        self.W = W
        #The next line is for error checking mainly
        self.setWeightMatrix(W)
def scalarStatistics(self, graph, slowStats=True, treeStats=False):
    """
    Compute a series of scalar statistics for the given graph and return them
    as a vector.

    :param graph: the graph to analyse; must be an AbstractSingleGraph.

    :param slowStats: whether to compute the statistics that need all-pairs distances (diameter, effective diameter, power law fit and geodesic distances).
    :type slowStats: :class:`bool`

    :param treeStats: whether to compute the statistics on trees.
    :type treeStats: :class:`bool`

    :returns: an array of length self.numStats; every entry that is not computed keeps the value -1.
    """
    Parameter.checkClass(graph, AbstractSingleGraph)
    Parameter.checkBoolean(slowStats)
    Parameter.checkBoolean(treeStats)

    # Every slot starts at -1 so that statistics which are skipped stay recognisable.
    statsArray = -numpy.ones(self.numStats)
    statsArray[self.numVerticesIndex] = graph.getNumVertices()
    statsArray[self.numEdgesIndex] = graph.getNumEdges()
    statsArray[self.numDirEdgesIndex] = graph.getNumDirEdges()
    statsArray[self.densityIndex] = graph.density()

    # Component statistics are only computed for undirected graphs.
    if graph.isUndirected():
        logging.debug("Finding connected components")
        subComponents = graph.findConnectedComponents()
        logging.debug("Done")

        numComponents = len(subComponents)
        statsArray[self.numComponentsIndex] = numComponents
        statsArray[self.numNonSingletonComponentsIndex] = sum(1 for c in subComponents if len(c) > 1)
        statsArray[self.numTriOrMoreComponentsIndex] = sum(1 for c in subComponents if len(c) > 2)

        logging.debug("Studying max component")
        if numComponents == 0:
            statsArray[self.maxComponentSizeIndex] = 0
            statsArray[self.maxComponentEdgesIndex] = 0
            statsArray[self.meanComponentSizeIndex] = 0
            statsArray[self.geodesicDistMaxCompIndex] = 0
        else:
            # The first component is treated as the largest one
            # (presumably findConnectedComponents sorts by size - confirm).
            firstComponent = subComponents[0]
            maxCompGraph = graph.subgraph(list(firstComponent))
            statsArray[self.maxComponentSizeIndex] = len(firstComponent)

            if numComponents >= 2:
                statsArray[self.secondComponentSizeIndex] = len(subComponents[1])

            statsArray[self.maxComponentEdgesIndex] = maxCompGraph.getNumEdges()
            totalSize = sum(len(x) for x in subComponents)
            statsArray[self.meanComponentSizeIndex] = totalSize / float(statsArray[self.numComponentsIndex])
            statsArray[self.maxCompMeanDegreeIndex] = numpy.mean(maxCompGraph.outDegreeSequence())

    if graph.getNumVertices() == 0:
        statsArray[self.meanDegreeIndex] = 0
    else:
        statsArray[self.meanDegreeIndex] = numpy.mean(graph.outDegreeSequence())

    if slowStats:
        # The unweighted distance matrix P is computed once and shared by the
        # distance based statistics below.
        if self.useFloydWarshall:
            logging.debug("Running Floyd-Warshall")
            P = graph.floydWarshall(False)
        else:
            logging.debug("Running Dijkstra's algorithm")
            P = graph.findAllDistances(False)

        statsArray[self.diameterIndex] = graph.diameter(P=P)
        statsArray[self.effectiveDiameterIndex] = graph.effectiveDiameter(self.q, P=P)
        statsArray[self.powerLawIndex] = graph.fitPowerLaw()[0]
        statsArray[self.geodesicDistanceIndex] = graph.geodesicDistance(P=P)
        statsArray[self.harmonicGeoDistanceIndex] = graph.harmonicGeodesicDistance(P=P)

        # subComponents only exists for undirected graphs, hence the first test.
        if graph.isUndirected() and len(subComponents) != 0:
            maxCompInds = list(subComponents[0])
            statsArray[self.geodesicDistMaxCompIndex] = graph.geodesicDistance(P=P, vertexInds=maxCompInds)

    if treeStats:
        logging.debug("Computing statistics on trees")
        trees = graph.findTrees()
        statsArray[self.numTreesIndex] = len(trees)
        statsArray[self.numNonSingletonTreesIndex] = sum(1 for c in trees if len(c) > 1)
        statsArray[self.meanTreeSizeIndex] = numpy.mean([len(x) for x in trees])

        treeDepths = []
        for x in trees:
            treeDepths.append(GraphUtils.treeDepth(graph.subgraph(list(x))))
        statsArray[self.meanTreeDepthIndex] = numpy.mean(treeDepths)

        if len(trees) != 0:
            maxTreeGraph = graph.subgraph(trees[0])
            statsArray[self.maxTreeSizeIndex] = len(trees[0])
            statsArray[self.maxTreeDepthIndex] = GraphUtils.treeDepth(maxTreeGraph)

            if len(trees) >= 2:
                secondTreeGraph = graph.subgraph(trees[1])
                statsArray[self.secondTreeSizeIndex] = len(trees[1])
                statsArray[self.secondTreeDepthIndex] = GraphUtils.treeDepth(secondTreeGraph)

    return statsArray
def scalarStatistics(self, graph, slowStats=True, treeStats=False):
    """
    Find a series of statistics for the given input graph which can be
    represented as scalar values. Return results as a vector. The graph is
    accessed through the igraph API and must be undirected.

    :param graph: the undirected graph to analyse.

    :param slowStats: whether to compute the diameter and the average path lengths.
    :type slowStats: :class:`bool`

    :param treeStats: validated as a boolean but otherwise unused by this method.
    :type treeStats: :class:`bool`

    :returns: an array of length self.numStats; every entry that is not computed keeps the value -1.

    :raises ValueError: if the graph is directed.
    """
    if graph.is_directed():
        raise ValueError("Only works on undirected graphs")

    #This method is a bit of a mess
    Parameter.checkBoolean(slowStats)
    Parameter.checkBoolean(treeStats)

    # Every slot starts at -1 so that statistics which are skipped stay recognisable.
    statsArray = numpy.ones(self.numStats)*-1
    statsArray[self.numVerticesIndex] = graph.vcount()
    statsArray[self.numEdgesIndex] = graph.ecount()
    statsArray[self.numDirEdgesIndex] = graph.as_directed().ecount()
    statsArray[self.densityIndex] = graph.density()

    logging.debug("Finding connected components")
    subComponents = graph.components()
    logging.debug("Done")
    statsArray[self.numComponentsIndex] = len(subComponents)

    nonSingletonSubComponents = [c for c in subComponents if len(c) > 1]
    statsArray[self.numNonSingletonComponentsIndex] = len(nonSingletonSubComponents)

    triOrMoreSubComponents = [c for c in subComponents if len(c) > 2]
    statsArray[self.numTriOrMoreComponentsIndex] = len(triOrMoreSubComponents)

    # inds orders the components from largest to smallest.
    componentSizes = numpy.array([len(c) for c in subComponents])
    inds = numpy.flipud(numpy.argsort(componentSizes))

    logging.debug("Studying max component")
    if len(subComponents) != 0:
        maxCompGraph = graph.subgraph(subComponents[inds[0]])
        statsArray[self.maxComponentSizeIndex] = len(subComponents[inds[0]])

        if len(subComponents) >= 2:
            statsArray[self.secondComponentSizeIndex] = len(subComponents[inds[1]])

        statsArray[self.maxComponentEdgesIndex] = maxCompGraph.ecount()
        statsArray[self.meanComponentSizeIndex] = componentSizes.mean()
        statsArray[self.maxCompMeanDegreeIndex] = numpy.mean(maxCompGraph.degree(mode=igraph.OUT))
    else:
        statsArray[self.maxComponentSizeIndex] = 0
        statsArray[self.maxComponentEdgesIndex] = 0
        statsArray[self.meanComponentSizeIndex] = 0
        statsArray[self.geodesicDistMaxCompIndex] = 0

    if graph.vcount() != 0:
        statsArray[self.meanDegreeIndex] = numpy.mean(graph.degree(mode=igraph.OUT))
    else:
        statsArray[self.meanDegreeIndex] = 0

    if slowStats:
        logging.debug("Computing diameter")
        statsArray[self.diameterIndex] = graph.diameter()
        logging.debug("Computing geodesic distance")
        statsArray[self.geodesicDistanceIndex] = graph.average_path_length()

        if len(subComponents) != 0:
            # The path length of the largest component is taken on the
            # subgraph built above. The previous call passed an undefined
            # name P and keyword arguments that average_path_length does not
            # take, so it could never succeed.
            # NOTE(review): confirm that igraph's average path length is the
            # definition wanted for this statistic.
            statsArray[self.geodesicDistMaxCompIndex] = maxCompGraph.average_path_length()

    return statsArray