def main():
    """
    Convert every ``*.mat`` graph in the given directories to graphml.

    Command line arguments:
    =======================
    directory - one or more directories to run on
    -w/--weighted - pass flag if the graphs are weighted

    For each input directory ``<dir>`` the output goes to ``<dir>_graphml``
    (created when missing). Every ``.mat`` file in the input directory yields
    one ``.graphml`` file with the same base name.
    """
    parser = argparse.ArgumentParser(
        description="Convert a directory of CSC graphs to graphml format")
    parser.add_argument(
        "directory",
        action="store",
        nargs="+",
        help="The directory(ies) we should run on. *Don't add '/' to end of dir name")
    parser.add_argument("-w",
                        "--weighted",
                        action="store_true",
                        help="Pass flag if the graphs are weighted")
    result = parser.parse_args()

    for dircty in result.directory:
        # Strip trailing path separators so that "graphs/" maps to the sibling
        # "graphs_graphml" rather than to "graphs/_graphml" inside the input
        # directory. Fall back to the raw value if stripping empties it ("/").
        new_dir = (dircty.rstrip("/" + os.sep) or dircty) + "_graphml"

        if not os.path.exists(new_dir):
            os.makedirs(new_dir)
            print("Making dir %s ..." % new_dir)
        else:
            print("Dir %s already exists ..." % new_dir)

        for fn in glob(os.path.join(dircty, "*.mat")):
            print("Converting %s ..." % fn)
            base_name = os.path.splitext(os.path.basename(fn))[0]
            new_fn = os.path.join(new_dir, base_name + ".graphml")
            print("Creating %s ..." % new_fn)
            graphml_adapter.csc_to_graphml(loadAnyMat(fn),
                                           is_weighted=result.weighted,
                                           desikan=True,
                                           is_directed=False,
                                           save_fn=new_fn)
def convert_graph(gfn, informat, save_dir, *outformats):
    """
    Convert between igraph supported formats. No conversion to MAT or NPY
    available.

    Positional arguments:
    ====================
    gfn - the graph file name
    informat - the input format of the graph
    save_dir - the directory where we save result to
    outformat - a list of output formats

    Returns:
    =======
    An error message string when the input format is unknown or loading
    raises; otherwise None.

    NOTE(review): the code visible here only loads the graph; `save_dir` and
    `outformats` are not used yet - confirm whether the writing step lives
    elsewhere.
    """
    igraph_formats = ("graphml", "ncol", "edgelist", "lgl", "pajek", "graphdb")
    try:
        if informat in igraph_formats:
            g = igraph.read(gfn, None)
        elif informat == "mat":
            g = csc_to_igraph(loadAnyMat(gfn))
        elif informat == "npy":
            g = csc_to_igraph(np.load(gfn).item())
        else:
            err_msg = "[ERROR]: Unknown format '%s'. Please check format and re-try!" % informat
            print(err_msg)
            return err_msg
    except Exception as err:
        # Loading problems are reported to the caller as a message string
        # instead of propagating.
        print(err)
        return "[ERROR]: " + str(err)
def loadAdjMat(G_fn, lcc_fn):
    """
    Load adjacency matrix given lcc_fn & G_fn.
    lcc has z-indices corresponding to the lcc.

    positional args:
    ================
    G_fn - the .mat file holding graph
    lcc_fn - the largest connected component .npy z-ordering

    returns:
    =======
    G_lcc - The largest connected component of a graph (symmetrized)

    Exits the process with status -1 if either input file does not exist.
    Any other loading failure is re-raised unchanged.
    """
    start = time()
    print("Loading adjacency matrix...")
    try:
        vcc = lcc.ConnectedComponent(fn=lcc_fn)  # creates conn_comp array
        G_full = loadAnyMat(G_fn)  # load the full sparse graph
        G_lcc = vcc.induced_subgraph(G_full)  # sparse graph of LCC
        G_lcc = G_lcc + G_lcc.T  # Symmetrize
    except Exception:
        if not os.path.exists(lcc_fn):
            print("[IOError]: Lcc: %s Doesn't exist" % lcc_fn)
            sys.exit(-1)
        if not os.path.exists(G_fn):
            print("[IOError]: Graph: %s Doesn't exist" % G_fn)
            sys.exit(-1)
        # Both files exist, so the failure is something else. Surface it here
        # rather than falling through to an unrelated "G_lcc is not defined"
        # error at the return statement.
        raise

    print("Time to load: %s secs" % (time() - start))
    return G_lcc
def convert_graph(gfn, informat, save_dir, *outformats):
    """
    Convert between igraph supported formats. No conversion to MAT or NPY
    available.

    Positional arguments:
    ====================
    gfn - the graph file name
    informat - the input format of the graph
    save_dir - the directory where we save result to
    outformat - a list of output formats

    Returns:
    =======
    A tuple (None, error message) when the input format is unknown or loading
    raises; otherwise None.

    NOTE(review): the code visible here only loads the graph; `save_dir` and
    `outformats` are not used yet - confirm whether the writing step lives
    elsewhere.
    """
    igraph_formats = ("graphml", "ncol", "edgelist", "lgl", "pajek", "graphdb")
    try:
        if informat in igraph_formats:
            g = igraph.read(gfn, None)
        elif informat == "mat":
            g = csc_to_igraph(loadAnyMat(gfn))
        elif informat == "npy":
            g = csc_to_igraph(np.load(gfn).item())
        elif informat == "attredge":
            g = attredge_to_igraph(gfn)
        else:
            err_msg = "[ERROR]: Unknown format '%s'. Please check format and retry!" % informat
            print(err_msg)
            return (None, err_msg)
    except Exception as err:
        # Loading problems are reported to the caller as a message instead of
        # propagating.
        print(err)
        return (None, "[ERROR]: " + str(err))
def compute(inv_dict, sep_save=True, gformat="graphml"):
    """
    Actual function that computes invariants and saves them to a location

    positional arguments:
    =====================
    inv_dict: is a dict that must contain:
      - inv_dict["graph_fn"]

    inv_dict: optional arguments:
      - inv_dict["edge"]: boolean for global edge count
      - inv_dict["ver"]: boolean for global vertex number
      - inv_dict["tri"]: boolean for local triangle count
      - inv_dict["eig"]: boolean for eigenvalues and eigenvectors
      - inv_dict["deg"]: boolean for local degree count
      - inv_dict["ss1"]: boolean for scan 1 statistic
      - inv_dict["cc"]: boolean for clustering coefficient
      - inv_dict["mad"]: boolean for maximum average degree
      - inv_dict["k"]: the number of eigenvalues to compute
      - inv_dict["G"]: an already loaded graph; when given (and not None) the
        graph file is not read
      - inv_dict["save_dir"]: the base path under which all invariants create
        their sub-dirs and are saved. Defaults to "graphInvariants" next to
        the graph file. Created if missing.

    gformat - INPUT format of the graph

    optional arguments:
    ===================
    sep_save: boolean for auto save or not

    returns:
    =======
    An error message string if the graph could not be loaded.

    NOTE(review): only the set-up and graph-loading steps are visible here;
    the invariant computation itself is not part of this block.
    """
    if inv_dict.get("save_dir", None) is None:
        inv_dict["save_dir"] = os.path.join(
            os.path.dirname(inv_dict["graph_fn"]), "graphInvariants")

    if not os.path.exists(inv_dict["save_dir"]):
        os.makedirs(inv_dict["save_dir"])

    # Use the caller's graph when one is supplied; a missing key and an
    # explicit None both mean "load it from graph_fn".
    G = inv_dict.get("G")
    if G is None:
        igraph_formats = ("edgelist", "pajek", "ncol", "lgl", "graphml", "gml",
                          "dot", "leda")  # All igraph supported formats
        try:
            if gformat in igraph_formats:
                G = r_igraph_load_graph(inv_dict["graph_fn"], gformat)
            elif gformat == "mat":
                G = csc_to_r_igraph(loadAnyMat(inv_dict["graph_fn"]))
            else:
                # Report the real problem instead of an unbound-variable error.
                return "[ERROR]: Unknown graph format '%s'" % gformat

            if isinstance(G, str):
                return G  # There was a loading error
        except Exception as err_msg:
            return str(err_msg)
def compute(inv_dict, sep_save=True, gformat="graphml"):
    """
    Actual function that computes invariants and saves them to a location

    positional arguments:
    =====================
    inv_dict: is a dict that must contain:
      - inv_dict["graph_fn"]

    inv_dict: optional arguments:
      - inv_dict["edge"]: boolean for global edge count
      - inv_dict["ver"]: boolean for global vertex number
      - inv_dict["tri"]: boolean for local triangle count
      - inv_dict["eig"]: boolean for eigenvalues and eigenvectors
      - inv_dict["deg"]: boolean for local degree count
      - inv_dict["ss1"]: boolean for scan 1 statistic
      - inv_dict["cc"]: boolean for clustering coefficient
      - inv_dict["mad"]: boolean for maximum average degree
      - inv_dict["k"]: the number of eigenvalues to compute
      - inv_dict["G"]: an already loaded graph; when given (and not None) the
        graph file is not read
      - inv_dict["save_dir"]: the base path under which all invariants create
        their sub-dirs and are saved. Defaults to "graphInvariants" next to
        the graph file. Created if missing.

    gformat - INPUT format of the graph

    optional arguments:
    ===================
    sep_save: boolean for auto save or not

    returns:
    =======
    An error message string if the graph could not be loaded.

    NOTE(review): only the set-up and graph-loading steps are visible here;
    the invariant computation itself is not part of this block.
    """
    save_dir = inv_dict.get("save_dir", None)
    if save_dir is None:
        save_dir = os.path.join(os.path.dirname(inv_dict["graph_fn"]),
                                "graphInvariants")
        inv_dict["save_dir"] = save_dir

    if not os.path.exists(save_dir):
        os.makedirs(save_dir)

    # Use the caller's graph when one is supplied; a missing key and an
    # explicit None both mean "load it from graph_fn".
    G = inv_dict.get("G")
    if G is not None:
        return None

    igraph_formats = ("edgelist", "pajek", "ncol", "lgl", "graphml", "gml",
                      "dot", "leda")  # All igraph supported formats
    try:
        if gformat in igraph_formats:
            G = r_igraph_load_graph(inv_dict["graph_fn"], gformat)
        elif gformat == "mat":
            G = csc_to_r_igraph(loadAnyMat(inv_dict['graph_fn']))
        else:
            # Report the real problem instead of an unbound-variable error.
            return "[ERROR]: Unknown graph format '%s'" % gformat

        if isinstance(G, str):
            return G  # There was a loading error
    except Exception as err_msg:
        return str(err_msg)
def main():
    """
    Convert every ``*.mat`` graph in the given directories to graphml.

    Command line arguments:
    =======================
    directory - one or more directories to run on
    -w/--weighted - pass flag if the graphs are weighted

    For each input directory ``<dir>`` the output goes to ``<dir>_graphml``
    (created when missing). Every ``.mat`` file in the input directory yields
    one ``.graphml`` file with the same base name.
    """
    parser = argparse.ArgumentParser(description="Convert a directory of CSC graphs to graphml format")
    parser.add_argument("directory", action="store", nargs="+",
                        help="The directory(ies) we should run on. *Don't add '/' to end of dir name")
    parser.add_argument("-w", "--weighted", action="store_true",
                        help="Pass flag if the graphs are weighted")
    result = parser.parse_args()

    for dircty in result.directory:
        # Strip trailing path separators so that "graphs/" maps to the sibling
        # "graphs_graphml" rather than to "graphs/_graphml" inside the input
        # directory. Fall back to the raw value if stripping empties it ("/").
        stripped = dircty.rstrip("/" + os.sep) or dircty
        new_dir = stripped + "_graphml"

        if os.path.exists(new_dir):
            print("Dir %s already exists ..." % new_dir)
        else:
            os.makedirs(new_dir)
            print("Making dir %s ..." % new_dir)

        for fn in glob(os.path.join(dircty, "*.mat")):
            print("Converting %s ..." % fn)
            stem = os.path.splitext(os.path.basename(fn))[0]
            new_fn = os.path.join(new_dir, stem + ".graphml")
            print("Creating %s ..." % new_fn)
            graphml_adapter.csc_to_graphml(loadAnyMat(fn), is_weighted=result.weighted,
                                           desikan=True, is_directed=False, save_fn=new_fn)
def check_graph(g_fn, savedir):
    """
    Report vertices that carry edges but lie outside the hard-coded bounds.

    positional args:
    ================
    g_fn - the graph file, loaded with loadAnyMat
    savedir - directory that receives the report

    Every vertex index appearing in a nonzero entry of the graph is decoded
    with MortonXYZ into (x, y, z). A vertex is flagged when any coordinate
    exceeds the matching entry of `limits`. If any vertex is flagged, the list
    is written to `<savedir>/<graph base name>.json`; otherwise nothing is
    written.
    """
    limits = (182, 218, 182)  # HARD-CODED BOUNDS
    g = loadAnyMat(g_fn)
    vset = set()
    fset = set()

    nonzero = g.nonzero()
    rows, cols = nonzero[0], nonzero[1]
    LEN_NNZ = cols.shape[0]

    print("Adding nnz vertices to set ...")
    # enumerate() walks the index arrays lazily, so no LEN_NNZ-long list of
    # indices is needed.
    for idx, row in enumerate(rows):
        vset.add(row)
        vset.add(cols[idx])
        if idx % 100000 == 0:
            print("Processed %d/%d ..." % (idx, LEN_NNZ))

    print("Checking if vertices are outside labels ...")
    for cnt, vertex in enumerate(vset):
        x, y, z = MortonXYZ(vertex)
        if x > limits[0] or y > limits[1] or z > limits[2]:
            print("Vertex %d has fibers and is outside the label mask ..." % vertex)
            fset.add(vertex)
        if cnt % 300000 == 0:
            print("Processed %d/%d ..." % (cnt, len(vset)))

    if fset:
        print("Writing %d ill-placed vertices to disk as json..." % len(fset))
        out_fn = os.path.join(
            savedir, os.path.splitext(os.path.basename(g_fn))[0] + ".json")
        # `with` closes the file even if the write fails. Plain ints keep the
        # list literal valid JSON however the vertex type prints itself.
        with open(out_fn, "w") as f:
            f.write(str([int(v) for v in fset]))
    else:
        print("No vertices outside the label mask!")
def loadAdjMat(G_fn, lcc_fn):
    """
    Load adjacency matrix given lcc_fn & G_fn.
    lcc has z-indices corresponding to the lcc.

    positional args:
    ================
    G_fn - the .mat file holding graph
    lcc_fn - the largest connected component .npy z-ordering

    returns:
    =======
    G_lcc - The largest connected component of a graph (symmetrized)

    Exits the process with status -1 if either input file does not exist.
    Any other loading failure is printed and re-raised unchanged.
    """
    start = time()
    print("Loading adjacency matrix...")
    try:
        vcc = lcc.ConnectedComponent(fn=lcc_fn)  # creates conn_comp array
        G_full = loadAnyMat(G_fn)  # load the full sparse graph
        G_lcc = vcc.induced_subgraph(G_full)  # sparse graph of LCC
        G_lcc = G_lcc + G_lcc.T  # Symmetrize
        print("Time to load Symmetrized LCC: %s secs" % (time() - start))
        return G_lcc
    except Exception as err:
        if not os.path.exists(lcc_fn):
            print("[IOError]: Lcc: %s Doesn't exist" % lcc_fn)
            sys.exit(-1)
        if not os.path.exists(G_fn):
            print("[IOError]: Graph: %s Doesn't exist" % G_fn)
            sys.exit(-1)
        else:
            print("%s %s" % (Exception, err))
            # Bare raise keeps the original exception type, message and
            # traceback; callers catching Exception are unaffected.
            raise
def check_graph(g_fn, savedir):
    """
    Report vertices that carry edges but lie outside the hard-coded bounds.

    positional args:
    ================
    g_fn - the graph file, loaded with loadAnyMat
    savedir - directory that receives the report

    Every vertex index appearing in a nonzero entry of the graph is decoded
    with MortonXYZ into (x, y, z). A vertex is flagged when any coordinate
    exceeds the matching entry of `limits`. If any vertex is flagged, the list
    is written to `<savedir>/<graph base name>.json`; otherwise nothing is
    written.
    """
    limits = (182, 218, 182)  # HARD-CODED BOUNDS
    g = loadAnyMat(g_fn)
    vset = set()
    fset = set()

    nonzero = g.nonzero()
    row_idx = nonzero[0]
    col_idx = nonzero[1]
    LEN_NNZ = col_idx.shape[0]

    print("Adding nnz vertices to set ...")
    # enumerate() walks the index arrays lazily, so no LEN_NNZ-long list of
    # indices is needed.
    for idx, row in enumerate(row_idx):
        vset.add(row)
        vset.add(col_idx[idx])
        if idx % 100000 == 0:
            print("Processed %d/%d ..." % (idx, LEN_NNZ))

    print("Checking if vertices are outside labels ...")
    for cnt, vertex in enumerate(vset):
        x, y, z = MortonXYZ(vertex)
        if x > limits[0] or y > limits[1] or z > limits[2]:
            print("Vertex %d has fibers and is outside the label mask ..." % vertex)
            fset.add(vertex)
        if cnt % 300000 == 0:
            print("Processed %d/%d ..." % (cnt, len(vset)))

    if not fset:
        print("No vertices outside the label mask!")
        return

    print("Writing %d ill-placed vertices to disk as json..." % len(fset))
    report_fn = os.path.join(savedir, os.path.splitext(os.path.basename(g_fn))[0] + ".json")
    # `with` closes the file even if the write fails. Plain ints keep the list
    # literal valid JSON however the vertex type prints itself.
    with open(report_fn, "w") as f:
        f.write(str([int(v) for v in fset]))
def compute(inv_dict, save=True):
    '''
    Compute the requested graph invariants and save each newly computed one.

    @param inv_dict: is a dict optionally containing any of these:
      - inv_dict['edge']: boolean for global edge count
      - inv_dict['ver']: boolean for global vertex number
      - inv_dict['tri']: boolean for local triangle count
      - inv_dict['tri_fn']: the path of a precomputed triangle count (.npy)
      - inv_dict['eig']: boolean for eigenvalues and eigenvectors
      - inv_dict['eigvl_fn']: the path of a precomputed eigenvalues (.npy)
      - inv_dict['eigvect_fn']: the path of a precomputed eigenvectors (.npy)
      - inv_dict['deg']: boolean for local degree count
      - inv_dict['deg_fn']: the path of a precomputed degree count (.npy)
      - inv_dict['ss1']: boolean for scan 1 statistic
      - inv_dict['cc']: boolean for clustering coefficient
      - inv_dict['mad']: boolean for maximum average degree
      - inv_dict['k']: the number of eigenvalues to compute (capped at 100
        and at number of nodes - 2)
      - inv_dict['G']: an already loaded graph; when absent or None the graph
        is loaded from inv_dict['graph_fn']
      - inv_dict['save_dir']: the base path under which all invariants create
        their sub-dirs and are saved. Defaults to the directory of graph_fn.

    @param save: boolean for auto save or not. TODO: use this

    @return: inv_dict, with the '*_fn' entries of every newly saved invariant
      filled in, or an error message string when the graph or the precomputed
      eigen data could not be loaded.
    '''
    # Populate inv_dict
    inv_dict = populate_inv_dict(inv_dict)

    if inv_dict['save_dir'] is None:
        inv_dict['save_dir'] = os.path.dirname(inv_dict['graph_fn'])

    # Use the caller's graph when supplied; a missing key and an explicit None
    # both mean "load it from disk".
    G = inv_dict.get('G')
    if G is None:
        if inv_dict['graphsize'] in ('b', 'big'):
            G = loadAdjMat(inv_dict['graph_fn'], inv_dict['lcc_fn'])  # TODO: test
        else:  # small graphs
            G = loadAnyMat(inv_dict['graph_fn'], inv_dict['data_elem'])

    if isinstance(G, str):
        print(G)
        return G  # Error message

    num_nodes = G.shape[0]  # number of nodes

    # CC requires deg_fn and tri_fn. Load if available
    if inv_dict['cc']:
        # if either #tri or deg is undefined
        if not inv_dict['tri_fn']:
            inv_dict['tri'] = True
        if not inv_dict['deg_fn']:
            inv_dict['deg'] = True
        cc_array = np.zeros(num_nodes)

    # All invariants that require eigenvalues
    needs_eig = (inv_dict['tri'] and not inv_dict['tri_fn']) or inv_dict['mad']
    if needs_eig and not inv_dict['eigvl_fn']:
        inv_dict['eig'] = True

    # Only create arrays if the computation will be done. CC also reads the
    # triangle and degree arrays, so precomputed ones are loaded for it even
    # when 'tri' / 'deg' themselves were not requested.
    if inv_dict['tri'] or inv_dict['cc']:
        if inv_dict['tri_fn']:
            tri_array = np.load(inv_dict['tri_fn'])  # load if precomputed
        else:
            tri_array = np.zeros(num_nodes)  # local triangle count

    if inv_dict['deg'] or inv_dict['edge']:  # edge is global number of edges
        inv_dict['deg'] = True
    if inv_dict['deg'] or inv_dict['cc']:
        if inv_dict['deg_fn']:
            deg_array = np.load(inv_dict['deg_fn'])
        else:
            deg_array = np.zeros(num_nodes)  # Vertex degrees of all vertices

    if inv_dict['ss1']:
        ss1_array = np.zeros(num_nodes)  # Induced subgraph edge number i.e scan statistic

    # Maximum of 100 eigenvalues, and never more than num_nodes - 2
    max_k = G.shape[0] - 2
    if not inv_dict['k'] or inv_dict['k'] > 100 or inv_dict['k'] > max_k:
        k = min(100, max_k)
    else:
        k = inv_dict['k']

    start = time()
    # Calculate Eigenvalues & Eigen vectors
    if inv_dict['eig'] or needs_eig:
        if not (inv_dict['eigvl_fn'] or inv_dict['eigvect_fn']):
            l, u = arpack.eigs(G, k=k, which='LM')  # LanczosMethod(A,0)
            print('Time taken to calc Eigenvalues: %f secs\n' % (time() - start))
        else:
            try:
                l = np.load(inv_dict['eigvl_fn'])
                u = np.load(inv_dict['eigvect_fn'])
            except Exception:
                return "[IOERROR: ]Eigenvalues failed to load"

    # All other invariants
    start = time()
    compute_tri = inv_dict['tri'] and not inv_dict['tri_fn']
    compute_deg = inv_dict['deg'] and not inv_dict['deg_fn']

    #### For loop ####
    if inv_dict['cc'] or inv_dict['ss1'] or compute_tri or compute_deg:  # one of the others
        for j in range(num_nodes):
            # tri
            if compute_tri:
                # Divide by six because we count locally
                tri_array[j] = abs(round((sum(np.power(l.real, 3) * (u[j][:].real**2))) / 6.0))

            # ss1 & deg
            if inv_dict['ss1'] or compute_deg:
                nbors = G[:, j].nonzero()[0]
                # deg
                if compute_deg:
                    deg_array[j] = nbors.shape[0]
                # ss1
                if inv_dict['ss1']:
                    if nbors.shape[0] > 0:
                        nbors_mat = G[:, nbors][nbors, :]
                        # scan stat 1. Divide by two because of symmetric matrix
                        ss1_array[j] = nbors.shape[0] + (nbors_mat.nnz / 2.0)
                    else:
                        ss1_array[j] = 0  # zero neighbors hence zero cardinality induced subgraph

            # cc
            if inv_dict['cc']:
                # NOTE(review): vertices of degree exactly 2 get 0 here -
                # confirm that "> 2" rather than ">= 2" is intended.
                if deg_array[j] > 2:
                    cc_array[j] = (2.0 * tri_array[j]) / (deg_array[j] * (deg_array[j] - 1))  # Jari et al
                else:
                    cc_array[j] = 0

        print('Time taken to compute loop dependent invariants: %f secs\n' % (time() - start))
    ### End For ###

    # global edge
    if inv_dict['edge']:
        edge_count = deg_array.sum()

    # global vertices is num_nodes

    # MAD
    if inv_dict['mad']:
        max_ave_deg = np.max(l.real)

    # Computation complete - handle the saving now ...
    def out_fn(sub_dir, suffix):
        # <save_dir>/<sub_dir>/<graph base name><suffix>
        return os.path.join(inv_dict['save_dir'], sub_dir,
                            getBaseName(inv_dict['graph_fn']) + suffix)

    # Top eigenvalues & eigenvectors
    if not inv_dict['eigvl_fn'] and inv_dict['eig']:
        # Immediately write eigs to file
        inv_dict['eigvl_fn'] = out_fn("Eigen", '_eigvl.npy')
        inv_dict['eigvect_fn'] = out_fn("Eigen", '_eigvect.npy')
        createSave(inv_dict['eigvl_fn'], l.real)  # eigenvalues
        createSave(inv_dict['eigvect_fn'], u)  # eigenvectors
        print('Eigenvalues and eigenvectors saved ...')

    # Triangle count
    if not inv_dict['tri_fn'] and inv_dict['tri']:
        inv_dict['tri_fn'] = out_fn("Triangle", '_triangles.npy')
        createSave(inv_dict['tri_fn'], tri_array)
        print('Triangle Count saved ...')

    # Degree count
    if not inv_dict['deg_fn'] and inv_dict['deg']:
        inv_dict['deg_fn'] = out_fn("Degree", '_degree.npy')
        createSave(inv_dict['deg_fn'], deg_array)
        print('Degree saved ...')

    # MAD
    if inv_dict['mad']:
        inv_dict['mad_fn'] = out_fn("MAD", '_mad.npy')
        createSave(inv_dict['mad_fn'], max_ave_deg)
        print('Maximum average Degree saved ...')

    # Scan Statistic 1
    if inv_dict['ss1']:
        inv_dict['ss1_fn'] = out_fn("SS1", '_scanstat1.npy')
        createSave(inv_dict['ss1_fn'], ss1_array)  # save it
        print('Scan 1 statistic saved ...')

    # Clustering coefficient
    if inv_dict['cc']:
        inv_dict['cc_fn'] = out_fn("ClustCoeff", '_clustcoeff.npy')
        createSave(inv_dict['cc_fn'], cc_array)  # save it
        print('Clustering coefficient saved ...')

    # Global Vertices
    if inv_dict['ver']:
        inv_dict['ver_fn'] = out_fn("Globals", '_numvert.npy')
        createSave(inv_dict['ver_fn'], num_nodes)  # save it
        print('Global vertices number saved ...')

    # Global number of edges
    if inv_dict['edge']:
        inv_dict['edge_fn'] = out_fn("Globals", '_numedges.npy')
        createSave(inv_dict['edge_fn'], edge_count)  # save it
        print('Global edge number saved ...')

    return inv_dict  # TODO: Fix code this breaks. Originally was [tri_fn, deg_fn, MAD_fn, eigvl_fn, eigvect_fn]
def compute(inv_dict, save=True):
    '''
    Compute the requested graph invariants and save each newly computed one.

    @param inv_dict: is a dict optionally containing any of these:
      - inv_dict['edge']: boolean for global edge count
      - inv_dict['ver']: boolean for global vertex number
      - inv_dict['tri']: boolean for local triangle count
      - inv_dict['tri_fn']: the path of a precomputed triangle count (.npy)
      - inv_dict['eig']: boolean for eigenvalues and eigenvectors
      - inv_dict['eigvl_fn']: the path of a precomputed eigenvalues (.npy)
      - inv_dict['eigvect_fn']: the path of a precomputed eigenvectors (.npy)
      - inv_dict['deg']: boolean for local degree count
      - inv_dict['deg_fn']: the path of a precomputed degree count (.npy)
      - inv_dict['ss1']: boolean for scan 1 statistic
      - inv_dict['cc']: boolean for clustering coefficient
      - inv_dict['mad']: boolean for maximum average degree
      - inv_dict['k']: the number of eigenvalues to compute (capped at 100
        and at number of nodes - 2)
      - inv_dict['G']: an already loaded graph; when absent or None the graph
        is loaded from inv_dict['graph_fn']
      - inv_dict['save_dir']: the base path under which all invariants create
        their sub-dirs and are saved. Defaults to the directory of graph_fn.

    @param save: boolean for auto save or not. TODO: use this

    @return: inv_dict, with the '*_fn' entries of every newly saved invariant
      filled in, or an error message string when the graph or the precomputed
      eigen data could not be loaded.
    '''
    # Populate inv_dict
    inv_dict = populate_inv_dict(inv_dict)

    if inv_dict['save_dir'] is None:
        inv_dict['save_dir'] = os.path.dirname(inv_dict['graph_fn'])

    # Use the caller's graph when supplied; a missing key and an explicit None
    # both mean "load it from disk".
    G = inv_dict.get('G')
    if G is None:
        if inv_dict['graphsize'] in ('b', 'big'):
            G = loadAdjMat(inv_dict['graph_fn'], inv_dict['lcc_fn'])  # TODO: test
        else:  # small graphs
            G = loadAnyMat(inv_dict['graph_fn'], inv_dict['data_elem'])

    if isinstance(G, str):
        print(G)
        return G  # Error message

    num_nodes = G.shape[0]  # number of nodes

    # CC requires deg_fn and tri_fn. Load if available
    if inv_dict['cc']:
        # if either #tri or deg is undefined
        if not inv_dict['tri_fn']:
            inv_dict['tri'] = True
        if not inv_dict['deg_fn']:
            inv_dict['deg'] = True
        cc_array = np.zeros(num_nodes)

    # All invariants that require eigenvalues
    needs_eig = (inv_dict['tri'] and not inv_dict['tri_fn']) or inv_dict['mad']
    if needs_eig and not inv_dict['eigvl_fn']:
        inv_dict['eig'] = True

    # Only create arrays if the computation will be done. CC also reads the
    # triangle and degree arrays, so precomputed ones are loaded for it even
    # when 'tri' / 'deg' themselves were not requested.
    if inv_dict['tri'] or inv_dict['cc']:
        if inv_dict['tri_fn']:
            tri_array = np.load(inv_dict['tri_fn'])  # load if precomputed
        else:
            tri_array = np.zeros(num_nodes)  # local triangle count

    if inv_dict['deg'] or inv_dict['edge']:  # edge is global number of edges
        inv_dict['deg'] = True
    if inv_dict['deg'] or inv_dict['cc']:
        if inv_dict['deg_fn']:
            deg_array = np.load(inv_dict['deg_fn'])
        else:
            deg_array = np.zeros(num_nodes)  # Vertex degrees of all vertices

    if inv_dict['ss1']:
        ss1_array = np.zeros(num_nodes)  # Induced subgraph edge number i.e scan statistic

    # Maximum of 100 eigenvalues, and never more than num_nodes - 2
    max_k = G.shape[0] - 2
    if not inv_dict['k'] or inv_dict['k'] > 100 or inv_dict['k'] > max_k:
        k = min(100, max_k)
    else:
        k = inv_dict['k']

    start = time()
    # Calculate Eigenvalues & Eigen vectors
    if inv_dict['eig'] or needs_eig:
        if not (inv_dict['eigvl_fn'] or inv_dict['eigvect_fn']):
            l, u = arpack.eigs(G, k=k, which='LM')  # LanczosMethod(A,0)
            print('Time taken to calc Eigenvalues: %f secs\n' % (time() - start))
        else:
            try:
                l = np.load(inv_dict['eigvl_fn'])
                u = np.load(inv_dict['eigvect_fn'])
            except Exception:
                return "[IOERROR: ]Eigenvalues failed to load"

    # All other invariants
    start = time()
    compute_tri = inv_dict['tri'] and not inv_dict['tri_fn']
    compute_deg = inv_dict['deg'] and not inv_dict['deg_fn']

    #### For loop ####
    if inv_dict['cc'] or inv_dict['ss1'] or compute_tri or compute_deg:  # one of the others
        for j in range(num_nodes):
            # tri
            if compute_tri:
                # Divide by six because we count locally
                tri_array[j] = abs(
                    round((sum(np.power(l.real, 3) * (u[j][:].real**2))) / 6.0))

            # ss1 & deg
            if inv_dict['ss1'] or compute_deg:
                nbors = G[:, j].nonzero()[0]
                # deg
                if compute_deg:
                    deg_array[j] = nbors.shape[0]
                # ss1
                if inv_dict['ss1']:
                    if nbors.shape[0] > 0:
                        nbors_mat = G[:, nbors][nbors, :]
                        # scan stat 1. Divide by two because of symmetric matrix
                        ss1_array[j] = nbors.shape[0] + (nbors_mat.nnz / 2.0)
                    else:
                        ss1_array[j] = 0  # zero neighbors hence zero cardinality induced subgraph

            # cc
            if inv_dict['cc']:
                # NOTE(review): vertices of degree exactly 2 get 0 here -
                # confirm that "> 2" rather than ">= 2" is intended.
                if deg_array[j] > 2:
                    cc_array[j] = (2.0 * tri_array[j]) / (
                        deg_array[j] * (deg_array[j] - 1))  # Jari et al
                else:
                    cc_array[j] = 0

        print('Time taken to compute loop dependent invariants: %f secs\n' % (
            time() - start))
    ### End For ###

    # global edge
    if inv_dict['edge']:
        edge_count = deg_array.sum()

    # global vertices is num_nodes

    # MAD
    if inv_dict['mad']:
        max_ave_deg = np.max(l.real)

    # Computation complete - handle the saving now ...
    def out_fn(sub_dir, suffix):
        # <save_dir>/<sub_dir>/<graph base name><suffix>
        return os.path.join(inv_dict['save_dir'], sub_dir,
                            getBaseName(inv_dict['graph_fn']) + suffix)

    # Top eigenvalues & eigenvectors
    if not inv_dict['eigvl_fn'] and inv_dict['eig']:
        # Immediately write eigs to file
        inv_dict['eigvl_fn'] = out_fn("Eigen", '_eigvl.npy')
        inv_dict['eigvect_fn'] = out_fn("Eigen", '_eigvect.npy')
        createSave(inv_dict['eigvl_fn'], l.real)  # eigenvalues
        createSave(inv_dict['eigvect_fn'], u)  # eigenvectors
        print('Eigenvalues and eigenvectors saved ...')

    # Triangle count
    if not inv_dict['tri_fn'] and inv_dict['tri']:
        inv_dict['tri_fn'] = out_fn("Triangle", '_triangles.npy')
        createSave(inv_dict['tri_fn'], tri_array)
        print('Triangle Count saved ...')

    # Degree count
    if not inv_dict['deg_fn'] and inv_dict['deg']:
        inv_dict['deg_fn'] = out_fn("Degree", '_degree.npy')
        createSave(inv_dict['deg_fn'], deg_array)
        print('Degree saved ...')

    # MAD
    if inv_dict['mad']:
        inv_dict['mad_fn'] = out_fn("MAD", '_mad.npy')
        createSave(inv_dict['mad_fn'], max_ave_deg)
        print('Maximum average Degree saved ...')

    # Scan Statistic 1
    if inv_dict['ss1']:
        inv_dict['ss1_fn'] = out_fn("SS1", '_scanstat1.npy')
        createSave(inv_dict['ss1_fn'], ss1_array)  # save it
        print('Scan 1 statistic saved ...')

    # Clustering coefficient
    if inv_dict['cc']:
        inv_dict['cc_fn'] = out_fn("ClustCoeff", '_clustcoeff.npy')
        createSave(inv_dict['cc_fn'], cc_array)  # save it
        print('Clustering coefficient saved ...')

    # Global Vertices
    if inv_dict['ver']:
        inv_dict['ver_fn'] = out_fn("Globals", '_numvert.npy')
        createSave(inv_dict['ver_fn'], num_nodes)  # save it
        print('Global vertices number saved ...')

    # Global number of edges
    if inv_dict['edge']:
        inv_dict['edge_fn'] = out_fn("Globals", '_numedges.npy')
        createSave(inv_dict['edge_fn'], edge_count)  # save it
        print('Global edge number saved ...')

    return inv_dict  # TODO: Fix code this breaks. Originally was [tri_fn, deg_fn, MAD_fn, eigvl_fn, eigvect_fn]