def mp_worker(params):
    """
    run spectral clustering for one parameter combination

    params - a single sequence (k, sigma, distancePath, dtype)
        k            - first positional argument to SpectralCluster.run
        sigma        - passed to SpectralCluster.run as sigma
        distancePath - path handed to the SpectralCluster constructor
        dtype        - passed to the SpectralCluster constructor as dtype

    returns the SpectralCluster instance after run() has been called
    """

    # Tuple parameters in a signature (``def f((a, b))``) were removed in
    # Python 3 (PEP 3113). Unpacking in the body is valid on Python 2 and 3,
    # and callers keep passing one tuple exactly as before.
    k, sigma, distancePath, dtype = params

    sc = SpectralCluster(distancePath, dtype=dtype)
    sc.run(k, sk=None, sigma=sigma, verbose=True)
    return sc
def mp_worker(params):
    """
    run spectral clustering for one parameter combination

    params - a single sequence (k, sigma, distancePath, dtype)
        k            - first positional argument to SpectralCluster.run
        sigma        - passed to SpectralCluster.run as sigma
        distancePath - path handed to the SpectralCluster constructor
        dtype        - passed to the SpectralCluster constructor as dtype

    returns the SpectralCluster instance after run() has been called
    """

    # Tuple parameters in a signature (``def f((a, b))``) were removed in
    # Python 3 (PEP 3113). Unpacking in the body is valid on Python 2 and 3,
    # and callers keep passing one tuple exactly as before.
    k, sigma, distancePath, dtype = params

    sc = SpectralCluster(distancePath, dtype=dtype)
    sc.run(k, sk=None, sigma=sigma, verbose=True)
    return sc
def _run_sc(self, toRun):
    """
    run spectral clustering (single core)

    toRun - iterable of 4-item entries (k, sigma, path, dtype)

    For each entry one row is written to self.writer1 (k, sigma, average
    silhouette value rounded to 4 decimals) and one row to self.writer2
    (k, sigma, followed by the cluster sizes).
    """

    # The path stored in each entry is unpacked but not used; the clustering
    # always reads self.distancePath.
    for k, sigma, _entryPath, dtype in toRun:
        clusterer = SpectralCluster(self.distancePath, dtype=dtype)
        clusterer.run(k, sk=None, sigma=sigma, verbose=True)

        sizes = self.get_cluster_sizes(clusterer)
        rowPrefix = [k, sigma]
        self.writer1.writerow(rowPrefix + [round(clusterer.avgSilValue, 4)])
        self.writer2.writerow(rowPrefix + sizes)
def _run_sc(self, toRun):
    """
    run spectral clustering (single core)

    toRun - iterable of 4-item entries (k, sigma, path, dtype)

    For each entry one row is written to self.writer1 (k, sigma, average
    silhouette value rounded to 4 decimals) and one row to self.writer2
    (k, sigma, followed by the cluster sizes).
    """

    # The path stored in each entry is unpacked but not used; the clustering
    # always reads self.distancePath.
    for k, sigma, _entryPath, dtype in toRun:
        clusterer = SpectralCluster(self.distancePath, dtype=dtype)
        clusterer.run(k, sk=None, sigma=sigma, verbose=True)

        sizes = self.get_cluster_sizes(clusterer)
        rowPrefix = [k, sigma]
        self.writer1.writerow(rowPrefix + [round(clusterer.avgSilValue, 4)])
        self.writer2.writerow(rowPrefix + sizes)
def __init__(self, distancePath, dtype='distance', aspect='biological_process'):
    """
    load the matrix and gene labels that belong to a distances file

    distancePath - path to distance matrix
    dtype        - 'distance' [default] or 'similarity'
    aspect       - biological_process, molecular_function, or cellular_component
                   (stored as given; it is not validated here)

    raises Exception when distancePath does not exist or dtype is not one
    of the two accepted values
    """

    ## input
    if not os.path.exists(distancePath):
        raise Exception("cannot find distances file %s\nexiting..." % (distancePath))
    if dtype not in ['distance', 'similarity']:
        raise Exception("Invalid distant type (dtype) specified")

    self.dtype = dtype
    self.aspect = aspect
    self.distancePath = distancePath

    ## call an instance of SpectralCluster to ensure labels and matrix files are saved
    ## (only the construction matters here, so the instance is not kept)
    SpectralCluster(distancePath, dtype=dtype)

    ## raw strings keep the regex identical while avoiding the invalid
    ## escape sequence "\." in a normal string literal
    matrixPath = re.sub(r"\.csv", "-matrix.npy", distancePath)
    genesPath = re.sub(r"\.csv", "-genes.npy", distancePath)
    self.M = np.load(matrixPath)
    self.items = np.load(genesPath)

    ## output
    self.resultsPath1 = re.sub(r"\.csv", "-scparams-sv.csv", distancePath)
    self.resultsPath2 = re.sub(r"\.csv", "-scparams-cl.csv", distancePath)
## parameter search over the gene distance matrix
scps = SpectralClusterParamSearch(geneDistancePath, dtype='distance')
scps.run(chunks=15)

## plot the parameter search (skipped when the figure file is already there)
psFigureFile = os.path.join(homeDir, "param-scan-%s.png" % _aspect)
if not os.path.exists(psFigureFile):
    scr = SpectralClusterResults(silvalFile, clustersFile)
    scr.plot(figName=psFigureFile)

## run spectral clustering with fixed k and sigma (skipped when labels exist)
k, sigma = 20, 0.08
labelsPath = os.path.join(homeDir, "sc-labels-%s.csv" % _aspect)
if not os.path.exists(labelsPath):
    sc = SpectralCluster(geneDistancePath, dtype='distance')
    sc.run(k, sk=None, sigma=sigma, verbose=True)
    sc.save(labelsPath=labelsPath)

## save gene sets (skipped when the .gmt file already exists)
bm = BlastMapper()
bmap = bm.load_summary('blast-parsed-summary.csv', best=False, taxaList=['8355', '8364'])
transcriptMin = 9
transcriptMax = 1000
gsFile = os.path.join(homeDir, "%s.gmt" % _aspect)
if not os.path.exists(gsFile):
    gsc = GeneSetCollection(labelsPath, gene2go)
    gsc.write(
        blastMap=bmap,
        transcriptMin=transcriptMin,
        transcriptMax=transcriptMax,
        outFile=gsFile,
    )

print("process complete.")
## parameter search restricted to k = 3..10
scps.run(chunks=5, kRange=range(3, 11))

## plot the parameter search (skipped when the figure file is already there)
psFigureFile = os.path.join(gsaDir, "param-scan-%s.png" % _aspect)
if not os.path.exists(psFigureFile):
    scr = SpectralClusterResults(silvalFile, clustersFile)
    scr.plot(figName=psFigureFile)

## run spectral clustering with fixed k and sigma (skipped when labels exist)
k, sigma = 3, 0.43
labelsPath = os.path.join(gsaDir, "sc-labels-%s.csv" % _aspect)
if not os.path.exists(labelsPath):
    sc = SpectralCluster(geneDistancePath, dtype='distance')
    sc.run(k, sk=None, sigma=sigma, verbose=True)
    sc.save(labelsPath=labelsPath)

import networkx
from parse_KGML import KGML2Graph
from KeggPathway import KeggPathway

p = KeggPathway()
## example usage kept from the original notes:
#p.add_node('gene1', data={'type': 'gene', })
#p.get_node('gene1')
#{'type': 'gene'}

## read the KGML file named after the pathway; item [1] of the result is kept
graphfile = "%s.xml" % pathway
graph = KGML2Graph(graphfile)[1]