def __init__(self, distancePath, dtype='distance', aspect='biological_process'):
    """
    Prepare a spectral clustering parameter search for one distance file.

    distancePath - path to the distance matrix file
    dtype        - 'distance' [default] or 'similarity'
    aspect       - biological_process, molecular_function, or
                   cellular_component (only stored on the instance here)

    Raises IOError when distancePath does not exist and ValueError when
    dtype is not an accepted value.  Both are subclasses of Exception,
    so callers that catch Exception keep working.
    """

    ## input
    if not os.path.exists(distancePath):
        raise IOError("cannot find distances file %s\nexiting..." % (distancePath))
    if dtype not in ('distance', 'similarity'):
        raise ValueError("Invalid distant type (dtype) specified")

    self.dtype = dtype
    self.aspect = aspect
    self.distancePath = distancePath

    ## Companion file names are derived by replacing ".csv" in the input
    ## path.  The pattern is intentionally left unanchored (every ".csv"
    ## occurrence is replaced) so the names are unchanged from before.
    ## NOTE(review): presumably SpectralCluster derives its output names
    ## the same way -- confirm before tightening the pattern.
    csvPattern = re.compile(r"\.csv")

    def companionPath(suffix):
        return csvPattern.sub(suffix, distancePath)

    ## construct a SpectralCluster purely for its side effect: per the
    ## original author's note this ensures the labels and matrix files
    ## are saved before they are loaded below
    SpectralCluster(distancePath, dtype=dtype)
    self.M = np.load(companionPath("-matrix.npy"))
    self.items = np.load(companionPath("-genes.npy"))

    ## output
    self.resultsPath1 = companionPath("-scparams-sv.csv")
    self.resultsPath2 = companionPath("-scparams-cl.csv")
def mp_worker(params):
    """
    run spectral clustering for one parameter combination

    params - a single (k, sigma, distancePath, dtype) tuple.  The worker
             takes one argument (presumably so it can be mapped over a
             list of parameter tuples by a multiprocessing pool).

    Returns the SpectralCluster instance after run() has been called.
    Raises ValueError if params does not hold exactly four items.
    """
    # Unpack in the body instead of in the signature: tuple parameters
    # were removed in Python 3 (PEP 3113); this form works on 2 and 3.
    k, sigma, distancePath, dtype = params

    sc = SpectralCluster(distancePath, dtype=dtype)
    sc.run(k, sk=None, sigma=sigma, verbose=True)
    return sc
def _run_sc(self, toRun):
    """
    run spectral clustering (single core)

    toRun - iterable of 4-item parameter tuples (k, sigma, path, dtype).
            The path item is ignored; self.distancePath is used instead.

    For every parameter set one row is written to self.writer1
    (k, sigma, average silhouette value rounded to 4 places) and one
    row to self.writer2 (k, sigma, followed by the cluster sizes).
    """
    for k, sigma, _ignoredPath, dtype in toRun:
        clusterer = SpectralCluster(self.distancePath, dtype=dtype)
        clusterer.run(k, sk=None, sigma=sigma, verbose=True)
        sizes = self.get_cluster_sizes(clusterer)

        silhouetteRow = [k, sigma, round(clusterer.avgSilValue, 4)]
        self.writer1.writerow(silhouetteRow)

        sizesRow = [k, sigma] + sizes
        self.writer2.writerow(sizesRow)
scps.run(chunks=5, kRange=range(3, 11)) ## plot the parameter search psFigureFile = os.path.join(gsaDir, "param-scan-%s.png" % (_aspect)) if not os.path.exists(psFigureFile): scr = SpectralClusterResults(silvalFile, clustersFile) scr.plot(figName=psFigureFile) ## run spectral clustering k = 3 sigma = 0.43 labelsPath = os.path.join(gsaDir, "sc-labels-%s.csv" % (_aspect)) if not os.path.exists(labelsPath): sc = SpectralCluster(geneDistancePath, dtype='distance') sc.run(k, sk=None, sigma=sigma, verbose=True) sc.save(labelsPath=labelsPath) import networkx from parse_KGML import KGML2Graph from KeggPathway import KeggPathway p = KeggPathway() #p.add_node('gene1', data={'type': 'gene', }) #p.get_node('gene1') #{'type': 'gene'} graphfile = "%s.xml" % pathway graph = KGML2Graph(graphfile)[1]