def ClCla(alignedstack, numpart=None, numclasses=40, factorlist=range(1, 5),
        corandata="coran/corandata", dataext=".spi"):
    """
    Send a SPIDER "CL CLA" command for the given coran data.

    NOTE: the original author marked this routine "this doesn't work".

    inputs:
        alignedstack -- stack name; a trailing 4-character dataext is stripped
                        (the name is not used by the command below)
        numpart      -- not used in this function
        numclasses   -- number of stale class doc files to remove
        factorlist   -- factor numbers, converted with operations.intListToString
        corandata    -- path to coran data
        dataext      -- SPIDER file extension
    outputs:
        none (returns None); SPIDER is asked to write the cluster doc,
        dendrogram.ps and dendrogramdoc under "cluster/"
    """
    if alignedstack[-4:] == dataext:
        alignedstack = alignedstack[:-4]

    outdir = "cluster"
    ### computed as in the original, but not referenced by the command below
    classavg = "%s/classavgstack%03d" % (outdir, numclasses)
    classvar = "%s/classvarstack%03d" % (outdir, numclasses)
    apParam.createDirectory(outdir)

    ### remove doc files left over from an earlier run
    for classnum in range(1, numclasses + 1):
        apFile.removeFile(outdir + ("/classdoc%04d" % classnum) + dataext)
    apFile.removeFile(outdir + "/clusterdoc" + dataext)

    factorstr, factorkey = operations.intListToString(factorlist)

    ### do hierarchical clustering
    answers = [
        "CL CLA",
        corandata,                  # path to coran data
        outdir + "/clusterdoc",     # clusterdoc file
        factorstr,                  # factor numbers
        "5,8",                      # meaning not documented in this file
        "4",                        # meaning not documented in this file
        "2",                        # minimum number of particles per class
        "Y", outdir + "/dendrogram.ps",
        "Y", outdir + "/dendrogramdoc",
    ]
    session = spyder.SpiderSession(dataext=dataext, logo=True)
    session.toSpider(*answers)
    session.close()
def setRunDir(self):
    """
    Set self.params['rundir'] to
    <two directories above the tilt stack path>/rctvolume/<runname>.

    The tilt stack is looked up from self.params['tiltstackid'].
    """
    tiltstack = apStack.getOnlyStackData(self.params['tiltstackid'], msg=False)
    basedir = os.path.abspath(tiltstack['path']['path'])
    ### climb two levels up from the stack directory
    for _ in range(2):
        basedir = os.path.dirname(basedir)
    ### result is not used here; the call is retained from the original
    classliststr = operations.intListToString(self.classlist)
    self.params['rundir'] = os.path.join(basedir, "rctvolume", self.params['runname'])
def setRunDir(self):
    """
    Derive the run directory from the tilt stack location and store it in
    self.params['rundir'].

    The directory is "rctvolume/<runname>" placed two levels above the
    absolute path of the stack given by self.params['tiltstackid'].
    """
    stackinfo = apStack.getOnlyStackData(self.params['tiltstackid'], msg=False)
    stackdir = os.path.abspath(stackinfo['path']['path'])
    parentdir = os.path.dirname(stackdir)
    grandparentdir = os.path.dirname(parentdir)
    ### value is unused below; the call is kept exactly as the original made it
    classliststr = operations.intListToString(self.classlist)
    rundir = os.path.join(grandparentdir, "rctvolume", self.params['runname'])
    self.params['rundir'] = rundir
def insertRctRun(self, volfile):
    """
    Build the RCT run record and the 3D density record for volfile.

    Each record is inserted only when self.params['commit'] is True.

    inputs:
        volfile -- path to the volume file; its directory, base name and
                   md5sum are stored on the density record
    outputs:
        none (returns None)
    """
    def makeResolution(restype, halfres, fscfile):
        ### fill one ApResolutionData record
        resq = appiondata.ApResolutionData()
        resq['type'] = restype
        resq['half'] = halfres
        resq['fscfile'] = fscfile
        return resq

    ### setup resolutions
    fscresq = makeResolution("fsc", self.fscresolution, "fscdata"+self.timestamp+".fsc")
    rmeasureq = makeResolution("rmeasure", self.rmeasureresolution, None)

    ### insert rct run data
    rctrunq = appiondata.ApRctRunData()
    rctrunq['runname'] = self.params['runname']
    ### NOTE(review): other call sites in this file unpack two values from
    ### intListToString; here the whole return value is stored -- confirm
    rctrunq['classnums'] = operations.intListToString(self.classlist)
    for dbfield, paramname in (
        ('numiter', 'numiters'),
        ('maskrad', 'radius'),
        ('lowpassvol', 'lowpassvol'),
        ('highpasspart', 'highpasspart'),
        ('lowpasspart', 'lowpasspart'),
        ('median', 'median'),
        ('description', 'description'),
    ):
        rctrunq[dbfield] = self.params[paramname]
    absrundir = os.path.abspath(self.params['rundir'])
    rctrunq['path'] = appiondata.ApPathData(path=absrundir)
    rctrunq['alignstack'] = self.alignstackdata
    rctrunq['tiltstack'] = apStack.getOnlyStackData(self.params['tiltstackid'])
    rctrunq['numpart'] = self.numpart
    rctrunq['fsc_resolution'] = fscresq
    rctrunq['rmeasure_resolution'] = rmeasureq
    if self.params['commit'] is True:
        rctrunq.insert()

    ### insert 3d volume density
    absvolfile = os.path.abspath(volfile)
    densq = appiondata.Ap3dDensityData()
    densq['rctrun'] = rctrunq
    densq['path'] = appiondata.ApPathData(path=os.path.dirname(absvolfile))
    densq['name'] = os.path.basename(volfile)
    densq['hidden'] = False
    densq['norm'] = True
    ### symmetry record 25; what it denotes is not shown in this file
    densq['symmetry'] = appiondata.ApSymmetryData.direct_query(25)
    stackpixelsize = apStack.getStackPixelSizeFromStackId(self.params['tiltstackid'])
    densq['pixelsize'] = stackpixelsize*self.params['tiltbin']
    densq['boxsize'] = self.getBoxSize()
    for dbfield, paramname in (
        ('lowpass', 'lowpassvol'),
        ('highpass', 'highpasspart'),
        ('mask', 'radius'),
        ('description', 'description'),
    ):
        densq[dbfield] = self.params[paramname]
    densq['resolution'] = self.fscresolution
    densq['rmeasure'] = self.rmeasureresolution
    densq['session'] = apStack.getSessionDataFromStackId(self.params['tiltstackid'])
    densq['md5sum'] = apFile.md5sumfile(volfile)
    if self.params['commit'] is True:
        densq.insert()

    return
def hierarchClusterProcess(numpart=None, factorlist=range(1, 5),
        corandata="coran/corandata", rundir=".", dataext=".spi"):
    """
    Run SPIDER "CL HC" on coran data to create a dendrogram doc file.

    inputs:
        numpart    -- number of particles; must be a number, it sets the
                      minimum class size sent to SPIDER (numpart*0.0001 + 2.0)
        factorlist -- factor list, converted with operations.intListToString;
                      a weight of "1.0" is sent for every entry
        corandata  -- path to coran data ("_IMC" is appended after filtering)
        rundir     -- output directory for dendrogram.ps and the doc file
        dataext    -- SPIDER file extension
    output:
        path of the dendrogram doc file:
        <rundir>/dendrogramdoc<factorkey><dataext>

    If the doc file already exists it is returned immediately and SPIDER is
    not run. After a run, <rundir>/dendrogram.ps is converted to
    "dendrogram.png" in the current working directory.
    """
    factorstr, factorkey = operations.intListToString(factorlist)
    dendrogramfile = rundir + "/dendrogramdoc" + factorkey + dataext
    if os.path.isfile(dendrogramfile):
        apDisplay.printMsg(
            "Dendrogram file already exists, skipping processing " + dendrogramfile)
        return dendrogramfile

    apDisplay.printMsg("Creating dendrogram file: " + dendrogramfile)

    ### compute before opening SPIDER so that a bad numpart (e.g. None)
    ### fails before a session is opened
    minclasssize = "%.4f" % (numpart * 0.0001 + 2.0)
    ### single definition of the image path, used for writing and converting
    dendrogramps = rundir + "/dendrogram.ps"

    ### do hierarchical clustering
    mySpider = spyder.SpiderSession(dataext=dataext, logo=False, log=True)
    mySpider.toSpider(
        "CL HC",
        spyder.fileFilter(corandata) + "_IMC", # path to coran data
        factorstr, # factor string
    )
    ## weight for each factor
    for fact in factorlist:
        mySpider.toSpiderQuiet("1.0")
    mySpider.toSpider(
        "5", #use Ward's method
        "T", # meaning not documented in this file
        minclasssize,
        dendrogramps, #dendrogram image file
        "Y",
        spyder.fileFilter(dendrogramfile), #dendrogram doc file
    )
    mySpider.close()

    if not os.path.isfile(dendrogramfile):
        apDisplay.printError(
            "SPIDER dendrogram creation (CL HC) failed, too many particles??")
    ### convert the file SPIDER actually wrote; the original read the
    ### hard-coded "cluster/dendrogram.ps" regardless of rundir
    apImage.convertPostscriptToPng(dendrogramps, "dendrogram.png")
    return dendrogramfile
def hierarchClusterProcess(numpart=None, factorlist=range(1,5),
        corandata="coran/corandata", rundir=".", dataext=".spi"):
    """
    Run SPIDER "CL HC" on coran data to create a dendrogram doc file.

    inputs:
        numpart    -- number of particles; must be a number, it sets the
                      minimum class size sent to SPIDER (numpart*0.0001 + 2.0)
        factorlist -- factor list, converted with operations.intListToString;
                      a weight of "1.0" is sent for every entry
        corandata  -- path to coran data ("_IMC" is appended after filtering)
        rundir     -- output directory for dendrogram.ps and the doc file
        dataext    -- SPIDER file extension
    output:
        path of the dendrogram doc file:
        <rundir>/dendrogramdoc<factorkey><dataext>

    If the doc file already exists it is returned immediately and SPIDER is
    not run. After a run, <rundir>/dendrogram.ps is converted to
    "dendrogram.png" in the current working directory.
    """
    factorstr, factorkey = operations.intListToString(factorlist)
    dendrogramfile = rundir+"/dendrogramdoc"+factorkey+dataext
    if os.path.isfile(dendrogramfile):
        apDisplay.printMsg("Dendrogram file already exists, skipping processing "+dendrogramfile)
        return dendrogramfile

    apDisplay.printMsg("Creating dendrogram file: "+dendrogramfile)

    ### compute before opening SPIDER so that a bad numpart (e.g. None)
    ### fails before a session is opened
    minclasssize = "%.4f" % (numpart*0.0001+2.0)
    ### single definition of the image path, used for writing and converting
    dendrogramps = rundir+"/dendrogram.ps"

    ### do hierarchical clustering
    mySpider = spyder.SpiderSession(dataext=dataext, logo=False, log=True)
    mySpider.toSpider(
        "CL HC",
        spyder.fileFilter(corandata)+"_IMC", # path to coran data
        factorstr, # factor string
    )
    ## weight for each factor
    for fact in factorlist:
        mySpider.toSpiderQuiet("1.0")
    mySpider.toSpider(
        "5", #use Ward's method
        "T", # meaning not documented in this file
        minclasssize,
        dendrogramps, #dendrogram image file
        "Y",
        spyder.fileFilter(dendrogramfile), #dendrogram doc file
    )
    mySpider.close()

    if not os.path.isfile(dendrogramfile):
        apDisplay.printError("SPIDER dendrogram creation (CL HC) failed, too many particles??")
    ### convert the file SPIDER actually wrote; the original read the
    ### hard-coded "cluster/dendrogram.ps" regardless of rundir
    apImage.convertPostscriptToPng(dendrogramps, "dendrogram.png")
    return dendrogramfile
def insertRctRun(self, volfile):
    """
    Build the RCT run record and the 3D density record for volfile.

    Each record is inserted only when self.params['commit'] is True.

    inputs:
        volfile -- path to the volume file; its directory, base name and
                   md5sum are stored on the density record
    outputs:
        none (returns None)
    """
    def makeResolution(restype, halfres, fscfile):
        ### fill one ApResolutionData record
        resq = appiondata.ApResolutionData()
        resq['type'] = restype
        resq['half'] = halfres
        resq['fscfile'] = fscfile
        return resq

    ### setup resolutions
    fscresq = makeResolution(
        "fsc", self.fscresolution, "fscdata" + self.timestamp + ".fsc")
    rmeasureq = makeResolution("rmeasure", self.rmeasureresolution, None)

    ### insert rct run data
    rctrunq = appiondata.ApRctRunData()
    rctrunq['runname'] = self.params['runname']
    ### NOTE(review): other call sites in this file unpack two values from
    ### intListToString; here the whole return value is stored -- confirm
    rctrunq['classnums'] = operations.intListToString(self.classlist)
    runfields = (
        ('numiter', 'numiters'),
        ('maskrad', 'radius'),
        ('lowpassvol', 'lowpassvol'),
        ('highpasspart', 'highpasspart'),
        ('lowpasspart', 'lowpasspart'),
        ('median', 'median'),
        ('description', 'description'),
    )
    for dbfield, paramname in runfields:
        rctrunq[dbfield] = self.params[paramname]
    absrundir = os.path.abspath(self.params['rundir'])
    rctrunq['path'] = appiondata.ApPathData(path=absrundir)
    rctrunq['alignstack'] = self.alignstackdata
    rctrunq['tiltstack'] = apStack.getOnlyStackData(
        self.params['tiltstackid'])
    rctrunq['numpart'] = self.numpart
    rctrunq['fsc_resolution'] = fscresq
    rctrunq['rmeasure_resolution'] = rmeasureq
    if self.params['commit'] is True:
        rctrunq.insert()

    ### insert 3d volume density
    absvolfile = os.path.abspath(volfile)
    densq = appiondata.Ap3dDensityData()
    densq['rctrun'] = rctrunq
    densq['path'] = appiondata.ApPathData(path=os.path.dirname(absvolfile))
    densq['name'] = os.path.basename(volfile)
    densq['hidden'] = False
    densq['norm'] = True
    ### symmetry record 25; what it denotes is not shown in this file
    densq['symmetry'] = appiondata.ApSymmetryData.direct_query(25)
    stackpixelsize = apStack.getStackPixelSizeFromStackId(
        self.params['tiltstackid'])
    densq['pixelsize'] = stackpixelsize * self.params['tiltbin']
    densq['boxsize'] = self.getBoxSize()
    densityfields = (
        ('lowpass', 'lowpassvol'),
        ('highpass', 'highpasspart'),
        ('mask', 'radius'),
        ('description', 'description'),
    )
    for dbfield, paramname in densityfields:
        densq[dbfield] = self.params[paramname]
    densq['resolution'] = self.fscresolution
    densq['rmeasure'] = self.rmeasureresolution
    densq['session'] = apStack.getSessionDataFromStackId(
        self.params['tiltstackid'])
    densq['md5sum'] = apFile.md5sumfile(volfile)
    if self.params['commit'] is True:
        densq.insert()

    return
def kmeansCluster(alignedstack, numpart=None, numclasses=40, timestamp=None,
        factorlist=range(1,5), corandata="coran/corandata", dataext=".spi"):
    """
    Cluster particles with SPIDER "CL KM" and average each class.

    inputs:
        alignedstack -- aligned particle stack; a trailing 4-character
                        dataext is stripped
        numpart      -- not used in this function
        numclasses   -- number of classes requested from CL KM
        timestamp    -- tag embedded in output file names; made with
                        apParam.makeTimestamp() when None
        factorlist   -- factor list, converted with operations.intListToString;
                        a weight of "1.0" is sent for every entry
        corandata    -- path to coran data ("_IMC" is appended)
        dataext      -- SPIDER file extension
    outputs:
        (classavg, classvar) -- extension-less paths of the class average and
        class variance stacks under "cluster/"; both are also converted to
        IMAGIC (.hed) with proc2d
    """
    if timestamp is None:
        timestamp = apParam.makeTimestamp()
    if alignedstack[-4:] == dataext:
        alignedstack = alignedstack[:-4]

    rundir = "cluster"
    classavg = rundir+"/"+("classavgstack_%s_%03d" % (timestamp, numclasses))
    classvar = rundir+"/"+("classvarstack_%s_%03d" % (timestamp, numclasses))
    classdocbase = rundir+"/classdoc_"+timestamp+"_"
    allclassesdoc = rundir+("/allclassesdoc%04d" % (numclasses))
    apParam.createDirectory(rundir)

    ### remove stale doc files
    for i in range(numclasses):
        ### un-timestamped name the original cleanup targeted
        apFile.removeFile(rundir+("/classdoc%04d" % (i+1))+dataext)
        ### name CL KM is actually asked to write below; a caller reusing one
        ### timestamp for several class counts would otherwise leave these behind
        apFile.removeFile(classdocbase+("%04d" % (i+1))+dataext)
    apFile.removeFile(allclassesdoc+dataext)

    ### make list of factors
    factorstr, factorkey = operations.intListToString(factorlist)

    ### do k-means clustering
    mySpider = spyder.SpiderSession(dataext=dataext, logo=True, log=False)
    mySpider.toSpider(
        "CL KM",
        corandata+"_IMC", # path to coran data
        str(numclasses), # num classes
        factorstr, # factor string
    )
    ## weight for each factor
    for fact in factorlist:
        mySpider.toSpiderQuiet("1.0")
    ### random integer in 1..1000; presumably the CL KM random seed -- confirm
    randnum = (int(random.random()*1000) + 1)
    mySpider.toSpider(
        str(randnum),
        classdocbase+"****", # class doc file
        allclassesdoc, #clusterdoc file
    )
    mySpider.close()

    ### delete existing files
    sys.stderr.write("delete existing files")
    for dext in (".hed", ".img", dataext):
        apFile.removeFile(classavg+dext)
        apFile.removeFile(classvar+dext)
    ### same output as the Python 2 statement `print ""`
    sys.stdout.write("\n")

    mySpider = spyder.SpiderSession(dataext=dataext, logo=True, log=False)
    ### create class averages
    apDisplay.printMsg("Averaging particles into classes")
    for i in range(numclasses):
        classnum = i+1
        mySpider.toSpiderQuiet(
            "AS R",
            spyder.fileFilter(alignedstack)+"@******",
            classdocbase+("%04d" % (classnum)),
            "A",
            (classavg+"@%04d" % (classnum)),
            (classvar+"@%04d" % (classnum)),
        )
        ### progress dot every tenth class
        if classnum % 10 == 0:
            sys.stderr.write(".")
        time.sleep(1)
    mySpider.close()

    ### convert to IMAGIC; use dataext rather than a hard-coded ".spi" so the
    ### input name matches the files SPIDER wrote
    emancmd = "proc2d "+classavg+dataext+" "+classavg+".hed"
    apEMAN.executeEmanCmd(emancmd, verbose=False, showcmd=True)
    emancmd = "proc2d "+classvar+dataext+" "+classvar+".hed"
    apEMAN.executeEmanCmd(emancmd, verbose=False, showcmd=True)

    return classavg,classvar
def start(self):
    """
    Convert the aligned stack to SPIDER format and build class averages for
    every class count listed in self.params['numclasslist'].

    Uses k-means when self.params['method'] is "kmeans", otherwise the
    hierarchical method. Results are stored through insertClusterRun /
    insertClusterStack only when self.params['commit'] is True.
    """
    alignstackdata = self.analysisdata['alignstack']

    ### spider has problems with file name if it includes an "x#"
    astack = re.sub(r'x(\d)',r'x-\1', alignstackdata['imagicfile'])

    ### build the SPIDER copy of the aligned stack
    imagicalignedstack = os.path.join(alignstackdata['path']['path'], astack)
    alignedstack = re.sub("\.", "_", imagicalignedstack)+".spi"
    while os.path.isfile(alignedstack):
        apFile.removeFile(alignedstack)
    apEMAN.executeEmanCmd(
        "proc2d %s %s spiderswap"%(imagicalignedstack, alignedstack),
        showcmd=True, verbose=True)

    ### get database information
    numpart = alignstackdata['num_particles']
    corandata = os.path.join(self.analysisdata['path']['path'],"coran/corandata")

    ### parse factor list
    factorlist = self.params['factorstr'].split(",")
    factorstr, factorkey = operations.intListToString(factorlist)
    apDisplay.printMsg("using factorlist "+re.sub(",", ", ", factorstr))
    if len(factorlist) > self.analysisdata['coranrun']['num_factors']:
        apDisplay.printError("Requested factor list is longer than available factors")

    if self.params['commit'] is not True:
        apDisplay.printWarning("not committing results to DB")
    else:
        self.insertClusterRun(insert=True)

    numclasslist = self.params['numclasslist'].split(",")
    if self.params['method'] != "kmeans":
        rundir = "cluster"
        apParam.createDirectory(rundir)
        ### step 1: use coran data to create hierarchy
        dendrogramfile = classification.hierarchClusterProcess(
            numpart, factorlist, corandata, rundir, dataext=".spi")

    ### step 2: assign particles to groups, once per requested class count
    for item in numclasslist:
        t0 = time.time()
        ### skip empty or non-numeric entries
        if not (item and re.match("^[0-9]+$", item)):
            continue
        numclass = int(item)
        apDisplay.printColor("\n============================\nprocessing class averages for "
            +str(numclass)+" classes\n============================\n", "green")

        ### run the classification
        if self.params['method'] != "kmeans":
            apDisplay.printMsg("Using the hierarch clustering method")
            classavg,classvar = classification.hierarchClusterClassify(
                alignedstack, dendrogramfile, numclass, self.timestamp,
                rundir, dataext=".spi")
        else:
            apDisplay.printMsg("Using the k-means clustering method")
            classavg,classvar = classification.kmeansCluster(
                alignedstack, numpart, numclasses=numclass,
                timestamp=self.timestamp, factorlist=factorlist,
                corandata=corandata, dataext=".spi")

        if self.params['commit'] is not True:
            apDisplay.printWarning("not committing results to DB")
        else:
            self.insertClusterStack(classavg, classvar, numclass, insert=True)

        elapsed = time.time()-t0
        apDisplay.printMsg("Completed "+str(numclass)+" classes in "+apDisplay.timeString(elapsed))
def kmeansCluster(alignedstack, numpart=None, numclasses=40, timestamp=None,
        factorlist=range(1, 5), corandata="coran/corandata", dataext=".spi"):
    """
    Cluster particles with SPIDER "CL KM" and average each class.

    inputs:
        alignedstack -- aligned particle stack; a trailing 4-character
                        dataext is stripped
        numpart      -- not used in this function
        numclasses   -- number of classes requested from CL KM
        timestamp    -- tag embedded in output file names; made with
                        apParam.makeTimestamp() when None
        factorlist   -- factor list, converted with operations.intListToString;
                        a weight of "1.0" is sent for every entry
        corandata    -- path to coran data ("_IMC" is appended)
        dataext      -- SPIDER file extension
    outputs:
        (classavg, classvar) -- extension-less paths of the class average and
        class variance stacks under "cluster/"; both are also converted to
        IMAGIC (.hed) with proc2d
    """
    if timestamp is None:
        timestamp = apParam.makeTimestamp()
    if alignedstack[-4:] == dataext:
        alignedstack = alignedstack[:-4]

    rundir = "cluster"
    classavg = rundir + "/" + ("classavgstack_%s_%03d" % (timestamp, numclasses))
    classvar = rundir + "/" + ("classvarstack_%s_%03d" % (timestamp, numclasses))
    classdocbase = rundir + "/classdoc_" + timestamp + "_"
    allclassesdoc = rundir + ("/allclassesdoc%04d" % (numclasses))
    apParam.createDirectory(rundir)

    ### remove stale doc files
    for i in range(numclasses):
        ### un-timestamped name the original cleanup targeted
        apFile.removeFile(rundir + ("/classdoc%04d" % (i + 1)) + dataext)
        ### name CL KM is actually asked to write below; a caller reusing one
        ### timestamp for several class counts would otherwise leave these behind
        apFile.removeFile(classdocbase + ("%04d" % (i + 1)) + dataext)
    apFile.removeFile(allclassesdoc + dataext)

    ### make list of factors
    factorstr, factorkey = operations.intListToString(factorlist)

    ### do k-means clustering
    mySpider = spyder.SpiderSession(dataext=dataext, logo=True, log=False)
    mySpider.toSpider(
        "CL KM",
        corandata + "_IMC", # path to coran data
        str(numclasses), # num classes
        factorstr, # factor string
    )
    ## weight for each factor
    for fact in factorlist:
        mySpider.toSpiderQuiet("1.0")
    ### random integer in 1..1000; presumably the CL KM random seed -- confirm
    randnum = (int(random.random() * 1000) + 1)
    mySpider.toSpider(
        str(randnum),
        classdocbase + "****", # class doc file
        allclassesdoc, #clusterdoc file
    )
    mySpider.close()

    ### delete existing files
    sys.stderr.write("delete existing files")
    for dext in (".hed", ".img", dataext):
        apFile.removeFile(classavg + dext)
        apFile.removeFile(classvar + dext)
    ### same output as the Python 2 statement `print ""`
    sys.stdout.write("\n")

    mySpider = spyder.SpiderSession(dataext=dataext, logo=True, log=False)
    ### create class averages
    apDisplay.printMsg("Averaging particles into classes")
    for i in range(numclasses):
        classnum = i + 1
        mySpider.toSpiderQuiet(
            "AS R",
            spyder.fileFilter(alignedstack) + "@******",
            classdocbase + ("%04d" % (classnum)),
            "A",
            (classavg + "@%04d" % (classnum)),
            (classvar + "@%04d" % (classnum)),
        )
        ### progress dot every tenth class
        if classnum % 10 == 0:
            sys.stderr.write(".")
        time.sleep(1)
    mySpider.close()

    ### convert to IMAGIC; use dataext rather than a hard-coded ".spi" so the
    ### input name matches the files SPIDER wrote
    emancmd = "proc2d " + classavg + dataext + " " + classavg + ".hed"
    apEMAN.executeEmanCmd(emancmd, verbose=False, showcmd=True)
    emancmd = "proc2d " + classvar + dataext + " " + classvar + ".hed"
    apEMAN.executeEmanCmd(emancmd, verbose=False, showcmd=True)

    return classavg, classvar
def start(self):
    """
    Convert the aligned stack to SPIDER format and build class averages for
    every class count listed in self.params['numclasslist'].

    Uses k-means when self.params['method'] is "kmeans", otherwise the
    hierarchical method. Results are stored through insertClusterRun /
    insertClusterStack only when self.params['commit'] is True.
    """
    alignstackdata = self.analysisdata['alignstack']

    ### spider has problems with file name if it includes an "x#"
    astack = re.sub(r'x(\d)', r'x-\1', alignstackdata['imagicfile'])

    ### build the SPIDER copy of the aligned stack
    imagicalignedstack = os.path.join(alignstackdata['path']['path'], astack)
    alignedstack = re.sub("\.", "_", imagicalignedstack) + ".spi"
    while os.path.isfile(alignedstack):
        apFile.removeFile(alignedstack)
    apEMAN.executeEmanCmd(
        "proc2d %s %s spiderswap" % (imagicalignedstack, alignedstack),
        showcmd=True, verbose=True)

    ### get database information
    numpart = alignstackdata['num_particles']
    corandata = os.path.join(self.analysisdata['path']['path'],
                             "coran/corandata")

    ### parse factor list
    factorlist = self.params['factorstr'].split(",")
    factorstr, factorkey = operations.intListToString(factorlist)
    apDisplay.printMsg("using factorlist " + re.sub(",", ", ", factorstr))
    if len(factorlist) > self.analysisdata['coranrun']['num_factors']:
        apDisplay.printError(
            "Requested factor list is longer than available factors")

    if self.params['commit'] is not True:
        apDisplay.printWarning("not committing results to DB")
    else:
        self.insertClusterRun(insert=True)

    numclasslist = self.params['numclasslist'].split(",")
    if self.params['method'] != "kmeans":
        rundir = "cluster"
        apParam.createDirectory(rundir)
        ### step 1: use coran data to create hierarchy
        dendrogramfile = classification.hierarchClusterProcess(
            numpart, factorlist, corandata, rundir, dataext=".spi")

    ### step 2: assign particles to groups, once per requested class count
    for item in numclasslist:
        t0 = time.time()
        ### skip empty or non-numeric entries
        if not (item and re.match("^[0-9]+$", item)):
            continue
        numclass = int(item)
        apDisplay.printColor(
            "\n============================\nprocessing class averages for "
            + str(numclass) + " classes\n============================\n",
            "green")

        ### run the classification
        if self.params['method'] != "kmeans":
            apDisplay.printMsg("Using the hierarch clustering method")
            classavg, classvar = classification.hierarchClusterClassify(
                alignedstack, dendrogramfile, numclass, self.timestamp,
                rundir, dataext=".spi")
        else:
            apDisplay.printMsg("Using the k-means clustering method")
            classavg, classvar = classification.kmeansCluster(
                alignedstack, numpart, numclasses=numclass,
                timestamp=self.timestamp, factorlist=factorlist,
                corandata=corandata, dataext=".spi")

        if self.params['commit'] is not True:
            apDisplay.printWarning("not committing results to DB")
        else:
            self.insertClusterStack(classavg, classvar, numclass,
                                    insert=True)

        elapsed = time.time() - t0
        apDisplay.printMsg("Completed " + str(numclass) + " classes in " +
                           apDisplay.timeString(elapsed))