def analyseHill(ekindicts):
    """make hist of n coefficents for hill fits

    ekindicts: dict mapping a case label -> {protein: ekin data dict}.
    Builds one histogram subplot (2x2 grid) per case of the Hill 'n'
    coefficients found in 'Modified Hill' fit metadata, then saves the
    figure to n_hist.png.  Returns nothing.
    """
    import pylab
    pylab.rc('text', usetex=True)
    f=pylab.figure()
    f.suptitle('n distributions- No linear (case 3)')
    i=1
    for e in ekindicts:
        ekindata = ekindicts[e]
        proteins = ekindata.keys()
        nvals = []
        for prot in proteins:
            edata = ekindata[prot]
            E = EkinProject(data=edata)
            for d in E.datasets:
                fdata = E.getMetaData(d)
                # only collect n from Modified Hill fits with sane values
                if fdata != None and fdata.has_key('model'):
                    if fdata['model'] == 'Modified Hill':
                        n=fdata['n']
                        # discard outliers outside (-5, 5)
                        if n<5 and n>-5:
                            nvals.append(n)
                            print 'n=', n
        ax = f.add_subplot(2,2,i)
        # NOTE: 'n' is reused here for the hist bin counts, shadowing the
        # coefficient variable above (harmless, but easy to misread)
        n, b, patches = pylab.hist(nvals, 30, histtype='bar', alpha=0.8)
        std = round(numpy.std(nvals), 2)
        ave = round(numpy.mean(nvals), 2)
        ax.set_title(e +' mean= '+str(ave)+r' $\sigma$= '+str(std))
        i+=1
    f.subplots_adjust(hspace=0.4)
    f.savefig('n_hist.png')
    return
def getEkinProject(data, xerror=None, yerror=None, sep='__'):
    """Get an ekin project from a dict of the form {label:([x],[y]),..}
    or {label:([x],[y],[xerr],[yerr]),..}; values may also be nested
    dicts, in which case one dataset is made per inner label with the
    outer and inner keys joined by `sep`."""
    prj = EkinProject(mode='General')
    for key in data.keys():
        value = data[key]
        if type(value) is types.DictType:
            # nested structure: flatten to "<outer><sep><inner>" names
            for lbl in value:
                dsname = str(key) + sep + str(lbl)
                prj.insertDataset(EkinDataset(xy=value[lbl]), dsname)
        else:
            #print data[d]
            if len(value) == 4:
                # errors supplied inline with the data
                x, y, xerrs, yerrs = value
            else:
                x, y = value
                xerrs = []
                yerrs = []
                # fall back to a constant error for every point, if given
                if xerror != None:
                    xerrs = [xerror for i in x]
                if yerror != None:
                    yerrs = [yerror for i in y]
            ds = EkinDataset(xy=[x, y], xerrs=xerrs, yerrs=yerrs)
            prj.insertDataset(ds, key)
            #print ek.errors
    return prj
def analyseHill(ekindicts):
    """make hist of n coefficents for hill fits

    ekindicts maps a case label -> {protein: ekin data dict}.  For each
    case, gathers Hill coefficients 'n' from 'Modified Hill' fit
    metadata, plots a histogram per case (2x2 grid) and saves the figure
    as n_hist.png.  (Duplicate of the earlier analyseHill definition.)
    """
    import pylab
    pylab.rc('text', usetex=True)
    f = pylab.figure()
    f.suptitle('n distributions- No linear (case 3)')
    i = 1
    for e in ekindicts:
        ekindata = ekindicts[e]
        proteins = ekindata.keys()
        nvals = []
        for prot in proteins:
            edata = ekindata[prot]
            E = EkinProject(data=edata)
            for d in E.datasets:
                fdata = E.getMetaData(d)
                # only Modified Hill fits contribute an 'n' value
                if fdata != None and fdata.has_key('model'):
                    if fdata['model'] == 'Modified Hill':
                        n = fdata['n']
                        # reject outliers outside (-5, 5)
                        if n < 5 and n > -5:
                            nvals.append(n)
                            print 'n=', n
        ax = f.add_subplot(2, 2, i)
        # 'n' here is the histogram bin counts (shadows the value above)
        n, b, patches = pylab.hist(nvals, 30, histtype='bar', alpha=0.8)
        std = round(numpy.std(nvals), 2)
        ave = round(numpy.mean(nvals), 2)
        ax.set_title(e + ' mean= ' + str(ave) + r' $\sigma$= ' + str(std))
        i += 1
    f.subplots_adjust(hspace=0.4)
    f.savefig('n_hist.png')
    return
def fitPropagationTest(): """Tests the propagation of fit data direct from a dict - no importing""" start = time.time() p = Pipeline() conf = { "model1": "linear", "model2": "Michaelis-Menten", "model3": "sigmoid", "variable1": "a", "variable2": "Km", "variable3": "tm", #'xerror':.1,'yerror':0.05, } p.createConfig("temp.conf", **conf) data = Utilities.createNestedData() Em = EkinProject() E, fits = p.processFits(data, Em=Em) print "final fits", fits fname = os.path.join(p.workingdir, "results") Em.saveProject(fname) p.saveEkinPlotstoImages(Em, fname) print "completed fit propagation test" print "took %s seconds" % round((time.time() - start), 2) print "-------------------" return
def fitPropagationTest():
    """Tests the propagation of fit data direct from a dict - no importing

    (Duplicate of the earlier fitPropagationTest definition.)
    Creates a temporary pipeline config with three models, fits nested
    test data, saves project + plots and prints the elapsed time.
    """
    start = time.time()
    p = Pipeline()
    conf = {
        'model1': 'linear',
        'model2': 'Michaelis-Menten',
        'model3': 'sigmoid',
        'variable1': 'a',
        'variable2': 'Km',
        'variable3': 'tm',
        #'xerror':.1,'yerror':0.05,
    }
    p.createConfig('temp.conf', **conf)
    data = Utilities.createNestedData()
    Em = EkinProject()
    E, fits = p.processFits(data, Em=Em)
    print 'final fits', fits
    fname = os.path.join(p.workingdir, 'results')
    Em.saveProject(fname)
    p.saveEkinPlotstoImages(Em, fname)
    print 'completed fit propagation test'
    print 'took %s seconds' % round((time.time() - start), 2)
    print '-------------------'
    return
def createdb(local=None, server=None, project=None, username=None, norecs=1000):
    """Create and add some test data

    Either opens/overwrites a local PDatabase file (removing stale
    lock/index files first) or connects to a server-side project, then
    fills it with `norecs` random records (random name, choice field,
    simulated DNA sequence + translated protein, gaussian stab/activity
    values).  Commits in batches to bound memory use.  Returns the DB.
    """
    if local != None:
        if os.path.exists(local):
            # remove any leftover ZODB sidecar files so we start clean
            for i in ['.lock', '.index', '']:
                try:
                    os.remove(local + i)
                except:
                    pass
        DB = PDatabase(local=local)
    elif server != None:
        DB = PDatabase(server=server, username=username,
                       password='******', project=project)
    import string
    import DNAtool.mutation as mutation
    choices = ['a', 'b', 'c', 'd']
    DB.addField('choice', 'text')
    DB.addField('stab', 'text')
    DB.addField('activity', 'text')
    #DB.addField('ekin', 'General')
    E = EkinProject()
    data = E.readDataset('Ekin/test.Ekindat')
    E.insertDataset(data['data'], 'test', fit=data['fit'])
    print 'creating dummy data..'
    j = 0          # cycles 0..3 through `choices`
    count = 0      # records since last commit
    for i in range(norecs):
        if j > 3:
            j = 0
        # random 10-letter record name
        c = ''
        for k in range(10):
            c += random.choice(string.letters)
        DB.add(c)
        DB.data[c].choice = choices[j]
        DB.data[c].DNASeq = simulate_sequence(300)
        AAseqs3, AAseqs1 = mutation.translate(DB.data[c].DNASeq)
        DB.addProtseq(c, AAseqs3[1][5:80], 1)
        DB.data[c].stab = str(round(random.normalvariate(1, 2), 3))
        DB.data[c].activity = str(round(random.normalvariate(30, 4), 3))
        #DB.data[c].ekin = E
        j += 1
        count += 1
        # periodic commit + cache trim to keep memory bounded
        if count > 3000:
            print 'saving..'
            DB.commit()
            DB.db.cacheMinimize()
            count = 0
    DB.commit()
    return DB
def showMetaData(self, ekinproj=None, ekindata=None, dataset=None,
                 fdata=None, silent=False):
    """Print html of fit and metadata for the given dataset

    fdata may be supplied directly; otherwise it is looked up from
    `ekinproj` (or a project built from `ekindata`).  When silent is
    True, stdout is captured into a StringIO and the HTML string is
    returned instead of printed; otherwise returns ''.
    """
    if fdata == None:
        if ekinproj == None and ekindata != None:
            E = EkinProject(data=ekindata)
        else:
            E = ekinproj
        fdata = E.getMetaData(dataset)
    fsock = None
    if silent == True:
        # redirect stdout so the prints below build a string instead
        saveout = sys.stdout
        sys.stdout = fsock = StringIO.StringIO()
    print '<table id="mytable">'
    print '<tr>'
    print '<td class="alt" style="bold" colspan=2>%s</td><tr>' % dataset
    kys = fdata.keys()   # NOTE(review): unused - looks like leftover code
    ignore = ['error']
    for k in sorted(fdata):
        if k in ignore:
            continue
        if fdata[k] == None:
            continue
        elif type(fdata[k]) is types.DictType:
            # nested dict: render as an inner table of (name, value[1])
            print '<td>%s</td>' % k
            print '<td> <table id="mytable">'
            for n in fdata[k]:
                val = fdata[k][n][1]
                print '<td class="alt">%s</td><td>%.2f</td><tr>' % (n, val)
            print '</table></td><tr>'
        elif type(fdata[k]) is types.StringType:
            print '<td class="alt">%s</td><td>%s</td><tr>' % (k, fdata[k])
        else:
            # numeric metadata formatted to 2 decimal places
            print '<td class="alt">%s</td><td>%.2f</td><tr>' % (k, fdata[k])
    print '</table>'
    if silent == True:
        # restore real stdout before returning the captured HTML
        sys.stdout = saveout
    if fsock == None:
        return ''
    else:
        return fsock.getvalue()
    return
def getCSV(self): """Import a csv file""" self.E = EkinProject() from PEATDB.Ekin.IO import Importer importer = Importer(self, parent_win=self.mainwin) newdata = importer.import_multiple() if newdata == None: return for n in newdata.keys(): self.E.insertDataset(newdata[n], n, update=None) print 'imported %s datasets' % len(self.E.datasets) self.showDatasetSelector() self.showPreview() return
def createdb(local=None, server=None, project=None, username=None, norecs=1000):
    """Create and add some test data

    (Duplicate of the earlier createdb definition.)  Opens a local or
    server PDatabase, adds text fields, then inserts `norecs` random
    records with simulated sequences and gaussian stab/activity values,
    committing in batches of ~3000.  Returns the populated DB.
    """
    if local != None:
        if os.path.exists(local):
            # clear leftover ZODB lock/index files before reopening
            for i in ['.lock','.index','']:
                try:
                    os.remove(local+i)
                except:
                    pass
        DB = PDatabase(local=local)
    elif server!=None:
        DB = PDatabase(server=server, username=username,
                       password='******', project=project)
    import string
    import DNAtool.mutation as mutation
    choices = ['a','b','c','d']
    DB.addField('choice', 'text')
    DB.addField('stab', 'text')
    DB.addField('activity', 'text')
    #DB.addField('ekin', 'General')
    E = EkinProject()
    data=E.readDataset('Ekin/test.Ekindat')
    E.insertDataset(data['data'], 'test', fit=data['fit'])
    print 'creating dummy data..'
    j=0        # index cycling through `choices`
    count=0    # records since the last commit
    for i in range(norecs):
        if j>3:
            j=0
        # random 10-letter record name
        c=''
        for k in range(10):
            c += random.choice(string.letters)
        DB.add(c)
        DB.data[c].choice = choices[j]
        DB.data[c].DNASeq = simulate_sequence(300)
        AAseqs3,AAseqs1 = mutation.translate(DB.data[c].DNASeq)
        DB.addProtseq(c, AAseqs3[1][5:80], 1)
        DB.data[c].stab = str(round(random.normalvariate(1,2),3))
        DB.data[c].activity = str(round(random.normalvariate(30,4),3))
        #DB.data[c].ekin = E
        j+=1
        count+=1
        # batch commit to keep the object cache small
        if count>3000:
            print 'saving..'
            DB.commit()
            DB.db.cacheMinimize()
            count=0
    DB.commit()
    return DB
def showMetaData(self, ekinproj=None, ekindata=None, dataset=None,
                 fdata=None, silent=False):
    """Print html of fit and metadata for the given dataset

    (Duplicate of the earlier showMetaData definition.)  With
    silent=True the HTML is captured via a stdout redirect and returned
    as a string; otherwise it is printed and '' is returned.
    """
    if fdata == None:
        if ekinproj == None and ekindata!=None:
            E = EkinProject(data=ekindata)
        else:
            E = ekinproj
        fdata = E.getMetaData(dataset)
    fsock = None
    if silent == True:
        # capture all prints below into a StringIO buffer
        saveout = sys.stdout
        sys.stdout = fsock = StringIO.StringIO()
    print '<table id="mytable">'
    print '<tr>'
    print '<td class="alt" style="bold" colspan=2>%s</td><tr>' % dataset
    kys = fdata.keys()   # NOTE(review): unused - apparent leftover
    ignore = ['error']
    for k in sorted(fdata):
        if k in ignore:
            continue
        if fdata[k] == None:
            continue
        elif type(fdata[k]) is types.DictType:
            # dict values become a nested table of (name, value[1]) rows
            print '<td>%s</td>' %k
            print '<td> <table id="mytable">'
            for n in fdata[k]:
                val = fdata[k][n][1]
                print '<td class="alt">%s</td><td>%.2f</td><tr>' %(n, val)
            print '</table></td><tr>'
        elif type(fdata[k]) is types.StringType:
            print '<td class="alt">%s</td><td>%s</td><tr>' %(k, fdata[k])
        else:
            print '<td class="alt">%s</td><td>%.2f</td><tr>' %(k, fdata[k])
    print '</table>'
    if silent == True:
        # restore the real stdout
        sys.stdout = saveout
    if fsock == None:
        return ''
    else:
        return fsock.getvalue()
    return
def sampleData(self):
    """Create a sample Ekin project and display it in the plot frame."""
    prj = EkinProject()
    self.E = prj
    prj.createSampleData()
    self.plotframe.setProject(prj)
    # keep the dataset list sorted for the selector widget
    self.datasets = sorted(prj.datasets)
    self.replot()
    self.updateSelector()
    return
def loadEkin(self):
    """Load the ekin prj

    Asks the user for an .ekinprj file, opens it into a fresh project
    and refreshes the dataset selector and preview.
    """
    chosen = tkFileDialog.askopenfilename(
        defaultextension='.ekinprj',
        initialdir=os.getcwd(),
        filetypes=[("ekinprj", "*.ekinprj"),
                   ("All files", "*.*")],
        parent=self.mainwin)
    # dialog may return '' (cancel) or a non-existent path
    if not os.path.isfile(chosen):
        return
    self.E = EkinProject()
    self.E.openProject(chosen)
    self.showDatasetSelector()
    self.showPreview()
    return
def getEkinProject(data, xerror=None, yerror=None, sep='__'):
    """Get an ekin project from a dict of the form {label:([x],[y]),..}
    or {label:([x],[y],[xerr],[yerr]),..}

    (Duplicate of the earlier getEkinProject definition.)  Dict values
    become one dataset per inner label, named "<outer><sep><inner>".
    """
    E = EkinProject(mode='General')
    for d in data.keys():
        if type(data[d]) is types.DictType:
            # nested dict: flatten with the separator
            for lbl in data[d]:
                name = str(d) + sep + str(lbl)
                xy = data[d][lbl]
                ek = EkinDataset(xy=xy)
                E.insertDataset(ek, name)
        else:
            #print data[d]
            if len(data[d]) == 4:
                # errors supplied with the data
                x, y, xerrs, yerrs = data[d]
            else:
                x, y = data[d]
                xerrs = []
                yerrs = []
                # constant per-point error if requested
                if xerror != None:
                    xerrs = [xerror for i in x]
                if yerror != None:
                    yerrs = [yerror for i in y]
            ek = EkinDataset(xy=[x, y], xerrs=xerrs, yerrs=yerrs)
            E.insertDataset(ek, d)
            #print ek.errors
    return E
def getCSV(self):
    """Import a csv file

    (Duplicate of the earlier getCSV definition.)  Starts a fresh
    project, runs the multi-file importer dialog and inserts every
    returned dataset, then refreshes the GUI panes.
    """
    self.E = EkinProject()
    from PEATDB.Ekin.IO import Importer
    importer = Importer(self,parent_win=self.mainwin)
    newdata = importer.import_multiple()
    if newdata == None:
        # cancelled or nothing imported
        return
    for n in newdata.keys():
        self.E.insertDataset(newdata[n], n, update=None)
    print 'imported %s datasets' %len(self.E.datasets)
    self.showDatasetSelector()
    self.showPreview()
    return
def loadEkin(self):
    """Load the ekin prj

    (Duplicate of the earlier loadEkin definition.)  Prompts for an
    .ekinprj file, opens it into a new project, refreshes the GUI.
    """
    filename=tkFileDialog.askopenfilename(defaultextension='.ekinprj',
                              initialdir=os.getcwd(),
                              filetypes=[("ekinprj","*.ekinprj"),
                                         ("All files","*.*")],
                              parent=self.mainwin)
    # '' (cancel) fails the isfile check too
    if not os.path.isfile(filename):
        return
    self.E = EkinProject()
    self.E.openProject(filename)
    self.showDatasetSelector()
    self.showPreview()
    return
def main():
    """Run some analysis

    Command-line entry point for the Van't Hoff analysis: parses
    options, loads (or simulates) data and dispatches to the chosen
    analysis method on a VantHoff instance.
    """
    from optparse import OptionParser
    parser = OptionParser()
    app = VantHoff()
    parser.add_option("-f", "--file", dest="file",
                        help="Open a local db")
    parser.add_option("-e", "--ekinprj", dest="ekinprj",
                        help="Open an ekin project")
    parser.add_option("-d", "--dataset", dest="dataset",
                        help="Dataset name")
    parser.add_option("-m", "--method", dest="method", default=1, type='int',
                        help="Choose method - 1: Van't Hoff plot, 2: Schellman, 3: Differential fit, 4: Breslauer")
    parser.add_option("-b", "--benchmark", dest="benchmark", action='store_true',
                        help="Test", default=False)
    parser.add_option("-a", "--all", dest="all", action='store_true',
                        help="Do all datasets in ekinprj", default=False)
    parser.add_option("-w", "--width", dest="width", default=50, type='int',
                        help="Width of transition region to fit for method 1")
    parser.add_option("-s", "--smoothing", dest="smoothing", default=5, type='int',
                        help="Degree of smoothing to apply in method 2 (default 5)")
    parser.add_option("-i", "--invert", dest="invert", action='store_true',
                        help="Invert raw data", default=False)
    opts, remainder = parser.parse_args()
    if opts.file != None and os.path.exists(opts.file):
        app.loadDB(opts.file)
    if opts.ekinprj != None and os.path.exists(opts.ekinprj):
        E = EkinProject()
        E.openProject(opts.ekinprj)
        d = opts.dataset
    else:
        # no project given: fall back to simulated CD data
        x, y = app.simulateCD()
        E = EkinProject()
        d = 'cdtest'
        E.insertDataset(xydata=[x, y], newname=d)
    if opts.all == True:
        # BUG FIX: was `self.doAll(E, methods)` - `self` and `methods` are
        # undefined at module level (NameError).  doAll() reads self.E, so
        # attach the project to the app instance and call it there.
        app.E = E
        app.doAll()
    if opts.benchmark == True:
        app.benchmark(E, d, method=opts.method)
        #app.plotCorrelation()
    else:
        if opts.method == 1:
            app.fitVantHoff(E, d, transwidth=opts.width,
                            invert=opts.invert, figname=d)
        elif opts.method == 2:
            app.fitElwellSchellman(E, d, transwidth=opts.width,
                                   invert=opts.invert, figname=d)
        elif opts.method == 3:
            app.fitDifferentialCurve(E, d, smooth=opts.smoothing,
                                     invert=opts.invert, figname=d)
        elif opts.method == 4:
            app.breslauerMethod(E, d, invert=opts.invert)
def loadEkinProj(self, E=None):
    """Load an ekin project file

    If E is None, prompts for a pickled .ekinprj file, unpickles it
    into an EkinProject and caches it in self.ekinprojects keyed by
    filename.  Then makes the project current and displays it.
    """
    import os, types
    if E == None:
        import tkFileDialog
        filename = tkFileDialog.askopenfilename(defaultextension='.ekinprj',
                                  filetypes=[("Ekin project","*.ekinprj"),
                                             ("All files","*.*")],
                                  parent=self.mainwin)
        # BUG FIX: askopenfilename returns '' on cancel, so a plain
        # `!= None` check let '' fall through; also bail out instead of
        # continuing with E == None below.
        if not filename or not os.path.isfile(filename):
            return
        import pickle
        fd = open(filename)
        try:
            # NOTE: pickle.load on an untrusted file executes arbitrary
            # code - only open project files from trusted sources.
            data = pickle.load(fd)
        finally:
            # BUG FIX: the handle leaked if pickle.load raised
            fd.close()
        E = EkinProject(data=data)
        self.ekinprojects[filename] = E
    self.currprj = E
    self.showEkinProject(E)
    return
class VantHoff(Plugin):
    """A plugin to do Van't Hoff Analysis of temperature melting curves"""
    """Author: Damien Farrell"""

    # plugin registration metadata read by the PEAT plugin framework
    capabilities = ['gui','uses_sidepane']
    requires = ['pylab','numpy']
    menuentry = "Van't Hoff Analysis"
    # method name -> GUI button label
    gui_methods = {'getCSV': 'Import CSV',
                   'loadEkin':'Load Ekin Proj',
                   'saveEkin':'Save Ekin Proj',
                   'doAnalysis':"Do Analysis",
                   #'benchmark': 'Do Benchmark',
                   'close':'Close'}
    about = "A plugin to do Van't Hoff Analysis of temperature melting curves"
    # gas constant in J/(mol K)
    R = 8.3144

    def __init__(self):
        """Set up default state; no GUI is built until main() is called."""
        self.path = os.path.expanduser("~")
        self.pltConfig()
        self.E = None
        return

    def main(self, parent):
        """Plugin entry point: attach to the parent app and build the frame."""
        if parent==None:
            return
        self.parent = parent
        self.DB = parent.DB
        self.xydata = None
        self._doFrame()
        return

    def _doFrame(self):
        """Build the plugin GUI (buttons, method checkboxes, entry fields)."""
        if 'uses_sidepane' in self.capabilities:
            self.mainwin = self.parent.createChildFrame(width=600)
        else:
            self.mainwin=Toplevel()
            self.mainwin.title(self.menuentry)
            self.mainwin.geometry('800x600+200+100')
        methods = self._getmethods()
        fr = Frame(self.mainwin)
        fr.pack(side=LEFT,fill=BOTH)
        # only methods listed in gui_methods get buttons
        methods = [m for m in methods if m[0] in self.gui_methods.keys()]
        self._createButtons(methods, fr)
        self.showDatasetSelector()
        self.doall = Pmw.RadioSelect(fr,
                buttontype = 'checkbutton',
                orient = 'horizontal',
                labelpos = 'w')
        self.doall.add('Process All')
        self.doall.pack()
        self.conversions = Pmw.RadioSelect(fr,
                buttontype = 'checkbutton',
                orient = 'horizontal',
                labelpos = 'w')
        self.conversions.add('Convert Celsius-Kelvin')
        self.conversions.pack()
        self.methods = Pmw.RadioSelect(fr,
                buttontype = 'checkbutton',
                orient = 'vertical',
                labelpos = 'w',
                label_text = 'Methods:')
        for m in ['method 1','method 2','method 3', 'method 4']:
            self.methods.add(m)
        self.methods.invoke('method 1')
        self.methods.pack()
        self.sm = Pmw.EntryField(fr, labelpos = 'w',
                value = 5,
                label_text = 'Smoothing:')
        self.sm.pack()
        self.tw = Pmw.EntryField(fr, labelpos = 'w',
                value = 60,
                label_text = 'Width of transition:')
        self.tw.pack()
        return

    def _getmethods(self):
        """Get a list of all available public 
        methods"""
        import inspect
        mems = inspect.getmembers(self, inspect.ismethod)
        # leading underscore marks private methods
        methods = [m for m in mems if not m[0].startswith('_')]
        return methods

    def _createButtons(self, methods, fr=None):
        """Dynamically create buttons for supplied methods, which is a
        tuple of (method name, label)"""
        for m in methods:
            b=Button(fr,text=self.gui_methods[m[0]],command=m[1])
            b.pack(side=TOP,fill=BOTH)
        return

    def close(self):
        """Destroy the plugin window and drop the plot frame."""
        self.mainwin.destroy()
        self.plotframe = None
        return

    def showDatasetSelector(self):
        """(Re)build the dataset option menu from the current project."""
        if self.E==None:
            return
        if hasattr(self, 'dmenu'):
            self.dmenu.destroy()
        self.dmenu = Pmw.OptionMenu(self.mainwin,
                labelpos = 'w',
                label_text = 'Dataset:',
                items = sorted(self.E.datasets),
                command=self.showPreview,
                menubutton_width = 8)
        self.dmenu.pack(side=TOP,fill=BOTH)
        return

    def showPreview(self,event=None):
        """Plot the currently selected dataset in the preview pane."""
        if self.E == None:
            return
        if not hasattr(self, 'plotframe') or self.plotframe == None:
            from Ekin.Plotting import PlotPanel
            self.plotframe = PlotPanel(parent=self.mainwin, side=BOTTOM)
        self.plotframe.setProject(self.E)
        d = self.dmenu.getcurselection()
        self.plotframe.plotCurrent(d)
        #plt.close(1)
        return

    def getCSV(self):
        """Import a csv file"""
        self.E = EkinProject()
        from PEATDB.Ekin.IO import Importer
        importer = Importer(self,parent_win=self.mainwin)
        newdata = importer.import_multiple()
        if newdata == None:
            return
        for n in newdata.keys():
            self.E.insertDataset(newdata[n], n, update=None)
        print 'imported %s datasets' %len(self.E.datasets)
        self.showDatasetSelector()
        self.showPreview()
        return

    def loadEkin(self):
        """Load the ekin prj"""
        filename=tkFileDialog.askopenfilename(defaultextension='.ekinprj',
                                  initialdir=os.getcwd(),
                                  filetypes=[("ekinprj","*.ekinprj"),
                                             ("All files","*.*")],
                                  parent=self.mainwin)
        if not os.path.isfile(filename):
            return
        self.E = EkinProject()
        self.E.openProject(filename)
        self.showDatasetSelector()
        self.showPreview()
        return

    def saveEkin(self):
        """save proj"""
        if self.E != None:
            # ask for a filename only if the project was never saved
            if self.E.filename == None:
                self.E.filename = tkFileDialog.asksaveasfilename(defaultextension='.ekinprj',
                                      initialdir=os.getcwd(),
                                      filetypes=[("ekinprj","*.ekinprj"),
                                                 ("All files","*.*")],
                                      parent=self.mainwin)
            self.E.saveProject()
            print 'saved ekin proj'
        return

    def doAnalysis(self):
        """Execute from GUI"""
        if self.E == None:
            return
        methods = self.methods.getcurselection()
        if 'Process All' in self.doall.getcurselection():
            self.doAll(methods=methods)
        else:
            # run each checked method on the selected dataset
            if 'method 1' in methods:
                self.fitVantHoff(E=self.E,d=self.dmenu.getcurselection(),
                                 transwidth=int(self.tw.getvalue()))
            if 'method 2' in methods:
                self.fitElwellSchellman(E=self.E,d=self.dmenu.getcurselection(),
                                 transwidth=int(self.tw.getvalue()))
            if 'method 3' in methods:
                self.fitDifferentialCurve(E=self.E,d=self.dmenu.getcurselection(),
                                 smooth=int(self.sm.getvalue()))
            if 'method 4' in methods:
                self.breslauerMethod(E=self.E,d=self.dmenu.getcurselection())#,invert=opts.invert)
        return

    def guessMidpoint(self,x,y):
        """guess midpoint for unfolding model

        Returns the x value whose y is closest to halfway between
        min(y) and max(y).
        """
        midy=min(y)+(max(y)-min(y))/2.0
        midx=0
        closest=1e4
        for i in range(len(x)):
            c=abs(y[i]-midy)
            if c<closest:
                midx=x[i]
                closest=c
        return midx

    def transformCD(self,x,y,transwidth=None,ax=None):
        """Transform raw data into fraction unfolded per temp value, by
        fitting to a general unfolding equation that extracts
        baseline/slopes

        Returns (all_t, all_frac, trans_t, trans_frac): the full
        transformed series plus (optionally) the slice of width
        2*transwidth points centred on the midpoint.
        """
        #fit baseline slopes and get intercepts
        d50 = self.guessMidpoint(x,y)
        print 'fitting to get baseline slopes and intercepts..'
        print 'midpoint is %s' %d50
        A,X=Fitting.doFit(expdata=zip(x,y),model='Unfolding',noiter=50,silent=True,
                          guess=False,startvalues=[1,1,1,1,1,d50])
        #print X.getResult()
        fity = X.getFitLine(x)
        fd=X.getFitDict()
        if ax!=None:
            p=ax.plot(x,fity,'r',lw=2)
            self.drawParams(ax,fd)
        #we then use slopes and intercepts get frac unfolded at each temp
        mn = fd['bn']; mu = fd['bd'] #slopes
        #if mu>0.01: mu = 0.01
        yn = fd['an']; yu = fd['ad'] #intercepts
        d50 = fd['d50']; m = fd['m']
        t=[]; f=[]
        #print mu, mn
        for T,yo in zip(x,y):
            # fraction unfolded from baseline-corrected signal
            fu = (yo-(yn+mn*T)) / ((yu+mu*T)-(yn+mn*T))
            #print fu, (yo-(yn+mn*T)), (m), mu, mn
            #if f>0:
            f.append(fu)
            t.append(T)
        #try to take useful transition region of data
        at,af=t,f
        diff=1e5
        if transwidth != None:
            # locate the index nearest the fitted midpoint and slice
            for i in t:
                d=abs(i-d50)
                if d<diff:
                    mid = t.index(i)
                    diff=d
            L=int(mid-transwidth); U=int(mid+transwidth)
            t,f = t[L:U], f[L:U]
        return at,af,t,f

    def fitVantHoff(self, E=None, d=None, xy=None, transwidth=80,
                    invert=False, show=True, figname=None):
        """Derive fraction unfolded, get K and fit to Van't Hoff. 
        see http://www.jbc.org/content/277/43/40717.full or
        http://www.ncbi.nlm.nih.gov/pmc/articles/PMC2144003/

        Returns (deltaH, deltaS, axis) or (None, None, None) when too
        few points fall inside the transition.
        """
        if E != None:
            if not d in E.datasets:
                print 'no such dataset, %s' %d
                print 'available datasets:', E.datasets
                return
            ek = E.getDataset(d)
            x,y = ek.getxySorted()
        elif xy!=None:
            x,y = xy
        if 'Convert Celsius-Kelvin' in self.conversions.getcurselection():
            x = [i+273 for i in x]
        if invert == True:
            y = [max(y)-i for i in y[:]]
        f=plt.figure(figsize=(18,6))
        ax=f.add_subplot(131)
        p=ax.plot(x,y,'o',alpha=0.6)
        ax.set_xlabel('T(K)'); ax.set_ylabel('mdeg')
        ax.set_title('raw data')
        x1,y1,x,y = self.transformCD(x,y,transwidth,ax)
        # dump the fraction-unfolded curve for external use
        cw=csv.writer(open('frac_unfolded_'+d+'.csv','w'))
        cw.writerow(['temp','frac'])
        for i in zip(x1,y1):
            cw.writerow(i)
        #derive lnK vs 1/T
        t=[]; k=[]
        for T,fu in zip(x,y):
            # K = fu/(1-fu) is only defined strictly inside (0,1)
            if fu>=1 or fu<=0:
                continue
            K = fu/(1-fu)
            klog = math.log(K)
            k.append(klog)
            t.append(1/T)
        if len(t)<2:
            return None, None, None
        ax=f.add_subplot(132)
        p=ax.plot(x1,y1,'o',color='g',alpha=0.6)
        ax.set_xlabel('T(K)'); ax.set_ylabel('fu')
        ax.set_title('fraction unfolded')
        ax=f.add_subplot(133)
        p=ax.plot(t,k,'x',mew=2,color='black')
        ax.set_xlabel('1/T')#(r'$1/T ($K^-1)$')
        ax.set_ylabel('ln K')
        formatter = matplotlib.ticker.ScalarFormatter()
        formatter.set_scientific(True)
        formatter.set_powerlimits((0,0))
        ax.xaxis.set_major_formatter(formatter)
        for l in ax.get_xticklabels():
            l.set_rotation(30)
        #fit this van't hoff plot
        A,X=Fitting.doFit(expdata=zip(t,k),model='Linear')
        fitk = X.getFitLine(t)
        p=ax.plot(t,fitk,'r',lw=2)
        fd=X.getFitDict()
        #self.drawParams(ax,fd)
        #slope is deltaH/R/1000 in kJ
        deltaH = -fd['a']*self.R/1000
        deltaS = fd['b']*self.R/1000
        f.suptitle("Method 1 - deltaH: %2.2f deltaS: %2.2f" %(deltaH,deltaS),size=18)
        f.subplots_adjust(bottom=0.15,top=0.85)
        if show==True:
            self.showTkFigure(f)
        if figname == None:
            figname = d
        figname = figname.replace('.','_')
        fname = figname+'m1'+'.png'
        f.savefig(fname,dpi=300)
        print 'plot saved to %s' %os.path.abspath(fname)
        #plt.close()
        if E!=None:
            # store the derived van't hoff plot + fit back in the project
            fdata = Fitting.makeFitData(X.name,vrs=X.variables)
            E.insertDataset(xydata=[t,k], newname=d+'_vanthoff',replace=True,fit=fdata)
            #E.saveProject()
        return deltaH, deltaS, ax

    def fitElwellSchellman(self,E=None, d=None, xy=None,transwidth=50,
                           invert=False,show=True,figname=None):
        """Fit entire raw data simultaneously to the three main
        thermodynamic parameters using Elwell/Schellman method

        Returns (deltaH, Tm, deltacp).
        """
        if E !=None:
            ek = E.getDataset(d)
            x,y,a, xerr,yerr = ek.getAll()
        elif xy!=None:
            x,y = xy
        else:
            return
        if invert == True:
            y = [max(y)-i for i in y[:]]
        f=plt.figure(figsize=(10,5))
        ax=f.add_subplot(121)
        p=ax.plot(x,y,'o',alpha=0.5)
        ax.set_xlabel('T');ax.set_xlabel('mdeg')
        ax.set_title('raw data')
        x1,y1,x,y = self.transformCD(x,y,transwidth,ax)
        t=[];dg=[]
        # gas constant in kJ/(mol K)
        R=8.3144e-3
        for T,fu in zip(x,y):
            if fu>=1 or fu<=0:
                continue
            K = fu/(1-fu)
            # dG(T) = -RT ln K
            deltaGt = -R * T * math.log(K)
            dg.append(deltaGt)
            t.append(T)
        ax1=f.add_subplot(122)
        p=ax1.plot(t,dg,'x',mew=2,color='black')
        ax1.set_xlabel('T'); ax1.set_ylabel('dG(T)')
        ax.set_title('stability curve')
        A,X=Fitting.doFit(expdata=zip(t,dg),model='schellman',grad=1e-9,conv=1e-9)
        fity = X.getFitLine(t)
        p=ax1.plot(t,fity,'r',lw=2)
        fd=X.getFitDict()
        self.drawParams(ax1,fd)
        deltaH=fd['deltaH']; deltacp=fd['deltacp']; Tm=fd['Tm']
        f.suptitle("Method 2 - deltaH: %2.2f deltaCp: %2.2e Tm: %2.2f" %(deltaH,deltacp,Tm),size=18)
        if show == True:
            self.showTkFigure(f)
        if figname == None:
            figname = d
        figname = figname.replace('.','_')
        fname = figname+'m1'+'.png'
        f.savefig(fname,dpi=300)
        print 'plot saved to %s' %os.path.abspath(fname)
        if E!=None:
            fdata = Fitting.makeFitData(X.name,vrs=X.variables)
            E.insertDataset(xydata=[t,dg], newname=d+'_vanthoff2',replace=True,fit=fdata)
            #E.saveProject()
        return deltaH, Tm, deltacp

    def breslauerMethod(self,E=None, d=None, xy=None,invert=False,
                        show=True,figname=None):
        """Finds slope of trans region and plugs this in to equation
        http://www.springerlink.com/content/r34n0201g30563u7/

        Returns (deltaH, Tm).
        """
        if E !=None:
            ek = E.getDataset(d)
            x,y,a, xerr,yerr = ek.getAll()
        elif xy!=None:
            x,y = xy
        else:
            return
        f=plt.figure(figsize=(10,6))
        ax=f.add_subplot(111)
        ax.set_xlabel('T')
        p=ax.plot(x,y,'o',alpha=0.5)
        d50 = self.guessMidpoint(x,y)
        A,X=Fitting.doFit(expdata=zip(x,y),model='Unfolding',conv=1e-7,noiter=60,
                          guess=False,startvalues=[1,1,1,1,1,d50])
        fity = X.getFitLine(x)
        p=ax.plot(x,fity,'r',lw=2)
        fd=X.getFitDict()
        self.drawParams(ax,fd)
        Tm = fd['d50']; m = fd['m']
        R = 8.3144e-3
        # deltaH = R * Tm^2 * (slope at midpoint)
        deltaH = R * math.pow(Tm,2) * m
        f.suptitle("Method 4 - deltaH: %2.2f Tm: %2.2f" %(deltaH,Tm),size=18)
        if show == True:
            self.showTkFigure(f)
        if figname != None:
            figname = figname.replace('.','_')
            f.savefig(figname)
        plt.close()
        return deltaH, Tm

    def fitDifferentialCurve(self, E=None, d=None, xy=None,smooth=0,
                             invert=False,show=True,figname=None):
        """Derive differential denaturation curve and fit to get deltaH
        We smooth the unfolding curve and then differentiate and finally
        fit to a 3 parameter equation.
        See http://www.ncbi.nlm.nih.gov/pubmed/10933511

        Returns (deltaH, Tm).
        """
        if E !=None:
            ek = E.getDataset(d)
            x,y,a, xerr,yerr = ek.getAll()
        elif xy!=None:
            x,y = xy
        else:
            return
        if invert == True:
            y = [max(y)-i for i in y[:]]
        leg=[]; lines=[]
        f=plt.figure(figsize=(10,5))
        ax=f.add_subplot(121)
        p=ax.plot(x,y,'x',color='black',mew=3,alpha=0.5)
        leg.append(p); lines.append('original')
        #smooth
        if smooth == 0:
            # default window scales with the data length
            smooth=int(len(x)/15.0)
        s=self.smoothListGaussian(y,smooth)
        p=ax.plot(x[:len(s)-1],s[:-1],lw=3)
        leg.append(p); lines.append('smoothed')
        ax.set_title("original data")
        ax.set_xlabel('T')
        ax1=f.add_subplot(122)
        #differentiate
        dx,ds = self.differentiate(x[:len(s)],s)
        #ds = [i/max(ds) for i in ds]
        ds = [i*10 for i in ds]
        cw=csv.writer(open('diffcd.csv','w'))
        for row in zip(dx,ds):
            cw.writerow(row)
        p=ax1.plot(dx,ds,'-',lw=1.5,alpha=0.7,color='black')
        leg.append(p); lines.append('differential')
        ax1.set_title("differential denaturation")
        ax1.set_xlabel('T'); ax1.set_ylabel('dsignal/dT')
        A,X=Fitting.doFit(expdata=zip(dx,ds),model='diffDenaturation',grad=1e-9,conv=1e-10)
        fity = X.getFitLine(dx)
        p=ax1.plot(dx,fity,'r',lw=2)
        leg.append(p); lines.append('fit')
        t=X.getFitDict()
        self.drawParams(ax1,t)
        # 4.184 J per calorie
        dHkcal=t['deltaH']/4.184
        f.suptitle('Method 3 - deltaH: %2.2f kJ/mol (%2.2f kcal) Tm: %2.2f' %(t['deltaH'],dHkcal,t['Tm']),size=18)
        ax.legend(leg,lines,loc='best',prop=FontProperties(size="smaller"))
        #f.subplots_adjust(hspace=0.8)
        if show == True:
            self.showTkFigure(f)
        if figname != None:
            figname = figname.replace('.','_')
            f.savefig(figname+'m3',dpi=300)
        plt.close()
        if E!=None:
            fdata = Fitting.makeFitData(X.name,vrs=X.variables)
            E.insertDataset(xydata=[dx,ds], newname=d+'_diff',replace=True,fit=fdata)
            #E.saveProject()
        return t['deltaH'],t['Tm']

    def differentiate(self, x,y):
        """First-order finite difference of y; x is trimmed to match."""
        dy = numpy.diff(y,1)
        dx = x[:len(dy)]
        return dx,dy

    def smoothListGaussian(self,data,degree=5):
        """Gaussian data smoothing function"""
        #buffer data to avoid offset result
        data=list(data)
        data = [data[0]]*(degree-1) + data + [data[-1]]*degree
        window=degree*2-1
        weight=numpy.array([1.0]*window)
        weightGauss=[]
        for i in range(window):
            # gaussian weight centred on the window middle
            i=i-degree+1
            frac=i/float(window)
            gauss=1/(numpy.exp((4*(frac))**2))
            weightGauss.append(gauss)
        weight=numpy.array(weightGauss)*weight
        smoothed=[0.0]*(len(data)-window)
        for i in range(len(smoothed)):
            smoothed[i]=sum(numpy.array(data[i:i+window])*weight)/sum(weight)
        return smoothed

    def invert(self,data):
        """Return a shallow copy of data (no actual inversion performed)."""
        inv=[i for i in data]
        return inv

    def simulateCD(self,noise=1.0):
        """Simulate some CD spec data"""
        x=list(numpy.arange(290,380,0.2)); y=[]
        X=Fitting.getFitter(model='Unfolding',
                            vrs=[-16, 0.01, -11.6, 0.01, 2.7, 324])
        fity = X.getFitLine(x)
        for i in fity:
            # NOTE(review): the gaussian sample overwrites the `noise`
            # parameter and is then unused; i+noise adds the *sampled*
            # value - presumably intentional jitter, verify
            noise=numpy.random.normal(i, 1.0/2)
            y.append(i+noise)
        cw=csv.writer(open('cd.csv','w'))
        for row in zip(x,y):
            cw.writerow(row)
        return x,y

    def drawParams(self,ax,d):
        """Write each fit parameter name=value onto the axes, stacked
        from the top-left corner."""
        ymin, ymax = ax.get_ylim()
        xmin, xmax = ax.get_xlim()
        inc=(ymax-ymin)/20
        xinc=(xmax-xmin)/20
        y=ymax-inc
        for k in d:
            s = k+'='+str(round(d[k],3))
            ax.text(xmin+xinc,y,s,fontsize=10)
            y-=inc
        return

    def pltConfig(self):
        """Apply the plugin's global matplotlib rc settings."""
        #plt.rc('text', usetex=True)
        plt.rc('figure.subplot', hspace=0.3,wspace=0.3)
        #plt.rc('axes',titlesize=22)
        plt.rc('font',family='monospace')
        return

    def doAll(self, methods=['method 1']):
        """Process all datasets in ekinprj

        Runs each selected method over every dataset in self.E (skipping
        previously-derived '_diff'/'_vanthoff' datasets), shows progress,
        then displays a results table.
        NOTE: mutable default argument - safe here because `methods` is
        never mutated.
        """
        E=self.E
        vals={}
        from Dialogs import PEATDialog
        pb=PEATDialog(self.mainwin, option='progressbar',
                      message='Analysing Data..')
        pb.update_progress(0)
        total = len(E.datasets); count=0
        for d in E.datasets:
            # skip derived datasets produced by earlier runs
            if '_diff' in d or '_vanthoff' in d:
                continue
            vals[d]={}
            name = d
            if 'method 1' in methods:
                vals[d]['dH1'], vals[d]['dS1'], ax = self.fitVantHoff(E,d,
                                     transwidth=int(self.tw.getvalue()),
                                     show=False,figname=name)
            if 'method 2' in methods:
                vals[d]['dH2'], vals[d]['dTm2'], vals[d]['dCp2'] = self.fitElwellSchellman(E,d,show=False,figname=name)
            if 'method 3' in methods:
                vals[d]['dH3'], vals[d]['dTm3'] = self.fitDifferentialCurve(E,d,show=False,figname=name)
            count += 1
            pb.update_progress(float(count)/total*100.0)
        pb.close()
        self.showTable(vals)
        return

    def showTable(self, data):
        """Show results in table"""
        from PEATDB.DictEdit import DictEditor
        D=DictEditor(self.mainwin)
        D.loadTable(data)
        return

    def benchmark(self,E=None,d=None, method=1):
        """Test methods with varying paramaters, smoothing etc

        Sweeps the method's key parameter (transition width or smoothing
        degree), collects deltaH per value, plots the variation and
        writes it to benchmark_<method>.csv.
        """
        if E==None and self.E != None:
            E = self.E; d=self.dmenu.getcurselection()
        path='vh_benchmark'
        if not os.path.exists(path):
            os.mkdir(path)
        dHvals=[]
        if method == 1:
            xlabel = 'width (K)'
            title = 'method 1: deltaH variation with trans region width fit'
            vals=range(5,140,5)
            for w in vals:
                dH, dS, ax = self.fitVantHoff(E,d,transwidth=w,show=False,
                                  figname=os.path.join(path,'%s_%s.png' %(d,w)))
                if dH == None: dH=0
                dHvals.append(dH)
            #take best values from middle
            #dHvals= dHvals[5:16]
        elif method == 2:
            xlabel = 'width (K)'
            title = 'method 2: deltaH variation with width fit'
            vals=range(5,140,5)
            for w in vals:
                dH, dcp, dTm = self.fitElwellSchellman(E,d,transwidth=w,show=False,
                                  figname=os.path.join(path,'%s_%s.png' %(d,w)))
                dHvals.append(dH)
        elif method == 3:
            xlabel = 'smoothing degree'
            title = 'method 3: deltaH variation with degree of smoothing'
            vals=range(1,30,3)
            for s in vals:
                dH, dTm = self.fitDifferentialCurve(E,d,smooth=s,show=False,
                                  figname=os.path.join(path,'%s_%s.png' %(d,s)))
                dHvals.append(dH)
        mean = numpy.mean(dHvals)
        stdev = numpy.std(dHvals)
        f=plt.figure()
        ax=f.add_subplot(111)
        ax.plot(vals, dHvals,lw=2)
        ax.set_xlabel(xlabel)
        ax.set_ylabel('deltaH (kJ)')
        ax.set_title('mean: %2.2f stdev: %2.2f'%(mean, stdev))
        f.suptitle(title)
        f.savefig('benchmark_%s.png' %method)
        cw=csv.writer(open('benchmark_%s.csv' %method,'w'))
        for row in zip(vals,dHvals):
            cw.writerow(row)
        return

    def benchmarkLimitedData(self, E=None,d=None, method=1):
        """test any method with varying limited data

        NOTE(review): appears unfinished - `vals` is always empty so
        the loop never runs (`L` is assigned but unused).
        """
        if E==None and self.E != None:
            E = self.E; d=self.dmenu.getcurselection()
        path='vh_benchmark'
        if not os.path.exists(path):
            os.mkdir(path)
        dHvals=[]
        vals=[]
        if method == 1:
            L=range(5,140,5)
            for w in vals:
                dH, dS, ax = self.fitVantHoff(E,d,transwidth=w,show=False,
                                  figname=os.path.join(path,'%s_%s.png' %(d,w)))
        return

    @classmethod
    def plotCorrelation(self,x=None,y=None,xlabel='method1',ylabel='method2'):
        """Scatter-plot two methods' deltaH values against each other
        (loading compared.csv when x is not given) with an r^2 label.
        NOTE(review): declared @classmethod but calls self.showTkFigure -
        works only because showTkFigure never touches instance state.
        """
        if x==None:
            data=open('compared.csv','r')
            cr=csv.reader(data)
            x=[float(r[0]) for r in cr]; data.seek(0)
            y=[float(r[1]) for r in cr]
        f=plt.figure()
        ax=f.add_subplot(111)
        line = ax.scatter(x, y, marker='o',alpha=0.8)
        # identity line for reference
        cl = numpy.arange(0,max(x)+50)
        ax.plot(cl, cl, 'g', alpha=0.5,lw=2)
        ax.set_xlabel(xlabel)
        ax.set_ylabel(ylabel)
        ax.set_xlim(150,600); ax.set_ylim(150,600)
        ax.set_title('Correlation')
        from scipy.stats import stats
        cc = str(round(pow(stats.pearsonr(x,y)[0],2),2))
        ax.text(400,180, r'$r^2= %s$' %cc, fontsize=16)
        self.showTkFigure(f)
        return

    def showTkFigure(self, fig):
        """Embed a matplotlib figure in a new Toplevel window with a
        navigation toolbar."""
        from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg, NavigationToolbar2TkAgg
        fr = Toplevel()
        canvas = FigureCanvasTkAgg(fig, master=fr)
        #self.canvas.show()
        canvas.get_tk_widget().pack(side=TOP, fill=X, expand=1)
        mtoolbar = NavigationToolbar2TkAgg(canvas, fr)
        mtoolbar.update()
        canvas._tkcanvas.pack(side=BOTTOM, fill=BOTH, expand=1)
        return
def main():
    """Run some analysis

    (Duplicate of the earlier main definition.)  Command-line entry
    point: parses options, loads or simulates data, then dispatches to
    the selected analysis method.
    """
    from optparse import OptionParser
    parser = OptionParser()
    app = VantHoff()
    parser.add_option("-f", "--file", dest="file", help="Open a local db")
    parser.add_option("-e", "--ekinprj", dest="ekinprj",
                      help="Open an ekin project")
    parser.add_option("-d", "--dataset", dest="dataset", help="Dataset name")
    parser.add_option(
        "-m", "--method", dest="method", default=1, type='int',
        help=
        "Choose method - 1: Van't Hoff plot, 2: Schellman, 3: Differential fit, 4: Breslauer"
    )
    parser.add_option("-b", "--benchmark", dest="benchmark",
                      action='store_true', help="Test", default=False)
    parser.add_option("-a", "--all", dest="all", action='store_true',
                      help="Do all datasets in ekinprj", default=False)
    parser.add_option("-w", "--width", dest="width", default=50, type='int',
                      help="Width of transition region to fit for method 1")
    parser.add_option(
        "-s", "--smoothing", dest="smoothing", default=5, type='int',
        help="Degree of smoothing to apply in method 2 (default 5)")
    parser.add_option("-i", "--invert", dest="invert", action='store_true',
                      help="Invert raw data", default=False)
    opts, remainder = parser.parse_args()
    if opts.file != None and os.path.exists(opts.file):
        app.loadDB(opts.file)
    if opts.ekinprj != None and os.path.exists(opts.ekinprj):
        E = EkinProject()
        E.openProject(opts.ekinprj)
        d = opts.dataset
    else:
        # no project supplied: simulate CD data instead
        x, y = app.simulateCD()
        E = EkinProject()
        d = 'cdtest'
        E.insertDataset(xydata=[x, y], newname=d)
    if opts.all == True:
        # NOTE(review): BUG - `self` and `methods` are undefined at module
        # level; this raises NameError when -a is used.  Should be
        # app.E = E; app.doAll() (see the other main() definition).
        self.doAll(E, methods)
    if opts.benchmark == True:
        app.benchmark(E, d, method=opts.method)
        #app.plotCorrelation()
    else:
        if opts.method == 1:
            app.fitVantHoff(E, d, transwidth=opts.width,
                            invert=opts.invert, figname=d)
        elif opts.method == 2:
            app.fitElwellSchellman(E, d, transwidth=opts.width,
                                   invert=opts.invert, figname=d)
        elif opts.method == 3:
            app.fitDifferentialCurve(E, d, smooth=opts.smoothing,
                                     invert=opts.invert, figname=d)
        elif opts.method == 4:
            app.breslauerMethod(E, d, invert=opts.invert)
def run(self, callback=None):
    """Do initial import/fitting run with the current config.

    Walks every file in self.queue: imports the raw data, optionally
    merges replicates (Utilities.addReplicates) and regroups by a
    secondary field, applies the configured processing step and fit
    models, then saves each result as an ekin project (plus CSV/plot
    exports) under self.workingdir.

    callback: optional callable invoked with a percentage (0-100)
              after each imported file, for progress reporting.
    Returns a dict of fitted (or raw) results keyed by name label,
    also stored on self.results.  Returns None early if self.stop is
    set (e.g. cancelled from a GUI) during processing.
    """
    self.stop=False
    self.loadModels()
    self.prepareData()
    print 'processing files in queue..'
    self.parseLabels()
    imported = {}  #raw data, keyed as in self.queue
    results = {}   #fitted data, keyed by name label
    #print self.queue
    #import every queued file; files that fail to open are skipped
    for key in self.queue:
        filename = self.queue[key]
        lines = self.openRaw(filename)
        if lines == None:
            continue
        data = self.doImport(lines)
        imported[key] = data
    #rebuild dict into a nested structure if it's flat (i.e. from single files)
    '''from Data import NestedData
    D = NestedData(imported)
    D.buildNestedStructure([0,2])
    D.show()
    imported = D.data
    self.namelabels = None'''
    #try to average replicates here before we process
    if self.replicates == 1:
        if self.namelabels != None:
            imported = Utilities.addReplicates(imported, self.namelabels)
        else:
            print 'no replicates detected from labels'
    #re-arrange the imported dict if we want to group our output per field
    if self.groupbyfields == 1:
        imported = Utilities.arrangeDictbySecondaryKey(imported,
                                                       self.namelabels)
    total = len(imported)
    #print imported
    #print self.namelabels
    c=0.0  #progress counter for the callback
    for key in imported:
        #self.stop may be set externally to abort mid-run
        if self.stop == True:
            print 'cancelled'
            return
        #set filename used for all per-file outputs in the working dir
        fname = os.path.basename(key)
        fname = os.path.join(self.workingdir, fname)
        data = imported[key]
        if self.function1 != '':
            data = self.doProcessingStep(data, fname)
        #label results by the parsed name label when one exists for this key
        if self.namelabels == None or not self.namelabels.has_key(key):
            namelabel = key
        else:
            namelabel = self.namelabels[key]
        #print namelabel, key
        #print data
        #if we have models to fit this means we might need to propagate fit data
        if self.model1 != '':
            Em = EkinProject()
            #grouping by file labels handled here
            if self.groupbyname == 1:
                #we don't pass the last model if it has to be
                #reserved for a final round of fitting from the files dict
                models = self.models[:-1]
                variables = self.variables[:-1]
                E,fits = self.processFits(rawdata=data, Em=Em,
                                          models=models,variables=variables)
            else:
                E,fits = self.processFits(rawdata=data, Em=Em)
            results[namelabel] = fits
            #print E.datasets, namelabel
        else:
            #if no fitting we just put the data in ekin
            Em = Utilities.getEkinProject(data)
            results[namelabel] = data
        Em.saveProject(fname)
        Em.exportDatasets(fname, append=True)
        if self.model1 != '':
            self.saveFitstoCSV(Em, fname)
        if self.saveplots == 1:
            self.saveEkinPlotstoImages(Em, fname)
        c+=1.0
        if callback != None:
            callback(c/total*100)
    #if grouped by file names then we process that here from results
    if self.groupbyname == 1:
        results = Utilities.extractSecondaryKeysFromDict(results)
        Em = EkinProject()
        #print results
        E,fits = self.processFits(rawdata=results, Em=Em)
        fname = os.path.join(self.workingdir, 'final')
        Em.saveProject(os.path.join(self.workingdir, fname))
        Em.exportDatasets(os.path.join(self.workingdir, fname))
        if self.model1 != '':
            self.saveFitstoCSV(Em, fname)
            self.saveEkinPlotstoImages(Em, fname)
    print 'processing done'
    print 'results saved to %s' %self.workingdir
    self.results = results
    return results
def showEkinPlots(self, ekindata=None, project=None, filename=None, datasets='ALL', title='Ekin plots', outfile=None, imgpath=None, path='', normalise=False, showfitvars=False, plotoption=1, columns=2, legend=False, size=(8, 6), logx=False, logy=False): """Plot ekin datasets from the provided ekin project data""" def downloadLink(): #do csv download link for data displayed print '<table id="mytable" valign=top>' cfname = tempfile.mktemp('.csv', dir=csvpath) E.exportCSV(filename=cfname) p = os.path.split(path)[0] print '<td><a href=%s title="right-click to save as"> download data </a>' \ %os.path.join(p, 'csv', os.path.basename(cfname)) print '</td></tr>' print '</table>' csvpath = os.path.join(os.path.split(imgpath)[0], 'csv') print project if ekindata != None: #convert from ekindata E = EkinProject(data=ekindata, mode='NMR titration') elif project != None: #just passed object E = project elif filename != None: #load project from file E = EkinProject() E.openProject(project) else: return E.checkDatasets() #if outfile is given, we override imgpath if outfile != None and imgpath == None: imgpath = os.path.dirname(outfile) if imgpath != None: tempfile.tempdir = imgpath size = (8, 6) if datasets == 'ALL': #we plot all the datasets datasets = E.datasets if plotoption == 1: if columns > 2: size = (4, 3) imagenames = {} for d in datasets: imgfile = self.maketempImage() name = os.path.basename(imgfile) E.plotDatasets(d, filename=imgfile, size=size, linecolor='r', normalise=normalise, showfitvars=showfitvars, legend=legend, logx=logx, logy=logy) imagenames[d] = name elif plotoption == 3: name = self.maketempImage() E.plotDatasets(datasets, filename=name, plotoption=3, size=size, normalise=normalise, legend=legend, logx=logx, logy=logy) if outfile != None: saveout = sys.stdout fsock = open(outfile, 'w') sys.stdout = fsock self.doheader(title) downloadLink() print '<table id="mytable" align=center cellspacing="0" borderwidth=1>' row = 1 c = 1 datasets.sort() if plotoption == 
1: for d in datasets: if not imagenames.has_key(d): continue if c == 1: print '<tr>' print '<td> <img src=%s/%s align=center></td>' % ( path, imagenames[d]) print '<td class="alt">' #use ekinproject to supply formatted fit and model info here.. self.showMetaData(ekinproj=E, dataset=d) print '</td>' c = c + 1 if c >= columns: print '</tr>' row = row + 1 c = 1 elif plotoption == 3: print '<td> <img src=%s/%s align=center></td>' % ( path, os.path.basename(name)) print '<td class="alt">' #use ekinproject to supply formatted fit and model info here.. x = 1 for d in datasets: if x > 2: n = True x = 0 else: n = False if n == False: print '<td>' self.showMetaData(ekinproj=E, dataset=d) x += 1 print '</td>' c = c + 1 if c >= columns: print '</tr>' row = row + 1 c = 1 print '</table>' if outfile != None: sys.stdout.flush() sys.stdout = saveout fsock.close() return
def plotpKDCalcs(self, calcs, Ed=None, option=1):
    """Do pKD calcs with exp data plots.

    Builds an EkinProject from the calculated pKD titration curves in
    'calcs' ({residue: {pH: value, ...}}), fits them with a
    '1 pKa 2 Chemical shifts' model, renames the experimental datasets
    in Ed to the pKD naming scheme (chain:res_num:residue[,_atom]) and
    shows each matching calc/exp pair overlaid via EkinWeb.

    calcs:  dict of calculated curves keyed by residue name.
    Ed:     EkinProject of experimental data (renamed in place).
    option: '2' shows only experimental data, '3' only pKD data.
            NOTE(review): the default is the int 1 but the comparisons
            use the strings '2'/'3' - confirm callers pass strings.
    Returns None.
    """
    from PEATDB.Ekin.Web import EkinWeb
    from PEATDB.Ekin.Base import EkinProject
    from PEATDB.Ekin.Convert import EkinConvert
    from PEATDB.Ekin.Titration import TitrationAnalyser
    import PEATDB.Ekin.Utils as Utils
    t = TitrationAnalyser()  #NOTE(review): 't' is never used below
    c=calcs
    EW = EkinWeb()
    if option == '2':
        print '<a>Just showing experimental data</a>'
        EW.showEkinPlots(project=Ed, datasets='ALL',
                         path=self.plotsdir, imgpath=self.imagepath)
        return
    #create ekin proj from pKD titcurves
    Ec = EkinProject()
    for r in c.keys():
        xd=[];yd=[]
        #keys of c[r] are x-values; string keys are skipped
        for i in c[r]:
            if type(i) is types.StringType:
                continue
            xd.append(i)
            yd.append(c[r][i])
        edata=EkinConvert.xy2ekin([xd,yd])
        Ec.insertDataset(edata, r)
    print '<a>Please wait, fitting calcated curves...</a>'
    sys.stdout.flush()
    Ec.fitDatasets(models=['1 pKa 2 Chemical shifts'], silent=True)
    if option == '3':
        print '<a>Just showing pKD data</a>'
        EW.showEkinPlots(project=Ec, datasets='ALL',
                         path=self.plotsdir, imgpath=self.imagepath)
        return
    #transform exp data names to match pKD ones
    s=':'
    usechainid = True
    #if pKD names have no chain id, we don't need one for exp names
    if Ec.datasets[0].startswith(':'):
        usechainid=False
    #iterate a copy since renameDataset mutates Ed.datasets
    for d in Ed.datasets[:]:
        r = Ed.getMetaData(d)
        if r != None:
            if r['chain_id'] == None or usechainid == False:
                chain = ''
            else:
                chain = r['chain_id']
            new = chain+s+Utils.leadingZeros(r['res_num'],4)+s+r['residue']
            #same residue already present: disambiguate by atom suffix
            if new in Ed.datasets:
                atom = r['atom']
                new = new + '_' + atom
            Ed.renameDataset(d, new)
    #now we overlay the same datasets in Ed and Ec
    #also handles cases where same residue multiple times for diff atoms in exp data
    for d in Ed.datasets:
        if d in Ec.datasets:
            #build a two-dataset project (calc + exp) and plot them together
            Ep = EkinProject()
            cdata = Ec.getDataset(d)
            Ep.insertDataset(cdata, d+'_pKD')
            Ep.setFitData(d+'_pKD', Ec.getFitData(d))
            ddata = Ed.getDataset(d)
            Ep.insertDataset(ddata, d+'_exp')
            Ep.setFitData(d+'_exp', Ed.getFitData(d))
            EW.showEkinPlots(project=Ep, datasets='ALL', plotoption=3,
                             normalise=True, legend=True,
                             path=self.plotsdir, imgpath=self.imagepath)
    return
def showEkinPlots(self, ekindata=None, project=None, filename=None, datasets='ALL', title='Ekin plots', outfile=None, imgpath=None, path='', normalise=False, showfitvars=False, plotoption=1, columns=2, legend=False, size=(8,6), logx=False, logy=False): """Plot ekin datasets from the provided ekin project data""" def downloadLink(): #do csv download link for data displayed print '<table id="mytable" valign=top>' cfname = tempfile.mktemp('.csv', dir=csvpath) E.exportCSV(filename=cfname) p = os.path.split(path)[0] print '<td><a href=%s title="right-click to save as"> download data </a>' \ %os.path.join(p, 'csv', os.path.basename(cfname)) print '</td></tr>' print '</table>' csvpath = os.path.join( os.path.split(imgpath)[0], 'csv') print project if ekindata != None: #convert from ekindata E = EkinProject(data=ekindata, mode='NMR titration') elif project != None: #just passed object E = project elif filename != None: #load project from file E = EkinProject() E.openProject(project) else: return E.checkDatasets() #if outfile is given, we override imgpath if outfile != None and imgpath==None: imgpath = os.path.dirname(outfile) if imgpath != None: tempfile.tempdir = imgpath size=(8,6) if datasets == 'ALL': #we plot all the datasets datasets = E.datasets if plotoption == 1: if columns>2: size=(4,3) imagenames={} for d in datasets: imgfile = self.maketempImage() name = os.path.basename(imgfile) E.plotDatasets(d, filename=imgfile, size=size, linecolor='r', normalise=normalise,showfitvars=showfitvars,legend=legend, logx=logx, logy=logy) imagenames[d] = name elif plotoption == 3: name = self.maketempImage() E.plotDatasets(datasets, filename=name, plotoption=3, size=size, normalise=normalise, legend=legend, logx=logx, logy=logy) if outfile != None: saveout = sys.stdout fsock = open(outfile, 'w') sys.stdout = fsock self.doheader(title) downloadLink() print '<table id="mytable" align=center cellspacing="0" borderwidth=1>' row=1;c=1 datasets.sort() if plotoption == 1: for d in 
datasets: if not imagenames.has_key(d): continue if c==1: print '<tr>' print '<td> <img src=%s/%s align=center></td>' % (path, imagenames[d]) print '<td class="alt">' #use ekinproject to supply formatted fit and model info here.. self.showMetaData(ekinproj=E, dataset=d) print '</td>' c=c+1 if c >= columns: print '</tr>' row=row+1 c=1 elif plotoption == 3: print '<td> <img src=%s/%s align=center></td>' % (path, os.path.basename(name)) print '<td class="alt">' #use ekinproject to supply formatted fit and model info here.. x=1 for d in datasets: if x>2: n=True x=0 else: n=False if n==False: print '<td>' self.showMetaData(ekinproj=E, dataset=d) x+=1 print '</td>' c=c+1 if c >= columns: print '</tr>' row=row+1 c=1 print '</table>' if outfile != None: sys.stdout.flush() sys.stdout = saveout fsock.close() return
def importOldProj(datadir, local=None, server=None, project=None,
                  username=None):
    """Import old peat projects into a new PDatabase.

    Opens the legacy PEAT_DB database found in 'datadir', copies its
    special/meta entries (userfields, table, ...) and every protein
    record into a new PDatabase, converting ekin-typed fields to
    EkinProject objects, then drops 'File' userfields and commits.

    datadir:  directory of the old PEAT_DB database.
    local:    path for a local destination PDatabase, or
    server:   host of a remote destination (port 8080, with the given
              username and project name).
              NOTE(review): if both local and server are None, newDB is
              never bound and the first 'print newDB' raises NameError.
    Returns None.
    """
    import PEAT_DB.Database as peatDB
    from PEAT_DB.PEAT_dict import PEAT_dict, sub_dict
    import copy
    if local != None:
        newDB = PDatabase(local=local)
    elif server != None:
        newDB = PDatabase(server=server, username=username, port=8080,
                          password='******', project=project)
    print newDB
    PT = peatDB.Database(datadir, Tk=False)
    oldDB = PT.DB
    print 'got old peat_db with %s proteins' % len(PT.proteins)
    print PT.DB.keys()
    #import meta stuff like userfields, table
    for p in newDB.meta.special:
        if not p in PT.DB.keys():
            continue
        print 'adding', p
        #deep-copy so the new DB holds no references into the old one
        for k in PT.DB[p]:
            newDB.meta[p][k] = copy.deepcopy(PT.DB[p][k])
    newDB.meta._p_changed = 1  #mark ZODB persistent meta as dirty
    for p in PT.proteins:
        if p in newDB.meta.special:
            continue
        name = oldDB[p]['Name']
        rec = PEATRecord(name=name)
        for col in oldDB[p].keys():
            cdata = oldDB[p][col]
            recdata = {}
            if col == 'name':
                cdata = oldDB[p]['Name']
            #ekin-typed userfields become EkinProject objects;
            #'ekintypes' is assumed to be a module-level list of such
            #field types - TODO confirm
            if oldDB['userfields'].has_key(col) and oldDB['userfields'][col][
                    'field_type'] in ekintypes:
                E = EkinProject(data=cdata)
                E.length = len(E.datasets)
                if len(E.datasets) == 0:
                    continue
                cdata = E
            if type(cdata) == sub_dict:
                #flatten legacy sub_dict into a plain deep-copied dict
                for k in cdata.keys():
                    recdata[k] = copy.deepcopy(cdata[k])
            else:
                recdata = cdata
            #skip empty values entirely
            if cdata != '' and cdata != None:
                rec.addField(col, data=recdata)
        newDB.add(p, rec)
    print newDB.meta.userfields
    #remove any file cols, too hard to import
    for m in newDB.meta.userfields.keys()[:]:
        if newDB.meta.userfields[m]['field_type'] == 'File':
            newDB.deleteField(m)
    newDB.commit(user='******', note='import')
    newDB.close()
    print 'import done'
    return
def importOldProj(datadir,local=None, server=None, project=None,
                  username=None):
    """Import old peat projects (duplicate of the formatted version above).

    Migrates a legacy PEAT_DB database in 'datadir' into a new
    PDatabase: copies meta/special entries, converts ekin-typed fields
    into EkinProject objects, adds each protein as a PEATRecord,
    removes 'File' userfields and commits.

    Destination is chosen by 'local' (path) or 'server' (host,
    port 8080, with username/project).
    NOTE(review): when both local and server are None, newDB is unbound
    and 'print newDB' raises NameError.
    Returns None.
    """
    import PEAT_DB.Database as peatDB
    from PEAT_DB.PEAT_dict import PEAT_dict, sub_dict
    import copy
    if local != None:
        newDB = PDatabase(local=local)
    elif server != None:
        newDB = PDatabase(server=server, username=username, port=8080,
                          password='******', project=project)
    print newDB
    PT = peatDB.Database(datadir, Tk=False)
    oldDB = PT.DB
    print 'got old peat_db with %s proteins' %len(PT.proteins)
    print PT.DB.keys()
    #import meta stuff like userfields, table
    for p in newDB.meta.special:
        if not p in PT.DB.keys():
            continue
        print 'adding',p
        #deep-copy so nothing in the new DB aliases the old one
        for k in PT.DB[p]:
            newDB.meta[p][k] = copy.deepcopy(PT.DB[p][k])
    newDB.meta._p_changed = 1  #flag persistent meta object as changed
    for p in PT.proteins:
        if p in newDB.meta.special:
            continue
        name = oldDB[p]['Name']
        rec = PEATRecord(name=name)
        for col in oldDB[p].keys():
            cdata = oldDB[p][col]
            recdata = {}
            if col == 'name':
                cdata = oldDB[p]['Name']
            #ekin-typed fields are wrapped in an EkinProject; 'ekintypes'
            #is presumably a module-level list of field types - confirm
            if oldDB['userfields'].has_key(col) and oldDB['userfields'][col]['field_type'] in ekintypes:
                E=EkinProject(data=cdata)
                E.length = len(E.datasets)
                if len(E.datasets)==0:
                    continue
                cdata = E
            if type(cdata) == sub_dict:
                #flatten legacy sub_dict into a plain dict copy
                for k in cdata.keys():
                    recdata[k] = copy.deepcopy(cdata[k])
            else:
                recdata = cdata
            #empty values are not stored on the record
            if cdata != '' and cdata != None:
                rec.addField(col, data=recdata)
        newDB.add(p,rec)
    print newDB.meta.userfields
    #remove any file cols, too hard to import
    for m in newDB.meta.userfields.keys()[:]:
        if newDB.meta.userfields[m]['field_type'] == 'File':
            newDB.deleteField(m)
    newDB.commit(user='******', note='import')
    newDB.close()
    print 'import done'
    return
def main(): """Run some analysis""" from optparse import OptionParser parser = OptionParser() app = NMRTitration() DB=None; E=None parser.add_option("-f", "--file", dest="file", help="Open a local db") parser.add_option("-e", "--ekinprj", dest="ekinprj", help="Open an ekin project") parser.add_option("-s", "--server", dest="server", help="field") parser.add_option("-t", "--analysis", dest="analysis", action='store_true', help="titr db analysis", default=False) parser.add_option("-r", "--refit", dest="refit", action='store_true', help="refit specific ekin data", default=False) parser.add_option("-u", "--getexperrs", dest="getexperrs", action='store_true', help="get exp uncertainties", default=False) parser.add_option("-m", "--addmeta", dest="addmeta", action='store_true', help="add meta data for NMR", default=False) parser.add_option("-p", "--protein", dest="protein", help="protein") parser.add_option("-c", "--col", dest="col", help="field") parser.add_option("-a", "--atom", dest="atom", help="atom") parser.add_option("-x", "--export", dest="export", action='store_true', help="export db", default=False) parser.add_option("-b", "--benchmark", dest="benchmark", action='store_true', help="benchmark some stuff", default=False) parser.add_option("-g", "--gui", dest="gui", action='store_true', help="start gui app", default=False) opts, remainder = parser.parse_args() if opts.file != None and os.path.exists(opts.file): app.loadDB(opts.file) elif opts.server != None: DB = PDatabase(server='localhost', username='******', password='******', project='titration_db', port=8080) if opts.gui == True: app.main() app.mainwin.mainloop() return yuncerts = {'H':0.03,'N':0.1,'C':0.2} try: yuncert=yuncerts[opts.atom] except: yuncert=None if opts.ekinprj != None: E = EkinProject() E.openProject(opts.ekinprj) #some tit db analysis if opts.analysis == True and opts.server != None: complete = ['HEWL', 'Bovine Beta-Lactoglobulin', 'Plastocyanin (Anabaena variabilis)', 'Plastocyanin 
(Phormidium)', 'Glutaredoxin', 'Protein G B1','Xylanase (Bacillus subtilus)'] if opts.col == None: print 'provide a column' else: app.analyseTitDB(DB, opts.col)#, complete) #app.addpKaTables(DB, complete) elif opts.benchmark == True: app.benchmarkExpErr(DB) elif opts.col != None or E != None: app.titDBUtils(DB, opts.col, opts.protein, a=opts.atom, E=E, refit=opts.refit, addmeta=opts.addmeta, getexperrs=opts.getexperrs, yuncert=yuncert) elif opts.export == True: app.exportAll(DB, col=opts.col)
def run(self, callback=None):
    """Do initial import/fitting run with the current config.

    Formatted duplicate of the run() defined earlier in this file.
    Imports every file in self.queue, optionally merges replicates and
    regroups by a secondary field, applies the configured processing
    step and fit models, and writes each result (ekin project, CSV,
    optional plots) into self.workingdir.

    callback: optional callable given a 0-100 progress percentage
              after each file.
    Returns the results dict (also stored on self.results), or None
    if self.stop is set mid-run.
    """
    self.stop = False
    self.loadModels()
    self.prepareData()
    print 'processing files in queue..'
    self.parseLabels()
    imported = {}  #raw data keyed as in self.queue
    results = {}   #fitted data keyed by name label
    #print self.queue
    #import each queued file, skipping those that fail to open
    for key in self.queue:
        filename = self.queue[key]
        lines = self.openRaw(filename)
        if lines == None:
            continue
        data = self.doImport(lines)
        imported[key] = data
    #rebuild dict into a nested structure if it's flat (i.e. from single files)
    '''from Data import NestedData
    D = NestedData(imported)
    D.buildNestedStructure([0,2])
    D.show()
    imported = D.data
    self.namelabels = None'''
    #try to average replicates here before we process
    if self.replicates == 1:
        if self.namelabels != None:
            imported = Utilities.addReplicates(imported, self.namelabels)
        else:
            print 'no replicates detected from labels'
    #re-arrange the imported dict if we want to group our output per field
    if self.groupbyfields == 1:
        imported = Utilities.arrangeDictbySecondaryKey(
            imported, self.namelabels)
    total = len(imported)
    #print imported
    #print self.namelabels
    c = 0.0  #progress counter used for the callback percentage
    for key in imported:
        #self.stop can be raised externally (GUI cancel) to abort
        if self.stop == True:
            print 'cancelled'
            return
        #set filename for this file's outputs inside the working dir
        fname = os.path.basename(key)
        fname = os.path.join(self.workingdir, fname)
        data = imported[key]
        if self.function1 != '':
            data = self.doProcessingStep(data, fname)
        #prefer the parsed name label for this key when available
        if self.namelabels == None or not self.namelabels.has_key(key):
            namelabel = key
        else:
            namelabel = self.namelabels[key]
        #print namelabel, key
        #print data
        #if we have models to fit this means we might need to propagate fit data
        if self.model1 != '':
            Em = EkinProject()
            #grouping by file labels handled here
            if self.groupbyname == 1:
                #we don't pass the last model if it has to be
                #reserved for a final round of fitting from the files dict
                models = self.models[:-1]
                variables = self.variables[:-1]
                E, fits = self.processFits(rawdata=data, Em=Em,
                                           models=models,
                                           variables=variables)
            else:
                E, fits = self.processFits(rawdata=data, Em=Em)
            results[namelabel] = fits
            #print E.datasets, namelabel
        else:
            #if no fitting we just put the data in ekin
            Em = Utilities.getEkinProject(data)
            results[namelabel] = data
        Em.saveProject(fname)
        Em.exportDatasets(fname, append=True)
        if self.model1 != '':
            self.saveFitstoCSV(Em, fname)
        if self.saveplots == 1:
            self.saveEkinPlotstoImages(Em, fname)
        c += 1.0
        if callback != None:
            callback(c / total * 100)
    #if grouped by file names then we process that here from results
    if self.groupbyname == 1:
        results = Utilities.extractSecondaryKeysFromDict(results)
        Em = EkinProject()
        #print results
        E, fits = self.processFits(rawdata=results, Em=Em)
        fname = os.path.join(self.workingdir, 'final')
        Em.saveProject(os.path.join(self.workingdir, fname))
        Em.exportDatasets(os.path.join(self.workingdir, fname))
        if self.model1 != '':
            self.saveFitstoCSV(Em, fname)
            self.saveEkinPlotstoImages(Em, fname)
    print 'processing done'
    print 'results saved to %s' % self.workingdir
    self.results = results
    return results
class VantHoff(Plugin): """A plugin to do Van't Hoff Analysis of temperature melting curves""" """Author: Damien Farrell""" capabilities = ['gui', 'uses_sidepane'] requires = ['pylab', 'numpy'] menuentry = "Van't Hoff Analysis" gui_methods = { 'getCSV': 'Import CSV', 'loadEkin': 'Load Ekin Proj', 'saveEkin': 'Save Ekin Proj', 'doAnalysis': "Do Analysis", #'benchmark': 'Do Benchmark', 'close': 'Close' } about = "A plugin to do Van't Hoff Analysis of temperature melting curves" R = 8.3144 def __init__(self): self.path = os.path.expanduser("~") self.pltConfig() self.E = None return def main(self, parent): if parent == None: return self.parent = parent self.DB = parent.DB self.xydata = None self._doFrame() return def _doFrame(self): if 'uses_sidepane' in self.capabilities: self.mainwin = self.parent.createChildFrame(width=600) else: self.mainwin = Toplevel() self.mainwin.title(self.menuentry) self.mainwin.geometry('800x600+200+100') methods = self._getmethods() fr = Frame(self.mainwin) fr.pack(side=LEFT, fill=BOTH) methods = [m for m in methods if m[0] in self.gui_methods.keys()] self._createButtons(methods, fr) self.showDatasetSelector() self.doall = Pmw.RadioSelect(fr, buttontype='checkbutton', orient='horizontal', labelpos='w') self.doall.add('Process All') self.doall.pack() self.conversions = Pmw.RadioSelect(fr, buttontype='checkbutton', orient='horizontal', labelpos='w') self.conversions.add('Convert Celsius-Kelvin') self.conversions.pack() self.methods = Pmw.RadioSelect(fr, buttontype='checkbutton', orient='vertical', labelpos='w', label_text='Methods:') for m in ['method 1', 'method 2', 'method 3', 'method 4']: self.methods.add(m) self.methods.invoke('method 1') self.methods.pack() self.sm = Pmw.EntryField(fr, labelpos='w', value=5, label_text='Smoothing:') self.sm.pack() self.tw = Pmw.EntryField(fr, labelpos='w', value=60, label_text='Width of transition:') self.tw.pack() return def _getmethods(self): """Get a list of all available public methods""" import 
inspect mems = inspect.getmembers(self, inspect.ismethod) methods = [m for m in mems if not m[0].startswith('_')] return methods def _createButtons(self, methods, fr=None): """Dynamically create buttons for supplied methods, which is a tuple of (method name, label)""" for m in methods: b = Button(fr, text=self.gui_methods[m[0]], command=m[1]) b.pack(side=TOP, fill=BOTH) return def close(self): self.mainwin.destroy() self.plotframe = None return def showDatasetSelector(self): if self.E == None: return if hasattr(self, 'dmenu'): self.dmenu.destroy() self.dmenu = Pmw.OptionMenu(self.mainwin, labelpos='w', label_text='Dataset:', items=sorted(self.E.datasets), command=self.showPreview, menubutton_width=8) self.dmenu.pack(side=TOP, fill=BOTH) return def showPreview(self, event=None): if self.E == None: return if not hasattr(self, 'plotframe') or self.plotframe == None: from Ekin.Plotting import PlotPanel self.plotframe = PlotPanel(parent=self.mainwin, side=BOTTOM) self.plotframe.setProject(self.E) d = self.dmenu.getcurselection() self.plotframe.plotCurrent(d) #plt.close(1) return def getCSV(self): """Import a csv file""" self.E = EkinProject() from PEATDB.Ekin.IO import Importer importer = Importer(self, parent_win=self.mainwin) newdata = importer.import_multiple() if newdata == None: return for n in newdata.keys(): self.E.insertDataset(newdata[n], n, update=None) print 'imported %s datasets' % len(self.E.datasets) self.showDatasetSelector() self.showPreview() return def loadEkin(self): """Load the ekin prj""" filename = tkFileDialog.askopenfilename(defaultextension='.ekinprj', initialdir=os.getcwd(), filetypes=[ ("ekinprj", "*.ekinprj"), ("All files", "*.*") ], parent=self.mainwin) if not os.path.isfile(filename): return self.E = EkinProject() self.E.openProject(filename) self.showDatasetSelector() self.showPreview() return def saveEkin(self): """save proj""" if self.E != None: if self.E.filename == None: self.E.filename = tkFileDialog.asksaveasfilename( 
defaultextension='.ekinprj', initialdir=os.getcwd(), filetypes=[("ekinprj", "*.ekinprj"), ("All files", "*.*")], parent=self.mainwin) self.E.saveProject() print 'saved ekin proj' return def doAnalysis(self): """Execute from GUI""" if self.E == None: return methods = self.methods.getcurselection() if 'Process All' in self.doall.getcurselection(): self.doAll(methods=methods) else: if 'method 1' in methods: self.fitVantHoff(E=self.E, d=self.dmenu.getcurselection(), transwidth=int(self.tw.getvalue())) if 'method 2' in methods: self.fitElwellSchellman(E=self.E, d=self.dmenu.getcurselection(), transwidth=int(self.tw.getvalue())) if 'method 3' in methods: self.fitDifferentialCurve(E=self.E, d=self.dmenu.getcurselection(), smooth=int(self.sm.getvalue())) if 'method 4' in methods: self.breslauerMethod( E=self.E, d=self.dmenu.getcurselection()) #,invert=opts.invert) return def guessMidpoint(self, x, y): """guess midpoint for unfolding model""" midy = min(y) + (max(y) - min(y)) / 2.0 midx = 0 closest = 1e4 for i in range(len(x)): c = abs(y[i] - midy) if c < closest: midx = x[i] closest = c return midx def transformCD(self, x, y, transwidth=None, ax=None): """Transform raw data into fraction unfolded per temp value, by fitting to a general unfolding equation that extracts baseline/slopes""" #fit baseline slopes and get intercepts d50 = self.guessMidpoint(x, y) print 'fitting to get baseline slopes and intercepts..' 
print 'midpoint is %s' % d50 A, X = Fitting.doFit(expdata=zip(x, y), model='Unfolding', noiter=50, silent=True, guess=False, startvalues=[1, 1, 1, 1, 1, d50]) #print X.getResult() fity = X.getFitLine(x) fd = X.getFitDict() if ax != None: p = ax.plot(x, fity, 'r', lw=2) self.drawParams(ax, fd) #we then use slopes and intercepts get frac unfolded at each temp mn = fd['bn'] mu = fd['bd'] #slopes #if mu>0.01: mu = 0.01 yn = fd['an'] yu = fd['ad'] #intercepts d50 = fd['d50'] m = fd['m'] t = [] f = [] #print mu, mn for T, yo in zip(x, y): fu = (yo - (yn + mn * T)) / ((yu + mu * T) - (yn + mn * T)) #print fu, (yo-(yn+mn*T)), (m), mu, mn #if f>0: f.append(fu) t.append(T) #try to take useful transition region of data at, af = t, f diff = 1e5 if transwidth != None: for i in t: d = abs(i - d50) if d < diff: mid = t.index(i) diff = d L = int(mid - transwidth) U = int(mid + transwidth) t, f = t[L:U], f[L:U] return at, af, t, f def fitVantHoff(self, E=None, d=None, xy=None, transwidth=80, invert=False, show=True, figname=None): """Derive fraction unfolded, get K and fit to Van't Hoff. 
see http://www.jbc.org/content/277/43/40717.full or http://www.ncbi.nlm.nih.gov/pmc/articles/PMC2144003/ """ if E != None: if not d in E.datasets: print 'no such dataset, %s' % d print 'available datasets:', E.datasets return ek = E.getDataset(d) x, y = ek.getxySorted() elif xy != None: x, y = xy if 'Convert Celsius-Kelvin' in self.conversions.getcurselection(): x = [i + 273 for i in x] if invert == True: y = [max(y) - i for i in y[:]] f = plt.figure(figsize=(18, 6)) ax = f.add_subplot(131) p = ax.plot(x, y, 'o', alpha=0.6) ax.set_xlabel('T(K)') ax.set_ylabel('mdeg') ax.set_title('raw data') x1, y1, x, y = self.transformCD(x, y, transwidth, ax) cw = csv.writer(open('frac_unfolded_' + d + '.csv', 'w')) cw.writerow(['temp', 'frac']) for i in zip(x1, y1): cw.writerow(i) #derive lnK vs 1/T t = [] k = [] for T, fu in zip(x, y): if fu >= 1 or fu <= 0: continue K = fu / (1 - fu) klog = math.log(K) k.append(klog) t.append(1 / T) if len(t) < 2: return None, None, None ax = f.add_subplot(132) p = ax.plot(x1, y1, 'o', color='g', alpha=0.6) ax.set_xlabel('T(K)') ax.set_ylabel('fu') ax.set_title('fraction unfolded') ax = f.add_subplot(133) p = ax.plot(t, k, 'x', mew=2, color='black') ax.set_xlabel('1/T') #(r'$1/T ($K^-1)$') ax.set_ylabel('ln K') formatter = matplotlib.ticker.ScalarFormatter() formatter.set_scientific(True) formatter.set_powerlimits((0, 0)) ax.xaxis.set_major_formatter(formatter) for l in ax.get_xticklabels(): l.set_rotation(30) #fit this van't hoff plot A, X = Fitting.doFit(expdata=zip(t, k), model='Linear') fitk = X.getFitLine(t) p = ax.plot(t, fitk, 'r', lw=2) fd = X.getFitDict() #self.drawParams(ax,fd) #slope is deltaH/R/1000 in kJ deltaH = -fd['a'] * self.R / 1000 deltaS = fd['b'] * self.R / 1000 f.suptitle("Method 1 - deltaH: %2.2f deltaS: %2.2f" % (deltaH, deltaS), size=18) f.subplots_adjust(bottom=0.15, top=0.85) if show == True: self.showTkFigure(f) if figname == None: figname = d figname = figname.replace('.', '_') fname = figname + 'm1' + '.png' 
f.savefig(fname, dpi=300) print 'plot saved to %s' % os.path.abspath(fname) #plt.close() if E != None: fdata = Fitting.makeFitData(X.name, vrs=X.variables) E.insertDataset(xydata=[t, k], newname=d + '_vanthoff', replace=True, fit=fdata) #E.saveProject() return deltaH, deltaS, ax def fitElwellSchellman(self, E=None, d=None, xy=None, transwidth=50, invert=False, show=True, figname=None): """Fit entire raw data simultaneously to the three main thermodynamic parameters using Elwell/Schellman method""" if E != None: ek = E.getDataset(d) x, y, a, xerr, yerr = ek.getAll() elif xy != None: x, y = xy else: return if invert == True: y = [max(y) - i for i in y[:]] f = plt.figure(figsize=(10, 5)) ax = f.add_subplot(121) p = ax.plot(x, y, 'o', alpha=0.5) ax.set_xlabel('T') ax.set_xlabel('mdeg') ax.set_title('raw data') x1, y1, x, y = self.transformCD(x, y, transwidth, ax) t = [] dg = [] R = 8.3144e-3 for T, fu in zip(x, y): if fu >= 1 or fu <= 0: continue K = fu / (1 - fu) deltaGt = -R * T * math.log(K) dg.append(deltaGt) t.append(T) ax1 = f.add_subplot(122) p = ax1.plot(t, dg, 'x', mew=2, color='black') ax1.set_xlabel('T') ax1.set_ylabel('dG(T)') ax.set_title('stability curve') A, X = Fitting.doFit(expdata=zip(t, dg), model='schellman', grad=1e-9, conv=1e-9) fity = X.getFitLine(t) p = ax1.plot(t, fity, 'r', lw=2) fd = X.getFitDict() self.drawParams(ax1, fd) deltaH = fd['deltaH'] deltacp = fd['deltacp'] Tm = fd['Tm'] f.suptitle("Method 2 - deltaH: %2.2f deltaCp: %2.2e Tm: %2.2f" % (deltaH, deltacp, Tm), size=18) if show == True: self.showTkFigure(f) if figname == None: figname = d figname = figname.replace('.', '_') fname = figname + 'm1' + '.png' f.savefig(fname, dpi=300) print 'plot saved to %s' % os.path.abspath(fname) if E != None: fdata = Fitting.makeFitData(X.name, vrs=X.variables) E.insertDataset(xydata=[t, dg], newname=d + '_vanthoff2', replace=True, fit=fdata) #E.saveProject() return deltaH, Tm, deltacp def breslauerMethod(self, E=None, d=None, xy=None, invert=False, 
show=True, figname=None): """Finds slope of trans region and plugs this in to equation http://www.springerlink.com/content/r34n0201g30563u7/ """ if E != None: ek = E.getDataset(d) x, y, a, xerr, yerr = ek.getAll() elif xy != None: x, y = xy else: return f = plt.figure(figsize=(10, 6)) ax = f.add_subplot(111) ax.set_xlabel('T') p = ax.plot(x, y, 'o', alpha=0.5) d50 = self.guessMidpoint(x, y) A, X = Fitting.doFit(expdata=zip(x, y), model='Unfolding', conv=1e-7, noiter=60, guess=False, startvalues=[1, 1, 1, 1, 1, d50]) fity = X.getFitLine(x) p = ax.plot(x, fity, 'r', lw=2) fd = X.getFitDict() self.drawParams(ax, fd) Tm = fd['d50'] m = fd['m'] R = 8.3144e-3 deltaH = R * math.pow(Tm, 2) * m f.suptitle("Method 4 - deltaH: %2.2f Tm: %2.2f" % (deltaH, Tm), size=18) if show == True: self.showTkFigure(f) if figname != None: figname = figname.replace('.', '_') f.savefig(figname) plt.close() return deltaH, Tm def fitDifferentialCurve(self, E=None, d=None, xy=None, smooth=0, invert=False, show=True, figname=None): """Derive differential denaturation curve and fit to get deltaH We smooth the unfolding curve and then differentiate and finally fit to a 3 parameter equation. 
See http://www.ncbi.nlm.nih.gov/pubmed/10933511""" if E != None: ek = E.getDataset(d) x, y, a, xerr, yerr = ek.getAll() elif xy != None: x, y = xy else: return if invert == True: y = [max(y) - i for i in y[:]] leg = [] lines = [] f = plt.figure(figsize=(10, 5)) ax = f.add_subplot(121) p = ax.plot(x, y, 'x', color='black', mew=3, alpha=0.5) leg.append(p) lines.append('original') #smooth if smooth == 0: smooth = int(len(x) / 15.0) s = self.smoothListGaussian(y, smooth) p = ax.plot(x[:len(s) - 1], s[:-1], lw=3) leg.append(p) lines.append('smoothed') ax.set_title("original data") ax.set_xlabel('T') ax1 = f.add_subplot(122) #differentiate dx, ds = self.differentiate(x[:len(s)], s) #ds = [i/max(ds) for i in ds] ds = [i * 10 for i in ds] cw = csv.writer(open('diffcd.csv', 'w')) for row in zip(dx, ds): cw.writerow(row) p = ax1.plot(dx, ds, '-', lw=1.5, alpha=0.7, color='black') leg.append(p) lines.append('differential') ax1.set_title("differential denaturation") ax1.set_xlabel('T') ax1.set_ylabel('dsignal/dT') A, X = Fitting.doFit(expdata=zip(dx, ds), model='diffDenaturation', grad=1e-9, conv=1e-10) fity = X.getFitLine(dx) p = ax1.plot(dx, fity, 'r', lw=2) leg.append(p) lines.append('fit') t = X.getFitDict() self.drawParams(ax1, t) dHkcal = t['deltaH'] / 4.184 f.suptitle('Method 3 - deltaH: %2.2f kJ/mol (%2.2f kcal) Tm: %2.2f' % (t['deltaH'], dHkcal, t['Tm']), size=18) ax.legend(leg, lines, loc='best', prop=FontProperties(size="smaller")) #f.subplots_adjust(hspace=0.8) if show == True: self.showTkFigure(f) if figname != None: figname = figname.replace('.', '_') f.savefig(figname + 'm3', dpi=300) plt.close() if E != None: fdata = Fitting.makeFitData(X.name, vrs=X.variables) E.insertDataset(xydata=[dx, ds], newname=d + '_diff', replace=True, fit=fdata) #E.saveProject() return t['deltaH'], t['Tm'] def differentiate(self, x, y): dy = numpy.diff(y, 1) dx = x[:len(dy)] return dx, dy def smoothListGaussian(self, data, degree=5): """Gaussian data smoothing function""" #buffer data 
to avoid offset result data = list(data) data = [data[0]] * (degree - 1) + data + [data[-1]] * degree window = degree * 2 - 1 weight = numpy.array([1.0] * window) weightGauss = [] for i in range(window): i = i - degree + 1 frac = i / float(window) gauss = 1 / (numpy.exp((4 * (frac))**2)) weightGauss.append(gauss) weight = numpy.array(weightGauss) * weight smoothed = [0.0] * (len(data) - window) for i in range(len(smoothed)): smoothed[i] = sum( numpy.array(data[i:i + window]) * weight) / sum(weight) return smoothed def invert(self, data): inv = [i for i in data] return inv def simulateCD(self, noise=1.0): """Simulate some CD spec data""" x = list(numpy.arange(290, 380, 0.2)) y = [] X = Fitting.getFitter(model='Unfolding', vrs=[-16, 0.01, -11.6, 0.01, 2.7, 324]) fity = X.getFitLine(x) for i in fity: noise = numpy.random.normal(i, 1.0 / 2) y.append(i + noise) cw = csv.writer(open('cd.csv', 'w')) for row in zip(x, y): cw.writerow(row) return x, y def drawParams(self, ax, d): ymin, ymax = ax.get_ylim() xmin, xmax = ax.get_xlim() inc = (ymax - ymin) / 20 xinc = (xmax - xmin) / 20 y = ymax - inc for k in d: s = k + '=' + str(round(d[k], 3)) ax.text(xmin + xinc, y, s, fontsize=10) y -= inc return def pltConfig(self): #plt.rc('text', usetex=True) plt.rc('figure.subplot', hspace=0.3, wspace=0.3) #plt.rc('axes',titlesize=22) plt.rc('font', family='monospace') return def doAll(self, methods=['method 1']): """Process all datasets in ekinprj""" E = self.E vals = {} from Dialogs import PEATDialog pb = PEATDialog(self.mainwin, option='progressbar', message='Analysing Data..') pb.update_progress(0) total = len(E.datasets) count = 0 for d in E.datasets: if '_diff' in d or '_vanthoff' in d: continue vals[d] = {} name = d if 'method 1' in methods: vals[d]['dH1'], vals[d]['dS1'], ax = self.fitVantHoff( E, d, transwidth=int(self.tw.getvalue()), show=False, figname=name) if 'method 2' in methods: vals[d]['dH2'], vals[d]['dTm2'], vals[d][ 'dCp2'] = self.fitElwellSchellman(E, d, 
show=False, figname=name) if 'method 3' in methods: vals[d]['dH3'], vals[d]['dTm3'] = self.fitDifferentialCurve( E, d, show=False, figname=name) count += 1 pb.update_progress(float(count) / total * 100.0) pb.close() self.showTable(vals) return def showTable(self, data): """Show results in table""" from PEATDB.DictEdit import DictEditor D = DictEditor(self.mainwin) D.loadTable(data) return def benchmark(self, E=None, d=None, method=1): """Test methods with varying paramaters, smoothing etc""" if E == None and self.E != None: E = self.E d = self.dmenu.getcurselection() path = 'vh_benchmark' if not os.path.exists(path): os.mkdir(path) dHvals = [] if method == 1: xlabel = 'width (K)' title = 'method 1: deltaH variation with trans region width fit' vals = range(5, 140, 5) for w in vals: dH, dS, ax = self.fitVantHoff(E, d, transwidth=w, show=False, figname=os.path.join( path, '%s_%s.png' % (d, w))) if dH == None: dH = 0 dHvals.append(dH) #take best values from middle #dHvals= dHvals[5:16] elif method == 2: xlabel = 'width (K)' title = 'method 2: deltaH variation with width fit' vals = range(5, 140, 5) for w in vals: dH, dcp, dTm = self.fitElwellSchellman( E, d, transwidth=w, show=False, figname=os.path.join(path, '%s_%s.png' % (d, w))) dHvals.append(dH) elif method == 3: xlabel = 'smoothing degree' title = 'method 3: deltaH variation with degree of smoothing' vals = range(1, 30, 3) for s in vals: dH, dTm = self.fitDifferentialCurve(E, d, smooth=s, show=False, figname=os.path.join( path, '%s_%s.png' % (d, s))) dHvals.append(dH) mean = numpy.mean(dHvals) stdev = numpy.std(dHvals) f = plt.figure() ax = f.add_subplot(111) ax.plot(vals, dHvals, lw=2) ax.set_xlabel(xlabel) ax.set_ylabel('deltaH (kJ)') ax.set_title('mean: %2.2f stdev: %2.2f' % (mean, stdev)) f.suptitle(title) f.savefig('benchmark_%s.png' % method) cw = csv.writer(open('benchmark_%s.csv' % method, 'w')) for row in zip(vals, dHvals): cw.writerow(row) return def benchmarkLimitedData(self, E=None, d=None, 
method=1): """test any method with varying limited data""" if E == None and self.E != None: E = self.E d = self.dmenu.getcurselection() path = 'vh_benchmark' if not os.path.exists(path): os.mkdir(path) dHvals = [] vals = [] if method == 1: L = range(5, 140, 5) for w in vals: dH, dS, ax = self.fitVantHoff(E, d, transwidth=w, show=False, figname=os.path.join( path, '%s_%s.png' % (d, w))) return @classmethod def plotCorrelation(self, x=None, y=None, xlabel='method1', ylabel='method2'): if x == None: data = open('compared.csv', 'r') cr = csv.reader(data) x = [float(r[0]) for r in cr] data.seek(0) y = [float(r[1]) for r in cr] f = plt.figure() ax = f.add_subplot(111) line = ax.scatter(x, y, marker='o', alpha=0.8) cl = numpy.arange(0, max(x) + 50) ax.plot(cl, cl, 'g', alpha=0.5, lw=2) ax.set_xlabel(xlabel) ax.set_ylabel(ylabel) ax.set_xlim(150, 600) ax.set_ylim(150, 600) ax.set_title('Correlation') from scipy.stats import stats cc = str(round(pow(stats.pearsonr(x, y)[0], 2), 2)) ax.text(400, 180, r'$r^2= %s$' % cc, fontsize=16) self.showTkFigure(f) return def showTkFigure(self, fig): from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg, NavigationToolbar2TkAgg fr = Toplevel() canvas = FigureCanvasTkAgg(fig, master=fr) #self.canvas.show() canvas.get_tk_widget().pack(side=TOP, fill=X, expand=1) mtoolbar = NavigationToolbar2TkAgg(canvas, fr) mtoolbar.update() canvas._tkcanvas.pack(side=BOTTOM, fill=BOTH, expand=1) return