def calculateAvDlossPerDose(PDBmulti, n, maxDoses=9):
    """Summarise the Dloss metric per residue/nucleotide type at each dose.

    Every atom contributes the first ``maxDoses`` entries of its
    ``mindensity`` series (used here as the Dloss metric, per the original
    comments). For each distinct ``basetype`` the per-dose mean, standard
    deviation, n-th percentile, confidence interval and mean relative to the
    whole-structure mean are computed, printed, and written to four CSV
    files in the current working directory:

    * ``avDlossPerDataset_stdError.csv``        - mean row, then std row
    * ``avDlossPerDataset_confIntError.csv``    - mean row, then conf. int. row
    * ``<n>thTile_DlossPerDataset.csv``         - n-th percentile row
    * ``avDlossPerDataset_DlossNormalised.csv`` - mean minus overall mean

    Each row is ``<basetype>,<value>,<value>,...``; residue types are
    reported in sorted order so the output is reproducible between runs.

    Args:
        PDBmulti: iterable of atom objects exposing ``basetype`` and a
            sliceable ``mindensity`` series. All atoms are assumed to carry
            series of equal length (np.mean over axis 0 needs that).
        n: percentile (0-100) passed to ``np.percentile``; also used in the
            percentile CSV file name.
        maxDoses: maximum number of leading datasets to include. Defaults
            to 9, the value that was previously hard-coded.

    Returns:
        None. Results are printed and written to disk.
    """
    # Single pass over the atoms: keep every truncated series for the
    # structure-wide mean and file the same series under its residue type.
    allSeries = []
    seriesByRes = {}
    for atom in PDBmulti:
        series = atom.mindensity[0:maxDoses]
        allSeries.append(series)
        seriesByRes.setdefault(atom.basetype, []).append(series)

    # calculate average Dloss over structure for each dose
    avDlossOverall = np.mean(allSeries, 0)
    print("Average Dloss for structure per dose as following:")
    print(",".join([str(val) for val in avDlossOverall]))

    # Use the number of doses actually present rather than assuming nine,
    # so structures with fewer datasets do not fail with an IndexError.
    numDoses = len(avDlossOverall)

    # for each residue type, calculate the per-dose statistics
    resTypes = sorted(seriesByRes)
    statsByRes = {}
    for res in resTypes:
        resSeries = seriesByRes[res]
        avDloss = np.mean(resSeries, 0)
        # mean_confidence_interval is defined elsewhere in this file;
        # presumably a 95% confidence interval (per the original comment).
        confIntDloss = [
            mean_confidence_interval([vals[i] for vals in resSeries])
            for i in range(numDoses)
        ]
        statsByRes[res] = {
            'mean': avDloss,
            'std': np.std(resSeries, 0),
            'nthTile': np.percentile(resSeries, n, 0),
            'confInt': confIntDloss,
            'norm': avDloss - avDlossOverall,
        }

    def _asStrings(values):
        # CSV/print helper: stringify each per-dose value.
        return [str(element) for element in values]

    # The context manager guarantees all four files are closed even if a
    # write fails part-way through.
    with open("avDlossPerDataset_stdError.csv", "w") as csvStdError, \
            open("avDlossPerDataset_confIntError.csv", "w") as csvConfInt, \
            open("{}thTile_DlossPerDataset.csv".format(n), "w") as csvNthTile, \
            open("avDlossPerDataset_DlossNormalised.csv", "w") as csvNormalised:
        for res in resTypes:
            stats = statsByRes[res]
            avDlossList = _asStrings(stats['mean'])
            stdDlossList = _asStrings(stats['std'])
            nthTileDlossList = _asStrings(stats['nthTile'])
            confIntDlossList = _asStrings(stats['confInt'])
            avDlossNormList = _asStrings(stats['norm'])

            print("\n***\n{}: {}".format(res, "-->".join(avDlossList)))
            print("{}: {}".format(res, "-->".join(stdDlossList)))
            print("{}: {}".format(res, "-->".join(confIntDlossList)))

            csvStdError.write("{},{}\n".format(res, ",".join(avDlossList)))
            csvStdError.write("{},{}\n".format(res, ",".join(stdDlossList)))
            csvNthTile.write("{},{}\n".format(res, ",".join(nthTileDlossList)))
            csvNormalised.write("{},{}\n".format(res, ",".join(avDlossNormList)))
            csvConfInt.write("{},{}\n".format(res, ",".join(avDlossList)))
            csvConfInt.write("{},{}\n".format(res, ",".join(confIntDlossList)))
def densMetricErrorbarGraphs(self,auto,where,metricTypes,confInt):
    """Plot density-metric change against dataset for a set of equivalent atoms.

    For one atom type (selected through ``self.getEquivalentAtoms``) a 2x3
    grid of subplots is drawn, one per density metric / normalisation
    combination. Each subplot shows the mean over the equivalent atoms at
    every dataset, with error bars. Protein atoms are split into unbound
    and bound chains; any other atoms are treated as RNA and split by
    whether ``residuenum % 5`` equals ``self.residueNum``.

    Args:
        auto: if True the metric set is taken from ``metricTypes``; if False
            the user is prompted on the command line ("1" selects set 1,
            anything else selects set 2). Also forwarded to
            ``self.getEquivalentAtoms``.
        where: directory to save the figure in; created if it does not exist.
        metricTypes: 1, 2, 3 or 4, selecting the metric set when ``auto`` is
            True (ignored otherwise).
        confInt: if True, protein error bars come from
            ``mean_confidence_interval`` (95% confidence interval according
            to the original comments) and the file name ends in
            ``95confIntErrorbars``; otherwise 1 SD is used. RNA error bars
            are always 1 SD.

    Returns:
        None. The figure is saved as
        ``<where>/6DamageSubplots_<basetype>_<residuenum>_<atomtype>_<suffix>.png``.

    Raises:
        ValueError: if ``auto`` is neither True nor False, or ``metricTypes``
            is not one of the supported sets (previously this surfaced later
            as a NameError on an unassigned local).
    """
    calphaNorm = 'Calpha normalised'
    # Selectable metric sets: (density metrics, normalisation types).
    metricSets = {
        1: (['loss','net','mean','gain'],
            ['Standard',calphaNorm]),
        2: (['loss','net','mean','gain','bfactor','bdamage'],
            ['Standard']),
        3: (['|loss|','loss','net','mean','gain','bfactor'],
            ['Standard']),
        4: (['max-simple','median-simple','gain','median','mean','loss'],
            ['Standard']),
    }

    # get equivalent atoms of specified type (command line input to specify)
    self.getEquivalentAtoms(auto)
    atoms = self.equivAtoms

    # Determine density metric set to plot here
    if auto == True:
        try:
            densMets,normTypes = metricSets[metricTypes]
        except (KeyError,TypeError):
            raise ValueError(
                "metricTypes must be 1, 2, 3 or 4 when auto is True, "
                "got {!r}".format(metricTypes))
    elif auto == False:
        # raw_input only exists on Python 2; fall back to input elsewhere.
        try:
            readLine = raw_input
        except NameError:
            readLine = input
        print('Which metrics would you like to plot...')
        userInput = readLine("1 or 2?: ")
        densMets,normTypes = metricSets[1 if userInput == str(1) else 2]
    else:
        raise ValueError("auto must be True or False, got {!r}".format(auto))

    # determine whether dealing with protein or RNA atoms
    protein = atoms[0].boundOrUnbound() in ('unbound protein','bound protein')

    # Split the atoms into the two plotted groups once, up front, instead of
    # re-filtering them for every dataset of every metric. Each entry is
    # (key, atoms in group, line colour, legend label).
    if protein:
        plotGroups = []
        for boundType,colour,label in (('unbound','#99ccff','Non-bound'),
                                       ('bound','#f47835','Bound')):
            wanted = '{} protein'.format(boundType)
            members = [a for a in atoms if a.boundOrUnbound() == wanted]
            plotGroups.append((boundType,members,colour,label))
    else:
        first = [a for a in atoms if a.residuenum%5 == self.residueNum]
        second = [a for a in atoms if a.residuenum%5 != self.residueNum]
        plotGroups = [('RNA 1',first,'r','G1'),
                      ('RNA 2',second,'g','G3')]

    # Explicit equality keeps the original rule that only a value equal to
    # True selects confidence intervals (used for error bars and file name).
    useConfInt = (confInt == True)
    # Confidence intervals are only ever computed/plotted for protein atoms.
    errKey = '95ConfInt' if (protein and useConfInt) else 'std'

    sns.set(style="white", context="talk")
    f = plt.figure(figsize=(16, 8))

    # define x range here (damage set numbers or doses if specified)
    if self.doseList == []:
        x = list(range(2,len(atoms[0].meandensity)+2))[0:10]
        x_label = 'Damage set'
    else:
        x = self.doseList
        x_label = "Dose (MGy)"
    numPoints = len(x)

    def _groupStats(members,densMet,normType):
        # Per-dataset mean/std (and confidence interval for protein atoms)
        # of one metric over the atoms in 'members'.
        stats = {'mean':[],'std':[]}
        if protein:
            stats['95ConfInt'] = []
        for j in range(numPoints):
            vals = [a.densMetric[densMet][normType]['values'][j]
                    for a in members]
            stats['mean'].append(np.mean(vals))
            stats['std'].append(np.std(vals))
            if protein:
                stats['95ConfInt'].append(mean_confidence_interval(vals))
        return stats

    i = 0
    for densMet in densMets:
        for normType in normTypes:
            # Calpha-normalised versions of 'mean' and 'gain' are not plotted.
            if densMet in ('mean','gain') and normType == calphaNorm:
                continue
            i += 1

            ax = plt.subplot(2,3,i)
            ax.set_xlim([0, 29])
            for key,members,colour,label in plotGroups:
                stats = _groupStats(members,densMet,normType)
                plt.errorbar(x,stats['mean'],yerr=stats[errKey],
                             fmt='-o',capthick=2,color=colour,label=label)
            ax.legend(loc='best')

            plt.xlabel(x_label)
            if normType == calphaNorm:
                plt.ylabel('Normalised D{} change'.format(densMet))
            else:
                plt.ylabel('{} D{} change'.format(normType,densMet))

            # NOTE: a Hotelling T-squared test between bound and unbound
            # rings (self.hotellingTsquareTest) was sketched here in
            # commented-out form but never enabled.

    plt.subplots_adjust(top=0.90)
    f.subplots_adjust(hspace=0.4)
    f.subplots_adjust(wspace=0.5)

    # The title and file name describe the last equivalent atom. The
    # original relied on Python 2 leaking the list-comprehension variable
    # 'atom' for this; the reference is now explicit.
    refAtom = atoms[-1]
    f.suptitle('damage metrics vs damage set: {} {} {}'.format(
        refAtom.basetype,refAtom.residuenum,refAtom.atomtype),fontsize=20)

    # check if directory exists to save graphs to and make if not:
    if not os.path.exists(where):
        os.makedirs(where)

    # save graphs to directory specified by 'where'
    if useConfInt:
        f.savefig('{}/6DamageSubplots_{}_{}_{}_95confIntErrorbars.png'.format(
            where,refAtom.basetype,refAtom.residuenum,refAtom.atomtype))
    else:
        f.savefig('{}/6DamageSubplots_{}_{}_{}_SDerrorbars.png'.format(
            where,refAtom.basetype,refAtom.residuenum,refAtom.atomtype))