def plot():
    """Plot a 2D histogram of <Biso> against resolution as a contour plot.

    Reads ``Biso_v_resolution.gnuplotdata`` from the current directory.  On
    each non-blank line the first whitespace-separated column is read as
    <Biso> and the second as the resolution.  Observations are binned to 0.1
    in resolution and to 1 in <Biso>.  The correlation coefficient returned
    by ``statistics.do_regression`` (a project module, not the standard
    library one) is printed, and the bin counts on the grid resolution
    0.5..3.5 by <Biso> 0..100 are handed to ``gnuplot.contour_plot``.

    Returns None.  Raises IndexError/ValueError if a non-blank line does not
    hold two numeric columns.
    """
    d_splot = {}
    l_resolution = []
    l_Biso = []

    ## 'with' closes the handle even when a row fails to parse
    with open('Biso_v_resolution.gnuplotdata', 'r') as fd:
        for line in fd:
            fields = line.split()
            if not fields:
                ## tolerate blank lines (e.g. a trailing newline)
                continue
            Biso = float(fields[0])
            resolution = float(fields[1])
            d_Biso = d_splot.setdefault(round(resolution, 1), {})
            Biso_rounded = round(Biso, 0)
            d_Biso[Biso_rounded] = d_Biso.get(Biso_rounded, 0) + 1
            l_resolution.append(resolution)
            l_Biso.append(Biso)

    ## only the correlation coefficient of the regression is reported
    _a, _b, r, _p = statistics.do_regression(
        l_resolution, l_Biso, verbose=False,
    )
    print('correlation = r = %s' % (r,))

    lines = []
    for tenths in range(5, 35 + 1):
        ## same float keys as the original lookup (0.5, 0.6, ..., 3.5)
        resolution = tenths / 10.
        d_Biso = d_splot.get(resolution, {})
        for i_Biso in range(0, 100 + 1):
            ## float so that the key and the printed value match the bins
            Biso = float(i_Biso)
            lines.append(
                '%s %s %s\n' % (resolution, Biso, d_Biso.get(Biso, 0))
            )
        ## empty line closes the block of rows for this resolution
        lines.append('\n')

    gnuplot.contour_plot(
        'validation_Biso_v_resolution',
        lines,
        xlabel='resolution (Angstrom)',
        ylabel='<Biso>',
        bool_remove=False,
    )

    return
def plot():
    """Plot a 2D histogram of <Biso> against resolution as a contour plot.

    Reads ``Biso_v_resolution.gnuplotdata`` from the current directory.  On
    each non-blank line the first whitespace-separated column is read as
    <Biso> and the second as the resolution.  Observations are binned to 0.1
    in resolution and to 1 in <Biso>.  The correlation coefficient returned
    by ``statistics.do_regression`` (a project module, not the standard
    library one) is printed, and the bin counts on the grid resolution
    0.5..3.5 by <Biso> 0..100 are handed to ``gnuplot.contour_plot``.

    Returns None.  Raises IndexError/ValueError if a non-blank line does not
    hold two numeric columns.
    """
    d_splot = {}
    l_resolution = []
    l_Biso = []

    ## 'with' closes the handle even when a row fails to parse
    with open('Biso_v_resolution.gnuplotdata', 'r') as fd:
        for line in fd:
            fields = line.split()
            if not fields:
                ## tolerate blank lines (e.g. a trailing newline)
                continue
            Biso = float(fields[0])
            resolution = float(fields[1])
            d_Biso = d_splot.setdefault(round(resolution, 1), {})
            Biso_rounded = round(Biso, 0)
            d_Biso[Biso_rounded] = d_Biso.get(Biso_rounded, 0) + 1
            l_resolution.append(resolution)
            l_Biso.append(Biso)

    ## only the correlation coefficient of the regression is reported
    _a, _b, r, _p = statistics.do_regression(
        l_resolution, l_Biso, verbose=False,
    )
    print('correlation = r = %s' % (r,))

    lines = []
    for tenths in range(5, 35 + 1):
        ## same float keys as the original lookup (0.5, 0.6, ..., 3.5)
        resolution = tenths / 10.
        d_Biso = d_splot.get(resolution, {})
        for i_Biso in range(0, 100 + 1):
            ## float so that the key and the printed value match the bins
            Biso = float(i_Biso)
            lines.append(
                '%s %s %s\n' % (resolution, Biso, d_Biso.get(Biso, 0))
            )
        ## empty line closes the block of rows for this resolution
        lines.append('\n')

    gnuplot.contour_plot(
        'validation_Biso_v_resolution',
        lines,
        xlabel='resolution (Angstrom)',
        ylabel='<Biso>',
        bool_remove=False,
    )

    return
def plot_contour(dn, d_lengths):
    """Draw the INDEL length contour plot for one data set.

    dn        -- label of the data set; used to build the file prefix
                 'INDEL_<dn>'.
    d_lengths -- data passed unchanged to ``gnuplot.contour_plot`` as
                 ``d_dat`` (its exact layout is defined by that function).

    The plot is requested with ``bool_log=True`` and upper axis limits
    x2=50, y2=50; ``bool_remove=False`` is passed through (presumably so the
    intermediate gnuplot files are kept -- confirm in the gnuplot module).

    Returns None.
    """
    ## the former unused key list (left over from a disabled length filter)
    ## has been dropped; d_lengths is forwarded untouched
    gnuplot.contour_plot(
        d_dat=d_lengths,
        fileprefix='INDEL_%s' % (dn),
        bool_remove=False,
        xlabel='dist_m_i_n (1000bp)',
        ylabel='INDEL length',
        bool_log=True,
        x2=50,
        y2=50,
    )

    return
def plot_dihedrals(d_count,):
    """Draw one phi/psi contour plot per category in d_count.

    d_count -- dictionary ``d_count[key][phi][psi] -> count`` with phi and
               psi on the grid -180, -175, ..., 175.

    Keys listed in the title table below are plotted with a formatted title.
    Keys naming one of the 18 residue types in ``skipped`` are not plotted.
    Any other key is plotted with the key itself as title.

    The decision to skip is taken before the count grid is read, so skipped
    keys cost nothing and need not carry a complete grid.

    Returns None.
    """
    phipsi_step = 5

    d_titles = {
        'sheet': '{/Symbol b} sheet',
        'helix_alpha': '{/Symbol a} helix',
        'helix_pi': '{/Symbol p} helix',
        'helix_310': '3_1_0 helix',
        'prePRO_notGLY': 'prePro (excl. Gly)',
        'prePRO_GLY': 'Gly (prePro)',
        'all_notgly_notpro_notprepro': 'all (excl. Gly, Pro and pre-Pro)',
        'turns_notgly_notpro_notprepro': 'Turn (excl. Gly, Pro and pre-Pro)',
    }

    ## residue types for which no individual plot is made
    skipped = frozenset([
        'ALA', 'CYS', 'ASP', 'GLU', 'PHE', 'HIS', 'ILE', 'LYS', 'LEU',
        'MET', 'ASN', 'GLN', 'ARG', 'SER', 'THR', 'VAL', 'TRP', 'TYR',
    ])

    angles = range(-180, 180, phipsi_step)

    for k in d_count:

        print('plot %s' % (k,))

        if k in d_titles:
            title = d_titles[k]
        elif k in skipped:
            continue
        else:
            title = k

        l = []
        for phi in angles:
            for psi in angles:
                l.append('%s %s %s\n' % (phi, psi, d_count[k][phi][psi]))
            ## empty line closes the block of rows for this phi
            l.append('\n')

        gnuplot.contour_plot(
            k, l,
            title=title,
            xlabel='{/Symbol f}', ylabel='{/Symbol y}',
            x1=-180, x2=180,
            y1=-180, y2=180,
            z1=0,
            bool_remove=False,
        )

    return
def plot_contour(dn, d_lengths):
    """Draw the INDEL length contour plot for one data set.

    dn        -- label of the data set; used to build the file prefix
                 'INDEL_<dn>'.
    d_lengths -- data passed unchanged to ``gnuplot.contour_plot`` as
                 ``d_dat`` (its exact layout is defined by that function).

    The plot is requested with ``bool_log=True`` and upper axis limits
    x2=50, y2=50; ``bool_remove=False`` is passed through (presumably so the
    intermediate gnuplot files are kept -- confirm in the gnuplot module).

    Returns None.
    """
    ## the former unused key list (left over from a disabled length filter)
    ## has been dropped; d_lengths is forwarded untouched
    gnuplot.contour_plot(
        d_dat=d_lengths,
        fileprefix='INDEL_%s' % (dn),
        bool_remove=False,
        xlabel='dist_m_i_n (1000bp)',
        ylabel='INDEL length',
        bool_log=True,
        x2=50,
        y2=50,
    )

    return
## plot(d_mmCIF_main, d_rmsds) -- relate pairwise RMSDs to crystallographic
## metadata and write histograms, scatter plots and contour plots.
##
## NOTE(review): the statement nesting of this function has been lost (the
## body is collapsed onto three physical lines and every inline '##' now
## comments out the rest of its line).  The code is therefore kept verbatim
## below; it has to be re-indented from the original file before it can run.
##
## What the visible statements do:
##  * The keys of d_rmsds are sorted and every pair (pdb1, pdb2) with pdb1
##    before pdb2 is visited; rmsd = d_rmsds[pdb1][pdb2].
##  * For each member of the pair, core.parse_mmCIF_item is called on
##    d_mmCIF_main[pdb[:4]] for '_symmetry.space_group_name_H-M',
##    '_diffrn.ambient_temp', '_exptl_crystal_grow.pH',
##    '_refine.pdbx_starting_model' and '_refine.ls_d_res_high'.
##  * Pairs with rmsd > 1 are printed.
##  * When both temperatures (resp. both pH values) are truthy, the absolute
##    difference is stored together with rmsd for the scatter data and the
##    correlation lists, and counted in a histogram (temperature differences
##    are binned to the nearest 10, pH differences are used as they are).
##  * max(resolution1, resolution2) is stored with rmsd; unless it equals
##    'N/A' it is converted with float() for the correlation lists and
##    counted in a histogram after rounding to 0 decimals.
##  * append_to_dictionary collects rmsd per space group pair and per
##    starting model pair.
##  * statistics.correlation is evaluated for the three property lists.
##  * Histograms: 'histo_<prefix>.txt' and the gnuplot script 'tmp.txt' are
##    written, then 'gnuplot tmp.txt' and 'convert gnuplot.ps
##    histo_<prefix>.png' are run through os.system.
##  * Scatter plots: '<prefix>.gnuplotdata' is written and
##    gnuplot.scatter_plot_2d is called with regression=True.
##  * Contour plots: for space group and starting model a grid of average
##    rmsd values is built (the value 9 is used when a pair has no entry)
##    and passed to gnuplot.contour_plot, followed by a 'convert' call.
##  * Finally both dictionaries and the three correlations are printed.
##
## NOTE(review): 'method', 'spacegroup', 'n_mutations_max',
## 'append_to_dictionary', 'core', 'os', 'statistics' and 'gnuplot' are not
## defined inside this function -- presumably module-level names; confirm.
## NOTE(review): max(resolution1, resolution2) runs before float(); if the
## parsed items are strings (they are compared with 'N/A' and passed to
## float()) the comparison is lexicographic, not numeric -- confirm.
## NOTE(review): the gnuplot lines 'set xlabel "%s' and 'set ylabel "count'
## have no closing double quote -- confirm this is intended.
## NOTE(review): Python 2 only (print statement, d.keys().sort()).
def plot(d_mmCIF_main,d_rmsds,): l_pdbs = d_rmsds.keys() l_pdbs.sort() l_temperature = [] l_ph = [] l_resolution = [] d_spacegroup = {} d_starting_model = {} l_correl_T = [[],[],] l_correl_pH = [[],[],] l_correl_resol_max = [[],[],] d_histo_pH = {} d_histo_T = {} d_histo_resol = {} for i1 in range(len(l_pdbs)-1): pdb1 = l_pdbs[i1] spacegroup1 = core.parse_mmCIF_item(d_mmCIF_main[pdb1[:4]],'_symmetry.space_group_name_H-M',) T1 = core.parse_mmCIF_item(d_mmCIF_main[pdb1[:4]],'_diffrn.ambient_temp',) pH1 = core.parse_mmCIF_item(d_mmCIF_main[pdb1[:4]],'_exptl_crystal_grow.pH',) starting_model1 = core.parse_mmCIF_item(d_mmCIF_main[pdb1[:4]],'_refine.pdbx_starting_model',) resolution1 = core.parse_mmCIF_item(d_mmCIF_main[pdb1[:4]],'_refine.ls_d_res_high',) for i2 in range(i1+1,len(l_pdbs)): pdb2 = l_pdbs[i2] spacegroup2 = core.parse_mmCIF_item(d_mmCIF_main[pdb2[:4]],'_symmetry.space_group_name_H-M',) T2 = core.parse_mmCIF_item(d_mmCIF_main[pdb2[:4]],'_diffrn.ambient_temp',) pH2 = core.parse_mmCIF_item(d_mmCIF_main[pdb2[:4]],'_exptl_crystal_grow.pH',) starting_model2 = core.parse_mmCIF_item(d_mmCIF_main[pdb2[:4]],'_refine.pdbx_starting_model',) resolution2 = core.parse_mmCIF_item(d_mmCIF_main[pdb2[:4]],'_refine.ls_d_res_high',) rmsd = d_rmsds[pdb1][pdb2] if rmsd > 1: print pdb1, pdb2, rmsd if T1 and T2: T_diff = abs(float(T2)-float(T1)) l_temperature += ['%s %s\n' %(T_diff,rmsd),] l_correl_T[0] += [T_diff] l_correl_T[1] += [rmsd] print T_diff, 10*round(T_diff/10.,0) if not 10*round(T_diff/10.,0) in d_histo_T.keys(): d_histo_T[10*round(T_diff/10.,0)] = 0 d_histo_T[10*round(T_diff/10.,0)] += 1 if pH1 and pH2: pH_diff = abs(float(pH2)-float(pH1)) l_ph += ['%s %s\n' %(pH_diff,rmsd),] l_correl_pH[0] += [pH_diff] l_correl_pH[1] += [rmsd] if not pH_diff in d_histo_pH.keys(): d_histo_pH[pH_diff] = 0 d_histo_pH[pH_diff] += 1 resolution_max = max(resolution1,resolution2,) l_resolution += ['%s %s\n' %(resolution_max,rmsd),] if resolution_max != 'N/A': l_correl_resol_max[0] += 
[float(resolution_max)] l_correl_resol_max[1] += [rmsd] if not round(float(resolution_max),0) in d_histo_resol.keys(): d_histo_resol[round(float(resolution_max),0)] = 0 d_histo_resol[round(float(resolution_max),0)] += 1 d_spacegroup = append_to_dictionary(d_spacegroup,spacegroup1,spacegroup2,rmsd,) d_starting_model = append_to_dictionary(d_starting_model,starting_model1,starting_model2,rmsd,) r1 = statistics.correlation(l_correl_T[0],l_correl_T[1],) r2 = statistics.correlation(l_correl_pH[0],l_correl_pH[1],) r3 = statistics.correlation(l_correl_resol_max[0],l_correl_resol_max[1],) ## ## plot histograms ## for prefix,d in [ ['deltapH',d_histo_pH,], ['deltaT',d_histo_T,], ['maxresolution',d_histo_resol,], ]: l = [] l_diffs = d.keys() l_diffs.sort() for diff in l_diffs: l += ['%s %s\n' %(diff,d[diff],)] fd = open('histo_%s.txt' %(prefix),'w') fd.writelines(l) fd.close() l = [ 'set terminal postscript eps enhanced color "Helvetica"\n', 'set output "gnuplot.ps"\n', 'set size 3,3\n', 'set style data histogram\n', 'set xtics rotate\n', 'set xlabel "%s\n' %(prefix), 'set ylabel "count\n', 'plot "histo_%s.txt" u 2:xtic(1) t ""\n' %(prefix) ] fd = open('tmp.txt','w') fd.writelines(l) fd.close() os.system('gnuplot tmp.txt') os.system('convert gnuplot.ps histo_%s.png' %(prefix)) ## ## plot rmsd as a function of each property (2d) ## for prefix,data,xlabel in [ ['pH',l_ph,'pH diff',], ['Temperature',l_temperature,'T diff',], ['resolution',l_resolution,'maximum resolution',], ]: prefix += method fd = open('%s.gnuplotdata' %(prefix),'w') fd.writelines(data) fd.close() gnuplot.scatter_plot_2d( prefix,xlabel=xlabel,ylabel='RMSD %s' %(method,), ## averages=True, regression=True, ) ## ## plot rmsd as a function of each property (contour) ## for d,prefix in [ [d_spacegroup,'spacegroup',], [d_starting_model,'startingmodel',], ]: d_tics = {} l_tics = d.keys() l_tics.sort() for i in range(len(l_tics)): d_tics[l_tics[i]] = i+.5 z1 = 9 z2 = 0 l_data = [] for x in range(len(l_tics)): k1 = 
l_tics[x] for y in range(len(l_tics)): k2 = l_tics[y] if not k2 in d[k1].keys(): average = 9 else: l_rmsds = d[k1][k2] average = sum(l_rmsds)/len(l_rmsds) if average < z1: z1 = average if average > z2: z2 = average l_data += ['%s %s %s\n' %(x,y,average,)] l_data += ['%s %s %s\n' %(x,y+1,1,)] l_data += ['\n'] for y in range(len(l_tics)): l_data += ['%s %s %s\n' %(x+1,y,1,)] l_data += ['%s %s %s\n' %(x+1,y+1,1,)] l_data += ['\n'] gnuplot.contour_plot( prefix,l_data, title='%s %s' %(prefix,method,),zlabel='RMSD %s' %(method), d_xtics = d_tics, d_ytics = d_tics, palette = '0 1 0 0, 0.9999 0 0 1, 0.9999 1 1 1, 1 1 1 1', z1 = z1, z2 = z2+0.1, bool_remove = False, ) os.system('convert %s.ps %s_spacegroup%s_mutations%s_atoms%s.png' %(prefix,prefix,spacegroup.replace(' ',''),n_mutations_max,method,)) ## os.remove('%s.ps' %(prefix,)) print d_spacegroup print d_starting_model print r1 print r2 print r3 return
## count_and_plot() -- tabulate DP, call rate and allele frequency from VCF
## files and plot them as histograms and contour plots.
##
## NOTE(review): the statement nesting of this function has been lost (the
## body is collapsed onto two physical lines and every inline '##' now
## comments out the rest of its line).  The code is therefore kept verbatim
## below; it has to be re-indented from the original file before it can run.
##
## What the visible statements do:
##  * Iterates over a hard-coded list of groups of VCF paths (l_fp_in); one
##    group is built from '<chromosome>.vqsr.filt.vcf' for 1..22, X and Y.
##  * Per group, prepares the lists l_gnuplot_MAF/DP/CR and two count tables:
##    d_contour[AF*0.01][DP*10.] (AF 0..100, DP 0..150) and
##    d_contour_CR[round(AF*0.01,2)][CR] (AF 0..100, CR 0..100).
##  * For every line of every file that does not start with '#',
##    parse_line(line) returns CHROM, d_INFO and bool_continue; lines with
##    bool_continue == True are skipped.
##  * CR = 100 - line.count('./.'), DP = int(d_INFO['DP']),
##    AF = float(d_INFO['AF']); MAF is AF when AF < 0.5, otherwise 1-AF.
##  * DP and CR are appended to their lists; unless the file is
##    'mp15_vqsr.vcf', MAF is appended as well and, for DP < 1500, the
##    MAF/DP table is incremented.  The MAF/CR table is incremented too.
##  * Whenever CHROM changes, the elapsed time since the previous change is
##    printed.
##  * title and suffix are derived from fp_in, then gnuplot.histogram2 is
##    called for DP, CR and (unless 'mp15_vqsr.vcf') AF, and
##    gnuplot.contour_plot for 'AFvDP_<suffix>' and 'AFvCR_<suffix>' using
##    the AF range 0..50 of the count tables.
##
## NOTE(review): 'except:' is bare; after it AF is set to 'N/A', but the
## following 'AF < 0.5' / '1-AF' cannot work on a string -- confirm intent.
## NOTE(review): 'stop' is not defined anywhere in this function; it looks
## like a deliberate abort through NameError -- confirm.
## NOTE(review): CR = 100-line.count('./.') presumably assumes 100 samples
## per line (the disabled check above it sums genotype counts to 100).
## NOTE(review): 'parse_line' and 'gnuplot' are not defined here --
## presumably module-level names; 'import time' appears twice.
## NOTE(review): Python 2 only (print statement, xrange, range()+list).
def count_and_plot(): chromosome = '1' import time t1 = time.time() for l_fp_in in [ ['out_GATK/join/CombineVariants.vcf'], ['out_GATK/join/ApplyRecalibration.recalibrated.filtered.vcf'], ['SelectVariants_discordance1.vcf'], ['SelectVariants_discordance2.vcf'], ['%s.vqsr.filt.vcf' %(chromosome) for chromosome in range(1,23)+['X','Y',]], ['SelectVariants_concordance.vcf'], ]: import time t1 = time.time() ## ## prepare scatter lists ## l_gnuplot_MAF = [] l_gnuplot_DP = [] l_gnuplot_CR = [] ## ## prepare contour dic ## d_contour = {} for AF in xrange(100+1): d_contour[AF*0.01] = {} for DP in xrange(150+1): d_contour[AF*0.01][DP*10.] = 0 d_contour_CR = {} for AF in xrange(100+1): d_contour_CR[round(AF*0.01,2)] = {} for CR in xrange(100+1): d_contour_CR[round(AF*0.01,2)][CR] = 0 for fp_in in l_fp_in: print fp_in fd = open(fp_in,'r') print fp_in for line in fd: if line[0] == '#': continue ## if line.count('./.')+line.count('0/0')+line.count('0/1')+line.count('1/1') != 100: ## print line ## stop CHROM, d_INFO, bool_continue = parse_line(line) if bool_continue == True: continue CR = 100-line.count('./.') DP = int(d_INFO['DP']) try: AF = float(d_INFO['AF']) except: d_INFO['AF'] AF = 'N/A' if AF < 0.5: MAF = AF else: MAF = 1-AF ## ## append to list ## l_gnuplot_DP += [DP] l_gnuplot_CR += [CR] if fp_in != 'mp15_vqsr.vcf': if AF == 'N/A': stop l_gnuplot_MAF += [MAF] if DP < 1500: d_contour[0.01*round(MAF/0.01,0)][10.*round(DP/10.,0)] += 1 ## ## append to dic ## d_contour_CR[round(MAF,2)][CR] += 1 if chromosome != CHROM: t2 = time.time() print fp_in, '%-2s' %(chromosome), '%2is' %(int(t2-t1)) chromosome = CHROM t1 = t2 ## if CHROM == '2': ## break ## if POS[-1] == '0' and POS[-2] == '0' and POS[-3] == '0' and POS[-4] == '0': ## print '%2s %9s %6s %4s' %(CHROM, POS, AF, DP,), fp_in ## break ## if POS[-1] == '0' and POS[-2] == '0' and POS[-3] == '0' and POS[-4] == '0': ## print '%2s %9s %6s %4s' %(CHROM, POS, AF, DP,), fp_in title = 
fp_in.replace('_','').replace('out_GATK/','').replace('.vcf','') suffix = fp_in.replace('out_GATK','').replace('/','').replace('.vcf','') gnuplot.histogram2( 'DP_%s' %(suffix), l_data=l_gnuplot_DP, x_step=10,x_min=0,x_max=1000,tic_step=100, xlabel='DP from VCF', ylabel='SNP count', title= title, ) gnuplot.histogram2( 'CR_%s' %(suffix), l_data=l_gnuplot_CR, x_step=1,x_min=0,x_max=100,tic_step=10, xlabel='SNP Call Rate', ylabel='SNP count', title= title, ) if fp_in != 'mp15_vqsr.vcf': gnuplot.histogram2( 'AF_%s' %(suffix), l_data=l_gnuplot_MAF, x_min=0,x_max=.5,tic_step=0.05,x_step=0.01, xlabel='AF from VCF', ylabel='SNP count', title = title, ) lines = [] for AF in xrange(50+1): for DP in xrange(150+1): lines += ['%s %s %s\n' %(AF*0.01,DP*10.,d_contour[AF*0.01][DP*10.],)] lines += ['\n'] gnuplot.contour_plot( 'AFvDP_%s' %(suffix), lines, title = title, xlabel = 'AF from VCF', ylabel = 'DP from VCF', zlabel = 'count', ) lines = [] for AF in xrange(50+1): for CR in xrange(100+1): lines += ['%s %s %s\n' %(AF*0.01,CR,d_contour_CR[round(AF*0.01,2)][CR],)] lines += ['\n'] gnuplot.contour_plot( 'AFvCR_%s' %(suffix), lines, title = title, xlabel = 'AF from VCF', ylabel = 'Call Rate from VCF', zlabel = 'count', ) ## t2 = time.time() ## print t2-t1 ## stop return
def plot_dihedrals(d_count, ):
    """Draw one phi/psi contour plot per category in d_count.

    d_count -- dictionary ``d_count[key][phi][psi] -> count`` with phi and
               psi on the grid -180, -175, ..., 175.

    Keys listed in the title table below are plotted with a formatted title.
    Keys naming one of the 18 residue types in ``skipped`` are not plotted.
    Any other key is plotted with the key itself as title.

    The decision to skip is taken before the count grid is read, so skipped
    keys cost nothing and need not carry a complete grid.

    Returns None.
    """
    phipsi_step = 5

    d_titles = {
        'sheet': '{/Symbol b} sheet',
        'helix_alpha': '{/Symbol a} helix',
        'helix_pi': '{/Symbol p} helix',
        'helix_310': '3_1_0 helix',
        'prePRO_notGLY': 'prePro (excl. Gly)',
        'prePRO_GLY': 'Gly (prePro)',
        'all_notgly_notpro_notprepro': 'all (excl. Gly, Pro and pre-Pro)',
        'turns_notgly_notpro_notprepro': 'Turn (excl. Gly, Pro and pre-Pro)',
    }

    ## residue types for which no individual plot is made
    skipped = frozenset([
        'ALA', 'CYS', 'ASP', 'GLU', 'PHE', 'HIS', 'ILE', 'LYS', 'LEU',
        'MET', 'ASN', 'GLN', 'ARG', 'SER', 'THR', 'VAL', 'TRP', 'TYR',
    ])

    angles = range(-180, 180, phipsi_step)

    for k in d_count:

        print('plot %s' % (k,))

        if k in d_titles:
            title = d_titles[k]
        elif k in skipped:
            continue
        else:
            title = k

        l = []
        for phi in angles:
            for psi in angles:
                l.append('%s %s %s\n' % (phi, psi, d_count[k][phi][psi]))
            ## empty line closes the block of rows for this phi
            l.append('\n')

        gnuplot.contour_plot(
            k, l,
            title=title,
            xlabel='{/Symbol f}', ylabel='{/Symbol y}',
            x1=-180, x2=180,
            y1=-180, y2=180,
            z1=0,
            bool_remove=False,
        )

    return
def main():
    """Count and plot phi/psi distributions for pairs of residue types.

    Steps:
      1. ``parse_dihedrals`` (defined elsewhere in this file) builds and
         plots the per-residue tables.
      2. An empty 5-degree phi/psi table is added for every ordered pair of
         different one-letter residue codes (key res1+res2).
      3. ``phipsi.txt`` is read.  Every line must have 12 columns: columns
         7 and 8 hold res1 and res2, columns 9-12 hold phi1, psi1, phi2 and
         psi2.  Lines with 'N/A' among the angles are skipped, lines with
         residue 'X' are printed and skipped.  The angles are snapped to the
         grid with ``round_angle`` (defined elsewhere in this file);
         (phi1, psi1) is counted under res2+res1 and (phi2, psi2) under
         res1+res2.
      4. For every two-letter key an existing 'plot_phipsi_<key>.png' is
         deleted, and a contour plot is drawn when the largest cell count is
         above 1 and the total count is above 3.

    Returns None.  Raises ValueError when a line of ``phipsi.txt`` does not
    have 12 columns (the line is printed first).
    """
    import os
    import sys
    sys.path.append('/home/people/tc/svn/tc_sandbox/misc/')
    import gnuplot

    phipsi_step = 5
    angles = range(-180, 180, phipsi_step)

    d_residues = {
        'ALA': 'A', 'CYS': 'C', 'ASP': 'D', 'GLU': 'E', 'PHE': 'F',
        'GLY': 'G', 'HIS': 'H', 'ILE': 'I', 'LYS': 'K', 'LEU': 'L',
        'MET': 'M', 'ASN': 'N', 'PRO': 'P', 'GLN': 'Q', 'ARG': 'R',
        'SER': 'S', 'THR': 'T', 'VAL': 'V', 'TRP': 'W', 'TYR': 'Y',
    }

    d_ramachandran = parse_dihedrals(d_residues, plot=True)

    print('preparing dictionary')
    for res1 in d_residues.values():
        for res2 in d_residues.values():
            if res1 == res2:
                continue
            ## one independent psi row per phi
            d_ramachandran[res1 + res2] = dict(
                (phi, dict.fromkeys(angles, 0)) for phi in angles
            )
    print('prepared dictionary')

    ## 'with' closes the handle even if reading fails
    with open('phipsi.txt', 'r') as fd:
        lines = fd.readlines()

    print('looping over %s lines' % (len(lines),))

    for line in lines:

        ## split once instead of once per column
        fields = line.split()

        if len(fields) != 12:
            print(line)
            ## explicit error instead of aborting through an undefined name
            raise ValueError(
                'expected 12 columns in phipsi.txt, found %s' % (len(fields),)
            )

        if 'N/A' in fields[8:12]:
            continue

        res1 = fields[6]
        res2 = fields[7]
        phi1 = float(fields[8])
        psi1 = float(fields[9])
        phi2 = float(fields[10])
        psi2 = float(fields[11])

        if res1 == 'X' or res2 == 'X':
            print(line)
            continue

        phi1 = round_angle(phi1, phipsi_step)
        psi1 = round_angle(psi1, phipsi_step)
        phi2 = round_angle(phi2, phipsi_step)
        psi2 = round_angle(psi2, phipsi_step)

        d_ramachandran[res2 + res1][phi1][psi1] += 1
        d_ramachandran[res1 + res2][phi2][psi2] += 1

    ##
    ## ramachandran plots
    ##
    for key in d_ramachandran.keys():

        ## single-letter keys are the per-residue tables from parse_dihedrals
        if len(key) == 1:
            continue

        fn_png = 'plot_phipsi_%s.png' % (key)
        if os.path.isfile(fn_png):
            os.remove(fn_png)

        print(key)

        d_key = d_ramachandran[key]
        l_gnuplot = []
        max_count = 0
        sum_count = 0
        for phi in angles:
            for psi in angles:
                count = d_key[phi][psi]
                l_gnuplot.append('%s %s %s\n' % (phi, psi, count))
                if count > max_count:
                    max_count = count
                sum_count += count
            ## empty line closes the block of rows for this phi
            l_gnuplot.append('\n')

        ## skip tables that are (almost) empty
        if max_count > 1 and sum_count > 3:
            gnuplot.contour_plot(
                'plot_phipsi_%s' % (key), l_gnuplot,
                title='%s' % (key),
                xtitle='{/Symbol f}', ytitle='{/Symbol y}',
                x1=-180, x2=180, y1=-180, y2=180,
                z1=0,
            )

    return
def parse_dihedrals(d_residues, plot=False):
    """Count phi/psi observations per residue type on a 1-degree grid.

    d_residues -- dictionary whose values are the residue codes to process;
                  for each code ``res`` the file 'phipsi_all_<res>.txt' is
                  read from the current directory.  Column 1 is phi and
                  column 2 is psi; lines with 'N/A' in either column and
                  blank lines are skipped.
    plot       -- when true, one contour plot per residue code plus a
                  combined plot 'plot_phipsi' are drawn with the project
                  ``gnuplot`` module, which is only imported in that case.

    Angles are binned with floor(), so bin k holds k <= angle < k+1 for
    negative and positive angles alike; an angle of exactly 180 is counted
    in bin -180.

    Returns ``d_ramachandran[res][phi][psi] -> count`` with phi and psi in
    -180..179.
    """
    import math
    import sys
    sys.path.append('/home/people/tc/svn/tc_sandbox/misc/')

    angles = range(-180, 180)

    d_ramachandran = {}
    for res in d_residues.values():
        ## one independent psi row per phi
        d_ramachandran[res] = dict(
            (phi, dict.fromkeys(angles, 0)) for phi in angles
        )

    for res in d_residues.values():

        print('read %s' % (res,))

        ##
        ## read lines
        ##
        with open('phipsi_all_%s.txt' % (res), 'r') as fd:
            lines = fd.readlines()

        ##
        ## parse lines
        ##
        d_res = d_ramachandran[res]
        for line in lines:
            ## split once instead of once per column
            fields = line.split()
            if not fields:
                ## tolerate blank lines (e.g. a trailing newline)
                continue
            if 'N/A' in [fields[0], fields[1]]:
                continue
            phi = float(fields[0])
            psi = float(fields[1])
            ## +180 and -180 are the same angle; the grid ends at 179
            if phi == 180.:
                phi = -180.
            if psi == 180.:
                psi = -180.
            ## floor, not int(): int() truncates toward zero, which shifts
            ## every negative bin by one degree and doubles the width of bin 0
            d_res[int(math.floor(phi))][int(math.floor(psi))] += 1

    ##
    ## ramachandran plots
    ##
    if plot:

        ## imported lazily so that parsing alone does not need gnuplot
        import gnuplot

        for res in d_ramachandran.keys():
            print('plot %s' % (res,))
            l_gnuplot_res = []
            for phi in angles:
                for psi in angles:
                    l_gnuplot_res.append(
                        '%s %s %s\n' % (phi, psi, d_ramachandran[res][phi][psi])
                    )
                l_gnuplot_res.append('\n')
            gnuplot.contour_plot(
                'plot_phipsi_%s' % (res), l_gnuplot_res,
                title='phipsi_%s' % (res),
                xtitle='{/Symbol f}', ytitle='{/Symbol y}',
            )

        ## NOTE(review): the combined data holds one row per residue code for
        ## every (phi, psi) instead of one summed row -- kept as it was;
        ## confirm whether a sum over residues was intended
        l_gnuplot = []
        for phi in angles:
            for psi in angles:
                for res in d_ramachandran.keys():
                    l_gnuplot.append(
                        '%s %s %s\n' % (phi, psi, d_ramachandran[res][phi][psi])
                    )
            l_gnuplot.append('\n')
        gnuplot.contour_plot(
            'plot_phipsi', l_gnuplot,
            title='phipsi',
            xtitle='{/Symbol f}', ytitle='{/Symbol y}',
        )

    return d_ramachandran
def main():
    """Count and plot phi/psi distributions for pairs of residue types.

    Steps:
      1. ``parse_dihedrals`` (defined elsewhere in this file) builds and
         plots the per-residue tables.
      2. An empty 5-degree phi/psi table is added for every ordered pair of
         different one-letter residue codes (key res1+res2).
      3. ``phipsi.txt`` is read.  Every line must have 12 columns: columns
         7 and 8 hold res1 and res2, columns 9-12 hold phi1, psi1, phi2 and
         psi2.  Lines with 'N/A' among the angles are skipped, lines with
         residue 'X' are printed and skipped.  The angles are snapped to the
         grid with ``round_angle`` (defined elsewhere in this file);
         (phi1, psi1) is counted under res2+res1 and (phi2, psi2) under
         res1+res2.
      4. For every two-letter key an existing 'plot_phipsi_<key>.png' is
         deleted, and a contour plot is drawn when the largest cell count is
         above 1 and the total count is above 3.

    Returns None.  Raises ValueError when a line of ``phipsi.txt`` does not
    have 12 columns (the line is printed first).
    """
    import os
    import sys
    sys.path.append('/home/people/tc/svn/tc_sandbox/misc/')
    import gnuplot

    phipsi_step = 5
    angles = range(-180, 180, phipsi_step)

    d_residues = {
        'ALA': 'A', 'CYS': 'C', 'ASP': 'D', 'GLU': 'E', 'PHE': 'F',
        'GLY': 'G', 'HIS': 'H', 'ILE': 'I', 'LYS': 'K', 'LEU': 'L',
        'MET': 'M', 'ASN': 'N', 'PRO': 'P', 'GLN': 'Q', 'ARG': 'R',
        'SER': 'S', 'THR': 'T', 'VAL': 'V', 'TRP': 'W', 'TYR': 'Y',
    }

    d_ramachandran = parse_dihedrals(d_residues, plot=True)

    print('preparing dictionary')
    for res1 in d_residues.values():
        for res2 in d_residues.values():
            if res1 == res2:
                continue
            ## one independent psi row per phi
            d_ramachandran[res1 + res2] = dict(
                (phi, dict.fromkeys(angles, 0)) for phi in angles
            )
    print('prepared dictionary')

    ## 'with' closes the handle even if reading fails
    with open('phipsi.txt', 'r') as fd:
        lines = fd.readlines()

    print('looping over %s lines' % (len(lines),))

    for line in lines:

        ## split once instead of once per column
        fields = line.split()

        if len(fields) != 12:
            print(line)
            ## explicit error instead of aborting through an undefined name
            raise ValueError(
                'expected 12 columns in phipsi.txt, found %s' % (len(fields),)
            )

        if 'N/A' in fields[8:12]:
            continue

        res1 = fields[6]
        res2 = fields[7]
        phi1 = float(fields[8])
        psi1 = float(fields[9])
        phi2 = float(fields[10])
        psi2 = float(fields[11])

        if res1 == 'X' or res2 == 'X':
            print(line)
            continue

        phi1 = round_angle(phi1, phipsi_step)
        psi1 = round_angle(psi1, phipsi_step)
        phi2 = round_angle(phi2, phipsi_step)
        psi2 = round_angle(psi2, phipsi_step)

        d_ramachandran[res2 + res1][phi1][psi1] += 1
        d_ramachandran[res1 + res2][phi2][psi2] += 1

    ##
    ## ramachandran plots
    ##
    for key in d_ramachandran.keys():

        ## single-letter keys are the per-residue tables from parse_dihedrals
        if len(key) == 1:
            continue

        fn_png = 'plot_phipsi_%s.png' % (key)
        if os.path.isfile(fn_png):
            os.remove(fn_png)

        print(key)

        d_key = d_ramachandran[key]
        l_gnuplot = []
        max_count = 0
        sum_count = 0
        for phi in angles:
            for psi in angles:
                count = d_key[phi][psi]
                l_gnuplot.append('%s %s %s\n' % (phi, psi, count))
                if count > max_count:
                    max_count = count
                sum_count += count
            ## empty line closes the block of rows for this phi
            l_gnuplot.append('\n')

        ## skip tables that are (almost) empty
        if max_count > 1 and sum_count > 3:
            gnuplot.contour_plot(
                'plot_phipsi_%s' % (key), l_gnuplot,
                title='%s' % (key),
                xtitle='{/Symbol f}', ytitle='{/Symbol y}',
                x1=-180, x2=180, y1=-180, y2=180,
                z1=0,
            )

    return
def parse_dihedrals(d_residues, plot=False):
    """Count phi/psi observations per residue type on a 1-degree grid.

    d_residues -- dictionary whose values are the residue codes to process;
                  for each code ``res`` the file 'phipsi_all_<res>.txt' is
                  read from the current directory.  Column 1 is phi and
                  column 2 is psi; lines with 'N/A' in either column and
                  blank lines are skipped.
    plot       -- when true, one contour plot per residue code plus a
                  combined plot 'plot_phipsi' are drawn with the project
                  ``gnuplot`` module, which is only imported in that case.

    Angles are binned with floor(), so bin k holds k <= angle < k+1 for
    negative and positive angles alike; an angle of exactly 180 is counted
    in bin -180.

    Returns ``d_ramachandran[res][phi][psi] -> count`` with phi and psi in
    -180..179.
    """
    import math
    import sys
    sys.path.append('/home/people/tc/svn/tc_sandbox/misc/')

    angles = range(-180, 180)

    d_ramachandran = {}
    for res in d_residues.values():
        ## one independent psi row per phi
        d_ramachandran[res] = dict(
            (phi, dict.fromkeys(angles, 0)) for phi in angles
        )

    for res in d_residues.values():

        print('read %s' % (res,))

        ##
        ## read lines
        ##
        with open('phipsi_all_%s.txt' % (res), 'r') as fd:
            lines = fd.readlines()

        ##
        ## parse lines
        ##
        d_res = d_ramachandran[res]
        for line in lines:
            ## split once instead of once per column
            fields = line.split()
            if not fields:
                ## tolerate blank lines (e.g. a trailing newline)
                continue
            if 'N/A' in [fields[0], fields[1]]:
                continue
            phi = float(fields[0])
            psi = float(fields[1])
            ## +180 and -180 are the same angle; the grid ends at 179
            if phi == 180.:
                phi = -180.
            if psi == 180.:
                psi = -180.
            ## floor, not int(): int() truncates toward zero, which shifts
            ## every negative bin by one degree and doubles the width of bin 0
            d_res[int(math.floor(phi))][int(math.floor(psi))] += 1

    ##
    ## ramachandran plots
    ##
    if plot:

        ## imported lazily so that parsing alone does not need gnuplot
        import gnuplot

        for res in d_ramachandran.keys():
            print('plot %s' % (res,))
            l_gnuplot_res = []
            for phi in angles:
                for psi in angles:
                    l_gnuplot_res.append(
                        '%s %s %s\n' % (phi, psi, d_ramachandran[res][phi][psi])
                    )
                l_gnuplot_res.append('\n')
            gnuplot.contour_plot(
                'plot_phipsi_%s' % (res), l_gnuplot_res,
                title='phipsi_%s' % (res),
                xtitle='{/Symbol f}', ytitle='{/Symbol y}',
            )

        ## NOTE(review): the combined data holds one row per residue code for
        ## every (phi, psi) instead of one summed row -- kept as it was;
        ## confirm whether a sum over residues was intended
        l_gnuplot = []
        for phi in angles:
            for psi in angles:
                for res in d_ramachandran.keys():
                    l_gnuplot.append(
                        '%s %s %s\n' % (phi, psi, d_ramachandran[res][phi][psi])
                    )
            l_gnuplot.append('\n')
        gnuplot.contour_plot(
            'plot_phipsi', l_gnuplot,
            title='phipsi',
            xtitle='{/Symbol f}', ytitle='{/Symbol y}',
        )

    return d_ramachandran