def tabulate(d_mmCIF_main,):
    """Write a one-line-per-structure summary table to 'HEWL_table.txt'.

    d_mmCIF_main maps each PDB ID to its parsed mmCIF dictionary. Entries are
    written in sorted ID order with the columns: ID, mutation, temperature,
    pH, resolution, space group and starting model.

    The mutation column is 'wt' when the entry's '_entity_poly_seq.mon_id'
    sequence equals the module-level ref_seq. Otherwise every position that
    differs from ref_seq is reported as <ref><number><new> (one-letter codes
    looked up in the module-level d_321), comma-separated when there is more
    than one. Only the positions present in both sequences are compared; if
    the sequences differ but no compared position does, the column is 'N/A'.

    Returns None.
    """

    l_lines = ['ID mut T pH res spacegroup startingmodel\n']

    for pdb in sorted(d_mmCIF_main):

        d_mmCIF = d_mmCIF_main[pdb]
        seq = d_mmCIF['_entity_poly_seq.mon_id']

        if seq == ref_seq:
            s_mutation = 'wt'
        else:
            ## zip() stops at the shorter sequence, so a length difference
            ## cannot index past the end of the reference
            l_mutations = [
                '%1s%3i%1s' %(d_321[res_id_ref],i+1,d_321[res_id_mmCIF],)
                for i,(res_id_mmCIF,res_id_ref) in enumerate(zip(seq,ref_seq))
                if res_id_mmCIF != res_id_ref
                ]
            ## rebuilt for every entry so that a label can never be carried
            ## over from the previous structure
            s_mutation = ','.join(l_mutations) or 'N/A'

        spacegroup = core.parse_mmCIF_item(d_mmCIF,'_symmetry.space_group_name_H-M',)
        T = core.parse_mmCIF_item(d_mmCIF,'_diffrn.ambient_temp',)
        pH = core.parse_mmCIF_item(d_mmCIF,'_exptl_crystal_grow.pH',)
        starting_model = core.parse_mmCIF_item(d_mmCIF,'_refine.pdbx_starting_model',)
        resolution = core.parse_mmCIF_item(d_mmCIF,'_refine.ls_d_res_high',)

        ## plot() treats 'N/A' as a possible resolution value; print such a
        ## placeholder verbatim instead of failing in float()
        try:
            s_resolution = '%3.1f' %(float(resolution),)
        except (TypeError, ValueError):
            s_resolution = '%3s' %(resolution,)

        l_lines += [
            '%4s %-5s %5s %5s %s %-10s %4s\n' %(
                pdb,s_mutation,T,pH,s_resolution,spacegroup,starting_model,
                )
            ]

    ## the context manager closes the file even if writing fails
    with open('HEWL_table.txt','w') as fd:
        fd.writelines(l_lines)

    return
def _parse_structure_properties(d_mmCIF):
    """Return (spacegroup, T, pH, starting_model, resolution) for one mmCIF dictionary."""

    return tuple([
        core.parse_mmCIF_item(d_mmCIF,item,)
        for item in (
            '_symmetry.space_group_name_H-M',
            '_diffrn.ambient_temp',
            '_exptl_crystal_grow.pH',
            '_refine.pdbx_starting_model',
            '_refine.ls_d_res_high',
            )
        ])


def plot(d_mmCIF_main,d_rmsds,):
    """Plot pairwise RMSDs against differences in experimental conditions.

    d_rmsds is indexed as d_rmsds[pdb1][pdb2] for every pair of keys with
    pdb1 sorted before pdb2. The first four characters of each key are used
    to look up the structure's mmCIF dictionary in d_mmCIF_main.

    For every pair the function collects
      - |T1-T2| and |pH1-pH2| (only when both values are truthy),
      - max(resolution1,resolution2) (skipped for correlation/histogram
        when it equals 'N/A'),
      - the RMSD grouped by space group pair and by starting model pair
        (via append_to_dictionary).

    It then
      - computes three correlations with statistics.correlation,
      - writes 'histo_<name>.txt' and renders 'histo_<name>.png' through the
        external 'gnuplot' and 'convert' commands (scratch files 'tmp.txt'
        and 'gnuplot.ps'),
      - writes '<name><method>.gnuplotdata' and calls
        gnuplot.scatter_plot_2d for pH, temperature and resolution,
      - calls gnuplot.contour_plot for space group and starting model and
        converts the resulting '<name>.ps' to png,
      - prints the grouped dictionaries and the three correlations.

    NOTE(review): 'method', 'spacegroup' and 'n_mutations_max' are not
    assigned in this function; presumably module-level settings - confirm.

    Returns None.
    """

    l_pdbs = sorted(d_rmsds)

    ## gnuplot data lines ("x rmsd") for the scatter plots
    l_temperature = []
    l_ph = []
    l_resolution = []
    ## rmsds grouped by pairs of categorical properties
    d_spacegroup = {}
    d_starting_model = {}
    ## paired samples ([x values],[rmsds]) for the correlations
    l_correl_T = [[],[],]
    l_correl_pH = [[],[],]
    l_correl_resol_max = [[],[],]
    ## counts per bin
    d_histo_pH = {}
    d_histo_T = {}
    d_histo_resol = {}

    ## the mmCIF items of each structure are parsed once, on first use,
    ## instead of once per pair
    d_properties = {}

    def properties(pdb):
        pdb_id = pdb[:4]
        if pdb_id not in d_properties:
            d_properties[pdb_id] = _parse_structure_properties(d_mmCIF_main[pdb_id])
        return d_properties[pdb_id]

    for i1,pdb1 in enumerate(l_pdbs[:-1]):

        spacegroup1,T1,pH1,starting_model1,resolution1 = properties(pdb1)

        for pdb2 in l_pdbs[i1+1:]:

            spacegroup2,T2,pH2,starting_model2,resolution2 = properties(pdb2)

            rmsd = d_rmsds[pdb1][pdb2]

            if rmsd > 1:
                print('%s %s %s' %(pdb1,pdb2,rmsd,))

            if T1 and T2:
                T_diff = abs(float(T2)-float(T1))
                l_temperature += ['%s %s\n' %(T_diff,rmsd),]
                l_correl_T[0] += [T_diff]
                l_correl_T[1] += [rmsd]
                ## histogram bins are the differences rounded to the nearest 10
                T_bin = 10*round(T_diff/10.,0)
                print('%s %s' %(T_diff,T_bin,))
                d_histo_T[T_bin] = d_histo_T.get(T_bin,0)+1

            if pH1 and pH2:
                pH_diff = abs(float(pH2)-float(pH1))
                l_ph += ['%s %s\n' %(pH_diff,rmsd),]
                l_correl_pH[0] += [pH_diff]
                l_correl_pH[1] += [rmsd]
                d_histo_pH[pH_diff] = d_histo_pH.get(pH_diff,0)+1

            ## NOTE(review): the resolutions are passed to float() below, so
            ## they are presumably strings and max() compares them as text;
            ## 'N/A' then sorts above any digit string - confirm.
            resolution_max = max(resolution1,resolution2,)
            l_resolution += ['%s %s\n' %(resolution_max,rmsd),]
            if resolution_max != 'N/A':
                l_correl_resol_max[0] += [float(resolution_max)]
                l_correl_resol_max[1] += [rmsd]
                resol_bin = round(float(resolution_max),0)
                d_histo_resol[resol_bin] = d_histo_resol.get(resol_bin,0)+1

            d_spacegroup = append_to_dictionary(d_spacegroup,spacegroup1,spacegroup2,rmsd,)
            d_starting_model = append_to_dictionary(d_starting_model,starting_model1,starting_model2,rmsd,)

    r1 = statistics.correlation(l_correl_T[0],l_correl_T[1],)
    r2 = statistics.correlation(l_correl_pH[0],l_correl_pH[1],)
    r3 = statistics.correlation(l_correl_resol_max[0],l_correl_resol_max[1],)

    ##
    ## plot histograms
    ##
    for prefix,d_histo in [
        ['deltapH',d_histo_pH,],
        ['deltaT',d_histo_T,],
        ['maxresolution',d_histo_resol,],
        ]:

        l_lines = ['%s %s\n' %(diff,d_histo[diff],) for diff in sorted(d_histo)]
        with open('histo_%s.txt' %(prefix),'w') as fd:
            fd.writelines(l_lines)

        l_settings = [
            'set terminal postscript eps enhanced color "Helvetica"\n',
            'set output "gnuplot.ps"\n',
            'set size 3,3\n',
            'set style data histogram\n',
            'set xtics rotate\n',
            'set xlabel "%s\n' %(prefix),
            'set ylabel "count\n',
            'plot "histo_%s.txt" u 2:xtic(1) t ""\n' %(prefix)
            ]
        with open('tmp.txt','w') as fd:
            fd.writelines(l_settings)

        os.system('gnuplot tmp.txt')
        os.system('convert gnuplot.ps histo_%s.png' %(prefix))

    ##
    ## plot rmsd as a function of each property (2d)
    ##
    for prefix,data,xlabel in [
        ['pH',l_ph,'pH diff',],
        ['Temperature',l_temperature,'T diff',],
        ['resolution',l_resolution,'maximum resolution',],
        ]:

        prefix += method

        with open('%s.gnuplotdata' %(prefix),'w') as fd:
            fd.writelines(data)

        gnuplot.scatter_plot_2d(
            prefix,xlabel=xlabel,ylabel='RMSD %s' %(method,),
##            averages=True,
            regression=True,
            )

    ##
    ## plot rmsd as a function of each property (contour)
    ##
    for d,prefix in [
        [d_spacegroup,'spacegroup',],
        [d_starting_model,'startingmodel',],
        ]:

        l_tics = sorted(d)
        n_tics = len(l_tics)

        ## tick labels are placed at i+.5 along both axes
        d_tics = {}
        for i,tic in enumerate(l_tics):
            d_tics[tic] = i+.5

        ## z1/z2 track the smallest/largest average actually computed
        z1 = 9
        z2 = 0

        l_data = []
        for x,k1 in enumerate(l_tics):
            for y,k2 in enumerate(l_tics):
                if not k2 in d[k1]:
                    ## no rmsds for this pair; 9 is written as a placeholder
                    ## and is not allowed to influence z1/z2
                    average = 9
                else:
                    l_rmsds = d[k1][k2]
                    average = sum(l_rmsds)/len(l_rmsds)
                    if average < z1:
                        z1 = average
                    if average > z2:
                        z2 = average
                l_data += ['%s %s %s\n' %(x,y,average,)]
            ## one extra point (value 1) closes each row at y = n_tics
            l_data += ['%s %s %s\n' %(x,n_tics,1,)]
            l_data += ['\n']
        ## one extra row (value 1) at x = n_tics; the explicit count replaces
        ## the loop variables that used to be read after their loops ended
        for y in range(n_tics):
            l_data += ['%s %s %s\n' %(n_tics,y,1,)]
        l_data += ['%s %s %s\n' %(n_tics,n_tics,1,)]
        l_data += ['\n']

        gnuplot.contour_plot(
            prefix,l_data,
            title='%s %s' %(prefix,method,),zlabel='RMSD %s' %(method),
            d_xtics = d_tics, d_ytics = d_tics,
            palette = '0 1 0 0, 0.9999 0 0 1, 0.9999 1 1 1, 1 1 1 1',
            z1 = z1, z2 = z2+0.1,
            bool_remove = False,
            )

        os.system('convert %s.ps %s_spacegroup%s_mutations%s_atoms%s.png' %(prefix,prefix,spacegroup.replace(' ',''),n_mutations_max,method,))
##        os.remove('%s.ps' %(prefix,))

    print(d_spacegroup)
    print(d_starting_model)
    print(r1)
    print(r2)
    print(r3)

    return