def plot():

    fd = open('Biso_v_resolution.gnuplotdata', 'r')
    lines = fd.readlines()
    fd.close()
    d_splot = {}
    l_resolution = []
    l_Biso = []
    for line in lines:
        l = line.split()
        resolution = float(l[1])
        Biso = float(l[0])
        resolution_rounded = round(resolution, 1)
        ##        resolution_rounded = 0.1*round(resolution/0.1,0)
        Biso_rounded = round(Biso, 0)
        if not resolution_rounded in d_splot.keys():
            d_splot[resolution_rounded] = {}
        if not Biso_rounded in d_splot[resolution_rounded].keys():
            d_splot[resolution_rounded][Biso_rounded] = 0
        d_splot[resolution_rounded][Biso_rounded] += 1
        l_resolution += [resolution]
        l_Biso += [Biso]
    a, b, r, p = statistics.do_regression(
        l_resolution,
        l_Biso,
        verbose=False,
    )
    print 'correlation = r =', r
    tcrit = 1.960  ## 95% confidence 2 tail (student table...)
    ##    tcrit = 76.
    ##    f,g,h = statistics.do_confidence_bands(l_resolution,l_Biso,tcrit,)

    lines = []
    ##    for resolution in range(5,35+1):
    for resolution in range(5, 35 + 1):
        resolution /= 10.
        for Biso in range(0, 100 + 1):
            Biso /= 1.
            if not resolution in d_splot.keys():
                count = 0
            elif not Biso in d_splot[resolution].keys():
                count = 0
            else:
                count = d_splot[resolution][Biso]
            lines += ['%s %s %s\n' % (
                resolution,
                Biso,
                count,
            )]
        lines += ['\n']

    gnuplot.contour_plot(
        'validation_Biso_v_resolution',
        lines,
        xlabel='resolution (Angstrom)',
        ylabel='<Biso>',
        bool_remove=False,
    )

    return
def plot():

    fd = open('Biso_v_resolution.gnuplotdata','r')
    lines = fd.readlines()
    fd.close()
    d_splot = {}
    l_resolution = []
    l_Biso = []
    for line in lines:
        l = line.split()
        resolution = float(l[1])
        Biso = float(l[0])
        resolution_rounded = round(resolution,1)
##        resolution_rounded = 0.1*round(resolution/0.1,0)
        Biso_rounded = round(Biso,0)
        if not resolution_rounded in d_splot.keys():
            d_splot[resolution_rounded] = {}
        if not Biso_rounded in d_splot[resolution_rounded].keys():
            d_splot[resolution_rounded][Biso_rounded] = 0
        d_splot[resolution_rounded][Biso_rounded] += 1
        l_resolution += [resolution]
        l_Biso += [Biso]
    a,b,r,p = statistics.do_regression(l_resolution,l_Biso,verbose=False,)
    print 'correlation = r =', r
    tcrit = 1.960 ## 95% confidence 2 tail (student table...)
##    tcrit = 76.
##    f,g,h = statistics.do_confidence_bands(l_resolution,l_Biso,tcrit,)

    lines = []
##    for resolution in range(5,35+1):
    for resolution in range(5,35+1):
        resolution /= 10.
        for Biso in range(0,100+1):
            Biso /= 1.
            if not resolution in d_splot.keys():
                count = 0
            elif not Biso in d_splot[resolution].keys():
                count = 0
            else:
                count = d_splot[resolution][Biso]
            lines += ['%s %s %s\n' %(resolution,Biso,count,)]
        lines += ['\n']

    gnuplot.contour_plot(
        'validation_Biso_v_resolution',lines,
        xlabel='resolution (Angstrom)',ylabel='<Biso>',
        bool_remove = False,
        )

    return
示例#3
0
文件: ENCODE.py 项目: grbot/tc9
def plot_contour(dn,d_lengths):
    """Draw the INDEL contour plot for one data set.

    dn        -- label inserted into the output file prefix ``INDEL_<dn>``.
    d_lengths -- data forwarded unchanged to ``gnuplot.contour_plot`` as
                 ``d_dat``.

    The plot is requested with ``bool_log=True``, upper axis limits
    ``x2=50``/``y2=50`` and ``bool_remove=False``; the meaning of these
    options is defined by the ``gnuplot`` helper module.  Returns None.
    """

    gnuplot.contour_plot(
        d_dat=d_lengths,fileprefix='INDEL_%s' %(dn),bool_remove=False,
        xlabel='dist_m_i_n (1000bp)',ylabel='INDEL length',
        bool_log=True,
        x2=50,
        y2=50,
        )

    return
def plot_dihedrals(d_count,):

    phipsi_step = 5
    d_titles = {
        'sheet':'{/Symbol b} sheet',
        'helix_alpha':'{/Symbol a} helix',
        'helix_pi':'{/Symbol p} helix',
        'helix_310':'3_1_0 helix',
        'prePRO_notGLY':'prePro (excl. Gly)',
        'prePRO_GLY':'Gly (prePro)',
        'all_notgly_notpro_notprepro':'all (excl. Gly, Pro and pre-Pro)',
        'turns_notgly_notpro_notprepro':'Turn (excl. Gly, Pro and pre-Pro)',
        }

    for k in d_count.keys():
        print 'plot', k
        l = []
        for phi in range(-180,180,phipsi_step,):
            for psi in range(-180,180,phipsi_step,):
                l += ['%s %s %s\n' %(phi,psi,d_count[k][phi][psi])]
            l += ['\n']

        if k in d_titles.keys():
            title = d_titles[k]
        else:
            if k in ['ALA','CYS','ASP','GLU','PHE','HIS','ILE','LYS','LEU','MET','ASN','GLN','ARG','SER','THR','VAL','TRP','TYR',]:
##            if k not in [
##                'Turn','prePRO_notGLY','prePRO_GLY',
##                'GLY', ## not prePro
##                'PRO', ## not prePro
##                'cisPro','transPro',
##                ]:
                continue
            title = k
        
        gnuplot.contour_plot(
            k, l,
            title=title, xlabel='{/Symbol f}', ylabel='{/Symbol y}',
            x1 = -180, x2 = 180,
            y1 = -180, y2 = 180,
            z1 = 0,
            bool_remove = False,
            )

    return
示例#5
0
def plot_contour(dn, d_lengths):
    """Draw the INDEL contour plot for one data set.

    dn        -- label inserted into the output file prefix ``INDEL_<dn>``.
    d_lengths -- data forwarded unchanged to ``gnuplot.contour_plot`` as
                 ``d_dat``.

    The plot is requested with ``bool_log=True``, ``x2=50``, ``y2=50`` and
    ``bool_remove=False``; what those options do is defined by the
    ``gnuplot`` helper module.  Returns None.
    """

    gnuplot.contour_plot(
        d_dat=d_lengths,
        fileprefix='INDEL_%s' % (dn),
        bool_remove=False,
        xlabel='dist_m_i_n (1000bp)',
        ylabel='INDEL length',
        bool_log=True,
        x2=50,
        y2=50,
    )

    return
def plot(d_mmCIF_main,d_rmsds,):
    """Relate pairwise RMSDs to differences in experimental metadata.

    d_mmCIF_main -- mapping indexed by the first four characters of each key
                    of d_rmsds; every value is passed to
                    core.parse_mmCIF_item to look up mmCIF items.
    d_rmsds      -- nested mapping; d_rmsds[pdb1][pdb2] is read for every
                    pair with pdb1 before pdb2 in sorted key order.

    For every pair the function collects the absolute temperature
    difference, the absolute pH difference and the larger of the two
    resolution values together with the RMSD.  It then
      * writes histo_<prefix>.txt and tmp.txt and runs ``gnuplot`` and
        ``convert`` through os.system to draw histograms,
      * writes <prefix>.gnuplotdata files and calls gnuplot.scatter_plot_2d,
      * calls gnuplot.contour_plot for the space group and starting model
        tables built by append_to_dictionary,
      * prints both tables and the three correlation values.

    NOTE(review): the names ``method``, ``spacegroup`` and
    ``n_mutations_max`` are used but never assigned in this function; they
    are presumably module-level globals -- confirm.

    Returns None.
    """

    ## Python 2: keys() returns a list that can be sorted in place
    l_pdbs = d_rmsds.keys()
    l_pdbs.sort()

    ## gnuplot data lines ("x rmsd")
    l_temperature = []
    l_ph = []
    l_resolution = []
    ## tables filled by append_to_dictionary
    d_spacegroup = {}
    d_starting_model = {}

    ## paired x values ([0]) and rmsds ([1]) for the correlations
    l_correl_T = [[],[],]
    l_correl_pH = [[],[],]
    l_correl_resol_max = [[],[],]

    ## histogram counts keyed by (binned) difference
    d_histo_pH = {}
    d_histo_T = {}
    d_histo_resol = {}

    ## loop over all unordered pairs (i1 < i2)
    for i1 in range(len(l_pdbs)-1):
        pdb1 = l_pdbs[i1]
        spacegroup1 = core.parse_mmCIF_item(d_mmCIF_main[pdb1[:4]],'_symmetry.space_group_name_H-M',)
        T1 = core.parse_mmCIF_item(d_mmCIF_main[pdb1[:4]],'_diffrn.ambient_temp',)
        pH1 = core.parse_mmCIF_item(d_mmCIF_main[pdb1[:4]],'_exptl_crystal_grow.pH',)
        starting_model1 = core.parse_mmCIF_item(d_mmCIF_main[pdb1[:4]],'_refine.pdbx_starting_model',)
        resolution1 = core.parse_mmCIF_item(d_mmCIF_main[pdb1[:4]],'_refine.ls_d_res_high',)

        for i2 in range(i1+1,len(l_pdbs)):
            pdb2 = l_pdbs[i2]
            spacegroup2 = core.parse_mmCIF_item(d_mmCIF_main[pdb2[:4]],'_symmetry.space_group_name_H-M',)
            T2 = core.parse_mmCIF_item(d_mmCIF_main[pdb2[:4]],'_diffrn.ambient_temp',)
            pH2 = core.parse_mmCIF_item(d_mmCIF_main[pdb2[:4]],'_exptl_crystal_grow.pH',)
            starting_model2 = core.parse_mmCIF_item(d_mmCIF_main[pdb2[:4]],'_refine.pdbx_starting_model',)
            resolution2 = core.parse_mmCIF_item(d_mmCIF_main[pdb2[:4]],'_refine.ls_d_res_high',)

            rmsd = d_rmsds[pdb1][pdb2]
            ## report pairs with an rmsd above 1
            if rmsd > 1:
                print pdb1, pdb2, rmsd

            ## temperature difference; only when both values are truthy
            if T1 and T2:
                T_diff = abs(float(T2)-float(T1))
                l_temperature += ['%s %s\n' %(T_diff,rmsd),]
                l_correl_T[0] += [T_diff]
                l_correl_T[1] += [rmsd]

                ## histogram bin = difference rounded to the nearest 10
                print T_diff, 10*round(T_diff/10.,0)
                if not 10*round(T_diff/10.,0) in d_histo_T.keys():
                    d_histo_T[10*round(T_diff/10.,0)] = 0
                d_histo_T[10*round(T_diff/10.,0)] += 1

            ## pH difference; only when both values are truthy
            if pH1 and pH2:
                pH_diff = abs(float(pH2)-float(pH1))
                l_ph += ['%s %s\n' %(pH_diff,rmsd),]
                l_correl_pH[0] += [pH_diff]
                l_correl_pH[1] += [rmsd]

                ## unbinned: every distinct difference is its own histogram key
                if not pH_diff in d_histo_pH.keys():
                    d_histo_pH[pH_diff] = 0
                d_histo_pH[pH_diff] += 1

            ## NOTE(review): max() is applied to the raw parse_mmCIF_item
            ## values before float(); if these are strings the comparison is
            ## lexicographic ('9.0' > '10.0') -- confirm the returned type
            resolution_max = max(resolution1,resolution2,)
            l_resolution += ['%s %s\n' %(resolution_max,rmsd),]
            if resolution_max != 'N/A':
                l_correl_resol_max[0] += [float(resolution_max)]
                l_correl_resol_max[1] += [rmsd]

                ## histogram bin = resolution rounded to an integer value
                if not round(float(resolution_max),0) in d_histo_resol.keys():
                    d_histo_resol[round(float(resolution_max),0)] = 0
                d_histo_resol[round(float(resolution_max),0)] += 1

            d_spacegroup = append_to_dictionary(d_spacegroup,spacegroup1,spacegroup2,rmsd,)
            d_starting_model = append_to_dictionary(d_starting_model,starting_model1,starting_model2,rmsd,)

    r1 = statistics.correlation(l_correl_T[0],l_correl_T[1],)
    r2 = statistics.correlation(l_correl_pH[0],l_correl_pH[1],)
    r3 = statistics.correlation(l_correl_resol_max[0],l_correl_resol_max[1],)

    ##
    ## plot histograms
    ##
    for prefix,d in [
        ['deltapH',d_histo_pH,],
        ['deltaT',d_histo_T,],
        ['maxresolution',d_histo_resol,],
        ]:

        ## data file: one "bin count" line per key, in sorted key order
        l = []
        l_diffs = d.keys()
        l_diffs.sort()
        for diff in l_diffs:
            l += ['%s %s\n' %(diff,d[diff],)]
        fd = open('histo_%s.txt' %(prefix),'w')
        fd.writelines(l)
        fd.close()

        ## gnuplot script written to tmp.txt
        ## NOTE(review): the xlabel/ylabel strings have an opening double
        ## quote but no closing one -- confirm gnuplot accepts this
        l = [
            'set terminal postscript eps enhanced color "Helvetica"\n',
            'set output "gnuplot.ps"\n',
            'set size 3,3\n',
            'set style data histogram\n',
            'set xtics rotate\n',
            'set xlabel "%s\n' %(prefix),
            'set ylabel "count\n',
            'plot "histo_%s.txt" u 2:xtic(1) t ""\n' %(prefix)
            ]
        fd = open('tmp.txt','w')
        fd.writelines(l)
        fd.close()

        ## run gnuplot, then convert the postscript output to png
        os.system('gnuplot tmp.txt')
        os.system('convert gnuplot.ps histo_%s.png' %(prefix))

    ##
    ## plot rmsd as a function of each property (2d)
    ##
    for prefix,data,xlabel in [
        ['pH',l_ph,'pH diff',],
        ['Temperature',l_temperature,'T diff',],
        ['resolution',l_resolution,'maximum resolution',],
        ]:
        ## ``method`` is not defined in this function (see docstring)
        prefix += method
        fd = open('%s.gnuplotdata' %(prefix),'w')
        fd.writelines(data)
        fd.close()
        gnuplot.scatter_plot_2d(
            prefix,xlabel=xlabel,ylabel='RMSD %s' %(method,),
##            averages=True,
            regression=True,
            )

    ##
    ## plot rmsd as a function of each property (contour)
    ##
    for d,prefix in [
        [d_spacegroup,'spacegroup',],
        [d_starting_model,'startingmodel',],
        ]:

        ## tic label --> position (index + 0.5), in sorted key order
        d_tics = {}
        l_tics = d.keys()
        l_tics.sort()
        for i in range(len(l_tics)):
            d_tics[l_tics[i]] = i+.5
        ## z1/z2 track the smallest/largest average seen (start at 9 and 0)
        z1 = 9
        z2 = 0

        l_data = []
        for x in range(len(l_tics)):
            k1 = l_tics[x]
            for y in range(len(l_tics)):
                k2 = l_tics[y]
                if not k2 in d[k1].keys():
                    ## no data for this combination: placeholder value 9
                    average = 9
                else:
                    l_rmsds = d[k1][k2]
                    average = sum(l_rmsds)/len(l_rmsds)
                    if average < z1:
                        z1 = average
                    if average > z2:
                        z2 = average
                l_data += ['%s %s %s\n' %(x,y,average,)]
            ## extra point (value 1) one step beyond the last y of this row
            l_data += ['%s %s %s\n' %(x,y+1,1,)]
            l_data += ['\n']
        ## extra row (value 1) one step beyond the last x
        ## NOTE(review): x and y are the leftover loop variables here, so an
        ## empty l_tics raises NameError (or reuses stale values)
        for y in range(len(l_tics)):
            l_data += ['%s %s %s\n' %(x+1,y,1,)]
        l_data += ['%s %s %s\n' %(x+1,y+1,1,)]
        l_data += ['\n']
        gnuplot.contour_plot(
            prefix,l_data,
            title='%s %s' %(prefix,method,),zlabel='RMSD %s' %(method),
            d_xtics = d_tics, d_ytics = d_tics,
            palette = '0 1 0 0, 0.9999 0 0 1, 0.9999 1 1 1, 1 1 1 1',
            z1 = z1, z2 = z2+0.1,
            bool_remove = False,
            )
        ## ``spacegroup`` and ``n_mutations_max`` are not defined in this
        ## function (see docstring)
        os.system('convert %s.ps %s_spacegroup%s_mutations%s_atoms%s.png' %(prefix,prefix,spacegroup.replace(' ',''),n_mutations_max,method,))
##        os.remove('%s.ps' %(prefix,))

    print d_spacegroup
    print d_starting_model

    ## correlations: temperature difference, pH difference, max resolution
    print r1
    print r2
    print r3

    return
示例#7
0
def count_and_plot():
    """Plot DP, call rate and allele frequency statistics for VCF files.

    Each entry of the hard-coded list below is a group of VCF paths that is
    processed together.  For every non-header line of every file in a group,
    parse_line supplies CHROM, the INFO dictionary and a skip flag; DP and AF
    are read from INFO and the call rate is derived from the number of
    './.' genotypes on the line.  Per group the function draws DP, call rate
    and AF histograms (gnuplot.histogram2) and AF-vs-DP and AF-vs-call-rate
    contour plots (gnuplot.contour_plot).

    Takes no arguments and returns None.
    """

    ## NOTE(review): in Python 2 the list comprehension further down rebinds
    ## ``chromosome`` while the list of groups is being built, so the value
    ## assigned here is overwritten (with 'Y') before the loop body first runs
    chromosome = '1'
    import time
    t1 = time.time()
    for l_fp_in in [
        ['out_GATK/join/CombineVariants.vcf'],
        ['out_GATK/join/ApplyRecalibration.recalibrated.filtered.vcf'],
        ['SelectVariants_discordance1.vcf'],
        ['SelectVariants_discordance2.vcf'],
        ['%s.vqsr.filt.vcf' %(chromosome) for chromosome in range(1,23)+['X','Y',]],
        ['SelectVariants_concordance.vcf'],
        ]:

        ## restart the timer for this group (time is already imported above)
        import time
        t1 = time.time()

        ##
        ## prepare scatter lists
        ##
        l_gnuplot_MAF = []
        l_gnuplot_DP = []
        l_gnuplot_CR = []

        ##
        ## prepare contour dic
        ##
        ## d_contour[AF*0.01][DP*10.]: AF index 0..100, DP index 0..150
        d_contour = {}
        for AF in xrange(100+1):
            d_contour[AF*0.01] = {}
            for DP in xrange(150+1):
                d_contour[AF*0.01][DP*10.] = 0
        ## d_contour_CR[round(AF*0.01,2)][CR]: CR 0..100
        d_contour_CR = {}
        for AF in xrange(100+1):
            d_contour_CR[round(AF*0.01,2)] = {}
            for CR in xrange(100+1):
                d_contour_CR[round(AF*0.01,2)][CR] = 0

        for fp_in in l_fp_in:
            print fp_in
            ## NOTE(review): this file object is never closed explicitly
            fd = open(fp_in,'r')

            print fp_in
            for line in fd:
                ## skip header lines
                if line[0] == '#':
                    continue
##                if line.count('./.')+line.count('0/0')+line.count('0/1')+line.count('1/1') != 100:
##                    print line
##                    stop
                CHROM, d_INFO, bool_continue = parse_line(line)
                if bool_continue == True:
                    continue

                ## call rate = 100 minus the number of './.' on the line
                ## (presumably 100 samples per file -- TODO confirm)
                CR = 100-line.count('./.')

                DP = int(d_INFO['DP'])
                ## NOTE(review): bare except; when 'AF' is missing, the lookup
                ## inside the handler raises KeyError again, otherwise AF
                ## becomes the string 'N/A' and the arithmetic below fails
                try:
                    AF = float(d_INFO['AF'])
                except:
                    d_INFO['AF']
                    AF = 'N/A'

                ## minor allele frequency: fold AF onto 0..0.5
                if AF < 0.5:
                    MAF = AF
                else:
                    MAF = 1-AF

                ##
                ## append to list
                ##
                l_gnuplot_DP += [DP]
                l_gnuplot_CR += [CR]
                if fp_in != 'mp15_vqsr.vcf':
                    if AF == 'N/A':
                        ## ``stop`` is not defined in this function; if it is
                        ## undefined in the module too, evaluating it raises
                        ## NameError and halts the run
                        stop
                    l_gnuplot_MAF += [MAF]
                    ## only DP below 1500 fits the prepared 0..1500 grid
                    if DP < 1500:
                        d_contour[0.01*round(MAF/0.01,0)][10.*round(DP/10.,0)] += 1

                ##
                ## append to dic
                ##
                d_contour_CR[round(MAF,2)][CR] += 1

                ## print the elapsed time whenever the chromosome changes
                if chromosome != CHROM:
                    t2 = time.time()
                    print fp_in, '%-2s' %(chromosome), '%2is' %(int(t2-t1))
                    chromosome = CHROM
                    t1 = t2

##            if CHROM == '2':
##                break
##            if POS[-1] == '0' and POS[-2] == '0' and POS[-3] == '0' and POS[-4] == '0':
##                print '%2s %9s %6s %4s' %(CHROM, POS, AF, DP,), fp_in
##                break
##            if POS[-1] == '0' and POS[-2] == '0' and POS[-3] == '0' and POS[-4] == '0':
##                print '%2s %9s %6s %4s' %(CHROM, POS, AF, DP,), fp_in

        ## fp_in is the last file of the group here; title and suffix are
        ## derived from its name
        title = fp_in.replace('_','').replace('out_GATK/','').replace('.vcf','')
        suffix = fp_in.replace('out_GATK','').replace('/','').replace('.vcf','')

        gnuplot.histogram2(
            'DP_%s' %(suffix),
            l_data=l_gnuplot_DP,
            x_step=10,x_min=0,x_max=1000,tic_step=100,
            xlabel='DP from VCF',
            ylabel='SNP count',
            title= title,
            )
        gnuplot.histogram2(
            'CR_%s' %(suffix),
            l_data=l_gnuplot_CR,
            x_step=1,x_min=0,x_max=100,tic_step=10,
            xlabel='SNP Call Rate',
            ylabel='SNP count',
            title= title,
            )
        ## none of the paths listed above equals 'mp15_vqsr.vcf', so this
        ## branch runs for every group
        if fp_in != 'mp15_vqsr.vcf':
            gnuplot.histogram2(
                'AF_%s' %(suffix),
                l_data=l_gnuplot_MAF,
                x_min=0,x_max=.5,tic_step=0.05,x_step=0.01,
                xlabel='AF from VCF',
                ylabel='SNP count',
                title = title,
                )

            ## AF vs DP grid: AF 0..0.5, DP 0..1500, blank line after each AF
            lines = []
            for AF in xrange(50+1):
                for DP in xrange(150+1):
                    lines += ['%s %s %s\n' %(AF*0.01,DP*10.,d_contour[AF*0.01][DP*10.],)]
                lines += ['\n']
            gnuplot.contour_plot(
                'AFvDP_%s' %(suffix),
                lines,
                title = title,
                xlabel = 'AF from VCF',
                ylabel = 'DP from VCF',
                zlabel = 'count',
                )

            ## AF vs call rate grid: AF 0..0.5, CR 0..100
            lines = []
            for AF in xrange(50+1):
                for CR in xrange(100+1):
                    lines += ['%s %s %s\n' %(AF*0.01,CR,d_contour_CR[round(AF*0.01,2)][CR],)]
                lines += ['\n']
            gnuplot.contour_plot(
                'AFvCR_%s' %(suffix),
                lines,
                title = title,
                xlabel = 'AF from VCF',
                ylabel = 'Call Rate from VCF',
                zlabel = 'count',
                )

##        t2 = time.time()
##        print t2-t1
##        stop

    return
示例#8
0
def plot_dihedrals(d_count, ):

    phipsi_step = 5
    d_titles = {
        'sheet': '{/Symbol b} sheet',
        'helix_alpha': '{/Symbol a} helix',
        'helix_pi': '{/Symbol p} helix',
        'helix_310': '3_1_0 helix',
        'prePRO_notGLY': 'prePro (excl. Gly)',
        'prePRO_GLY': 'Gly (prePro)',
        'all_notgly_notpro_notprepro': 'all (excl. Gly, Pro and pre-Pro)',
        'turns_notgly_notpro_notprepro': 'Turn (excl. Gly, Pro and pre-Pro)',
    }

    for k in d_count.keys():
        print 'plot', k
        l = []
        for phi in range(
                -180,
                180,
                phipsi_step,
        ):
            for psi in range(
                    -180,
                    180,
                    phipsi_step,
            ):
                l += ['%s %s %s\n' % (phi, psi, d_count[k][phi][psi])]
            l += ['\n']

        if k in d_titles.keys():
            title = d_titles[k]
        else:
            if k in [
                    'ALA',
                    'CYS',
                    'ASP',
                    'GLU',
                    'PHE',
                    'HIS',
                    'ILE',
                    'LYS',
                    'LEU',
                    'MET',
                    'ASN',
                    'GLN',
                    'ARG',
                    'SER',
                    'THR',
                    'VAL',
                    'TRP',
                    'TYR',
            ]:
                ##            if k not in [
                ##                'Turn','prePRO_notGLY','prePRO_GLY',
                ##                'GLY', ## not prePro
                ##                'PRO', ## not prePro
                ##                'cisPro','transPro',
                ##                ]:
                continue
            title = k

        gnuplot.contour_plot(
            k,
            l,
            title=title,
            xlabel='{/Symbol f}',
            ylabel='{/Symbol y}',
            x1=-180,
            x2=180,
            y1=-180,
            y2=180,
            z1=0,
            bool_remove=False,
        )

    return
def main():

    import sys
    sys.path.append('/home/people/tc/svn/tc_sandbox/misc/')
    import gnuplot

    min_count = 2
    phipsi_range = 1
    phipsi_step = 5

    d_residues = {
        'ALA':'A','CYS':'C','ASP':'D','GLU':'E','PHE':'F',
        'GLY':'G','HIS':'H','ILE':'I','LYS':'K','LEU':'L',
        'MET':'M','ASN':'N','PRO':'P','GLN':'Q','ARG':'R',
        'SER':'S','THR':'T','VAL':'V','TRP':'W','TYR':'Y',
        }

    d_ramachandran = parse_dihedrals(d_residues, plot = True)

    print 'preparing dictionary'
    for res1 in d_residues.values():
        for res2 in d_residues.values():
            if res1 == res2:
                continue
            d_ramachandran[res1+res2] = {}
            for phi in range(-180,180,phipsi_step,):
                d_ramachandran[res1+res2][phi] = {}
                for psi in range(-180,180,phipsi_step,):
                    d_ramachandran[res1+res2][phi][psi] = 0
    print 'prepared dictionary'

    fd = open('phipsi.txt','r')
    lines = fd.readlines()
    fd.close()

    print 'looping over', len(lines), 'lines'

    for line in lines:

        if len(line.split()) == 12:
            if (
                'N/A' in [
                    line.split()[8],
                    line.split()[9],
                    line.split()[10],
                    line.split()[11],
                    ]
                ):
                continue
            res1 = line.split()[6]
            res2 = line.split()[7]
            phi1 = float(line.split()[8])
            psi1 = float(line.split()[9])
            phi2 = float(line.split()[10])
            psi2 = float(line.split()[11])
        else:
            print line
            stop

        if res1 == 'X' or res2 == 'X':
            print line
            continue

##        count = sum_ramachandran(d_ramachandran,res1,phi1,psi1,phipsi_range,)
##        if count < count_min:
##            print line
##            print count
##            if res1 != 'G':
##                stop1
##
##        count = sum_ramachandran(d_ramachandran,res2,phi2,psi2,phipsi_range,)
##        if count < count_min:
##            print line
##            print count
##            if res2 != 'G':
##                stop2

        phi1 = round_angle(phi1,phipsi_step)
        psi1 = round_angle(psi1,phipsi_step)
        phi2 = round_angle(phi2,phipsi_step)
        psi2 = round_angle(psi2,phipsi_step)
        d_ramachandran[res2+res1][phi1][psi1] += 1
        d_ramachandran[res1+res2][phi2][psi2] += 1


    ##
    ## ramachandran plots
    ##
    for key in d_ramachandran.keys():
        if len(key) == 1:
            continue
        import os
        if os.path.isfile('plot_phipsi_%s.png' %(key)):
            os.remove('plot_phipsi_%s.png' %(key))
        print key
        l_gnuplot = []
        max_count = 0
        sum_count = 0
        for phi in range(-180,180,phipsi_step,):
            for psi in range(-180,180,phipsi_step,):
                 l_gnuplot += ['%s %s %s\n' %(phi,psi,d_ramachandran[key][phi][psi])]
                 if d_ramachandran[key][phi][psi] > max_count:
                     max_count = d_ramachandran[key][phi][psi]
                     sum_count += d_ramachandran[key][phi][psi]
            l_gnuplot += ['\n']
        if max_count > 1 and sum_count > 3:
            gnuplot.contour_plot(
                'plot_phipsi_%s' %(key), l_gnuplot,
                title='%s' %(key), xtitle='{/Symbol f}', ytitle='{/Symbol y}',
                x1=-180, x2=180, y1=-180, y2=180,
                z1=0,
                )


    return
示例#10
0
def parse_dihedrals(d_residues, plot = False):

    import math,sys
    sys.path.append('/home/people/tc/svn/tc_sandbox/misc/')
    import gnuplot

    d_ramachandran = {}
    for res in d_residues.values():
        d_ramachandran[res] = {}
        for phi in range(-180,180):
            d_ramachandran[res][phi] = {}
            for psi in range(-180,180):
                d_ramachandran[res][phi][psi] = 0

    for res in d_residues.values():
        print 'read', res
        ##
        ## read lines
        ##
        fd = open('phipsi_all_%s.txt' %(res),'r')
        lines = fd.readlines()
        fd.close()
        ##
        ## parse lines
        ##
        for line in lines:
            if 'N/A' in [line.split()[0],line.split()[1]]:
                continue
            phi = float(line.split()[0])
            psi = float(line.split()[1])
            if phi == 180.:
                phi = -180.
            if psi == 180.:
                psi = -180.
            d_ramachandran[res][int(phi)][int(psi)] += 1

    ##
    ## ramachandran plots
    ##
    if plot == True:
        for res in d_ramachandran.keys():
            print 'plot', res
            l_gnuplot_res = []
            for phi in range(-180,180):
                for psi in range(-180,180):
                    l_gnuplot_res += ['%s %s %s\n' %(phi,psi,d_ramachandran[res][phi][psi])]
                l_gnuplot_res += ['\n']
            gnuplot.contour_plot('plot_phipsi_%s' %(res), l_gnuplot_res, title='phipsi_%s' %(res), xtitle='{/Symbol f}', ytitle='{/Symbol y}')
        l_gnuplot = []
        for phi in range(-180,180):
            for psi in range(-180,180):
                for res in d_ramachandran.keys():
                    l_gnuplot += ['%s %s %s\n' %(phi,psi,d_ramachandran[res][phi][psi])]
            l_gnuplot += ['\n']
        gnuplot.contour_plot('plot_phipsi', l_gnuplot, title='phipsi', xtitle='{/Symbol f}', ytitle='{/Symbol y}')



##    d_dihedral = {
##        'A':{'phi':[],'psi':[]},
##        'C':{'phi':[],'psi':[]},
##        'D':{'phi':[],'psi':[]},
##        'E':{'phi':[],'psi':[]},
##        'F':{'phi':[],'psi':[]},
##        'G':{'phi':[],'psi':[]},
##        'H':{'phi':[],'psi':[]},
##        'I':{'phi':[],'psi':[]},
##        'K':{'phi':[],'psi':[]},
##        'L':{'phi':[],'psi':[]},
##        'M':{'phi':[],'psi':[]},
##        'N':{'phi':[],'psi':[]},
##        'P':{'phi':[],'psi':[]},
##        'Q':{'phi':[],'psi':[]},
##        'R':{'phi':[],'psi':[]},
##        'S':{'phi':[],'psi':[]},
##        'T':{'phi':[],'psi':[]},
##        'V':{'phi':[],'psi':[]},
##        'W':{'phi':[],'psi':[]},
##        'Y':{'phi':[],'psi':[]},
##        }
##
##    for line in lines:
##        residue = d_residues[line.split()[0]]
##        if 'N/A' in [line.split()[1],line.split()[2]]:
##            continue
##        phi = float(line.split()[1])
##        psi = float(line.split()[2])
##        d_dihedral[residue]['phi'] += [phi]
##        d_dihedral[residue]['psi'] += [psi]
##
##    for residue in d_dihedral.keys():
##        for dihedral in ['phi','psi',]:
##            sumx = 0
##            sumxx = 0
##            n = len(d_dihedral[residue][dihedral])
##            for x in d_dihedral[residue][dihedral]:
##                sumx += x
##                sumxx += x**2
##            mean = sumx/n
##            stddev = math.sqrt(n*sumxx-(sumx**2))/n
##            d_dihedral[residue][dihedral] = {'mean':mean,'stddev':stddev}
##    print d_dihedral

    return d_ramachandran
示例#11
0
def main():

    import sys
    sys.path.append('/home/people/tc/svn/tc_sandbox/misc/')
    import gnuplot

    min_count = 2
    phipsi_range = 1
    phipsi_step = 5

    d_residues = {
        'ALA': 'A',
        'CYS': 'C',
        'ASP': 'D',
        'GLU': 'E',
        'PHE': 'F',
        'GLY': 'G',
        'HIS': 'H',
        'ILE': 'I',
        'LYS': 'K',
        'LEU': 'L',
        'MET': 'M',
        'ASN': 'N',
        'PRO': 'P',
        'GLN': 'Q',
        'ARG': 'R',
        'SER': 'S',
        'THR': 'T',
        'VAL': 'V',
        'TRP': 'W',
        'TYR': 'Y',
    }

    d_ramachandran = parse_dihedrals(d_residues, plot=True)

    print 'preparing dictionary'
    for res1 in d_residues.values():
        for res2 in d_residues.values():
            if res1 == res2:
                continue
            d_ramachandran[res1 + res2] = {}
            for phi in range(
                    -180,
                    180,
                    phipsi_step,
            ):
                d_ramachandran[res1 + res2][phi] = {}
                for psi in range(
                        -180,
                        180,
                        phipsi_step,
                ):
                    d_ramachandran[res1 + res2][phi][psi] = 0
    print 'prepared dictionary'

    fd = open('phipsi.txt', 'r')
    lines = fd.readlines()
    fd.close()

    print 'looping over', len(lines), 'lines'

    for line in lines:

        if len(line.split()) == 12:
            if ('N/A' in [
                    line.split()[8],
                    line.split()[9],
                    line.split()[10],
                    line.split()[11],
            ]):
                continue
            res1 = line.split()[6]
            res2 = line.split()[7]
            phi1 = float(line.split()[8])
            psi1 = float(line.split()[9])
            phi2 = float(line.split()[10])
            psi2 = float(line.split()[11])
        else:
            print line
            stop

        if res1 == 'X' or res2 == 'X':
            print line
            continue


##        count = sum_ramachandran(d_ramachandran,res1,phi1,psi1,phipsi_range,)
##        if count < count_min:
##            print line
##            print count
##            if res1 != 'G':
##                stop1
##
##        count = sum_ramachandran(d_ramachandran,res2,phi2,psi2,phipsi_range,)
##        if count < count_min:
##            print line
##            print count
##            if res2 != 'G':
##                stop2

        phi1 = round_angle(phi1, phipsi_step)
        psi1 = round_angle(psi1, phipsi_step)
        phi2 = round_angle(phi2, phipsi_step)
        psi2 = round_angle(psi2, phipsi_step)
        d_ramachandran[res2 + res1][phi1][psi1] += 1
        d_ramachandran[res1 + res2][phi2][psi2] += 1

    ##
    ## ramachandran plots
    ##
    for key in d_ramachandran.keys():
        if len(key) == 1:
            continue
        import os
        if os.path.isfile('plot_phipsi_%s.png' % (key)):
            os.remove('plot_phipsi_%s.png' % (key))
        print key
        l_gnuplot = []
        max_count = 0
        sum_count = 0
        for phi in range(
                -180,
                180,
                phipsi_step,
        ):
            for psi in range(
                    -180,
                    180,
                    phipsi_step,
            ):
                l_gnuplot += [
                    '%s %s %s\n' % (phi, psi, d_ramachandran[key][phi][psi])
                ]
                if d_ramachandran[key][phi][psi] > max_count:
                    max_count = d_ramachandran[key][phi][psi]
                    sum_count += d_ramachandran[key][phi][psi]
            l_gnuplot += ['\n']
        if max_count > 1 and sum_count > 3:
            gnuplot.contour_plot(
                'plot_phipsi_%s' % (key),
                l_gnuplot,
                title='%s' % (key),
                xtitle='{/Symbol f}',
                ytitle='{/Symbol y}',
                x1=-180,
                x2=180,
                y1=-180,
                y2=180,
                z1=0,
            )

    return
示例#12
0
def parse_dihedrals(d_residues, plot=False):

    import math, sys
    sys.path.append('/home/people/tc/svn/tc_sandbox/misc/')
    import gnuplot

    d_ramachandran = {}
    for res in d_residues.values():
        d_ramachandran[res] = {}
        for phi in range(-180, 180):
            d_ramachandran[res][phi] = {}
            for psi in range(-180, 180):
                d_ramachandran[res][phi][psi] = 0

    for res in d_residues.values():
        print 'read', res
        ##
        ## read lines
        ##
        fd = open('phipsi_all_%s.txt' % (res), 'r')
        lines = fd.readlines()
        fd.close()
        ##
        ## parse lines
        ##
        for line in lines:
            if 'N/A' in [line.split()[0], line.split()[1]]:
                continue
            phi = float(line.split()[0])
            psi = float(line.split()[1])
            if phi == 180.:
                phi = -180.
            if psi == 180.:
                psi = -180.
            d_ramachandran[res][int(phi)][int(psi)] += 1

    ##
    ## ramachandran plots
    ##
    if plot == True:
        for res in d_ramachandran.keys():
            print 'plot', res
            l_gnuplot_res = []
            for phi in range(-180, 180):
                for psi in range(-180, 180):
                    l_gnuplot_res += [
                        '%s %s %s\n' %
                        (phi, psi, d_ramachandran[res][phi][psi])
                    ]
                l_gnuplot_res += ['\n']
            gnuplot.contour_plot('plot_phipsi_%s' % (res),
                                 l_gnuplot_res,
                                 title='phipsi_%s' % (res),
                                 xtitle='{/Symbol f}',
                                 ytitle='{/Symbol y}')
        l_gnuplot = []
        for phi in range(-180, 180):
            for psi in range(-180, 180):
                for res in d_ramachandran.keys():
                    l_gnuplot += [
                        '%s %s %s\n' %
                        (phi, psi, d_ramachandran[res][phi][psi])
                    ]
            l_gnuplot += ['\n']
        gnuplot.contour_plot('plot_phipsi',
                             l_gnuplot,
                             title='phipsi',
                             xtitle='{/Symbol f}',
                             ytitle='{/Symbol y}')


##    d_dihedral = {
##        'A':{'phi':[],'psi':[]},
##        'C':{'phi':[],'psi':[]},
##        'D':{'phi':[],'psi':[]},
##        'E':{'phi':[],'psi':[]},
##        'F':{'phi':[],'psi':[]},
##        'G':{'phi':[],'psi':[]},
##        'H':{'phi':[],'psi':[]},
##        'I':{'phi':[],'psi':[]},
##        'K':{'phi':[],'psi':[]},
##        'L':{'phi':[],'psi':[]},
##        'M':{'phi':[],'psi':[]},
##        'N':{'phi':[],'psi':[]},
##        'P':{'phi':[],'psi':[]},
##        'Q':{'phi':[],'psi':[]},
##        'R':{'phi':[],'psi':[]},
##        'S':{'phi':[],'psi':[]},
##        'T':{'phi':[],'psi':[]},
##        'V':{'phi':[],'psi':[]},
##        'W':{'phi':[],'psi':[]},
##        'Y':{'phi':[],'psi':[]},
##        }
##
##    for line in lines:
##        residue = d_residues[line.split()[0]]
##        if 'N/A' in [line.split()[1],line.split()[2]]:
##            continue
##        phi = float(line.split()[1])
##        psi = float(line.split()[2])
##        d_dihedral[residue]['phi'] += [phi]
##        d_dihedral[residue]['psi'] += [psi]
##
##    for residue in d_dihedral.keys():
##        for dihedral in ['phi','psi',]:
##            sumx = 0
##            sumxx = 0
##            n = len(d_dihedral[residue][dihedral])
##            for x in d_dihedral[residue][dihedral]:
##                sumx += x
##                sumxx += x**2
##            mean = sumx/n
##            stddev = math.sqrt(n*sumxx-(sumx**2))/n
##            d_dihedral[residue][dihedral] = {'mean':mean,'stddev':stddev}
##    print d_dihedral

    return d_ramachandran