Example #1
def refined_vs_notrefined(models_dir, dope_profile):
    """Creates a comparison energy plot between the model generated by the program and the refined one."""
    env = environ()
    env.io.atom_files_directory = [models_dir]

    mdl_list = []
    aln = modeller.alignment(env)
    code_list = []

    mdl_nr_list = []
    aln_nr = modeller.alignment(env)
    code_list_nr = []

    for file in os.listdir(models_dir):
        name = file
        if name.endswith('.B'):
            mdl = modeller.model(env)
            mdl.read(file = file)
            code = str(file)
            code_list.append(code)
            s = selection(mdl)
            s.assess_dope(output='ENERGY_PROFILE NO_REPORT', file= models_dir + code + '.profile',
                  normalize_profile=True, smoothing_window=15)
            mdl_list.append(mdl)
            aln.append_model(mdl, align_codes = code, atom_files = code)
            aln.write(file=dope_profile+'build_profile_ref.ali', alignment_format='PIR')
        else:
            mdl_nr = modeller.model(env)
            mdl_nr.read(file = file)
            code = str(file)
            print (code)
            code_list_nr.append(code)
            t = selection(mdl_nr)
            t.assess_dope(output='ENERGY_PROFILE NO_REPORT', file= models_dir + code + '.profile',
                  normalize_profile=True, smoothing_window=15)
            mdl_nr_list.append(mdl_nr)
            aln_nr.append_model(mdl_nr, align_codes = code, atom_files = code)
            aln_nr.write(file=dope_profile+'build_profile_notref.ali', alignment_format='PIR')

    if len(mdl_nr_list) == len(mdl_list):
        for a, b, c, d in zip(mdl_nr_list, mdl_list, code_list_nr, code_list):
            model1 = get_profile(models_dir + c + ".profile", aln_nr[str(c)])
            model2 = get_profile(models_dir + d + ".profile", aln[str(d)])
            pylab.figure(1, figsize=(30,18))
            pylab.xlabel('Alignment position', fontsize = 20)
            pylab.ylabel('DOPE per-residue score', fontsize = 20)
            pylab.plot(model1, color='red', linewidth=2, label='Model')
            pylab.plot(model2, color='green', linewidth=2, label='Optimized model')
            pylab.legend(fontsize = 20)
            pylab.savefig(dope_profile + c + '.dope_profile.jpg', dpi=100)
            pylab.close()

    return ("Comparison energy plot between refined and not refined model has been created here:\n%s\n" % (dope_profile))
Example #2
def main(args):
    mod.log.verbose()
    env = mod.environ()
    env.io.atom_files_directory = [".", args.dir, "../" + args.dir]
    aln = mod.alignment(env)

    mdl = mod.model(
        env,
        file=args.template,
        model_segment=(
            "FIRST:" + args.chains[0].upper(),
            "LAST:" + args.chains[1].upper(),
        ),
    )
    aln.append_model(
        mdl, align_codes=args.template.replace(".pdb", ""), atom_files=args.template
    )

    sequence_file = os.path.join(args.dir, args.target)
    sequence_code = args.target.replace(".ali", "")
    aln.append(file=sequence_file, align_codes=sequence_code)

    aln.align2d()  # perform alignment
    align_file = os.path.join(
        args.dir, sequence_code + "-" + args.template.replace(".pdb", "")
    )
    aln.write(file=align_file + ".ali", alignment_format="PIR")  # para o modeller
    aln.write(file=align_file + ".pap", alignment_format="PAP")  # +fácil de ler

    # check files
    aln.check()
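main() above expects an argparse-style args object with dir, template, chains, and target attributes, plus mod bound to the modeller package; a hedged sketch of that assumed setup (the file names are hypothetical):

import os
import argparse
import modeller as mod

parser = argparse.ArgumentParser()
parser.add_argument("--dir", default="inputs")           # directory holding the template and target files
parser.add_argument("--template", default="5fd1.pdb")    # hypothetical template PDB
parser.add_argument("--chains", default="aa")            # first and last chain IDs (upper-cased inside main)
parser.add_argument("--target", default="target.ali")    # hypothetical target sequence in PIR format
main(parser.parse_args())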
Example #3
def _align_structures(structures, verbose):
    """Aligns structures using iterative structural alignment."""

    # set up modeller environment
    if verbose:
        modeller.log.verbose()
    else:
        modeller.log.none()
    env = modeller.environ()
    aln = modeller.alignment(env)

    # read structures into modeller environment
    for (id, structure) in structures.items():
        mdl = modeller.model(env, file=structure)
        aln.append_model(mdl, align_codes=id, atom_files=structure)

    # align structures using iterative structural alignment
    modeller.salign.iterative_structural_align(aln)

    # convert modeller alignment to Alignment object
    mod_aln_f = tempfile.NamedTemporaryFile(mode='w',
                                            prefix=fnameprefix,
                                            suffix='.ali',
                                            delete=False)
    mod_aln_fname = mod_aln_f.name
    mod_aln_f.close()
    aln.write(mod_aln_fname, alignment_format='PIR')
    alnobj = Alignment(mod_aln_fname)
    os.remove(mod_aln_fname)
    return alnobj
Example #4
def DOPE_profiles_maker(temp_dir, outputs):
    """Creates a DOPE profile plot (.jpg) from a macrocomplex (.pdb), which has no acid nucleic chains using Modeller."""
    flist = []
    env = environ()
    env.io.atom_files_directory = [temp_dir]
    dl = os.listdir(temp_dir)

    for file in dl:
        if file.startswith("mod"):
            flist.append(file)

    aln = modeller.alignment(env)

    for file in flist:
        mdl = modeller.model(env)
        code = str(file)
        mdl.read(file=code, model_segment=('FIRST:@', 'END:'))
        aln.append_model(mdl, align_codes=code, atom_files=code)
        t = selection(mdl)
        file_dope = outputs + code + '.profile'
        t.assess_dope(output='ENERGY_PROFILE NO_REPORT', file=file_dope, normalize_profile=True, smoothing_window=15)
        model = get_profile(file_dope, aln[str(file)])
        pylab.figure(1, figsize=(20, 12))
        pylab.xlabel('Alignment position', fontsize=20)
        pylab.ylabel('DOPE per-residue score', fontsize=20)
        pylab.plot(model, color='green', linewidth=3, label=file[3:-4])
        pylab.savefig(outputs + file[:-4] + '.dope_profile.jpg', dpi=100)
        pylab.close()

    path_img = outputs + file[:-4] + '.dope_profile.jpg'
    return("DOPE profile plot for model created here:\n  %s\n" % (path_img))
Example #5
def align_template_to_reference(msmseed, ref_msmseed):
    import modeller
    import tempfile
    import shutil
    import copy
    import os
    temp_dir = tempfile.mkdtemp()
    try:
        os.chdir(temp_dir)
        alignment_file = open('aln_tmp.pir','w')
        aln = _PIR_alignment(ref_msmseed.template_sequence, ref_msmseed.template_id, msmseed.template_sequence, msmseed.template_id)
        alignment_file.writelines(aln)
        alignment_file.close()
        template_file = open(msmseed.template_id + '.pdb','w')
        template_pdb = msmseed.template_structure
        template_pdb.writeFile(template_pdb.topology, template_pdb.positions, template_file)
        template_file.close()
        ref_pdb = ref_msmseed.template_structure
        ref_file = open(ref_msmseed.template_id + '.pdb', 'w')
        ref_pdb.writeFile(ref_pdb.topology, ref_pdb.positions, ref_file)
        ref_file.close()
        modeller.log.none()
        env = modeller.environ()
        env.io.atom_files_directory = temp_dir
        aln = modeller.alignment(env, file='aln_tmp.pir', align_codes=(ref_msmseed.template_id, msmseed.template_id))
        mdl  = modeller.model(env, file=ref_msmseed.template_id + '.pdb')
        mdl2 = modeller.model(env, file=msmseed.template_id+'.pdb')
        atmsel = modeller.selection(mdl).only_atom_types('CA')
        r = atmsel.superpose(mdl2, aln)
        msmseed.rmsd_to_reference = copy.deepcopy(r.rms)
    except Exception as e:
        msmseed.error_message = e.message
    finally:
        shutil.rmtree(temp_dir)
    return msmseed
Example #6
 def test_script9(self):
     """Test step 9 (multiple fitting)"""
     # Get inputs (outputs from step 8)
     for i in ('top', 'bottom'):
         shutil.copy('precalculate_results/stage8_split_density/' \
                     'groel-11.5A.%s.mrc' % i, 'output')
     # Make sure the script runs without errors
     p = subprocess.check_call(['scripts/' \
                                'script9_symmetric_multiple_fitting.py'])
     e = modeller.environ()
     ref = modeller.model(e,
            file='precalculate_results/stage9_symmetric_multiple_fitting/' \
                 'model.top.0.pdb')
     sel = modeller.selection(ref).only_atom_types('CA')
     # At least one model in each ring should be close to the reference
     for side in ('top', 'bottom'):
         rms = []
         for i in range(6):
             fname = 'output/model.%s.%d.pdb' % (side, i)
             m =  modeller.model(e, file=fname)
             a = modeller.alignment(e)
             a.append_model(ref, align_codes='ref')
             a.append_model(m, align_codes='model')
             rms.append(sel.superpose(m, a).rms)
             os.unlink(fname)
         self.assertTrue(min(rms) < 10.0)
     os.unlink('output/intermediate_asmb_sols.out')
     for side in ('top', 'bottom'):
         os.unlink('output/multifit.%s.output' % side)
         os.unlink('output/multifit.%s.output.symm.ref' % side)
         os.unlink('output/multifit.%s.param' % side)
Example #7
 def test_feature_hbond(self):
     """Check hydrogen bond features"""
     env = self.get_environ()
     mlib = self.get_mdt_library()
     mlib.hbond_classes.read("data/atmcls-hbda.lib")
     donor = mdt.features.HydrogenBondDonor(mlib, mdt.uniform_bins(7, 1.0, 1.0))
     accep = mdt.features.HydrogenBondAcceptor(mlib, mdt.uniform_bins(7, 1.0, 1.0))
     totchg = mdt.features.HydrogenBondCharge(mlib, mdt.uniform_bins(9, 1.0, 1.0))
     satisf = mdt.features.HydrogenBondSatisfaction(mlib, mdt.uniform_bins(100, 0.0, 10.0))
     self.assertRaises(mdt.MDTError, mlib.hbond_classes.read, "data/atmcls-hbda.lib")
     m = mdt.Table(mlib, features=donor)
     m2 = mdt.Table(mlib, features=accep)
     m3 = mdt.Table(mlib, features=satisf)
     m4 = mdt.Table(mlib, features=totchg)
     aln = modeller.alignment(env, file="test/data/alignment.ali")
     m.add_alignment(aln)
     m2.add_alignment(aln)
     m3.add_alignment(aln)
     m4.add_alignment(aln)
     self.assertInTolerance(m[0], 295.0, 0.0005)
     self.assertInTolerance(m[1], 139.0, 0.0005)
     self.assertEqual(m[-1], 349.0)
     self.assertInTolerance(m2[0], 236.0, 0.0005)
     self.assertInTolerance(m2[1], 223.0, 0.0005)
     self.assertEqual(m2[-1], 168.0)
     self.assertInTolerance(m3[0], 1.0, 0.0005)
     self.assertInTolerance(m3[1], 0.0, 0.0005)
     self.assertEqual(m3[-1], 0.0)
     self.assertInTolerance(m4[0], 78.0, 0.0005)
     self.assertInTolerance(m4[1], 24.0, 0.0005)
     self.assertEqual(m4[-1], 739.0)
     # Exercise writing of hbond information to HDF5 files:
     for t in (m, m2, m3, m4):
         t.write_hdf5("test.hdf5")
         os.unlink("test.hdf5")
Example #8
 def test_feature_angle_type(self):
     """Check angle type features"""
     env = self.get_environ()
     mlib = self.get_mdt_library()
     mlib.angle_classes.read("data/anggrp.lib")
     angletype = mdt.features.AngleType(mlib)
     angle = mdt.features.Angle(mlib, bins=mdt.uniform_bins(288, 0.0, 0.625))
     self.assertRaises(mdt.MDTError, mlib.angle_classes.read, "data/anggrp.lib")
     m = mdt.Table(mlib, features=angletype)
     m2 = mdt.Table(mlib, features=angle)
     aln = modeller.alignment(env, file="test/data/alignment.ali")
     m.add_alignment(aln)
     m2.add_alignment(aln)
     self.assertInTolerance(m[0], 7.0, 0.0005)
     self.assertInTolerance(m[7], 9.0, 0.0005)
     self.assertInTolerance(m[15], 11.0, 0.0005)
     self.assertEqual(m.shape, (236,))
     self.assertEqual(m[-1], 0.0)
     self.assertInTolerance(m2[176], 48.0, 1.0005)
     self.assertInTolerance(m2[177], 42.0, 0.0005)
     self.assertInTolerance(m2[178], 38.0, 0.0005)
     self.assertEqual(m2.shape, (289,))
     self.assertEqual(m2[-1], 0.0)
     # Exercise writing of angle class information to HDF5 files:
     m.write_hdf5("test.hdf5")
     os.unlink("test.hdf5")
Example #9
    def test_feature_residue_distance_difference(self):
        """Check residue-residue distance difference feature"""
        env = self.get_environ()
        mlib = self.get_mdt_library()
        ddist = mdt.features.ResidueDistanceDifference(mlib, bins=mdt.uniform_bins(20, -10, 1))
        aln = modeller.alignment(env, file="test/data/struc-struc.ali")
        m = mdt.Table(mlib, features=ddist)
        m.add_alignment(aln)
        self.assertEqual(m[9], 20)
        self.assertEqual(m[10], 20)
        self.assertEqual(sum([b for b in m]), 40)
        self.assertEqual(m[-1], 0)

        # Undefined (-999) coordinates in either structure should put
        # features in the undefined bin
        oldx = aln[0].residues[0].atoms["CA"].x
        aln[0].residues[0].atoms["CA"].x = -999
        m = mdt.Table(mlib, features=ddist)
        m.add_alignment(aln)
        self.assertEqual(m[-1], 16)

        aln[0].residues[0].atoms["CA"].x = oldx
        aln[1].residues[0].atoms["CA"].x = -999
        m = mdt.Table(mlib, features=ddist)
        m.add_alignment(aln)
        self.assertEqual(m[-1], 16)
Example #10
 def test_feature_dihedral_type(self):
     """Check dihedral type features"""
     env = self.get_environ()
     mlib = self.get_mdt_library()
     mlib.dihedral_classes.read("data/impgrp.lib")
     dihedtype = mdt.features.DihedralType(mlib)
     dihedral = mdt.features.Dihedral(mlib, bins=mdt.uniform_bins(288, -180, 1.25))
     self.assertRaises(mdt.MDTError, mlib.dihedral_classes.read, "data/impgrp.lib")
     m = mdt.Table(mlib, features=dihedtype)
     m2 = mdt.Table(mlib, features=dihedral)
     aln = modeller.alignment(env, file="test/data/alignment.ali")
     m.add_alignment(aln)
     m2.add_alignment(aln)
     self.assertInTolerance(m[0], 7.0, 0.0005)
     self.assertInTolerance(m[2], 9.0, 0.0005)
     self.assertInTolerance(m[4], 11.0, 0.0005)
     self.assertEqual(m.shape, (79,))
     self.assertEqual(m[-1], 0.0)
     self.assertInTolerance(m2[143], 60.0, 1.0005)
     self.assertInTolerance(m2[144], 53.0, 1.0005)
     self.assertInTolerance(m2[145], 24.0, 0.0005)
     self.assertEqual(m2.shape, (289,))
     self.assertEqual(m2[-1], 0.0)
     # Exercise writing of dihedral class information to HDF5 files:
     m.write_hdf5("test.hdf5")
     os.unlink("test.hdf5")
Example #11
 def test_disulfide(self):
     """Test handling of disulfide bonds"""
     mlib = self.get_all_libraries()
     bsep = mdt.features.AtomBondSeparation(mlib,
                                     bins=mdt.uniform_bins(20, 0, 1.0))
     bsep_ss = mdt.features.AtomBondSeparation(mlib,
                                     bins=mdt.uniform_bins(20, 0, 1.0),
                                     disulfide=True)
     env = self.get_environ()
     mdl = modeller.model(env)
     mdl.build_sequence('CC')
     # When SG-SG distance is small enough, an extra bond
     # (separation feature = 1) should be detected, but only with
     # disulfide=True
     for (dist, num) in [(2.6, 11.0), (2.4, 12.0)]:
         sg1 = mdl.residues[0].atoms['SG']
         sg2 = mdl.residues[1].atoms['SG']
         sg1.x = sg1.y = sg1.z = 0.
         sg2.x = sg2.y = 0.
         sg2.z = dist
         a = modeller.alignment(env)
         a.append_model(mdl, atom_files='test', align_codes='test')
         m = mdt.Table(mlib, features=bsep)
         m.add_alignment(a, residue_span_range=(-999,0,0,999))
         self.assertEqual(m[1], 11.0)
         m2 = mdt.Table(mlib, features=bsep_ss)
         m2.add_alignment(a, residue_span_range=(-999,0,0,999))
         self.assertEqual(m2[1], num)
Example #12
 def test_feature_sidechain_biso(self):
     """Check average sidechain Biso feature"""
     env = self.get_environ()
     mlib = self.get_mdt_library()
     self.assertRaises(ValueError, mdt.features.SidechainBiso, mlib, bins=mdt.uniform_bins(5, 0, 10), protein=3)
     sidechain_biso = mdt.features.SidechainBiso(mlib, bins=mdt.uniform_bins(5, 0, 10))
     mdl = modeller.model(env)
     mdl.build_sequence("A")
     aln = modeller.alignment(env)
     aln.append_model(mdl, align_codes="test")
     s = aln[0]
     # Mainchain atom Biso should be ignored:
     for mainchain in ("N:1", "C:1", "O:1", "OXT:1", "CA:1"):
         s.atoms[mainchain].biso = 1000
     for (biso, bin) in (
         (22, 2),
         (32, 3),  # Map regular values to bins
         (0, -1),  # Zero Biso should be "undefined"
         (1, 3),
     ):  # Biso < 2 is multiplied by 4pi^2
         s.atoms["CB:1"].biso = biso
         m = mdt.Table(mlib, features=sidechain_biso)
         m.add_alignment(aln)
         self.assertEqual(m.shape, (6,))
         self.assertEqual(m.sum(), 1)
         self.assertEqual(m[bin], 1)
Example #13
    def test_feature_iresol(self):
        """Check resolution features"""
        env = self.get_environ()
        mlib = self.get_mdt_library()
        bins = mdt.uniform_bins(3, -1.0, 1.5)
        xray0 = mdt.features.XRayResolution(mlib, bins, protein=0)
        xray0_nmr = mdt.features.XRayResolution(mlib, bins, protein=0, nmr=1.0)
        xray1 = mdt.features.XRayResolution(mlib, bins, protein=1)
        xray2 = mdt.features.XRayResolution(mlib, bins, protein=2)
        # Check valid range for protein argument
        for p in (-1, 3):
            self.assertRaises(ValueError, mdt.features.XRayResolution, mlib, bins, protein=p)
        m = self.get_test_mdt(mlib, features=xray0)
        m2 = self.get_test_mdt(mlib, features=xray1)
        self.assertEqual(m.shape, (4,))
        self.assertEqual([b for b in m], [0.0, 1.0, 1.0, 0.0])
        self.assertMDTDataEqual(m, m2)

        for (code, feat, bin) in (
            ("bin0", xray0, 0),
            ("bin0", xray0_nmr, 1),
            ("bin1", xray0, 1),
            ("bin2", xray0, 2),
            ("undef1", xray0, 3),
            ("undef2", xray0, 3),
        ):
            m = mdt.Table(mlib, features=feat)
            aln = modeller.alignment(env, file="test/data/resol.ali", align_codes=code)
            m.add_alignment(aln)
            self.assertEqual(m[bin], 1.0)
Example #14
 def test_feature_resind_diff(self):
     """Test the residue index difference feature"""
     env = self.get_environ()
     mlib = self.get_mdt_library()
     diff = mdt.features.ResidueIndexDifference(mlib, bins=mdt.uniform_bins(21, -10, 1))
     absdiff = mdt.features.ResidueIndexDifference(mlib, absolute=True, bins=mdt.uniform_bins(21, -10, 1))
     aln = modeller.alignment(env, file="test/data/alignment.ali", align_codes="5fd1")
     m1 = mdt.Table(mlib, features=diff)
     m2 = mdt.Table(mlib, features=absdiff)
     self.assertEqual(m1.symmetric, False)
     self.assertEqual(m2.symmetric, True)
     m1.add_alignment(aln, residue_span_range=(-999, -2, 2, 999))
     m2.add_alignment(aln, residue_span_range=(-999, -2, 2, 999))
     self.assertEqual(m1.sum(), 10920)
     self.assertEqual(m2.sum(), 5460)
     # span range should result in 0, +/- 1 bins being zero:
     for m in (m1, m2):
         self.assertEqual(m[9], 0.0)
         self.assertEqual(m[10], 0.0)
         self.assertEqual(m[11], 0.0)
     # Non-absolute feature should have other bins symmetrically distributed:
     for i in range(9):
          self.assertEqual(m1[i], m1[-2 - i])
     # Absolute feature should have no negative values:
     for i in range(9):
         self.assertEqual(m2[i], 0.0)
Example #15
 def build_mdt_from_model(self, mlib, features, mdl, **keys):
     """Build a simple test MDT for a given model"""
     env = self.get_environ()
     m = mdt.Table(mlib, features=features)
     a = modeller.alignment(env)
     a.append_model(mdl, atom_files="test", align_codes="test")
     m.add_alignment(a, **keys)
     return m
Example #16
 def test_feature_triplet_type(self):
     """Check triplet type features"""
     env = self.get_environ()
     mlib = self.get_mdt_library()
     mlib.tuple_classes.read("data/trpcls.lib")
     tuple_type = mdt.features.TupleType(mlib)
     tuple_type2 = mdt.features.TupleType(mlib, pos2=True)
     tuple_dist = mdt.features.TupleDistance(mlib, bins=mdt.uniform_bins(9, 2.0, 0.2))
     tuple_angle1 = mdt.features.TupleAngle1(mlib, bins=mdt.uniform_bins(6, 0, 30.0))
     tuple_dihed1 = mdt.features.TupleDihedral1(mlib, bins=mdt.uniform_bins(6, -180, 60.0))
     tuple_dihed2 = mdt.features.TupleDihedral2(mlib, bins=mdt.uniform_bins(6, -180, 60.0))
     tuple_dihed3 = mdt.features.TupleDihedral3(mlib, bins=mdt.uniform_bins(6, -180, 60.0))
     self.assertRaises(mdt.MDTError, mlib.tuple_classes.read, "data/trpcls.lib")
     m1 = mdt.Table(mlib, features=tuple_type)
     m2 = mdt.Table(mlib, features=tuple_type2)
     m3 = mdt.Table(mlib, features=tuple_dist)
     m4 = mdt.Table(mlib, features=tuple_angle1)
     m5 = mdt.Table(mlib, features=tuple_dihed1)
     m6 = mdt.Table(mlib, features=tuple_dihed2)
     m7 = mdt.Table(mlib, features=tuple_dihed3)
     aln = modeller.alignment(env, file="test/data/tiny.ali")
     for m in (m1, m2, m3, m4, m5, m6, m7):
         m.add_alignment(aln, residue_span_range=(-9999, 0, 0, 9999))
     self.assertInTolerance(m1[0], 1.0, 0.0005)
     self.assertInTolerance(m1[1], 0.0, 0.0005)
     self.assertInTolerance(m1[2], 1.0, 0.0005)
     self.assertEqual(m1.shape, (236,))
     self.assertEqual(m1[-1], 0.0)
     self.assertInTolerance(m2[0], 60.0, 0.0005)
     self.assertInTolerance(m2[1], 0.0, 0.0005)
     self.assertInTolerance(m2[2], 60.0, 0.0005)
     self.assertEqual(m2.shape, (236,))
     self.assertEqual(m2[-1], 0.0)
     self.assertInTolerance(m3[0], 0.0, 0.0005)
     self.assertInTolerance(m3[1], 82.0, 0.0005)
     self.assertInTolerance(m3[2], 226.0, 0.0005)
     self.assertEqual(m3.shape, (10,))
     self.assertInTolerance(m3[-1], 3018.0, 0.0005)
     self.assertInTolerance(m4[0], 479.0, 0.0005)
     self.assertInTolerance(m4[1], 806.0, 0.0005)
     self.assertInTolerance(m4[2], 471.0, 0.0005)
     self.assertEqual(m4.shape, (7,))
     self.assertEqual(m4[-1], 0.0)
     self.assertInTolerance(m5[0], 556.0, 0.0005)
     self.assertInTolerance(m5[1], 642.0, 0.0005)
     self.assertInTolerance(m5[2], 470.0, 6.0005)
     self.assertEqual(m5.shape, (7,))
     self.assertInTolerance(m5[-1], 180.0, 0.0005)
     self.assertInTolerance(m6[0], 661.0, 0.0005)
     self.assertInTolerance(m6[1], 520.0, 0.0005)
     self.assertInTolerance(m6[2], 545.0, 6.0005)
     self.assertEqual(m6.shape, (7,))
     self.assertInTolerance(m6[-1], 112.0, 0.0005)
     self.assertInTolerance(m7[0], 661.0, 0.0005)
     self.assertInTolerance(m7[1], 520.0, 0.0005)
     self.assertInTolerance(m7[2], 545.0, 6.0005)
     self.assertEqual(m7.shape, (7,))
     self.assertInTolerance(m7[-1], 112.0, 0.0005)
Example #17
    def test_integrative_modeling(self):
        """Test the entire integrative modeling run"""
        import modeller
        # Compile the clustering program
        subprocess.check_call(['gfortran', 'cluster.f', 'u3best.f',
                               '-o', 'cluster.x'],
                              cwd='integrative_modeling/bin')

        # Run sampling
        subprocess.check_call(['./run_modeling.py'],
                              cwd='integrative_modeling')

        # Analysis
        subprocess.check_call(['bin/get_frames.sh'],
                              cwd='integrative_modeling')

        # Make sure that at least two of the three "known good" clusters
        # are reproduced
        clusters = glob.glob('integrative_modeling/clustering/clus.*.pdb')
        clusters = [x for x in clusters if '-' not in x]
        exp_clusters = glob.glob('model_refinement/cluster*/model.pdb')

        env = modeller.environ()
        n_cluster = 0
        rms = []
        cluster_match = [0] * len(clusters)
        exp_cluster_match = [0] * len(exp_clusters)
        # Get a matrix of RMSD between all clusters and the expected clusters
        for ncluster, cluster in enumerate(clusters):
            per_cluster = []
            for nexp_cluster, exp_cluster in enumerate(exp_clusters):
                mc = modeller.model(env, file=cluster)
                s = modeller.selection(mc)
                a = modeller.alignment(env)
                me = modeller.model(env, file=exp_cluster)
                a.append_model(mc, align_codes='clus')
                a.append_model(me, align_codes='exp_clus')
                # We only care about the global (non-cutoff) RMSD, so use a
                # large cutoff so that refine_local doesn't increase the number
                # of equivalent positions at the expense of worsening the RMSD
                r = s.superpose(me, a, rms_cutoff=999.)
                if r.rms < 15.0:
                    cluster_match[ncluster] += 1
                    exp_cluster_match[nexp_cluster] += 1
                per_cluster.append(r.rms)
            rms.append(per_cluster)
        # Count the number of clusters which are close to an expected cluster
        ncluster_match = len(cluster_match) - cluster_match.count(0)
        # Count the number of expected clusters which are close to a cluster
        nexp_cluster_match = len(exp_cluster_match) - exp_cluster_match.count(0)
        # Make sure that at least 2 of the 3 expected clusters is close to one
        # of the clusters we produced (but not all the *same* cluster)
        self.assertTrue(ncluster_match >= 2 and nexp_cluster_match >= 2,
                        "Could not find any match between the %d clusters "
                        "found in this test and 2 of the 3 'known good' "
                        "clusters (match defined as all-atom RMSD less than "
                        "15.0A). RMSD matrix: %s" % (len(clusters), str(rms)))
Example #18
def perform_sequence_alignment():
    e = modeller.environ()
    m1 = modeller.model(e, file='experimental.pdb')
    m2 = modeller.model(e, file='rosetta.pdb')
    aln = modeller.alignment(e)
    aln.append_model(m1, align_codes='experimental', atom_files='experimental.pdb')
    aln.append_model(m2, align_codes='rosetta')
    aln.align2d()
    aln.write(file='align.ali', alignment_format='PIR')
Example #19
 def test_script5(self):
     """Test step 5 (template alignment)"""
     # Make sure the script runs without errors
     p = subprocess.check_call(['scripts/script5_template_alignment.py'])
     # Check output alignment
     e = modeller.environ()
     a = modeller.alignment(e, file='output/groel-1iokA.ali')
     self.assertEqual([x.code for x in a], ['1iok', 'P0A6F5'])
     os.unlink('output/groel-1iokA.ali')
Example #20
 def test_feature_residue_distance(self):
     """Check residue-residue distance feature"""
     env = self.get_environ()
     mlib = self.get_mdt_library()
     dist = mdt.features.ResidueDistance(mlib, bins=mdt.uniform_bins(7, 0, 2.0))
     aln = modeller.alignment(env, file="test/data/tiny.ali")
     m = mdt.Table(mlib, features=dist)
     m.add_alignment(aln)
     self.assertEqual([b for b in m], [0, 0, 0, 8, 2, 4, 4, 2])
Example #21
def mk_strct_al_modeller(strct_data1, strct_data2):
    _stdout = sys.stdout
    sys.stdout = open(os.devnull, 'w')

    tmp_file = tempfile.NamedTemporaryFile(suffix=".fasta", delete=False)
    env = m.environ()

    aln = m.alignment(env)
    code1 = 'pdb' + strct_data1['id']
    code2 = 'pdb' + strct_data2['id']
    chain1 = strct_data1['chain_id']
    chain2 = strct_data2['chain_id']
    env.io.atom_files_directory = ['.', PDB_DIR]
    result = {}
    try:
        for (code, chain) in ((code1, chain1), (code2, chain2)):
            mdl = m.model(env, file=code, model_segment=('FIRST:'+chain,
                                                         'LAST:'+chain))
            aln.append_model(mdl, atom_files=code, align_codes=code+chain)

        for (weights, write_fit, whole) in (((1., 0., 0., 0., 1., 0.), False,
                                             True),
                                            ((1., 0.5, 1., 1., 1., 0.), False,
                                             True),
                                            ((1., 1., 1., 1., 1., 0.), True,
                                             False)):
            r = aln.salign(rms_cutoff=3.5, normalize_pp_scores=False,
                           rr_file='$(LIB)/as1.sim.mat', overhang=30,
                           gap_penalties_1d=(-450, -50),
                           gap_penalties_3d=(0, 3), gap_gap_score=0,
                           gap_residue_score=0,
                           alignment_type='tree', # If 'progressive', the tree is not
                                                  # computed and all structures will be
                                                  # aligned sequentially to the first
                           #ext_tree_file='1is3A_exmat.mtx', # Tree building can be avoided
                                                             # if the tree is input
                           feature_weights=weights, # For a multiple sequence alignment only
                                                    # the first feature needs to be non-zero
                           improve_alignment=True, fit=True, write_fit=False,
                           write_whole_pdb=whole, output='ALIGNMENT QUALITY')
        if r.qscorepct > 70:
            aln.write(file=tmp_file.name, alignment_format='FASTA')
            with open(tmp_file.name) as a:
                alignment = unwrap(a.read().splitlines())

            for i in range(len(alignment[1])):
                if alignment[1][i] != '-' and alignment[3][i] != '-':
                    pos1 = get_real_position_al(alignment[1], i)
                    pos2 = get_real_position_al(alignment[3], i)
                    result[pos1] = pos2
    except:
        print('Modeller failed')
    sys.stdout.close()
    sys.stdout = _stdout
    return result
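mk_strct_al_modeller above relies on module-level names that are not shown (m, PDB_DIR, unwrap, get_real_position_al); a sketch of the assumed bindings, with the PDB directory path hypothetical:

import os
import sys
import tempfile
import modeller as m

PDB_DIR = '/path/to/local/pdb_files'   # hypothetical directory of local PDB structure files
# unwrap() and get_real_position_al() are project-specific helpers not reproduced in this example.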
Example #22
 def test_script1(self):
     """Test step 1 (build profile)"""
     # Make sure the script runs without errors
     p = subprocess.check_call(['scripts/script1_build_profile.py'])
     # Make sure the profile contains the sequences we expect
     e = modeller.environ()
     a = modeller.alignment(e, file='output/build_profile.ali')
     self.assertEqual(sorted(s.code for s in a),
                      sorted(self.templates) + ['P0A6F5'])
     os.unlink('output/build_profile.prf')
     os.unlink('output/build_profile.ali')
Example #23
    def build_mdt_from_sequence(self, mlib, features, seq, **keys):
        """Build a simple test MDT for a given sequence"""
        env = self.get_environ()
        mdl = modeller.model(env)
        mdl.build_sequence(seq)

        m = mdt.Table(mlib, features=features)
        a = modeller.alignment(env)
        a.append_model(mdl, atom_files='test', align_codes='test')
        m.add_alignment(a, **keys)
        return m
Example #24
def main(argv):
    wp = ''
    mp = ''
    tp = ''
    try:
        opts, args = getopt.getopt(argv, "hw:m:t:",
                                   ["wprofile=", "mprofile=", "tprofile="])
    except getopt.GetoptError:
        print(
            '%s -w wt_model_profile -m mt_model_profile -t template_profile' %
            sys.argv[0])
        sys.exit(2)
    for opt, arg in opts:
        if opt in ('-h', '--help'):
            print(
                '%s -w wt_model_profile -m mt_model_profile -t template_profile'
                % sys.argv[0])
            sys.exit()
        elif opt in (" ", ""):
            print(
                '%s -w wt_model_profile -m mt_model_profile -t template_profile'
                % sys.argv[0])
            sys.exit()
        elif opt in ("-w", "--wprofile"):
            wp = arg
        elif opt in ("-m", "--mprofile"):
            mp = arg
        elif opt in ("-t", "--tprofile"):
            tp = arg

    pic_out = mp.replace("profile", "png")

    e = modeller.environ()
    a = modeller.alignment(e, file=mp.replace("profile", "ali"))

    template = get_profile(tp, a['6Y2HA'])
    wmodel = get_profile(wp, a['CLIC5'])
    mmodel = get_profile(mp, a['CLIC5'])

    # Plot the template and model profiles in the same plot for comparison:
    pylab.figure(1, figsize=(10, 6))
    pylab.xlabel('Alignment position')
    pylab.ylabel('DOPE per-residue score')
    pylab.plot(template, color='red', linewidth=2, label='Template')
    pylab.plot(wmodel,
               color='green',
               linewidth=2,
               label='Wt multi-template Model')
    pylab.plot(mmodel,
               color='black',
               linewidth=2,
               label='Mt multi-template Model')
    pylab.legend()
    pylab.savefig(pic_out, dpi=65)
Example #25
 def test_feature_rama(self):
     """Check Ramachandran mainchain conformation class feature"""
     env = self.get_environ()
     mlib = self.get_mdt_library()
     self.assertRaises(ValueError, mdt.features.MainchainConformation, mlib, protein=3)
     conf = mdt.features.MainchainConformation(mlib)
     aln = modeller.alignment(env, file="test/data/tiny.ali")
     m = mdt.Table(mlib, features=conf)
     m.add_alignment(aln)
     self.assertEqual([b.symbol for b in m.features[0].bins], ["A", "P", "B", "L", "E", "U"])
     self.assertEqual([b for b in m], [0, 2, 2, 0, 0, 2])
Example #26
    def get_test_mdt(self, mlib, features):
        env = self.get_environ()
        mdl = modeller.model(env)
        mdl.build_sequence('C')

        m = mdt.Table(mlib, features=features)
        a = modeller.alignment(env)
        a.append_model(mdl, atom_files='test', align_codes='test')
        m.add_alignment(a)
        m = m.reshape(features, [0] * len(features), [-1] * len(features))
        return m
Example #27
 def test_feature_iatta_special(self):
     """Check atom type feature with disulfide/termini special handling"""
     env = self.get_environ()
     mlib = mdt.Library(env, special_atoms=True)
     mlib.atom_classes.read("${LIB}/atmcls-melo.lib")
     attyp = mdt.features.AtomType(mlib)
     aln = modeller.alignment(env, file="test/data/tiny.ali")
     m = mdt.Table(mlib, features=attyp)
     m.add_alignment(aln)
     self.assertInTolerance(m[0], 6.0, 0.0005)
     self.assertInTolerance(m[1], 0.0, 0.0005)
     self.assertInTolerance(m[2], 5.0, 0.0005)
Example #28
def count_alignments(aln_file, target, templates):
    import modeller
    modeller.log.none()
    env = modeller.environ()
    aln = modeller.alignment(env, file=aln_file)
    target = aln[target]
    templates = [aln[t] for t in templates]
    num_align = 0
    for r in target.residues:
        for template in templates:
            if r.get_aligned_residue(template) is not None:
                num_align += 1
    return num_align, len(target.residues)
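A brief usage sketch for count_alignments; the alignment file and align codes below are hypothetical:

num_aligned, target_len = count_alignments('target-templates.ali', 'target',
                                           ['templateA', 'templateB'])
# num_aligned counts (target residue, template) pairs that are aligned, so it can
# exceed target_len when several templates cover the same residue.
print(num_aligned, target_len)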
Example #29
 def test_feature_alpha_content(self):
     """Check alpha content feature"""
     env = self.get_environ()
     mlib = self.get_mdt_library()
     self.assertRaises(ValueError, mdt.features.AlphaContent, mlib, bins=mdt.uniform_bins(10, 0, 0.1), protein=3)
     alpha = mdt.features.AlphaContent(mlib, bins=mdt.uniform_bins(10, 0, 0.1))
     for (alnfile, bin) in (("tiny.ali", 0), ("alignment.ali", 5)):
         m = mdt.Table(mlib, features=alpha)
         a = modeller.alignment(env, file=os.path.join("test", "data", alnfile))
         m.add_alignment(a)
         self.assertEqual(m.shape, (11,))
         self.assertEqual(m.sum(), 1)
         self.assertEqual(m[bin], 1)
Example #30
 def test_feature_neighborhood_difference(self):
     """Check residue neighborhood difference features"""
     env = self.get_environ()
     mlib = self.get_mdt_library()
     bins = mdt.uniform_bins(9, 0, 0.2)
     ndif = mdt.features.NeighborhoodDifference(mlib, bins)
     avndif = mdt.features.AverageNeighborhoodDifference(mlib, bins)
     aln = modeller.alignment(env, file="test/data/struc-struc.ali")
     m = mdt.Table(mlib, features=ndif)
     m.add_alignment(aln)
     self.assertEqual([b for b in m], [4, 6, 2] + [0] * 7)
     m = mdt.Table(mlib, features=avndif)
     m.add_alignment(aln)
     self.assertEqual([b for b in m], [6, 12, 2] + [0] * 7)
Example #31
def align(target_name: str, target_sequence: str, template_name: str, template_chain: chr) -> None:
    # creates a file called f'alignment_{target_name}_and_{template_name}.pir'
    # assumes a file already exists called f'{template_name}.pdb'
    target_pir = f'>P1;{target_name}\nsequence:{target_name}::::::::\n{target_sequence}*'
    target_pir = StringIO(target_pir)
    alignment_instance = m.alignment(env)
    model_instance = m.model(env)
    model_instance.read(file=template_name, model_segment=(f'FIRST:{template_chain}', f'LAST:{template_chain}'))
    alignment_instance.append_model(model_instance,
                                    align_codes=template_name,
                                    atom_files=template_name)
    alignment_instance.append(file=target_pir, align_codes=target_name)
    alignment_instance.align2d()
    alignment_instance.write(file=f'alignment_{target_name}_and_{template_name}.pir')
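align() above refers to module-level names m and env that are not defined in the snippet; a sketch of the setup it appears to assume (aliasing modeller as m):

from io import StringIO
import modeller as m

env = m.environ()
env.io.atom_files_directory = ['.']   # directory assumed to contain f'{template_name}.pdb'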
Example #32
 def test_read_alnstructure(self):
     """Check reading a Modeller alignment structure"""
     env = self.get_environ()
     m = modeller.model(env)
     m.build_sequence('C')
     a = modeller.alignment(env)
     a.append_model(m, align_codes='test', atom_files='test')
     m = IMP.kernel.Model()
     loader = IMP.modeller.ModelLoader(a[0])
     mp = loader.load_atoms(m)
     all_atoms = IMP.atom.get_by_type(mp, IMP.atom.ATOM_TYPE)
     self.assertEqual(7, len(all_atoms))
     # Alignment structures don't have charges or CHARMM types
     self.assertEqual(IMP.atom.Charged.get_is_setup(all_atoms[0]), False)
     self.assertEqual(IMP.atom.CHARMMAtom.get_is_setup(all_atoms[0]), False)
Example #33
    def test_feature_triplet_residue(self):
        """Check triplet features with residue qualifier"""
        env = self.get_environ()
        mlib = self.get_mdt_library()
        mlib.tuple_classes.read("test/data/trpcls-residue.lib")
        feat = mdt.features.TupleType(mlib)
        m = mdt.Table(mlib, features=feat)

        mdl = modeller.model(env)
        mdl.build_sequence("AAACAAACSAA")
        a = modeller.alignment(env)
        a.append_model(mdl, align_codes="test")

        m.add_alignment(a)
        self.assertEqual([x for x in m], [6.0, 2.0, 1.0, 1.0, 0.0, 0.0])
Example #34
 def test_read_alnstructure(self):
     """Check reading a Modeller alignment structure"""
     env = self.get_environ()
     m = modeller.model(env)
     m.build_sequence('C')
     a = modeller.alignment(env)
     a.append_model(m, align_codes='test', atom_files='test')
     m = IMP.Model()
     loader = IMP.modeller.ModelLoader(a[0])
     mp = loader.load_atoms(m)
     all_atoms = IMP.atom.get_by_type(mp, IMP.atom.ATOM_TYPE)
     self.assertEqual(7, len(all_atoms))
     # Alignment structures don't have charges or CHARMM types
     self.assertEqual(IMP.atom.Charged.get_is_setup(all_atoms[0]), False)
     self.assertEqual(IMP.atom.CHARMMAtom.get_is_setup(all_atoms[0]), False)
Example #35
 def test_feature_sequence_identity(self):
     """Check sequence identity feature"""
     env = self.get_environ()
     mlib = self.get_mdt_library()
     # Put into 25% bins
     sid = mdt.features.SequenceIdentity(mlib, bins=mdt.uniform_bins(5, 0, 0.250))
     for (seq, id) in (("GGG", 0), ("AFV", 100), ("A--", 100), ("AV-", 50)):
         aln = modeller.alignment(env)
         aln.append_sequence("AFV")
         aln.append_sequence(seq)
         m = mdt.Table(mlib, features=sid)
         m.add_alignment(aln)
         self.assertEqual(m.shape, (6,))
         self.assertEqual(m.sum(), 2.0)
         self.assertEqual(m[int(id / 25)], 2.0)
Example #36
 def _structureX_seq_from_modeller(self):
     """
     return a str containing the first two lines of the sequence corresponding to structureX 
     a file named [self._id]_structureX.seq also written
     """
     env = modeller.environ()
     model = modeller.model(env, file=self._id)
     aln = modeller.alignment(env)
     aln.append_model(model, align_codes=self._id)
     out_file = self._id + "_structureX.seq"
     aln.write(file=out_file)
     out_str = open(out_file, "r").read()
     out_str = [c for c in out_str.split("\n") if c]
     out_str = "\n".join(out_str[:2]) + "\n*"
     return out_str
Example #37
def plot_profiles(aln_file, template_profile, template_code, model_profile,
                  model_code):
    e = modeller.environ()
    a = modeller.alignment(e, file=aln_file)

    template = get_profile(template_profile, a[template_code])
    model = get_profile(model_profile, a[model_code])

    # Plot the template and model profiles in the same plot for comparison:
    pylab.figure(1, figsize=(10, 6))
    pylab.xlabel('Alignment position')
    pylab.ylabel('DOPE per-residue score')
    pylab.plot(model, color='red', linewidth=2, label=model_code)
    pylab.plot(template, color='green', linewidth=2, label=template_code)
    pylab.legend()
    pylab.savefig('dope_profile_best_model.png', dpi=65)
Example #38
def get_auto_align(in_aln_file, target, templates, out_aln_file):
    import modeller
    modeller.log.none()
    env = modeller.environ()
    env.io.atom_files_directory = ['.']
    aln = modeller.alignment(env, file=in_aln_file, align_codes=target)

    with allosmod.util.temporary_directory() as tempd:
        temp_aln = os.path.join(tempd, "templates.ali")
        with open(temp_aln, 'w') as fh:
            for template in templates:
                pdb2ali(template, fh=fh)
        aln.append(file=temp_aln)
        aln.salign(overhang=30, gap_penalties_1d=(-450, -50),
                   alignment_type='tree', output='ALIGNMENT')
    aln.write(file=out_aln_file)
Example #39
def complete_pdb(env, filename, special_patches=None, transfer_res_num=False,
                 model_segment=None, patch_default=True):
    """Reads the given PDB file, reorders the atoms to match the current
       topology library, and adds any missing atoms.

       You should read topology and parameters into 'env' before calling
       this routine.

       :param env: Modeller environment.
       :type  env: :class:`environ`
       :param filename: the PDB file to read.
       :param special_patches: if set, it is expected to be a routine which
              takes one parameter (the model) and applies any patches (e.g.
              disulfide bridges).
       :param transfer_res_num: if True, the residue numbering from the
              original PDB is retained (by default, residues are renumbered
              from 1).
       :param patch_default: if True, default terminal patches are applied.

       :return: the completed model.
       :rtype: :class:`model`"""

    vars = {}
    if model_segment is not None:
        vars['model_segment'] = model_segment
    mdl = model(env, file=filename, model_format='PDB_OR_MMCIF', **vars)
    # Save original chain IDs, since generate_topology resets them
    chain_ids = [c.name for c in mdl.chains]
    aln = alignment(env)
    aln.append_model(mdl, atom_files=filename, align_codes='struc')
    aln.append_model(mdl, atom_files=filename+'.ini', align_codes='struc-ini')
    mdl.clear_topology()
    mdl.generate_topology(aln[-1], patch_default=patch_default)
    if special_patches:
        special_patches(mdl)
    # Save original seq_id, as transfer_xyz sets it
    seq_id = mdl.seq_id
    mdl.transfer_xyz(aln)
    mdl.seq_id = seq_id
    # Restore original chain IDs
    for (chain, chainid) in zip(mdl.chains, chain_ids):
        chain.name = chainid
    mdl.build(initialize_xyz=False, build_method='INTERNAL_COORDINATES')
    if transfer_res_num:
        mdl2 = model(env, file=filename, **vars)
        mdl.res_num_from(mdl2, aln)
    return mdl
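As the docstring notes, topology and parameters must be read into env before calling complete_pdb; a minimal usage sketch under that assumption (the input file name is hypothetical):

env = environ()
env.libs.topology.read(file='$(LIB)/top_heav.lib')   # heavy-atom topology library
env.libs.parameters.read(file='$(LIB)/par.lib')      # force-field parameters
mdl = complete_pdb(env, 'input.pdb', transfer_res_num=True)
mdl.write(file='input.completed.pdb')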
Example #40
    def test_dihedral_diff_periodic(self):
        """Make sure that dihedral difference features are periodic"""

        def set_omega(mdl, angle):
            ca = mdl.atoms["CA:1"]
            c = mdl.atoms["C:1"]
            n2 = mdl.atoms["N:2"]
            ca2 = mdl.atoms["CA:2"]
            n2.x = n2.y = n2.z = 0.0
            c.x = -2.0
            c.y = c.z = 0.0
            ca.x = -2.0
            ca.y = 2.0
            ca.z = 0.0
            ca2.x = 0.0
            ca2.y = 2.0 * math.cos(math.pi * angle / 180.0)
            ca2.z = 2.0 * math.sin(math.pi * angle / 180.0)

        env = self.get_environ()
        mlib = self.get_mdt_library()
        # Make bins start at slightly less than -180, to allow for floating
        # point rounding
        omegadiff = mdt.features.OmegaDihedralDifference(mlib, mdt.uniform_bins(36, -180.01, 10))
        # Note that difference must be shortest around the circle, so
        # 100.0 - (-100.0) is not 200 degrees but -160 degrees
        for dih1, dih2, expected in (
            (80.0, 80.0, 0.0),
            (80.0, -80.0, -160.0),
            (-80.0, 80.0, 160.0),
            (-100.0, 100.0, -160.0),
            (100.0, -100.0, 160.0),
        ):
            m = mdt.Table(mlib, features=omegadiff)
            a = modeller.alignment(env)
            for d in dih1, dih2:
                mdl = modeller.model(env)
                mdl.build_sequence("CC")
                set_omega(mdl, d)
                a.append_model(mdl, atom_files="test", align_codes="test")
            m.add_alignment(a, sympairs=True)
            # 2 data points, 1 for each residue
            self.assertInTolerance(m.sample_size, 2.0, 1e-5)
            # Last residue has no omega, so is always undefined
            self.assertInTolerance(m[-1], 1.0, 1e-5)
            expected_bin = int((expected + 180.0) / 10.0)
            self.assertInTolerance(m[expected_bin], 1.0, 1e-5)
Example #41
def align_template_to_reference(msmseed, ref_msmseed):
    import modeller
    import tempfile
    import shutil
    import copy
    import os
    temp_dir = tempfile.mkdtemp()
    try:
        os.chdir(temp_dir)
        alignment_file = open('aln_tmp.pir', 'w')
        aln = _PIR_alignment(ref_msmseed.template_sequence,
                             ref_msmseed.template_id,
                             msmseed.template_sequence, msmseed.template_id)
        alignment_file.writelines(aln)
        alignment_file.close()
        template_file = open(msmseed.template_id + '.pdb', 'w')
        template_pdb = msmseed.template_structure
        template_pdb.writeFile(template_pdb.topology, template_pdb.positions,
                               template_file)
        template_file.close()
        ref_pdb = ref_msmseed.template_structure
        ref_file = open(ref_msmseed.template_id + '.pdb', 'w')
        ref_pdb.writeFile(ref_pdb.topology, ref_pdb.positions, ref_file)
        ref_file.close()
        modeller.log.none()
        env = modeller.environ()
        env.io.atom_files_directory = temp_dir
        aln = modeller.alignment(env,
                                 file='aln_tmp.pir',
                                 align_codes=(ref_msmseed.template_id,
                                              msmseed.template_id))
        mdl = modeller.model(env, file=ref_msmseed.template_id + '.pdb')
        mdl2 = modeller.model(env, file=msmseed.template_id + '.pdb')
        atmsel = modeller.selection(mdl).only_atom_types('CA')
        r = atmsel.superpose(mdl2, aln)
        msmseed.rmsd_to_reference = copy.deepcopy(r.rms)
    except Exception as e:
        msmseed.error_message = e.message
    finally:
        shutil.rmtree(temp_dir)
    return msmseed
Example #42
def plot_profiles(aln_file, template_profile: list, model_file, model_code):
    e = modeller.environ()
    a = modeller.alignment(e, file=aln_file)

    model = get_profile(model_file, a[model_code])

    # Plot the template and model profiles in the same plot for comparison:
    pylab.figure(1, figsize=(10, 6))
    pylab.xlabel('Alignment position')
    pylab.ylabel('DOPE per-residue score')
    rank = 0
    pylab.plot(model, color=tableau20[rank], linewidth=2, label=model_code)
    for template_code in template_profile:
        rank = rank + 1
        templatefile = template_code + ".profile"
        template = get_profile(templatefile, a[template_code])
        pylab.plot(template,
                   color=tableau20[rank],
                   linewidth=2,
                   label=template_code)
    pylab.legend()
    pylab.savefig('dope_profile_best_model.png', dpi=65)
Example #43
def plot(target, template, model, dir):
    """Plot model and template profiles."""
    e = mod.environ()

    seq_code = target.replace(".ali", "")
    alnfile = os.path.join(dir,
                           seq_code + "-" + template.replace(".pdb", ".ali"))
    a = mod.alignment(e, file=alnfile)

    pdb_code = template.replace(".pdb", "")
    target_profile = os.path.join(dir, pdb_code + ".profile")
    model_profile = os.path.join(dir, model.replace(".pdb", ".profile"))
    t = get_profile(target_profile, a[pdb_code])
    m = get_profile(model_profile, a[seq_code])

    # plot the template and model profiles in the same plot for comparison
    pylab.figure(1, figsize=(10, 6))
    pylab.xlabel("Alignment position")
    pylab.ylabel("DOPE per-residue score")
    pylab.plot(m, color="red", linewidth=2, label=f"Model ({model})")
    pylab.plot(t, color="green", linewidth=2, label=f"Template ({template})")
    pylab.legend()
    pylab.title("DOPE score model vs. template")
    pylab.savefig(f"{seq_code}-{pdb_code}_dope.png", dpi=150)
Example #44
    def _create_aligment(self, env, base_models):

        _log.debug("creating alignments for %s with %s pdbs" %
                   (self.seqrecord.id, len(base_models)))
        aligned_models = []

        env.io.atom_files_directory = [self.out_folder + '/']

        aln = alignment(env)

        aln.append_sequence(str(self.seqrecord.seq))
        aln[0].code = str(self.seqrecord.id)

        for i, pdb_chain_file_path in enumerate(base_models, 1):
            # TODO: clean up this ugly parsing
            code = pdb_chain_file_path.split("/")[-1].replace(".ent",
                                                              "").replace(
                                                                  "pdb", "")
            m = model(env, file=code)
            aln.append_model(m, align_codes=code)
            aln[i].code = code
            aligned_models.append(code)

        aln.malign()
        aln.id_table(matrix_file=self.seqrecord.id + '_family.mat')

        aln.write(file=self.model_directory() + "/" + self.seqrecord.id +
                  '.ali',
                  alignment_format='PIR')
        assert os.path.exists(self.model_directory() + "/" +
                              self.seqrecord.id +
                              '.ali'), "NOOOOOOOOOOOO!!!!:  " + os.getcwd(
                              ) + "/" + self.seqrecord.id + '.ali'
        aln.write(file=self.model_directory() + self.seqrecord.id + '.pap',
                  alignment_format='PAP')
        return aligned_models
Example #45
def get_profile(profile_file, seq):
    """Read `profile_file` into a Python array, and add gaps corresponding to
       the alignment sequence `seq`."""
    # Read all non-comment and non-blank lines from the file:
    f = open(profile_file)
    vals = []
    for line in f:
        if not line.startswith('#') and len(line) > 10:
            spl = line.split()
            vals.append(float(spl[-1]))
    # Insert gaps into the profile corresponding to those in seq:
    for n, res in enumerate(seq.residues):
        for gap in range(res.get_leading_gaps()):
            vals.insert(n, None)
    # Add a gap at position '0', so that we effectively count from 1:
    vals.insert(0, None)
    return vals

e = modeller.environ()
a = modeller.alignment(e, file='TvLDH-1bdmA.ali')

template = get_profile('1bdmA.profile', a['1bdmA'])
model = get_profile('TvLDH.profile', a['TvLDH'])

# Plot the template and model profiles in the same plot for comparison:
pylab.figure(1, figsize=(10,6))
pylab.xlabel('Alignment position')
pylab.ylabel('DOPE per-residue score')
pylab.plot(model, color='red', linewidth=2, label='Model')
pylab.plot(template, color='green', linewidth=2, label='Template')
pylab.legend()
pylab.savefig('dope_profile.png', dpi=65)
Example #46
def align_res_nums(key_pdb_file, key_chain_id, value_pdb_file, value_chain_id):
    """Determine which residues in one PDB file correspond to which in another PDB file.

    Parameters
    ----------
    key_pdb_file : string
        The location of the pdb file whose residue numbers will be keys in the
        returned dictionary.
    key_chain_id : string
        The chain of key_pdb_file that will be aligned.
    value_pdb_file : string
        The location of the pdb file whose residue numbers will be values in the
        returned dictionary.
    value_chain_id : string
        The chain of value_pdb_file that will be aligned.

    Returns
    -------
    dict_residue_nums : dictionary{string : string}
        The keys and values are string-typed residue numbers (from key_pdb_file and
        value_pdb_file).  Any residues that are missing from value_pdb_file
        will be assigned the value "NA".  If any residues in key_pdb_file are
        classified as HETATMs, then they will only included in dict_residue_nums if they
        are MSE, MEX, or ABU.  This matches MODELLER's behavior.
    """

    # A temporary directory to store the output of Modeller's alignment.
    temp_dir_path = tempfile.mkdtemp()
    env = modeller.environ()
    aln = modeller.alignment(env)
    key_model = modeller.model(env,
                               file=key_pdb_file,
                               model_segment=("FIRST:%s" % (key_chain_id),
                                              "LAST:%s" % (key_chain_id)))
    aln.append_model(key_model,
                     atom_files=key_pdb_file,
                     align_codes="key%s" % (key_chain_id))
    value_model = modeller.model(env,
                                 file=value_pdb_file,
                                 model_segment=("FIRST:%s" % (value_chain_id),
                                                "LAST:%s" % (value_chain_id)))
    aln.append_model(value_model,
                     atom_files=value_pdb_file,
                     align_codes="value%s" % (value_chain_id))
    aln.salign()
    salign_out_loc = temp_dir_path + "key%s_value%s_salign_output.ali" % (
        key_chain_id, value_chain_id)
    aln.write(file=salign_out_loc, alignment_format="PIR")
    with open(salign_out_loc, "r") as alignment_opened:
        alignment_lines = alignment_opened.readlines()
        # Ignore the header lines.  The format requires a 2-line header; there may be a
        # blank line before this.
        if alignment_lines[0][0] == ">":
            line_index = 2
        else:
            line_index = 3
        key_sequence_aligned = ""
        while True:
            next_line = alignment_lines[line_index].strip()
            key_sequence_aligned += next_line
            if next_line[len(next_line) - 1] == "*":
                key_sequence_aligned = key_sequence_aligned[:-1]
                break
            line_index += 1
        if alignment_lines[line_index + 1][0] == ">":
            line_index += 3
        else:
            line_index += 4
        value_sequence_aligned = ""
        while True:
            next_line = alignment_lines[line_index].strip()
            value_sequence_aligned += next_line
            if next_line[len(next_line) - 1] == "*":
                value_sequence_aligned = value_sequence_aligned[:-1]
                break
            line_index += 1
    shutil.rmtree(temp_dir_path)
    key_pdb_res_numbers = get_numbers_from_pdb(key_pdb_file, key_chain_id)
    value_pdb_res_numbers = get_numbers_from_pdb(value_pdb_file,
                                                 value_chain_id)
    dict_residue_nums = {}
    # value_residues_passed is incremented whenever the iteration reaches a spot in the
    # alignment where the value sequence has a residue.
    value_residues_passed = 0
    key_residues_passed = 0
    for i in range(len(value_sequence_aligned)):
        # If both key_sequence_aligned and value_sequence_aligned have residues at
        # the position, then add a dictionary entry mapping the residue number in key
        # to the residue number in value.
        if (key_sequence_aligned[i] != "-") and (value_sequence_aligned[i] !=
                                                 "-"):
            current_key_resnum = key_pdb_res_numbers[key_residues_passed]
            current_value_resnum = value_pdb_res_numbers[value_residues_passed]
            dict_residue_nums[current_key_resnum] = current_value_resnum
            value_residues_passed += 1
            key_residues_passed += 1
        # If key_sequence_aligned has a residue where value_sequence_aligned has a gap,
        # then create a dictionary entry with value NA.
        elif (key_sequence_aligned[i] != "-") and (value_sequence_aligned[i]
                                                   == "-"):
            dict_residue_nums[key_pdb_res_numbers[key_residues_passed]] = "NA"
            key_residues_passed += 1
        # If key_sequence_aligned has a gap where value_sequence_aligned has a residue,
        # then don't add a dictionary entry.
        elif (key_sequence_aligned[i]
              == "-") and (value_sequence_aligned[i] != "-"):
            value_residues_passed += 1
    return dict_residue_nums
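To make the mapping rules above concrete, here is a small self-contained sketch with made-up aligned sequences and residue numbers (none of these values come from the original function); it reproduces the three branches of the loop:

# Toy illustration of the residue-number mapping above; sequences and numbers are hypothetical.
key_sequence_aligned = "AC-DE"
value_sequence_aligned = "A-GDE"
key_pdb_res_numbers = [10, 11, 12, 13]    # residues present in the "key" chain
value_pdb_res_numbers = [5, 6, 7, 8]      # residues present in the "value" chain

dict_residue_nums = {}
key_passed = value_passed = 0
for k, v in zip(key_sequence_aligned, value_sequence_aligned):
    if k != "-" and v != "-":             # residue in both structures: map key -> value
        dict_residue_nums[key_pdb_res_numbers[key_passed]] = value_pdb_res_numbers[value_passed]
        key_passed += 1
        value_passed += 1
    elif k != "-":                        # key residue aligned to a gap: map to "NA"
        dict_residue_nums[key_pdb_res_numbers[key_passed]] = "NA"
        key_passed += 1
    elif v != "-":                        # gap in key: skip, but advance the value counter
        value_passed += 1

print(dict_residue_nums)                  # {10: 5, 11: 'NA', 12: 7, 13: 8}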
Example #47
def runmodeller(target,database_path='default',models_path='default',templates_path='default',working='default',\
    mod_per_temp=20,excludes=[],max_seq_id=0.95,min_seq_id=0.25,max_eval=0.01,num_iter=1,gaps=False):
    '''File requirements: a .ali sequence file and a pdb_95.pir database file.

       Parameters:
       target: path to a .ali file specifying the sequence of the protein
       models_path: the folder to save the models to. Default is ./models
       templates_path: the folder to save the templates to. Default is ./templates
       database_path: the path to the folder containing pdb_95.pir. Default is ./database
       working: the working directory. Default is ./working
       mod_per_temp: number of models generated by MODELLER per template. Default is 20
       excludes: list of PDB codes to be excluded from the templates
       max_seq_id: maximum sequence identity for a template to be considered. Ranges from 0 to 1. Default is 0.95
       min_seq_id: minimum sequence identity for a template to be considered. Ranges from 0 to 1. Default is 0.25
       max_eval: maximum alignment E-value for a hit to be kept during the profile search. Default is 0.01
       num_iter: number of profile-building iterations against the sequence database. Default is 1
       gaps: whether gaps are allowed in the target during profile building. Default is False

       Returns a list of paths of the generated models.
    '''
    #set log to verbose
    modeller.log.verbose()
    env = modeller.environ()
    entering_dir = os.getcwd()

    #set paths
    if database_path == 'default':
        database_dir = os.path.abspath('./database')
    else:
        database_dir = os.path.abspath(database_path)
    target_file = os.path.basename(target)
    target_dir = os.path.dirname(os.path.abspath(target))
    if target_file.endswith('.ali'):
        target_name = target_file[:-4]
    if working == 'default':
        if not os.path.exists('./working'):
            os.mkdir('./working')
        working_dir = os.path.abspath('./working')
    else:
        working_dir = os.path.abspath(working)
        if not os.path.exists(working_dir):
            os.mkdir(working_dir)

    if models_path == 'default':
        if not os.path.exists('./models'):
            os.mkdir('./models')
        models_dir = os.path.abspath('./models')
    else:
        models_dir = os.path.abspath(models_path)
        if not os.path.exists(models_dir):
            os.mkdir(models_dir)

    if templates_path == 'default':
        if not os.path.exists('./templates'):
            os.mkdir('./templates')
        template_dir = os.path.abspath('./templates')
    else:
        template_dir = os.path.abspath(templates_path)
        if not os.path.exists(templates_path):
            os.mkdir(templates_path)
    #cd to the working dir; the function cds back at the end
    os.chdir(working_dir)
    #-- Prepare the input files
    #-- Read in the sequence database

    sdb = modeller.sequence_db(env)
    sdb.read(seq_database_file=database_dir + '/pdb_95.pir',
             seq_database_format='PIR',
             chains_list='ALL',
             minmax_db_seq_len=(30, 4000),
             clean_sequences=True)

    #-- Write the sequence database in binary form
    sdb.write(seq_database_file=database_dir + '/pdb_95.bin',
              seq_database_format='BINARY',
              chains_list='ALL')

    #-- Now, read in the binary database
    sdb.read(seq_database_file=database_dir + '/pdb_95.bin',
             seq_database_format='BINARY',
             chains_list='ALL')

    #-- Read in the target sequence/alignment
    target_aln = modeller.alignment(env)
    target_aln.append(file=target_dir + '/' + target_file,
                      alignment_format='PIR',
                      align_codes='ALL')

    #-- Convert the input sequence/alignment into
    #   profile format
    target_prf = target_aln.to_profile()

    #-- Scan sequence database to pick up homologous sequences
    target_prf.build(sdb,
                     matrix_offset=-450,
                     rr_file='${LIB}/blosum62.sim.mat',
                     gap_penalties_1d=(-500, -50),
                     n_prof_iterations=num_iter,
                     check_profile=True,
                     max_aln_evalue=max_eval,
                     gaps_in_target=gaps)

    #-- Write out the profile in text format
    target_prf.write(file=working_dir + '/' + target_name + '_profile.prf',
                     profile_format='TEXT')

    #-- Convert the profile back to alignment format
    target_aln = target_prf.to_alignment()

    #-- Write out the alignment file
    target_aln.write(file=working_dir + '/' + target_name + '_profile.ali',
                     alignment_format='PIR')
    #CLEAN UP
    del sdb, target_aln, target_prf, env

    #Read the name of the templates
    templates = []
    txt_input = open(working_dir + '/' + target_name + '_profile.prf', 'r')
    for eachline in txt_input:
        if eachline.lstrip(' ')[0] == '#':
            continue
        entries = eachline.split()
        if len(entries) != 13:
            continue
        if entries[2] != 'X':
            continue
        name = entries[1]
        seqid = float(entries[10])
        templates.append(template(name, seqid))
    txt_input.close()
    del entries, eachline, txt_input, name, seqid

    #Select templates
    i = 0
    while (i < len(templates)):
        if( (templates[i].code in excludes) \
             or (templates[i].seqid > max_seq_id*100) \
             or (templates[i].seqid < min_seq_id*100) \
          ):
            templates.pop(i)
        else:
            i += 1

    #Download templates pdb
    for eachtemplate in templates:
        pdbname = eachtemplate.code.upper()
        url = 'http://www.rcsb.org/pdb/files/%s.pdb' % pdbname
        pdb_download = open(template_dir + '/' + pdbname.lower() + '.pdb', 'w')
        pdb_download.write(urllib.urlopen(url).read())
        pdb_download.close()

    del pdb_download, pdbname, url

    #model alignment
    alnlist = []
    for i in range(len(templates)):
        env = modeller.environ()
        aln = modeller.alignment(env)
        mdl = modeller.model(env,
                             file=template_dir + '/' + templates[i].code,
                             model_segment=('FIRST:' + templates[i].chain,
                                            'LAST:' + templates[i].chain))
        aln.append_model(mdl,
                         align_codes=templates[i].name,
                         atom_files=templates[i].filename)
        aln.append(file=target_dir + '/' + target_file,
                   align_codes=target_name)

        aln.align2d()
        aln.write(file=working_dir + '/' + target_name + '-' +
                  templates[i].name + '.ali',
                  alignment_format='pir')
        aln.write(file=working_dir + '/' + target_name + '-' +
                  templates[i].name + '.pap',
                  alignment_format='pap')
        alnlist.append(working_dir + '/' + target_name + '-' +
                       templates[i].name + '.ali')
    del i, aln, env

    #Make models
    filelist = []
    for i in range(len(alnlist)):
        env = modeller.environ()
        env.io.atom_files_directory = [target_dir, working_dir, template_dir]
        a = modeller.automodel.automodel(
            env,
            alnfile=alnlist[i],
            knowns=templates[i].name,
            sequence=target_name,
            assess_methods=(modeller.automodel.assess.DOPE,
                            modeller.automodel.assess.GA341))
        a.starting_model = 1
        a.ending_model = mod_per_temp
        a.make()

        for j in range(1, mod_per_temp + 1):
            scrname = target_name + '.B9999' + str(j).zfill(4) + '.pdb'
            tgtname = models_dir + '/' + target_name + '_' + templates[
                i].code + '.B9999' + str(j).zfill(4) + '.pdb'
            os.rename(scrname, tgtname)
            filelist.append(tgtname)
    del env, a, scrname, tgtname, i, j

    os.chdir(entering_dir)
    return filelist
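runmodeller relies on a small `template` record class that is not shown in this excerpt; only the attributes used above (`name`, `code`, `chain`, `filename`, `seqid`) are visible. A minimal sketch consistent with that usage, assuming profile entries are named like '1abcA' (four-letter PDB code followed by the chain ID), might look like the following; the call at the end is likewise hypothetical:

class template:
    """Hypothetical reconstruction of the helper record used by runmodeller."""
    def __init__(self, name, seqid):
        self.name = name                    # entry name from the .prf file, e.g. '1abcA'
        self.seqid = seqid                  # sequence identity reported by profile.build()
        self.code = name[:4].lower()        # assumed: first four characters are the PDB code
        self.chain = name[4:] or 'A'        # assumed: trailing character is the chain ID
        self.filename = self.code + '.pdb'  # matches the name used when the PDB file is downloaded

# Hypothetical invocation (paths are placeholders):
# models = runmodeller('sequences/my_target.ali', database_path='./database',
#                      mod_per_temp=5, excludes=['1xyz'], max_seq_id=0.9)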
Example #48
def get_profile(profile_file, seq):
    """Read `profile_file` into a Python array, and add gaps corresponding to the alignment sequence `seq`."""
    #CG# read all non-comment and non-blank lines from the file:
    f = open(profile_file)
    vals = []
    for line in f:
        if not line.startswith('#') and len(line) > 10:
            spl = line.split()
            vals.append(float(spl[-1]))
    #CG# insert gaps into the profile corresponding to those in seq:
    for n, res in reversed(list(enumerate(seq.residues))):  # reverse order so insertions don't shift later indices
        for gap in range(res.get_leading_gaps()):
            vals.insert(n, None)
    #CG# add a gap at position '0', so that we effectively count from 1:
    vals.insert(0, None)
    return vals
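This helper (and the other copies of it in this collection, which use an r_enumerate reverse-enumeration helper) inserts the gaps while walking the residues from the end of the sequence backwards, so that an insertion never shifts the index of a residue that has not been processed yet. A toy demonstration of why the order matters, with made-up scores and gap counts:

# Toy illustration, not taken from the original scripts.
vals = [0.1, 0.2, 0.3]      # one per-residue score for residues 0, 1, 2
leading_gaps = [0, 1, 1]    # alignment gaps preceding each residue

for n in reversed(range(len(leading_gaps))):
    for _ in range(leading_gaps[n]):
        vals.insert(n, None)

print(vals)   # [0.1, None, 0.2, None, 0.3]
# A forward pass (plain enumerate) would give [0.1, None, None, 0.2, 0.3],
# misplacing the gap that belongs before the last residue.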


e = modeller.environ()
a = modeller.alignment(e, file='res_align.ali')

template = get_profile('4GRV.profile', a['4GRV'])
model = get_profile('orexin.profile', a['orexin'])

#CG# plot the template and model profiles in the same plot for comparison:
pylab.figure(1, figsize=(10, 6))
pylab.xlabel('Alignment position')
pylab.ylabel('DOPE per-residue score')
pylab.plot(model, color='red', linewidth=2, label='Model')
pylab.plot(template, color='green', linewidth=2, label='Template')
pylab.legend()
pylab.savefig('dope_profile.png', dpi=65)
def get_profile(profile_file, seq):
    """Read `profile_file` into a Python array, and add gaps corresponding to the alignment sequence `seq`."""
    # Read all non-comment and non-blank lines from the file:
    f = open(profile_file)
    vals = []
    for line in f:
        if not line.startswith('#') and len(line) > 10:
            spl = line.split()
            vals.append(float(spl[-1]))
    # Insert gaps into the profile corresponding to those in seq:
    for n, res in r_enumerate(seq.residues):
        for gap in range(res.get_leading_gaps()):
            vals.insert(n, None)
    # Add a gap at position '0', so that we effectively count from 1:
    vals.insert(0, None)
    return vals


e = modeller.environ()
a = modeller.alignment(e, file='W2T758-3n2gD.ali')

template = get_profile('3n2gD.profile', a['3n2gD'])
model = get_profile('W2T758.profile', a['W2T758'])

# Plot the template and model profiles in the same plot for comparison:
pylab.figure(1, figsize=(10, 6))
pylab.xlabel('Alignment position')
pylab.ylabel('DOPE per-residue score')
pylab.plot(model, color='red', linewidth=2, label='Model')
pylab.plot(template, color='green', linewidth=2, label='Template')
pylab.legend()
pylab.savefig('dope_profile.png', dpi=65)
Example #50
    def modelMissingAtoms(self, pdbFilename, outputFilename, chain=' ', debug = False, allHydrogen = False):
        """Model missing atoms/residues in a specified PDB file using MODELLER.
      
        REQUIRED ARGUMENTS
          pdbFilename - the filename of the PDB file to model missing atoms and residues for
          outputFilename - the filename for the desired final model

        OPTIONAL ARGUMENTS
          chain - the one-character chain ID of the chain to model (default ' ')
          debug - flag to print extra debug output and leave temporary directory (default False)
          allHydrogen - flag to build an all-hydrogen model instead of a heavy-atom-only model (default False)

        NOTES

        The specified chain from pdbFilename is processed through MODELLER to build missing
        atoms and residues specified in the SEQRES entry of the PDB file but not present in
        the PDB file.
        
        This procedure is loosely based on the protocol appearing at
        
        http://salilab.org/modeller/wiki/Missing_residues
        
        The complete sequence is read from the SEQRES fields, and the DBREF field is used to
        determine the span of residues described in the SEQRES fields.  A heavy-atom topology
        is constructed in MODELLER for the complete sequence, coordinates present in the PDB file
        are transferred, and the remaining heavy-atom coordinates are built from ideal geometry.
        Finally, a single standard simulated-annealing-based modeling step is performed using
        the standard automodel protocol, allowing only the atoms and residues that were undefined in
        the PDB file to move.
        
        """
        
        # Ensure specified PDB file exists.
        import os.path
        if not os.path.exists(pdbFilename):
            raise ParameterException, "Specified PDB file %s not found." % pdbFilename
        
        # Append full path to pdbFilename and outputFilename
        import os.path
        pdbFilename = os.path.abspath(pdbFilename)
        outputFilename = os.path.abspath(outputFilename)
                
        # Create a temporary directory for running MODELLER.
        import tempfile
        import os.path
        tmpdir = tempfile.mkdtemp()
        if debug: print "tmpdir = %s" % tmpdir
        
        # Get the complete sequence without chain breaks from the SEQRES/DBREF fields of the source PDB file.
        first_residue_id, complete_sequence = self.getCompleteSequence(pdbFilename, chain)
        nresidues = len(complete_sequence)
        last_residue_id = first_residue_id + nresidues - 1
        
        # Get the sequence of residues that are at least partially present in the PDB file as a dictionary.
        # present_sequence_dict[residue_id] is the one-letter-code of the residue residue_id, if there are any ATOM records for this residue.
        present_sequence_dict = self.getPresentSequence(pdbFilename, chain)
                
        # Generate alignment of the template sequence (residues for which any coordinates are defined) against the target (complete sequence from SEQRES/DBREF)
        present_sequence = ""
        for residue_id in range(first_residue_id, first_residue_id + nresidues):
            if present_sequence_dict.has_key(residue_id):
                # TODO: Check integrity against complete_sequence.            
                present_sequence += present_sequence_dict[residue_id]
            else:
                present_sequence += '-'

        # Change working directory to temporary directory.
        import os
        olddir = os.getcwd()
        os.chdir(tmpdir)

        # Generate alignment file for MODELLER.
        import os
        alignment_filename = os.path.join(tmpdir, 'model.ali')
        alignment_file = open(alignment_filename, 'w')
        print >> alignment_file, ">P1;%s" % "template"
        print >> alignment_file, "%s:%s:%d:%s:%d:%s:%s:%s:%s:%s" % ( "structure", pdbFilename, min(present_sequence_dict.keys()), chain, max(present_sequence_dict.keys()), chain, " ", " ", " ", " " )
        print >> alignment_file, "%s*" % present_sequence
        print >> alignment_file, ""    
        print >> alignment_file, ">P1;%s" % "target"
        print >> alignment_file, "%s:%s:%d:%s:%d:%s:%s:%s:%s:%s" % ( "sequence", "target", first_residue_id, chain, last_residue_id, chain, " ", " ", " ", " " )
        print >> alignment_file, "%s*" % complete_sequence
        alignment_file.close()
        if debug:
            import commands
            print "alignment file:"
            print commands.getoutput('cat %(alignment_filename)s' % vars())
        
        # Call MODELLER to generate topology, transfer coordinates, and build from internal coordinates.
        import modeller
        import modeller.automodel
        
        # Create a new environment.
        env = modeller.environ()
        
        # Specify the topology and parameters to use.
        # TODO: Is this necessary, or can we rely on the defaults?
        env.libs.topology.read(file='$(LIB)/top_heav.lib')
        env.libs.parameters.read(file='$(LIB)/par.lib')
        
        # Read in alignment.
        aln = modeller.alignment(env)
        print alignment_filename
        aln.append(file=alignment_filename, align_codes='all')
        
        # Create a model.
        model = modeller.model(env)
        
        # Generate the topology from the target sequence.
        model.generate_topology(aln['target'])
        
        # Transfer defined coordinates from template.
        model.transfer_xyz(aln)
        
        # Determine which atoms are undefined because they are missing in the template, and create a selection from them.
        missing_atom_indices = []
        for atom_index in range(len(model.atoms)):
            atom = model.atoms[atom_index]
            if atom.x == -999:
                missing_atom_indices.append(atom_index)
                
        # DEBUG: Write model coordinates to a PDB file.
        model.write(file=os.path.join(tmpdir,'transferred.pdb'))
        
        # Build the remaining undefined atomic coordinates from ideal internal coordinates stored in residue topology files.
        model.build(initialize_xyz=False, build_method='INTERNAL_COORDINATES')
        
        # DEBUG: Write model coordinates to a PDB file.
        if debug: model.write(file=os.path.join(tmpdir,'built.pdb'))
        
        # Override the 'select_atoms' routine in the 'automodel' class to select only the atoms with undefined atomic coordinates in template PDB.
        if (allHydrogen):
            class mymodel(modeller.automodel.allhmodel):
                def select_atoms(self):
                    missing_atoms = modeller.selection()
                    for atom_index in missing_atom_indices:
                        missing_atoms.add(self.atoms[atom_index])
                    return missing_atoms
        else:
            class mymodel(modeller.automodel.automodel):
                def select_atoms(self):
                    missing_atoms = modeller.selection()
                    for atom_index in missing_atom_indices:
                        missing_atoms.add(self.atoms[atom_index])
                    return missing_atoms

        # Ensure selected atoms feel all nonbonded interactions.
        env.edat.nonbonded_sel_atoms = 1
        
        # Set up automodel.
        #a = mymodel(env, inifile='built.pdb', alnfile=alignment_filename, knowns='template', sequence='target')
        a = mymodel(env, alnfile=alignment_filename, knowns='template', sequence='target')
        
        # Set parameters for automodel.
        # Build only one model.
        # TODO: Have more models built by default (perhaps 50?)
        a.starting_model = 1
        a.ending_model = 1
        
        # Generate model(s).
        a.make()

        # TODO: Rescore models and select the best one.
        # For now, we only use the first model.
        final_model_summary = a.outputs[0]
        
        # Copy resulting model to desired output PDB filename.
        import shutil
        shutil.copy(final_model_summary['name'], outputFilename)
                
        # Restore working directory.
        os.chdir(olddir)
        
        # Clean up temporary directory.
        if (not debug):
            for filename in os.listdir(tmpdir):
                os.remove(os.path.join(tmpdir,filename))
            os.rmdir(tmpdir)

        return
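A hypothetical call, for orientation only: the containing class is not shown in this excerpt, so its name and the helper methods getCompleteSequence/getPresentSequence are assumed to be provided by it.

# Hypothetical usage sketch (the class name and its construction are assumptions):
# fixer = PDBCompleter()
# fixer.modelMissingAtoms('1abc.pdb', '1abc_complete.pdb', chain='A', debug=False)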
Example #51
        '-t',
        '--target',
        help='mobile pdb structure file to transfer sequence on',
        type=str)
    parser.add_argument(
        '-r',
        '--ref',
        help='reference pdb structure file with sequence to transfer',
        type=str)
    args = parser.parse_args()

    env = modeller.environ()
    lib = '/usr/lib/modeller9.23/modlib'
    env.libs.topology.read(file=f'{lib}/top_heav.lib')
    env.libs.parameters.read(file=f'{lib}/par.lib')
    aln = modeller.alignment(env)

    target = modeller.model(env, file=args.target)
    target_name = os.path.basename(args.target).split('.')[0]
    aln.append_model(target, align_codes=target_name)

    ref = modeller.model(env, file=args.ref)
    ref_name = os.path.basename(args.ref).split('.')[0]
    aln.append_model(ref, align_codes=ref_name)

    aln.align()
    # aln.align3d()
    alnfile = f'{target_name}_{ref_name}.seq'
    aln.write(file=alnfile)

    mdl = modeller.model(env)
Example #52
def _build_models(structfname, basedir, nmodels, refstructure, verbose,
                  seq_rep_list):
    """
    Builds replicate structural models of a list of protein sequences.

    seq_rep_list is a list of (sequence,replicates) pairs, giving each
    sequence object to be modeled and the number of replicates needed for
    that sequence object

    SIDE EFFECT: models are placed in basedir/sequence_id directory
    """

    # set up path links, assuming current working directory
    workingdir  = os.getcwd()
    structfname = os.path.normpath(os.path.join(workingdir, structfname))
    basedir     = os.path.normpath(os.path.join(workingdir, basedir))

    # calculate total number of reps for each sequence id
    reps_per_id = {}
    for seq,reps in seq_rep_list:
        if seq.identifier in reps_per_id:
            reps_per_id[seq.identifier] += reps
        else:
            reps_per_id[seq.identifier] = reps

    for seq,reps in seq_rep_list:
        # calculate some information on total reps for this id and how many
        # models to build for this particular sequence
        total_reps_needed = reps_per_id[seq.identifier]
        models_per_rep    = round(nmodels / total_reps_needed)
        if models_per_rep < 1:
            models_per_rep = 1
        mynmodels = models_per_rep * reps

        # check this sequence's existing structures; bail out if done
        mindex = 1
        outdir = basedir + os.path.sep + seq.identifier
        if not os.path.isdir(outdir):
            os.makedirs(outdir)
        else:
            existing_fnames = [ x.split(os.path.sep)[-1] for x in \
                                glob.glob(outdir + os.path.sep + 'rep*.pdb') ]
            existing_reps = [ int(x.split('rep')[1].split('.pdb')[0]) for \
                              x in existing_fnames]
            if existing_reps:
                existing_reps.sort(reverse=True)
                last_rep = existing_reps[0]
                if last_rep < total_reps_needed:
                    mindex = existing_reps[0] + 1
                else:
                    continue

        # set up temporary directory for modeller execution
        with tempfile.TemporaryDirectory(prefix=dnameprefix) as tempdir:
            os.chdir(tempdir)

            # set up modeller environment
            if verbose:
                modeller.log.verbose()
            else:
                modeller.log.none()
            env = modeller.environ()
            env.io.atom_files_directory = [workingdir]

            # set up complete alignment
            aln = modeller.alignment(env)
            aln.append(file=structfname, remove_gaps=False)
            knowns = [s.code for s in aln]
            aln.append_sequence(seq.sequence)
            aln[-1].code = seq.identifier

            # write alignment - modeller doesn't like alignment in memory
            full_aln_fname = 'structaligntemp.ali'
            aln.write(full_aln_fname, alignment_format='PIR')

            # set up model assessments
            ASSESS_METHODS = [modeller.automodel.assess.DOPE,
                              modeller.automodel.assess.DOPEHR]
            ASSESS_NAMES   = ["DOPE score", "DOPE-HR score"]

            a = modeller.automodel.dope_loopmodel(env, alnfile=full_aln_fname,
                                                  knowns=knowns,
                                                  sequence=seq.identifier,
                                                  assess_methods=ASSESS_METHODS)
            a.starting_model = 1          # index of the first model
            a.ending_model   = mynmodels  # index of the last model
            # adjust optimization parameters
            a.library_schedule = modeller.automodel.autosched.slow
            a.md_level         = modeller.automodel.refine.slow
            a.make()  # do homology modeling

            # evaluate structural models
            ok_models = [ x for x in a.outputs if x["failure"] is None ]
            score_results = []

            for data in ok_models:
                fname  = data["name"]
                myscrs = []
                for score_name in ASSESS_NAMES:
                    myscrs.append(data[score_name])
                ave_score = sum(myscrs) / len(myscrs)
                score_results.append((ave_score, fname, myscrs))

            score_results.sort()
            best_models = score_results[:reps]
            rest_models = score_results[reps:]

            # map to reference structure
            refseq = aln[0]
            if refstructure:
                refseq = aln[refstructure]

            refcode  = refseq.code
            refpdbf  = refseq.atom_file
            refrange = refseq.range
            refmdl   = modeller.model(env, file=refpdbf, model_segment=refrange)
            refpos   = modeller.selection(refmdl).only_atom_types('CA')

            # get best models
            final_files = []
            for (score,infname,scores) in best_models:
                outfname = outdir + os.path.sep + 'rep{}.pdb'.format(mindex)
                final_files.append(outfname)

                # build alignment
                myaln = modeller.alignment(env)
                myaln.append(file=structfname, align_codes=(refcode),
                             remove_gaps=False)
                myaln.append_sequence(seq.sequence)
                myaln[-1].code      = seq.identifier
                myaln[-1].atom_file = infname

                # read pdb file
                mymodel = modeller.model(env, file=infname)
                # translate to reference coordinates
                r = refpos.superpose(mymodel, myaln)
                # write translated pdb file
                mymodel.write(file=outfname)

                mindex += 1

            os.chdir(workingdir)

    return
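A minimal invocation sketch, assuming a simple sequence record; the real sequence class is not part of this excerpt, and the code above only requires an `identifier` and a `sequence` attribute on each object in seq_rep_list:

from collections import namedtuple

# Hypothetical stand-in for the sequence objects expected by _build_models.
Seq = namedtuple('Seq', ['identifier', 'sequence'])

seq_rep_list = [
    (Seq('seqA', 'MKTAYIAKQRQISFVKSHFSRQLEERLGLIEVQ'), 3),   # 3 replicates
    (Seq('seqB', 'MKQLEDKVEELLSKNYHLENEVARLKKLVGER'), 1),    # 1 replicate
]

# Hypothetical call; 'templates.ali' would hold the template structure alignment:
# _build_models('templates.ali', 'models_out', nmodels=10,
#               refstructure=None, verbose=False, seq_rep_list=seq_rep_list)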
Example #53
def peptide_rebuild_modeller(name, selection='all', hetatm=0, sequence=None,
        nmodels=1, hydro=0, quiet=1, *, _self=cmd):
    '''
DESCRIPTION

    Remodel the given selection using modeller. This is useful for example to
    build incomplete sidechains. More complicated modelling tasks are not
    the intention of this simple interface.

    Side effects: Alters "type" property for MSE residues in selection
    (workaround for bug #3512313).

USAGE

    peptide_rebuild_modeller name [, selection [, hetatm [, sequence ]]]

ARGUMENTS

    name = string: new object name

    selection = string: atom selection

    hetatm = 0/1: read and model HETATMs (ligands) {default: 0}

    sequence = string: if provided, use this sequence instead of the
    template sequence {default: None}

    nmodels = int: number of models (states) to generate {default: 1}
    '''
    import modeller
    from modeller.automodel import automodel, allhmodel

    import tempfile, shutil, os
    _assert_package_import()
    from .editing import update_identifiers

    nmodels, hetatm, quiet = int(nmodels), int(hetatm), int(quiet)

    if int(hydro):
        automodel = allhmodel

    tempdir = tempfile.mkdtemp()
    pdbfile = os.path.join(tempdir, 'template.pdb')
    alnfile = os.path.join(tempdir, 'aln.pir')

    cwd = os.getcwd()
    os.chdir(tempdir)

    if not quiet:
        print(' Notice: PWD=%s' % (tempdir))

    try:
        modeller.log.none()
        env = modeller.environ()
        env.io.hetatm = hetatm

        # prevent PyMOL from putting TER records before MSE residues (bug #3512313)
        _self.alter('(%s) and polymer' % (selection), 'type="ATOM"')

        _self.save(pdbfile, selection)
        mdl = modeller.model(env, file=pdbfile)

        aln = modeller.alignment(env)
        aln.append_model(mdl, align_codes='foo', atom_files=pdbfile)

        # get sequence from non-present atoms
        if not sequence and _self.count_atoms('(%s) & !present' % (selection)):
            sequence = get_seq(selection)

        if sequence:
            aln.append_sequence(sequence)
            aln[-1].code = 'bar'
            aln.malign()
        aln.write(alnfile)

        a = automodel(env, alnfile=alnfile, sequence=aln[-1].code,
                knowns=[s.code for s in aln if s.prottyp.startswith('structure')])
        a.max_ca_ca_distance = 30.0

        if nmodels > 1:
            a.ending_model = nmodels
            from multiprocessing import cpu_count
            ncpu = min(cpu_count(), nmodels)
            if ncpu > 1:
                from modeller import parallel
                job = parallel.job(parallel.local_slave()
                        for _ in range(ncpu))
                a.use_parallel_job(job)

        a.make()

        for output in a.outputs:
            _self.load(output['name'], name, quiet=quiet)
    finally:
        os.chdir(cwd)
        shutil.rmtree(tempdir)

    _self.align(name, selection, cycles=0)
    if not sequence:
        update_identifiers(name, selection, _self=_self)

    if not quiet:
        print(' peptide_rebuild_modeller: done')
def align_res_nums(apo_pdb_file, apo_pdb_id, apo_chain_id, holo_pdb_file,
                   holo_pdb_id, holo_chain_id):
    env = modeller.environ()
    aln = modeller.alignment(env)
    apo_model = modeller.model(env,
                               file=apo_pdb_file,
                               model_segment=("FIRST:%s" % (apo_chain_id),
                                              "LAST:%s" % (apo_chain_id)))
    aln.append_model(apo_model,
                     atom_files=apo_pdb_id,
                     align_codes="%s%s" % (apo_pdb_id, apo_chain_id))
    holo_model = modeller.model(env,
                                file=holo_pdb_file,
                                model_segment=("FIRST:%s" % (holo_chain_id),
                                               "LAST:%s" % (holo_chain_id)))
    aln.append_model(holo_model,
                     atom_files=holo_pdb_id,
                     align_codes="%s%s" % (holo_pdb_id, holo_chain_id))
    aln.salign()
    alignment_filename = "%s%s_%s%s_salign_output.ali" % (
        apo_pdb_id, apo_chain_id, holo_pdb_id, holo_chain_id)
    aln.write(file=alignment_filename, alignment_format="PIR")
    with open(alignment_filename, "r") as alignment_opened:
        alignment_lines = alignment_opened.readlines()
        # Ignore the header lines.  The format requires a 2-line header; there may be a blank line before this.
        if alignment_lines[0][0] == ">":
            line_index = 2
        else:
            line_index = 3
        apo_sequence_aligned = ""
        while True:
            next_line = alignment_lines[line_index].strip()
            apo_sequence_aligned += next_line
            if next_line[len(next_line) - 1] == "*":
                apo_sequence_aligned = apo_sequence_aligned[:-1]
                break
            line_index += 1
        if alignment_lines[line_index + 1][0] == ">":
            line_index += 3
        else:
            line_index += 4
        holo_sequence_aligned = ""
        while True:
            next_line = alignment_lines[line_index].strip()
            holo_sequence_aligned += next_line
            if next_line[len(next_line) - 1] == "*":
                holo_sequence_aligned = holo_sequence_aligned[:-1]
                break
            line_index += 1
    os.remove(alignment_filename)
    apo_pdb_res_numbers = get_numbers_from_pdb(apo_pdb_file, apo_chain_id)
    holo_pdb_res_numbers = get_numbers_from_pdb(holo_pdb_file, holo_chain_id)
    dict_key_apo_val_holo = {}
    holo_residues_passed = 0  # incremented whenever the iteration reaches a spot in the alignment where the holo sequence has a residue.
    apo_residues_passed = 0
    for i in range(len(holo_sequence_aligned)):
        if (apo_sequence_aligned[i] != "-") and (holo_sequence_aligned[i] !=
                                                 "-"):
            #print(len(apo_pdb_res_numbers), apo_residues_passed, len(holo_pdb_res_numbers), holo_residues_passed)
            #print(apo_pdb_res_numbers, holo_pdb_res_numbers)
            #print(len(apo_sequence_aligned), len(holo_sequence_aligned), "len")
            #print(apo_sequence_aligned, holo_sequence_aligned)
            dict_key_apo_val_holo[apo_pdb_res_numbers[
                apo_residues_passed]] = holo_pdb_res_numbers[
                    holo_residues_passed]
            holo_residues_passed += 1
            apo_residues_passed += 1
        elif (apo_sequence_aligned[i] != "-") and (holo_sequence_aligned[i]
                                                   == "-"):
            dict_key_apo_val_holo[
                apo_pdb_res_numbers[apo_residues_passed]] = "NA"
            apo_residues_passed += 1
        elif (apo_sequence_aligned[i]
              == "-") and (holo_sequence_aligned[i] != "-"):
            holo_residues_passed += 1
    print(dict_key_apo_val_holo)
    print(apo_sequence_aligned)
    print(holo_sequence_aligned)
    return dict_key_apo_val_holo
def get_profile(profile_file, seq):
    """Read `profile_file` into a Python array, and add gaps corresponding to the alignment sequence `seq`."""
    # Read all non-comment and non-blank lines from the file:
    f = open(profile_file)
    vals = []
    for line in f:
        if not line.startswith('#') and len(line) > 10:
            spl = line.split()
            vals.append(float(spl[-1]))
    # Insert gaps into the profile corresponding to those in seq:
    for t, res in r_enumerate(seq.residues):
        for gap in range(res.get_leading_gaps()):
            vals.insert(t, None)
    # Add a gap at position '0', so that we effectively count from 1:
    vals.insert(0, None)
    return vals


#* ~~~~~~~~~~~~~~~~~~~~~~~~~~~ Plot all DOPE scores ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ *#

import modeller

k = modeller.environ()
y = modeller.alignment(k, file=str(iD) + '-mult.ali')

model = get_profile(str(iD) + '_multi_model.profile',
                    y[str(iD)])  #   multi model
template = get_profile(str(iD) + '_single_model.profile',
                       y[str(iD)])  #  single model
loop_refined = get_profile(
    str(iD) + '_multi_model_loop_refined.profile', y[str(iD)])

#* Plot the template and model profiles in the same plot for comparison *#

pylab.figure(1, figsize=(10, 6))
pylab.xlabel('Alignment position')
pylab.ylabel('DOPE per-residue score')

pylab.plot(model,
Example #56
def get_profile(profile_file, seq):
    """Read `profile_file` into a Python array, and add gaps corresponding to the alignment sequence `seq`."""
    # Read all non-comment and non-blank lines from the file:
    f = open(profile_file)
    vals = []
    for line in f:
        if not line.startswith('#') and len(line) > 10:
            spl = line.split()
            vals.append(float(spl[-1]))
    # Insert gaps into the profile corresponding to those in seq:
    for n, res in r_enumerate(seq.residues):
        for gap in range(res.get_leading_gaps()):
            vals.insert(n, None)
    # Add a gap at position '0', so that we effectively count from 1:
    vals.insert(0, None)
    return vals


e = modeller.environ()
a = modeller.alignment(e, file='gabar_MM.01chimeric.pir')

template = get_profile('chimeric.profile', a['chimeric'])

models = [1, 34, 53, 60, 42, 24, 49]
models_mod = []

for model in models:
    model_mod = get_profile('model_{0:02d}.profile'.format(model),
                            a['gabar_MM.01'])
    models_mod.append(model_mod)

    profile_mod = open('dope_profile_model_{0:02d}.dat'.format(model), 'w')
    for i, res in enumerate(model_mod):
        profile_mod.write(str(i) + '   ' + str(res) + '\n')
    profile_mod.close()
Example #57
    def analyse_target_template_pairs(self):
        """
        Check the compatibility between the target and model sequences, then
        extract delta_d data from the target-template pairs into a series of .csv files.
        These HDDR parameter files will be used by the 'rebuild_restraints_file'
        method of the 'Automodel_custom_restraints' class to edit the default
        restraints file of MODELLER.
        """

        aln = self.read_alignment()

        #------------------------------------------
        # Compare the model and target sequences. -
        #------------------------------------------

        # Get the model sequence.
        modeller_mod_seq = aln[self.sequence]
        if len(modeller_mod_seq.chains) > 1:
            raise NotImplementedError(
                "Optimal restraints with multiple chain models are currently not implemented in altMOD."
            )
        mod_seq = "".join([r.code for r in modeller_mod_seq.residues])

        # Get the target sequence.
        modeller_tar_obj = complete_pdb(self.env, self.target_filepath)
        if len(modeller_tar_obj.chains) > 1:
            if self.target_chain is None:
                raise ValueError(
                    "The selected target structure has more than one chain (%s). In order to extract optimal restraints, provide to 'set_target_structure' the chain corresponding to the model."
                    % len(modeller_tar_obj.chains))
            modeller_tar_obj = modeller_tar_obj.chains[self.target_chain]
        tar_seq = "".join([r.code for r in modeller_tar_obj.residues])

        # Check if they are compatible (by aligning them through salign).
        new_aln = alignment(self.env)
        new_aln.append_sequence(tar_seq)
        new_aln.append_sequence(mod_seq)
        new_aln.salign(gap_penalties_1d=(
            -900.0,
            -50.0))  # The as1.sim.mat similarity matrix is used by default.
        tar_aliseq = "".join([
            _get_modeller_res_code(p.get_residue(new_aln[0]))
            for p in new_aln.positions
        ])
        mod_aliseq = "".join([
            _get_modeller_res_code(p.get_residue(new_aln[1]))
            for p in new_aln.positions
        ])
        '''
        import random
        gr = lambda i: i if random.random() > 0.3 else random.choice("QWERTYIPASDFGHKLCVNM" + "-"*5)
        mod_aliseq = "".join([gr(i) for i in mod_aliseq])
        print (mod_aliseq)
        '''

        # Computes the sequence identity between the model and target sequences.
        matches_count = 0
        identities_count = 0
        for mod_p, tar_p in zip(mod_aliseq, tar_aliseq):
            if mod_p != "-" and tar_p != "-":
                if mod_p == tar_p:
                    identities_count += 1
                matches_count += 1
        mod_tar_seqid = identities_count / float(matches_count)

        # Allows only a small fraction of mismatches.
        if mod_tar_seqid < self.mod_tar_seqid_threshold:
            message = "The target and model sequence do not correspond:\n* Tar: %s\n* Mod: %s" % (
                tar_aliseq, mod_aliseq)
            raise ValueError(message)

        # Find the correspondence between the model and target residues.
        mod_c = 0
        tar_c = 0
        mod_tar_res_dict = {}
        for mod_pos, tar_pos in zip(mod_aliseq, tar_aliseq):
            if mod_pos != "-" and tar_pos != "-":
                mod_tar_res_dict[mod_c] = tar_c
            if mod_pos != "-":
                mod_c += 1
            if tar_pos != "-":
                tar_c += 1

        #---------------------------------------------
        # Analyse each of the target-template pairs. -
        #---------------------------------------------

        template_filepaths = self._get_template_filepaths(aln)

        for tem_idx, tem_name in enumerate(self.knowns):

            print("\n* Analysing target-tem_%s (%s) pair." %
                  (tem_idx, tem_name))
            t1 = time.time()

            modeller_tem_seq = aln[tem_name]

            # Get the model-template matches from the MODELLER 'Alignment' object (here, a match
            # is any pair of aligned residues). Each match is a tuple containing two
            # 'Residue' objects from MODELLER (the first from the template, the second from the
            # model).
            matches = []
            matches_dict = {}
            mod_c = 0
            for pos in aln.positions:
                mod_pos = pos.get_residue(modeller_mod_seq)
                tem_pos = pos.get_residue(modeller_tem_seq)
                if mod_pos != None and tem_pos != None:
                    matches.append((tem_pos, mod_pos))
                    matches_dict[mod_pos.index] = (mod_pos, tem_pos)
                if mod_pos != None:
                    # Assign an index (starting from 0) to the model residue.
                    mod_pos._id = mod_c
                    mod_c += 1
            '''
            for res in modeller_mod_seq.residues:
                print res, res.index
            '''

            # Iterate through the HDDRs found in the MODELLER restraints file.
            results_list = []
            for atm_1, atm_2 in self.hddr_dict["all"]:

                # Get atom types of the atoms engaged in the HDDRs.
                atm_1_type = self.atm_type_dict[atm_1]
                atm_2_type = self.atm_type_dict[atm_2]

                # Get the model and the equivalent template residues.
                try:
                    mod_res_1, tem_res_1 = matches_dict[
                        self.atm_to_res_dict[atm_1]]
                    mod_res_2, tem_res_2 = matches_dict[
                        self.atm_to_res_dict[atm_2]]
                except KeyError:
                    continue

                # Check if the model residue is also present in the target.
                if not mod_res_1._id in mod_tar_res_dict:
                    continue
                if not mod_res_2._id in mod_tar_res_dict:
                    continue

                # Get the target residues corresponding to the model residues.
                tar_res_1 = modeller_tar_obj.residues[mod_tar_res_dict[
                    mod_res_1._id]]
                tar_res_2 = modeller_tar_obj.residues[mod_tar_res_dict[
                    mod_res_2._id]]

                # Get the target and template residues.
                tem_atm_1 = get_modeller_atom(tem_res_1, atm_1_type)
                tem_atm_2 = get_modeller_atom(tem_res_2, atm_2_type)

                tar_atm_1 = get_modeller_atom(tar_res_1, atm_1_type)
                tar_atm_2 = get_modeller_atom(tar_res_2, atm_2_type)

                # Get the interatomic distance between the two template atoms.
                if tem_atm_1 != None and tem_atm_2 != None:
                    grp_dt = get_modeller_dist(tem_atm_1, tem_atm_2)
                # The template residue may have different atoms with respect to the target/model residue.
                else:
                    continue

                # Get the interatomic distance between the two target atoms.
                if tar_atm_1 != None and tar_atm_2 != None:
                    grp_dn = get_modeller_dist(tar_atm_1, tar_atm_2)
                else:
                    continue

                # Assigns the MODELLER code for the type of restraint.
                if atm_1_type == "CA" and atm_2_type == "CA":
                    grp_name = "9"
                elif (atm_1_type == "N"
                      and atm_2_type == "O") or (atm_1_type == "O"
                                                 and atm_2_type == "N"):
                    grp_name = "10"
                else:
                    if atm_1_type in main_chain_atoms or atm_2_type in main_chain_atoms:
                        grp_name = "23"
                    else:
                        grp_name = "26"

                # Get the delta_d value.
                grp_dd = grp_dn - grp_dt
                # if abs(grp_dd) >= self.max_delta_d_abs_val:
                #     continue

                # Prepare the main columns.
                pair_results = {
                    "RST_GRP": grp_name,
                    "GRP_DN": grp_dn,
                    "GRP_DT": grp_dt,
                    "GRP_DD": grp_dd,
                    "MOD_ATOM_TYPE_I": atm_1_type,
                    "MOD_ATOM_TYPE_J": atm_2_type,
                    "MOD_ATOM_INDEX_I": atm_1,
                    "MOD_ATOM_INDEX_J": atm_2,
                }

                # Prepare additional columns.
                base_pair_results = {
                    "MOD_RES_PDB_ID_I": mod_res_1.index,
                    "MOD_RES_PDB_ID_J": mod_res_2.index,
                    "MOD_RES_NAME_I": mod_res_1.code,
                    "MOD_RES_NAME_J": mod_res_2.code,
                    "TAR_RES_PDB_ID_I": tar_res_1.num,
                    "TAR_RES_PDB_ID_J": tar_res_2.num,
                    "TAR_RES_NAME_I": tar_res_1.code,
                    "TAR_RES_NAME_J": tar_res_2.code,
                    "TEM_RES_PDB_ID_I": tem_res_1.num,
                    "TEM_RES_PDB_ID_J": tem_res_2.num,
                    "TEM_RES_NAME_I": tem_res_1.code,
                    "TEM_RES_NAME_J": tem_res_2.code,
                }
                pair_results.update(base_pair_results)

                # Add a row in the results .csv file.
                results_list.append(pair_results)

            #-------------------------------------------------------
            # Write a results file for each target-template pair. -
            #-------------------------------------------------------

            t2 = time.time()

            print("- It took %s." % (t2 - t1), len(results_list))
            analysis_filename = "%s_tar_tem_%s.csv" % (self.sequence, tem_idx)
            with open(analysis_filename, "w") as c_fh:
                if len(results_list) != 0:
                    column_names = list(sorted(results_list[0].keys()))
                    writer = csv.DictWriter(c_fh, fieldnames=column_names)
                    writer.writeheader()
                    for pair_results in results_list:
                        writer.writerow(pair_results)
            # Sets the custom HDDR params files of the class.
            self.hddr_params_filepaths[tem_idx] = analysis_filename
Example #58
def get_profile(profile_file, seq):
    """Read `profile_file` into a Python array, and add gaps corresponding to
       the alignment sequence `seq`."""
    # Read all non-comment and non-blank lines from the file:
    f = open(profile_file)
    vals = []
    for line in f:
        if not line.startswith('#') and len(line) > 10:
            spl = line.split()
            vals.append(float(spl[-1]))
    # Insert gaps into the profile corresponding to those in seq:
    for n, res in r_enumerate(seq.residues):
        for gap in range(res.get_leading_gaps()):
            vals.insert(n, None)
    # Add a gap at position '0', so that we effectively count from 1:
    vals.insert(0, None)
    return vals


a = modeller.alignment(env, file='TMPRSS2_254_2a31A.ali')

template = get_profile('2a31.profile', a['2a31A'])

plt.figure(1, figsize=(10, 6))
plt.xlabel('Alignment position')
plt.ylabel('DOPE per-residue score')
plt.plot(template, color='red', linewidth=2, label='Template')
for fileName in pdb_lyst[1:]:
    model = get_profile(fileName + '.profile', a['TMPRSS2_254'])
    plt.plot(model, linewidth=2, label=fileName)
plt.legend()
plt.savefig('dope_profile_TMPRSS2_254_2a31A_single.png', dpi=100)
def get_profile(profile_file, seq):
    """Read `profile_file` into a Python array, and add gaps corresponding to
       the alignment sequence `seq`."""
    # Read all non-comment and non-blank lines from the file:
    f = open(profile_file)
    vals = []
    for line in f:
        if not line.startswith('#') and len(line) > 10:
            spl = line.split()
            vals.append(float(spl[-1]))
    # Insert gaps into the profile corresponding to those in seq:
    for n, res in r_enumerate(seq.residues):
        for gap in range(res.get_leading_gaps()):
            vals.insert(n, None)
    # Add a gap at position '0', so that we effectively count from 1:
    vals.insert(0, None)
    return vals


a = modeller.alignment(env, file='TMPRSS2_254-mult.ali')

plt.figure(1, figsize=(10, 6))
plt.xlabel('Alignment position')
plt.ylabel('DOPE per-residue score')
for fileName in pdb_lyst[:3]:
    model = get_profile(fileName + '.profile', a[fileName + 'A'])
    plt.plot(model, linewidth=2, label=fileName)
for fileName in pdb_lyst[3:]:
    model = get_profile(fileName + '.profile', a['TMPRSS2_254'])
    plt.plot(model, linewidth=2, label=fileName)
plt.legend()
plt.savefig('dope_profile_TMPRSS2_254_multi.png', dpi=100)