def refined_vs_notrefined(models_dir, dope_profile):
    """Create comparison DOPE energy plots between each model generated by the
    program and its refined counterpart."""
    env = environ()
    env.io.atom_files_directory = [models_dir]
    mdl_list = []
    aln = modeller.alignment(env)
    code_list = []
    mdl_nr_list = []
    aln_nr = modeller.alignment(env)
    code_list_nr = []
    for file in os.listdir(models_dir):
        name = file
        if name.endswith('.B'):
            mdl = modeller.model(env)
            mdl.read(file=file)
            code = str(file)
            code_list.append(code)
            s = selection(mdl)
            s.assess_dope(output='ENERGY_PROFILE NO_REPORT',
                          file=models_dir + code + '.profile',
                          normalize_profile=True, smoothing_window=15)
            mdl_list.append(mdl)
            aln.append_model(mdl, align_codes=code, atom_files=code)
            aln.write(file=dope_profile + 'build_profile_ref.ali',
                      alignment_format='PIR')
        else:
            mdl_nr = modeller.model(env)
            mdl_nr.read(file=file)
            code = str(file)
            print(code)
            code_list_nr.append(code)
            t = selection(mdl_nr)
            t.assess_dope(output='ENERGY_PROFILE NO_REPORT',
                          file=models_dir + code + '.profile',
                          normalize_profile=True, smoothing_window=15)
            mdl_nr_list.append(mdl_nr)
            aln_nr.append_model(mdl_nr, align_codes=code, atom_files=code)
            aln_nr.write(file=dope_profile + 'build_profile_notref.ali',
                         alignment_format='PIR')
    if len(mdl_nr_list) == len(mdl_list):
        for a, b, c, d in zip(mdl_nr_list, mdl_list, code_list_nr, code_list):
            model1 = get_profile(models_dir + c + ".profile", aln_nr[str(c)])
            model2 = get_profile(models_dir + d + ".profile", aln[str(d)])
            pylab.figure(1, figsize=(30, 18))
            pylab.xlabel('Alignment position', fontsize=20)
            pylab.ylabel('DOPE per-residue score', fontsize=20)
            pylab.plot(model1, color='red', linewidth=2, label='Model')
            pylab.plot(model2, color='green', linewidth=2, label='Optimized model')
            pylab.legend(fontsize=20)
            pylab.savefig(dope_profile + c + '.dope_profile.jpg', dpi=100)
            pylab.close()
    return ("Comparison energy plots between the refined and non-refined models "
            "have been created here:\n%s\n" % dope_profile)
def main(args):
    mod.log.verbose()
    env = mod.environ()
    env.io.atom_files_directory = [".", args.dir, "../" + args.dir]
    aln = mod.alignment(env)
    mdl = mod.model(
        env,
        file=args.template,
        model_segment=(
            "FIRST:" + args.chains[0].upper(),
            "LAST:" + args.chains[1].upper(),
        ),
    )
    aln.append_model(
        mdl, align_codes=args.template.replace(".pdb", ""), atom_files=args.template
    )
    sequence_file = os.path.join(args.dir, args.target)
    sequence_code = args.target.replace(".ali", "")
    aln.append(file=sequence_file, align_codes=sequence_code)
    aln.align2d()  # perform the target-template alignment
    align_file = os.path.join(
        args.dir, sequence_code + "-" + args.template.replace(".pdb", "")
    )
    aln.write(file=align_file + ".ali", alignment_format="PIR")  # for MODELLER
    aln.write(file=align_file + ".pap", alignment_format="PAP")  # easier to read
    # check the alignment files
    aln.check()
def _align_structures(structures, verbose): """Aligns structures using iterative structural alignment.""" # set up modeller environment if verbose: modeller.log.verbose() else: modeller.log.none() env = modeller.environ() aln = modeller.alignment(env) # read structures into modeller environment for (id, structure) in structures.items(): mdl = modeller.model(env, file=structure) aln.append_model(mdl, align_codes=id, atom_files=structure) # align structures using iterative structural alignment modeller.salign.iterative_structural_align(aln) # convert modeller alignment to Alignment object mod_aln_f = tempfile.NamedTemporaryFile(mode='w', prefix=fnameprefix, suffix='.ali', delete=False) mod_aln_fname = mod_aln_f.name mod_aln_f.close() aln.write(mod_aln_fname, alignment_format='PIR') alnobj = Alignment(mod_aln_fname) os.remove(mod_aln_fname) return alnobj
def DOPE_profiles_maker(temp_dir, outputs): """Creates a DOPE profile plot (.jpg) from a macrocomplex (.pdb), which has no acid nucleic chains using Modeller.""" flist = [] env = environ() env.io.atom_files_directory = [temp_dir] dl = os.listdir(temp_dir) for file in dl: if file.startswith("mod"): flist.append(file) aln = modeller.alignment(env) for file in flist: mdl = modeller.model(env) code = str(file) mdl.read(file=code, model_segment=('FIRST:@', 'END:')) aln.append_model(mdl, align_codes=code, atom_files=code) t = selection(mdl) file_dope = outputs + code + '.profile' t.assess_dope(output='ENERGY_PROFILE NO_REPORT', file=file_dope, normalize_profile=True, smoothing_window=15) model = get_profile(file_dope, aln[str(file)]) pylab.figure(1, figsize=(20, 12)) pylab.xlabel('Alignment position', fontsize=20) pylab.ylabel('DOPE per-residue score', fontsize=20) pylab.plot(model, color='green', linewidth=3, label=file[3:-4]) pylab.savefig(outputs + file[:-4] + '.dope_profile.jpg', dpi=100) pylab.close() path_img = outputs + file[:-4] + '.dope_profile.jpg' return("DOPE profile plot for model created here:\n %s\n" % (path_img))
def align_template_to_reference(msmseed, ref_msmseed): import modeller import tempfile import shutil import copy import os temp_dir = tempfile.mkdtemp() try: os.chdir(temp_dir) alignment_file = open('aln_tmp.pir','w') aln = _PIR_alignment(ref_msmseed.template_sequence, ref_msmseed.template_id, msmseed.template_sequence, msmseed.template_id) alignment_file.writelines(aln) alignment_file.close() template_file = open(msmseed.template_id + '.pdb','w') template_pdb = msmseed.template_structure template_pdb.writeFile(template_pdb.topology, template_pdb.positions, template_file) template_file.close() ref_pdb = ref_msmseed.template_structure ref_file = open(ref_msmseed.template_id + '.pdb', 'w') ref_pdb.writeFile(ref_pdb.topology, ref_pdb.positions, ref_file) ref_file.close() modeller.log.none() env = modeller.environ() env.io.atom_files_directory = temp_dir aln = modeller.alignment(env, file='aln_tmp.pir', align_codes=(ref_msmseed.template_id, msmseed.template_id)) mdl = modeller.model(env, file=ref_msmseed.template_id + '.pdb') mdl2 = modeller.model(env, file=msmseed.template_id+'.pdb') atmsel = modeller.selection(mdl).only_atom_types('CA') r = atmsel.superpose(mdl2, aln) msmseed.rmsd_to_reference = copy.deepcopy(r.rms) except Exception as e: msmseed.error_message = e.message finally: shutil.rmtree(temp_dir) return msmseed
def test_script9(self): """Test step 9 (multiple fitting)""" # Get inputs (outputs from step 8) for i in ('top', 'bottom'): shutil.copy('precalculate_results/stage8_split_density/' \ 'groel-11.5A.%s.mrc' % i, 'output') # Make sure the script runs without errors p = subprocess.check_call(['scripts/' \ 'script9_symmetric_multiple_fitting.py']) e = modeller.environ() ref = modeller.model(e, file='precalculate_results/stage9_symmetric_multiple_fitting/' \ 'model.top.0.pdb') sel = modeller.selection(ref).only_atom_types('CA') # At least one model in each ring should be close to the reference for side in ('top', 'bottom'): rms = [] for i in range(6): fname = 'output/model.%s.%d.pdb' % (side, i) m = modeller.model(e, file=fname) a = modeller.alignment(e) a.append_model(ref, align_codes='ref') a.append_model(m, align_codes='model') rms.append(sel.superpose(m, a).rms) os.unlink(fname) self.assertTrue(min(rms) < 10.0) os.unlink('output/intermediate_asmb_sols.out') for side in ('top', 'bottom'): os.unlink('output/multifit.%s.output' % side) os.unlink('output/multifit.%s.output.symm.ref' % side) os.unlink('output/multifit.%s.param' % side)
def test_feature_hbond(self): """Check hydrogen bond features""" env = self.get_environ() mlib = self.get_mdt_library() mlib.hbond_classes.read("data/atmcls-hbda.lib") donor = mdt.features.HydrogenBondDonor(mlib, mdt.uniform_bins(7, 1.0, 1.0)) accep = mdt.features.HydrogenBondAcceptor(mlib, mdt.uniform_bins(7, 1.0, 1.0)) totchg = mdt.features.HydrogenBondCharge(mlib, mdt.uniform_bins(9, 1.0, 1.0)) satisf = mdt.features.HydrogenBondSatisfaction(mlib, mdt.uniform_bins(100, 0.0, 10.0)) self.assertRaises(mdt.MDTError, mlib.hbond_classes.read, "data/atmcls-hbda.lib") m = mdt.Table(mlib, features=donor) m2 = mdt.Table(mlib, features=accep) m3 = mdt.Table(mlib, features=satisf) m4 = mdt.Table(mlib, features=totchg) aln = modeller.alignment(env, file="test/data/alignment.ali") m.add_alignment(aln) m2.add_alignment(aln) m3.add_alignment(aln) m4.add_alignment(aln) self.assertInTolerance(m[0], 295.0, 0.0005) self.assertInTolerance(m[1], 139.0, 0.0005) self.assertEqual(m[-1], 349.0) self.assertInTolerance(m2[0], 236.0, 0.0005) self.assertInTolerance(m2[1], 223.0, 0.0005) self.assertEqual(m2[-1], 168.0) self.assertInTolerance(m3[0], 1.0, 0.0005) self.assertInTolerance(m3[1], 0.0, 0.0005) self.assertEqual(m3[-1], 0.0) self.assertInTolerance(m4[0], 78.0, 0.0005) self.assertInTolerance(m4[1], 24.0, 0.0005) self.assertEqual(m4[-1], 739.0) # Exercise writing of hbond information to HDF5 files: for t in (m, m2, m3, m4): t.write_hdf5("test.hdf5") os.unlink("test.hdf5")
def test_feature_angle_type(self): """Check angle type features""" env = self.get_environ() mlib = self.get_mdt_library() mlib.angle_classes.read("data/anggrp.lib") angletype = mdt.features.AngleType(mlib) angle = mdt.features.Angle(mlib, bins=mdt.uniform_bins(288, 0.0, 0.625)) self.assertRaises(mdt.MDTError, mlib.angle_classes.read, "data/anggrp.lib") m = mdt.Table(mlib, features=angletype) m2 = mdt.Table(mlib, features=angle) aln = modeller.alignment(env, file="test/data/alignment.ali") m.add_alignment(aln) m2.add_alignment(aln) self.assertInTolerance(m[0], 7.0, 0.0005) self.assertInTolerance(m[7], 9.0, 0.0005) self.assertInTolerance(m[15], 11.0, 0.0005) self.assertEqual(m.shape, (236,)) self.assertEqual(m[-1], 0.0) self.assertInTolerance(m2[176], 48.0, 1.0005) self.assertInTolerance(m2[177], 42.0, 0.0005) self.assertInTolerance(m2[178], 38.0, 0.0005) self.assertEqual(m2.shape, (289,)) self.assertEqual(m2[-1], 0.0) # Exercise writing of angle class information to HDF5 files: m.write_hdf5("test.hdf5") os.unlink("test.hdf5")
def test_feature_residue_distance_difference(self): """Check residue-residue distance difference feature""" env = self.get_environ() mlib = self.get_mdt_library() ddist = mdt.features.ResidueDistanceDifference(mlib, bins=mdt.uniform_bins(20, -10, 1)) aln = modeller.alignment(env, file="test/data/struc-struc.ali") m = mdt.Table(mlib, features=ddist) m.add_alignment(aln) self.assertEqual(m[9], 20) self.assertEqual(m[10], 20) self.assertEqual(sum([b for b in m]), 40) self.assertEqual(m[-1], 0) # Undefined (-999) coordinates in either structure should put # features in the undefined bin oldx = aln[0].residues[0].atoms["CA"].x aln[0].residues[0].atoms["CA"].x = -999 m = mdt.Table(mlib, features=ddist) m.add_alignment(aln) self.assertEqual(m[-1], 16) aln[0].residues[0].atoms["CA"].x = oldx aln[1].residues[0].atoms["CA"].x = -999 m = mdt.Table(mlib, features=ddist) m.add_alignment(aln) self.assertEqual(m[-1], 16)
def test_feature_dihedral_type(self): """Check dihedral type features""" env = self.get_environ() mlib = self.get_mdt_library() mlib.dihedral_classes.read("data/impgrp.lib") dihedtype = mdt.features.DihedralType(mlib) dihedral = mdt.features.Dihedral(mlib, bins=mdt.uniform_bins(288, -180, 1.25)) self.assertRaises(mdt.MDTError, mlib.dihedral_classes.read, "data/impgrp.lib") m = mdt.Table(mlib, features=dihedtype) m2 = mdt.Table(mlib, features=dihedral) aln = modeller.alignment(env, file="test/data/alignment.ali") m.add_alignment(aln) m2.add_alignment(aln) self.assertInTolerance(m[0], 7.0, 0.0005) self.assertInTolerance(m[2], 9.0, 0.0005) self.assertInTolerance(m[4], 11.0, 0.0005) self.assertEqual(m.shape, (79,)) self.assertEqual(m[-1], 0.0) self.assertInTolerance(m2[143], 60.0, 1.0005) self.assertInTolerance(m2[144], 53.0, 1.0005) self.assertInTolerance(m2[145], 24.0, 0.0005) self.assertEqual(m2.shape, (289,)) self.assertEqual(m2[-1], 0.0) # Exercise writing of dihedral class information to HDF5 files: m.write_hdf5("test.hdf5") os.unlink("test.hdf5")
def test_disulfide(self): """Test handling of disulfide bonds""" mlib = self.get_all_libraries() bsep = mdt.features.AtomBondSeparation(mlib, bins=mdt.uniform_bins(20, 0, 1.0)) bsep_ss = mdt.features.AtomBondSeparation(mlib, bins=mdt.uniform_bins(20, 0, 1.0), disulfide=True) env = self.get_environ() mdl = modeller.model(env) mdl.build_sequence('CC') # When SG-SG distance is small enough, an extra bond # (separation feature = 1) should be detected, but only with # disulfide=True for (dist, num) in [(2.6, 11.0), (2.4, 12.0)]: sg1 = mdl.residues[0].atoms['SG'] sg2 = mdl.residues[1].atoms['SG'] sg1.x = sg1.y = sg1.z = 0. sg2.x = sg2.y = 0. sg2.z = dist a = modeller.alignment(env) a.append_model(mdl, atom_files='test', align_codes='test') m = mdt.Table(mlib, features=bsep) m.add_alignment(a, residue_span_range=(-999,0,0,999)) self.assertEqual(m[1], 11.0) m2 = mdt.Table(mlib, features=bsep_ss) m2.add_alignment(a, residue_span_range=(-999,0,0,999)) self.assertEqual(m2[1], num)
def test_feature_sidechain_biso(self): """Check average sidechain Biso feature""" env = self.get_environ() mlib = self.get_mdt_library() self.assertRaises(ValueError, mdt.features.SidechainBiso, mlib, bins=mdt.uniform_bins(5, 0, 10), protein=3) sidechain_biso = mdt.features.SidechainBiso(mlib, bins=mdt.uniform_bins(5, 0, 10)) mdl = modeller.model(env) mdl.build_sequence("A") aln = modeller.alignment(env) aln.append_model(mdl, align_codes="test") s = aln[0] # Mainchain atom Biso should be ignored: for mainchain in ("N:1", "C:1", "O:1", "OXT:1", "CA:1"): s.atoms[mainchain].biso = 1000 for (biso, bin) in ( (22, 2), (32, 3), # Map regular values to bins (0, -1), # Zero Biso should be "undefined" (1, 3), ): # Biso < 2 is multiplied by 4pi^2 s.atoms["CB:1"].biso = biso m = mdt.Table(mlib, features=sidechain_biso) m.add_alignment(aln) self.assertEqual(m.shape, (6,)) self.assertEqual(m.sum(), 1) self.assertEqual(m[bin], 1)
def test_feature_iresol(self): """Check resolution features""" env = self.get_environ() mlib = self.get_mdt_library() bins = mdt.uniform_bins(3, -1.0, 1.5) xray0 = mdt.features.XRayResolution(mlib, bins, protein=0) xray0_nmr = mdt.features.XRayResolution(mlib, bins, protein=0, nmr=1.0) xray1 = mdt.features.XRayResolution(mlib, bins, protein=1) xray2 = mdt.features.XRayResolution(mlib, bins, protein=2) # Check valid range for protein argument for p in (-1, 3): self.assertRaises(ValueError, mdt.features.XRayResolution, mlib, bins, protein=p) m = self.get_test_mdt(mlib, features=xray0) m2 = self.get_test_mdt(mlib, features=xray1) self.assertEqual(m.shape, (4,)) self.assertEqual([b for b in m], [0.0, 1.0, 1.0, 0.0]) self.assertMDTDataEqual(m, m2) for (code, feat, bin) in ( ("bin0", xray0, 0), ("bin0", xray0_nmr, 1), ("bin1", xray0, 1), ("bin2", xray0, 2), ("undef1", xray0, 3), ("undef2", xray0, 3), ): m = mdt.Table(mlib, features=feat) aln = modeller.alignment(env, file="test/data/resol.ali", align_codes=code) m.add_alignment(aln) self.assertEqual(m[bin], 1.0)
def test_feature_resind_diff(self): """Test the residue index difference feature""" env = self.get_environ() mlib = self.get_mdt_library() diff = mdt.features.ResidueIndexDifference(mlib, bins=mdt.uniform_bins(21, -10, 1)) absdiff = mdt.features.ResidueIndexDifference(mlib, absolute=True, bins=mdt.uniform_bins(21, -10, 1)) aln = modeller.alignment(env, file="test/data/alignment.ali", align_codes="5fd1") m1 = mdt.Table(mlib, features=diff) m2 = mdt.Table(mlib, features=absdiff) self.assertEqual(m1.symmetric, False) self.assertEqual(m2.symmetric, True) m1.add_alignment(aln, residue_span_range=(-999, -2, 2, 999)) m2.add_alignment(aln, residue_span_range=(-999, -2, 2, 999)) self.assertEqual(m1.sum(), 10920) self.assertEqual(m2.sum(), 5460) # span range should result in 0, +/- 1 bins being zero: for m in (m1, m2): self.assertEqual(m[9], 0.0) self.assertEqual(m[10], 0.0) self.assertEqual(m[11], 0.0) # Non-absolute feature should have other bins symmetrically distributed: for i in range(9): self.assertEqual(m1[i], m[-2 - i]) # Absolute feature should have no negative values: for i in range(9): self.assertEqual(m2[i], 0.0)
def build_mdt_from_model(self, mlib, features, mdl, **keys): """Build a simple test MDT for a given model""" env = self.get_environ() m = mdt.Table(mlib, features=features) a = modeller.alignment(env) a.append_model(mdl, atom_files="test", align_codes="test") m.add_alignment(a, **keys) return m
def test_feature_triplet_type(self): """Check triplet type features""" env = self.get_environ() mlib = self.get_mdt_library() mlib.tuple_classes.read("data/trpcls.lib") tuple_type = mdt.features.TupleType(mlib) tuple_type2 = mdt.features.TupleType(mlib, pos2=True) tuple_dist = mdt.features.TupleDistance(mlib, bins=mdt.uniform_bins(9, 2.0, 0.2)) tuple_angle1 = mdt.features.TupleAngle1(mlib, bins=mdt.uniform_bins(6, 0, 30.0)) tuple_dihed1 = mdt.features.TupleDihedral1(mlib, bins=mdt.uniform_bins(6, -180, 60.0)) tuple_dihed2 = mdt.features.TupleDihedral2(mlib, bins=mdt.uniform_bins(6, -180, 60.0)) tuple_dihed3 = mdt.features.TupleDihedral3(mlib, bins=mdt.uniform_bins(6, -180, 60.0)) self.assertRaises(mdt.MDTError, mlib.tuple_classes.read, "data/trpcls.lib") m1 = mdt.Table(mlib, features=tuple_type) m2 = mdt.Table(mlib, features=tuple_type2) m3 = mdt.Table(mlib, features=tuple_dist) m4 = mdt.Table(mlib, features=tuple_angle1) m5 = mdt.Table(mlib, features=tuple_dihed1) m6 = mdt.Table(mlib, features=tuple_dihed2) m7 = mdt.Table(mlib, features=tuple_dihed3) aln = modeller.alignment(env, file="test/data/tiny.ali") for m in (m1, m2, m3, m4, m5, m6, m7): m.add_alignment(aln, residue_span_range=(-9999, 0, 0, 9999)) self.assertInTolerance(m1[0], 1.0, 0.0005) self.assertInTolerance(m1[1], 0.0, 0.0005) self.assertInTolerance(m1[2], 1.0, 0.0005) self.assertEqual(m1.shape, (236,)) self.assertEqual(m1[-1], 0.0) self.assertInTolerance(m2[0], 60.0, 0.0005) self.assertInTolerance(m2[1], 0.0, 0.0005) self.assertInTolerance(m2[2], 60.0, 0.0005) self.assertEqual(m2.shape, (236,)) self.assertEqual(m2[-1], 0.0) self.assertInTolerance(m3[0], 0.0, 0.0005) self.assertInTolerance(m3[1], 82.0, 0.0005) self.assertInTolerance(m3[2], 226.0, 0.0005) self.assertEqual(m3.shape, (10,)) self.assertInTolerance(m3[-1], 3018.0, 0.0005) self.assertInTolerance(m4[0], 479.0, 0.0005) self.assertInTolerance(m4[1], 806.0, 0.0005) self.assertInTolerance(m4[2], 471.0, 0.0005) self.assertEqual(m4.shape, (7,)) self.assertEqual(m4[-1], 0.0) self.assertInTolerance(m5[0], 556.0, 0.0005) self.assertInTolerance(m5[1], 642.0, 0.0005) self.assertInTolerance(m5[2], 470.0, 6.0005) self.assertEqual(m5.shape, (7,)) self.assertInTolerance(m5[-1], 180.0, 0.0005) self.assertInTolerance(m6[0], 661.0, 0.0005) self.assertInTolerance(m6[1], 520.0, 0.0005) self.assertInTolerance(m6[2], 545.0, 6.0005) self.assertEqual(m6.shape, (7,)) self.assertInTolerance(m6[-1], 112.0, 0.0005) self.assertInTolerance(m7[0], 661.0, 0.0005) self.assertInTolerance(m7[1], 520.0, 0.0005) self.assertInTolerance(m7[2], 545.0, 6.0005) self.assertEqual(m7.shape, (7,)) self.assertInTolerance(m7[-1], 112.0, 0.0005)
def test_integrative_modeling(self): """Test the entire integrative modeling run""" import modeller # Compile the clustering program subprocess.check_call(['gfortran', 'cluster.f', 'u3best.f', '-o', 'cluster.x'], cwd='integrative_modeling/bin') # Run sampling subprocess.check_call(['./run_modeling.py'], cwd='integrative_modeling') # Analysis subprocess.check_call(['bin/get_frames.sh'], cwd='integrative_modeling') # Make sure that at least two of the three "known good" clusters # are reproduced clusters = glob.glob('integrative_modeling/clustering/clus.*.pdb') clusters = [x for x in clusters if '-' not in x] exp_clusters = glob.glob('model_refinement/cluster*/model.pdb') env = modeller.environ() n_cluster = 0 rms = [] cluster_match = [0] * len(clusters) exp_cluster_match = [0] * len(exp_clusters) # Get a matrix of RMSD between all clusters and the expected clusters for ncluster, cluster in enumerate(clusters): per_cluster = [] for nexp_cluster, exp_cluster in enumerate(exp_clusters): mc = modeller.model(env, file=cluster) s = modeller.selection(mc) a = modeller.alignment(env) me = modeller.model(env, file=exp_cluster) a.append_model(mc, align_codes='clus') a.append_model(me, align_codes='exp_clus') # We only care about the global (non-cutoff) RMSD, so use a # large cutoff so that refine_local doesn't increase the number # of equivalent positions at the expense of worsening the RMSD r = s.superpose(me, a, rms_cutoff=999.) if r.rms < 15.0: cluster_match[ncluster] += 1 exp_cluster_match[nexp_cluster] += 1 per_cluster.append(r.rms) rms.append(per_cluster) # Count the number of clusters which are close to an expected cluster ncluster_match = len(cluster_match) - cluster_match.count(0) # Count the number of expected clusters which are close to a cluster nexp_cluster_match = len(exp_cluster_match) - exp_cluster_match.count(0) # Make sure that at least 2 of the 3 expected clusters is close to one # of the clusters we produced (but not all the *same* cluster) self.assertTrue(ncluster_match >= 2 and nexp_cluster_match >= 2, "Could not find any match between the %d clusters " "found in this test and 2 of the 3 'known good' " "clusters (match defined as all-atom RMSD less than " "15.0A). RMSD matrix: %s" % (len(clusters), str(rms)))
def perform_sequence_alignment(): e = modeller.environ() m1 = modeller.model(e, file='experimental.pdb') m2 = modeller.model(e, file='rosetta.pdb') aln = modeller.alignment(e) aln.append_model(m1, align_codes='experimental', atom_files='experimental.pdb') aln.append_model(m2, align_codes='rosetta') aln.align2d() aln.write(file='align.ali', alignment_format='PIR')
def test_script5(self): """Test step 5 (template alignment)""" # Make sure the script runs without errors p = subprocess.check_call(['scripts/script5_template_alignment.py']) # Check output alignment e = modeller.environ() a = modeller.alignment(e, file='output/groel-1iokA.ali') self.assertEqual([x.code for x in a], ['1iok', 'P0A6F5']) os.unlink('output/groel-1iokA.ali')
def test_feature_residue_distance(self): """Check residue-residue distance feature""" env = self.get_environ() mlib = self.get_mdt_library() dist = mdt.features.ResidueDistance(mlib, bins=mdt.uniform_bins(7, 0, 2.0)) aln = modeller.alignment(env, file="test/data/tiny.ali") m = mdt.Table(mlib, features=dist) m.add_alignment(aln) self.assertEqual([b for b in m], [0, 0, 0, 8, 2, 4, 4, 2])
def mk_strct_al_modeller(strct_data1, strct_data2):
    _stdout = sys.stdout
    sys.stdout = open(os.devnull, 'w')
    tmp_file = tempfile.NamedTemporaryFile(suffix=".fasta", delete=False)
    env = m.environ()
    aln = m.alignment(env)
    code1 = 'pdb' + strct_data1['id']
    code2 = 'pdb' + strct_data2['id']
    chain1 = strct_data1['chain_id']
    chain2 = strct_data2['chain_id']
    env.io.atom_files_directory = ['.', PDB_DIR]
    result = {}
    try:
        for (code, chain) in ((code1, chain1), (code2, chain2)):
            mdl = m.model(env, file=code,
                          model_segment=('FIRST:' + chain, 'LAST:' + chain))
            aln.append_model(mdl, atom_files=code, align_codes=code + chain)
        for (weights, write_fit, whole) in (((1., 0., 0., 0., 1., 0.), False, True),
                                            ((1., 0.5, 1., 1., 1., 0.), False, True),
                                            ((1., 1., 1., 1., 1., 0.), True, False)):
            r = aln.salign(rms_cutoff=3.5, normalize_pp_scores=False,
                           rr_file='$(LIB)/as1.sim.mat', overhang=30,
                           gap_penalties_1d=(-450, -50),
                           gap_penalties_3d=(0, 3),
                           gap_gap_score=0, gap_residue_score=0,
                           # If 'progressive', the tree is not computed and all
                           # structures will be aligned sequentially to the first
                           alignment_type='tree',
                           # Tree building can be avoided if the tree is input:
                           # ext_tree_file='1is3A_exmat.mtx',
                           # For a multiple sequence alignment only the first
                           # feature needs to be non-zero
                           feature_weights=weights,
                           improve_alignment=True, fit=True, write_fit=False,
                           write_whole_pdb=whole, output='ALIGNMENT QUALITY')
        if r.qscorepct > 70:
            aln.write(file=tmp_file.name, alignment_format='FASTA')
            with open(tmp_file.name) as a:
                alignment = unwrap(a.read().splitlines())
            for i in range(len(alignment[1])):
                if alignment[1][i] != '-' and alignment[3][i] != '-':
                    pos1 = get_real_position_al(alignment[1], i)
                    pos2 = get_real_position_al(alignment[3], i)
                    result[pos1] = pos2
    except Exception:
        print('Modeller failed')
    sys.stdout.close()
    sys.stdout = _stdout
    return result
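The helper above returns a dictionary mapping residue positions in the first chain to positions in the second. A minimal usage sketch follows; it assumes `PDB_DIR`, `unwrap()` and `get_real_position_al()` are defined in the same module and that the corresponding `pdbXXXX` atom files are available, and the structure IDs and chains below are placeholders.

strct1 = {'id': '1abc', 'chain_id': 'A'}  # hypothetical structure record
strct2 = {'id': '2xyz', 'chain_id': 'B'}  # hypothetical structure record
mapping = mk_strct_al_modeller(strct1, strct2)
for pos1, pos2 in mapping.items():
    print(pos1, '->', pos2)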
def test_script1(self): """Test step 1 (build profile)""" # Make sure the script runs without errors p = subprocess.check_call(['scripts/script1_build_profile.py']) # Make sure the profile contains the sequences we expect e = modeller.environ() a = modeller.alignment(e, file='output/build_profile.ali') self.assertEqual(sorted(s.code for s in a), sorted(self.templates) + ['P0A6F5']) os.unlink('output/build_profile.prf') os.unlink('output/build_profile.ali')
def build_mdt_from_sequence(self, mlib, features, seq, **keys): """Build a simple test MDT for a given sequence""" env = self.get_environ() mdl = modeller.model(env) mdl.build_sequence(seq) m = mdt.Table(mlib, features=features) a = modeller.alignment(env) a.append_model(mdl, atom_files='test', align_codes='test') m.add_alignment(a, **keys) return m
def main(argv):
    wp = ''
    mp = ''
    tp = ''
    try:
        opts, args = getopt.getopt(argv, "hw:m:t:",
                                   ["wprofile=", "mprofile=", "tprofile="])
    except getopt.GetoptError:
        print('%s -w wt_model_profile -m mt_model_profile -t template_profile'
              % sys.argv[0])
        sys.exit(2)
    for opt, arg in opts:
        if opt in ('-h', '--help'):
            print('%s -w wt_model_profile -m mt_model_profile -t template_profile'
                  % sys.argv[0])
            sys.exit()
        elif opt in ("-w", "--wprofile"):
            wp = arg
        elif opt in ("-m", "--mprofile"):
            mp = arg
        elif opt in ("-t", "--tprofile"):
            tp = arg
    pic_out = mp.replace("profile", "png")
    e = modeller.environ()
    a = modeller.alignment(e, file=mp.replace("profile", "ali"))
    template = get_profile(tp, a['6Y2HA'])
    wmodel = get_profile(wp, a['CLIC5'])
    mmodel = get_profile(mp, a['CLIC5'])
    # Plot the template and model profiles in the same plot for comparison:
    pylab.figure(1, figsize=(10, 6))
    pylab.xlabel('Alignment position')
    pylab.ylabel('DOPE per-residue score')
    pylab.plot(template, color='red', linewidth=2, label='Template')
    pylab.plot(wmodel, color='green', linewidth=2, label='Wt multi-template Model')
    pylab.plot(mmodel, color='black', linewidth=2, label='Mt multi-template Model')
    pylab.legend()
    pylab.savefig(pic_out, dpi=65)
def test_feature_rama(self): """Check Ramachandran mainchain conformation class feature""" env = self.get_environ() mlib = self.get_mdt_library() self.assertRaises(ValueError, mdt.features.MainchainConformation, mlib, protein=3) conf = mdt.features.MainchainConformation(mlib) aln = modeller.alignment(env, file="test/data/tiny.ali") m = mdt.Table(mlib, features=conf) m.add_alignment(aln) self.assertEqual([b.symbol for b in m.features[0].bins], ["A", "P", "B", "L", "E", "U"]) self.assertEqual([b for b in m], [0, 2, 2, 0, 0, 2])
def get_test_mdt(self, mlib, features): env = self.get_environ() mdl = modeller.model(env) mdl.build_sequence('C') m = mdt.Table(mlib, features=features) a = modeller.alignment(env) a.append_model(mdl, atom_files='test', align_codes='test') m.add_alignment(a) m = m.reshape(features, [0] * len(features), [-1] * len(features)) return m
def test_feature_iatta_special(self): """Check atom type feature with disulfide/termini special handling""" env = self.get_environ() mlib = mdt.Library(env, special_atoms=True) mlib.atom_classes.read("${LIB}/atmcls-melo.lib") attyp = mdt.features.AtomType(mlib) aln = modeller.alignment(env, file="test/data/tiny.ali") m = mdt.Table(mlib, features=attyp) m.add_alignment(aln) self.assertInTolerance(m[0], 6.0, 0.0005) self.assertInTolerance(m[1], 0.0, 0.0005) self.assertInTolerance(m[2], 5.0, 0.0005)
def count_alignments(aln_file, target, templates): import modeller modeller.log.none() env = modeller.environ() aln = modeller.alignment(env, file=aln_file) target = aln[target] templates = [aln[t] for t in templates] num_align = 0 for r in target.residues: for template in templates: if r.get_aligned_residue(template) is not None: num_align += 1 return num_align, len(target.residues)
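A short usage sketch for the counter above; the alignment file name and align codes are placeholders for a PIR alignment that contains the target and template entries.

n_aligned, n_target = count_alignments('target-templates.ali', 'target',
                                        ['1abcA', '2xyzB'])
print('%d of %d target residues are aligned to at least one template'
      % (n_aligned, n_target))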
def test_feature_alpha_content(self): """Check alpha content feature""" env = self.get_environ() mlib = self.get_mdt_library() self.assertRaises(ValueError, mdt.features.AlphaContent, mlib, bins=mdt.uniform_bins(10, 0, 0.1), protein=3) alpha = mdt.features.AlphaContent(mlib, bins=mdt.uniform_bins(10, 0, 0.1)) for (alnfile, bin) in (("tiny.ali", 0), ("alignment.ali", 5)): m = mdt.Table(mlib, features=alpha) a = modeller.alignment(env, file=os.path.join("test", "data", alnfile)) m.add_alignment(a) self.assertEqual(m.shape, (11,)) self.assertEqual(m.sum(), 1) self.assertEqual(m[bin], 1)
def test_feature_neighborhood_difference(self): """Check residue neighborhood difference features""" env = self.get_environ() mlib = self.get_mdt_library() bins = mdt.uniform_bins(9, 0, 0.2) ndif = mdt.features.NeighborhoodDifference(mlib, bins) avndif = mdt.features.AverageNeighborhoodDifference(mlib, bins) aln = modeller.alignment(env, file="test/data/struc-struc.ali") m = mdt.Table(mlib, features=ndif) m.add_alignment(aln) self.assertEqual([b for b in m], [4, 6, 2] + [0] * 7) m = mdt.Table(mlib, features=avndif) m.add_alignment(aln) self.assertEqual([b for b in m], [6, 12, 2] + [0] * 7)
def align(target_name: str, target_sequence: str, template_name: str, template_chain: chr) -> None: # creates a file called f'alignment_{target_name}_and_{template_name}.pir' # assumes a file already exists called f'{template_name}.pdb' target_pir = f'>P1;{target_name}\nsequence:{target_name}::::::::\n{target_sequence}*' target_pir = StringIO(target_pir) alignment_instance = m.alignment(env) model_instance = m.model(env) model_instance.read(file=template_name, model_segment=(f'FIRST:{template_chain}', f'LAST:{template_chain}')) alignment_instance.append_model(model_instance, align_codes=template_name, atom_files=template_name) alignment_instance.append(file=target_pir, align_codes=target_name) alignment_instance.align2d() alignment_instance.write(file=f'alignment_{target_name}_and_{template_name}.pir')
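A hedged example of calling the routine above; it assumes the module-level Modeller environment `env` (and the `m` alias for modeller) that the function relies on, plus a template file `1abc.pdb` in the working directory, all of which are placeholders.

align(target_name='my_target',
      target_sequence='MKTAYIAKQRQISFVKSHFSRQLEERLGLIEVQ',  # placeholder sequence
      template_name='1abc',
      template_chain='A')
# writes 'alignment_my_target_and_1abc.pir'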
def test_read_alnstructure(self): """Check reading a Modeller alignment structure""" env = self.get_environ() m = modeller.model(env) m.build_sequence('C') a = modeller.alignment(env) a.append_model(m, align_codes='test', atom_files='test') m = IMP.kernel.Model() loader = IMP.modeller.ModelLoader(a[0]) mp = loader.load_atoms(m) all_atoms = IMP.atom.get_by_type(mp, IMP.atom.ATOM_TYPE) self.assertEqual(7, len(all_atoms)) # Alignment structures don't have charges or CHARMM types self.assertEqual(IMP.atom.Charged.get_is_setup(all_atoms[0]), False) self.assertEqual(IMP.atom.CHARMMAtom.get_is_setup(all_atoms[0]), False)
def test_feature_triplet_residue(self): """Check triplet features with residue qualifier""" env = self.get_environ() mlib = self.get_mdt_library() mlib.tuple_classes.read("test/data/trpcls-residue.lib") feat = mdt.features.TupleType(mlib) m = mdt.Table(mlib, features=feat) mdl = modeller.model(env) mdl.build_sequence("AAACAAACSAA") a = modeller.alignment(env) a.append_model(mdl, align_codes="test") m.add_alignment(a) self.assertEqual([x for x in m], [6.0, 2.0, 1.0, 1.0, 0.0, 0.0])
def test_read_alnstructure(self): """Check reading a Modeller alignment structure""" env = self.get_environ() m = modeller.model(env) m.build_sequence('C') a = modeller.alignment(env) a.append_model(m, align_codes='test', atom_files='test') m = IMP.Model() loader = IMP.modeller.ModelLoader(a[0]) mp = loader.load_atoms(m) all_atoms = IMP.atom.get_by_type(mp, IMP.atom.ATOM_TYPE) self.assertEqual(7, len(all_atoms)) # Alignment structures don't have charges or CHARMM types self.assertEqual(IMP.atom.Charged.get_is_setup(all_atoms[0]), False) self.assertEqual(IMP.atom.CHARMMAtom.get_is_setup(all_atoms[0]), False)
def test_feature_sequence_identity(self): """Check sequence identity feature""" env = self.get_environ() mlib = self.get_mdt_library() # Put into 25% bins sid = mdt.features.SequenceIdentity(mlib, bins=mdt.uniform_bins(5, 0, 0.250)) for (seq, id) in (("GGG", 0), ("AFV", 100), ("A--", 100), ("AV-", 50)): aln = modeller.alignment(env) aln.append_sequence("AFV") aln.append_sequence(seq) m = mdt.Table(mlib, features=sid) m.add_alignment(aln) self.assertEqual(m.shape, (6,)) self.assertEqual(m.sum(), 2.0) self.assertEqual(m[int(id / 25)], 2.0)
def _structureX_seq_from_modeller(self): """ return a str containing the first two lines of the sequence corresponding to structureX a file named [self._id]_structureX.seq also written """ env = modeller.environ() model = modeller.model(env, file=self._id) aln = modeller.alignment(env) aln.append_model(model, align_codes=self._id) out_file = self._id + "_structureX.seq" aln.write(file=out_file) out_str = open(out_file, "r").read() out_str = [c for c in out_str.split("\n") if c] out_str = "\n".join(out_str[:2]) + "\n*" return out_str
def plot_profiles(aln_file, template_profile, template_code, model_profile, model_code): e = modeller.environ() a = modeller.alignment(e, file=aln_file) template = get_profile(template_profile, a[template_code]) model = get_profile(model_profile, a[model_code]) # Plot the template and model profiles in the same plot for comparison: pylab.figure(1, figsize=(10, 6)) pylab.xlabel('Alignment position') pylab.ylabel('DOPE per-residue score') pylab.plot(model, color='red', linewidth=2, label=model_code) pylab.plot(template, color='green', linewidth=2, label=template_code) pylab.legend() pylab.savefig('dope_profile_best_model.png', dpi=65)
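A usage sketch for the plotting helper above, with placeholder profile files and align codes; the profiles are assumed to have been written by assess_dope, and get_profile() must be in scope.

plot_profiles(aln_file='target-1abcA.ali',
              template_profile='1abcA.profile', template_code='1abcA',
              model_profile='target.profile', model_code='target')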
def get_auto_align(in_aln_file, target, templates, out_aln_file): import modeller modeller.log.none() env = modeller.environ() env.io.atom_files_directory = ['.'] aln = modeller.alignment(env, file=in_aln_file, align_codes=target) with allosmod.util.temporary_directory() as tempd: temp_aln = os.path.join(tempd, "templates.ali") with open(temp_aln, 'w') as fh: for template in templates: pdb2ali(template, fh=fh) aln.append(file=temp_aln) aln.salign(overhang=30, gap_penalties_1d=(-450, -50), alignment_type='tree', output='ALIGNMENT') aln.write(file=out_aln_file)
def complete_pdb(env, filename, special_patches=None, transfer_res_num=False, model_segment=None, patch_default=True): """Reads the given PDB file, reorders the atoms to match the current topology library, and adds any missing atoms. You should read topology and parameters into 'env' before calling this routine. :param env: Modeller environment. :type env: :class:`environ` :param filename: the PDB file to read. :param special_patches: if set, it is expected to be a routine which takes one parameter (the model) and applies any patches (e.g. disulfide bridges). :param transfer_res_num: if True, the residue numbering from the original PDB is retained (by default, residues are renumbered from 1). :param patch_default: if True, default terminal patches are applied. :return: the completed model. :rtype: :class:`model`""" vars = {} if model_segment is not None: vars['model_segment'] = model_segment mdl = model(env, file=filename, model_format='PDB_OR_MMCIF', **vars) # Save original chain IDs, since generate_topology resets them chain_ids = [c.name for c in mdl.chains] aln = alignment(env) aln.append_model(mdl, atom_files=filename, align_codes='struc') aln.append_model(mdl, atom_files=filename+'.ini', align_codes='struc-ini') mdl.clear_topology() mdl.generate_topology(aln[-1], patch_default=patch_default) if special_patches: special_patches(mdl) # Save original seq_id, as transfer_xyz sets it seq_id = mdl.seq_id mdl.transfer_xyz(aln) mdl.seq_id = seq_id # Restore original chain IDs for (chain, chainid) in zip(mdl.chains, chain_ids): chain.name = chainid mdl.build(initialize_xyz=False, build_method='INTERNAL_COORDINATES') if transfer_res_num: mdl2 = model(env, file=filename, **vars) mdl.res_num_from(mdl2, aln) return mdl
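A minimal sketch of how this routine is typically driven, assuming the function above is in scope: topology and parameter libraries are loaded first, as the docstring requires, and '1abc.pdb' is a placeholder input file.

import modeller

env = modeller.environ()
env.libs.topology.read(file='$(LIB)/top_heav.lib')
env.libs.parameters.read(file='$(LIB)/par.lib')
# Build missing atoms and keep the original residue numbering
mdl = complete_pdb(env, '1abc.pdb', transfer_res_num=True)
mdl.write(file='1abc_complete.pdb')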
def test_dihedral_diff_periodic(self): """Make sure that dihedral difference features are periodic""" def set_omega(mdl, angle): ca = mdl.atoms["CA:1"] c = mdl.atoms["C:1"] n2 = mdl.atoms["N:2"] ca2 = mdl.atoms["CA:2"] n2.x = n2.y = n2.z = 0.0 c.x = -2.0 c.y = c.z = 0.0 ca.x = -2.0 ca.y = 2.0 ca.z = 0.0 ca2.x = 0.0 ca2.y = 2.0 * math.cos(math.pi * angle / 180.0) ca2.z = 2.0 * math.sin(math.pi * angle / 180.0) env = self.get_environ() mlib = self.get_mdt_library() # Make bins start at slightly less than -180, to allow for floating # point rounding omegadiff = mdt.features.OmegaDihedralDifference(mlib, mdt.uniform_bins(36, -180.01, 10)) # Note that difference must be shortest around the circle, so # 100.0 - (-100.0) is not 200 degrees but -160 degrees for dih1, dih2, expected in ( (80.0, 80.0, 0.0), (80.0, -80.0, -160.0), (-80.0, 80.0, 160.0), (-100.0, 100.0, -160.0), (100.0, -100.0, 160.0), ): m = mdt.Table(mlib, features=omegadiff) a = modeller.alignment(env) for d in dih1, dih2: mdl = modeller.model(env) mdl.build_sequence("CC") set_omega(mdl, d) a.append_model(mdl, atom_files="test", align_codes="test") m.add_alignment(a, sympairs=True) # 2 data points, 1 for each residue self.assertInTolerance(m.sample_size, 2.0, 1e-5) # Last residue has no omega, so is always undefined self.assertInTolerance(m[-1], 1.0, 1e-5) expected_bin = int((expected + 180.0) / 10.0) self.assertInTolerance(m[expected_bin], 1.0, 1e-5)
def align_template_to_reference(msmseed, ref_msmseed): import modeller import tempfile import shutil import copy import os temp_dir = tempfile.mkdtemp() try: os.chdir(temp_dir) alignment_file = open('aln_tmp.pir', 'w') aln = _PIR_alignment(ref_msmseed.template_sequence, ref_msmseed.template_id, msmseed.template_sequence, msmseed.template_id) alignment_file.writelines(aln) alignment_file.close() template_file = open(msmseed.template_id + '.pdb', 'w') template_pdb = msmseed.template_structure template_pdb.writeFile(template_pdb.topology, template_pdb.positions, template_file) template_file.close() ref_pdb = ref_msmseed.template_structure ref_file = open(ref_msmseed.template_id + '.pdb', 'w') ref_pdb.writeFile(ref_pdb.topology, ref_pdb.positions, ref_file) ref_file.close() modeller.log.none() env = modeller.environ() env.io.atom_files_directory = temp_dir aln = modeller.alignment(env, file='aln_tmp.pir', align_codes=(ref_msmseed.template_id, msmseed.template_id)) mdl = modeller.model(env, file=ref_msmseed.template_id + '.pdb') mdl2 = modeller.model(env, file=msmseed.template_id + '.pdb') atmsel = modeller.selection(mdl).only_atom_types('CA') r = atmsel.superpose(mdl2, aln) msmseed.rmsd_to_reference = copy.deepcopy(r.rms) except Exception as e: msmseed.error_message = e.message finally: shutil.rmtree(temp_dir) return msmseed
def plot_profiles(aln_file, template_profile: list, model_file, model_code): e = modeller.environ() a = modeller.alignment(e, file=aln_file) model = get_profile(model_file, a[model_code]) # Plot the template and model profiles in the same plot for comparison: pylab.figure(1, figsize=(10, 6)) pylab.xlabel('Alignment position') pylab.ylabel('DOPE per-residue score') rank = 0 pylab.plot(model, color=tableau20[rank], linewidth=2, label=model_code) for template_code in template_profile: rank = rank + 1 templatefile = template_code + ".profile" template = get_profile(templatefile, a[template_code]) pylab.plot(template, color=tableau20[rank], linewidth=2, label=template_code) pylab.legend() pylab.savefig('dope_profile_best_model.png', dpi=65)
def plot(target, template, model, dir): """Plot model and template profiles.""" e = mod.environ() seq_code = target.replace(".ali", "") alnfile = os.path.join(dir, seq_code + "-" + template.replace(".pdb", ".ali")) a = mod.alignment(e, file=alnfile) pdb_code = template.replace(".pdb", "") target_profile = os.path.join(dir, pdb_code + ".profile") model_profile = os.path.join(dir, model.replace(".pdb", ".profile")) t = get_profile(target_profile, a[pdb_code]) m = get_profile(model_profile, a[seq_code]) # plot the template and model profiles in the same plot for comparison pylab.figure(1, figsize=(10, 6)) pylab.xlabel("Alignment position") pylab.ylabel("DOPE per-residue score") pylab.plot(m, color="red", linewidth=2, label=f"Model ({model})") pylab.plot(t, color="green", linewidth=2, label=f"Template ({template})") pylab.legend() pylab.title("DOPE score model vs. template") pylab.savefig(f"{seq_code}-{pdb_code}_dope.png", dpi=150)
def _create_aligment(self, env, base_models): _log.debug("creating alignments for %s with %s pdbs" % (self.seqrecord.id, len(base_models))) aligned_models = [] env.io.atom_files_directory = [self.out_folder + '/'] aln = alignment(env) aln.append_sequence(str(self.seqrecord.seq)) aln[0].code = str(self.seqrecord.id) for i, pdb_chain_file_path in enumerate(base_models, 1): # TODO sacar parseo feo code = pdb_chain_file_path.split("/")[-1].replace(".ent", "").replace( "pdb", "") m = model(env, file=code) aln.append_model(m, align_codes=code) aln[i].code = code aligned_models.append(code) aln.malign() aln.id_table(matrix_file=self.seqrecord.id + '_family.mat') aln.write(file=self.model_directory() + "/" + self.seqrecord.id + '.ali', alignment_format='PIR') assert os.path.exists(self.model_directory() + "/" + self.seqrecord.id + '.ali'), "NOOOOOOOOOOOO!!!!: " + os.getcwd( ) + "/" + self.seqrecord.id + '.ali' aln.write(file=self.model_directory() + self.seqrecord.id + '.pap', alignment_format='PAP') return aligned_models
def get_profile(profile_file, seq):
    """Read `profile_file` into a Python array, and add gaps corresponding to
    the alignment sequence `seq`."""
    # Read all non-comment and non-blank lines from the file:
    f = open(profile_file)
    vals = []
    for line in f:
        if not line.startswith('#') and len(line) > 10:
            spl = line.split()
            vals.append(float(spl[-1]))
    # Insert gaps into the profile corresponding to those in seq:
    for n, res in enumerate(seq.residues):
        for gap in range(res.get_leading_gaps()):
            vals.insert(n, None)
    # Add a gap at position '0', so that we effectively count from 1:
    vals.insert(0, None)
    return vals

e = modeller.environ()
a = modeller.alignment(e, file='TvLDH-1bdmA.ali')
template = get_profile('1bdmA.profile', a['1bdmA'])
model = get_profile('TvLDH.profile', a['TvLDH'])
# Plot the template and model profiles in the same plot for comparison:
pylab.figure(1, figsize=(10, 6))
pylab.xlabel('Alignment position')
pylab.ylabel('DOPE per-residue score')
pylab.plot(model, color='red', linewidth=2, label='Model')
pylab.plot(template, color='green', linewidth=2, label='Template')
pylab.legend()
pylab.savefig('dope_profile.png', dpi=65)
def align_res_nums(key_pdb_file, key_chain_id, value_pdb_file, value_chain_id): """Determine which residues in one PDB file correspond to which in another PDB file. Parameters ---------- key_pdb_file : string The location of the pdb file whose residue numbers will be keys in the returned dictionary. key_chain_id : string The chain of key_pdb_file that will be aligned. value_pdb_file : string The location of the pdb file whose residue numbers will be values in the returned dictionary. value_chain_id : string The chain of value_pdb_file that will be aligned. Returns ------- dict_residue_nums : dictionary{string : string} The keys and values are string-typed residue numbers (from key_pdb_file and value_pdb_file). Any residues that are missing from value_pdb_file will be assigned the value "NA". If any residues in key_pdb_file are classified as HETATMs, then they will only included in dict_residue_nums if they are MSE, MEX, or ABU. This matches MODELLER's behavior. """ # A temporary directory to store the output of Modeller's alignment. temp_dir_path = tempfile.mkdtemp() env = modeller.environ() aln = modeller.alignment(env) key_model = modeller.model(env, file=key_pdb_file, model_segment=("FIRST:%s" % (key_chain_id), "LAST:%s" % (key_chain_id))) aln.append_model(key_model, atom_files=key_pdb_file, align_codes="key%s" % (key_chain_id)) value_model = modeller.model(env, file=value_pdb_file, model_segment=("FIRST:%s" % (value_chain_id), "LAST:%s" % (value_chain_id))) aln.append_model(value_model, atom_files=value_pdb_file, align_codes="value%s" % (value_chain_id)) aln.salign() salign_out_loc = temp_dir_path + "key%s_value%s_salign_output.ali" % ( key_chain_id, value_chain_id) aln.write(file=salign_out_loc, alignment_format="PIR") with open(salign_out_loc, "r") as alignment_opened: alignment_lines = alignment_opened.readlines() # Ignore the header lines. The format requires a 2-line header; there may be a # blank line before this. if alignment_lines[0][0] == ">": line_index = 2 else: line_index = 3 key_sequence_aligned = "" while True: next_line = alignment_lines[line_index].strip() key_sequence_aligned += next_line if next_line[len(next_line) - 1] == "*": key_sequence_aligned = key_sequence_aligned[:-1] break line_index += 1 if alignment_lines[line_index + 1][0] == ">": line_index += 3 else: line_index += 4 value_sequence_aligned = "" while True: next_line = alignment_lines[line_index].strip() value_sequence_aligned += next_line if next_line[len(next_line) - 1] == "*": value_sequence_aligned = value_sequence_aligned[:-1] break line_index += 1 shutil.rmtree(temp_dir_path) key_pdb_res_numbers = get_numbers_from_pdb(key_pdb_file, key_chain_id) value_pdb_res_numbers = get_numbers_from_pdb(value_pdb_file, value_chain_id) dict_residue_nums = {} # value_residues_passed is incremented whenever the iteration reaches a spot in the # alignment where the value sequence has a residue. value_residues_passed = 0 key_residues_passed = 0 for i in range(len(value_sequence_aligned)): # If both key_sequence_aligned and value_sequence_aligned have residues at # the position, then add a dictionary entry mapping the residue number in key # to the residue number in value. 
if (key_sequence_aligned[i] != "-") and (value_sequence_aligned[i] != "-"): current_key_resnum = key_pdb_res_numbers[key_residues_passed] current_value_resnum = value_pdb_res_numbers[value_residues_passed] dict_residue_nums[current_key_resnum] = current_value_resnum value_residues_passed += 1 key_residues_passed += 1 # If key_sequence_aligned has a residue where value_sequence_aligned has a gap, # then create a dictionary entry with value NA. elif (key_sequence_aligned[i] != "-") and (value_sequence_aligned[i] == "-"): dict_residue_nums[key_pdb_res_numbers[key_residues_passed]] = "NA" key_residues_passed += 1 # If key_sequence_aligned has a gap where value_sequence_aligned has a residue, # then don't add a dictionary entry. elif (key_sequence_aligned[i] == "-") and (value_sequence_aligned[i] != "-"): value_residues_passed += 1 return dict_residue_nums
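A usage sketch of the residue-number mapping above; the PDB paths and chain IDs are placeholders.

res_map = align_res_nums('structure1.pdb', 'A', 'structure2.pdb', 'B')
for key_res, value_res in res_map.items():
    print(key_res, '->', value_res)  # value is "NA" where the residue is missing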
def runmodeller(target,database_path='default',models_path='default',templates_path='default',working='default',\ mod_per_temp=20,excludes=[],max_seq_id=0.95,min_seq_id=0.25,max_eval=0.01,num_iter=1,gaps=False): '''File requirement: A .ali Sequence File and a pdb_95.pir database file Parameters: target: a .ali file path specifying the sequence of the protein models_path: the folder to save the models to. Defalut is ./models templates_path: the folder to save the templates to. Default is ./templates database: the path to the pdb_95.pir folder the default is ./database working: the working dir. The default is ./working mod_per_temp: number of models to be generated by modeller, the default is 20 excludes: list of strings specifying PDB files to be excluded from the templates max_seq_id: Maximum of the sequence identity for a template to be considered. Ranges from 0 to 1. Default is 0.95 min_seq_id: Minimum of the sequence identity for a template to be considered. Ranges from 0 to 1. Default is 0.25 This function returns a list of paths of the generated models ''' #set log to verbose modeller.log.verbose() env = modeller.environ() entering_dir = os.getcwd() #set paths if database_path == 'default': database_dir = os.path.abspath('./database') else: database_dir = os.path.abspath(database_path) target_file = os.path.basename(target) target_dir = os.path.dirname(os.path.abspath(target)) if target_file.endswith('.ali'): target_name = target_file[:-4] if working == 'default': if not os.path.exists('./working'): os.mkdir('./working') working_dir = os.path.abspath('./working') else: working_dir = os.path.abspath(working) if not os.path.exists(working_dir): os.mkdir(working_dir) if models_path == 'default': if not os.path.exists('./models'): os.mkdir('./models') models_dir = os.path.abspath('./models') else: models_dir = os.path.abspath(models_path) if not os.path.exists(models_dir): os.mkdir(models_dir) if templates_path == 'default': if not os.path.exists('./templates'): os.mkdir('./templates') template_dir = os.path.abspath('./templates') else: template_dir = os.path.abspath(templates_path) if not os.path.exists(templates_path): os.mkdir(templates_path) #cd to woring. 
script will cd back at the end os.chdir(working_dir) #-- Prepare the input files #-- Read in the sequence database sdb = modeller.sequence_db(env) sdb.read(seq_database_file=database_dir + '/pdb_95.pir', seq_database_format='PIR', chains_list='ALL', minmax_db_seq_len=(30, 4000), clean_sequences=True) #-- Write the sequence database in binary form sdb.write(seq_database_file=database_dir + '/pdb_95.bin', seq_database_format='BINARY', chains_list='ALL') #-- Now, read in the binary database sdb.read(seq_database_file=database_dir + '/pdb_95.bin', seq_database_format='BINARY', chains_list='ALL') #-- Read in the target sequence/alignment target_aln = modeller.alignment(env) target_aln.append(file=target_dir + '/' + target_file, alignment_format='PIR', align_codes='ALL') #-- Convert the input sequence/alignment into # profile format target_prf = target_aln.to_profile() #-- Scan sequence database to pick up homologous sequences target_prf.build(sdb, matrix_offset=-450, rr_file='${LIB}/blosum62.sim.mat', gap_penalties_1d=(-500, -50), n_prof_iterations=num_iter, check_profile=True, max_aln_evalue=max_eval, gaps_in_target=gaps) #-- Write out the profile in text format target_prf.write(file=working_dir + '/' + target_name + '_profile.prf', profile_format='TEXT') #-- Convert the profile back to alignment format target_aln = target_prf.to_alignment() #-- Write out the alignment file target_aln.write(file=working_dir + '/' + target_name + '_profile.ali', alignment_format='PIR') #CLEAN UP del sdb, target_aln, target_prf, env #Read the name of the templates templates = [] txt_input = open(working_dir + '/' + target_name + '_profile.prf', 'r') for eachline in txt_input: if eachline.lstrip(' ')[0] == '#': continue entries = eachline.split() if len(entries) != 13: continue if entries[2] != 'X': continue name = entries[1] seqid = float(entries[10]) templates.append(template(name, seqid)) txt_input.close() del entries, eachline, txt_input, name, seqid #Select templates i = 0 while (i < len(templates)): if( (templates[i].code in excludes) \ or (templates[i].seqid > max_seq_id*100) \ or (templates[i].seqid < min_seq_id*100) \ ): templates.pop(i) else: i += 1 #Download templates pdb for eachtemplate in templates: pdbname = eachtemplate.code.upper() url = 'http://www.rcsb.org/pdb/files/%s.pdb' % pdbname pdb_download = open(template_dir + '/' + pdbname.lower() + '.pdb', 'w') pdb_download.write(urllib.urlopen(url).read()) pdb_download.close() del pdb_download, pdbname, url #model alignment alnlist = [] for i in range(len(templates)): env = modeller.environ() aln = modeller.alignment(env) mdl = modeller.model(env, file=template_dir + '/' + templates[i].code, model_segment=('FIRST:' + templates[i].chain, 'LAST:' + templates[i].chain)) aln.append_model(mdl, align_codes=templates[i].name, atom_files=templates[i].filename) aln.append(file=target_dir + '/' + target_file, align_codes=target_name) aln.align2d() aln.write(file=working_dir + '/' + target_name + '-' + templates[i].name + '.ali', alignment_format='pir') aln.write(file=working_dir + '/' + target_name + '-' + templates[i].name + '.pap', alignment_format='pap') alnlist.append(working_dir + '/' + target_name + '-' + templates[i].name + '.ali') del i, aln, env #Make models filelist = [] for i in range(len(alnlist)): env = modeller.environ() env.io.atom_files_directory = [target_dir, working_dir, template_dir] a = modeller.automodel.automodel( env, alnfile=alnlist[i], knowns=templates[i].name, sequence=target_name, assess_methods=(modeller.automodel.assess.DOPE, 
modeller.automodel.assess.GA341)) a.starting_model = 1 a.ending_model = mod_per_temp a.make() for j in range(1, mod_per_temp + 1): scrname = target_name + '.B9999' + str(j).zfill(4) + '.pdb' tgtname = models_dir + '/' + target_name + '_' + templates[ i].code + '.B9999' + str(j).zfill(4) + '.pdb' os.rename(scrname, tgtname) filelist.append(tgtname) del env, a, scrname, tgtname, i, j os.chdir(entering_dir) return filelist
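A hedged usage sketch for runmodeller(); it assumes 'target.ali' holds the target sequence in PIR format and that the pdb_95.pir database sits in ./database, as described in the docstring, and all file names below are placeholders.

models = runmodeller('target.ali',
                     mod_per_temp=5,
                     max_seq_id=0.90,
                     min_seq_id=0.30)
print('Generated %d models:' % len(models))
for path in models:
    print(path)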
"""Read `profile_file` into a Python array, and add gaps corresponding to the alignment sequence `seq`.""" #CG# read all non-comment and non-blank lines from the file: f = file(profile_file) vals = [] for line in f: if not line.startswith('#') and len(line) > 10: spl = line.split() vals.append(float(spl[-1])) #CG# insert gaps into the profile corresponding to those in seq: for n, res in enumerate(seq.residues): for gap in range(res.get_leading_gaps()): vals.insert(n, None) #CG# add a gap at position '0', so that we effectively count from 1: vals.insert(0, None) return vals e = modeller.environ() a = modeller.alignment(e, file='res_align.ali') template = get_profile('4GRV.profile', a['4GRV']) model = get_profile('orexin.profile', a['orexin']) #CG# plot the template and model profiles in the same plot for comparison: pylab.figure(1, figsize=(10, 6)) pylab.xlabel('Alignment position') pylab.ylabel('DOPE per-residue score') pylab.plot(model, color='red', linewidth=2, label='Model') pylab.plot(template, color='green', linewidth=2, label='Template') pylab.legend() pylab.savefig('dope_profile.png', dpi=65)
def get_profile(profile_file, seq):
    """Read `profile_file` into a Python array, and add gaps corresponding to
    the alignment sequence `seq`."""
    # Read all non-comment and non-blank lines from the file:
    f = open(profile_file)
    vals = []
    for line in f:
        if not line.startswith('#') and len(line) > 10:
            spl = line.split()
            vals.append(float(spl[-1]))
    # Insert gaps into the profile corresponding to those in seq:
    for n, res in enumerate(seq.residues):
        for gap in range(res.get_leading_gaps()):
            vals.insert(n, None)
    # Add a gap at position '0', so that we effectively count from 1:
    vals.insert(0, None)
    return vals

e = modeller.environ()
a = modeller.alignment(e, file='W2T758-3n2gD.ali')
template = get_profile('3n2gD.profile', a['3n2gD'])
model = get_profile('W2T758.profile', a['W2T758'])
# Plot the template and model profiles in the same plot for comparison:
pylab.figure(1, figsize=(10, 6))
pylab.xlabel('Alignment position')
pylab.ylabel('DOPE per-residue score')
pylab.plot(model, color='red', linewidth=2, label='Model')
pylab.plot(template, color='green', linewidth=2, label='Template')
pylab.legend()
pylab.savefig('dope_profile.png', dpi=65)
def modelMissingAtoms(self, pdbFilename, outputFilename, chain=' ', debug = False, allHydrogen = False): """Model missing atoms/residues in a specified PDB file using MODELLER. REQUIRED ARGUMENTS pdbFilename - the filename of the PDB file to model missing atoms and residues for outputFilename - the filename for the desired final model OPTIONAL ARGUMENTS chain - the one-character chain ID of the chain to model (default ' ') debug - flag to print extra debug output and leave temporary directory (default False) NOTES The specified chain from pdbFilename is processed through MODELLER to build missing atoms and residues specified in the SEQRES entry of the PDB file but not present in the PDB file. This procedure is loosely based on the protocol appearing at http://salilab.org/modeller/wiki/Missing_residues The complete sequence is read from the SEQRES fields, and the DBREF field used to determine the span of residues described in the SEQRES fields. A heavy-atom topology as constructed in MODELLER for the complete sequence, coordinates present in the PDB file transferred, and the remaining heavy-atom coordinates built from ideal geometry. Finally, a single standard simulated-annealing-based modeling step is performed using the standard automodel protocol but allowing only the atoms and residues that were undefined in the PDB file to move. """ # Ensure specified PDB file exists. import os.path if not os.path.exists(pdbFilename): raise ParameterException, "Specified PDB file %s not found." % pdbFilename # Append full path to pdbFilename and outputFilename import os.path pdbFilename = os.path.abspath(pdbFilename) outputFilename = os.path.abspath(outputFilename) # Create a temporary directory for running MODELLER. import tempfile import os.path tmpdir = tempfile.mkdtemp() if debug: print "tmpdir = %s" % tmpdir # Get the complete sequence without chain breaks from the SEQRES/DBREF fields of the source PDB file. first_residue_id, complete_sequence = self.getCompleteSequence(pdbFilename, chain) nresidues = len(complete_sequence) last_residue_id = first_residue_id + nresidues - 1 # Get the sequence of residues that are at least partially present in the PDB file as a dictionary. # present_sequence_dict[residue_id] is the one-letter-code of the residue residue_id, if there are any ATOM records for this residue. present_sequence_dict = self.getPresentSequence(pdbFilename, chain) # Generate alignment of the template sequence (residues for which any coordinates are defined) against the target (complete sequence from SEQRES/DBREF) present_sequence = "" for residue_id in range(first_residue_id, first_residue_id + nresidues): if present_sequence_dict.has_key(residue_id): # TODO: Check integrity against complete_sequence. present_sequence += present_sequence_dict[residue_id] else: present_sequence += '-' # Change working directory to temporary directory. import os olddir = os.getcwd() os.chdir(tmpdir) # Generate alignment file for MODELLER. 
import os alignment_filename = os.path.join(tmpdir, 'model.ali') alignment_file = open(alignment_filename, 'w') print >> alignment_file, ">P1;%s" % "template" print >> alignment_file, "%s:%s:%d:%s:%d:%s:%s:%s:%s:%s" % ( "structure", pdbFilename, min(present_sequence_dict.keys()), chain, max(present_sequence_dict.keys()), chain, " ", " ", " ", " " ) print >> alignment_file, "%s*" % present_sequence print >> alignment_file, "" print >> alignment_file, ">P1;%s" % "target" print >> alignment_file, "%s:%s:%d:%s:%d:%s:%s:%s:%s:%s" % ( "sequence", "target", first_residue_id, chain, last_residue_id, chain, " ", " ", " ", " " ) print >> alignment_file, "%s*" % complete_sequence alignment_file.close() if debug: import commands print "alignment file:" print commands.getoutput('cat %(alignment_filename)s' % vars()) # Call MODELLER to generate topology, transfer coordinates, and build from internal coordinates. import modeller import modeller.automodel # Create a new environment. env = modeller.environ() # Specify the topology and parameters to use. # TODO: Is this necessary, or can we rely on the defaults? env.libs.topology.read(file='$(LIB)/top_heav.lib') env.libs.parameters.read(file='$(LIB)/par.lib') # Read in alignment. aln = modeller.alignment(env) print alignment_filename aln.append(file=alignment_filename, align_codes='all') # Create a model. model = modeller.model(env) # Generate the topology from the target sequence. model.generate_topology(aln['target']) # Transfer defined coordinates from template. model.transfer_xyz(aln) # Determine which atoms are undefined because they are missing in the template, and create a selection from them. missing_atom_indices = [] for atom_index in range(len(model.atoms)): atom = model.atoms[atom_index] if atom.x == -999: missing_atom_indices.append(atom_index) # DEBUG: Write model coordinates to a PDB file. model.write(file=os.path.join(tmpdir,'transferred.pdb')) # Build the remaining undefined atomic coordinates from ideal internal coordinates stored in residue topology files. model.build(initialize_xyz=False, build_method='INTERNAL_COORDINATES') # DEBUG: Write model coordinates to a PDB file. if debug: model.write(file=os.path.join(tmpdir,'built.pdb')) # Override the 'select_atoms' routine in the 'automodel' class to select only the atoms with undefined atomic coordinates in the template PDB. if (allHydrogen): class mymodel(modeller.automodel.allhmodel): def select_atoms(self): missing_atoms = modeller.selection() for atom_index in missing_atom_indices: missing_atoms.add(self.atoms[atom_index]) return missing_atoms else: class mymodel(modeller.automodel.automodel): def select_atoms(self): missing_atoms = modeller.selection() for atom_index in missing_atom_indices: missing_atoms.add(self.atoms[atom_index]) return missing_atoms # Ensure selected atoms feel all nonbonded interactions. env.edat.nonbonded_sel_atoms = 1 # Set up automodel. #a = mymodel(env, inifile='built.pdb', alnfile=alignment_filename, knowns='template', sequence='target') a = mymodel(env, alnfile=alignment_filename, knowns='template', sequence='target') # Set parameters for automodel. # Build only one model. # TODO: Have more models built by default (perhaps 50?) a.starting_model = 1 a.ending_model = 1 # Generate model(s). a.make() # TODO: Rescore models and select the best one. # For now, we only use the first model. final_model_summary = a.outputs[0] # Copy resulting model to desired output PDB filename.
import shutil shutil.copy(final_model_summary['name'], outputFilename) # Restore working directory. os.chdir(olddir) # Clean up temporary directory. if (not debug): for filename in os.listdir(tmpdir): os.remove(os.path.join(tmpdir,filename)) os.rmdir(tmpdir) return
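# A minimal usage sketch for modelMissingAtoms. The class that owns the method is not shown in this
# collection, so 'PDBHelper' and the file names below are hypothetical placeholders.
helper = PDBHelper()
helper.modelMissingAtoms('1abc.pdb', '1abc_complete.pdb', chain='A', debug=False)
# The completed model is written to '1abc_complete.pdb'; unless debug is True, the temporary
# MODELLER working directory is removed afterwards.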
'-t', '--target', help='mobile pdb structure file to transfer sequence on', type=str) parser.add_argument( '-r', '--ref', help='reference pdb structure file with sequence to transfer', type=str) args = parser.parse_args() env = modeller.environ() lib = '/usr/lib/modeller9.23/modlib' env.libs.topology.read(file=f'{lib}/top_heav.lib') env.libs.parameters.read(file=f'{lib}/par.lib') aln = modeller.alignment(env) target = modeller.model(env, file=args.target) target_name = os.path.basename(args.target).split('.')[0] aln.append_model(target, align_codes=target_name) ref = modeller.model(env, file=args.ref) ref_name = os.path.basename(args.ref).split('.')[0] aln.append_model(ref, align_codes=ref_name) aln.align() # aln.align3d() alnfile = f'{target_name}_{ref_name}.seq' aln.write(file=alnfile) mdl = modeller.model(env)
def _build_models(structfname, basedir, nmodels, refstructure, verbose, seq_rep_list): """ Builds replicate structural models of a list of protein sequences. seq_rep_list is a list of (sequence,replicates) pairs, giving each sequence object to be modeled and the number of replicates needed for that sequence object SIDE EFFECT: models are placed in basedir/sequence_id directory """ # set up path links, assuming current working directory workingdir = os.getcwd() structfname = os.path.normpath(os.path.join(workingdir, structfname)) basedir = os.path.normpath(os.path.join(workingdir, basedir)) # calculate total number of reps for each sequence id reps_per_id = {} for seq,reps in seq_rep_list: if seq.identifier in reps_per_id.keys(): reps_per_id[seq.identifier] += reps else: reps_per_id[seq.identifier] = reps for seq,reps in seq_rep_list: # calculate some information on total reps for this id and how many # models to build for this particular sequence total_reps_needed = reps_per_id[seq.identifier] models_per_rep = round(nmodels / total_reps_needed) if models_per_rep < 1: models_per_rep = 1 mynmodels = models_per_rep * reps # check this sequence's existing structures; bail out if done mindex = 1 outdir = basedir + os.path.sep + seq.identifier if not os.path.isdir(outdir): os.makedirs(outdir) else: existing_fnames = [ x.split(os.path.sep)[-1] for x in \ glob.glob(outdir + os.path.sep + 'rep*.pdb') ] existing_reps = [ int(x.split('rep')[1].split('.pdb')[0]) for \ x in existing_fnames] if existing_reps: existing_reps.sort(reverse=True) last_rep = existing_reps[0] if last_rep < total_reps_needed: mindex = existing_reps[0] + 1 else: continue # set up temporary directory for modeller execution with tempfile.TemporaryDirectory(prefix=dnameprefix) as tempdir: os.chdir(tempdir) # set up modeller environment if verbose: modeller.log.verbose() else: modeller.log.none() env = modeller.environ() env.io.atom_files_directory = [workingdir] # set up complete alignment aln = modeller.alignment(env) aln.append(file=structfname, remove_gaps=False) knowns = [s.code for s in aln] aln.append_sequence(seq.sequence) aln[-1].code = seq.identifier # write alignment - modeller doesn't like alignment in memory full_aln_fname = 'structaligntemp.ali' aln.write(full_aln_fname, alignment_format='PIR') # set up model assessments ASSESS_METHODS = [modeller.automodel.assess.DOPE, modeller.automodel.assess.DOPEHR] ASSESS_NAMES = ["DOPE score", "DOPE-HR score"] a = modeller.automodel.dope_loopmodel(env, alnfile=full_aln_fname, knowns=knowns, sequence=seq.identifier, assess_methods=ASSESS_METHODS) a.starting_model = 1 # index of the first model a.ending_model = mynmodels # index of the last model # adjust optimization parameters a.library_schedule = modeller.automodel.autosched.slow a.md_level = modeller.automodel.refine.slow a.make() # do homology modeling # evaluate structural models ok_models = [ x for x in a.outputs if x["failure"] is None ] score_results = [] for data in ok_models: fname = data["name"] myscrs = [] for score_name in ASSESS_NAMES: myscrs.append(data[score_name]) ave_score = sum(myscrs) / len(myscrs) score_results.append((ave_score, fname, myscrs)) score_results.sort() best_models = score_results[:reps] rest_models = score_results[reps:] # map to reference structure refseq = aln[0] if refstructure: refseq = aln[refstructure] refcode = refseq.code refpdbf = refseq.atom_file refrange = refseq.range refmdl = modeller.model(env, file=refpdbf, model_segment=refrange) refpos = 
modeller.selection(refmdl).only_atom_types('CA') # get best models final_files = [] for (score,infname,scores) in best_models: outfname = outdir + os.path.sep + 'rep{}.pdb'.format(mindex) final_files.append(outfname) # build alignment myaln = modeller.alignment(env) myaln.append(file=structfname, align_codes=(refcode), remove_gaps=False) myaln.append_sequence(seq.sequence) myaln[-1].code = seq.identifier myaln[-1].atom_file = infname # read pdb file mymodel = modeller.model(env, file=infname) # translate to reference coordinates r = refpos.superpose(mymodel, myaln) # write translated pdb file mymodel.write(file=outfname) mindex += 1 os.chdir(workingdir) return
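# A minimal call sketch for _build_models (hypothetical sequence objects and file names). Each
# (sequence, replicates) pair requests that many replicate models for the sequence object, and the
# finished PDBs land in basedir/<sequence_id>/rep<N>.pdb as described in the docstring.
seq_rep_list = [(seq_a, 2), (seq_b, 1)]   # objects exposing .identifier and .sequence (hypothetical)
_build_models('templates.ali', 'models', nmodels=10, refstructure=None,
              verbose=False, seq_rep_list=seq_rep_list)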
def peptide_rebuild_modeller(name, selection='all', hetatm=0, sequence=None, nmodels=1, hydro=0, quiet=1, *, _self=cmd): ''' DESCRIPTION Remodel the given selection using modeller. This is useful for example to build incomplete sidechains. More complicated modelling tasks are not the intention of this simple interface. Side effects: Alters "type" property for MSE residues in selection (workaround for bug #3512313). USAGE peptide_rebuild_modeller name [, selection [, hetatm [, sequence ]]] ARGUMENTS name = string: new object name selection = string: atom selection hetatm = 0/1: read and model HETATMs (ligands) {default: 0} sequence = string: if provided, use this sequence instead of the template sequence {default: None} nmodels = int: number of models (states) to generate {default: 1} hydro = 0/1: build an all-hydrogen model (allhmodel) instead of a heavy-atom model {default: 0} quiet = 0/1: suppress progress messages {default: 1} ''' import modeller from modeller.automodel import automodel, allhmodel import tempfile, shutil, os _assert_package_import() from .editing import update_identifiers nmodels, hetatm, quiet = int(nmodels), int(hetatm), int(quiet) if int(hydro): automodel = allhmodel tempdir = tempfile.mkdtemp() pdbfile = os.path.join(tempdir, 'template.pdb') alnfile = os.path.join(tempdir, 'aln.pir') cwd = os.getcwd() os.chdir(tempdir) if not quiet: print(' Notice: PWD=%s' % (tempdir)) try: modeller.log.none() env = modeller.environ() env.io.hetatm = hetatm # prevent PyMOL from putting TER records before MSE residues (bug #3512313) _self.alter('(%s) and polymer' % (selection), 'type="ATOM"') _self.save(pdbfile, selection) mdl = modeller.model(env, file=pdbfile) aln = modeller.alignment(env) aln.append_model(mdl, align_codes='foo', atom_files=pdbfile) # get sequence from non-present atoms if not sequence and _self.count_atoms('(%s) & !present' % (selection)): sequence = get_seq(selection) if sequence: aln.append_sequence(sequence) aln[-1].code = 'bar' aln.malign() aln.write(alnfile) a = automodel(env, alnfile=alnfile, sequence=aln[-1].code, knowns=[s.code for s in aln if s.prottyp.startswith('structure')]) a.max_ca_ca_distance = 30.0 if nmodels > 1: a.ending_model = nmodels from multiprocessing import cpu_count ncpu = min(cpu_count(), nmodels) if ncpu > 1: from modeller import parallel job = parallel.job(parallel.local_slave() for _ in range(ncpu)) a.use_parallel_job(job) a.make() for output in a.outputs: _self.load(output['name'], name, quiet=quiet) finally: os.chdir(cwd) shutil.rmtree(tempdir) _self.align(name, selection, cycles=0) if not sequence: update_identifiers(name, selection, _self=_self) if not quiet: print(' peptide_rebuild_modeller: done')
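# A minimal usage sketch for peptide_rebuild_modeller from a PyMOL Python script (hypothetical object
# and selection names); the same call is normally issued at the PyMOL prompt, e.g.
# "peptide_rebuild_modeller rebuilt, input and polymer".
from pymol import cmd
cmd.load('input.pdb', 'input')                      # hypothetical structure
peptide_rebuild_modeller('rebuilt', 'input and polymer', hetatm=0, nmodels=1)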
def align_res_nums(apo_pdb_file, apo_pdb_id, apo_chain_id, holo_pdb_file, holo_pdb_id, holo_chain_id): env = modeller.environ() aln = modeller.alignment(env) apo_model = modeller.model(env, file=apo_pdb_file, model_segment=("FIRST:%s" % (apo_chain_id), "LAST:%s" % (apo_chain_id))) aln.append_model(apo_model, atom_files=apo_pdb_id, align_codes="%s%s" % (apo_pdb_id, apo_chain_id)) holo_model = modeller.model(env, file=holo_pdb_file, model_segment=("FIRST:%s" % (holo_chain_id), "LAST:%s" % (holo_chain_id))) aln.append_model(holo_model, atom_files=holo_pdb_id, align_codes="%s%s" % (holo_pdb_id, holo_chain_id)) aln.salign() alignment_filename = "%s%s_%s%s_salign_output.ali" % ( apo_pdb_id, apo_chain_id, holo_pdb_id, holo_chain_id) aln.write(file=alignment_filename, alignment_format="PIR") with open(alignment_filename, "r") as alignment_opened: alignment_lines = alignment_opened.readlines() # Ignore the header lines. The format requires a 2-line header; there may be a blank line before this. if alignment_lines[0][0] == ">": line_index = 2 else: line_index = 3 apo_sequence_aligned = "" while True: next_line = alignment_lines[line_index].strip() apo_sequence_aligned += next_line if next_line[len(next_line) - 1] == "*": apo_sequence_aligned = apo_sequence_aligned[:-1] break line_index += 1 if alignment_lines[line_index + 1][0] == ">": line_index += 3 else: line_index += 4 holo_sequence_aligned = "" while True: next_line = alignment_lines[line_index].strip() holo_sequence_aligned += next_line if next_line[len(next_line) - 1] == "*": holo_sequence_aligned = holo_sequence_aligned[:-1] break line_index += 1 os.remove(alignment_filename) apo_pdb_res_numbers = get_numbers_from_pdb(apo_pdb_file, apo_chain_id) holo_pdb_res_numbers = get_numbers_from_pdb(holo_pdb_file, holo_chain_id) dict_key_apo_val_holo = {} holo_residues_passed = 0 # incremented whenever the iteration reaches a spot in the alignment where the holo sequence has a residue. apo_residues_passed = 0 for i in range(len(holo_sequence_aligned)): if (apo_sequence_aligned[i] != "-") and (holo_sequence_aligned[i] != "-"): #print(len(apo_pdb_res_numbers), apo_residues_passed, len(holo_pdb_res_numbers), holo_residues_passed) #print(apo_pdb_res_numbers, holo_pdb_res_numbers) #print(len(apo_sequence_aligned), len(holo_sequence_aligned), "len") #print(apo_sequence_aligned, holo_sequence_aligned) dict_key_apo_val_holo[apo_pdb_res_numbers[ apo_residues_passed]] = holo_pdb_res_numbers[ holo_residues_passed] holo_residues_passed += 1 apo_residues_passed += 1 elif (apo_sequence_aligned[i] != "-") and (holo_sequence_aligned[i] == "-"): dict_key_apo_val_holo[ apo_pdb_res_numbers[apo_residues_passed]] = "NA" apo_residues_passed += 1 elif (apo_sequence_aligned[i] == "-") and (holo_sequence_aligned[i] != "-"): holo_residues_passed += 1 print(dict_key_apo_val_holo) print(apo_sequence_aligned) print(holo_sequence_aligned) return dict_key_apo_val_holo
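# A minimal call sketch for align_res_nums (hypothetical PDB files, IDs and chains). The returned
# dictionary maps apo residue numbers to their aligned holo residue numbers, with "NA" for apo
# residues that have no holo counterpart.
res_map = align_res_nums('apo.pdb', 'apo', 'A', 'holo.pdb', 'holo', 'B')
for apo_res, holo_res in res_map.items():
    print(apo_res, holo_res)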
vals.append(float(spl[-1])) # Insert gaps into the profile corresponding to those in seq: for t, res in r_enumerate(seq.residues): for gap in range(res.get_leading_gaps()): vals.insert(t, None) # Add a gap at position '0', so that we effectively count from 1: vals.insert(0, None) return vals #* ~~~~~~~~~~~~~~~~~~~~~~~~~~~ Plot all DOPE scores ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ *# import modeller k = modeller.environ() y = modeller.alignment(k, file=str(iD) + '-mult.ali') model = get_profile(str(iD) + '_multi_model.profile', y[str(iD)]) # multi model template = get_profile(str(iD) + '_single_model.profile', y[str(iD)]) # single model loop_refined = get_profile( str(iD) + '_multi_model_loop_refined.profile', y[str(iD)]) #* Plot the template and model profiles in the same plot for comparison *# pylab.figure(1, figsize=(10, 6)) pylab.xlabel('Alignment position') pylab.ylabel('DOPE per-residue score') pylab.plot(model,
vals = [] for line in f: if not line.startswith('#') and len(line) > 10: spl = line.split() vals.append(float(spl[-1])) # Insert gaps into the profile corresponding to those in seq: for n, res in r_enumerate(seq.residues): for gap in range(res.get_leading_gaps()): vals.insert(n, None) # Add a gap at position '0', so that we effectively count from 1: vals.insert(0, None) return vals e = modeller.environ() a = modeller.alignment(e, file='gabar_MM.01chimeric.pir') template = get_profile('chimeric.profile', a['chimeric']) models = [1, 34, 53, 60, 42, 24, 49] models_mod = [] for model in models: model_mod = get_profile('model_{0:02d}.profile'.format(model), a['gabar_MM.01']) models_mod.append(model_mod) profile_mod = open('dope_profile_model_{0:02d}.dat'.format(model), 'w') for i, res in enumerate(model_mod): profile_mod.write(str(i) + ' ' + str(res) + '\n') profile_mod.close()
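# A plotting sketch for the profiles collected above, assuming pylab is imported as in the other
# snippets of this collection; the colours and the output file name are illustrative only.
pylab.figure(figsize=(10, 6))
pylab.xlabel('Alignment position')
pylab.ylabel('DOPE per-residue score')
pylab.plot(template, color='green', linewidth=2, label='chimeric template')
for model_number, profile in zip(models, models_mod):
    pylab.plot(profile, linewidth=2, label='model_{0:02d}'.format(model_number))
pylab.legend()
pylab.savefig('dope_profile_selected_models.png', dpi=100)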
def analyse_target_template_pairs(self): """ Check the compatibility between the target and model sequences and then extract delta_d data from the target-template pairs into a series of .csv files. These HDDR parameter files will be used by the 'rebuild_restraints_file' method of the 'Automodel_custom_restraints' class to edit the default restraints file of MODELLER. """ aln = self.read_alignment() #------------------------------------------ # Compare the model and target sequences. - #------------------------------------------ # Get the model sequence. modeller_mod_seq = aln[self.sequence] if len(modeller_mod_seq.chains) > 1: raise NotImplementedError( "Optimal restraints with multiple chain models are currently not implemented in altMOD." ) mod_seq = "".join([r.code for r in modeller_mod_seq.residues]) # Get the target sequence. modeller_tar_obj = complete_pdb(self.env, self.target_filepath) if len(modeller_tar_obj.chains) > 1: if self.target_chain == None: raise ValueError( "The selected target structure has more than one chain (%s). In order to extract optimal restraints, provide the chain corresponding to the model to 'set_target_structure'." % len(modeller_tar_obj.chains)) modeller_tar_obj = modeller_tar_obj.chains[self.target_chain] tar_seq = "".join([r.code for r in modeller_tar_obj.residues]) # Check if they are compatible (by aligning them through salign). new_aln = alignment(self.env) new_aln.append_sequence(tar_seq) new_aln.append_sequence(mod_seq) new_aln.salign(gap_penalties_1d=( -900.0, -50.0)) # The as1.sim.mat similarity matrix is used by default. tar_aliseq = "".join([ _get_modeller_res_code(p.get_residue(new_aln[0])) for p in new_aln.positions ]) mod_aliseq = "".join([ _get_modeller_res_code(p.get_residue(new_aln[1])) for p in new_aln.positions ]) ''' import random gr = lambda i: i if random.random() > 0.3 else random.choice("QWERTYIPASDFGHKLCVNM" + "-"*5) mod_aliseq = "".join([gr(i) for i in mod_aliseq]) print (mod_aliseq) ''' # Compute the sequence identity between the model and target sequences. matches_count = 0 identities_count = 0 for mod_p, tar_p in zip(mod_aliseq, tar_aliseq): if mod_p != "-" and tar_p != "-": if mod_p == tar_p: identities_count += 1 matches_count += 1 mod_tar_seqid = identities_count / float(matches_count) # Allow only a small fraction of mismatches. if mod_tar_seqid < self.mod_tar_seqid_threshold: message = "The target and model sequence do not correspond:\n* Tar: %s\n* Mod: %s" % ( tar_aliseq, mod_aliseq) raise ValueError(message) # Find the correspondence between the model and target residues. mod_c = 0 tar_c = 0 mod_tar_res_dict = {} for mod_pos, tar_pos in zip(mod_aliseq, tar_aliseq): if mod_pos != "-" and tar_pos != "-": mod_tar_res_dict[mod_c] = tar_c if mod_pos != "-": mod_c += 1 if tar_pos != "-": tar_c += 1 #--------------------------------------------- # Analyse each of the target-template pairs. - #--------------------------------------------- template_filepaths = self._get_template_filepaths(aln) for tem_idx, tem_name in enumerate(self.knowns): print("\n* Analysing target-tem_%s (%s) pair." % (tem_idx, tem_name)) t1 = time.time() modeller_tem_seq = aln[tem_name] # Get the model-template matches from the 'Alignment' object from MODELLER (here, a match # is defined as any pair of aligned residues). Each match is a tuple containing two # 'Residue' objects from MODELLER (the first from the template, the second from the # model).
matches = [] matches_dict = {} mod_c = 0 for pos in aln.positions: mod_pos = pos.get_residue(modeller_mod_seq) tem_pos = pos.get_residue(modeller_tem_seq) if mod_pos != None and tem_pos != None: matches.append((tem_pos, mod_pos)) matches_dict[mod_pos.index] = (mod_pos, tem_pos) if mod_pos != None: # Assign an index (starting from 0) to the model residue. mod_pos._id = mod_c mod_c += 1 ''' for res in modeller_mod_seq.residues: print res, res.index ''' # Iterate through the HDDRs found in the MODELLER restraints file. results_list = [] for atm_1, atm_2 in self.hddr_dict["all"]: # Get atom types of the atoms engaged in the HDDRs. atm_1_type = self.atm_type_dict[atm_1] atm_2_type = self.atm_type_dict[atm_2] # Get the model and the equivalent template residues. try: mod_res_1, tem_res_1 = matches_dict[ self.atm_to_res_dict[atm_1]] mod_res_2, tem_res_2 = matches_dict[ self.atm_to_res_dict[atm_2]] except KeyError: continue # Check if the model residue is also present in the target. if not mod_res_1._id in mod_tar_res_dict: continue if not mod_res_2._id in mod_tar_res_dict: continue # Get the target residues corresponding to the model residues. tar_res_1 = modeller_tar_obj.residues[mod_tar_res_dict[ mod_res_1._id]] tar_res_2 = modeller_tar_obj.residues[mod_tar_res_dict[ mod_res_2._id]] # Get the template and target atoms for this restraint pair. tem_atm_1 = get_modeller_atom(tem_res_1, atm_1_type) tem_atm_2 = get_modeller_atom(tem_res_2, atm_2_type) tar_atm_1 = get_modeller_atom(tar_res_1, atm_1_type) tar_atm_2 = get_modeller_atom(tar_res_2, atm_2_type) # Get the interatomic distance between the two template atoms. if tem_atm_1 != None and tem_atm_2 != None: grp_dt = get_modeller_dist(tem_atm_1, tem_atm_2) # The template residue may have different atoms with respect to the target/model residue. else: continue # Get the interatomic distance between the two target atoms. if tar_atm_1 != None and tar_atm_2 != None: grp_dn = get_modeller_dist(tar_atm_1, tar_atm_2) else: continue # Assign the MODELLER code for the type of restraint. if atm_1_type == "CA" and atm_2_type == "CA": grp_name = "9" elif (atm_1_type == "N" and atm_2_type == "O") or (atm_1_type == "O" and atm_2_type == "N"): grp_name = "10" else: if atm_1_type in main_chain_atoms or atm_2_type in main_chain_atoms: grp_name = "23" else: grp_name = "26" # Get the delta_d value. grp_dd = grp_dn - grp_dt # if abs(grp_dd) >= self.max_delta_d_abs_val: # continue # Prepare the main columns. pair_results = { "RST_GRP": grp_name, "GRP_DN": grp_dn, "GRP_DT": grp_dt, "GRP_DD": grp_dd, "MOD_ATOM_TYPE_I": atm_1_type, "MOD_ATOM_TYPE_J": atm_2_type, "MOD_ATOM_INDEX_I": atm_1, "MOD_ATOM_INDEX_J": atm_2, } # Prepare additional columns. base_pair_results = { "MOD_RES_PDB_ID_I": mod_res_1.index, "MOD_RES_PDB_ID_J": mod_res_2.index, "MOD_RES_NAME_I": mod_res_1.code, "MOD_RES_NAME_J": mod_res_2.code, "TAR_RES_PDB_ID_I": tar_res_1.num, "TAR_RES_PDB_ID_J": tar_res_2.num, "TAR_RES_NAME_I": tar_res_1.code, "TAR_RES_NAME_J": tar_res_2.code, "TEM_RES_PDB_ID_I": tem_res_1.num, "TEM_RES_PDB_ID_J": tem_res_2.num, "TEM_RES_NAME_I": tem_res_1.code, "TEM_RES_NAME_J": tem_res_2.code, } pair_results.update(base_pair_results) # Add a row to the results .csv file. results_list.append(pair_results) #------------------------------------------------------- # Write a results file for each target-template pair. - #------------------------------------------------------- t2 = time.time() print("- It took %.2f s (%d restraint pairs collected)."
% (t2 - t1, len(results_list))) analysis_filename = "%s_tar_tem_%s.csv" % (self.sequence, tem_idx) with open(analysis_filename, "w") as c_fh: if len(results_list) != 0: column_names = list(sorted(results_list[0].keys())) writer = csv.DictWriter(c_fh, fieldnames=column_names) writer.writeheader() for pair_results in results_list: writer.writerow(pair_results) # Set the custom HDDR params file of the class for this template. self.hddr_params_filepaths[tem_idx] = analysis_filename
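# A reading sketch for one of the per-template parameter files written above. The file name is
# hypothetical but follows the "%s_tar_tem_%s.csv" pattern; each row describes one homology-derived
# distance restraint together with its delta_d value.
import csv
with open('mymodel_tar_tem_0.csv') as csv_fh:
    for row in csv.DictReader(csv_fh):
        print(row['RST_GRP'], float(row['GRP_DD']))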
the alignment sequence `seq`.""" # Read all non-comment and non-blank lines from the file: f = open(profile_file) vals = [] for line in f: if not line.startswith('#') and len(line) > 10: spl = line.split() vals.append(float(spl[-1])) # Insert gaps into the profile corresponding to those in seq: for n, res in r_enumerate(seq.residues): for gap in range(res.get_leading_gaps()): vals.insert(n, None) # Add a gap at position '0', so that we effectively count from 1: vals.insert(0, None) return vals a = modeller.alignment(env, file='TMPRSS2_254_2a31A.ali') template = get_profile('2a31.profile', a['2a31A']) plt.figure(1, figsize=(10, 6)) plt.xlabel('Alignment position') plt.ylabel('DOPE per-residue score') plt.plot(template, color='red', linewidth=2, label='Template') for fileName in pdb_lyst[1:]: model = get_profile(fileName + '.profile', a['TMPRSS2_254']) plt.plot(model, linewidth=2, label=fileName) plt.legend() plt.savefig('dope_profile_TMPRSS2_254_2a31A_single.png', dpi=100)
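# If this snippet and the multiple-template snippet below run in the same interpreter session, both
# reuse figure 1, so the curves of the first plot would carry over into the second PNG. Clearing the
# figure in between avoids that (only needed when the two snippets share one session).
plt.close(1)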
the alignment sequence `seq`.""" # Read all non-comment and non-blank lines from the file: f = open(profile_file) vals = [] for line in f: if not line.startswith('#') and len(line) > 10: spl = line.split() vals.append(float(spl[-1])) # Insert gaps into the profile corresponding to those in seq: for n, res in r_enumerate(seq.residues): for gap in range(res.get_leading_gaps()): vals.insert(n, None) # Add a gap at position '0', so that we effectively count from 1: vals.insert(0, None) return vals a = modeller.alignment(env, file='TMPRSS2_254-mult.ali') plt.figure(1, figsize=(10, 6)) plt.xlabel('Alignment position') plt.ylabel('DOPE per-residue score') for fileName in pdb_lyst[:3]: model = get_profile(fileName + '.profile', a[fileName + 'A']) plt.plot(model, linewidth=2, label=fileName) for fileName in pdb_lyst[3:]: model = get_profile(fileName + '.profile', a['TMPRSS2_254']) plt.plot(model, linewidth=2, label=fileName) plt.legend() plt.savefig('dope_profile_TMPRSS2_254_multi.png', dpi=100)
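# Both TMPRSS2 plotting snippets above assume some setup that is not shown here: a Modeller
# environment `env`, matplotlib imported as `plt`, and a `pdb_lyst` of profile base names whose
# ordering decides which entries are treated as templates and which as models. A minimal preamble
# sketch with hypothetical list contents (the single-template snippet expects the template first;
# the multi-template snippet expects three templates first):
import modeller
import matplotlib
matplotlib.use('Agg')                  # non-interactive backend for saving figures in batch runs
import matplotlib.pyplot as plt
env = modeller.environ()
pdb_lyst = ['2a31', 'model_01', 'model_02']        # hypothetical entries for the single-template plot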