def run_modeller(knowns=('2B4C', '4NCO'), seq='CAP45', num_mod=1):
    """Build models of ``seq`` from the ``knowns`` templates and print the best.

    The alignment file is expected at
    ``../sequence_files/align<seq>_<known1>_<known2>...ali`` and template
    structures are read from ``../pdb_files/``.  Models ``1..num_mod`` are
    built through a parallel job with five ``local_slave()`` workers,
    assessed with DOPE, and the successfully built model with the lowest
    DOPE score is printed.

    Args:
        knowns: A single template code, or a list/tuple of template codes.
        seq: Code of the target sequence in the alignment file.
        num_mod: Index of the last model to build (the first one is 1).

    Raises:
        IndexError: If no model was built successfully.
    """
    seq_dir = '../sequence_files/'
    pdb_dir = '../pdb_files/'

    # Parallel job with five local workers.
    j = job()
    for _ in range(5):
        j.append(local_slave())

    # Modeller environment
    env = environ()
    # Output
    log.none()
    # Input directory for template structures
    env.io.atom_files_directory = [pdb_dir]

    # Input alignment file.  Tuples must be handled like lists: the default
    # value of ``knowns`` is a tuple, and str() of a tuple would otherwise
    # end up in the file name.
    ali_file = seq_dir + 'align' + seq
    if isinstance(knowns, (list, tuple)):
        ali_file += ''.join('_' + known for known in knowns)
    else:
        ali_file += '_' + str(knowns)
    ali_file += '.ali'

    mod = automodel(env, alnfile=ali_file, knowns=knowns, sequence=seq,
                    assess_methods=(assess.DOPE))
    mod.starting_model = 1
    mod.ending_model = num_mod
    mod.use_parallel_job(j)
    mod.make()

    # Keep only the models that were built without failure
    ok_models = [x for x in mod.outputs if x['failure'] is None]

    # Rank the models by DOPE score (ascending)
    ok_models.sort(key=lambda output: output['DOPE score'])

    # Get top model
    m = ok_models[0]
    print("The best model {} with a dope of {}".format(m['name'], m['DOPE score']))
def modeller_automodel(self, query: SeqRecord, results: Path, num_align: int, atom_files_dir: Path):
    """Build one Modeller model for each of the first ``num_align`` records.

    ``results`` is loaded with ``np.load``.  For every record the Clustal
    alignment stored in ``r[-2][0]`` is parsed, then cleaned by
    ``self._remove_gaps`` and ``self._remove_missing_res``.  The resulting
    two-sequence PIR alignment is modelled inside a temporary directory.
    The first ``*.B*.pdb`` file produced is copied next to ``results`` as
    ``<query id>_<record index + 1>.pdb``.

    Records are skipped when the alignment cannot be parsed, when the
    template ``.ent`` file is missing, or when the output file already
    exists.  Modelling errors are logged and do not stop the loop.

    Args:
        query: Query record; its ``id`` must match the first alignment row.
        results: Path of the array file to read; models are written to its
            parent directory.
        num_align: Maximum number of records to process.
        atom_files_dir: Root of the template structures, laid out as
            ``<root>/<id[2:4]>/<id>.ent``.
    """
    from modeller import environ
    from modeller.automodel import automodel

    # NOTE(review): allow_pickle=True executes arbitrary pickled code on
    # load; only use this with result files from a trusted source.
    records = np.load(results, allow_pickle=True)[:num_align]
    for model_index, r in enumerate(records):
        try:
            aln = AlignIO.read(StringIO(r[-2][0]), 'clustal')
        except Exception:
            # Was a bare ``except:``, which also swallowed
            # KeyboardInterrupt and SystemExit.
            logging.error(
                f'Failed to parse alignment: {r[0]} -> {r[2]} -> {r[4]} -> {r[6]}'
            )
            continue
        assert query.id == aln[0].id and aln[-1].id == r[-3]

        q_rec, t_rec = self._remove_gaps(aln[0], aln[-1])
        try:
            t_rec = self._remove_missing_res(
                t_rec,
                (atom_files_dir / aln[-1].id[2:4] / f'{aln[-1].id}.ent').resolve().as_posix())
        except FileNotFoundError as e:
            logging.exception(e)
            continue

        # PIR headers: the query is a plain sequence, the template a
        # structure whose second-to-last id character (upper-cased) fills
        # both chain fields.
        q_rec.name, t_rec.name = '', ''
        q_rec.description = f'sequence:{q_rec.id}::::::::'
        t_rec.description = f'structureX:{t_rec.id}::{t_rec.id[-2].upper()}::{t_rec.id[-2].upper()}::::'
        aln = MultipleSeqAlignment([q_rec, t_rec])

        out_d = results.resolve().parent
        if (out_d / f'{aln[0].id}_{model_index+1}.pdb').exists():
            continue

        # Modeller writes into the current directory, so run it inside a
        # throw-away directory and always return to the original one.
        cwd = os.getcwd()
        with tempfile.TemporaryDirectory() as tmpdir:
            try:
                os.chdir(tmpdir)
                AlignIO.write(aln, 'aln.pir', 'pir')
                env = environ()
                env.io.atom_files_directory = [
                    (atom_files_dir / aln[1].id[2:4]).resolve().as_posix()
                ]
                mod = automodel(env, 'aln.pir', knowns=[aln[1].id], sequence=aln[0].id)
                mod.make()
                shutil.copy(
                    list(Path().glob('*.B*.pdb'))[0],
                    out_d / f'{aln[0].id}_{model_index+1}.pdb')
            except Exception as e:
                logging.error(
                    f'knowns=[{aln[1].id}], sequence={aln[0].id}')
                logging.exception(e)
            finally:
                os.chdir(cwd)
def _automodel_run(self, alin_file, knowns):
    """Build a single model of the sequence ``'target'`` and return its output.

    The model is built inside ``self.tmpdir`` and assessed with DOPE and
    GA341.

    Args:
        alin_file: Alignment file passed to ``automodel`` as ``alnfile``.
        knowns: Template code(s) passed to ``automodel``.

    Returns:
        The first entry of ``amdl.outputs``.
    """
    amdl = automodel(self.env, alnfile=alin_file, knowns=knowns,
                     sequence='target',
                     assess_methods=(assess.DOPE, assess.GA341))
    amdl.starting_model = 1
    amdl.ending_model = 1

    orig_dir = os.getcwd()
    os.chdir(self.tmpdir)
    try:
        amdl.make()
    finally:
        # Restore the working directory even when make() raises, so a
        # failed run does not leave the process inside tmpdir.
        os.chdir(orig_dir)
    return amdl.outputs[0]
def create_models(alnfile, knownid, sequenceid, pdbfile, model_number=5, dir=''):
    """Build models ``1..model_number`` with Modeller's ``automodel``.

    When ``dir`` is given, a new directory of that name is created under
    the current working directory, ``pdbfile`` is copied into it, and the
    modelling runs inside it.  ``alnfile`` is then read from the parent
    directory.  The original working directory is restored afterwards,
    also when modelling fails.

    Args:
        alnfile: Alignment file name.
        knownid: Template code(s) passed to ``automodel`` as ``knowns``.
        sequenceid: Target sequence code.
        pdbfile: Template structure file name, relative to the starting
            directory; only used when ``dir`` is set.
        model_number: Index of the last model to build.
        dir: Optional name of the output sub-directory to create.

    Raises:
        SystemExit: If the ``modeller`` package cannot be imported.
    """
    cdir = os.getcwd() if dir else None
    if dir:
        ndir = cdir + '/' + dir
        os.mkdir(ndir)
        os.chdir(ndir)
    try:
        if dir:
            # Argument list instead of a shell string: file names with
            # spaces or shell metacharacters are copied verbatim.
            sp.check_call(['cp', '../' + pdbfile, pdbfile])

        try:
            from modeller import environ
            from modeller.automodel import automodel
        except ImportError:
            print(
                'Make Sure Python Modeller is installed. Double check License Key is in Modeller config.py file'
            )
            sys.exit()

        env = environ()
        # Inside the sub-directory the alignment lives one level up.
        a = automodel(env,
                      alnfile='../' + alnfile if dir else alnfile,
                      knowns=knownid,
                      sequence=sequenceid)
        a.starting_model = 1
        a.ending_model = model_number
        a.make()
    finally:
        if dir:
            os.chdir(cdir)
def model3D(fic, ALLPDB, pdb_extension='.cif'):
    """Build one homology model from the alignment file ``fic``.

    The records of ``fic`` are read with ``readFastaMul``.  The first
    record names the target and every following record names a template.
    A code is the text between the first and second ``;`` on the first
    line of a record's first field.  For each template, the structure file
    ``<code before '_', lower-cased><pdb_extension>`` is copied from
    ``ALLPDB`` into the current directory unless it is already there.

    Args:
        fic: Path of the alignment file (also passed to Modeller).
        ALLPDB: Directory holding the template structure files.
        pdb_extension: Extension of the structure files.

    Returns:
        Always ``1``.

    Raises:
        ImportError: If ``_MODELLER_MESSAGE`` is not empty.
        Exception: If ``fic`` holds fewer than two records.
    """
    if _MODELLER_MESSAGE != "":
        raise ImportError(_MODELLER_MESSAGE)

    def record_code(record):
        # First line of the header, second ';'-separated field.
        first_line = record[0].split('\n')[0]
        return first_line.split(';')[1]

    seqs = readFastaMul(fic)
    if len(seqs) < 2:
        raise Exception("There aren't template sequences in %s." % fic)

    seq = record_code(seqs[0])

    modeller.log.verbose()  # request verbose output
    env = modeller.environ()  # fresh MODELLER environment for this model
    env.io.atom_files_directory = ['.']

    template_codes = []
    for record in seqs[1:]:
        code = record_code(record)
        template_codes.append(code)

        # Make sure the template structure is available locally.
        structure_file = code.split('_')[0].lower() + pdb_extension
        if os.path.isfile(structure_file):
            continue
        shutil.copy2(os.path.join(ALLPDB, structure_file), structure_file)

    a = automodel.automodel(env, alnfile=fic, knowns=tuple(template_codes),
                            sequence=seq)
    a.max_molpdf = 1e12
    # Build exactly one model (first index == last index).
    a.starting_model = 1
    a.ending_model = 1

    print("cdir = ", os.getcwd())
    a.make()  # do the actual homology modeling
    return 1
def run(self, seqId):
    """Model ``seqId`` with Modeller and return the path of the model file.

    Template structures are read from ``self.template[0]``, the template
    code is ``self.template[1]`` and the alignment file is ``self.pir``.
    ``auto_align()`` is called before ``make()``.

    Args:
        seqId: Code of the target sequence; must pass ``self.check``.

    Returns:
        Absolute path of ``<seqId>.B99990001.pdb`` in the current directory.
    """
    assert self.check(seqId)

    from modeller import log, environ
    from modeller.automodel import automodel

    # Silence every Modeller log channel.
    log.level(output=0, errors=0, notes=0, warnings=0, memory=0)

    environment = environ()
    environment.io.atom_files_directory = [self.template[0]]

    builder = automodel(
        environment,
        alnfile=self.pir,
        knowns=self.template[1],
        sequence=seqId,
    )
    builder.auto_align()
    builder.make()

    model_file = f'{seqId}.B99990001.pdb'
    assert os.path.isfile(model_file)
    return os.path.abspath(model_file)
def modeller_automodel(self, query: SeqRecord, results: Path, num_align: int, atom_files_dir: Path):
    """Build one Modeller model per row of the hard-coded hits CSV.

    Each row of ``data/delta_new_hits_thresh10_xdug10_xdg10_xdgf10.csv``
    provides a BLAST XML report (``XML``) plus the expected query and hit
    ids (``Query``, ``Hit``).  The first HSP alignment is cleaned with
    ``self._remove_gaps`` and written as a PIR file in a temporary
    directory, where it is modelled.  The first ``*.B*.pdb`` file produced
    is copied to ``results`` as ``<query id>_<hit id>.pdb``.

    Rows without any HSP are skipped.  Modelling is best-effort: a failure
    is logged and the next row is processed.

    Args:
        query: Not used by this implementation.
        results: Directory that receives the model files.
        num_align: Not used by this implementation.
        atom_files_dir: Root of the template structures; the files for a
            hit are looked up in ``<root>/<hit id[2:4]>``.
    """
    import logging

    from modeller import environ, log
    from modeller.automodel import automodel

    raw_df = pandas.read_csv(
        'data/delta_new_hits_thresh10_xdug10_xdg10_xdgf10.csv')
    for row in tqdm(raw_df.itertuples(), total=raw_df.shape[0]):
        try:
            aln = SearchIO.read(StringIO(row.XML), 'blast-xml').hsps[0].aln
        except IndexError:
            # No HSP in this report.
            continue
        assert aln[0].id == row.Query and aln[1].id == row.Hit

        q_rec, t_rec = self._remove_gaps(aln[0], aln[-1])
        q_rec.name = ''
        t_rec.name = ''
        q_rec.description = f'sequence:{row.Query}::::::::'
        t_rec.description = f'structureX:{row.Hit}::{row.Hit[-2].upper()}::{row.Hit[-2].upper()}::::'
        aln = MultipleSeqAlignment([q_rec, t_rec])

        out_d = results.resolve()
        cwd = os.getcwd()
        with tempfile.TemporaryDirectory() as tmpdir:
            try:
                os.chdir(tmpdir)
                AlignIO.write(aln, 'aln.pir', 'pir')
                log.none()
                env = environ()
                env.io.atom_files_directory = [
                    (atom_files_dir / aln[1].id[2:4]).resolve().as_posix()
                ]
                mod = automodel(env, 'aln.pir', knowns=[aln[1].id], sequence=aln[0].id)
                mod.make()
                shutil.copy(
                    list(Path().glob('*.B*.pdb'))[0],
                    out_d / f'{aln[0].id}_{aln[1].id}.pdb')
            except Exception:
                # Still best-effort, but no longer silent, and
                # KeyboardInterrupt/SystemExit are no longer swallowed.
                logging.exception(
                    'Modelling failed: knowns=[%s], sequence=%s',
                    aln[1].id, aln[0].id)
            finally:
                os.chdir(cwd)
def main(args):
    """Build ``args.num_models`` models and return the best model's file name.

    The target code is ``args.target`` without ``.ali`` and the template
    code is ``args.template`` without ``.pdb``.  The alignment is read from
    ``<args.dir>/<target>-<template>.ali``.  Models are assessed with DOPE
    and GA341, and the successful model with the lowest DOPE score is
    reported.

    Args:
        args: Namespace providing ``seed``, ``dir``, ``target``,
            ``template`` and ``num_models``.

    Returns:
        The ``name`` entry of the top-ranked model.
    """
    mod.log.verbose()
    env = mod.environ(rand_seed=args.seed)
    env.io.atom_files_directory = [".", args.dir, "../" + args.dir]

    target_code = args.target.replace(".ali", "")
    alignment_name = target_code + "-" + args.template.replace(".pdb", ".ali")
    alignment_path = os.path.join(args.dir, alignment_name)
    template_code = args.template.replace(".pdb", "")

    # To add the SOAP assessment, see https://salilab.org/SOAP/, download
    # the SOAP-Protein library file into
    # your-installation-path/lib/modeller-9.25/modlib/ and append
    # soap_protein_od.Scorer() to the tuple below.
    assessments = (
        automodel.assess.DOPE,
        automodel.assess.GA341,
    )
    builder = automodel.automodel(
        env,
        alnfile=alignment_path,
        knowns=template_code,
        sequence=target_code,
        assess_methods=assessments,
    )
    builder.starting_model = 1
    builder.ending_model = args.num_models
    builder.make()

    # Rank the successfully built models by ascending DOPE score.
    key = "DOPE score"
    ranked = sorted(
        (output for output in builder.outputs if output["failure"] is None),
        key=lambda output: output[key],
    )

    top_model = ranked[0]
    print("Top model: %s (DOPE score %.3f)" % (top_model["name"], top_model[key]))
    return top_model["name"]
def process(req, rep):
    '''Processes a single request to the server, storing the result in `rep`.

    For every alignment in `req.alignments`, the template structure and a
    two-line alignment file are written into a private temporary directory.
    `FLAGS.models_per_alignment` models are then built there with Modeller.
    All successfully built models are ranked together by increasing DOPE
    score.  At most `FLAGS.max_models_to_return` of them are added to
    `rep.selected`, each with the alignment details appended to the model
    text.

    The temporary directory is removed and the working directory restored
    even when modelling fails.

    Args:
        req: Request providing `identifier`, `recipient` and `alignments`.
        rep: Response that is filled in place.
    '''
    from modeller.automodel.assess import DOPE, GA341
    from modeller.automodel import automodel

    logger = logging.getLogger('modeller_server')
    logger.info('Processing job=%s, recipient=%s, alignments=%d',
                req.identifier, req.recipient, len(req.alignments))

    # In order to prevent filename collisions, independent runs of modeller
    # are executed in separate directories
    curr_dir = os.getcwd()
    work_dir = tempfile.mkdtemp()
    try:
        os.chdir(work_dir)

        # Populate required fields in the response
        rep.recipient = req.recipient
        rep.identifier = req.identifier

        # N * M (model, alignment, score) tuples, where N = #alignments and
        # M = #models per alignment
        candidates = []
        for alignment in req.alignments:
            query_id = 'query'
            templ_id = str(alignment.templ_pdb + alignment.templ_chain)

            # Write template structure
            templ_file = templ_id + '.pdb'
            with open(templ_file, 'w') as fh:
                fh.write('%s\n' % alignment.templ_structure)

            # Write alignment
            alignment_file = templ_id + '.ali'
            with open(alignment_file, 'w') as fh:
                params = {
                    'query_id': query_id,
                    'query_start': alignment.query_start,
                    'query_stop': alignment.query_stop,
                    'query_align': alignment.query_align,
                }
                query_line = query_alignment.safe_substitute(params)
                fh.write('%s\n' % query_line)

                params = {
                    'templ_id': templ_id,
                    'templ_pdb': templ_file,
                    'templ_chain': alignment.templ_chain,
                    'templ_start': alignment.templ_start,
                    'templ_stop': alignment.templ_stop,
                    'templ_align': alignment.templ_align,
                }
                templ_line = templ_alignment.safe_substitute(params)
                fh.write('%s\n' % templ_line)

            # Run modeller
            modeller.log.verbose()
            env = modeller.environ()
            env.io.atom_files_directory = ['.']
            am = automodel(env,
                           alnfile=alignment_file,
                           knowns=templ_id,
                           sequence=query_id,
                           assess_methods=(DOPE, GA341))
            am.starting_model = 1
            am.ending_model = FLAGS.models_per_alignment
            am.make()

            # Rank successful predictions by DOPE score
            models = [x for x in am.outputs if x['failure'] is None]
            models.sort(key=lambda x: x['DOPE score'])
            logger.info('Produced %d models for alignment %s',
                        len(models), templ_id)

            for model in models:
                with open(model['name']) as fh:
                    coords = fh.read()
                candidates.append((coords, alignment, model['DOPE score']))

        # Sort all N * M candidate models in increasing order of DOPE
        # score, returning the top K
        candidates.sort(key=operator.itemgetter(-1))
        for i, (coords, alignment, score) in enumerate(candidates):
            selection = rep.selected.add()
            selection.rank = i + 1

            # Recompute the template id from the candidate's own alignment.
            # The variable left over from the loop above always names the
            # last alignment processed.
            templ_id = str(alignment.templ_pdb + alignment.templ_chain)

            # Append alignment information to bottom of PDB file
            selection.model = coords
            selection.model += 'Source: %s\n' % alignment.method
            selection.model += 'Template: %s\n' % templ_id
            selection.model += 'Query alignment: %s\n' % alignment.query_align
            selection.model += 'Templ alignment: %s\n' % alignment.templ_align

            # Message types cannot be assigned directly (e.g. x.field = field).
            # For additional details, read the "Singular Message Fields" section in:
            # https://developers.google.com/protocol-buffers/docs/reference/python-generated#fields
            selection.alignment.ParseFromString(alignment.SerializeToString())

            if selection.rank == FLAGS.max_models_to_return:
                break
    finally:
        # Always leave and delete the scratch directory, also on failure.
        os.chdir(curr_dir)
        shutil.rmtree(work_dir)

    logger.info('Completed job=%s, recipient=%s',
                req.identifier, req.recipient)
def peptide_rebuild_modeller(name, selection='all', hetatm=0, sequence=None,
        nmodels=1, hydro=0, quiet=1, *, _self=cmd):
    '''
DESCRIPTION

    Rebuild the given selection with modeller and load the result as a
    new object. Handy for completing truncated sidechains; it is a
    deliberately simple interface and not meant for elaborate modelling
    tasks.

    Side effects: Alters "type" property for MSE residues in selection
    (workaround for bug #3512313).

USAGE

    peptide_rebuild_modeller name [, selection [, hetatm [, sequence ]]]

ARGUMENTS

    name = string: new object name

    selection = string: atom selection

    hetatm = 0/1: read and model HETATMs (ligands) {default: 0}

    sequence = string: if provided, use this sequence instead of the
    template sequence {default: None}

    nmodels = int: number of models (states) to generate {default: 1}

    hydro = 0/1: build with allhmodel instead of automodel {default: 0}
    '''
    import modeller
    from modeller.automodel import automodel, allhmodel
    import tempfile
    import shutil
    import os

    _assert_package_import()
    from .editing import update_identifiers

    nmodels = int(nmodels)
    hetatm = int(hetatm)
    quiet = int(quiet)
    builder_class = allhmodel if int(hydro) else automodel

    # Everything modeller reads or writes lives in a scratch directory.
    workdir = tempfile.mkdtemp()
    template_path = os.path.join(workdir, 'template.pdb')
    alignment_path = os.path.join(workdir, 'aln.pir')
    start_dir = os.getcwd()
    os.chdir(workdir)

    if not quiet:
        print(' Notice: PWD=%s' % (workdir))

    try:
        modeller.log.none()
        env = modeller.environ()
        env.io.hetatm = hetatm

        # prevent PyMOL to put TER records before MSE residues (bug #3512313)
        _self.alter('(%s) and polymer' % (selection), 'type="ATOM"')
        _self.save(template_path, selection)

        template_model = modeller.model(env, file=template_path)
        aln = modeller.alignment(env)
        aln.append_model(template_model, align_codes='foo',
                atom_files=template_path)

        # no explicit sequence, but non-present atoms in the selection:
        # take the sequence from the selection itself
        if not sequence and _self.count_atoms('(%s) & !present' % (selection)):
            sequence = get_seq(selection)

        if sequence:
            aln.append_sequence(sequence)
            aln[-1].code = 'bar'
            aln.malign()
        aln.write(alignment_path)

        target_code = aln[-1].code
        template_codes = [s.code for s in aln
                if s.prottyp.startswith('structure')]
        a = builder_class(env, alnfile=alignment_path, sequence=target_code,
                knowns=template_codes)
        a.max_ca_ca_distance = 30.0

        if nmodels > 1:
            a.ending_model = nmodels

            # one local worker per model, capped at the CPU count
            from multiprocessing import cpu_count
            ncpu = min(cpu_count(), nmodels)
            if ncpu > 1:
                from modeller import parallel
                workers = (parallel.local_slave() for _ in range(ncpu))
                a.use_parallel_job(parallel.job(workers))

        a.make()

        for built in a.outputs:
            _self.load(built['name'], name, quiet=quiet)
    finally:
        os.chdir(start_dir)
        shutil.rmtree(workdir)

    _self.align(name, selection, cycles=0)

    if not sequence:
        update_identifiers(name, selection, _self=_self)

    if not quiet:
        print(' peptide_rebuild_modeller: done')
def run_modeller(structure, alignment, temp_dir: Union[str, Path, Callable]):
    """Build a single homology model with Modeller.

    The template structure is saved as ``<template id>.pdb`` and the
    alignment as ``<template id>-<target id>.aln`` inside ``temp_dir``.
    Modeller then runs from that directory.

    Args:
        structure: Template structure, or a path that ``PDB.load`` can read.
        alignment: Two-row alignment; row 0 is the target and row 1 the
            template.
        temp_dir: Working directory for Modeller input and output, or a
            callable returning it.

    Returns:
        results: The output dictionary of the single model that was built.
            Keys of particular interest:

            `name`: The name of the generated PDB structure.
            `Normalized DOPE score`: DOPE score that should be comparable
                between structures.
            `GA341 score`: GA341 score that should be comparable between
                structures.
    """
    import modeller
    from modeller.automodel import assess, automodel, autosched

    if isinstance(structure, (str, Path)):
        structure = PDB.load(structure)

    temp_dir = Path(temp_dir() if callable(temp_dir) else temp_dir)

    assert len(alignment) == 2
    target_id = alignment[0].id
    template_id = alignment[1].id

    # Write the Modeller inputs into the working directory.
    PDB.save(structure, temp_dir / f"{template_id}.pdb")
    alignment_file = temp_dir / f"{template_id}-{target_id}.aln"
    structure_tools.write_pir_alignment(alignment, alignment_file)

    # No Modeller log output.
    modeller.log.none()

    env = modeller.environ()
    env.io.atom_files_directory = [str(temp_dir)]
    env.schedule_scale = modeller.physical.values(default=1.0, soft_sphere=0.7)
    # Read HETATM and water records from the template as well.
    env.io.hetatm = True
    env.io.water = True

    builder = automodel(
        env,
        alnfile=str(alignment_file),
        knowns=str(template_id),
        sequence=str(target_id),
        assess_methods=(assess.DOPE, assess.normalized_dope, assess.GA341),
    )
    # Exactly one model.
    builder.starting_model = 1
    builder.ending_model = 1
    # Very thorough VTFM optimization, no MD refinement.
    builder.library_schedule = autosched.slow
    builder.max_var_iterations = 300
    builder.md_level = None
    # Stop if the objective function is higher than this value.
    builder.max_molpdf = 2e6

    with py_tools.log_print_statements(logger), system_tools.switch_paths(temp_dir):
        builder.make()

    outputs = builder.outputs
    assert len(outputs) == 1
    return outputs[0]
def __run_modeller(self, alignFile, loopRefinement):
    """Build models with Modeller, optionally adding loop refinement.

    Parameters
    ----------
    alignFile : string
        Alignment file handed to Modeller.
    loopRefinement : boolean
        If `True`, use `dope_loopmodel` and also collect loop models.

    Returns
    -------
    result : list
        `(model_filename, normalized_dope_score)` for every model that was
        built successfully (regular models first, then loop models).
    loop : boolean
        `True` if at least one loop model was built successfully.
    failures : list
        The `failure` entries of all models that could not be built.
    """
    log.none()  # no Modeller log output

    env = environ()
    # Directory holding the input atom files
    env.io.atom_files_directory = [str(self.filePath.rstrip('/'))]
    env.schedule_scale = physical.values(default=1.0, soft_sphere=0.7)
    # Read HETATM and water records from the template PDBs
    env.io.hetatm = True
    env.io.water = True

    logger.debug(
        'Performing loop refinement in addition to regular modelling: {}'
        .format(loopRefinement)
    )

    # Arguments shared by both model builders
    shared_args = dict(
        alnfile=str(alignFile),
        knowns=str(self.templateID),
        sequence=str(self.seqID),
        assess_methods=(assess.DOPE, assess.normalized_dope),
    )
    if loopRefinement:
        a = dope_loopmodel(
            env,
            loop_assess_methods=(assess.DOPE, assess.normalized_dope),
            **shared_args
        )
        # First/last loop model and the loop refinement level
        a.loop.starting_model = self.loopStart
        a.loop.ending_model = self.loopEnd
        a.loop.md_level = refine.slow
    else:
        a = automodel(env, **shared_args)

    a.starting_model = self.start  # index of the first model
    a.ending_model = self.end  # index of the last model

    # Very thorough VTFM optimization, no MD refinement
    a.library_schedule = autosched.slow
    a.max_var_iterations = 300
    a.md_level = None
    a.max_molpdf = 2e5

    with helper.log_print_statements(logger):
        a.make()  # do the actual homology modeling

    result = []
    loop = False
    failures = []

    # Regular models
    for output in a.outputs:
        failure = output['failure']
        if failure:
            logger.debug('Failure! {}'.format(failure))
            failures.append(failure)
            continue
        model_filename = output['name']
        model_dope_score = output['Normalized DOPE score']
        logger.debug(
            'Success! model_filename: {}, model_dope_score: {}'
            .format(model_filename, model_dope_score))
        result.append((model_filename, model_dope_score))

    # Loop-refined models
    if loopRefinement:
        logger.debug('Modeller loop outputs:')
        for output in a.loop.outputs:
            failure = output['failure']
            if failure:
                logger.debug('Failure! {}'.format(failure))
                failures.append(failure)
                continue
            model_filename = output['name']
            model_dope_score = output['Normalized DOPE score']
            logger.debug(
                'Success! model_filename: {}, model_dope_score: {}'
                .format(model_filename, model_dope_score))
            result.append((model_filename, model_dope_score))
            loop = True

    return result, loop, failures
def peptide_rebuild_modeller(name, selection='all', hetatm=0, sequence=None,
        nmodels=1, hydro=0, quiet=1):
    '''
DESCRIPTION

    Rebuild the given selection with modeller and load the result as a
    new object. Handy for completing truncated sidechains; it is a
    deliberately simple interface and not meant for elaborate modelling
    tasks.

    Side effects: Alters "type" property for MSE residues in selection
    (workaround for bug #3512313).

USAGE

    peptide_rebuild_modeller name [, selection [, hetatm [, sequence ]]]

ARGUMENTS

    name = string: new object name

    selection = string: atom selection

    hetatm = 0/1: read and model HETATMs (ligands) {default: 0}

    sequence = string: if provided, use this sequence instead of the
    template sequence {default: None}

    nmodels = int: number of models (states) to generate {default: 1}

    hydro = 0/1: build with allhmodel instead of automodel {default: 0}
    '''
    try:
        import modeller
        from modeller.automodel import automodel, allhmodel
    except ImportError:
        print(' Error: failed to import "modeller"')
        raise CmdException

    import tempfile
    import shutil
    import os
    from .editing import update_identifiers

    nmodels = int(nmodels)
    hetatm = int(hetatm)
    quiet = int(quiet)
    builder_class = allhmodel if int(hydro) else automodel

    # Everything modeller reads or writes lives in a scratch directory.
    scratch = tempfile.mkdtemp()
    template_path = os.path.join(scratch, 'template.pdb')
    alignment_path = os.path.join(scratch, 'aln.pir')
    start_dir = os.getcwd()
    os.chdir(scratch)

    if not quiet:
        print(' Notice: PWD=%s' % (scratch))

    try:
        modeller.log.none()
        env = modeller.environ()
        env.io.hetatm = hetatm

        # prevent PyMOL to put TER records before MSE residues (bug #3512313)
        cmd.alter('(%s) and polymer' % (selection), 'type="ATOM"')
        cmd.save(template_path, selection)

        template_model = modeller.model(env, file=template_path)
        aln = modeller.alignment(env)
        aln.append_model(template_model, align_codes='foo',
                atom_files=template_path)

        if sequence:
            aln.append_sequence(sequence)
            aln[-1].code = 'bar'
            aln.malign()
        aln.write(alignment_path)

        target_code = aln[-1].code
        template_codes = [s.code for s in aln
                if s.prottyp.startswith('structure')]
        a = builder_class(env, alnfile=alignment_path, sequence=target_code,
                knowns=template_codes)
        a.max_ca_ca_distance = 30.0

        if nmodels > 1:
            a.ending_model = nmodels

            # one local worker per model, capped at the CPU count
            from multiprocessing import cpu_count
            ncpu = min(cpu_count(), nmodels)
            if ncpu > 1:
                from modeller import parallel
                workers = (parallel.local_slave() for _ in range(ncpu))
                a.use_parallel_job(parallel.job(workers))

        a.make()

        for built in a.outputs:
            cmd.load(built['name'], name, quiet=quiet)
    finally:
        os.chdir(start_dir)
        shutil.rmtree(scratch)

    cmd.align(name, selection, cycles=0)

    if not sequence:
        update_identifiers(name, selection)

    if not quiet:
        print(' peptide_rebuild_modeller: done')
def calculate_modeller_score(self, res_path):
    """
        * Build a single comparative model of the query sequence from the
          template structure, using a PIR alignment written by
          ``write_alignment_for_modeller``. The model is moved into the
          template's PDB directory as ``<pdb>_mod.atm`` and parsed.
        * Return the DOPE-HR score of that model multiplied by -1. The
          score is an energy, so lower (more negative) values tend to
          correlate with more native-like models; the sign flip simplifies
          the min/max normalization done afterwards.

        The working directory is changed to ``<res_path>/modeller/`` while
        MODELLER runs and is always restored, also when an error occurs.

        Args:
            res_path (str): Path to the results folder.

        Returns:
            score(float): The DOPE-HR score (multiplied by -1) of the model
            generated by MODELLER.
    """
    root_dir = os.getcwd()
    modeller_out_dir = res_path + "/modeller/"
    ali_dir = "alignments/"
    pathlib.Path(modeller_out_dir + ali_dir).mkdir(parents=True, exist_ok=True)

    # MODELLER generates the result files in its current directory, so we
    # must go to the results directory and come back to root dir afterwards.
    os.chdir(modeller_out_dir)
    try:
        path_to_atm = root_dir + "/data/pdb/" + self.template.name
        # We reindex all the PDB files to avoid any problem with modeller
        self.template.reindex_pdb(1, path_to_atm, True)
        # Parse the reindexed PDB to get its residues and coordinates
        self.template.parse_pdb(path_to_atm + "/" + self.template.reindexed_pdb + ".atm")
        # Write Modeller's alignment PIR file
        self.write_alignment_for_modeller("./alignments/")

        # Silence Modeller's verbose standard output.
        # NOTE(review): the extent of this block was reconstructed from
        # whitespace-collapsed source - confirm it matches the intent.
        with contextlib.redirect_stdout(None):
            # create a new MODELLER environment to build this model in
            m.env = m.environ()
            # directories for input atom files
            m.env.io.atom_files_directory = [path_to_atm]
            a_model = am.automodel(
                m.env,
                # alignment filename
                alnfile=ali_dir + self.template.name + '.ali',
                # codes of the templates
                knowns=self.template.reindexed_pdb,
                # code of the target
                sequence='query_' + self.template.name,
                # DOPE-HR is very similar to DOPE but is obtained at
                # Higher Resolution (using a bin size of 0.125Å
                # rather than 0.5Å).
                assess_methods=assess.DOPEHR)
            a_model.very_fast()
            # index of the first and last model (one model is calculated)
            a_model.starting_model = 1
            a_model.ending_model = 1
            modeller_dope_score = 0
            # Catch any errors that Modeller can raise and log them
            try:
                a_model.make()
            except m.ModellerError as err:
                # %-style placeholders: the previous call passed str(err)
                # as an extra argument without a placeholder, which made
                # logging fail to format the record.
                logging.warning("Modeller error with %s | %s: %s",
                                self.template.name, self.template.pdb, err)

        new_model_pdb = a_model.outputs[0]["name"]
        modeller_dope_score = a_model.outputs[0]["DOPE-HR score"]
        self.template.modeller_pdb = self.template.pdb + "_mod"
        # Move the new model to the PDB directory and rename it
        os.rename(new_model_pdb, path_to_atm + "/" + self.template.modeller_pdb + ".atm")
        # Parse the new model generated by MODELLER to get the residues
        # and their coordinates
        self.template.parse_pdb(path_to_atm + "/" + self.template.modeller_pdb + ".atm")
    finally:
        # Go back to root directory, even if something above failed
        os.chdir(root_dir)
    return modeller_dope_score * (-1)
def __run_modeller(self, alignFile, loopRefinement):
    """Build models with Modeller, optionally adding loop refinement.

    Parameters
    ----------
    alignFile : string
        Alignment file handed to Modeller.
    loopRefinement : boolean
        If `True`, use `dope_loopmodel` and also collect loop models.

    Returns
    -------
    result : list
        `(model_filename, normalized_dope_score)` for every model that was
        built successfully (regular models first, then loop models).
    loop : boolean
        `True` if at least one loop model was built successfully.
    failures : list
        The `failure` entries of all models that could not be built.
    """
    log.none()  # no Modeller log output

    env = environ()
    # Directory holding the input atom files
    env.io.atom_files_directory = [str(self.filePath.rstrip("/"))]
    env.schedule_scale = physical.values(default=1.0, soft_sphere=0.7)
    # Read HETATM and water records from the template PDBs
    env.io.hetatm = True
    env.io.water = True

    logger.debug(
        "Performing loop refinement in addition to regular modelling: {}".
        format(loopRefinement))

    alnfile = str(alignFile)
    if loopRefinement:
        a = dope_loopmodel(
            env,
            alnfile=alnfile,
            knowns=str(self.templateID),
            sequence=str(self.seqID),
            assess_methods=(assess.DOPE, assess.normalized_dope),
            loop_assess_methods=(assess.DOPE, assess.normalized_dope),
        )
        # First/last loop model and the loop refinement level
        a.loop.starting_model = self.loopStart
        a.loop.ending_model = self.loopEnd
        a.loop.md_level = refine.slow
    else:
        a = automodel(
            env,
            alnfile=alnfile,
            knowns=str(self.templateID),
            sequence=str(self.seqID),
            assess_methods=(assess.DOPE, assess.normalized_dope),
        )

    a.starting_model = self.start  # index of the first model
    a.ending_model = self.end  # index of the last model

    # Very thorough VTFM optimization, no MD refinement
    a.library_schedule = autosched.slow
    a.max_var_iterations = 300
    a.md_level = None
    a.max_molpdf = 2e5

    with helper.log_print_statements(logger):
        a.make()  # do the actual homology modeling

    result = []
    failures = []

    def harvest(outputs):
        """Sort `outputs` into result/failures; report if any succeeded."""
        any_success = False
        for entry in outputs:
            if not entry["failure"]:
                model_filename = entry["name"]
                model_dope_score = entry["Normalized DOPE score"]
                logger.debug(
                    "Success! model_filename: {}, model_dope_score: {}".format(
                        model_filename, model_dope_score))
                result.append((model_filename, model_dope_score))
                any_success = True
            else:
                failure = entry["failure"]
                logger.debug("Failure! {}".format(failure))
                failures.append(failure)
        return any_success

    # Regular models
    harvest(a.outputs)

    # Loop-refined models; the flag tells whether any of them succeeded
    loop = False
    if loopRefinement:
        logger.debug("Modeller loop outputs:")
        loop = harvest(a.loop.outputs)

    return result, loop, failures