def __init__(self, scwrl_exe=None, workdir=None):
    """Record the SCWRL executable and the directory to work in.

    Args:
        scwrl_exe: path to the SCWRL executable; validated with
            ``ample_util.is_exe``.
        workdir: working directory; the current directory is used when this
            is None.

    Raises:
        RuntimeError: if ``scwrl_exe`` does not pass ``ample_util.is_exe``.
    """
    # Fall back to the current directory when no workdir was supplied.
    self.workdir = os.getcwd() if workdir is None else workdir

    exe_ok = ample_util.is_exe(scwrl_exe)
    if not exe_ok:
        raise RuntimeError("scwrl_exe {0} cannot be found.".format(scwrl_exe))
    self.scwrl_exe = scwrl_exe
def align_mustang(models, mustang_exe=None, work_dir=None):
    """Align the given models with mustang and return the alignment file path.

    The command is run from ``work_dir`` (created if missing; defaults to the
    current directory). The original working directory is always restored,
    including when an error is raised.

    Args:
        models: sequence of model file paths passed to mustang after ``-i``.
        mustang_exe: path to the mustang executable.
        work_dir: directory to run in and to write ``mustang.log`` and
            ``mustang.afasta`` to.

    Returns:
        Absolute path of the ``mustang.afasta`` alignment file.

    Raises:
        RuntimeError: if the executable cannot be found, mustang returns a
            non-zero code, or the alignment file is not produced.
    """
    if not ample_util.is_exe(mustang_exe):
        msg = "Cannot find mustang executable: {0}".format(mustang_exe)
        raise RuntimeError(msg)

    owd = os.getcwd()
    if not work_dir:
        work_dir = owd
    work_dir = os.path.abspath(work_dir)
    if not os.path.isdir(work_dir):
        os.mkdir(work_dir)
    os.chdir(work_dir)
    try:
        logfile = os.path.join(work_dir, 'mustang.log')
        basename = 'mustang'
        # list() so that tuples or other sequences of models are accepted too
        cmd = [mustang_exe, '-F', 'fasta', '-o', basename, '-i'] + list(models)
        rtn = ample_util.run_command(cmd, logfile=logfile, directory=work_dir)
        if rtn != 0:
            msg = "Error running mustang. Check logfile: {0}".format(logfile)
            raise RuntimeError(msg)

        alignment_file = os.path.join(work_dir, basename + ".afasta")
        if not os.path.isfile(alignment_file):
            msg = "Could not find alignment file: {0} after running mustang!".format(alignment_file)
            raise RuntimeError(msg)
        return alignment_file
    finally:
        # always need to go back to original directory - also on failure
        os.chdir(owd)
def align_mustang(models, mustang_exe=None, work_dir=None):
    """Align the given models with mustang and return the alignment file path.

    The command is run from ``work_dir`` (created if missing; defaults to the
    current directory). The original working directory is always restored,
    including when an error is raised.

    Args:
        models: sequence of model file paths passed to mustang after ``-i``.
        mustang_exe: path to the mustang executable.
        work_dir: directory to run in and to write ``mustang.log`` and
            ``mustang.afasta`` to.

    Returns:
        Absolute path of the ``mustang.afasta`` alignment file.

    Raises:
        RuntimeError: if the executable cannot be found, mustang returns a
            non-zero code, or the alignment file is not produced.
    """
    if not ample_util.is_exe(mustang_exe):
        msg = "Cannot find mustang executable: {0}".format(mustang_exe)
        raise RuntimeError(msg)

    owd = os.getcwd()
    if not work_dir:
        work_dir = owd
    work_dir = os.path.abspath(work_dir)
    if not os.path.isdir(work_dir):
        os.mkdir(work_dir)
    os.chdir(work_dir)
    try:
        logfile = os.path.join(work_dir, 'mustang.log')
        basename = 'mustang'
        # list() so that tuples or other sequences of models are accepted too
        cmd = [mustang_exe, '-F', 'fasta', '-o', basename, '-i'] + list(models)
        rtn = ample_util.run_command(cmd, logfile=logfile, directory=work_dir)
        if rtn != 0:
            msg = "Error running mustang. Check logfile: {0}".format(logfile)
            raise RuntimeError(msg)

        alignment_file = os.path.join(work_dir, basename + ".afasta")
        if not os.path.isfile(alignment_file):
            msg = "Could not find alignment file: {0} after running mustang!".format(alignment_file)
            raise RuntimeError(msg)
        return alignment_file
    finally:
        # always need to go back to original directory - also on failure
        os.chdir(owd)
def process_models(self, optd):
    """Extract and validate the models, apply QUARK handling and save ``optd``.

    For QUARK models, ``use_scwrl`` is switched on when the SCWRL executable
    is available; otherwise the side chain treatments are restricted to
    ``UNMODIFIED``. The options dictionary is saved at the end.

    Args:
        optd: AMPLE options dictionary; updated in place.
    """
    process_models.extract_and_validate_models(optd)

    # Need to check if Quark and handle things accordingly
    if optd['quark_models']:
        scwrl_available = ample_util.is_exe(optd['scwrl_exe'])
        if not scwrl_available:
            # No SCWRL so don't do owt with the side chains
            logger.info('Using QUARK models but SCWRL is not installed '
                        'so only using %s sidechains', UNMODIFIED)
            optd['side_chain_treatments'] = [UNMODIFIED]
        else:
            # We always add sidechains to QUARK models if SCWRL is installed
            optd['use_scwrl'] = True

    ample_util.save_amoptd(optd)
def process_models(self, optd):
    """Extract and validate the models, apply QUARK handling and save ``optd``.

    For QUARK models, ``use_scwrl`` is switched on when the SCWRL executable
    is available; otherwise the side chain treatments are restricted to
    ``UNMODIFIED``. The options dictionary is saved at the end.

    Args:
        optd: AMPLE options dictionary; updated in place.
    """
    process_models.extract_and_validate_models(optd)

    # Need to check if Quark and handle things accordingly
    if optd['quark_models']:
        scwrl_available = ample_util.is_exe(optd['scwrl_exe'])
        if not scwrl_available:
            # No SCWRL so don't do owt with the side chains
            logger.info(
                'Using QUARK models but SCWRL is not installed '
                'so only using %s sidechains', UNMODIFIED)
            optd['side_chain_treatments'] = [UNMODIFIED]
        else:
            # We always add sidechains to QUARK models if SCWRL is installed
            optd['use_scwrl'] = True

    ample_util.save_amoptd(optd)
def align_gesamt(models, gesamt_exe=None, work_dir=None):
    """Align single-chain models with gesamt and return the alignment file path.

    The command is run from ``work_dir`` (created if missing; defaults to the
    current directory). The original working directory is always restored,
    including when an error is raised.

    Args:
        models: sequence of model file paths; each must contain one chain.
        gesamt_exe: path to the gesamt executable.
        work_dir: directory to run in and to write ``gesamt.log``,
            ``gesamt.pdb`` and ``gesamt.afasta`` to.

    Returns:
        Path of the alignment file (post-processed by
        ``_gesamt_aln_windows_fix`` on Windows).

    Raises:
        RuntimeError: if the executable cannot be found, a model does not
            contain exactly one chain, gesamt returns a non-zero code, or
            no alignment file is produced.
    """
    if not ample_util.is_exe(gesamt_exe):
        msg = "Cannot find gesamt executable: {0}".format(gesamt_exe)
        raise RuntimeError(msg)

    owd = os.getcwd()
    if not work_dir:
        work_dir = owd
    work_dir = os.path.abspath(work_dir)
    if not os.path.isdir(work_dir):
        os.mkdir(work_dir)
    os.chdir(work_dir)
    try:
        # Need to map chain name to pdb
        model2chain = {}
        for m in models:
            seqd = sequence_util.sequence(m)
            chains = list(seqd.keys())
            if len(chains) != 1:
                # Report the model itself plus every chain that was found
                msg = "Model {0} does not contain a single chain, got: {1}".format(m, chains)
                raise RuntimeError(msg)
            model2chain[m] = chains[0]

        basename = 'gesamt'
        logfile = os.path.join(work_dir, 'gesamt.log')
        alignment_file = os.path.join(work_dir, basename + ".afasta")

        # Build up command-line
        cmd = [gesamt_exe]
        # We iterate through the models to make sure the order stays the same
        for m in models:
            cmd += [m, '-s', model2chain[m]]
        cmd += ['-o', '{0}.pdb'.format(basename), '-a', alignment_file]

        rtn = ample_util.run_command(cmd, logfile=logfile, directory=work_dir)
        if rtn != 0:
            msg = "Error running gesamt. Check logfile: {0}".format(logfile)
            raise RuntimeError(msg)

        if not os.path.isfile(alignment_file):
            msg = "Gesamt did not generate an alignment file.\nPlease check the logfile: {0}".format(logfile)
            raise RuntimeError(msg)

        if sys.platform.startswith("win"):
            alignment_file = _gesamt_aln_windows_fix(alignment_file)
        return alignment_file
    finally:
        # always need to go back to original directory - also on failure
        os.chdir(owd)
def align_gesamt(models, gesamt_exe=None, work_dir=None):
    """Align single-chain models with gesamt and return the alignment file path.

    The command is run from ``work_dir`` (created if missing; defaults to the
    current directory). The original working directory is always restored,
    including when an error is raised.

    Args:
        models: sequence of model file paths; each must contain one chain.
        gesamt_exe: path to the gesamt executable.
        work_dir: directory to run in and to write ``gesamt.log``,
            ``gesamt.pdb`` and ``gesamt.afasta`` to.

    Returns:
        Path of the alignment file (post-processed by
        ``_gesamt_aln_windows_fix`` on Windows).

    Raises:
        RuntimeError: if the executable cannot be found, a model does not
            contain exactly one chain, gesamt returns a non-zero code, or
            no alignment file is produced.
    """
    if not ample_util.is_exe(gesamt_exe):
        msg = "Cannot find gesamt executable: {0}".format(gesamt_exe)
        raise RuntimeError(msg)

    owd = os.getcwd()
    if not work_dir:
        work_dir = owd
    work_dir = os.path.abspath(work_dir)
    if not os.path.isdir(work_dir):
        os.mkdir(work_dir)
    os.chdir(work_dir)
    try:
        # Need to map chain name to pdb
        model2chain = {}
        for m in models:
            seqd = pdb_edit.sequence(m)
            chains = list(seqd.keys())
            if len(chains) != 1:
                # Report the model itself plus every chain that was found
                msg = "Model {0} does not contain a single chain, got: {1}".format(m, chains)
                raise RuntimeError(msg)
            model2chain[m] = chains[0]

        basename = 'gesamt'
        logfile = os.path.join(work_dir, 'gesamt.log')
        alignment_file = os.path.join(work_dir, basename + ".afasta")

        # Build up command-line
        cmd = [gesamt_exe]
        # We iterate through the models to make sure the order stays the same
        for m in models:
            cmd += [m, '-s', model2chain[m]]
        cmd += ['-o', '{0}.pdb'.format(basename), '-a', alignment_file]

        rtn = ample_util.run_command(cmd, logfile=logfile, directory=work_dir)
        if rtn != 0:
            msg = "Error running gesamt. Check logfile: {0}".format(logfile)
            raise RuntimeError(msg)

        if not os.path.isfile(alignment_file):
            msg = "Gesamt did not generate an alignment file.\nPlease check the logfile: {0}".format(logfile)
            raise RuntimeError(msg)

        if sys.platform.startswith("win"):
            alignment_file = _gesamt_aln_windows_fix(alignment_file)
        return alignment_file
    finally:
        # always need to go back to original directory - also on failure
        os.chdir(owd)
def modelling(self, optd, rosetta_modeller=None):
    """Create or import the models described by ``optd`` and save the options.

    Depending on the flags in ``optd`` this makes Rosetta fragments, derives
    contact restraints, builds models with Rosetta (ab initio or NMR
    remodelling) or imports existing models, optionally sub-selects decoys
    using contact information, and finally saves ``optd`` with
    ``ample_util.save_amoptd``. It returns immediately, without saving, when
    none of ``import_models``, ``make_frags``, ``make_models`` or
    ``nmr_remodel`` is set.

    Args:
        optd: AMPLE options dictionary; read and updated in place.
        rosetta_modeller: modeller object to use; when falsy one is created
            with ``options_processor.process_rosetta_options(optd)``.
    """
    if not (optd['import_models'] or optd['make_frags'] or optd['make_models'] or optd['nmr_remodel']):
        return

    # Set the directory where the final models will end up
    optd['models_dir'] = os.path.join(optd['work_dir'], 'models')
    if not os.path.isdir(optd['models_dir']):
        os.mkdir(optd['models_dir'])

    if not rosetta_modeller:
        rosetta_modeller = options_processor.process_rosetta_options(optd)

    # Make Rosetta fragments
    if optd['make_frags']:
        rosetta_modeller.generate_fragments(optd)
        optd['frags_3mers'] = rosetta_modeller.frags_3mers
        optd['frags_9mers'] = rosetta_modeller.frags_9mers
        optd['psipred_ss2'] = rosetta_modeller.psipred_ss2

    # Only derive restraints from contacts when no restraints file was given
    if optd["use_contacts"] and not optd['restraints_file']:
        con_util = contact_util.ContactUtil(
            optd['fasta'],
            'fasta',
            contact_file=optd['contact_file'],
            contact_format=optd['contact_format'],
            bbcontacts_file=optd['bbcontacts_file'],
            bbcontacts_format=optd["bbcontacts_format"],
            cutoff_factor=optd['restraints_factor'],
            distance_to_neighbor=optd['distance_to_neighbour'])

        optd["contacts_dir"] = os.path.join(optd["work_dir"], "contacts")
        if not os.path.isdir(optd["contacts_dir"]):
            os.mkdir(optd["contacts_dir"])

        if con_util.require_contact_prediction:
            if con_util.found_ccmpred_contact_prediction_deps:
                con_util.predict_contacts_from_sequence(wdir=optd["contacts_dir"])
                optd["contact_file"] = con_util.contact_file
                optd["contact_format"] = con_util.contact_format

        if con_util.do_contact_analysis:
            plot_file = os.path.join(optd['contacts_dir'], optd['name'] + ".cm.png")
            if optd['native_pdb'] and optd['native_pdb_std']:
                structure_file = optd['native_pdb_std']
            elif optd["native_pdb"]:
                # NOTE(review): 'native_std' is not used anywhere else in this
                # function - confirm the key exists (possibly meant 'native_pdb').
                structure_file = optd['native_std']
            else:
                structure_file = None
            optd['contact_map'], optd['contact_ppv'] = con_util.summarize(
                plot_file, structure_file, 'pdb', optd['native_cutoff'])

            restraints_file = os.path.join(optd['contacts_dir'], optd['name'] + ".cst")
            optd['restraints_file'] = con_util.write_restraints(
                restraints_file, optd['restraints_format'], optd['energy_function'])
        else:
            con_util = None
    else:
        con_util = None

    if optd['make_models'] and optd['restraints_file']:
        rosetta_modeller.restraints_file = optd['restraints_file']

    if optd['make_models']:
        logger.info('----- making Rosetta models--------')
        # NOTE(review): `monitor` is not defined in this method; presumably a
        # module-level name - confirm.
        if optd['nmr_remodel']:
            try:
                optd['models'] = rosetta_modeller.nmr_remodel(
                    models=optd['models'],
                    ntimes=optd['nmr_process'],
                    alignment_file=optd['alignment_file'],
                    remodel_fasta=optd['nmr_remodel_fasta'],
                    monitor=monitor)
            except Exception as e:
                msg = "Error remodelling NMR ensemble: {0}".format(e)
                exit_util.exit_error(msg, sys.exc_info()[2])
        else:
            logger.info('making %s models...', optd['nmodels'])
            try:
                optd['models'] = rosetta_modeller.ab_initio_model(monitor=monitor)
            except Exception as e:
                msg = "Error running ROSETTA to create models: {0}".format(e)
                exit_util.exit_error(msg, sys.exc_info()[2])

        if not pdb_edit.check_pdb_directory(optd['models_dir'], sequence=optd['sequence']):
            msg = "Problem with rosetta pdb files - please check the log for more information"
            exit_util.exit_error(msg)
        logger.info('Modelling complete - models stored in: %s\n', optd['models_dir'])

    elif optd['import_models']:
        logger.info('Importing models from directory: %s\n', optd['models_dir'])
        if optd['homologs']:
            optd['models'] = ample_util.extract_and_validate_models(
                optd, sequence=None, single=True, allsame=False)
        else:
            optd['models'] = ample_util.extract_and_validate_models(optd)
            # Need to check if Quark and handle things accordingly
            if optd['quark_models']:
                # We always add sidechains to QUARK models if SCWRL is installed
                if ample_util.is_exe(optd['scwrl_exe']):
                    optd['use_scwrl'] = True
                else:
                    # No SCWRL so don't do owt with the side chains
                    logger.info(
                        'Using QUARK models but SCWRL is not installed '
                        'so only using %s sidechains', UNMODIFIED)
                    optd['side_chain_treatments'] = [UNMODIFIED]

    # Sub-select the decoys using contact information
    if con_util and optd['subselect_mode'] and not (optd['nmr_model_in'] or optd['nmr_remodel']):
        logger.info('Subselecting models from directory using '
                    'provided contact information')
        subselect_data = con_util.subselect_decoys(
            optd['models'], 'pdb', mode=optd['subselect_mode'], **optd)
        # zip() returns an iterator on Python 3, so materialise it before
        # indexing; the result is a tuple of models on Python 2 and 3 alike.
        optd['models'] = list(zip(*subselect_data))[0]
        optd['subselect_data'] = dict(subselect_data)

    ample_util.save_amoptd(optd)
def calculate_truncations(self,
                          models=None,
                          truncation_method=None,
                          percent_truncation=None,
                          percent_fixed_intervals=None,
                          truncation_pruning=None,
                          residue_scores=None,
                          alignment_file=None,
                          homologs=False):
    """Returns a list of Truncation objects, one for each truncation level.

    This method doesn't do any truncating - it just calculates the data for each truncation level.

    Side effects: changes the current directory to ``self.work_dir`` and sets
    ``self.models``, ``self.aligned_models`` (when Theseus is run),
    ``self.truncation_levels``, ``self.truncation_variances`` and
    ``self.truncation_nresidues``.

    Returns:
        A list of ``Truncation`` objects; levels left with no residues are
        skipped. An empty list is returned when Theseus raises a RuntimeError.

    Raises:
        AssertionError: if the arguments, ``self.theseus_exe`` or
            ``self.work_dir`` are not usable.
        RuntimeError: if no residue variances are available, or
            ``truncation_method`` / ``truncation_pruning`` is not recognised.
    """
    # `models` defaults to None, so guard the len() call: score-based
    # truncation only needs residue_scores and must not die with a TypeError.
    enough_models = models is not None and len(models) > 1
    assert (enough_models or residue_scores), "Cannot truncate as < 2 models!"
    assert truncation_method and percent_truncation, "Missing arguments: {0} : {1}".format(
        truncation_method, percent_truncation)
    assert ample_util.is_exe(self.theseus_exe), "Cannot find theseus_exe: {0}".format(self.theseus_exe)

    # Create the directories we'll be working in
    assert self.work_dir and os.path.isdir(self.work_dir), "truncate_models needs a self.work_dir"
    os.chdir(self.work_dir)

    self.models = models
    # Calculate variances between pdb and align them (we currently only require the aligned models for homologs)
    if truncation_method != TRUNCATION_METHODS.SCORES:
        run_theseus = theseus.Theseus(work_dir=self.work_dir, theseus_exe=self.theseus_exe)
        try:
            run_theseus.superpose_models(self.models, homologs=homologs, alignment_file=alignment_file)
            self.aligned_models = run_theseus.aligned_models
        except RuntimeError as e:
            logger.critical(e)
            return []

    if homologs:
        # If using homologs, now trim down to the core. We only do this here so that we are using the aligned models from
        # theseus, which makes it easier to see what the truncation is doing.
        models = model_core_from_fasta(
            self.aligned_models, alignment_file=alignment_file, work_dir=os.path.join(self.work_dir, 'core_models'))
        # Unfortunately Theseus doesn't print all residues in its output format, so we can't use the variances we calculated before and
        # need to calculate the variances of the core models
        try:
            run_theseus.superpose_models(models, homologs=homologs, basename='homologs_core')
            self.models = run_theseus.aligned_models
            self.aligned_models = run_theseus.aligned_models
        except RuntimeError as e:
            logger.critical(e)
            return []

    if truncation_method == TRUNCATION_METHODS.SCORES:
        var_by_res = self._convert_residue_scores(residue_scores)
    else:
        var_by_res = run_theseus.var_by_res
    if len(var_by_res) <= 0:
        raise RuntimeError("Error reading residue variances!")

    logger.info('Using truncation method: %s', truncation_method)
    # Calculate which residues to keep under the different methods
    if truncation_method in [
            TRUNCATION_METHODS.PERCENT, TRUNCATION_METHODS.PERCENT_FIXED, TRUNCATION_METHODS.SCORES
    ]:
        truncation_levels, truncation_variances, truncation_residues, truncation_residue_idxs = calculate_residues_by_percent(
            var_by_res, percent_truncation=percent_truncation, percent_fixed_intervals=percent_fixed_intervals)
    elif truncation_method == TRUNCATION_METHODS.FOCUSED:
        truncation_levels, truncation_variances, truncation_residues, truncation_residue_idxs = calculate_residues_focussed(
            var_by_res)
    else:
        raise RuntimeError("Unrecognised ensembling mode: {}".format(truncation_method))

    # Somewhat of a hack to save the data so we can put it in the amoptd
    self.truncation_levels = truncation_levels
    self.truncation_variances = truncation_variances
    self.truncation_nresidues = [len(r) for r in truncation_residues]

    truncations = []
    for tlevel, tvar, tresidues, tresidue_idxs in zip(truncation_levels, truncation_variances,
                                                      truncation_residues, truncation_residue_idxs):
        # Prune singletone/doubletone etc. residues if required
        if truncation_pruning == 'single':
            logger.debug("truncation_pruning: %s", truncation_pruning)
            tresidue_idxs, pruned_residues = prune_residues(tresidue_idxs, chunk_size=1, allowed_gap=2)
            if pruned_residues:
                logger.debug("prune_residues removing: %s", pruned_residues)
        elif truncation_pruning is None:
            pass
        else:
            raise RuntimeError("Unrecognised truncation_pruning: {0}".format(truncation_pruning))

        # Skip if there are no residues
        if not tresidue_idxs:
            logger.debug("Skipping truncation level %s with variance %s as no residues", tlevel, tvar)
            continue

        truncation = Truncation()
        truncation.method = truncation_method
        truncation.percent = percent_truncation
        truncation.level = tlevel
        truncation.variances = tvar
        truncation.residues = tresidues
        truncation.residues_idxs = tresidue_idxs
        truncations.append(truncation)
    return truncations
def process_modelling_options(optd):
    """ Modelling and ensemble options

    Works out from the options which of importing ensembles, importing
    clusters/models, making fragments and making models applies, validates the
    files and executables that the chosen mode needs, and records the decision
    in ``optd`` (``import_ensembles``, ``import_models``, ``make_frags``,
    ``make_models``, ``cluster_method``, ...).

    Args:
        optd: AMPLE options dictionary; read and updated in place.

    Raises:
        RuntimeError: if a required directory, file or executable cannot be
            found, or the options given are inconsistent.
    """
    # Set default name for modelling directory
    optd['models_dir'] = os.path.join(optd['work_dir'], "models")

    # Check if importing ensembles
    if optd['ensembles']:
        # checks are made in ensembles.import_ensembles
        optd['import_ensembles'] = True
        optd['make_frags'] = False
        optd['make_models'] = False
    elif optd['cluster_dir']:
        if not os.path.isdir(optd['cluster_dir']):
            raise RuntimeError("Import cluster cannot find directory: {0}".format(optd['cluster_dir']))
        models = glob.glob(os.path.join(optd['cluster_dir'], "*.pdb"))
        if not models:
            raise RuntimeError("Import cluster cannot find pdbs in directory: {0}".format(optd['cluster_dir']))
        logger.info("Importing pre-clustered models from directory: %s\n", optd['cluster_dir'])
        optd['cluster_method'] = 'import'
        optd['models'] = optd['cluster_dir']
        optd['make_frags'] = False
        optd['make_models'] = False
    elif optd['ideal_helices']:
        optd['make_frags'] = False
        optd['make_models'] = False
    elif optd['homologs']:
        optd['make_frags'] = False
        optd['make_models'] = False
        if not os.path.isfile(str(optd['alignment_file'])):
            # We need to use gesamt or mustang to do the alignment
            if optd['homolog_aligner'] == 'gesamt':
                if not ample_util.is_exe(str(optd['gesamt_exe'])):
                    # Fall back to the copy shipped with CCP4. Use .get() so
                    # that an unset CCP4 variable ends in the RuntimeError
                    # below instead of a bare KeyError.
                    ccp4_root = os.environ.get('CCP4')
                    if ccp4_root:
                        optd['gesamt_exe'] = os.path.join(ccp4_root, 'bin', 'gesamt' + ample_util.EXE_EXT)
                if not ample_util.is_exe(str(optd['gesamt_exe'])):
                    raise RuntimeError('Using homologs without an alignment file and cannot find gesamt_exe: {0}'.format(
                        optd['gesamt_exe']))
            elif optd['homolog_aligner'] == 'mustang':
                if not ample_util.is_exe(str(optd['mustang_exe'])):
                    raise RuntimeError('Using homologs without an alignment file and cannot find mustang_exe: {0}'.format(
                        optd['mustang_exe']))
            else:
                raise RuntimeError('Unknown homolog_aligner: {0}'.format(optd['homolog_aligner']))
        if not os.path.isdir(str(optd['models'])):
            raise RuntimeError("Homologs option requires a directory of pdb models to be supplied\n"
                               "Please supply the models with the -models flag")
        optd['import_models'] = True
    elif optd['models']:
        if not os.path.exists(optd['models']):
            raise RuntimeError("Cannot find -models path: {}".format(optd['models']))
        optd['import_models'] = True
        optd['make_frags'] = False
        optd['make_models'] = False
    elif optd['single_model']:
        optd['cluster_method'] = "skip"
        optd['make_frags'] = False
        optd['make_models'] = False
        optd['single_model_mode'] = True

    # Check import flags
    if optd['import_ensembles'] and (optd['import_models']):
        raise RuntimeError("Cannot import both models and ensembles/clusters!")

    # NMR Checks
    if optd['nmr_model_in']:
        logger.info("Using nmr_model_in file: %s", optd['nmr_model_in'])
        if not os.path.isfile(optd['nmr_model_in']):
            msg = "nmr_model_in flag given, but cannot find file: {0}".format(optd['nmr_model_in'])
            exit_util.exit_error(msg)
        if optd['nmr_remodel']:
            optd['make_models'] = True
            if optd['nmr_remodel_fasta']:
                if not os.path.isfile(optd['nmr_remodel_fasta']):
                    raise RuntimeError("Cannot find nmr_remodel_fasta file: {0}".format(optd['nmr_remodel_fasta']))
            else:
                optd['nmr_remodel_fasta'] = optd['fasta']
            msg = "NMR model will be remodelled with ROSETTA using the sequence from: {0}".format(
                optd['nmr_remodel_fasta'])
            logger.info(msg)

            if not (optd['frags_3mers'] and optd['frags_9mers']):
                optd['make_frags'] = True
                msg = "nmr_remodel - will be making our own fragment files"
                logger.info(msg)
            else:
                if not (os.path.isfile(optd['frags_3mers']) and os.path.isfile(optd['frags_9mers'])):
                    raise RuntimeError("frags_3mers and frag_9mers files given, but cannot locate them:\n{0}\n{1}\n".format(
                        optd['frags_3mers'], optd['frags_9mers']))
                optd['make_frags'] = False
        else:
            optd['make_frags'] = False
            optd['make_models'] = False
            msg = "Running in NMR truncate only mode"
            logger.info(msg)
    elif optd['make_models']:
        if not os.path.isdir(optd['models_dir']):
            os.mkdir(optd['models_dir'])
        # If the user has given both fragment files we check they are ok and unset make_frags
        if optd['frags_3mers'] and optd['frags_9mers']:
            if not os.path.isfile(optd['frags_3mers']) or not os.path.isfile(optd['frags_9mers']):
                raise RuntimeError("frags_3mers and frag_9mers files given, but cannot locate them:\n{0}\n{1}\n".format(
                    optd['frags_3mers'], optd['frags_9mers']))
            optd['make_frags'] = False
        if optd['make_frags'] and (optd['frags_3mers'] or optd['frags_9mers']):
            raise RuntimeError("make_frags set to true, but you have given the path to the frags_3mers or frags_9mers")
        if not optd['make_frags'] and not (optd['frags_3mers'] and optd['frags_9mers']):
            msg = ("*** Missing fragment files! *** "
                   "Please supply the paths to the fragment files using the -frags_3mers and -frags_9mers flags. "
                   "These can be generated using the Robetta server: http://robetta.bakerlab.org "
                   "Please see the AMPLE documentation for further information.")
            raise RuntimeError(msg)

    # Report what is going to be done
    if optd['make_frags']:
        if optd['use_homs']:
            logger.info('Making fragments (including homologues)')
        else:
            logger.info('Making fragments EXCLUDING HOMOLOGUES')
    else:
        logger.info('NOT making Fragments')
    if optd['make_models']:
        logger.info('\nMaking Rosetta Models')
    else:
        logger.info('NOT making Rosetta Models')
def __init__(self, scwrl_exe=None, workdir=None):
    """Record the SCWRL executable and the directory to work in.

    Args:
        scwrl_exe: path to the SCWRL executable; validated with
            ``ample_util.is_exe``.
        workdir: working directory; the current directory is used when this
            is None.

    Raises:
        RuntimeError: if ``scwrl_exe`` does not pass ``ample_util.is_exe``.
    """
    # Fall back to the current directory when no workdir was supplied.
    self.workdir = os.getcwd() if workdir is None else workdir

    if ample_util.is_exe(scwrl_exe):
        self.scwrl_exe = scwrl_exe
    else:
        raise RuntimeError("scwrl_exe {0} cannot be found.".format(scwrl_exe))
def find_maxcluster(amoptd):
    """Return path to maxcluster binary.

    If we can't find one in the path, we create a $HOME/.ample directory and
    download it to there.

    Args:
        amoptd: AMPLE options dictionary; ``maxcluster_exe`` and ``rcdir`` are
            read, and ``maxcluster_exe`` is set to the platform default name
            when it is empty.

    Returns:
        Path to the maxcluster executable that was found or downloaded.
    """
    if amoptd['maxcluster_exe'] and ample_util.is_exe(amoptd['maxcluster_exe']):
        return amoptd['maxcluster_exe']

    if not amoptd['maxcluster_exe']:
        if sys.platform.startswith("win"):
            amoptd['maxcluster_exe'] = 'maxcluster.exe'
        else:
            amoptd['maxcluster_exe'] = 'maxcluster'

    try:
        maxcluster_exe = ample_util.find_exe(amoptd['maxcluster_exe'], dirs=[amoptd['rcdir']])
    except ample_util.FileNotFoundError:
        # Cannot find so we need to try and download it
        rcdir = amoptd['rcdir']
        logger.info("Cannot find maxcluster binary in path so attempting to download it directory: %s", rcdir)
        if not os.path.isdir(rcdir):
            logger.info("No ample rcdir found so creating in: %s", rcdir)
            os.mkdir(rcdir)

        url = None
        maxcluster_exe = os.path.join(rcdir, 'maxcluster')
        if sys.platform.startswith("linux"):
            bit = platform.architecture()[0]
            if bit == '64bit':
                url = 'http://www.sbg.bio.ic.ac.uk/~maxcluster/maxcluster64bit'
            elif bit == '32bit':
                url = 'http://www.sbg.bio.ic.ac.uk/~maxcluster/maxcluster'
            else:
                msg = "Unrecognised system type: {0} {1}".format(sys.platform, bit)
                exit_util.exit_error(msg)
        elif sys.platform.startswith("darwin"):
            url = 'http://www.sbg.bio.ic.ac.uk/~maxcluster/maxcluster_i686_32bit.bin'
            # OSX PPC: http://www.sbg.bio.ic.ac.uk/~maxcluster/maxcluster_PPC_32bit.bin
        elif sys.platform.startswith("win"):
            url = 'http://www.sbg.bio.ic.ac.uk/~maxcluster/maxcluster.exe'
            maxcluster_exe = os.path.join(rcdir, 'maxcluster.exe')
        else:
            msg = "Unrecognised system type: {0}".format(sys.platform)
            exit_util.exit_error(msg)

        # urlretrieve lives in urllib.request on Python 3 and urllib on Python 2
        try:
            from urllib.request import urlretrieve
        except ImportError:
            from urllib import urlretrieve

        # NOTE(review): the binary is fetched over plain HTTP without any
        # integrity check before being made executable.
        logger.info("Attempting to download maxcluster binary from: %s", url)
        try:
            urlretrieve(url, maxcluster_exe)
        except Exception as e:
            msg = "Error downloading maxcluster executable: {0}\n{1}".format(url, e)
            exit_util.exit_error(msg)

        # make executable - but not writable by group/others
        os.chmod(maxcluster_exe, 0o755)

    return maxcluster_exe
def calculate_truncations(self,
                          models=None,
                          truncation_method=None,
                          percent_truncation=None,
                          percent_fixed_intervals=None,
                          truncation_pruning=None,
                          residue_scores=None,
                          alignment_file=None,
                          homologs=False):
    """Returns a list of Truncation objects, one for each truncation level.

    This method doesn't do any truncating - it just calculates the data for each truncation level.

    Side effects: changes the current directory to ``self.work_dir`` and sets
    ``self.models``, ``self.aligned_models`` (when Theseus is run),
    ``self.truncation_levels``, ``self.truncation_variances`` and
    ``self.truncation_nresidues``.

    Returns:
        A list of ``Truncation`` objects; levels left with no residues are
        skipped. An empty list is returned when Theseus raises a RuntimeError.

    Raises:
        AssertionError: if the arguments, ``self.theseus_exe`` or
            ``self.work_dir`` are not usable.
        RuntimeError: if no residue variances are available, or
            ``truncation_method`` / ``truncation_pruning`` is not recognised.
    """
    # `models` defaults to None, so guard the len() call: score-based
    # truncation only needs residue_scores and must not die with a TypeError.
    enough_models = models is not None and len(models) > 1
    assert (enough_models or residue_scores), "Cannot truncate as < 2 models!"
    assert truncation_method and percent_truncation, "Missing arguments: {0} : {1}".format(
        truncation_method, percent_truncation)
    assert ample_util.is_exe(self.theseus_exe), "Cannot find theseus_exe: {0}".format(self.theseus_exe)

    # Create the directories we'll be working in
    assert self.work_dir and os.path.isdir(self.work_dir), "truncate_models needs a self.work_dir"
    os.chdir(self.work_dir)

    self.models = models
    # Calculate variances between pdb and align them (we currently only require the aligned models for homologs)
    if truncation_method != TRUNCATION_METHODS.SCORES:
        run_theseus = theseus.Theseus(work_dir=self.work_dir, theseus_exe=self.theseus_exe)
        try:
            run_theseus.superpose_models(self.models, homologs=homologs, alignment_file=alignment_file)
            self.aligned_models = run_theseus.aligned_models
        except RuntimeError as e:
            logger.critical(e)
            return []

    if homologs:
        # If using homologs, now trim down to the core. We only do this here so that we are using the aligned models from
        # theseus, which makes it easier to see what the truncation is doing.
        models = model_core_from_fasta(
            self.aligned_models, alignment_file=alignment_file, work_dir=os.path.join(self.work_dir, 'core_models'))
        # Unfortunately Theseus doesn't print all residues in its output format, so we can't use the variances we calculated before and
        # need to calculate the variances of the core models
        try:
            run_theseus.superpose_models(models, homologs=homologs, basename='homologs_core')
            self.models = run_theseus.aligned_models
            self.aligned_models = run_theseus.aligned_models
        except RuntimeError as e:
            logger.critical(e)
            return []

    if truncation_method == TRUNCATION_METHODS.SCORES:
        var_by_res = self._convert_residue_scores(residue_scores)
    else:
        var_by_res = run_theseus.var_by_res
    if len(var_by_res) <= 0:
        raise RuntimeError("Error reading residue variances!")

    logger.info('Using truncation method: %s', truncation_method)
    # Calculate which residues to keep under the different methods
    if truncation_method in [
            TRUNCATION_METHODS.PERCENT, TRUNCATION_METHODS.PERCENT_FIXED, TRUNCATION_METHODS.SCORES
    ]:
        truncation_levels, truncation_variances, truncation_residues, truncation_residue_idxs = calculate_residues_by_percent(
            var_by_res, percent_truncation=percent_truncation, percent_fixed_intervals=percent_fixed_intervals)
    elif truncation_method == TRUNCATION_METHODS.FOCUSED:
        truncation_levels, truncation_variances, truncation_residues, truncation_residue_idxs = calculate_residues_focussed(
            var_by_res)
    else:
        raise RuntimeError("Unrecognised ensembling mode: {}".format(truncation_method))

    # Somewhat of a hack to save the data so we can put it in the amoptd
    self.truncation_levels = truncation_levels
    self.truncation_variances = truncation_variances
    self.truncation_nresidues = [len(r) for r in truncation_residues]

    truncations = []
    for tlevel, tvar, tresidues, tresidue_idxs in zip(truncation_levels, truncation_variances,
                                                      truncation_residues, truncation_residue_idxs):
        # Prune singletone/doubletone etc. residues if required
        logger.debug("truncation_pruning: %s", truncation_pruning)
        if truncation_pruning == 'single':
            tresidue_idxs, pruned_residues = prune_residues(tresidue_idxs, chunk_size=1, allowed_gap=2)
            if pruned_residues:
                logger.debug("prune_residues removing: %s", pruned_residues)
        elif truncation_pruning is None:
            pass
        else:
            raise RuntimeError("Unrecognised truncation_pruning: {0}".format(truncation_pruning))

        # Skip if there are no residues
        if not tresidue_idxs:
            logger.debug("Skipping truncation level %s with variance %s as no residues", tlevel, tvar)
            continue

        truncation = Truncation()
        truncation.method = truncation_method
        truncation.percent = percent_truncation
        truncation.level = tlevel
        truncation.variances = tvar
        truncation.residues = tresidues
        truncation.residues_idxs = tresidue_idxs
        truncations.append(truncation)
    return truncations
def process_modelling_options(optd):
    """ Modelling and ensemble options

    Works out from the options which of importing ensembles, importing
    clusters/models, making fragments and making models applies, validates the
    files and executables that the chosen mode needs, and records the decision
    in ``optd`` (``import_ensembles``, ``import_models``, ``make_frags``,
    ``make_models``, ``cluster_method``, ...).

    Args:
        optd: AMPLE options dictionary; read and updated in place.

    Raises:
        RuntimeError: if a required directory, file or executable cannot be
            found, or the options given are inconsistent.
    """
    # Set default name for modelling directory
    optd['models_dir'] = os.path.join(optd['work_dir'], "models")

    # Check if importing ensembles
    if optd['ensembles']:
        # checks are made in ensembles.import_ensembles
        optd['import_ensembles'] = True
        optd['make_frags'] = False
        optd['make_models'] = False
    elif optd['cluster_dir']:
        if not os.path.isdir(optd['cluster_dir']):
            raise RuntimeError(
                "Import cluster cannot find directory: {0}".format(optd['cluster_dir']))
        models = glob.glob(os.path.join(optd['cluster_dir'], "*.pdb"))
        if not models:
            raise RuntimeError(
                "Import cluster cannot find pdbs in directory: {0}".format(optd['cluster_dir']))
        logger.info("Importing pre-clustered models from directory: %s\n", optd['cluster_dir'])
        optd['cluster_method'] = 'import'
        optd['models'] = optd['cluster_dir']
        optd['make_frags'] = False
        optd['make_models'] = False
    elif optd['ideal_helices']:
        optd['make_frags'] = False
        optd['make_models'] = False
    elif optd['homologs']:
        optd['make_frags'] = False
        optd['make_models'] = False
        if not os.path.isfile(str(optd['alignment_file'])):
            # We need to use gesamt or mustang to do the alignment
            if optd['homolog_aligner'] == 'gesamt':
                if not ample_util.is_exe(str(optd['gesamt_exe'])):
                    # Fall back to the copy shipped with CCP4. Use .get() so
                    # that an unset CCP4 variable ends in the RuntimeError
                    # below instead of a bare KeyError.
                    ccp4_root = os.environ.get('CCP4')
                    if ccp4_root:
                        optd['gesamt_exe'] = os.path.join(
                            ccp4_root, 'bin', 'gesamt' + ample_util.EXE_EXT)
                if not ample_util.is_exe(str(optd['gesamt_exe'])):
                    raise RuntimeError(
                        'Using homologs without an alignment file and cannot find gesamt_exe: {0}'
                        .format(optd['gesamt_exe']))
            elif optd['homolog_aligner'] == 'mustang':
                if not ample_util.is_exe(str(optd['mustang_exe'])):
                    raise RuntimeError(
                        'Using homologs without an alignment file and cannot find mustang_exe: {0}'
                        .format(optd['mustang_exe']))
            else:
                raise RuntimeError('Unknown homolog_aligner: {0}'.format(optd['homolog_aligner']))
        if not os.path.isdir(str(optd['models'])):
            raise RuntimeError(
                "Homologs option requires a directory of pdb models to be supplied\n"
                "Please supply the models with the -models flag")
        optd['import_models'] = True
    elif optd['models']:
        if not os.path.exists(optd['models']):
            raise RuntimeError("Cannot find -models path: {}".format(optd['models']))
        optd['import_models'] = True
        optd['make_frags'] = False
        optd['make_models'] = False
    elif optd['single_model']:
        optd['cluster_method'] = "skip"
        optd['make_frags'] = False
        optd['make_models'] = False
        optd['single_model_mode'] = True

    # Check import flags
    if optd['import_ensembles'] and (optd['import_models']):
        raise RuntimeError("Cannot import both models and ensembles/clusters!")

    # NMR Checks
    if optd['nmr_model_in']:
        logger.info("Using nmr_model_in file: %s", optd['nmr_model_in'])
        if not os.path.isfile(optd['nmr_model_in']):
            msg = "nmr_model_in flag given, but cannot find file: {0}".format(optd['nmr_model_in'])
            exit_util.exit_error(msg)
        if optd['nmr_remodel']:
            optd['make_models'] = True
            if optd['nmr_remodel_fasta']:
                if not os.path.isfile(optd['nmr_remodel_fasta']):
                    raise RuntimeError(
                        "Cannot find nmr_remodel_fasta file: {0}".format(optd['nmr_remodel_fasta']))
            else:
                optd['nmr_remodel_fasta'] = optd['fasta']
            msg = "NMR model will be remodelled with ROSETTA using the sequence from: {0}".format(
                optd['nmr_remodel_fasta'])
            logger.info(msg)

            if not (optd['frags_3mers'] and optd['frags_9mers']):
                optd['make_frags'] = True
                msg = "nmr_remodel - will be making our own fragment files"
                logger.info(msg)
            else:
                if not (os.path.isfile(optd['frags_3mers']) and os.path.isfile(optd['frags_9mers'])):
                    raise RuntimeError(
                        "frags_3mers and frag_9mers files given, but cannot locate them:\n{0}\n{1}\n"
                        .format(optd['frags_3mers'], optd['frags_9mers']))
                optd['make_frags'] = False
        else:
            optd['make_frags'] = False
            optd['make_models'] = False
            msg = "Running in NMR truncate only mode"
            logger.info(msg)
    elif optd['make_models']:
        if not os.path.isdir(optd['models_dir']):
            os.mkdir(optd['models_dir'])
        # If the user has given both fragment files we check they are ok and unset make_frags
        if optd['frags_3mers'] and optd['frags_9mers']:
            if not os.path.isfile(optd['frags_3mers']) or not os.path.isfile(optd['frags_9mers']):
                raise RuntimeError(
                    "frags_3mers and frag_9mers files given, but cannot locate them:\n{0}\n{1}\n"
                    .format(optd['frags_3mers'], optd['frags_9mers']))
            optd['make_frags'] = False
        if optd['make_frags'] and (optd['frags_3mers'] or optd['frags_9mers']):
            raise RuntimeError(
                "make_frags set to true, but you have given the path to the frags_3mers or frags_9mers")
        if not optd['make_frags'] and not (optd['frags_3mers'] and optd['frags_9mers']):
            msg = ("*** Missing fragment files! *** "
                   "Please supply the paths to the fragment files using the -frags_3mers and -frags_9mers flags. "
                   "These can be generated using the Robetta server: http://robetta.bakerlab.org "
                   "Please see the AMPLE documentation for further information.")
            raise RuntimeError(msg)

    # Report what is going to be done
    if optd['make_frags']:
        if optd['use_homs']:
            logger.info('Making fragments (including homologues)')
        else:
            logger.info('Making fragments EXCLUDING HOMOLOGUES')
    else:
        logger.info('NOT making Fragments')
    if optd['make_models']:
        logger.info('\nMaking Rosetta Models')
    else:
        logger.info('NOT making Rosetta Models')
logger.info( "Importing pre-clustered models from directory: {0}\n".format( optd['cluster_dir'])) optd['cluster_method'] = 'import' optd['make_frags'] = False optd['make_models'] = False elif optd['ideal_helices']: optd['make_frags'] = False optd['make_models'] = False elif optd['homologs']: optd['make_frags'] = False optd['make_models'] = False if not os.path.isfile(str(optd['alignment_file'])): # We need to use gesamt or mustang to do the alignment if optd['homolog_aligner'] == 'gesamt': if not ample_util.is_exe(str(optd['gesamt_exe'])): optd['gesamt_exe'] = os.path.join( os.environ['CCP4'], 'bin', 'gesamt' + ample_util.EXE_EXT) if not ample_util.is_exe(str(optd['gesamt_exe'])): msg = 'Using homologs without an alignment file and cannot find gesamt_exe: {0}'.format( optd['gesamt_exe']) exit_util.exit_error(msg) elif optd['homolog_aligner'] == 'mustang': if not ample_util.is_exe(str(optd['mustang_exe'])): msg = 'Using homologs without an alignment file and cannot find mustang_exe: {0}'.format( optd['mustang_exe']) exit_util.exit_error(msg) else: msg = 'Unknown homolog_aligner: {0}'.format( optd['homolog_aligner'])