Example #1
    def benchmarking(self, optd):
        if optd['submit_cluster']:
            # Pickle dictionary so it can be opened by the job to get the parameters
            ample_util.save_amoptd(optd)
            script = benchmark_util.cluster_script(optd)
            workers_util.run_scripts(
                job_scripts=[script],
                monitor=monitor,
                nproc=optd['nproc'],
                job_time=43200,
                job_name='benchmark',
                submit_cluster=optd['submit_cluster'],
                submit_qtype=optd['submit_qtype'],
                submit_queue=optd['submit_queue'],
                submit_pe_lsf=optd['submit_pe_lsf'],
                submit_pe_sge=optd['submit_pe_sge'],
                submit_array=optd['submit_array'],
                submit_max_array=optd['submit_max_array'],
            )
            # queue finished so unpickle results
            optd.update(ample_util.read_amoptd(optd['results_path']))
        else:
            benchmark_util.analyse(optd)
            ample_util.save_amoptd(optd)
        return
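
The pickle round-trip that the comments above refer to is handled by ample_util.save_amoptd and ample_util.read_amoptd, which are not shown on this page. A minimal sketch of what such helpers look like, assuming they simply wrap the standard pickle module and that the output path lives in optd['results_path']:

import pickle

def save_amoptd(optd):
    """Pickle the options dictionary so a queued job can re-read it."""
    with open(optd['results_path'], 'wb') as f:
        pickle.dump(optd, f)

def read_amoptd(results_path):
    """Unpickle and return the options dictionary written by save_amoptd."""
    with open(results_path, 'rb') as f:
        return pickle.load(f)
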
Example #2
    def benchmarking(self, optd):
        if optd['submit_qtype'] != 'local':
            # Pickle dictionary so it can be opened by the job to get the parameters
            ample_util.save_amoptd(optd)
            script = benchmark_util.cluster_script(optd)
            with TaskFactory(
                    optd['submit_qtype'],
                    script,
                    cwd=optd['work_dir'],
                    environment=optd['submit_pe'],
                    run_time=43200,
                    name='benchmark',
                    nprocesses=optd['nproc'],
                    max_array_size=optd['submit_max_array'],
                    queue=optd['submit_queue'],
                    shell="/bin/bash",
            ) as task:
                task.run()
                task.wait(interval=5, monitor_f=monitor)

            # queue finished so unpickle results
            optd.update(ample_util.read_amoptd(optd['results_path']))
        else:
            benchmark_util.analyse(optd)
            ample_util.save_amoptd(optd)
        return
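
This variant replaces the deprecated submit_cluster/workers_util route (see the deprecation warnings in Example #12) with pyjob's TaskFactory context manager, treating any submit_qtype other than 'local' as a cluster submission. A minimal sketch of the same pattern for a purely local run; the script path is hypothetical and the keyword names simply mirror the call above (they may vary between pyjob versions):

from pyjob import TaskFactory

# Run a single script on the local machine; 'local' is the qtype the
# code above treats as the non-cluster case.
with TaskFactory('local', '/tmp/benchmark.sh', nprocesses=1) as task:
    task.run()
    task.wait(interval=5)
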
Example #3
    def process_models(self, optd):
        process_models.extract_and_validate_models(optd)
        # Need to check if Quark and handle things accordingly
        if optd['quark_models']:
            # We always add sidechains to QUARK models if SCWRL is installed
            if ample_util.is_exe(optd['scwrl_exe']):
                optd['use_scwrl'] = True
            else:
                # No SCWRL so don't do anything with the side chains
                logger.info('Using QUARK models but SCWRL is not installed '
                            'so only using %s sidechains', UNMODIFIED)
                optd['side_chain_treatments'] = [UNMODIFIED]
        ample_util.save_amoptd(optd)
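
ample_util.is_exe decides whether SCWRL side-chain remodelling can be used. Its implementation is not shown here; a minimal sketch, assuming it just checks that the configured path points at an existing executable file:

import os

def is_exe(fpath):
    """Return True if fpath names an existing, executable file."""
    return (fpath is not None
            and os.path.isfile(fpath)
            and os.access(fpath, os.X_OK))
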
Example #4
    def process_models(self, optd):
        process_models.extract_and_validate_models(optd)
        # Need to check if Quark and handle things accordingly
        if optd['quark_models']:
            # We always add sidechains to QUARK models if SCWRL is installed
            if ample_util.is_exe(optd['scwrl_exe']):
                optd['use_scwrl'] = True
            else:
                # No SCWRL so don't do anything with the side chains
                logger.info(
                    'Using QUARK models but SCWRL is not installed '
                    'so only using %s sidechains', UNMODIFIED)
                optd['side_chain_treatments'] = [UNMODIFIED]
        ample_util.save_amoptd(optd)
Example #5
    def benchmarking(self, optd):
        if optd['submit_cluster']:
            # Pickle dictionary so it can be opened by the job to get the parameters
            ample_util.save_amoptd(optd)
            script = benchmark_util.cluster_script(optd)
            workers_util.run_scripts(
                job_scripts=[script],
                monitor=monitor,
                nproc=optd['nproc'],
                job_time=43200,
                job_name='benchmark',
                submit_cluster=optd['submit_cluster'],
                submit_qtype=optd['submit_qtype'],
                submit_queue=optd['submit_queue'],
                submit_pe_lsf=optd['submit_pe_lsf'],
                submit_pe_sge=optd['submit_pe_sge'],
                submit_array=optd['submit_array'],
                submit_max_array=optd['submit_max_array'])
            # queue finished so unpickle results
            optd.update(ample_util.read_amoptd(optd['results_path']))
        else:
            benchmark_util.analyse(optd)
            ample_util.save_amoptd(optd)
        return
Example #6
    def main(self, args=None):
        """Main AMPLE routine.

        We require this as the multiprocessing module (only on **!!*%$$!! Windoze)
        requires that the main module can be imported. We there need ample to be
        a python script that can be imported, hence the main routine with its
        calling protected by the if __name__=="__main__":...

        args is an option argument that can contain the command-line arguments
        for the program - required for testing.
        """
        argso = argparse_util.process_command_line(args=args)

        self.amopt = amopt = config_util.AMPLEConfigOptions()
        amopt.populate(argso)

        # Setup things like logging, file structure, etc...
        amopt.d = self.setup(amopt.d)
        rosetta_modeller = options_processor.process_rosetta_options(amopt.d)

        # Display the parameters used
        logger.debug(amopt.prettify_parameters())

        amopt.write_config_file()
        #######################################################
        # SCRIPT PROPER STARTS HERE
        time_start = time.time()

        # Create function for monitoring jobs - static function decorator?
        if self.ample_output:

            def monitor():
                return self.ample_output.display_results(amopt.d)
        else:
            monitor = None

        if amopt.d['benchmark_mode'] and amopt.d['native_pdb']:
            # Process the native before we do anything else
            benchmark_util.analysePdb(amopt.d)

        # Create constituent models from an NMR ensemble
        if amopt.d['nmr_model_in']:
            nmr_mdir = os.path.join(amopt.d['work_dir'], 'nmr_models')
            amopt.d['modelling_workdir'] = nmr_mdir
            logger.info(
                'Splitting NMR ensemble into constituent models in directory: {0}'
                .format(nmr_mdir))
            amopt.d['models'] = pdb_edit.split_pdb(amopt.d['nmr_model_in'],
                                                   directory=nmr_mdir,
                                                   strip_hetatm=True,
                                                   same_size=True)
            logger.info('NMR ensemble contained {0} models'.format(
                len(amopt.d['models'])))

        # Modelling business happens here
        self.modelling(amopt.d, rosetta_modeller)
        amopt.write_config_file()

        # Ensembling business next
        if amopt.d['make_ensembles']:
            self.ensembling(amopt.d)
            amopt.write_config_file()

        # Some MR here
        if amopt.d['do_mr']:
            self.molecular_replacement(amopt.d)
            amopt.write_config_file()

        # Timing data
        time_stop = time.time()
        elapsed_time = time_stop - time_start
        run_in_min = elapsed_time / 60
        run_in_hours = run_in_min / 60
        msg = os.linesep + \
            'All processing completed (in {0:6.2F} hours)'.format(
                run_in_hours) + os.linesep
        msg += '----------------------------------------' + os.linesep
        logger.info(msg)

        # Benchmark mode
        if amopt.d['benchmark_mode']:
            self.benchmarking(amopt.d)
            amopt.write_config_file()

        amopt.write_config_file()
        # Flag to show that we reached the end without error - useful for integration testing
        amopt.d['AMPLE_finished'] = True
        ample_util.save_amoptd(amopt.d)

        logger.info("AMPLE finished at: %s",
                    time.strftime("%a, %d %b %Y %H:%M:%S", time.gmtime()))
        ref_mgr = reference_manager.ReferenceManager(amopt.d)
        ref_mgr.save_citations_to_file(amopt.d)
        logger.info(ref_mgr.citations_as_text)
        logger.info(reference_manager.footer)

        # Finally update pyrvapi results
        if self.ample_output:
            self.ample_output.display_results(amopt.d)
            self.ample_output.rvapi_shutdown(amopt.d)

        self.cleanup(amopt.d)
        return
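
The docstring explains that main() exists so the module stays importable under multiprocessing on Windows. The corresponding entry point is then just a thin guard; a sketch, where the module and class names are assumptions rather than AMPLE's actual layout:

if __name__ == "__main__":
    # Hypothetical entry module; AMPLE's real one may be named differently.
    from ample.main import Ample
    Ample().main()
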
Example #7
    def molecular_replacement(self, optd):

        if not optd['mrbump_scripts']:
            # MRBUMP analysis of the ensembles
            logger.info('----- Running MRBUMP on ensembles --------\n\n')
            if len(optd['ensembles']) < 1:
                msg = "ERROR! Cannot run MRBUMP as there are no ensembles!"
                exit_util.exit_error(msg)

            if optd['mrbump_dir'] is None:
                bump_dir = os.path.join(optd['work_dir'], 'MRBUMP')
                optd['mrbump_dir'] = bump_dir
            else:
                bump_dir = optd['mrbump_dir']
            if not os.path.exists(bump_dir):
                os.mkdir(bump_dir)

            optd['mrbump_results'] = []
            logger.info("Running MRBUMP jobs in directory: %s", bump_dir)

            # Set an ensemble-specific phaser_rms if required
            if optd['phaser_rms'] == 'auto':
                ensembler.set_phaser_rms_from_subcluster_score(optd)

            # Sort the ensembles in a favourable way
            logger.info("Sorting ensembles")
            sort_keys = [
                'cluster_num', 'truncation_level',
                'subcluster_radius_threshold', 'side_chain_treatment'
            ]
            ensemble_pdbs_sorted = ensembler.sort_ensembles(
                optd['ensembles'],
                optd['ensembles_data'],
                keys=sort_keys,
                prioritise=True)

            # Create job scripts
            logger.info("Generating MRBUMP runscripts")
            optd['mrbump_scripts'] = mrbump_util.write_mrbump_files(
                ensemble_pdbs_sorted,
                optd,
                job_time=mrbump_util.MRBUMP_RUNTIME,
                ensemble_options=optd['ensemble_options'],
                directory=bump_dir)

        # Create function for monitoring jobs - static function decorator?
        if self.ample_output:

            def monitor():
                r = mrbump_util.ResultsSummary()
                r.extractResults(optd['mrbump_dir'], purge=bool(optd['purge']))
                optd['mrbump_results'] = r.results
                return self.ample_output.display_results(optd)
        else:
            monitor = None

        # Save results here so that we have the list of scripts and mrbump directory set
        ample_util.save_amoptd(optd)

        # Change to mrbump directory before running
        os.chdir(optd['mrbump_dir'])
        ok = workers_util.run_scripts(
            job_scripts=optd['mrbump_scripts'],
            monitor=monitor,
            check_success=mrbump_util.checkSuccess,
            early_terminate=optd['early_terminate'],
            nproc=optd['nproc'],
            job_time=mrbump_util.MRBUMP_RUNTIME,
            job_name='mrbump',
            submit_cluster=optd['submit_cluster'],
            submit_qtype=optd['submit_qtype'],
            submit_queue=optd['submit_queue'],
            submit_pe_lsf=optd['submit_pe_lsf'],
            submit_pe_sge=optd['submit_pe_sge'],
            submit_array=optd['submit_array'],
            submit_max_array=optd['submit_max_array'])

        if not ok:
            msg = "An error code was returned after running MRBUMP on the ensembles!\n" + \
                  "For further information check the logs in directory: {0}".format(optd['mrbump_dir'])
            logger.critical(msg)

        # Collect the MRBUMP results
        results_summary = mrbump_util.ResultsSummary()
        optd['mrbump_results'] = results_summary.extractResults(
            optd['mrbump_dir'], purge=bool(optd['purge']))
        optd['success'] = results_summary.success
        ample_util.save_amoptd(optd)
        summary = mrbump_util.finalSummary(optd)
        logger.info(summary)
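
workers_util.run_scripts is given check_success=mrbump_util.checkSuccess together with early_terminate=optd['early_terminate'], i.e. the runner can stop the remaining jobs once one of them succeeds. The callback's implementation is not shown here; a hypothetical sketch of its contract, assuming it receives a finished job script and returns truthy on success (the log-file naming and the marker string are purely illustrative):

import os

def check_success(job_script):
    """Return True if the job behind job_script produced a solution."""
    log_file = os.path.splitext(job_script)[0] + '.log'
    if not os.path.isfile(log_file):
        return False
    with open(log_file) as f:
        return 'SOLUTION FOUND' in f.read()  # illustrative marker only
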
Example #8
    def modelling(self, optd, rosetta_modeller=None):
        if not (optd['import_models'] or optd['make_frags']
                or optd['make_models'] or optd['nmr_remodel']):
            return
        # Set the directory where the final models will end up
        optd['models_dir'] = os.path.join(optd['work_dir'], 'models')
        if not os.path.isdir(optd['models_dir']):
            os.mkdir(optd['models_dir'])
        if not rosetta_modeller:
            rosetta_modeller = options_processor.process_rosetta_options(optd)
        # Make Rosetta fragments
        if optd['make_frags']:
            rosetta_modeller.generate_fragments(optd)
            optd['frags_3mers'] = rosetta_modeller.frags_3mers
            optd['frags_9mers'] = rosetta_modeller.frags_9mers
            optd['psipred_ss2'] = rosetta_modeller.psipred_ss2

        if optd["use_contacts"] and not optd['restraints_file']:
            con_util = contact_util.ContactUtil(
                optd['fasta'],
                'fasta',
                contact_file=optd['contact_file'],
                contact_format=optd['contact_format'],
                bbcontacts_file=optd['bbcontacts_file'],
                bbcontacts_format=optd["bbcontacts_format"],
                cutoff_factor=optd['restraints_factor'],
                distance_to_neighbor=optd['distance_to_neighbour'])

            optd["contacts_dir"] = os.path.join(optd["work_dir"], "contacts")
            if not os.path.isdir(optd["contacts_dir"]):
                os.mkdir(optd["contacts_dir"])
            if con_util.require_contact_prediction:
                if con_util.found_ccmpred_contact_prediction_deps:
                    con_util.predict_contacts_from_sequence(
                        wdir=optd["contacts_dir"])
                    optd["contact_file"] = con_util.contact_file
                    optd["contact_format"] = con_util.contact_format

            if con_util.do_contact_analysis:
                plot_file = os.path.join(optd['contacts_dir'],
                                         optd['name'] + ".cm.png")
                if optd['native_pdb'] and optd['native_pdb_std']:
                    structure_file = optd['native_pdb_std']
                elif optd["native_pdb"]:
                    structure_file = optd['native_pdb']
                else:
                    structure_file = None
                optd['contact_map'], optd['contact_ppv'] = con_util.summarize(
                    plot_file, structure_file, 'pdb', optd['native_cutoff'])

                restraints_file = os.path.join(optd['contacts_dir'],
                                               optd['name'] + ".cst")
                optd['restraints_file'] = con_util.write_restraints(
                    restraints_file, optd['restraints_format'],
                    optd['energy_function'])
            else:
                con_util = None
        else:
            con_util = None

        if optd['make_models'] and optd['restraints_file']:
            rosetta_modeller.restraints_file = optd['restraints_file']

        if optd['make_models']:
            logger.info('----- making Rosetta models--------')
            if optd['nmr_remodel']:
                try:
                    optd['models'] = rosetta_modeller.nmr_remodel(
                        models=optd['models'],
                        ntimes=optd['nmr_process'],
                        alignment_file=optd['alignment_file'],
                        remodel_fasta=optd['nmr_remodel_fasta'],
                        monitor=monitor)
                except Exception as e:
                    msg = "Error remodelling NMR ensemble: {0}".format(e)
                    exit_util.exit_error(msg, sys.exc_info()[2])
            else:
                logger.info('making %s models...', optd['nmodels'])
                try:
                    optd['models'] = rosetta_modeller.ab_initio_model(
                        monitor=monitor)
                except Exception as e:
                    msg = "Error running ROSETTA to create models: {0}".format(
                        e)
                    exit_util.exit_error(msg, sys.exc_info()[2])
                if not pdb_edit.check_pdb_directory(optd['models_dir'],
                                                    sequence=optd['sequence']):
                    msg = "Problem with rosetta pdb files - please check the log for more information"
                    exit_util.exit_error(msg)
                logger.info('Modelling complete - models stored in: %s\n',
                            optd['models_dir'])

        elif optd['import_models']:
            logger.info('Importing models from directory: %s\n',
                        optd['models_dir'])
            if optd['homologs']:
                optd['models'] = ample_util.extract_and_validate_models(
                    optd, sequence=None, single=True, allsame=False)
            else:
                optd['models'] = ample_util.extract_and_validate_models(optd)
                # Need to check if Quark and handle things accordingly
                if optd['quark_models']:
                    # We always add sidechains to QUARK models if SCWRL is installed
                    if ample_util.is_exe(optd['scwrl_exe']):
                        optd['use_scwrl'] = True
                    else:
                        # No SCWRL so don't do anything with the side chains
                        logger.info(
                            'Using QUARK models but SCWRL is not installed '
                            'so only using %s sidechains', UNMODIFIED)
                        optd['side_chain_treatments'] = [UNMODIFIED]

        # Sub-select the decoys using contact information
        if con_util and optd['subselect_mode'] and not (optd['nmr_model_in'] or
                                                        optd['nmr_remodel']):
            logger.info('Subselecting models from directory using '
                        'provided contact information')
            subselect_data = con_util.subselect_decoys(
                optd['models'], 'pdb', mode=optd['subselect_mode'], **optd)
            optd['models'] = list(zip(*subselect_data))[0]  # list() needed on Python 3
            optd['subselect_data'] = dict(subselect_data)

        ample_util.save_amoptd(optd)
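
The subselection step above unpacks a list of (model, score)-style pairs with zip(*subselect_data) before turning the same pairs into a dict. A standalone illustration of that transpose idiom, with made-up data:

pairs = [('model_1.pdb', 0.9), ('model_2.pdb', 0.4)]
models, scores = zip(*pairs)
assert models == ('model_1.pdb', 'model_2.pdb')
assert scores == (0.9, 0.4)
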
Example #9
    def ensembling(self, optd):
        if optd['import_ensembles']:
            ensembler.import_ensembles(optd)
        elif optd['ideal_helices']:
            ample_util.ideal_helices(optd)
            logger.info("*** Using ideal helices to solve structure ***")
        else:
            # Import the models here instead of cluster_util.
            if optd['cluster_method'] == 'import':
                # HACK - this is certainly not how we want to do it. One flag for all (-models) in future
                optd['models'] = optd['cluster_dir']
                optd['models'] = ample_util.extract_and_validate_models(optd)

            # Check we have some models to work with
            if not (optd['single_model_mode'] or optd['models']):
                ample_util.save_amoptd(optd)
                msg = "ERROR! Cannot find any pdb files in: {0}".format(
                    optd['models_dir'])
                exit_util.exit_error(msg)
            optd['ensemble_ok'] = os.path.join(optd['work_dir'], 'ensemble.ok')
            if optd['submit_cluster']:
                # Pickle dictionary so it can be opened by the job to get the parameters
                ample_util.save_amoptd(optd)
                script = ensembler.cluster_script(optd)
                ensembler_timeout = ensembler.get_ensembler_timeout(optd)
                workers_util.run_scripts(
                    job_scripts=[script],
                    monitor=monitor,
                    nproc=optd['nproc'],
                    job_time=ensembler_timeout,
                    job_name='ensemble',
                    submit_cluster=optd['submit_cluster'],
                    submit_qtype=optd['submit_qtype'],
                    submit_queue=optd['submit_queue'],
                    submit_pe_lsf=optd['submit_pe_lsf'],
                    submit_pe_sge=optd['submit_pe_sge'],
                    submit_array=optd['submit_array'],
                    submit_max_array=optd['submit_max_array'])
                # queue finished so unpickle results
                optd.update(ample_util.read_amoptd(optd['results_path']))
            else:
                try:
                    ensembler.create_ensembles(optd)
                except Exception as e:
                    msg = "Error creating ensembles: {0}".format(e)
                    exit_util.exit_error(msg, sys.exc_info()[2])

            # Check we have something to work with
            if (not os.path.isfile(optd['ensemble_ok'])
                    or 'ensembles' not in optd
                    or not optd['ensembles']):
                msg = "Problem generating ensembles!"
                exit_util.exit_error(msg)

            if not (optd['homologs'] or optd['single_model_mode']):
                ensemble_summary = ensembler.ensemble_summary(
                    optd['ensembles_data'])
                logger.info(ensemble_summary)

        # Save the results
        ample_util.save_amoptd(optd)

        # Bail here if we didn't create anything
        if not len(optd['ensembles']):
            msg = "### AMPLE FAILED TO GENERATE ANY ENSEMBLES! ###\nExiting..."
            exit_util.exit_error(msg)

        # Update results view
        if self.ample_output:
            self.ample_output.display_results(optd)
        return
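
Completion here is signalled through the sentinel file optd['ensemble_ok']: the check near the end only trusts the run if that file exists and optd['ensembles'] is non-empty. A minimal sketch of the pattern, assuming ensembler.create_ensembles touches the file on success:

import os

def mark_ok(path):
    """Create an empty sentinel file; only its existence matters."""
    with open(path, 'w'):
        pass

def ensembling_succeeded(optd):
    return os.path.isfile(optd['ensemble_ok']) and bool(optd.get('ensembles'))
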
Example #10
    else:
        return path


# Script entry point
if __name__ == "__main__":

    # This runs the benchmarking starting from a pickled file containing an amopt dictionary.
    # - used when submitting the modelling jobs to a cluster
    if len(sys.argv) != 2 or not os.path.isfile(sys.argv[1]):
        print(
            "benchmark script requires the path to a pickled amopt dictionary!"
        )
        sys.exit(1)

    # Get the amopt dictionary
    amoptd = ample_util.read_amoptd(sys.argv[1])

    # Set up logging - could append to an existing log?
    logger = logging.getLogger()
    logger.setLevel(logging.DEBUG)
    fl = logging.FileHandler(os.path.join(amoptd['work_dir'], "benchmark.log"))
    fl.setLevel(logging.DEBUG)
    formatter = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    fl.setFormatter(formatter)
    logger.addHandler(fl)

    analyse(amoptd)
    ample_util.save_amoptd(amoptd)
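
The logging setup above writes only to benchmark.log. If console output is wanted as well, a stream handler can be attached to the root logger in the same way (a minimal sketch, not part of the original script):

import logging
import sys

console = logging.StreamHandler(sys.stdout)
console.setLevel(logging.INFO)
console.setFormatter(
    logging.Formatter('%(asctime)s - %(levelname)s - %(message)s'))
logging.getLogger().addHandler(console)
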
Example #11
if not restart and not ('models' in optd and optd['models'] and os.path.exists(optd['models'])):
    msg = 'AMPLE ensembler requires a -models argument with a file/directory of pdbs'
    exit_util.exit_error(msg, sys.exc_info()[2])

# Set up the working directory if one doesn't already exist
if not ('work_dir' in optd and optd['work_dir']):
    optd['work_dir'] = os.path.join(os.path.abspath(os.path.curdir), ENSEMBLE_DIRNAME)
if not os.path.isdir(optd['work_dir']):
    try:
        os.mkdir(optd['work_dir'])
    except OSError as e:
        msg = 'Error making ensemble workdir {0} : {1}'.format(optd['work_dir'], e)
        exit_util.exit_error(msg, sys.exc_info()[2])

assert os.path.isdir(optd['work_dir'])

# Start logging to a file
logging_util.setup_file_logging(os.path.join(optd['work_dir'], "ensemble.log"))
try:
    if not restart:
        results = process_models.extract_and_validate_models(optd)
        process_models.handle_model_import(optd, results)
        process_ensemble_options(optd)
        optd['ensemble_ok'] = os.path.join(optd['work_dir'], 'ensemble.ok')
        optd['results_path'] = os.path.join(optd['work_dir'], AMPLE_PKL)
    ensembler.create_ensembles(optd)
    ample_util.save_amoptd(optd)
except Exception as e:
    msg = "Error running ensembling: {0}".format(e.message)
    exit_util.exit_error(msg, sys.exc_info()[2])
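
exit_util.exit_error(msg, sys.exc_info()[2]) is the error path used throughout these examples. A hypothetical sketch of what such a helper does, assuming it logs the message plus the traceback and terminates with a non-zero status; the real AMPLE helper may differ in detail:

import logging
import sys
import traceback

def exit_error(msg, tb=None):
    logger = logging.getLogger()
    logger.critical(msg)
    if tb is not None:
        logger.critical(''.join(traceback.format_tb(tb)))
    sys.exit(1)
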
Example #12
    def main(self, args=None):
        """Main AMPLE routine.

        We require this as the multiprocessing module (only on **!!*%$$!! Windoze)
        requires that the main module can be imported. We there need ample to be
        a python script that can be imported, hence the main routine with its
        calling protected by the if __name__=="__main__":...

        args is an option argument that can contain the command-line arguments
        for the program - required for testing.
        """
        argso = argparse_util.process_command_line(args=args)
        # Work directory and loggers need to be set up before we do anything else
        self.setup_workdir(argso)
        global logger
        logger = logging_util.setup_logging(argso)

        # Logging and work directories in place so can start work
        self.amopt = amopt = config_util.AMPLEConfigOptions()
        amopt.populate(argso)
        amopt.d = self.setup(amopt.d)
        rosetta_modeller = options_processor.process_rosetta_options(amopt.d)
        logger.debug(
            amopt.prettify_parameters())  # Display the parameters used
        amopt.write_config_file()
        time_start = time.time()
        if self.ample_output:

            def monitor():
                return self.ample_output.display_results(amopt.d)

        else:
            monitor = None

        # Highlight deprecated command line arguments
        if amopt.d['submit_cluster']:
            message = "-%s has been deprecated and will be removed in version %s!" % (
                'submit_cluster', 1.6)
            warnings.warn(message, DeprecationWarning)
        if amopt.d["submit_pe_lsf"]:
            message = "-%s has been deprecated and will be removed in version %s! Use -submit_pe instead" % (
                'submit_pe_lsf', 1.6)
            warnings.warn(message, DeprecationWarning)
        if amopt.d["submit_pe_sge"]:
            message = "-%s has been deprecated and will be removed in version %s! Use -submit_pe instead" % (
                'submit_pe_sge', 1.6)
            warnings.warn(message, DeprecationWarning)

        # Process any files we may have been given
        model_results = process_models.extract_and_validate_models(amopt.d)
        if model_results:
            process_models.handle_model_import(amopt.d, model_results)
        if amopt.d['benchmark_mode'] and amopt.d['native_pdb']:
            # Process the native before we do anything else
            benchmark_util.analysePdb(amopt.d)

        # Create constituent models from an NMR ensemble
        if amopt.d['nmr_model_in']:
            nmr_mdir = os.path.join(amopt.d['work_dir'], 'nmr_models')
            amopt.d['modelling_workdir'] = nmr_mdir
            logger.info(
                'Splitting NMR ensemble into constituent models in directory: {0}'
                .format(nmr_mdir))
            amopt.d['processed_models'] = pdb_edit.split_pdb(
                amopt.d['nmr_model_in'],
                directory=nmr_mdir,
                strip_hetatm=True,
                same_size=True)
            logger.info('NMR ensemble contained {0} models'.format(
                len(amopt.d['processed_models'])))

        # Modelling business happens here
        if self.modelling_required(amopt.d):
            self.modelling(amopt.d, rosetta_modeller)
            ample_util.save_amoptd(amopt.d)
            amopt.write_config_file()

        # Ensembling business next
        if amopt.d['make_ensembles']:
            self.ensembling(amopt.d)
            amopt.write_config_file()

        # Some MR here
        if amopt.d['do_mr']:
            self.molecular_replacement(amopt.d)
            amopt.write_config_file()

        # Timing data
        time_stop = time.time()
        elapsed_time = time_stop - time_start
        run_in_min = elapsed_time / 60
        run_in_hours = run_in_min / 60
        msg = os.linesep + 'All processing completed (in {0:6.2F} hours)'.format(
            run_in_hours) + os.linesep
        msg += '----------------------------------------' + os.linesep
        logger.info(msg)

        # Benchmark mode
        if amopt.d['benchmark_mode']:
            self.benchmarking(amopt.d)
            amopt.write_config_file()

        amopt.write_config_file()
        # Flag to show that we reached the end without error - useful for integration testing
        amopt.d['AMPLE_finished'] = True
        ample_util.save_amoptd(amopt.d)

        logger.info("AMPLE finished at: %s",
                    time.strftime("%a, %d %b %Y %H:%M:%S", time.gmtime()))
        ref_mgr = reference_manager.ReferenceManager(amopt.d)
        ref_mgr.save_citations_to_file(amopt.d)
        logger.info(ref_mgr.citations_as_text)
        logger.info(reference_manager.footer)

        # Finally update pyrvapi results
        if self.ample_output:
            self.ample_output.display_results(amopt.d)
            self.ample_output.rvapi_shutdown(amopt.d)

        self.cleanup(amopt.d)
        return
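
The deprecation notices above are raised with warnings.warn(..., DeprecationWarning). By default Python hides most DeprecationWarnings from end users, so a run that should surface them can enable the filter explicitly (a general Python note, not AMPLE-specific):

import warnings

warnings.simplefilter('always', DeprecationWarning)
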
Example #13
    def ensembling(self, optd):
        if optd['import_ensembles']:
            ensembler.import_ensembles(optd)
        elif optd['ideal_helices']:
            ample_util.ideal_helices(optd)
            logger.info(
                "*** Attempting to solve the structure using ideal helices ***"
            )
            logger.warning(
                'If ideal helices do not solve the structure, you may want to use -helical_ensembles in '
                'place of -ideal_helices. AMPLE will then use a new set of helical ensembles which has been '
                'very successful in solving challenging cases!')
        elif optd['helical_ensembles']:
            ample_util.ideal_helices(optd)
            logger.info(
                "*** Attempting to solve the structure using the %s set of helical ensembles ***",
                optd['helical_ensembles_set'])
        else:
            # Check we have some models to work with
            if not (optd['single_model_mode'] or optd['processed_models']):
                ample_util.save_amoptd(optd)
                msg = "ERROR! Cannot find any pdb files in: {0}".format(
                    optd['models_dir'])
                exit_util.exit_error(msg)
            optd['ensemble_ok'] = os.path.join(optd['work_dir'], 'ensemble.ok')
            if optd['submit_qtype'] != 'local':
                # Pickle dictionary so it can be opened by the job to get the parameters
                ample_util.save_amoptd(optd)
                script = ensembler.cluster_script(optd)
                ensembler_timeout = ensembler.get_ensembler_timeout(optd)
                with TaskFactory(
                        optd['submit_qtype'],
                        script,
                        cwd=optd['work_dir'],
                        environment=optd['submit_pe'],
                        run_time=ensembler_timeout,
                        name='ensemble',
                        nprocesses=optd['nproc'],
                        max_array_size=optd['submit_max_array'],
                        queue=optd['submit_queue'],
                        shell="/bin/bash",
                ) as task:
                    task.run()
                    task.wait(interval=5, monitor_f=monitor)
                # queue finished so unpickle results
                optd.update(ample_util.read_amoptd(optd['results_path']))
            else:
                try:
                    ensembler.create_ensembles(optd)
                except Exception as e:
                    msg = "Error creating ensembles: {0}".format(e)
                    exit_util.exit_error(msg, sys.exc_info()[2])

            # Check we have something to work with
            if (not os.path.isfile(optd['ensemble_ok'])
                    or 'ensembles' not in optd
                    or not optd['ensembles']):
                msg = "Problem generating ensembles!"
                exit_util.exit_error(msg)

            if not (optd['homologs'] or optd['single_model_mode']):
                ensemble_summary = ensembler.ensemble_summary(
                    optd['ensembles_data'])
                logger.info(ensemble_summary)

        # Save the results
        ample_util.save_amoptd(optd)

        # Bail here if we didn't create anything
        if not len(optd['ensembles']):
            msg = "### AMPLE FAILED TO GENERATE ANY ENSEMBLES! ###\nExiting..."
            exit_util.exit_error(msg)

        # Update results view
        if self.ample_output:
            self.ample_output.display_results(optd)
        return
Example #14
    def main(self, args=None):
        """Main AMPLE routine.

        We require this as the multiprocessing module (only on **!!*%$$!! Windoze)
        requires that the main module can be imported. We there need ample to be
        a python script that can be imported, hence the main routine with its
        calling protected by the if __name__=="__main__":...

        args is an option argument that can contain the command-line arguments
        for the program - required for testing.
        """
        argso = argparse_util.process_command_line(args=args)
        # Work directory and loggers need to be set up before we do anything else
        self.setup_workdir(argso)
        global logger
        logger = logging_util.setup_logging(argso)
        
        # Logging and work directories in place so can start work
        self.amopt = amopt = config_util.AMPLEConfigOptions()
        amopt.populate(argso)
        amopt.d = self.setup(amopt.d)
        rosetta_modeller = options_processor.process_rosetta_options(amopt.d)

        # Display the parameters used
        logger.debug(amopt.prettify_parameters())

        amopt.write_config_file()
        #######################################################
        # SCRIPT PROPER STARTS HERE
        time_start = time.time()

        # Create function for monitoring jobs - static function decorator?
        if self.ample_output:
            def monitor():
                return self.ample_output.display_results(amopt.d)
        else:
            monitor = None
            
        # Process any files we may have been given
        model_results = process_models.extract_and_validate_models(amopt.d)
        if model_results:
            process_models.handle_model_import(amopt.d, model_results)
        
        if amopt.d['benchmark_mode'] and amopt.d['native_pdb']:
            # Process the native before we do anything else
            benchmark_util.analysePdb(amopt.d)

        # Create constituent models from an NMR ensemble
        if amopt.d['nmr_model_in']:
            nmr_mdir = os.path.join(amopt.d['work_dir'], 'nmr_models')
            amopt.d['modelling_workdir'] = nmr_mdir
            logger.info('Splitting NMR ensemble into constituent models in directory: {0}'.format(nmr_mdir))
            amopt.d['processed_models'] = pdb_edit.split_pdb(
                amopt.d['nmr_model_in'], directory=nmr_mdir, strip_hetatm=True, same_size=True)
            logger.info('NMR ensemble contained {0} models'.format(len(amopt.d['processed_models'])))

        # Modelling business happens here
        if self.modelling_required(amopt.d):
            self.modelling(amopt.d, rosetta_modeller)
            ample_util.save_amoptd(amopt.d)
            amopt.write_config_file()

        # Ensembling business next
        if amopt.d['make_ensembles']:
            self.ensembling(amopt.d)
            amopt.write_config_file()

        # Some MR here
        if amopt.d['do_mr']:
            self.molecular_replacement(amopt.d)
            amopt.write_config_file()

        # Timing data
        time_stop = time.time()
        elapsed_time = time_stop - time_start
        run_in_min = elapsed_time / 60
        run_in_hours = run_in_min / 60
        msg = os.linesep + \
            'All processing completed (in {0:6.2F} hours)'.format(
                run_in_hours) + os.linesep
        msg += '----------------------------------------' + os.linesep
        logger.info(msg)

        # Benchmark mode
        if amopt.d['benchmark_mode']:
            self.benchmarking(amopt.d)
            amopt.write_config_file()

        amopt.write_config_file()
        # Flag to show that we reached the end without error - useful for integration testing
        amopt.d['AMPLE_finished'] = True
        ample_util.save_amoptd(amopt.d)

        logger.info("AMPLE finished at: %s", time.strftime("%a, %d %b %Y %H:%M:%S", time.gmtime()))
        ref_mgr = reference_manager.ReferenceManager(amopt.d)
        ref_mgr.save_citations_to_file(amopt.d)
        logger.info(ref_mgr.citations_as_text)
        logger.info(reference_manager.footer)

        # Finally update pyrvapi results
        if self.ample_output:
            self.ample_output.display_results(amopt.d)
            self.ample_output.rvapi_shutdown(amopt.d)
        
        self.cleanup(amopt.d)
        return
Example #15
    def molecular_replacement(self, optd):
        mrbump_util.set_success_criteria(optd)
        if not optd['mrbump_scripts']:
            # MRBUMP analysis of the ensembles
            logger.info('----- Running MRBUMP on ensembles --------\n\n')
            if len(optd['ensembles']) < 1:
                msg = "ERROR! Cannot run MRBUMP as there are no ensembles!"
                exit_util.exit_error(msg)

            if optd['mrbump_dir'] is None:
                bump_dir = os.path.join(optd['work_dir'], 'MRBUMP')
                optd['mrbump_dir'] = bump_dir
            else:
                bump_dir = optd['mrbump_dir']
            if not os.path.exists(bump_dir):
                os.mkdir(bump_dir)

            optd['mrbump_results'] = []
            logger.info("Running MRBUMP jobs in directory: %s", bump_dir)

            # Set an ensemble-specific phaser_rms if required
            if optd['phaser_rms'] == 'auto':
                ensembler.set_phaser_rms_from_subcluster_score(optd)

            # Sort the ensembles in a favourable way
            logger.info("Sorting ensembles")
            sort_keys = ['cluster_num', 'truncation_level', 'subcluster_radius_threshold', 'side_chain_treatment']
            ensemble_pdbs_sorted = ensembler.sort_ensembles(
                optd['ensembles'], optd['ensembles_data'], keys=sort_keys, prioritise=True)

            # Create job scripts
            logger.info("Generating MRBUMP runscripts")
            optd['mrbump_scripts'] = mrbump_util.write_mrbump_files(
                ensemble_pdbs_sorted,
                optd,
                job_time=mrbump_util.MRBUMP_RUNTIME,
                ensemble_options=optd['ensemble_options'],
                directory=bump_dir)

        # Create function for monitoring jobs - static function decorator?
        if self.ample_output:
            def monitor():
                r = mrbump_util.ResultsSummary()
                r.extractResults(optd['mrbump_dir'], purge=bool(optd['purge']))
                optd['mrbump_results'] = r.results
                return self.ample_output.display_results(optd)
        else:
            monitor = None

        # Save results here so that we have the list of scripts and mrbump directory set
        ample_util.save_amoptd(optd)

        # Change to mrbump directory before running
        os.chdir(optd['mrbump_dir'])
        ok = workers_util.run_scripts(
            job_scripts=optd['mrbump_scripts'],
            monitor=monitor,
            check_success=mrbump_util.checkSuccess,
            early_terminate=optd['early_terminate'],
            nproc=optd['nproc'],
            job_time=mrbump_util.MRBUMP_RUNTIME,
            job_name='mrbump',
            submit_cluster=optd['submit_cluster'],
            submit_qtype=optd['submit_qtype'],
            submit_queue=optd['submit_queue'],
            submit_pe_lsf=optd['submit_pe_lsf'],
            submit_pe_sge=optd['submit_pe_sge'],
            submit_array=optd['submit_array'],
            submit_max_array=optd['submit_max_array'])

        if not ok:
            msg = "An error code was returned after running MRBUMP on the ensembles!\n" + \
                  "For further information check the logs in directory: {0}".format(optd['mrbump_dir'])
            logger.critical(msg)

        # Collect the MRBUMP results
        results_summary = mrbump_util.ResultsSummary()
        optd['mrbump_results'] = results_summary.extractResults(optd['mrbump_dir'], purge=bool(optd['purge']))
        optd['success'] = results_summary.success
        ample_util.save_amoptd(optd)
        summary = mrbump_util.finalSummary(optd)
        logger.info(summary)
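
The "favourable" ordering is a multi-key sort over per-ensemble metadata. ensembler.sort_ensembles itself is not shown, but the core idiom can be illustrated with operator.itemgetter over dictionaries carrying the same keys (values are made up):

from operator import itemgetter

ensembles_data = [
    {'cluster_num': 2, 'truncation_level': 20,
     'subcluster_radius_threshold': 1, 'side_chain_treatment': 'polyala'},
    {'cluster_num': 1, 'truncation_level': 80,
     'subcluster_radius_threshold': 3, 'side_chain_treatment': 'allatom'},
]
sort_keys = ['cluster_num', 'truncation_level',
             'subcluster_radius_threshold', 'side_chain_treatment']
ensembles_data.sort(key=itemgetter(*sort_keys))
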
Example #16
    def ensembling(self, optd):
        if optd['import_ensembles']:
            ensembler.import_ensembles(optd)
        elif optd['ideal_helices']:
            ample_util.ideal_helices(optd)
            logger.info("*** Using ideal helices to solve structure ***")
        else:
            # Check we have some models to work with
            if not (optd['single_model_mode'] or optd['processed_models']):
                ample_util.save_amoptd(optd)
                msg = "ERROR! Cannot find any pdb files in: {0}".format(optd['models_dir'])
                exit_util.exit_error(msg)
            optd['ensemble_ok'] = os.path.join(optd['work_dir'], 'ensemble.ok')
            if optd['submit_cluster']:
                # Pickle dictionary so it can be opened by the job to get the parameters
                ample_util.save_amoptd(optd)
                script = ensembler.cluster_script(optd)
                ensembler_timeout = ensembler.get_ensembler_timeout(optd)
                workers_util.run_scripts(
                    job_scripts=[script],
                    monitor=monitor,
                    nproc=optd['nproc'],
                    job_time=ensembler_timeout,
                    job_name='ensemble',
                    submit_cluster=optd['submit_cluster'],
                    submit_qtype=optd['submit_qtype'],
                    submit_queue=optd['submit_queue'],
                    submit_pe_lsf=optd['submit_pe_lsf'],
                    submit_pe_sge=optd['submit_pe_sge'],
                    submit_array=optd['submit_array'],
                    submit_max_array=optd['submit_max_array'])
                # queue finished so unpickle results
                optd.update(ample_util.read_amoptd(optd['results_path']))
            else:
                try:
                    ensembler.create_ensembles(optd)
                except Exception as e:
                    msg = "Error creating ensembles: {0}".format(e)
                    exit_util.exit_error(msg, sys.exc_info()[2])

            # Check we have something to work with
            if not os.path.isfile(optd['ensemble_ok']) or 'ensembles' not in optd or not optd['ensembles']:
                msg = "Problem generating ensembles!"
                exit_util.exit_error(msg)

            if not (optd['homologs'] or optd['single_model_mode']):
                ensemble_summary = ensembler.ensemble_summary(optd['ensembles_data'])
                logger.info(ensemble_summary)

        # Save the results
        ample_util.save_amoptd(optd)

        # Bail here if we didn't create anything
        if not len(optd['ensembles']):
            msg = "### AMPLE FAILED TO GENERATE ANY ENSEMBLES! ###\nExiting..."
            exit_util.exit_error(msg)

        # Update results view
        if self.ample_output:
            self.ample_output.display_results(optd)
        return