示例#1
0
        # Record the directory, warning on duplicates rather than failing
        if dirn not in data_dirs:
            data_dirs.append(dirn)
        else:
            logging.warning("Directory '%s' already specified" % dirn)

    # Set logging format and level
    # (DEBUG when --debug was supplied, INFO otherwise)
    if arguments.debug:
        logging_level = logging.DEBUG
    else:
        logging_level = logging.INFO
    logging.basicConfig(format='%(levelname)8s %(message)s')
    logging.getLogger().setLevel(logging_level)

    # Set up job runner
    # 'simple' runs jobs locally; 'ge' submits to Grid Engine
    if arguments.runner == 'simple':
        runner = JobRunner.SimpleJobRunner(join_logs=True)
    elif arguments.runner == 'ge':
        # Extra GE arguments arrive as one space-separated string;
        # split into a list for the runner
        if arguments.ge_args:
            ge_extra_args = str(arguments.ge_args).split(' ')
        else:
            ge_extra_args = None
        runner = JobRunner.GEJobRunner(queue=arguments.ge_queue,
                                       ge_extra_args=ge_extra_args)
    else:
        # Unrecognised runner name is fatal
        logging.error("Unknown job runner: '%s'" % arguments.runner)
        sys.exit(1)
    runner.set_log_dir(arguments.log_dir)

    # Set up and run pipeline
    pipeline = Pipeline.PipelineRunner(runner,
                                       max_concurrent_jobs=\
示例#2
0
    # Report
    # NOTE: Python 2 print statements — this fragment predates Python 3
    # In dry-run mode just describe the experiments, don't touch disk
    if dry_run:
        print "%d experiments defined:" % len(expts)
        for expt in expts:
            print "\tName   : %s" % expt.name
            print "\tType   : %s" % expt.type
            print "\tSample : %s" % expt.sample
            print "\tLibrary: %s" % expt.library
            print "\tOptions: %s" % expt.describe()
            print ""

    # Check we have run data
    # (no runs means there is nothing to analyse)
    if not len(expts.solid_runs):
        logging.error("No run data found!")
        sys.exit(1)

    # Build the analysis directory structure
    # (buildAnalysisDirs honours dry_run itself)
    expts.buildAnalysisDirs(top_dir=top_dir,
                            dry_run=dry_run,
                            link_type=link_type,
                            naming_scheme=link_naming_scheme)

    # Run the pipeline script
    # Each experiment directory is added as a pipeline input;
    # jobs are submitted via Grid Engine
    if pipeline_script:
        runner = JobRunner.GEJobRunner()
        pipeline = Pipeline.SolidPipelineRunner(runner, pipeline_script)
        for expt in expts:
            pipeline.addDir(os.path.abspath(expt.dirname(top_dir)))
        pipeline.run()
        print "%s" % pipeline.report()
    def __init__(self,
                 runner=None,
                 reporter=None,
                 max_concurrent=None,
                 poll_interval=5,
                 job_interval=0.1,
                 max_restarts=1):
        """Set up a new SimpleScheduler instance

        Arguments:
          runner: optional, default job runner to use
          reporter: optional, custom SchedulerReporter instance to use
            for messaging when jobs, groups etc are started and finished
          max_concurrent: optional, maximum number of concurrent
            processes the scheduler will run (default: no limit)
          poll_interval: optional, number of seconds to wait in
            between checking for completed jobs etc in the scheduler
            loop (default: 5 seconds)
          job_interval: optional, number of seconds to wait before
            submitting a job (default: 0.1 seconds)
          max_restarts: optional, if non-zero then attempt to restart
            jobs that are in an error state up to this many times. Set to
            zero to turn off restarting jobs in error states (they will
            be terminated instead). (default: 1)

        """

        # Run as a daemon thread so the scheduler loop doesn't
        # prevent interpreter exit
        threading.Thread.__init__(self)
        self.setDaemon(1)
        # Fall back to a SimpleJobRunner when no runner is given
        self.__runner = (JobRunner.SimpleJobRunner()
                         if runner is None else runner)
        # Scheduling parameters
        self.__max_concurrent = max_concurrent
        self.__poll_interval = poll_interval
        self.__job_interval = job_interval
        self.__max_restarts = max_restarts
        # Job bookkeeping: id counter, submission queue, waiting
        # and running job lists, plus a lookup of every job
        self.__job_count = 0
        self.__submitted = Queue.Queue()
        self.__scheduled, self.__running = [], []
        self.__jobs = {}
        # Job names: all assigned, and those already finished
        self.__names, self.__finished_names = [], []
        # Job groups: currently active, and all groups by name
        self.__active_groups, self.__groups = [], {}
        # Registered callback functions
        self.__callbacks = []
        # Periodic error checking of jobs
        self.__enable_error_check = True
        self.__last_error_check_time = 0.0
        self.__error_check_interval = 60.0
        # Scheduler starts out inactive
        self.__active = False
        # Fall back to the default reporter when none is given
        self.__reporter = (default_scheduler_reporter()
                           if reporter is None else reporter)
class SchedulerTimeout(SchedulerException):
    """Exception raised when a timeout limit has been exceeded"""


#######################################################################
# Main program (example)
#######################################################################

from applications import Command
if __name__ == "__main__":
    # Example usage of the scheduler classes
    # (uncomment the next line to get debugging output)
    ##logging.getLogger().setLevel(logging.DEBUG)
    #
    # Create a scheduler limited to 4 concurrent jobs and start
    # its background thread
    sched = SimpleScheduler(max_concurrent=4,
                            runner=JobRunner.SimpleJobRunner(join_logs=True))
    sched.start()
    #
    # Add some jobs to run but don't wait
    # for them to complete
    sched.submit(Command('sh', '-c', 'echo $HOSTNAME'))
    #
    # Run a job where we do want to wait for
    # completion; 'sleeper_50' is named so that later
    # submissions can depend on it via wait_for
    sched.submit(Command('sleep', '50'), name="sleeper_50")
    sched.submit(Command('du', '-sh', '..')).wait()
    # This job is held until 'sleeper_50' has finished
    sched.submit(Command('sh', '-c', 'echo Sleeper 50 finished'),
                 wait_for=('sleeper_50', ))
    sched.submit(Command('sleep', '10')).wait()
    # More jobs
    sched.submit(Command('ps', 'faux'))
示例#5
0
    def __init__(self,
                 runner=None,
                 reporter=None,
                 max_concurrent=None,
                 max_slots=None,
                 poll_interval=5,
                 job_interval=0.1,
                 max_restarts=1,
                 submission_mode=SubmissionMode.RANDOM):
        """Create a new SimpleScheduler instance

        Arguments:
          runner: optional, default job runner to use
          reporter: optional, custom SchedulerReporter instance to use
            for messaging when jobs, groups etc are started and finished
          max_concurrent: optional, maximum number of concurrent
            processes the scheduler will run (default: no limit)
          max_slots: optional, if set then specifies the maximum number
            of slots (aka cores/processors/threads) available to the
            scheduler (default: no limit)
          poll_interval: optional, number of seconds to wait in
            between checking for completed jobs etc in the scheduler
            loop (default: 5 seconds)
          job_interval: optional, number of seconds to wait before
            submitting a job (default: 0.1 seconds)
          max_restarts: optional, if non-zero then attempt to restart
            jobs that are in an error state up to this many times. Set to
            zero to turn off restarting jobs in error states (they will
            be terminated instead). (default: 1)
          submission_mode: flag indicating how the scheduler will
            submit jobs (default is to submit in random order)

        """

        threading.Thread.__init__(self)
        # Run as a daemon thread so the scheduler loop doesn't block
        # interpreter exit; the 'daemon' attribute replaces the
        # Thread.setDaemon() method deprecated since Python 3.10
        self.daemon = True
        # Default job runner
        if runner is None:
            runner = JobRunner.SimpleJobRunner()
        self.__runner = runner
        # Maximum number of concurrent jobs
        # (zero is normalised to None i.e. 'no limit')
        self.__max_concurrent = \
            (max_concurrent if max_concurrent != 0 else None)
        # Maximum number of available slots across
        # all runners (zero means 'no limit')
        self.__max_slots = (max_slots if max_slots != 0 else None)
        # Length of time to wait between checking jobs
        self.__poll_interval = poll_interval
        # Length of time to wait before submitting job
        self.__job_interval = job_interval
        # Number of attempts to restart errored jobs
        self.__max_restarts = max_restarts
        # Internal job id counter
        self.__job_count = 0
        # Queue to add jobs
        self.__submitted = queue.Queue()
        # List of scheduled (i.e. waiting) jobs
        self.__scheduled = []
        # List of running jobs
        self.__running = []
        # Dictionary with all jobs
        self.__jobs = dict()
        # Job submission mode
        self.__submission_mode = submission_mode
        # Handle names
        self.__names = []
        self.__finished_names = []
        # Handle groups
        self.__active_groups = []
        self.__groups = dict()
        # Handle callbacks
        self.__callbacks = []
        # Error checking for jobs
        self.__enable_error_check = True
        self.__last_error_check_time = 0.0
        self.__error_check_interval = 60.0
        # Flag controlling whether scheduler is active
        self.__active = False
        # Default reporter
        if reporter is None:
            reporter = default_scheduler_reporter()
        self.__reporter = reporter
        # Make sure the scheduler is stopped cleanly on exit
        atexit.register(cleanup_atexit, self)
示例#6
0
            # Record each directory once, warning on duplicates
            if dirn not in data_dirs:
                data_dirs.append(dirn)
            else:
                logging.warning("Directory '%s' already specified" % dirn)

    # Set logging format and level
    # (DEBUG when --debug was supplied, INFO otherwise)
    if options.debug:
        logging_level = logging.DEBUG
    else:
        logging_level = logging.INFO
    logging.basicConfig(format='%(levelname)8s %(message)s')
    logging.getLogger().setLevel(logging_level)

    # Set up job runner
    # 'simple' = local, 'ge' = Grid Engine, 'drmaa' = DRMAA interface
    if options.runner == 'simple':
        runner = JobRunner.SimpleJobRunner(join_logs=True)
    elif options.runner == 'ge':
        # Extra GE arguments arrive as one space-separated string;
        # split into a list for the runner
        if options.ge_args:
            ge_extra_args = str(options.ge_args).split(' ')
        else:
            ge_extra_args = None
        runner = JobRunner.GEJobRunner(queue=options.ge_queue,
                                       ge_extra_args=ge_extra_args)
    elif options.runner == 'drmaa':
        runner = JobRunner.DRMAAJobRunner(queue=options.ge_queue)
    else:
        # Unrecognised runner name is fatal
        logging.error("Unknown job runner: '%s'" % options.runner)
        sys.exit(1)
    runner.set_log_dir(options.log_dir)

    # Set up and run pipeline