def __init__(
    self,
    workflow,
    dag,
    cores,
    local_cores=1,
    dryrun=False,
    touch=False,
    cluster=None,
    cluster_status=None,
    cluster_config=None,
    cluster_sync=None,
    drmaa=None,
    drmaa_log_dir=None,
    kubernetes=None,
    container_image=None,
    tibanna=None,
    tibanna_sfn=None,
    precommand="",
    tibanna_config=False,
    jobname=None,
    quiet=False,
    printreason=False,
    printshellcmds=False,
    keepgoing=False,
    max_jobs_per_second=None,
    max_status_checks_per_second=100,
    latency_wait=3,
    greediness=1.0,
    force_use_threads=False,
    assume_shared_fs=True,
    keepincomplete=False,
):
    """Create a new instance of KnapsackJobScheduler."""
    from ratelimiter import RateLimiter

    self.cluster = cluster
    self.cluster_config = cluster_config
    self.cluster_sync = cluster_sync
    self.dag = dag
    self.workflow = workflow
    self.dryrun = dryrun
    self.touch = touch
    self.quiet = quiet
    self.keepgoing = keepgoing
    self.running = set()
    self.failed = set()
    self.finished_jobs = 0
    self.greediness = 1
    self.max_jobs_per_second = max_jobs_per_second
    self.keepincomplete = keepincomplete

    self.global_resources = {
        name: (sys.maxsize if res is None else res)
        for name, res in workflow.global_resources.items()
    }
    self.resources = dict(self.global_resources)

    use_threads = (
        force_use_threads or (os.name != "posix") or cluster or cluster_sync or drmaa
    )
    self._open_jobs = threading.Semaphore(0)
    self._lock = threading.Lock()

    self._errors = False
    self._finished = False
    self._job_queue = None
    self._submit_callback = self._noop
    self._finish_callback = partial(
        self._proceed,
        update_dynamic=not self.dryrun,
        print_progress=not self.quiet and not self.dryrun,
    )

    self._local_executor = None
    if dryrun:
        self._executor = DryrunExecutor(
            workflow,
            dag,
            printreason=printreason,
            quiet=quiet,
            printshellcmds=printshellcmds,
            latency_wait=latency_wait,
        )
    elif touch:
        self._executor = TouchExecutor(
            workflow,
            dag,
            printreason=printreason,
            quiet=quiet,
            printshellcmds=printshellcmds,
            latency_wait=latency_wait,
        )
    elif cluster or cluster_sync or (drmaa is not None):
        if not workflow.immediate_submit:
            # No local jobs when using immediate submit!
            # Otherwise, they will fail due to missing input
            self._local_executor = CPUExecutor(
                workflow,
                dag,
                local_cores,
                printreason=printreason,
                quiet=quiet,
                printshellcmds=printshellcmds,
                latency_wait=latency_wait,
                cores=local_cores,
                keepincomplete=keepincomplete,
            )
        if cluster or cluster_sync:
            if cluster_sync:
                constructor = SynchronousClusterExecutor
            else:
                constructor = partial(
                    GenericClusterExecutor,
                    statuscmd=cluster_status,
                    max_status_checks_per_second=max_status_checks_per_second,
                )
            self._executor = constructor(
                workflow,
                dag,
                None,
                submitcmd=(cluster or cluster_sync),
                cluster_config=cluster_config,
                jobname=jobname,
                printreason=printreason,
                quiet=quiet,
                printshellcmds=printshellcmds,
                latency_wait=latency_wait,
                assume_shared_fs=assume_shared_fs,
                keepincomplete=keepincomplete,
            )
            if workflow.immediate_submit:
                self._submit_callback = partial(
                    self._proceed,
                    update_dynamic=False,
                    print_progress=False,
                    update_resources=False,
                    handle_job_success=False,
                )
        else:
            self._executor = DRMAAExecutor(
                workflow,
                dag,
                None,
                drmaa_args=drmaa,
                drmaa_log_dir=drmaa_log_dir,
                jobname=jobname,
                printreason=printreason,
                quiet=quiet,
                printshellcmds=printshellcmds,
                latency_wait=latency_wait,
                cluster_config=cluster_config,
                assume_shared_fs=assume_shared_fs,
                max_status_checks_per_second=max_status_checks_per_second,
                keepincomplete=keepincomplete,
            )
    elif kubernetes:
        self._local_executor = CPUExecutor(
            workflow,
            dag,
            local_cores,
            printreason=printreason,
            quiet=quiet,
            printshellcmds=printshellcmds,
            latency_wait=latency_wait,
            cores=local_cores,
            keepincomplete=keepincomplete,
        )
        self._executor = KubernetesExecutor(
            workflow,
            dag,
            kubernetes,
            container_image=container_image,
            printreason=printreason,
            quiet=quiet,
            printshellcmds=printshellcmds,
            latency_wait=latency_wait,
            cluster_config=cluster_config,
            keepincomplete=keepincomplete,
        )
    elif tibanna:
        self._local_executor = CPUExecutor(
            workflow,
            dag,
            local_cores,
            printreason=printreason,
            quiet=quiet,
            printshellcmds=printshellcmds,
            use_threads=use_threads,
            latency_wait=latency_wait,
            cores=local_cores,
            keepincomplete=keepincomplete,
        )
        self._executor = TibannaExecutor(
            workflow,
            dag,
            cores,
            tibanna_sfn,
            precommand=precommand,
            tibanna_config=tibanna_config,
            container_image=container_image,
            printreason=printreason,
            quiet=quiet,
            printshellcmds=printshellcmds,
            latency_wait=latency_wait,
            keepincomplete=keepincomplete,
        )
    else:
        self._executor = CPUExecutor(
            workflow,
            dag,
            cores,
            printreason=printreason,
            quiet=quiet,
            printshellcmds=printshellcmds,
            use_threads=use_threads,
            latency_wait=latency_wait,
            cores=cores,
            keepincomplete=keepincomplete,
        )

    if self.max_jobs_per_second and not self.dryrun:
        max_jobs_frac = Fraction(self.max_jobs_per_second).limit_denominator()
        self.rate_limiter = RateLimiter(
            max_calls=max_jobs_frac.numerator, period=max_jobs_frac.denominator
        )
    else:
        # essentially no rate limit
        self.rate_limiter = DummyRateLimiter()

    self._user_kill = None
    signal.signal(signal.SIGTERM, self.exit_gracefully)
    self._open_jobs.release()
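# Illustrative sketch of the resource bookkeeping above: unbounded resources
# (given as None) are mapped to ``sys.maxsize`` so they never act as a limiting
# constraint during job selection. The dict below is hypothetical example data,
# not taken from a real workflow; the exact integer is platform-dependent
# (shown here for a 64-bit build).
#
#     >>> import sys
#     >>> raw = {"_cores": 8, "mem_mb": None}
#     >>> {name: (sys.maxsize if res is None else res) for name, res in raw.items()}
#     {'_cores': 8, 'mem_mb': 9223372036854775807}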
def __init__(
    self,
    workflow,
    dag,
    local_cores=1,
    dryrun=False,
    touch=False,
    cluster=None,
    cluster_status=None,
    cluster_config=None,
    cluster_sync=None,
    drmaa=None,
    drmaa_log_dir=None,
    kubernetes=None,
    container_image=None,
    tibanna=None,
    tibanna_sfn=None,
    google_lifesciences=None,
    google_lifesciences_regions=None,
    google_lifesciences_location=None,
    google_lifesciences_cache=False,
    tes=None,
    precommand="",
    preemption_default=None,
    preemptible_rules=None,
    tibanna_config=False,
    jobname=None,
    quiet=False,
    printreason=False,
    printshellcmds=False,
    keepgoing=False,
    max_jobs_per_second=None,
    max_status_checks_per_second=100,
    latency_wait=3,
    greediness=1.0,
    force_use_threads=False,
    assume_shared_fs=True,
    keepincomplete=False,
    keepmetadata=True,
    scheduler_type=None,
    scheduler_ilp_solver=None,
):
    """Create a new instance of KnapsackJobScheduler."""
    from ratelimiter import RateLimiter

    cores = workflow.global_resources["_cores"]

    self.cluster = cluster
    self.cluster_config = cluster_config
    self.cluster_sync = cluster_sync
    self.dag = dag
    self.workflow = workflow
    self.dryrun = dryrun
    self.touch = touch
    self.quiet = quiet
    self.keepgoing = keepgoing
    self.running = set()
    self.failed = set()
    self.finished_jobs = 0
    self.greediness = 1
    self.max_jobs_per_second = max_jobs_per_second
    self.keepincomplete = keepincomplete
    self.keepmetadata = keepmetadata
    self.scheduler_type = scheduler_type
    self.scheduler_ilp_solver = scheduler_ilp_solver
    self._tofinish = []

    self.global_resources = {
        name: (sys.maxsize if res is None else res)
        for name, res in workflow.global_resources.items()
    }

    if workflow.global_resources["_nodes"] is not None:
        # Do not restrict cores locally if nodes are used
        # (i.e. in case of cluster/cloud submission).
        self.global_resources["_cores"] = sys.maxsize

    self.resources = dict(self.global_resources)

    use_threads = (
        force_use_threads or (os.name != "posix") or cluster or cluster_sync or drmaa
    )
    self._open_jobs = threading.Semaphore(0)
    self._lock = threading.Lock()

    self._errors = False
    self._finished = False
    self._job_queue = None
    self._submit_callback = self._noop
    self._finish_callback = partial(
        self._proceed,
        update_dynamic=not self.dryrun,
        print_progress=not self.quiet and not self.dryrun,
    )

    self._local_executor = None
    if dryrun:
        self._executor = DryrunExecutor(
            workflow,
            dag,
            printreason=printreason,
            quiet=quiet,
            printshellcmds=printshellcmds,
            latency_wait=latency_wait,
        )
    elif touch:
        self._executor = TouchExecutor(
            workflow,
            dag,
            printreason=printreason,
            quiet=quiet,
            printshellcmds=printshellcmds,
            latency_wait=latency_wait,
        )
    elif cluster or cluster_sync or (drmaa is not None):
        if not workflow.immediate_submit:
            # No local jobs when using immediate submit!
            # Otherwise, they will fail due to missing input
            self._local_executor = CPUExecutor(
                workflow,
                dag,
                local_cores,
                printreason=printreason,
                quiet=quiet,
                printshellcmds=printshellcmds,
                latency_wait=latency_wait,
                cores=local_cores,
                keepincomplete=keepincomplete,
                keepmetadata=keepmetadata,
            )
        if cluster or cluster_sync:
            if cluster_sync:
                constructor = SynchronousClusterExecutor
            else:
                constructor = partial(
                    GenericClusterExecutor,
                    statuscmd=cluster_status,
                    max_status_checks_per_second=max_status_checks_per_second,
                )
            self._executor = constructor(
                workflow,
                dag,
                None,
                submitcmd=(cluster or cluster_sync),
                cluster_config=cluster_config,
                jobname=jobname,
                printreason=printreason,
                quiet=quiet,
                printshellcmds=printshellcmds,
                latency_wait=latency_wait,
                assume_shared_fs=assume_shared_fs,
                keepincomplete=keepincomplete,
                keepmetadata=keepmetadata,
            )
            if workflow.immediate_submit:
                self._submit_callback = partial(
                    self._proceed,
                    update_dynamic=False,
                    print_progress=False,
                    update_resources=False,
                    handle_job_success=False,
                )
        else:
            self._executor = DRMAAExecutor(
                workflow,
                dag,
                None,
                drmaa_args=drmaa,
                drmaa_log_dir=drmaa_log_dir,
                jobname=jobname,
                printreason=printreason,
                quiet=quiet,
                printshellcmds=printshellcmds,
                latency_wait=latency_wait,
                cluster_config=cluster_config,
                assume_shared_fs=assume_shared_fs,
                max_status_checks_per_second=max_status_checks_per_second,
                keepincomplete=keepincomplete,
                keepmetadata=keepmetadata,
            )
    elif kubernetes:
        self._local_executor = CPUExecutor(
            workflow,
            dag,
            local_cores,
            printreason=printreason,
            quiet=quiet,
            printshellcmds=printshellcmds,
            latency_wait=latency_wait,
            cores=local_cores,
            keepincomplete=keepincomplete,
            keepmetadata=keepmetadata,
        )
        self._executor = KubernetesExecutor(
            workflow,
            dag,
            kubernetes,
            container_image=container_image,
            printreason=printreason,
            quiet=quiet,
            printshellcmds=printshellcmds,
            latency_wait=latency_wait,
            cluster_config=cluster_config,
            keepincomplete=keepincomplete,
            keepmetadata=keepmetadata,
        )
    elif tibanna:
        self._local_executor = CPUExecutor(
            workflow,
            dag,
            local_cores,
            printreason=printreason,
            quiet=quiet,
            printshellcmds=printshellcmds,
            use_threads=use_threads,
            latency_wait=latency_wait,
            cores=local_cores,
            keepincomplete=keepincomplete,
            keepmetadata=keepmetadata,
        )
        self._executor = TibannaExecutor(
            workflow,
            dag,
            cores,
            tibanna_sfn,
            precommand=precommand,
            tibanna_config=tibanna_config,
            container_image=container_image,
            printreason=printreason,
            quiet=quiet,
            printshellcmds=printshellcmds,
            latency_wait=latency_wait,
            keepincomplete=keepincomplete,
            keepmetadata=keepmetadata,
        )
    elif google_lifesciences:
        self._local_executor = CPUExecutor(
            workflow,
            dag,
            local_cores,
            printreason=printreason,
            quiet=quiet,
            printshellcmds=printshellcmds,
            latency_wait=latency_wait,
            cores=local_cores,
        )
        self._executor = GoogleLifeSciencesExecutor(
            workflow,
            dag,
            cores,
            container_image=container_image,
            regions=google_lifesciences_regions,
            location=google_lifesciences_location,
            cache=google_lifesciences_cache,
            printreason=printreason,
            quiet=quiet,
            printshellcmds=printshellcmds,
            latency_wait=latency_wait,
            preemption_default=preemption_default,
            preemptible_rules=preemptible_rules,
        )
    elif tes:
        self._local_executor = CPUExecutor(
            workflow,
            dag,
            local_cores,
            printreason=printreason,
            quiet=quiet,
            printshellcmds=printshellcmds,
            latency_wait=latency_wait,
            cores=local_cores,
            keepincomplete=keepincomplete,
        )
        self._executor = TaskExecutionServiceExecutor(
            workflow,
            dag,
            cores=local_cores,
            printreason=printreason,
            quiet=quiet,
            printshellcmds=printshellcmds,
            latency_wait=latency_wait,
            tes_url=tes,
            container_image=container_image,
        )
    else:
        self._executor = CPUExecutor(
            workflow,
            dag,
            cores,
            printreason=printreason,
            quiet=quiet,
            printshellcmds=printshellcmds,
            use_threads=use_threads,
            latency_wait=latency_wait,
            cores=cores,
            keepincomplete=keepincomplete,
            keepmetadata=keepmetadata,
        )

    if self.max_jobs_per_second and not self.dryrun:
        max_jobs_frac = Fraction(self.max_jobs_per_second).limit_denominator()
        self.rate_limiter = RateLimiter(
            max_calls=max_jobs_frac.numerator, period=max_jobs_frac.denominator
        )
    else:
        # essentially no rate limit
        self.rate_limiter = DummyRateLimiter()

    # Choose job selector (greedy or ILP)
    self.job_selector = self.job_selector_greedy
    if scheduler_type == "ilp":
        import pulp

        if pulp.apis.LpSolverDefault is None:
            logger.warning(
                "Falling back to greedy scheduler because no default "
                "solver is found for pulp (you have to install either "
                "coincbc or glpk)."
            )
        else:
            self.job_selector = self.job_selector_ilp

    self._user_kill = None
    try:
        signal.signal(signal.SIGTERM, self.exit_gracefully)
    except ValueError:
        # If this fails, it is due to scheduler not being invoked in the main thread.
        # This can only happen with --gui, in which case it is fine for now.
        pass
    self._open_jobs.release()
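# Illustrative sketch of the rate-limiter setup above: a possibly fractional
# ``max_jobs_per_second`` is converted into the integer ``max_calls``/``period``
# pair expected by ``ratelimiter.RateLimiter`` via ``Fraction.limit_denominator()``.
# The value 0.5 below is a hypothetical example, not a default of this module.
#
#     >>> from fractions import Fraction
#     >>> frac = Fraction(0.5).limit_denominator()
#     >>> frac.numerator, frac.denominator
#     (1, 2)
#
# i.e. at most one job submission every two seconds.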