# NOTE: the imports below are inferred for this excerpt; the surrounding
# module is assumed to define the LOG, PROCESSING_LOG, and TRANSITION_LOG
# loggers and the builder.futures helper.
import concurrent.futures

import arrow

import builder.futures

try:
    import queue as Queue  # Python 3
except ImportError:
    import Queue  # Python 2


def start_execution(self, inline=True):
    """ Begin executing jobs """
    LOG.info("Starting execution")
    self.running = True
    self.start_time = arrow.now()
    self.executor.initialize()

    # Seed the work queue with the initially runnable jobs
    work_queue = self._work_queue
    next_jobs = self.get_jobs_to_run()
    for next_job_id in next_jobs:
        self.add_to_work_queue(next_job_id)

    # Start the completed-jobs consumer and watchdog threads if not inline
    executor = None
    if not inline:
        executor = builder.futures.ThreadPoolExecutor(max_workers=3)
        executor.submit(self._consume_completed_jobs, block=True)
        executor.submit(self._check_for_timeouts)
        executor.submit(self._check_for_passed_curfews)

    jobs_executed = 0
    ONEYEAR = 365 * 24 * 60 * 60  # appears unused in this excerpt
    tick = 0
    while (not work_queue.empty() or not inline) and self.running:
        PROCESSING_LOG.debug("EXECUTION_LOOP => Getting job from the work queue. Tick {}".format(tick))
        tick += 1
        # Poll with a short timeout so self.running is re-checked regularly
        try:
            job_id = work_queue.get(True, timeout=1)
        except Queue.Empty:
            continue

        self.last_job_worked_on = arrow.now()
        TRANSITION_LOG.debug("EXECUTION_LOOP => Got job {} from work queue".format(job_id))
        result = self.execute(job_id)
        #LOG.debug("EXECUTION_LOOP => Finished job {} from work queue".format(job_id))
        jobs_executed += 1

        if not isinstance(result, concurrent.futures.Future) and inline:
            if result.is_async():
                raise NotImplementedError("Cannot run an async executor inline")
            self._consume_completed_jobs(block=False)
        elif inline:
            TRANSITION_LOG.debug("EXECUTION_LOOP => Waiting on execution to complete")
            result.result()  # Wait for the job to complete
            self._consume_completed_jobs(block=False)
            TRANSITION_LOG.debug("EXECUTION_LOOP => Finished consuming completed jobs for {}".format(job_id))
        # else: the result is asynchronous and we are running asynchronously,
        # so let the _consume_completed_jobs thread add the new jobs

    LOG.debug("EXECUTION_LOOP => Executed {} jobs".format(jobs_executed))
    LOG.debug("EXECUTION_LOOP[work_queue] => Execution is exiting")

    if executor is not None:
        executor.shutdown(wait=True)
def _consume_completed_jobs(self, block=False):
    LOG.info("COMPLETION_LOOP => Consuming completed jobs")
    complete_queue = self._complete_queue
    tick = 0
    while (not complete_queue.empty() or block) and self.running:
        PROCESSING_LOG.debug("COMPLETION_LOOP => Getting job from the complete queue. Tick {}".format(tick))
        tick += 1
        try:
            job_id = complete_queue.get(True, timeout=1)
        except Queue.Empty:
            continue

        self.last_job_completed_on = arrow.now()
        self.completed_jobs += 1

        # Clear the recorded execution time for the finished job, if any
        try:
            job = self.build.get_job(job_id)
            del self.execution_times[job]
        except KeyError:
            pass

        TRANSITION_LOG.debug("COMPLETION_LOOP => Completed job {}".format(job_id))

        # Queue up the jobs this completion unblocks, skipping any that are
        # already running
        next_jobs = self.get_next_jobs_to_run(job_id)
        next_jobs = [next_id for next_id in next_jobs
                     if not self.build.get_job(next_id).is_running]
        TRANSITION_LOG.debug("COMPLETION_LOOP => Received completed job {}. Next jobs are {}".format(job_id, next_jobs))
        for next_id in next_jobs:
            self.add_to_work_queue(next_id)

    LOG.debug("COMPLETION_LOOP => Done consuming completed jobs")
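
# ---------------------------------------------------------------------------
# Hedged sketch (not part of the original source): the two loops above form a
# pair of queues feeding each other. The execution loop pulls job ids from the
# work queue and runs them; the completion loop pulls finished job ids,
# computes the jobs each completion unblocks, and pushes those back onto the
# work queue. Both loops poll get() with a short timeout so that clearing the
# running flag is noticed within about a second. The standalone example below
# reproduces that shape with stand-in names (work, done, run_job,
# next_jobs_after); none of them come from this module.

def _two_queue_sketch():
    import queue
    import threading

    work = queue.Queue()
    done = queue.Queue()
    running = threading.Event()
    running.set()

    # Toy dependency chain: finishing job n unblocks job n + 1, up to 3
    def next_jobs_after(job_id):
        return [job_id + 1] if job_id < 3 else []

    def run_job(job_id):
        done.put(job_id)  # pretend the job executed and report completion

    def execution_loop():
        while running.is_set():
            try:
                job_id = work.get(True, timeout=0.1)  # poll so shutdown is seen
            except queue.Empty:
                continue
            run_job(job_id)

    def completion_loop():
        while running.is_set():
            try:
                job_id = done.get(True, timeout=0.1)
            except queue.Empty:
                continue
            if job_id == 3:
                running.clear()  # cooperative shutdown once the chain ends
            for next_id in next_jobs_after(job_id):
                work.put(next_id)

    threads = [threading.Thread(target=execution_loop),
               threading.Thread(target=completion_loop)]
    for thread in threads:
        thread.start()
    work.put(1)  # seed the initial job, as start_execution does
    for thread in threads:
        thread.join()

# Calling _two_queue_sketch() runs toy jobs 1..3 and returns once the chain
# ends and both loops have observed the cleared running flag.
# ---------------------------------------------------------------------------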
def finish_job(self, job, result, update_job_cache=True):
    LOG.info("Job {} complete. Status: {}".format(job.get_id(), result.status))
    LOG.debug("{}(stdout): {}".format(job.get_id(), result.stdout))
    LOG.debug("{}(stderr): {}".format(job.get_id(), result.stderr))

    # Mark this job as finished running
    job.last_run = arrow.now()
    job.retries += 1
    job.is_running = False
    job.force = False

    if update_job_cache:
        target_ids = self.get_build_graph().get_target_ids(job.get_id())
        self._execution_manager.update_targets(target_ids)
        job_id = job.unique_id
        self.get_execution_manager().update_parents_should_run(job_id)

        # Update all of its dependents
        for target_id in target_ids:
            dependent_ids = self.get_build_graph().get_dependent_ids(target_id)
            for dependent_id in dependent_ids:
                dependent = self.get_build_graph().get_job(dependent_id)
                dependent.invalidate()

    # Check whether the job succeeded; if so, reset its retry count
    if not job.get_should_run_immediate():
        job.force = False
        job.retries = 0
    else:
        if job.retries >= self.get_execution_manager().max_retries:
            job.set_failed(True)
            job.invalidate()
            LOG.error("Maximum number of retries reached for {}".format(job))

    self.get_execution_manager().add_to_complete_queue(job.get_id())
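
# Hedged sketch (not part of the original source): the retry accounting in
# finish_job amounts to "reset the counter on success, fail permanently once
# the counter reaches a cap", where success is finish_job's
# `not job.get_should_run_immediate()` check. The helper below shows that
# rule in isolation; max_retries mirrors
# self.get_execution_manager().max_retries, and the (succeeded, retries)
# inputs are stand-ins for job state, not this module's API.

def _retry_rule_sketch(succeeded, retries, max_retries=3):
    """Return the (retries, failed) state after one finished run."""
    retries += 1  # every completed run counts against the cap, as above
    if succeeded:
        return 0, False       # success clears the counter
    if retries >= max_retries:
        return retries, True  # out of attempts: mark the job failed
    return retries, False     # still eligible for another attempt

# e.g. _retry_rule_sketch(False, 2) -> (3, True) with the default cap of 3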
def update_build_graph():
    # Closure over self, job_definition_id, build_context, kwargs,
    # update_topmost, and update_all from the enclosing submission call

    # Add the job
    LOG.debug("SUBMISSION => Expanding build graph for submission {} {}".format(job_definition_id, build_context))
    if self.build.rule_dependency_graph.is_job_definition(job_definition_id):
        build_update = self.build.add_job(job_definition_id, build_context, **kwargs)
    else:
        build_update = self.build.add_meta(job_definition_id, build_context, **kwargs)

    update_nodes = set()
    if update_topmost or update_all:
        for node_id in build_update.targets:
            if self.build.in_degree(node_id) == 0 or update_all:
                if self.build.is_target(node_id):
                    update_nodes.add(node_id)
        LOG.debug("UPDATE_SUBMITTED => {}".format(update_nodes))
        self.external_update_targets(update_nodes)

    LOG.debug("SUBMISSION => Build graph expansion complete")

    # Refresh all uncached existences
    LOG.debug("updating {} targets".format(len(build_update.new_targets)))
    self.update_targets(build_update.new_targets)

    # Invalidate the build graph for all child nodes
    newly_invalidated_job_ids = build_update.new_jobs | build_update.newly_forced
    LOG.debug("SUBMISSION => Newly invalidated job ids: {}".format(newly_invalidated_job_ids))
    LOG.debug("SUBMISSION => Jobs expanded: {}".format(build_update.jobs))
    for newly_invalidated_job_id in newly_invalidated_job_ids:
        self.update_parents_should_run(newly_invalidated_job_id)
        next_job_to_run_ids = self.get_next_jobs_to_run(
            newly_invalidated_job_id)
        LOG.debug("SUBMISSION => Next jobs {} -> {}".format(newly_invalidated_job_id, next_job_to_run_ids))
        for next_job_to_run_id in next_job_to_run_ids:
            self.add_to_work_queue(next_job_to_run_id)

    self.last_job_submitted_on = arrow.now()
    self.submitted_jobs += 1