def rerun_failed(self):
    """
    Makes this job rerun all its failed tasks.  Tasks that are done or are
    currently running are left untouched.
    """
    num_restarted = 0
    for task in self.tasks:
        if task.state == _WorkState.FAILED:
            task.state = None
            task.agent = None
            task.failures = 0
            db.session.add(task)
            num_restarted += 1

    self.completion_notify_sent = False
    self.update_state()
    db.session.add(self)

    if config.get("enable_statistics"):
        task_event_count = TaskEventCount(job_queue_id=self.job_queue_id,
                                          num_restarted=num_restarted)
        task_event_count.time_start = datetime.utcnow()
        task_event_count.time_end = datetime.utcnow()
        # The new row must be added to the session, otherwise it would never
        # be persisted by the commit below.
        db.session.add(task_event_count)

    db.session.commit()

    for child in self.children:
        child.rerun_failed()
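# A minimal, hypothetical usage sketch (not part of the model above): look up
# one job and rerun its failed tasks.  The surrounding application/session
# context and the `job_id` value are assumptions for illustration only.
job_id = 42  # hypothetical id of an existing job
job = Job.query.filter_by(id=job_id).first()
if job is not None:
    job.rerun_failed()  # commits the session and recurses into child jobs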
def alter_frame_range(self, start, end, by):
    """
    Changes this job's frame range to run `start` through `end` in steps of
    `by`.  Tasks for frames that are no longer required are deleted
    asynchronously; tasks for newly required frames are created.
    """
    # We have to import this down here instead of at the top to break a
    # circular dependency between the modules
    from pyfarm.scheduler.tasks import delete_task

    if end < start:
        raise ValueError("`end` must be greater than or equal to `start`")

    self.by = by

    required_frames = []
    current_frame = start
    while current_frame <= end:
        required_frames.append(current_frame)
        current_frame += by

    existing_tasks = Task.query.filter_by(job=self).all()
    frames_to_create = required_frames
    num_created = 0
    for task in existing_tasks:
        if task.frame not in required_frames:
            delete_task.delay(task.id)
        else:
            frames_to_create.remove(task.frame)

    for frame in frames_to_create:
        if self.num_tiles:
            for tile in range_(self.num_tiles - 1):
                num_created += 1
                task = Task()
                task.job = self
                task.frame = frame
                task.tile = tile
                task.priority = self.priority
                db.session.add(task)
        else:
            num_created += 1
            task = Task()
            task.job = self
            task.frame = frame
            task.priority = self.priority
            db.session.add(task)

    if frames_to_create:
        if self.state != WorkState.RUNNING:
            self.state = None

    if config.get("enable_statistics"):
        task_event_count = TaskEventCount(num_new=num_created,
                                          job_queue_id=self.job_queue_id)
        task_event_count.time_start = datetime.utcnow()
        task_event_count.time_end = datetime.utcnow()
        db.session.add(task_event_count)
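# To make the frame arithmetic above easier to check, here is a small
# self-contained sketch (names are illustrative, not part of the model) of
# the frame list alter_frame_range() builds: frames run from `start` in steps
# of `by`, and `end` is included only if it falls exactly on a step.
def _required_frames(start, end, by):
    frames = []
    current = start
    while current <= end:
        frames.append(current)
        current += by
    return frames

assert _required_frames(1, 10, 3) == [1, 4, 7, 10]
assert _required_frames(1, 9, 3) == [1, 4, 7]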
def consolidate_task_events_for_queue(job_queue_id):
    logger.debug("Consolidating task events for queue %s now", job_queue_id)
    consolidate_interval = timedelta(
        **config.get("task_event_count_consolidate_interval"))

    def add_task_count(consolidation_count, event_count, last_count):
        consolidation_count.num_new += event_count.num_new
        consolidation_count.num_deleted += event_count.num_deleted
        consolidation_count.num_restarted += event_count.num_restarted
        consolidation_count.num_started += event_count.num_started
        consolidation_count.num_failed += event_count.num_failed
        consolidation_count.num_done += event_count.num_done

    event_counts_query = TaskEventCount.query.filter_by(
        job_queue_id=job_queue_id).order_by(TaskEventCount.time_start)

    last_count = None
    open_consolidation_count = None
    for event_count in event_counts_query:
        # If current count is not consolidated yet
        if event_count.time_end - event_count.time_start < consolidate_interval:
            if not open_consolidation_count:
                open_consolidation_count = TaskEventCount(
                    job_queue_id=job_queue_id, num_new=0, num_deleted=0,
                    num_restarted=0, num_started=0, num_failed=0, num_done=0)
                open_consolidation_count.time_start = event_count.time_start
                open_consolidation_count.time_end = (
                    event_count.time_start + consolidate_interval)
                add_task_count(open_consolidation_count, event_count,
                               last_count)
                db.session.delete(event_count)
            else:
                # We know the event count does not fall into the period of the
                # next already existing consolidated count, because we sorted
                # the query by time_start, so the other consolidated count
                # would have come up before this unconsolidated one.
                while (event_count.time_start >
                       open_consolidation_count.time_end):
                    db.session.add(open_consolidation_count)
                    new_consolidation_count = TaskEventCount(
                        job_queue_id=job_queue_id, num_new=0, num_deleted=0,
                        num_restarted=0, num_started=0, num_failed=0,
                        num_done=0)
                    new_consolidation_count.time_start = (
                        open_consolidation_count.time_end)
                    new_consolidation_count.time_end = (
                        new_consolidation_count.time_start +
                        consolidate_interval)
                    open_consolidation_count = new_consolidation_count
                add_task_count(open_consolidation_count, event_count,
                               last_count)
                db.session.delete(event_count)
        else:
            if not open_consolidation_count:
                open_consolidation_count = event_count
            else:
                if event_count.time_start < open_consolidation_count.time_end:
                    add_task_count(open_consolidation_count, event_count,
                                   last_count)
                    db.session.delete(event_count)
                else:
                    db.session.add(open_consolidation_count)
                    open_consolidation_count = event_count

    if open_consolidation_count:
        db.session.add(open_consolidation_count)

    db.session.commit()
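# A self-contained sketch of the windowing logic above, using plain datetimes
# instead of TaskEventCount rows; all names here are invented for
# illustration.  Each unconsolidated event is folded into a window of length
# `interval`; when an event starts after the current window ends, the window
# is rolled forward until it covers the event again.
from datetime import datetime, timedelta

def _window_starts(event_starts, interval):
    windows = []
    window_start = None
    for start in sorted(event_starts):
        if window_start is None:
            window_start = start
        while start > window_start + interval:
            window_start += interval
        windows.append(window_start)
    return windows

interval = timedelta(minutes=10)
events = [datetime(2016, 1, 1, 12, 0), datetime(2016, 1, 1, 12, 4),
          datetime(2016, 1, 1, 12, 25)]
# The first two events share the window starting at 12:00, the third lands
# in the window starting at 12:20.
assert _window_starts(events, interval) == [
    datetime(2016, 1, 1, 12, 0), datetime(2016, 1, 1, 12, 0),
    datetime(2016, 1, 1, 12, 20)]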