Exemplo n.º 1
0
    def __init__(self, parent, backend, job_id, name):
        """ Build a Job owned by ``parent`` and commit it straight away.

        Args:
            parent: owning object; its ``event_handler`` is reused here.
            backend: backend object kept as ``self.backend``.
            job_id: identifier kept as ``self.job_id``.
            name: name kept as ``self.name``.
        """
        message = 'Starting Job instance constructor with name {0}'.format(
            name)
        logger.debug(message)
        super(Job, self).__init__()

        # collaborators and identity
        self.parent, self.backend = parent, backend
        self.event_handler = self.parent.event_handler
        self.job_id, self.name = job_id, name
        self.state = JobState()

        # tasks are looked up by name because Task objects are not hashable
        self.tasks = {}

        # scheduling and run bookkeeping, all unset until first used
        self.next_run = None
        self.cron_schedule = None
        self.cron_iter = None
        self.run_log = None
        self.completion_lock = threading.Lock()
        self.notes = None

        self.snapshot = None

        # a new Job begins in the 'waiting' status and is persisted at once
        self._set_status('waiting')
        self.commit()
Exemplo n.º 2
0
    def _complete_task(self, task_name, **kwargs):
        """ Marks this task as completed. Kwargs are stored in the run log.

        After recording the result, every downstream task of ``task_name``
        in the current snapshot is offered to ``_start_if_ready``, the run
        log is committed under the backend lock, a ``task_failed`` event is
        emitted when ``success`` equals False, and finally the job-level
        completion check runs.

        Args:
            task_name: name of the task that finished.
            **kwargs: result fields; stored verbatim as the task's run log
                entry. ``success`` decides whether the failure event fires.
        """

        logger.debug('Job {0} marking task {1} as completed'.format(
            self.name, task_name))
        self.run_log['tasks'][task_name] = kwargs

        for node in self.downstream(task_name, self.snapshot):
            self._start_if_ready(node)

        # Best effort: a failed commit is logged, not raised, so the
        # completion flow below still runs. Only Exception is caught so
        # that KeyboardInterrupt / SystemExit are not swallowed.
        try:
            self.backend.acquire_lock()
            self._commit_run_log()
        except Exception:
            logger.exception("Error in committing run log.")
        finally:
            self.backend.release_lock()

        # '== False' is deliberate: a missing / None 'success' is not a
        # failure, only an explicit False-equal value is.
        if kwargs.get('success', None) == False:
            task = self.tasks[task_name]
            try:
                self.backend.acquire_lock()
                if self.event_handler:
                    self.event_handler.emit(
                        'task_failed', task._serialize(include_run_logs=True))
            except Exception:
                logger.exception("Error in handling events.")
            finally:
                self.backend.release_lock()

        self._on_completion()
Exemplo n.º 3
0
 def delete(self):
     """ Remove this Dagobah instance from the Backend.

     The in-memory job list and created-job counter are cleared first,
     then the backend is asked to delete the record for ``dagobah_id``.
     """
     message = 'Deleting Dagobah instance with ID {0}'.format(
         self.dagobah_id)
     logger.debug(message)
     self.jobs, self.created_jobs = [], 0
     self.backend.delete_dagobah(self.dagobah_id)
Exemplo n.º 4
0
    def schedule(self, cron_schedule, base_datetime=None):
        """ Set, replace or clear the Cron schedule of this job.

        Args:
            cron_schedule: Cron expression, or None to remove the schedule.
            base_datetime: moment the schedule is computed from; the
                current UTC time is used when it is None.

        Raises:
            DagobahError: if the job's state forbids a schedule change.
        """

        logger.debug('Scheduling job {0} with cron schedule {1}'.format(
            self.name, cron_schedule))
        state = self.state
        if not state.allow_change_schedule:
            raise DagobahError(
                "job's schedule cannot be changed in state: %s" %
                state.status)

        if cron_schedule is not None:
            start_from = base_datetime
            if start_from is None:
                start_from = datetime.utcnow()
            self.cron_schedule = cron_schedule
            self.cron_iter = croniter(cron_schedule, start_from)
            self.next_run = self.cron_iter.get_next(datetime)
        else:
            # unscheduling clears all three schedule fields
            self.cron_schedule = None
            self.cron_iter = None
            self.next_run = None

        logger.debug('Determined job {0} next run of {1}'.format(
            self.name, self.next_run))
        self.commit()
Exemplo n.º 5
0
    def start(self):
        """ Kick off a job run by starting every task without dependencies.

        Raises:
            DagobahError: if the job's state does not allow starting.
        """

        logger.info('Job {0} starting job run'.format(self.name))
        if not self.state.allow_start:
            raise DagobahError('job cannot be started in its current state; ' +
                               'it is probably already running')

        self.initialize_snapshot()

        # a manual run ahead of schedule must not advance the cron iterator
        schedule_is_due = self.cron_iter and datetime.utcnow() > self.next_run
        if schedule_is_due:
            self.next_run = self.cron_iter.get_next(datetime)

        fresh_log = {
            'job_id': self.job_id,
            'name': self.name,
            'parent_id': self.parent.dagobah_id,
            'log_id': self.backend.get_new_log_id(),
            'start_time': datetime.utcnow(),
            'tasks': {}
        }
        self.run_log = fresh_log
        self._set_status('running')

        logger.debug('Job {0} resetting all tasks prior to start'.format(
            self.name))
        for each_task in self.tasks.values():
            each_task.reset()

        logger.debug('Job {0} seeding run logs'.format(self.name))
        for root_name in self.ind_nodes(self.snapshot):
            self._put_task_in_run_log(root_name)
            self.tasks[root_name].start()

        self._commit_run_log()
Exemplo n.º 6
0
 def _task_complete(self, **kwargs):
     """ Record this Task's outcome and hand the result to its Job.

     Runs while holding the parent job's ``completion_lock``; all kwargs
     are forwarded unchanged to the job's ``_complete_task``.
     """
     logger.debug('Running _task_complete for task {0}'.format(self.name))
     job = self.parent_job
     with job.completion_lock:
         self.completed_at = datetime.utcnow()
         self.successful = kwargs.get('success')
         job._complete_task(self.name, **kwargs)
Exemplo n.º 7
0
    def __init__(self, parent_job, command, name,
                 soft_timeout=0, hard_timeout=0, hostname=None):
        """ Build a Task belonging to ``parent_job`` and commit that job.

        Args:
            parent_job: owning job; its backend and event handler are reused.
            command: command kept as ``self.command``.
            name: name of the task.
            soft_timeout: value handed to ``set_soft_timeout``.
            hard_timeout: value handed to ``set_hard_timeout``.
            hostname: kept as ``self.hostname``; defaults to None.
        """
        logger.debug('Starting Task instance constructor with name {0}'.format(name))

        # ownership and definition
        self.parent_job = parent_job
        self.backend = parent_job.backend
        self.event_handler = parent_job.event_handler
        self.command, self.name, self.hostname = command, name, hostname

        # execution handles and captured output
        self.remote_channel = None
        self.process = None
        self.stdout = ""
        self.stderr = ""
        self.stdout_file = None
        self.stderr_file = None

        self.timer = None

        # timing and outcome of the latest run
        self.started_at = None
        self.completed_at = None
        self.successful = None

        # flags raised while a run is being stopped or has failed remotely
        self.terminate_sent = False
        self.kill_sent = False
        self.remote_failure = False

        self.set_soft_timeout(soft_timeout)
        self.set_hard_timeout(hard_timeout)

        self.parent_job.commit()
Exemplo n.º 8
0
    def check_complete(self):
        """ Run the completion flow if this task has finished.

        While the task is still running the check timer is re-armed and
        nothing else happens.
        """
        logger.debug('Running check_complete for task {0}'.format(self.name))

        # still running somewhere: poll again later
        still_running = (self.remote_not_complete() or
                         self.local_not_complete())
        if still_running:
            self._start_check_timer()
            return

        return_code = self.completed_task()

        # append notices explaining an abnormal end to stderr
        if self.terminate_sent:
            self.stderr += '\nDAGOBAH SENT SIGTERM TO THIS PROCESS\n'
        if self.kill_sent:
            self.stderr += '\nDAGOBAH SENT SIGKILL TO THIS PROCESS\n'
        if self.remote_failure:
            # a remote failure overrides whatever return code was collected
            return_code = -1
            self.stderr += '\nAn error occurred with the remote machine.\n'

        self.stdout_file = None
        self.stderr_file = None

        outcome = dict(success=(return_code == 0),
                       return_code=return_code,
                       stdout=self.stdout,
                       stderr=self.stderr,
                       start_time=self.started_at,
                       complete_time=datetime.utcnow())
        self._task_complete(**outcome)
Exemplo n.º 9
0
    def update_job_notes(self, notes):
        """ Replace this job's notes and commit the parent with cascade.

        Raises:
            DagobahError: if the job's state does not allow editing.
        """
        logger.debug('Job {0} updating notes'.format(self.name))
        editable = self.state.allow_edit_job
        if not editable:
            raise DagobahError('job cannot be edited in its current state')

        self.notes = notes
        self.parent.commit(cascade=True)
Exemplo n.º 10
0
 def from_backend(self, dagobah_id):
     """ Rebuild this Dagobah instance from its backend record.

     Raises:
         DagobahError: if the backend returns nothing for ``dagobah_id``.
     """
     logger.debug('Reconstructing Dagobah instance from backend with ID {0}'.format(dagobah_id))
     stored = self.backend.get_dagobah_json(dagobah_id)
     if stored:
         self._construct_from_json(stored)
         return
     raise DagobahError('dagobah with id %s does not exist '
                        'in backend' % dagobah_id)
Exemplo n.º 11
0
 def _put_task_in_run_log(self, task_name):
     """ Create the run log entry for a task that is about to start.

     The entry holds the current UTC time and the task's command; any
     earlier entry stored under the same task name is replaced.
     """
     logger.debug('Job {0} initializing run log entry for task {1}'.format(
         self.name, task_name))
     self.run_log['tasks'][task_name] = {
         'start_time': datetime.utcnow(),
         'command': self.tasks[task_name].command
     }
Exemplo n.º 12
0
    def commit(self, cascade=False):
        """ Commit this Dagobah instance to the backend.

        Args:
            cascade: when True, every Job in ``self.jobs`` is committed as
                well, after the Dagobah record itself has been written.
        """
        logger.debug('Committing Dagobah instance with cascade={0}'.format(cascade))
        self.backend.commit_dagobah(self._serialize())
        if cascade:
            # plain loop: the commits are side effects, no list is needed
            for job in self.jobs:
                job.commit()
Exemplo n.º 13
0
 def delete_job(self, job_name):
     """ Remove the first job called ``job_name`` and commit the change.

     Raises:
         DagobahError: if no job carries that name.
     """
     logger.debug('Deleting job {0}'.format(job_name))
     for position, candidate in enumerate(self.jobs):
         if candidate.name != job_name:
             continue
         # found it: drop it from the backend first, then from memory
         self.backend.delete_job(candidate.job_id)
         del self.jobs[position]
         self.commit()
         return
     raise DagobahError('no job with name %s exists' % job_name)
Exemplo n.º 14
0
    def _timeout_check(self):
        """ Terminate or kill this task once a configured timeout has passed.

        A timeout of 0 disables the corresponding check. ``terminate`` is
        called once the soft timeout is reached and ``kill`` once the hard
        timeout is reached, each only if it has not already been sent.
        """
        logger.debug('Running timeout check for task {0}'.format(self.name))

        # A task that has not started has no elapsed time to measure.
        if self.started_at is None:
            return

        # total_seconds() counts whole days too; timedelta.seconds only
        # holds the 0..86399 remainder and would wrap after 24 hours.
        elapsed = (datetime.utcnow() - self.started_at).total_seconds()

        if (self.soft_timeout != 0 and
                elapsed >= self.soft_timeout and
                not self.terminate_sent):
            self.terminate()

        if (self.hard_timeout != 0 and
                elapsed >= self.hard_timeout and
                not self.kill_sent):
            self.kill()
Exemplo n.º 15
0
    def add_job_from_json(self, job_json, destructive=False):
        """ Build a new Job out of an imported JSON spec.

        Args:
            job_json: JSON document, decoded by the backend.
            destructive: when True, a job already using the spec's name is
                deleted before the new one is added.
        """
        logger.debug('Importing job from JSON document: {0}'.format(job_json))
        spec = self.backend.decode_import_json(job_json)

        if destructive:
            try:
                self.delete_job(spec['name'])
            except DagobahError:
                # nothing to replace: no job currently carries this name
                pass

        self._add_job_from_spec(spec, use_job_id=False)
        self.commit(cascade=True)
Exemplo n.º 16
0
    def add_dependency(self, from_task_name, to_task_name):
        """ Add an edge from one task to another and commit the job.

        Raises:
            DagobahError: if the job's state forbids graph changes.
        """

        logger.debug('Adding dependency from {0} to {1}'.format(
            from_task_name, to_task_name))
        state = self.state
        if not state.allow_change_graph:
            raise DagobahError(
                "job's graph is immutable in its current state: %s" %
                state.status)

        self.add_edge(from_task_name, to_task_name)
        self.commit()
Exemplo n.º 17
0
 def _start_if_ready(self, task_name):
     """ Start the task once every dependency has finished successfully.

     A dependency counts as successful only when its run log entry has a
     ``success`` value equal to True; otherwise nothing is started.
     """
     logger.debug('Job {0} running _start_if_ready for task {1}'.format(
         self.name, task_name))
     task = self.tasks[task_name]
     upstream = self._dependencies(task_name, self.snapshot)
     ready = all(
         self.run_log['tasks'].get(name, {}).get('success', False) == True
         for name in upstream)
     if not ready:
         return
     self._put_task_in_run_log(task_name)
     task.start()
Exemplo n.º 18
0
    def add_job(self, job_name, job_id=None):
        """ Create a new, empty Job and commit it.

        Args:
            job_name: name for the new job.
            job_id: id to use; when falsy, a new id is requested from the
                backend and the created-jobs counter is incremented.

        Raises:
            DagobahError: if ``job_name`` is not available.
        """
        logger.debug('Creating a new job named {0}'.format(job_name))
        name_free = self._name_is_available(job_name)
        if not name_free:
            raise DagobahError('name %s is not available' % job_name)

        if not job_id:
            job_id = self.backend.get_new_job_id()
            self.created_jobs += 1

        new_job = Job(self, self.backend, job_id, job_name)
        self.jobs.append(new_job)

        # look the job up by name again before committing it
        self.get_job(job_name).commit()
Exemplo n.º 19
0
 def __init__(self, backend=None, event_handler=None,
              ssh_config=None):
     """ Construct a new Dagobah instance with a specified Backend.

     Args:
         backend: backend used for ids and persistence. When None, a new
             ``BaseBackend`` is created for this instance.
         event_handler: kept as ``self.event_handler``; may be None.
         ssh_config: kept as ``self.ssh_config``; may be None.
     """
     logger.debug('Starting Dagobah instance constructor')
     # Create the default per instance: a default written in the signature
     # is evaluated once and would be shared by every Dagobah.
     if backend is None:
         backend = BaseBackend()
     self.backend = backend
     self.event_handler = event_handler
     self.dagobah_id = self.backend.get_new_dagobah_id()
     self.jobs = []
     self.created_jobs = 0
     self.scheduler = Scheduler(self)
     self.scheduler.daemon = True
     self.ssh_config = ssh_config
     self.scheduler.start()
     self.commit()
Exemplo n.º 20
0
    def initialize_snapshot(self):
        """ Copy the DAG and validate it, then keep the copy as the snapshot.

        A warning is logged if an earlier snapshot is still present; it is
        overwritten once the new copy validates.

        Raises:
            DagobahError: if validation of the copied graph fails; the
                previous snapshot is left untouched in that case.
        """
        logger.debug('Initializing DAG snapshot for job {0}'.format(self.name))
        if self.snapshot is not None:
            # Logger.warn is a deprecated alias of Logger.warning
            logger.warning("Attempting to initialize DAG snapshot without " +
                           "first destroying old snapshot.")

        snapshot_to_validate = deepcopy(self.graph)

        is_valid, reason = self.validate(snapshot_to_validate)
        if not is_valid:
            raise DagobahError(reason)

        self.snapshot = snapshot_to_validate
Exemplo n.º 21
0
    def delete_task(self, task_name):
        """ Remove the named Task from this Job and commit the job.

        Raises:
            DagobahError: if the job's state forbids graph changes, or if
                no task carries that name.
        """

        logger.debug('Deleting task {0}'.format(task_name))
        state = self.state
        if not state.allow_change_graph:
            raise DagobahError(
                "job's graph is immutable in its current state: %s" %
                state.status)

        known = task_name in self.tasks
        if not known:
            raise DagobahError('task %s does not exist' % task_name)

        # drop the lookup entry, then the graph node
        del self.tasks[task_name]
        self.delete_node(task_name)
        self.commit()
Exemplo n.º 22
0
    def add_task(self, command, name=None, **kwargs):
        """ Add a new Task to the graph as a node without edges.

        Args:
            command: command for the task.
            name: task name; the command itself is used when it is None.
            **kwargs: forwarded to the Task constructor.

        Raises:
            DagobahError: if the job's state forbids graph changes.
        """

        logger.debug('Adding task with command {0} to job {1}'.format(
            command, self.name))
        state = self.state
        if not state.allow_change_graph:
            raise DagobahError(
                "job's graph is immutable in its current state: %s" %
                state.status)

        task_label = command if name is None else name
        self.tasks[task_label] = Task(self, command, task_label, **kwargs)
        self.add_node(task_label)
        self.commit()
Exemplo n.º 23
0
    def reset(self):
        """ Put this Task back into a clean state before execution.

        New temporary files are opened for stdout and stderr, and the
        captured output, timestamps, outcome and stop flags are cleared.
        """

        logger.debug('Resetting task {0}'.format(self.name))

        # new capture files come first, as the only step that can fail
        self.stdout_file = tempfile.TemporaryFile()
        self.stderr_file = tempfile.TemporaryFile()

        self.stdout, self.stderr = "", ""

        self.started_at = self.completed_at = self.successful = None

        self.terminate_sent = self.kill_sent = self.remote_failure = False
Exemplo n.º 24
0
    def add_task_to_job(self, job_or_job_name, task_command, task_name=None,
                        **kwargs):
        """ Add a task to one of the jobs owned by this Dagobah instance.

        Args:
            job_or_job_name: a Job, or the name of a job to look up.
            task_command: command for the new task.
            task_name: optional task name, passed on to the job.
            **kwargs: forwarded to the job's ``add_task``.

        Raises:
            DagobahError: if the job is not found, or if its state forbids
                graph changes.
        """

        job = (job_or_job_name if isinstance(job_or_job_name, Job)
               else self.get_job(job_or_job_name))

        if not job:
            raise DagobahError('job %s does not exist' % job_or_job_name)

        logger.debug('Adding task with command {0} to job {1}'.format(task_command, job.name))

        job_state = job.state
        if not job_state.allow_change_graph:
            raise DagobahError("job's graph is immutable in its current " +
                               "state: %s"
                               % job_state.status)

        job.add_task(task_command, task_name, **kwargs)
        job.commit()
Exemplo n.º 25
0
    def edit_task(self, task_name, **kwargs):
        """ Edit a Task owned by this Job.

        Renaming will affect the historical data available for this
        Task, e.g. past run logs will no longer be accessible.

        Args:
            task_name: current name of the task to edit.
            **kwargs: fields to change. ``name`` and ``command`` are applied
                only when given as ``str``; ``soft_timeout``,
                ``hard_timeout`` and ``hostname`` are passed to the task's
                setters. Passing the task's current name is not a rename.

        Raises:
            DagobahError: if the job's state forbids editing tasks, the
                task does not exist, or the new name is already used by
                another task.
        """

        logger.debug('Job {0} editing task {1}'.format(self.name, task_name))
        if not self.state.allow_edit_task:
            raise DagobahError("tasks cannot be edited in this job's " +
                               "current state")

        if task_name not in self.tasks:
            raise DagobahError('task %s not found' % task_name)

        new_name = kwargs.get('name')
        # Re-submitting the current name is not a rename, so it must not
        # trip the "name unavailable" check against the task itself.
        renaming = isinstance(new_name, str) and new_name != task_name
        if renaming and new_name in self.tasks:
            raise DagobahError('task name %s is unavailable' % new_name)

        task = self.tasks[task_name]
        for key in ['name', 'command']:
            if key in kwargs and isinstance(kwargs[key], str):
                setattr(task, key, kwargs[key])

        if 'soft_timeout' in kwargs:
            task.set_soft_timeout(kwargs['soft_timeout'])

        if 'hard_timeout' in kwargs:
            task.set_hard_timeout(kwargs['hard_timeout'])

        if 'hostname' in kwargs:
            task.set_hostname(kwargs['hostname'])

        if renaming:
            # move the graph edges and the lookup entry to the new name
            self.rename_edges(task_name, new_name)
            self.tasks[new_name] = task
            del self.tasks[task_name]

        self.parent.commit(cascade=True)
Exemplo n.º 26
0
    def edit(self, **kwargs):
        """ Rename this Job.

        Renaming will affect the historical data available for this
        Job, e.g. past run logs will no longer be accessible. Only a
        ``name`` keyword holding a ``str`` is applied.

        Raises:
            DagobahError: if the job's state forbids editing, or the parent
                reports the new name as unavailable.
        """

        requested = kwargs.get('name')
        logger.debug('Job {0} changing name to {1}'.format(
            self.name, requested))
        if not self.state.allow_edit_job:
            raise DagobahError('job cannot be edited in its current state')

        has_new_name = isinstance(requested, str)
        if has_new_name and not self.parent._name_is_available(requested):
            raise DagobahError('new job name %s is not available' %
                               requested)

        if has_new_name:
            self.name = requested

        self.parent.commit(cascade=True)
Exemplo n.º 27
0
    def retry(self):
        """ Restarts failed tasks of a job.

        A task is retried when its run log entry has a ``success`` value
        equal to False. The snapshot is initialized only after at least one
        such task has been found, so a rejected retry leaves none behind.

        Raises:
            DagobahError: if the run log holds no failed task, or if the
                snapshot fails validation.
        """

        logger.info('Job {0} retrying all failed tasks'.format(self.name))

        # run_log is None before the first run and {} after a clean run,
        # so read it defensively instead of failing with a TypeError/KeyError
        task_logs = (self.run_log or {}).get('tasks', {})
        failed_task_names = [task_name
                             for task_name, log in task_logs.items()
                             if log.get('success', True) == False]

        if not failed_task_names:
            raise DagobahError('no failed tasks to retry')

        self.initialize_snapshot()

        self._set_status('running')
        self.run_log['last_retry_time'] = datetime.utcnow()

        logger.debug('Job {0} seeding run logs'.format(self.name))
        for task_name in failed_task_names:
            self._put_task_in_run_log(task_name)
            self.tasks[task_name].start()

        self._commit_run_log()
Exemplo n.º 28
0
    def _on_completion(self):
        """ Checks to see if the Job has completed, and cleans up if it has.

        Does nothing unless the job is 'running' and ``_is_complete()`` is
        true. If any run log entry lacks ``success`` or has it equal to
        False, the job becomes 'failed' and a ``job_failed`` event is
        emitted. Otherwise the job returns to 'waiting', the run log is
        cleared and a ``job_complete`` event is emitted. The snapshot is
        destroyed in both cases.
        """

        logger.debug('Job {0} running _on_completion check'.format(self.name))
        if self.state.status != 'running' or (not self._is_complete()):
            return

        # An entry with no 'success' key, or with a value equal to False,
        # marks the whole job as failed.
        any_failed = any(results.get('success', False) == False
                         for results in self.run_log['tasks'].values())
        if any_failed:
            self._set_status('failed')
            # Event delivery is best effort; only Exception is caught so
            # KeyboardInterrupt / SystemExit are not swallowed.
            try:
                self.backend.acquire_lock()
                if self.event_handler:
                    self.event_handler.emit(
                        'job_failed',
                        self._serialize(include_run_logs=True))
            except Exception:
                logger.exception("Error in handling events.")
            finally:
                self.backend.release_lock()

        if self.state.status != 'failed':
            self._set_status('waiting')
            self.run_log = {}
            try:
                self.backend.acquire_lock()
                if self.event_handler:
                    self.event_handler.emit(
                        'job_complete', self._serialize(include_run_logs=True))
            except Exception:
                logger.exception("Error in handling events.")
            finally:
                self.backend.release_lock()

        self.destroy_snapshot()
Exemplo n.º 29
0
 def commit(self):
     """ Write this Job's serialized form to the backend, then commit
     the parent. """
     logger.debug('Committing job {0}'.format(self.name))
     serialized = self._serialize()
     self.backend.commit_job(serialized)
     self.parent.commit()
Exemplo n.º 30
0
 def destroy_snapshot(self):
     """ Discard the active DAG snapshot by resetting it to None. """
     message = 'Destroying DAG snapshot for job {0}'.format(self.name)
     logger.debug(message)
     self.snapshot = None