Example #1
class JobManager(object):
    _log = attrs_extra.log('%s.JobManager' % __name__)

    def api_create_job(self,
                       job_name,
                       job_desc,
                       job_type,
                       job_settings,
                       project_id,
                       user_id,
                       manager_id,
                       priority=50,
                       *,
                       start_paused=False):
        """Creates a job, returning a dict with its generated fields."""

        job = {
            'name': job_name,
            'description': job_desc,
            'job_type': job_type,
            'project': project_id,
            'user': user_id,
            'manager': manager_id,
            'status': 'under-construction',
            'priority': int(priority),
            'settings': copy.deepcopy(job_settings),
        }
        if start_paused:
            job['start_paused'] = True

        self._log.info('Creating job %r for user %s and manager %s', job_name,
                       user_id, manager_id)

        r, _, _, status = current_app.post_internal('flamenco_jobs', job)
        if status != 201:
            self._log.error('Status should be 201, not %i: %s', status, r)
            raise ValueError('Unable to create Flamenco job, status code %i' %
                             status)

        job.update(r)
        return job

    def jobs_for_project(self, project_id, *, archived=False):
        """Returns the jobs for the given project.

        :returns: {'_items': [job, job, ...], '_meta': {Eve metadata}}
        """
        from .sdk import Job

        # Eve doesn't support '$eq' :(
        status_q = 'archived' if archived else {'$ne': 'archived'}
        where = {'project': project_id, 'status': status_q}

        api = pillar_api()
        try:
            j = Job.all(
                {
                    'where': where,
                    'sort': [('_updated', -1), ('_created', -1)],
                },
                api=api)
        except pillarsdk.ResourceNotFound:
            return {'_items': [], '_meta': {'total': 0}}
        return j

    def job_status_summary(self, project_id):
        """Returns number of shots per shot status for the given project.

        :rtype: ProjectSummary
        """
        from .sdk import Job

        api = pillar_api()

        # TODO: turn this into an aggregation call to do the counting on
        # MongoDB.
        try:
            jobs = Job.all(
                {
                    'where': {
                        'project': project_id,
                    },
                    'projection': {
                        'status': 1,
                    },
                    'order': [
                        ('status', 1),
                    ],
                },
                api=api)
        except pillarsdk.ResourceNotFound:
            return ProjectSummary()

        # FIXME: this breaks when we hit the pagination limit.
        summary = ProjectSummary()
        for job in jobs['_items']:
            summary.count(job['status'])

        return summary

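    # A sketch (not the shipped implementation) of the aggregation suggested
    # by the TODO in job_status_summary() above, letting MongoDB do the
    # per-status counting instead of fetching every job document:
    #
    #   pipeline = [
    #       {'$match': {'project': project_id}},
    #       {'$group': {'_id': '$status', 'count': {'$sum': 1}}},
    #   ]
    #   for row in current_flamenco.db('jobs').aggregate(pipeline):
    #       ...  # feed row['_id'] and row['count'] into the ProjectSummary
    #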
    def update_job_after_task_status_change(self, job_id, task_id,
                                            new_task_status):
        """Updates the job status based on the status of this task and other tasks in the job.
        """

        jobs_coll = current_flamenco.db('jobs')
        tasks_coll = current_flamenco.db('tasks')

        def __job_status_if_a_then_b(if_status: str, then_new_status: str):
            """Set job to active if it was queued."""

            job = jobs_coll.find_one(job_id, projection={'status': 1})
            if job['status'] == if_status:
                self._log.info(
                    'Job %s became %s because one of its tasks %s changed '
                    'status to %s', job_id, then_new_status, task_id,
                    new_task_status)
                self.api_set_job_status(job_id, then_new_status)

        if new_task_status == 'queued':
            # Re-queueing a task on a completed job should re-queue the job too.
            __job_status_if_a_then_b('completed', 'queued')
            return

        if new_task_status == 'claimed-by-manager':
            # See if there are any active tasks left. If the job was active, but a task
            # went to 'claimed-by-manager', the task was likely active and is now re-queued.
            statuses = tasks_coll.distinct('status', {'job': job_id})
            if 'active' not in statuses:
                __job_status_if_a_then_b('active', 'queued')
            return

        if new_task_status == 'cancel-requested':
            # 'claimed-by-manager' was already handled above; cancelling a
            # single task has no influence on the job itself.
            return

        if new_task_status == 'canceled':
            # Only trigger cancellation/failure of the job if that was actually requested.
            # A user can also cancel a single task from the Server web UI or API.
            job = jobs_coll.find_one(job_id, projection={'status': 1})
            job_status = job['status']
            if job_status in {'cancel-requested', 'fail-requested'}:
                # This could be the last cancel-requested task to go to 'canceled'.
                statuses = tasks_coll.distinct('status', {'job': job_id})
                if 'cancel-requested' not in statuses:
                    self._log.info(
                        'Last task %s of job %s went from cancel-requested to canceled',
                        task_id, job_id)
                    next_status = job_status.replace('-requested', 'ed')
                    self.api_set_job_status(job_id, next_status)
            return

        if new_task_status == 'failed':
            # Count the number of failed tasks. If it is more than 10%, fail the job.
            total_count = tasks_coll.count_documents({'job': job_id})
            fail_count = tasks_coll.count_documents({
                'job': job_id,
                'status': 'failed'
            })
            fail_perc = fail_count / float(total_count) * 100
            if fail_perc >= TASK_FAIL_JOB_PERCENTAGE:
                msg = f'Failing job {job_id} because {fail_count} of its {total_count} tasks ' \
                    f'({int(fail_perc)}%) failed'
                self._log.info(msg)
                self.api_set_job_status(job_id, 'failed', reason=msg)
            else:
                self._log.info(
                    'Task %s of job %s failed; '
                    'only %i of its %i tasks failed (%i%%), so ignoring for now',
                    task_id, job_id, fail_count, total_count, fail_perc)
                __job_status_if_a_then_b('queued', 'active')
            return

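        # Worked example of the failure rule above, assuming
        # TASK_FAIL_JOB_PERCENTAGE is 10 (matching the comment): in a job of
        # 25 tasks the 2nd failure gives 2 / 25 * 100 = 8% (job keeps going),
        # while the 3rd gives 12%, failing the whole job.
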
        if new_task_status in {'active', 'processing', 'soft-failed'}:
            job = jobs_coll.find_one(job_id, projection={'status': 1})
            if job['status'] not in {
                    'active', 'fail-requested', 'cancel-requested'
            }:
                self._log.info(
                    'Job %s became active because one of its tasks %s changed '
                    'status to %s', job_id, task_id, new_task_status)
                self.api_set_job_status(job_id, 'active')
            return

        if new_task_status == 'completed':
            # Maybe all tasks are completed, which should complete the job.
            statuses = tasks_coll.distinct('status', {'job': job_id})
            if statuses == ['completed']:
                self._log.info(
                    'All tasks (last one was %s) of job %s are completed, '
                    'setting job to completed.', task_id, job_id)
                self.api_set_job_status(job_id, 'completed')
            else:
                __job_status_if_a_then_b('queued', 'active')
            return

        self._log.warning(
            'Task %s of job %s obtained status %s, '
            'which we do not know how to handle.', task_id, job_id,
            new_task_status)

    def web_set_job_status(self, job_id, new_status):
        """Web-level call to updates the job status."""
        from .sdk import Job

        api = pillar_api()
        job = Job({'_id': job_id})
        job.patch({'op': 'set-job-status', 'status': new_status}, api=api)

    def api_set_job_status(
            self,
            job_id: ObjectId,
            new_status: str,
            *,
            reason='',
            now: typing.Optional[datetime.datetime] = None) -> pymongo.results.UpdateResult:
        """API-level call to update the job status."""
        assert new_status
        self._log.debug('Setting job %s status to "%s", reason: %r', job_id,
                        new_status, reason)

        jobs_coll = current_flamenco.db('jobs')
        curr_job = jobs_coll.find_one({'_id': job_id},
                                      projection={'status': 1})
        old_status = curr_job['status']

        if reason:
            extra_updates = {
                'status_reason': reason
            }  # type: typing.Optional[dict]
        else:
            extra_updates = None

        # Go through all necessary status transitions.
        result = None  # make sure that 'result' always has a value.
        while new_status:
            result = current_flamenco.update_status(
                'jobs',
                job_id,
                new_status,
                extra_updates=extra_updates,
                now=now)
            extra_updates = None  # Only pass it to the first status update change.
            next_status = self.handle_job_status_change(
                job_id, old_status, new_status)
            old_status, new_status = new_status, next_status

        return result

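    # Example of the transition loop in api_set_job_status() above: setting a
    # completed job to 'requeued' first triggers _do_requeue(), which flips
    # the tasks back to 'queued' and returns 'queued' as the next job status;
    # the second iteration runs _do_check_completion(), which returns ''
    # because the tasks are no longer completed, ending the loop. One API
    # call can thus walk a job through several statuses.
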
    def handle_job_status_change(self, job_id: ObjectId, old_status: str,
                                 new_status: str) -> str:
        """Updates task statuses based on this job status transition.

        :returns: the new job status, if this status transition should be
            followed by another one, otherwise an empty string.
        """
        self._log.info('status transition job_id %s from %r to %r', job_id,
                       old_status, new_status)

        if new_status in {'completed', 'canceled'}:
            # Nothing to do; this will happen as a response to all tasks receiving this status.
            return ''
        elif new_status == 'active':
            # Nothing to do; this happens when a task gets started, which has nothing to
            # do with other tasks in the job.
            return ''
        elif new_status in {'cancel-requested', 'failed', 'fail-requested'}:
            return self._do_cancel_tasks(job_id, old_status, new_status)
        elif new_status == 'requeued':
            return self._do_requeue(job_id, old_status, new_status)
        elif new_status == 'queued':
            return self._do_check_completion(job_id, new_status)
        return ''

    def _do_cancel_tasks(self, job_id, old_status, new_status) -> str:
        """Directly cancel any task that might run in the future.

        Only cancels tasks that haven't been touched by a manager yet;
        otherwise it requests the Manager to cancel the tasks.

        :returns: the next job status, if a status change is required.
        """

        current_flamenco.update_status_q('tasks', {
            'job': job_id,
            'status': 'queued'
        }, 'canceled')
        # Request cancel of any task that might run on the manager.
        cancelreq_result = current_flamenco.update_status_q(
            'tasks', {
                'job': job_id,
                'status': {
                    '$in': ['active', 'claimed-by-manager', 'soft-failed']
                }
            }, 'cancel-requested')
        # Update the activity of all the tasks we just cancelled (or requested cancellation),
        # so that users can tell why they were cancelled.
        current_flamenco.task_manager.api_set_activity(
            {
                'job': job_id,
                'status': {
                    '$in': ['cancel-requested', 'canceled']
                },
                'activity': {
                    '$exists': False
                }
            }, 'Server cancelled this task because the job got status %r.' %
            new_status)
        # If the new status is xxx-requested, and no tasks were marked as cancel-requested,
        # we can directly transition the job to 'xxx', without waiting for more task
        # updates.
        if new_status.endswith(
                '-requested') and cancelreq_result.modified_count == 0:
            goto_status = new_status.replace('-requested', 'ed')
            self._log.info(
                'handle_job_status_change(%s, %s, %s): no cancel-requested tasks, '
                'so transitioning directly to %s', job_id, old_status,
                new_status, goto_status)
            return goto_status
        return ''

    def _do_requeue(self, job_id, old_status, new_status) -> str:
        """Re-queue all tasks of the job, and the job itself.

        :returns: the new job status, if this status transition should be
            followed by another one.
        """
        if old_status == 'under-construction':
            # Nothing to do, the job compiler has just finished its work; the tasks have
            # already been set to 'queued' status.
            self._log.debug('Ignoring job status change %r -> %r', old_status,
                            new_status)
            return ''

        if old_status == 'completed':
            # Re-queue all tasks except cancel-requested; those should remain
            # untouched; changing their status is only allowed by managers, to avoid
            # race conditions.
            query = {
                'status': {
                    '$ne': 'cancel-requested'
                }
            }  # type: typing.Dict[str, typing.Any]
        else:
            # Re-queue any non-completed task. Cancel-requested tasks should also be
            # untouched; changing their status is only allowed by managers, to avoid
            # race conditions.
            query = {'status': {'$nin': ['completed', 'cancel-requested']}}

        # Update the tasks.
        query['job'] = job_id
        current_flamenco.update_status_q('tasks',
                                         query,
                                         'queued',
                                         extra_unset={'failed_by_workers'})
        return 'queued'

    def _do_check_completion(self, job_id, new_status) -> str:
        """Completes the job if all tasks are completed.

        :returns: the new job status, if this status transition should be
            followed by another one.
        """

        tasks_coll = current_flamenco.db('tasks')
        total_tasks = tasks_coll.count_documents({'job': job_id})
        completed_tasks = tasks_coll.count_documents({
            'job': job_id,
            'status': 'completed'
        })
        if completed_tasks < total_tasks:
            # Not yet completed, so just stay at current status.
            self._log.debug(
                'Job %s has %d of %d tasks completed, staying at status %r',
                job_id, completed_tasks, total_tasks, new_status)
            return ''

        self._log.info(
            "Job %s has all %d tasks completed, transition from %r to 'completed'",
            job_id, total_tasks, new_status)
        return 'completed'

    def archive_job(self, job: dict):
        """Initiates job archival by creating a Celery task for it."""

        from flamenco.celery import job_archival

        job_id = job['_id']
        job_status = job['status']

        if job_status in ARCHIVE_JOB_STATES:
            msg = f'Job {job_id} cannot be archived, it has status {job_status}'
            self._log.info(msg)
            raise wz_exceptions.UnprocessableEntity(msg)

        # Store current job status in a special key so that it can be restored before
        # writing to the archive ZIP file as JSON.
        jobs_coll = current_flamenco.db('jobs')
        jobs_coll.update_one({'_id': job_id},
                             {'$set': {
                                 'pre_archive_status': job_status
                             }})

        # Immediately set job status to 'archiving', as this should be reflected ASAP in the
        # database + web interface, rather than waiting for a Celery Worker to pick it up.
        self.api_set_job_status(job_id, 'archiving')

        self._log.info(
            'Creating Celery background task for archival of job %s', job_id)
        job_archival.archive_job.delay(str(job_id))

    def api_set_job_priority(self, job_id: ObjectId, new_priority: int):
        """API-level call to updates the job priority."""
        assert isinstance(new_priority, int)
        self._log.debug('Setting job %s priority to %r', job_id, new_priority)

        jobs_coll = current_flamenco.db('jobs')
        curr_job = jobs_coll.find_one({'_id': job_id},
                                      projection={'priority': 1})
        old_priority = curr_job['priority']

        if old_priority == new_priority:
            self._log.debug('Job %s is already at priority %r', job_id,
                            old_priority)
            return

        new_etag = random_etag()
        now = utcnow()
        result = jobs_coll.update_one({'_id': job_id}, {
            '$set': {
                'priority': new_priority,
                '_updated': now,
                '_etag': new_etag,
            }
        })
        if result.matched_count != 1:
            self._log.warning(
                'Matched %d jobs while setting job %s to priority %r',
                result.matched_count, job_id, new_priority)

        tasks_coll = current_flamenco.db('tasks')
        result = tasks_coll.update_many({'job': job_id}, {
            '$set': {
                'job_priority': new_priority,
                '_updated': now,
                '_etag': new_etag,
            }
        })
        self._log.debug('Matched %d tasks while setting job %s to priority %r',
                        result.matched_count, job_id, new_priority)

    def api_update_rna_overrides(self, job_id: ObjectId,
                                 rna_overrides: typing.List[str]):
        """API-level call to create or update an RNA override task of a Blender Render job."""

        new_etag = random_etag()
        now = utcnow()
        jobs_coll = current_flamenco.db('jobs')

        # Check that the job exists and is a Blender-related job.
        job = jobs_coll.find_one({'_id': job_id})
        if not job:
            self._log.warning(
                'Unable to update RNA overrides of non-existing job %s',
                job_id)
            return None

        compiler = job_compilers.construct_job_compiler(job)
        if not isinstance(compiler, blender_render.AbstractBlenderJobCompiler):
            self._log.warning(
                'Job compiler %r is not an AbstractBlenderJobCompiler, unable '
                'to update RNA overrides for job %s of type %r',
                type(compiler), job_id, job['job_type'])
            return None

        # Update the job itself before updating its tasks. Ideally this would happen in the
        # same transaction.
        # TODO(Sybren): put into one transaction when we upgrade to MongoDB 4+.
        job['settings']['rna_overrides'] = rna_overrides
        result = jobs_coll.update_one({'_id': job_id}, {
            '$set': {
                'settings.rna_overrides': rna_overrides,
                '_updated': now,
                '_etag': new_etag,
            }
        })
        if result.matched_count != 1:
            self._log.warning(
                'Matched %d jobs while setting job %s RNA overrides',
                result.matched_count, job_id)

        compiler.update_rna_overrides_task(job)

    def api_construct_job(
            self,
            job_id: ObjectId,
            new_job_settings: typing.Optional[typing.Dict[str,
                                                          typing.Any]] = None,
            *,
            reason: str):
        """Construct the tasks for a job."""

        jobs_coll = current_flamenco.db('jobs')
        job = jobs_coll.find_one({'_id': job_id})
        if not job:
            raise ValueError(f'Job {job_id} does not exist')

        if new_job_settings:
            self._log.info('Updating settings for job %s: %s', job_id,
                           new_job_settings)
            job_settings = job.setdefault('settings', {})
            job_settings.update(new_job_settings)
            result = jobs_coll.update_one({'_id': job_id},
                                          {'$set': {
                                              'settings': job_settings
                                          }})
            if result.matched_count != 1:
                raise ValueError(
                    f'Could not find job {job_id} for updating new settings')

        self.api_set_job_status(job_id, 'under-construction', reason=reason)
        self._log.info('Generating tasks for job %s', job_id)

        try:
            job_compilers.compile_job(job)
        except Exception as ex:
            self._log.exception('Compiling job %s failed', job_id)
            current_flamenco.job_manager.api_set_job_status(
                job_id,
                'construction-failed',
                reason=f'{reason}; compilation failed: {ex}')
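
A minimal usage sketch for the JobManager above. This is illustrative only:
the ObjectIds are placeholders, the job settings are abbreviated, and the
calls assume a running Pillar/Flask application context.

from bson import ObjectId

job_manager = JobManager()
job = job_manager.api_create_job(
    'Render shot 010',                    # job_name
    'Final-quality render of shot 010',   # job_desc
    'blender-render',                     # job_type
    {'filepath': '/shots/010.blend', 'frames': '1-250'},  # job_settings
    project_id=ObjectId('5d0000000000000000000001'),
    user_id=ObjectId('5d0000000000000000000002'),
    manager_id=ObjectId('5d0000000000000000000003'),
    priority=60)
# Compile the job into tasks; the job was created 'under-construction'.
job_manager.api_construct_job(job['_id'], reason='initial construction')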
Example #2
class TaskManager(object):
    _log = attrs_extra.log('%s.TaskManager' % __name__)

    def collection(self) -> pymongo.collection.Collection:
        """Returns the Mongo database collection."""
        from flamenco import current_flamenco

        return current_flamenco.db('tasks')

    def api_create_task(self,
                        job,
                        commands,
                        name,
                        parents=None,
                        priority=50,
                        status='queued',
                        *,
                        task_type: str) -> bson.ObjectId:
        """Creates a task in MongoDB for the given job, executing commands.

        Returns the ObjectId of the created task.
        """

        task = {
            'job': job['_id'],
            'manager': job['manager'],
            'user': job['user'],
            'name': name,
            'status': status,
            'job_type': job['job_type'],
            'task_type': task_type,
            'commands': [cmd.to_dict() for cmd in commands],
            'job_priority': job['priority'],
            'priority': priority,
            'project': job['project'],
        }
        # Insertion of None parents is not supported
        if parents:
            task['parents'] = parents

        self._log.info('Creating task %s for manager %s, user %s', name,
                       job['manager'], job['user'])

        r, _, _, status = current_app.post_internal('flamenco_tasks', task)
        if status != 201:
            self._log.error('Error %i creating task %s: %s', status, task, r)
            raise wz_exceptions.InternalServerError('Unable to create task')

        return r['_id']

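    # The ObjectId returned by api_create_task() above is what job compilers
    # pass as the `parents` of follow-up tasks; chaining these IDs is how the
    # task dependency graph (depsgraph) of a job is built.
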
    def tasks_for_job(self,
                      job_id,
                      status=None,
                      *,
                      page=1,
                      max_results=250,
                      extra_where: typing.Optional[dict] = None):
        from .sdk import Task

        api = pillar_api()

        where = {'job': str(job_id)}
        if extra_where:
            where.update(extra_where)

        payload = {
            'where': where,
            'sort': [
                ('priority', -1),
                ('_id', 1),
            ],
            'max_results': max_results,
            'page': page,
        }
        if status:
            payload['where']['status'] = status

        tasks = Task.all(payload, api=api)
        self._log.debug(
            'tasks_for_job: where=%s -> %i tasks in total, fetched page %i (%i per page)',
            payload['where'], tasks['_meta']['total'], page, max_results)
        return tasks

    def tasks_for_project(self, project_id):
        """Returns the tasks for the given project.

        :returns: {'_items': [task, task, ...], '_meta': {Eve metadata}}
        """
        from .sdk import Task

        api = pillar_api()
        try:
            tasks = Task.all({'where': {
                'project': project_id,
            }}, api=api)
        except ResourceNotFound:
            return {'_items': [], '_meta': {'total': 0}}

        return tasks

    def web_set_task_status(self, task_id, new_status):
        """Web-level call to updates the task status."""
        from .sdk import Task

        api = pillar_api()
        task = Task({'_id': task_id})
        task.patch({'op': 'set-task-status', 'status': new_status}, api=api)

    def api_set_task_status_for_job(self,
                                    job_id: bson.ObjectId,
                                    from_status: str,
                                    to_status: str,
                                    *,
                                    now: typing.Optional[datetime.datetime] = None):
        """Updates the task status for all tasks of a job that have a particular status."""

        self._log.info('Flipping all tasks of job %s from status %r to %r',
                       job_id, from_status, to_status)

        from flamenco import current_flamenco

        current_flamenco.update_status_q(
            'tasks',
            {'job': job_id, 'status': from_status},
            to_status,
            now=now)

    def api_set_activity(self, task_query: dict, new_activity: str):
        """Updates the activity for all tasks that match the query."""

        import uuid
        from bson import tz_util

        update = {
            'activity': new_activity,
            '_etag': uuid.uuid4().hex,
            '_updated': datetime.datetime.now(tz=tz_util.utc),
        }

        tasks_coll = self.collection()
        tasks_coll.update_many(task_query, {'$set': update})

    def api_find_job_enders(self, job_id):
        """Returns a list of tasks that could be the last tasks of a job.

        In other words, returns all tasks that are not a parent of other tasks.

        :returns: list of task IDs
        :rtype: list
        """

        tasks_coll = self.collection()

        # Get the distinct set of tasks used as parents.
        parent_tasks = tasks_coll.aggregate([
            {
                '$match': {
                    'job': job_id
                }
            },
            {
                '$project': {
                    'parents': 1
                }
            },
            {
                '$unwind': {
                    'path': '$parents'
                }
            },
            {
                '$group': {
                    '_id': '$parents'
                }
            },
        ])
        parent_ids = [t['_id'] for t in parent_tasks]

        # Get all the tasks that do not have such an ID.
        tasks = tasks_coll.find({
            'job': job_id,
            '_id': {
                '$nin': parent_ids
            }
        },
                                projection={'_id': 1})

        tids = [t['_id'] for t in tasks]
        return tids

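    # Example for api_find_job_enders() above: in a chain A -> B -> C plus a
    # detached task D, the aggregation collects the parent IDs {A, B}, so the
    # method returns [C, D]: exactly the tasks no other task depends on.
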
    def api_delete_tasks_for_job(self, job_id: bson.ObjectId):
        """Deletes all tasks for a given job.

        NOTE: this breaks references in the task log database.
        """

        from pymongo.results import DeleteResult

        self._log.info('Deleting all tasks of job %s', job_id)
        tasks_coll = self.collection()
        delres: DeleteResult = tasks_coll.delete_many({'job': job_id})
        self._log.info('Deleted %i tasks of job %s', delres.deleted_count,
                       job_id)

    def api_requeue_task_and_successors(self, task_id: bson.ObjectId):
        """Recursively re-queue a task and its successors on the job's depsgraph.

        Does not update the job status itself. This is the responsibility
        of the caller.
        """
        from flamenco import current_flamenco

        tasks_coll = self.collection()
        visited_tasks: typing.MutableSet[bson.ObjectId] = set()

        def visit_task(tid: bson.ObjectId, depth: int):
            if depth > 10000:
                raise ValueError('Infinite recursion detected')

            if tid in visited_tasks:
                return
            visited_tasks.add(tid)

            current_flamenco.update_status('tasks', tid, 'queued')
            children = tasks_coll.find({'parents': tid},
                                       projection={'_id': True})
            for child in children:
                visit_task(child['_id'], depth + 1)

        visit_task(task_id, 0)

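    # Example for api_requeue_task_and_successors() above: re-queueing A in
    # the graph A -> B -> C flips A, B and C to 'queued'. The visited_tasks
    # set keeps a diamond (B and C both parents of D) from re-queueing D
    # twice.
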
    def _tasklog_blob_fname(self, task: dict) -> str:
        """Construct the blob filename for this task's log file.

        The blob is supposed to go into the project's storage bucket,
        so it does not contain the project ID.

        Assumes the log will be gzip-compressed, and thus the returned
        filename ends in '.log.gz'.
        """

        return f'flamenco-task-logs/job-{task["job"]}/task-{task["_id"]}.log.gz'

    def logfile_blob(self, task: dict) -> Blob:
        """Return the storage blob for this task's log file."""

        project_id = task['project']
        blob_fname = self._tasklog_blob_fname(task)
        bucket = pillar.api.projects.utils.storage(project_id)
        return bucket.blob(blob_fname)

    def api_attach_log(self, task: dict, file_obj: FileType) -> bool:
        """Store the POSTed task log as a file in the storage backend.

        Also updates the task itself to have a reference to the file.

        :return: Whether this file was new (False) or overwrote a pre-existing
            log file (True).
        """
        blob = self.logfile_blob(task)

        self._log.debug(
            'Storing log for task %s in storage blob %s of project %s',
            task['_id'], blob.name, task['project'])

        preexisting = blob.exists()
        blob.create_from_file(file_obj, content_type='application/gzip')
        blob.update_filename(pathlib.PurePosixPath(blob.name).name,
                             is_attachment=False)
        blob.update_content_type('text/plain', 'gzip')

        self._log.info(
            'Stored log for task %s in storage blob %s of project %s',
            task['_id'], blob.name, task['project'])

        tasks_coll = self.collection()
        tasks_coll.update_one({'_id': task['_id']}, {
            '$set': {
                'log_file': {
                    'backend': blob.bucket.backend_name,
                    'file_path': blob.name,
                },
            }
        })

        return preexisting
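
A short sketch of driving TaskManager.api_attach_log() above; the task
ObjectId and the gzip payload are illustrative placeholders, and a storage
bucket must be configured for the task's project.

import gzip
import io

from bson import ObjectId

task_manager = TaskManager()
task = task_manager.collection().find_one(
    {'_id': ObjectId('5d0000000000000000000010')})
log_payload = io.BytesIO(gzip.compress(b'rendered frame 1 in 12.3s\n'))
overwrote = task_manager.api_attach_log(task, log_payload)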
Example #3
class BlenderRenderProgressive(blender_render.AbstractBlenderJobCompiler):
    """Progressive Blender render job.

    Creates a render task for each Cycles sample chunk, and creates merge
    tasks to merge those render outputs into progressively refining output.

    Intermediary files are created in a subdirectory of the render output path.

    To make things simple, we choose one chunk per sample. This requires
    Blender 7744203b7fde3 or newer (from Tue Jan 29 18:08:12 2019 +0100).

    NOTE: progressive rendering does not work with the denoiser.
    """

    _log = attrs_extra.log('%s.BlenderRenderProgressive' % __name__)

    REQUIRED_SETTINGS = ('blender_cmd', 'render_output', 'frames',
                         'chunk_size', 'format', 'cycles_sample_count',
                         'cycles_sample_cap', 'fps')

    # So that unit tests can override this value and produce smaller jobs.
    _uncapped_chunk_count = 4

    def _frame_chunk_size(
        self,
        max_samples_per_task: int,
        total_frame_count: int,
        current_sample_count: int,
    ) -> int:
        """Compute the frame chunk given the current sample count."""

        frame_chunk_size = max_samples_per_task // current_sample_count

        # Allow the chunk size to become one frame smaller if that produces
        # a less-empty last task. Having a task with only one frame is
        # acceptable when the chunk size is only 1 or 2.
        if frame_chunk_size > 2:
            frames_in_last_task = total_frame_count % frame_chunk_size
            with_smaller_chunk = total_frame_count % (frame_chunk_size - 1)
            if 0 < frames_in_last_task < with_smaller_chunk:
                frame_chunk_size -= 1

        return frame_chunk_size

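    # Worked example for _frame_chunk_size() above: max_samples_per_task=100
    # and a 10-sample chunk give 100 // 10 = 10 frames per task. With 101
    # frames the last task would render a single frame, while a chunk size
    # of 9 leaves 101 % 9 = 2 frames, so the chunk shrinks to 9.
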
    def _compile(self, job: dict):
        from .blender_render import intermediate_path

        self._log.info('Compiling job %s', job['_id'])
        self.validate_job_settings(job, _must_have_filepath=True)
        self.job_settings = job['settings']
        self.do_render_video: bool = utils.frame_range_count(
            self.job_settings['frames']) > 1

        # The render output contains a filename pattern, most likely '######' or
        # something similar. This has to be removed, so that we end up with
        # the directory that will contain the frames.
        self.render_output = PurePath(job['settings']['render_output'])
        self.render_path = self.render_output.parent
        self.intermediate_path = intermediate_path(job, self.render_path)

        destroy_interm_task_id = self._make_destroy_intermediate_task(job)
        rna_overrides_task_id = self._make_rna_overrides_task(
            job, destroy_interm_task_id)
        render_parent_task_id = rna_overrides_task_id or destroy_interm_task_id
        task_count = 1 + bool(rna_overrides_task_id)

        cycles_sample_count = int(self.job_settings['cycles_sample_count'])
        cycles_sample_cap = int(self.job_settings.get('cycles_sample_cap',
                                                      100))

        next_merge_task_deps = []
        next_preview_images_tid: typing.Optional[ObjectId] = None
        next_preview_video_tid: typing.Optional[ObjectId] = None
        prev_samples_to = 0

        self.chunk_generator = ChunkGenerator(cycles_sample_count,
                                              cycles_sample_cap,
                                              self._uncapped_chunk_count)
        chunks = list(self.chunk_generator)
        if len(chunks) < 2:
            raise ValueError(
                'This job would not be progressive, use a blender-render job instead.'
            )

        max_frame_chunk_size: int = self.job_settings['chunk_size']
        max_samples_per_task: int = max_frame_chunk_size * cycles_sample_cap
        total_frame_count = utils.frame_range_count(
            self.job_settings['frames'])
        self._log.info('Total frame count is %d', total_frame_count)

        for cycles_chunk_idx, (cycles_chunk_start,
                               cycles_chunk_end) in enumerate(chunks):
            render_task_priority = -cycles_chunk_idx * 10

            # Compute how big the frame chunk can get given the current sample count.
            frame_chunk_size = self._frame_chunk_size(
                max_samples_per_task, total_frame_count,
                cycles_chunk_end - cycles_chunk_start + 1)

            render_task_ids = self._make_progressive_render_tasks(
                job,
                f'render-s_{cycles_chunk_start}-{cycles_chunk_end}-f_%s',
                render_parent_task_id,
                cycles_sample_count,  # We use 1 chunk = 1 sample
                cycles_chunk_start,
                cycles_chunk_end,
                frame_chunk_size,
                task_priority=render_task_priority,
            )
            task_count += len(render_task_ids)

            # Create progressive image merge tasks, based on previous list of render tasks
            # and the just-created list.
            if cycles_chunk_idx == 0:
                render_out = self._render_output(cycles_chunk_start,
                                                 cycles_chunk_end)
                exr_glob = render_out.with_name(
                    render_out.name.replace('######', '*.exr'))

                next_preview_images_tid, next_preview_video_tid = self._make_previews_tasks(
                    job,
                    render_task_ids,
                    next_preview_images_tid,
                    next_preview_video_tid,
                    exr_glob=exr_glob,
                    task_priority=render_task_priority + 1)
                task_count += 2
                next_merge_task_deps = render_task_ids
            else:
                output_pattern = f'merge-to-s_{cycles_chunk_end}-f_%s'
                merge_task_ids = self._make_merge_tasks(
                    job,
                    output_pattern,
                    cycles_chunk_idx + 1,
                    next_merge_task_deps,
                    render_task_ids,
                    cycles_chunks_to1=prev_samples_to,
                    cycles_chunks_from2=cycles_chunk_start,
                    cycles_chunks_to2=cycles_chunk_end,
                    task_priority=1,
                )

                merge_out = self._merge_output(cycles_chunk_end)
                exr_glob = merge_out.with_name(
                    merge_out.name.replace('######', '*.exr'))
                next_preview_images_tid, next_preview_video_tid = self._make_previews_tasks(
                    job,
                    merge_task_ids,
                    next_preview_images_tid,
                    next_preview_video_tid,
                    exr_glob=exr_glob,
                    task_priority=1)

                task_count += len(merge_task_ids) + 2
                next_merge_task_deps = merge_task_ids
            prev_samples_to = cycles_chunk_end

        # Only after the render job is done do we publish to the output directory.
        # This makes sure any previous high-quality render is only replaced by
        # another high-quality render.
        moow_tid = self._make_moow_task(job,
                                        next_merge_task_deps,
                                        task_priority=1)
        self._make_publish_exr_task(
            job,
            [moow_tid],
            cycles_sample_count,
            task_priority=1,
        )
        self._make_publish_jpeg_task(job, [next_preview_images_tid, moow_tid],
                                     task_priority=1)
        self._make_publish_preview_video_task(
            job, [next_preview_video_tid, moow_tid], task_priority=1)

        self._log.info('Created %i tasks for job %s', task_count, job['_id'])

    def validate_job_settings(self, job, *, _must_have_filepath=False):
        """Ensure that the job uses format=OPEN_EXR."""
        from flamenco import exceptions

        job_id_str = job.get('_id', '')
        if job_id_str:
            job_id_str = f'{job_id_str} '
        if job['settings'].get('cycles_num_chunks'):
            # End of January 2019 we changed how progressive rendering works.
            # Users no longer provide the number of chunks, but the maximum
            # number of samples per render task.
            raise exceptions.JobSettingError(
                f'Job {job_id_str}was created using an outdated Blender Cloud add-on, please upgrade.'
            )

        super().validate_job_settings(job,
                                      _must_have_filepath=_must_have_filepath)

        render_format = job['settings']['format']
        if render_format.upper() not in {'OPEN_EXR', 'EXR'}:
            raise exceptions.JobSettingError(
                f'Job {job_id_str}must use format="OPEN_EXR", not {render_format!r}'
            )

        # This is quite a limitation, but it makes predicting the filename
        # that Blender will use a lot simpler.
        render_output = job['settings']['render_output']
        if not render_output.endswith('######') or render_output.endswith(
                '#######'):
            raise exceptions.JobSettingError(
                'Setting "render_output" must end in exactly 6 "#" marks.')

    def _make_destroy_intermediate_task(self, job: dict) -> ObjectId:
        """Removes the entire intermediate directory."""

        cmd = commands.RemoveTree(path=str(self.intermediate_path))
        task_id = self._create_task(job, [cmd],
                                    'destroy-preexisting-intermediate',
                                    'file-management')
        return task_id

    def _make_publish_exr_task(self, job: dict, parents: typing.List[ObjectId],
                               cycles_samples_to: int,
                               task_priority: int) -> ObjectId:
        """Publish the final result to the output directory."""

        cmds: typing.List[commands.AbstractCommand] = []

        src_path = self._merge_output(cycles_samples_to)
        src_fmt = str(src_path).replace('######', '%06i.exr')
        dest_fmt = str(self.render_output).replace('######', '%06i.exr')

        for chunk_frames in self._iter_frame_chunks():
            for frame in chunk_frames:
                cmds.append(
                    commands.CopyFile(
                        src=src_fmt % frame,
                        dest=dest_fmt % frame,
                    ))

        task_id = self._create_task(job,
                                    cmds,
                                    'publish-exr-to-output',
                                    'file-management',
                                    parents=parents,
                                    priority=task_priority)
        return task_id

    def _make_publish_jpeg_task(self, job: dict,
                                parents: typing.List[ObjectId],
                                task_priority: int) -> ObjectId:
        """Publish the JPEG previews to the output directory."""

        cmds: typing.List[commands.AbstractCommand] = []

        src_fmt = str(self.intermediate_path / 'preview-%06i.jpg')
        dest_fmt = str(self.render_output).replace('######', '%06i.jpg')

        for chunk_frames in self._iter_frame_chunks():
            for frame in chunk_frames:
                cmds.append(
                    commands.CopyFile(
                        src=src_fmt % frame,
                        dest=dest_fmt % frame,
                    ))

        task_id = self._create_task(job,
                                    cmds,
                                    'publish-jpeg-to-output',
                                    'file-management',
                                    parents=parents,
                                    priority=task_priority)
        return task_id

    def _make_publish_preview_video_task(
            self, job: dict, parents: typing.List[ObjectId],
            task_priority: int) -> typing.Optional[ObjectId]:
        """Publish the MKV preview to the output directory."""

        if not self.do_render_video:
            return None

        cmds = [
            commands.CopyFile(
                src=str(self.intermediate_path / 'preview.mkv'),
                dest=str(self.render_path / 'preview.mkv'),
            )
        ]

        task_id = self._create_task(job,
                                    cmds,
                                    'publish-video-to-output',
                                    'file-management',
                                    parents=parents,
                                    priority=task_priority)
        return task_id

    def _make_previews_tasks(self, job: dict,
                             parents: typing.List[ObjectId],
                             parent_images_tid: typing.Optional[ObjectId],
                             parent_video_tid: typing.Optional[ObjectId],
                             exr_glob: PurePath,
                             task_priority: int) \
            -> typing.Tuple[ObjectId, typing.Optional[ObjectId]]:
        """Converts EXR files in the render output directory to JPEG files.

        This constructs one or two tasks, one of type 'blender-render' and
        optionally one of type 'video-encoding'.

        :return: (images task ID, video task ID or None)
        """
        assert isinstance(parents, list)
        assert isinstance(parents[0], ObjectId)
        assert isinstance(parent_images_tid, (ObjectId, type(None)))
        assert isinstance(parent_video_tid, (ObjectId, type(None)))

        job_settings = job['settings']
        cmds = [
            commands.ExrSequenceToJpeg(
                blender_cmd=job_settings['blender_cmd'],
                filepath=job_settings['filepath'],
                exr_glob=str(exr_glob),
                output_pattern='preview-######',
            ),
        ]

        image_parents = parents[:]
        if parent_images_tid:
            image_parents.insert(0, parent_images_tid)
        images_task_id = self._create_task(job,
                                           cmds,
                                           'create-preview-images',
                                           'blender-render',
                                           parents=image_parents,
                                           priority=task_priority)

        if not self.do_render_video:
            return images_task_id, None

        cmds = [
            commands.CreateVideo(
                input_files=str(self.intermediate_path / 'preview-*.jpg'),
                output_file=str(self.intermediate_path / 'preview.mkv'),
                fps=job_settings['fps'],
            )
        ]
        video_parents = [images_task_id]
        if parent_video_tid:
            video_parents.insert(0, parent_video_tid)
        video_task_id = self._create_task(job,
                                          cmds,
                                          'create-preview-video',
                                          'video-encoding',
                                          parents=video_parents,
                                          priority=task_priority)
        return images_task_id, video_task_id

    def _make_progressive_render_tasks(self, job, name_fmt, parents,
                                       cycles_num_chunks: int,
                                       cycles_chunk_start: int,
                                       cycles_chunk_end: int,
                                       frame_chunk_size: int,
                                       task_priority: int):
        """Creates the render tasks for this job.

        :param parents: either a list of parents, one for each task, or a
            single parent used for all tasks.

        :returns: created task IDs, one render task per frame chunk.
        :rtype: list
        """

        from bson import ObjectId
        from flamenco.utils import iter_frame_range, frame_range_merge

        job_settings = job['settings']

        task_ids = []
        frame_chunk_iter = iter_frame_range(job_settings['frames'],
                                            frame_chunk_size)
        for chunk_idx, chunk_frames in enumerate(frame_chunk_iter):
            frame_range = frame_range_merge(chunk_frames)
            frame_range_bstyle = frame_range_merge(chunk_frames,
                                                   blender_style=True)

            name = name_fmt % frame_range

            render_output = self._render_output(cycles_chunk_start,
                                                cycles_chunk_end)

            task_cmds = [
                commands.BlenderRenderProgressive(
                    blender_cmd=job_settings['blender_cmd'],
                    filepath=job_settings['filepath'],
                    format=job_settings.get('format'),
                    # Don't render to actual render output, but to an intermediate file.
                    render_output=str(render_output),
                    frames=frame_range_bstyle,
                    cycles_num_chunks=cycles_num_chunks,
                    cycles_chunk_start=cycles_chunk_start,
                    cycles_chunk_end=cycles_chunk_end,
                )
            ]

            if isinstance(parents, list):
                parent_task_id = parents[chunk_idx]
            else:
                parent_task_id = parents

            if not isinstance(parent_task_id, ObjectId):
                raise TypeError(
                    'parents should be list of ObjectIds or ObjectId, not %s' %
                    parents)

            task_id = self._create_task(job,
                                        task_cmds,
                                        name,
                                        'blender-render',
                                        parents=[parent_task_id],
                                        priority=task_priority)
            task_ids.append(task_id)

        return task_ids

    def _render_output(self, cycles_samples_from,
                       cycles_samples_to) -> PurePath:
        """Intermediate render output path, with ###### placeholder for the frame nr"""
        render_fname = 'render-smpl-%04i-%04i-######' % (cycles_samples_from,
                                                         cycles_samples_to)
        render_output = self.intermediate_path / render_fname
        return render_output

    def _merge_output(self, cycles_samples_to) -> PurePath:
        """Intermediate merge output path, with ###### placeholder for the frame nr"""
        merge_fname = 'merge-smpl-%04i-######' % cycles_samples_to
        merge_output = self.intermediate_path / merge_fname
        return merge_output

    def _iter_frame_chunks(self) -> typing.Iterable[typing.List[int]]:
        """Iterates over the frame chunks"""
        from flamenco.utils import iter_frame_range

        yield from iter_frame_range(self.job_settings['frames'],
                                    self.job_settings['chunk_size'])

    def _make_merge_tasks(self, job, name_fmt, cycles_chunk_idx, parents1,
                          parents2, cycles_chunks_to1, cycles_chunks_from2,
                          cycles_chunks_to2, task_priority):
        """Creates merge tasks for each chunk, consisting of merges for each frame.

        :param cycles_chunk_idx: base-1 sample chunk index

        """

        # Merging cannot happen unless we have at least two chunks
        assert cycles_chunk_idx >= 2

        weight1 = cycles_chunks_to1
        weight2 = cycles_chunks_to2 - cycles_chunks_from2 + 1

        if cycles_chunk_idx == 2:
            # The first merge takes a render output as input1, subsequent ones take merge outputs.
            # Merging only happens from Cycles chunk 2 (it needs two inputs, hence 2 chunks).
            input1 = self._render_output(1, cycles_chunks_to1)
        else:
            input1 = self._merge_output(cycles_chunks_to1)
        input2 = self._render_output(cycles_chunks_from2, cycles_chunks_to2)
        output = self._merge_output(cycles_chunks_to2)

        # Construct format strings from the paths, replacing Blender's
        # '######' placeholder with Python %-formatting.
        input1_fmt = str(input1).replace('######', '%06i.exr')
        input2_fmt = str(input2).replace('######', '%06i.exr')

        blender_cmd = job['settings']['blender_cmd']

        frame_start, frame_end = utils.frame_range_start_end(
            self.job_settings['frames'])
        assert frame_start is not None
        assert frame_end is not None

        cmds = [
            commands.MergeProgressiveRenderSequence(
                blender_cmd=blender_cmd,
                input1=input1_fmt % frame_start,
                input2=input2_fmt % frame_start,
                output=str(output),
                weight1=weight1,
                weight2=weight2,
                frame_start=frame_start,
                frame_end=frame_end,
            )
        ]
        name = name_fmt % f'{frame_start}-{frame_end}'

        task_id = self._create_task(job,
                                    cmds,
                                    name,
                                    'exr-merge',
                                    parents=parents1 + parents2,
                                    priority=task_priority)

        return [task_id]

    def _make_moow_task(self, job: dict, parents: typing.List[ObjectId],
                        task_priority: int) -> ObjectId:
        """Make the move-out-of-way task."""

        cmd = commands.MoveOutOfWay(src=str(self.render_path))
        return self._create_task(job, [cmd],
                                 'move-outdir-out-of-way',
                                 'file-management',
                                 parents=parents,
                                 priority=task_priority)

    def insert_rna_overrides_task(self, job: dict) -> ObjectId:
        """Inject a new RNA Overrides task into an existing job.

        Returns the new task ID.
        """
        return self._insert_rna_overrides_task(
            job, {'name': 'destroy-preexisting-intermediate'})
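
ChunkGenerator is used by _compile() above but is not shown in this listing.
Below is a minimal sketch, written as a plain generator function, of the
interface the compiler relies on: an iterable of 1-based (start, end) Cycles
sample ranges, of which there must be at least two. The doubling policy for
the first chunks is an assumption; only the constructor arguments and the
tuple shape are taken from the code above.

import typing


def chunk_ranges(sample_count: int, sample_cap: int,
                 uncapped_chunks: int) -> typing.Iterator[typing.Tuple[int, int]]:
    """Yield 1-based (start, end) Cycles sample ranges.

    Assumed policy: up to 'uncapped_chunks' chunks that double in size
    (1, 2, 4, ...), then fixed chunks of 'sample_cap' samples each until
    'sample_count' is covered.
    """
    start, size = 1, 1
    for _ in range(uncapped_chunks):
        end = min(start + size - 1, sample_count)
        yield start, end
        if end >= sample_count:
            return
        start, size = end + 1, size * 2
    while start <= sample_count:
        end = min(start + sample_cap - 1, sample_count)
        yield start, end
        start = end + 1

# list(chunk_ranges(30, 10, 4)) -> [(1, 1), (2, 3), (4, 7), (8, 15),
#                                   (16, 25), (26, 30)]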
Example #4
class Auth(object):
    """Handles authorization for Flamenco."""

    _log = attrs_extra.log('%s.Auth' % __name__)
    Actions = Actions  # this allows using current_flamenco.auth.Actions

    def current_user_is_flamenco_admin(self) -> bool:
        """Returns True iff the user is a Flamenco admin or regular admin."""

        return current_user.has_cap('flamenco-admin')

    def current_user_is_flamenco_manager(self) -> bool:
        """Returns True iff the user is a Flamenco Manager."""

        from pillar.api.utils.authorization import user_matches_roles

        return user_matches_roles({'service', 'flamenco_manager'},
                                  require_all=True)

    def current_user_is_flamenco_user(self) -> bool:
        """Returns True iff the current user has Flamenco User role."""

        return current_user.has_cap('flamenco-use')

    def user_is_flamenco_user(self, user_id: bson.ObjectId) -> bool:
        """Returns True iff the user has Flamenco User role."""

        from pillar import current_app
        from pillar.auth import UserClass

        assert isinstance(user_id, bson.ObjectId)

        # TODO: move role/cap checking code to Pillar.
        users_coll = current_app.db('users')
        db_user = users_coll.find_one({'_id': user_id}, {'roles': 1})
        if not db_user:
            self._log.debug('user_is_flamenco_user: User %s not found',
                            user_id)
            return False

        user = UserClass.construct('', db_user)
        return user.has_cap('flamenco-use')

    def current_user_may(self, action: Actions,
                         project_id: bson.ObjectId) -> bool:
        """Returns True iff the user is authorised to use/view Flamenco on the given project.

        This is linked to the Managers assigned to this project. As a result, you cannot
        use Flamenco until one or more Managers are assigned.
        """

        from pillar.api.projects.utils import user_rights_in_project
        import pillar.auth
        from flamenco import current_flamenco

        # Get the actual user object to prevent multiple passes through the LocalProxy.
        user: pillar.auth.UserClass = current_user._get_current_object()
        if user.is_anonymous:
            self._log.debug('Anonymous user never has access to Flamenco.')
            return False

        cap = req_cap[action]
        if not user.has_cap(cap):
            self._log.info(
                'User %s does not have capability %r required for action %s; '
                'denying access to Flamenco', user.user_id, cap, action)
            return False

        # TODO Sybren: possibly split this up into a manager-fetching func + authorisation func.
        # TODO: possibly store the user rights on the current project in the current_user object?
        allowed_on_proj = user_rights_in_project(project_id)
        if not allowed_on_proj.intersection(PROJECT_METHODS_TO_USE_FLAMENCO):
            self._log.info('User %s has no %s access to project %s.',
                           user.user_id, PROJECT_METHODS_TO_USE_FLAMENCO,
                           project_id)
            return False

        if user.has_cap('flamenco-admin'):
            self._log.debug(
                'User is flamenco-admin, so has access to all Managers')
            return True

        managers_coll = current_flamenco.db('managers')
        managers_count = managers_coll.count_documents(
            {'projects': project_id})

        return managers_count > 0
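
The checks above compose naturally into a view-level guard. A minimal sketch, not part of Flamenco itself: require_flamenco_action is a hypothetical decorator, and it assumes the view receives the project ID as a bson.ObjectId.

import functools

import bson
import werkzeug.exceptions as wz_exceptions

from flamenco import current_flamenco


def require_flamenco_action(action):
    """Hypothetical decorator: abort with 403 Forbidden unless the current
    user may perform `action` on the project passed to the view."""

    def decorator(view_func):
        @functools.wraps(view_func)
        def wrapper(project_id: bson.ObjectId, *args, **kwargs):
            if not current_flamenco.auth.current_user_may(action, project_id):
                raise wz_exceptions.Forbidden()
            return view_func(project_id, *args, **kwargs)

        return wrapper

    return decorator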
Exemplo n.º 5
0
class AbstractBlenderJobCompiler(AbstractJobCompiler, metaclass=abc.ABCMeta):
    """Blender Render job compiler with support for RNA Overrides."""
    _log = attrs_extra.log('%s.AbstractBlenderJobCompiler' % __name__)

    def _make_rna_overrides_task(self,
                                 job: dict,
                                 parent_task_id: typing.Optional[bson.ObjectId] = None) \
            -> typing.Optional[bson.ObjectId]:
        """Create a task that writes a Python file with RNA overrides."""

        rna_overrides = job['settings'].get('rna_overrides')
        if not rna_overrides:
            return None

        parent_task_ids = [parent_task_id] if parent_task_id else None
        cmd = rna_overrides_command(job)
        task_id = self._create_task(job, [cmd],
                                    RNA_OVERRIDES_TASK_NAME,
                                    'file-management',
                                    parents=parent_task_ids)
        return task_id

    @abc.abstractmethod
    def insert_rna_overrides_task(self, job: dict) -> bson.ObjectId:
        """Inject a new RNA Overrides task into an existing job.

        Implement in a subclass. Can use _insert_rna_overrides_task() to do
        the heavy lifting.

        Returns the new task ID.
        """

    def _insert_rna_overrides_task(
            self, job: dict, parent_task_selector: dict) -> bson.ObjectId:
        # Find the task that is supposed to be the parent of the new task.
        tasks_coll = current_flamenco.db('tasks')
        if parent_task_selector:
            parent_task = tasks_coll.find_one(
                {
                    'job': job['_id'],
                    **parent_task_selector
                },
                projection={'_id': True})
            if not parent_task:
                raise ValueError(
                    f'unable to find parent task matching {parent_task_selector!r}, '
                    f'cannot update this job')

            parents_kwargs = {'parents': [parent_task['_id']]}
        else:
            parents_kwargs = {}

        # Construct the new task.
        cmd = rna_overrides_command(job)
        task_id = self._create_task(job, [cmd],
                                    RNA_OVERRIDES_TASK_NAME,
                                    'file-management',
                                    priority=80,
                                    status='queued',
                                    **parents_kwargs)
        self._log.info('Inserted RNA Overrides task %s into job %s', task_id,
                       job['_id'])

        # Update existing render tasks to have the new task as parent.
        new_etag = random_etag()
        now = utcnow()
        result = tasks_coll.update_many(
            {
                'job': job['_id'],
                'task_type': 'blender-render',
                **parents_kwargs,
            }, {
                '$set': {
                    '_etag': new_etag,
                    '_updated': now,
                    'parents': [task_id],
                }
            })
        self._log.debug('Updated %d task parent pointers to %s',
                        result.modified_count, task_id)
        return task_id

    def update_rna_overrides_task(self, job: dict):
        """Update or create an RNA Overrides task of an existing job."""
        tasks_coll = current_flamenco.db('tasks')
        task = tasks_coll.find_one(
            {
                'job': job['_id'],
                'name': RNA_OVERRIDES_TASK_NAME
            },
            projection={'_id': True})
        if not task:
            self.insert_rna_overrides_task(job)
            return

        cmd = rna_overrides_command(job)
        new_etag = random_etag()
        now = utcnow()
        result = tasks_coll.update_one(
            task, {
                '$set': {
                    '_etag': new_etag,
                    '_updated': now,
                    'status': 'queued',
                    'commands': [cmd.to_dict()],
                }
            })

        self._log.info('Modified %d RNA override task (%s) of job %s',
                       result.modified_count, task['_id'], job['_id'])
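
A concrete subclass only has to decide which existing task the injected RNA Overrides task should hang under. A minimal sketch of such a subclass; the compiler and the 'move-out-of-way' task name are hypothetical and depend on the tasks a real compiler creates.

class ExampleBlenderRenderCompiler(AbstractBlenderJobCompiler):
    """Hypothetical compiler, shown only to illustrate the abstract hook."""

    def _compile(self, job):
        # A real compiler would create the render and file-management
        # tasks of the job here.
        raise NotImplementedError()

    def insert_rna_overrides_task(self, job: dict) -> bson.ObjectId:
        # _insert_rna_overrides_task() does the heavy lifting; the selector
        # determines which task becomes the parent of the injected one.
        return self._insert_rna_overrides_task(job, {'name': 'move-out-of-way'})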
Exemplo n.º 6
0
class NodeMover(object):
    db = attr.ib(
        validator=attr.validators.instance_of(pymongo.database.Database))
    skip_gcs = attr.ib(default=False,
                       validator=attr.validators.instance_of(bool))
    _log = attrs_extra.log('%s.NodeMover' % __name__)

    def change_project(self, node, dest_proj):
        """Moves a node and children to a new project."""

        assert isinstance(node, dict)
        assert isinstance(dest_proj, dict)

        for move_node in self._children(node):
            self._change_project(move_node, dest_proj)

    def _change_project(self, node, dest_proj):
        """Changes the project of a single node, non-recursively."""

        node_id = node['_id']
        proj_id = dest_proj['_id']
        self._log.info('Moving node %s to project %s', node_id, proj_id)

        # Find all files in the node.
        moved_files = set()
        self._move_files(moved_files, dest_proj,
                         self._files(node.get('picture', None)))
        self._move_files(moved_files, dest_proj,
                         self._files(node['properties'], 'file'))
        self._move_files(moved_files, dest_proj,
                         self._files(node['properties'], 'files', 'file'))
        self._move_files(
            moved_files, dest_proj,
            self._files(node['properties'], 'attachments', 'files', 'file'))

        # Switch the node's project after its files have been moved.
        self._log.info('Switching node %s to project %s', node_id, proj_id)
        nodes_coll = self.db['nodes']
        update_result = nodes_coll.update_one({'_id': node_id},
                                              {'$set': {
                                                  'project': proj_id
                                              }})
        if update_result.matched_count != 1:
            raise RuntimeError(
                'Unable to update node %s in MongoDB: matched_count=%i; modified_count=%i'
                % (node_id, update_result.matched_count,
                   update_result.modified_count))

    def _move_files(self, moved_files, dest_proj, file_generator):
        """Tries to find all files from the given properties."""

        for file_id in file_generator:
            if file_id in moved_files:
                continue
            moved_files.add(file_id)
            self.move_file(dest_proj, file_id)

    def move_file(self, dest_proj, file_id):
        """Moves a single file to another project"""

        self._log.info('Moving file %s to project %s', file_id,
                       dest_proj['_id'])
        pillar.api.file_storage.moving.move_to_bucket(
            file_id, dest_proj['_id'], skip_storage=self.skip_gcs)

    def _files(self, file_ref, *properties):
        """Yields file ObjectIDs."""

        # Degenerate cases.
        if not file_ref:
            return

        # Single ObjectID
        if isinstance(file_ref, ObjectId):
            assert not properties
            yield file_ref
            return

        # List of ObjectIDs
        if isinstance(file_ref, list):
            for item in file_ref:
                for subitem in self._files(item, *properties):
                    yield subitem
            return

        # Dict, use properties[0] as key
        if isinstance(file_ref, dict):
            try:
                subref = file_ref[properties[0]]
            except KeyError:
                # Silently skip non-existing keys.
                return

            for subitem in self._files(subref, *properties[1:]):
                yield subitem
            return

        raise TypeError('File ref is of type %s, not implemented' %
                        type(file_ref))

    def _children(self, node):
        """Generator, recursively yields the node and its children."""

        yield node

        nodes_coll = self.db['nodes']
        for child in nodes_coll.find({'parent': node['_id']}):
            yield from self._children(child)
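
Usage then boils down to fetching the node and destination project documents and handing them to change_project(). A minimal sketch; db, node_id and dest_project_id are assumed to exist in the caller's scope.

# Hypothetical usage: move a node and all of its descendants, files included.
node = db['nodes'].find_one({'_id': node_id})
dest_proj = db['projects'].find_one({'_id': dest_project_id})

mover = NodeMover(db=db, skip_gcs=True)  # skip the bucket move while testing
mover.change_project(node, dest_proj)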
Exemplo n.º 7
0
class ManagerManager(object):
    """Manager manager.

    Performs actions on a Flamenco Manager. Does *NOT* test user permissions -- the caller
    is responsible for that.
    """

    _log = attrs_extra.log('%s.ManagerManager' % __name__)
    ShareAction = ShareAction  # so you can use current_flamenco.manager_manager.ShareAction

    def collection(self) -> pymongo.collection.Collection:
        """Returns the Mongo database collection."""
        from flamenco import current_flamenco

        return current_flamenco.db('managers')

    def create_new_manager(self, name: str, description: str, owner_id: bson.ObjectId) \
            -> typing.Tuple[dict, dict, dict]:
        """Creates a new Manager, including its system account."""

        assert isinstance(owner_id, bson.ObjectId)

        from pillar.api import service
        from pillar.api.users import add_user_to_group

        # Create the service account and the Manager.
        account, token_data = service.create_service_account(
            '', ['flamenco_manager'], {'flamenco_manager': {}})
        mngr_doc = self.create_manager_doc(account['_id'], name, description)

        # Assign the owner to the owner group.
        add_user_to_group(owner_id, mngr_doc['owner'])

        return account, mngr_doc, token_data

    def create_manager_doc(self,
                           service_account_id,
                           name,
                           description,
                           url=None):
        """Creates a new Flamenco manager and its owner group.

        Returns the MongoDB document.
        """

        from pillar.api.utils import str2id
        import bson

        # Determine the Object IDs beforehand, so that the manager can refer to the
        # group (by actual ID) and the group can mention the manager ID in the name.
        manager_id = bson.ObjectId()
        group_id = bson.ObjectId()

        # Create an owner group for this manager.
        group_doc = {
            '_id': group_id,
            'name': f'Owners of Flamenco Manager {manager_id}'
        }
        r, _, _, status = current_app.post_internal('groups', group_doc)
        if status != 201:
            self._log.error(
                'Error creating manager owner group; status should be 201, not %i: %s',
                status, r)
            raise ValueError(
                f'Unable to create Flamenco manager, status code {status}')

        # Create the manager.
        mngr_doc = {
            '_id': manager_id,
            'name': name,
            'description': description,
            'job_types': {
                'sleep': {
                    'vars': {}
                }
            },
            'service_account': str2id(service_account_id),
            'owner': group_id,
        }
        if url:
            mngr_doc['url'] = url
            self._log.info('Creating manager %r at %s', name, url)
        else:
            self._log.info('Creating manager %r', name)

        r, _, _, status = current_app.post_internal('flamenco_managers',
                                                    mngr_doc)
        if status != 201:
            self._log.error('Status should be 201, not %i: %s' % (status, r))
            raise ValueError(
                'Unable to create Flamenco manager, status code %i' % status)

        mngr_doc.update(r)
        return mngr_doc

    def user_is_manager(self) -> bool:
        """Returns True iff the current user is a Flamenco manager service account."""

        from pillar.api.utils.authorization import user_matches_roles

        return user_matches_roles(
            require_roles={'service', 'flamenco_manager'}, require_all=True)

    def _get_manager(
            self,
            mngr_doc_id: bson.ObjectId = None,
            mngr_doc: dict = None,
            projection: dict = None) -> typing.Tuple[bson.ObjectId, dict]:

        assert (mngr_doc_id is None) != (mngr_doc is None), \
            'Either one or the other parameter must be given.'

        if mngr_doc is None:
            mngr_coll = current_flamenco.db('managers')
            mngr_doc = mngr_coll.find_one({'_id': mngr_doc_id}, projection)
            if not mngr_doc:
                self._log.warning(
                    '_get_manager(%s): no such document (user=%s)',
                    mngr_doc_id, current_user.user_id)
                raise ValueError(f'Manager {mngr_doc_id} does not exist.')
        else:
            mngr_doc_id = mngr_doc['_id']

        return mngr_doc_id, mngr_doc

    def user_is_owner(self,
                      *,
                      mngr_doc_id: bson.ObjectId = None,
                      mngr_doc: dict = None) -> bool:
        """Returns True iff the current user is an owner of the given Flamenco Manager."""

        user_id = current_user.user_id
        if current_user.has_cap('flamenco-admin'):
            self._log.debug(
                'user_is_owner(...): user %s has flamenco-admin cap, '
                'so considered owner', user_id)
            return True

        if not current_user.has_cap('flamenco-view'):
            self._log.debug(
                'user_is_owner(...): user %s does not have flamenco-view cap',
                user_id)
            return False

        if not current_user.has_cap('flamenco-use'):
            self._log.debug(
                'user_is_owner(...): user %s does not have flamenco-use cap',
                user_id)
            return False

        mngr_doc_id, mngr_doc = self._get_manager(mngr_doc_id, mngr_doc,
                                                  {'owner': 1})

        owner_group = mngr_doc.get('owner')
        if not owner_group:
            self._log.warning('user_is_owner(%s): Manager has no owner!',
                              mngr_doc_id)
            return False

        user_groups = current_user.get('groups', set())
        return owner_group in user_groups

    def user_manages(self,
                     *,
                     mngr_doc_id: bson.ObjectId = None,
                     mngr_doc: dict = None) -> bool:
        """
        Returns True iff the current user is the Flamenco manager service account for this doc.
        """

        if not self.user_is_manager():
            # User is not a Flamenco manager service account.
            return False

        mngr_doc_id, mngr_doc = self._get_manager(mngr_doc_id, mngr_doc,
                                                  {'service_account': 1})

        service_account = mngr_doc.get('service_account')
        user_id = current_user.user_id
        if service_account != user_id:
            self._log.debug(
                'user_manages(%s): current user %s is not manager %s',
                mngr_doc_id, user_id, service_account)
            return False

        return True

    def user_may_use(self,
                     *,
                     mngr_doc_id: bson.ObjectId = None,
                     mngr_doc: dict = None) -> bool:
        """Returns True iff this user may use this Flamenco Manager.

        Usage implies things like requeuing tasks and jobs, creating new jobs, etc.
        """
        from flamenco import current_flamenco

        # Flamenco Admins always have access.
        if current_flamenco.auth.current_user_is_flamenco_admin():
            return True

        mngr_doc_id, mngr_doc = self._get_manager(mngr_doc_id, mngr_doc, {
            'owner': 1,
            'user_groups': 1
        })

        user_groups = set(current_user.group_ids)
        owner_group = mngr_doc.get('owner')
        if owner_group and owner_group in user_groups:
            return True

        if not current_user.has_cap('flamenco-use'):
            return False

        manager_groups = set(mngr_doc.get('user_groups', []))
        return bool(user_groups.intersection(manager_groups))

    def api_assign_to_project(self, manager_id: bson.ObjectId,
                              project_id: bson.ObjectId, action: str) -> bool:
        """Assigns the manager to the given project.

        Does NOT check whether the project actually exists or not.

        :param action: either 'assign' or 'remove'
        :returns: True iff the action was successful.
        """

        from collections import defaultdict
        from pymongo.results import UpdateResult
        from flamenco import current_flamenco
        from pillar.api.projects import utils as project_utils

        if action not in {'assign', 'remove'}:
            raise ValueError("Action must be either 'assign' or 'remove'")

        assert isinstance(manager_id, bson.ObjectId)
        assert isinstance(project_id, bson.ObjectId)

        mngr_coll = current_flamenco.db('managers')
        manager_doc = mngr_coll.find_one({'_id': manager_id}, {
            'projects': 1,
            'user_groups': 1
        })

        if not manager_doc:
            self._log.warning(
                'api_assign_to_project(%s, %s): no manager with id=%s (user=%s)',
                manager_id, project_id, manager_id, current_user.user_id)
            return False

        mngr_projects = set(manager_doc.get('projects', []))
        mngr_user_groups = set(manager_doc.get('user_groups', []))

        admin_group_id = project_utils.get_admin_group_id(project_id)

        if action == 'assign':
            mngr_projects.add(project_id)
            mngr_user_groups.add(admin_group_id)
        else:
            mngr_projects.discard(project_id)
            mngr_user_groups.discard(admin_group_id)

        # Convert to list because JSON/BSON doesn't do sets, and sort to get predictable output.
        projects = sorted(mngr_projects)
        user_groups = sorted(mngr_user_groups)

        if self._log.isEnabledFor(logging.INFO):
            self._log.info(
                'Updating Manager %s projects to [%s] and user_groups to [%s]',
                manager_id,
                ', '.join(f"'{pid}'" for pid in projects),
                ', '.join(f"'{gid}'" for gid in user_groups),
            )

        update = defaultdict(dict)  # type: typing.DefaultDict[str, typing.Any]
        if projects:
            update['$set']['projects'] = projects
        else:
            update['$unset']['projects'] = 1

        if user_groups:
            update['$set']['user_groups'] = user_groups
        else:
            update['$unset']['user_groups'] = 1

        res: UpdateResult = mngr_coll.update_one({'_id': manager_id}, update)

        if res.matched_count < 1:
            self._log.error(
                'Unable to update projects on Manager %s to %s: %s',
                manager_id, ', '.join(f"'{pid}'" for pid in projects), res)
            return False
        return True

    def find_service_account_id(self,
                                manager_id: bson.ObjectId) -> bson.ObjectId:
        _, manager = self._get_manager(mngr_doc_id=manager_id,
                                       projection={'service_account': 1})
        users_coll = current_app.db('users')
        service_account_id = manager['service_account']
        service_account = users_coll.find_one({
            '_id': service_account_id,
            'service.flamenco_manager': {
                '$exists': True
            }
        })
        if not service_account:
            self._log.error('Unable to find service account %s for manager %s',
                            service_account_id, manager_id)
            raise wz_exceptions.NotFound()
        return service_account_id

    def hasher(self, manager_id: bson.ObjectId) -> typing.Optional[hmac.HMAC]:
        """Return an HMAC hasher for this Manager."""

        service_account_id = self.find_service_account_id(manager_id)
        tokens_coll = current_app.db('tokens')
        token_dict = tokens_coll.find_one({'user': service_account_id})
        if token_dict is None:
            return None
        secret = token_dict['token']
        hasher = hmac.new(secret.encode('utf8'), digestmod=hashlib.sha256)
        return hasher

    def revoke_auth_token(self, manager_id: bson.ObjectId) -> bson.ObjectId:
        """Deletes all existing authentication tokens of the Manager.

        Returns the service account ID.
        """

        self._log.info(
            'Revoking authentication tokens for Manager %s on behalf of user %s',
            manager_id, current_user.user_id)
        service_account_id = self.find_service_account_id(manager_id)

        tokens_coll = current_app.db('tokens')
        result: pymongo.results.DeleteResult = tokens_coll.delete_many(
            {'user': service_account_id})

        self._log.debug('Deleted %i authentication tokens of Manager %s',
                        result.deleted_count, manager_id)

        return service_account_id

    def gen_new_auth_token(
            self, manager_id: bson.ObjectId) -> typing.Optional[AuthTokenInfo]:
        """Generates a new authentication token for the given Manager.

        Deletes all pre-existing authentication tokens of the Manager.
        """

        from pillar.api import service

        service_account_id = self.revoke_auth_token(manager_id)

        self._log.info(
            'Generating new authentication token for Manager %s on behalf of user %s',
            manager_id, current_user.user_id)
        token_info = service.generate_auth_token(service_account_id)
        return AuthTokenInfo(
            token=token_info['token'],
            expire_time=token_info['expire_time'],
        )

    def share_unshare_manager(self, manager_id: bson.ObjectId,
                              share_action: ShareAction,
                              subject_uid: bson.ObjectId):
        self._log.info(
            '%s Manager %s on behalf of user %s, subject user is %s',
            share_action, manager_id, current_user.user_id, subject_uid)

        from pillar.api import users

        _, manager = self._get_manager(mngr_doc_id=manager_id)
        owner_gid = manager['owner']

        # Check that there is at least one user left in the group.
        users_coll = current_app.db('users')
        owner_count = users_coll.count_documents({'groups': owner_gid})
        if share_action == ShareAction.unshare and owner_count < 2:
            self._log.warning('User %s tried to make Manager %s ownerless',
                              current_user.user_id, manager_id)
            raise ValueError('Manager cannot become ownerless.')

        group_action = {
            ShareAction.share: '$addToSet',
            ShareAction.unshare: '$pull',
        }[share_action]

        users.user_group_action(subject_uid, owner_gid, group_action)

    def owning_users(self, owner_gid: bson.ObjectId) -> typing.List[dict]:
        assert isinstance(owner_gid, bson.ObjectId)

        users_coll = current_app.db('users')
        users = users_coll.find({'groups': owner_gid})
        return list(users)

    def managers_for_project(
            self, project_id: bson.ObjectId) -> typing.List[bson.ObjectId]:
        """Returns a list of Manager object IDs assigned to the given project."""

        assert isinstance(project_id, bson.ObjectId)

        managers_coll = current_flamenco.db('managers')
        managers = managers_coll.find({'projects': project_id}, {'_id': 1})
        return [m['_id'] for m in managers]

    def owned_managers(
            self,
            user_group_ids: typing.List[bson.ObjectId],
            projection: typing.Optional[dict] = None) -> pymongo.cursor.Cursor:
        """Returns a Mongo cursor of Manager object IDs owned by the given user.

        :param user_group_ids: list of the group IDs of the user.
        :param projection: When not None, it is used instead of the default {'_id': 1}.
        """

        if projection is None:
            projection = {'_id': 1}

        managers_coll = current_flamenco.db('managers')
        managers = managers_coll.find({'owner': {
            '$in': user_group_ids
        }}, projection)
        return managers

    def queue_task_log_request(self, manager_id: bson.ObjectId,
                               job_id: bson.ObjectId, task_id: bson.ObjectId):
        """Queue a request to the Manager to upload this task's log file."""

        self._log.info(
            'Queueing task log file request for Manager %s, job %s task %s',
            manager_id, job_id, task_id)
        self._task_log_request(
            manager_id, {
                '$addToSet': {
                    'upload_task_file_queue': {
                        'job': job_id,
                        'task': task_id
                    }
                }
            })

    def dequeue_task_log_request(self, manager_id: bson.ObjectId,
                                 task_id: bson.ObjectId):
        """De-queue a request to the Manager to upload this task's log file.

        This is what's called when the Manager has actually uploaded this task's file.
        """

        self._log.info(
            'De-queueing task log file request for Manager %s, task %s',
            manager_id, task_id)

        self._task_log_request(
            manager_id,
            {'$pull': {
                'upload_task_file_queue': {
                    'task': task_id
                }
            }})

    def _task_log_request(self, manager_id: bson.ObjectId, operation: dict):
        managers_coll = current_flamenco.db('managers')
        managers_coll.update_one({'_id': manager_id}, {
            **operation,
            '$set': {
                '_updated': utcnow(),
                '_etag': random_etag(),
            },
        })
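
The HMAC hasher above is what lets the server verify payloads signed by a Manager with its own authentication token. A minimal verification sketch, assuming the Manager sends the hex digest of the payload along with it; the function and its calling convention are illustrative.

import hmac


def verify_manager_signature(manager_id: bson.ObjectId, payload: bytes,
                             received_hex_digest: str) -> bool:
    """Hypothetical check of a Manager-signed payload."""

    hasher = current_flamenco.manager_manager.hasher(manager_id)
    if hasher is None:
        return False  # No authentication token, so nothing to verify against.

    hasher.update(payload)
    # Compare in constant time to avoid leaking timing information.
    return hmac.compare_digest(hasher.hexdigest(), received_hex_digest)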
Exemplo n.º 8
0
class JobManager(object):
    _log = attrs_extra.log('%s.JobManager' % __name__)

    def api_create_job(self, job_name, job_desc, job_type, job_settings,
                       project_id, user_id, manager_id, priority=50):
        """Creates a job, returning a dict with its generated fields."""

        job = {
            'name': job_name,
            'description': job_desc,
            'job_type': job_type,
            'project': project_id,
            'user': user_id,
            'manager': manager_id,
            'status': 'under-construction',
            'priority': int(priority),
            'settings': copy.deepcopy(job_settings),
        }

        self._log.info('Creating job %r for user %s and manager %s',
                       job_name, user_id, manager_id)

        r, _, _, status = current_app.post_internal('flamenco_jobs', job)
        if status != 201:
            self._log.error('Status should be 201, not %i: %s' % (status, r))
            raise ValueError('Unable to create Flamenco job, status code %i' % status)

        job.update(r)
        return job

    def jobs_for_project(self, project_id, *, archived=False):
        """Returns the jobs for the given project.

        :returns: {'_items': [job, job, ...], '_meta': {Eve metadata}}
        """
        from .sdk import Job

        # Eve doesn't support '$eq' :(
        status_q = 'archived' if archived else {'$ne': 'archived'}
        where = {'project': project_id,
                 'status': status_q}

        api = pillar_api()
        try:
            j = Job.all({
                'where': where,
                'sort': [('_updated', -1), ('_created', -1)],
            }, api=api)
        except pillarsdk.ResourceNotFound:
            return {'_items': [], '_meta': {'total': 0}}
        return j

    def job_status_summary(self, project_id):
        """Returns number of shots per shot status for the given project.

        :rtype: ProjectSummary
        """
        from .sdk import Job

        api = pillar_api()

        # TODO: turn this into an aggregation call to do the counting on
        # MongoDB.
        try:
            jobs = Job.all({
                'where': {
                    'project': project_id,
                },
                'projection': {
                    'status': 1,
                },
                'order': [
                    ('status', 1),
                ],
            }, api=api)
        except pillarsdk.ResourceNotFound:
            return ProjectSummary()

        # FIXME: this breaks when we hit the pagination limit.
        summary = ProjectSummary()
        for job in jobs['_items']:
            summary.count(job['status'])

        return summary

    def update_job_after_task_status_change(self, job_id, task_id, new_task_status):
        """Updates the job status based on the status of this task and other tasks in the job.
        """

        jobs_coll = current_flamenco.db('jobs')
        tasks_coll = current_flamenco.db('tasks')

        def __job_status_if_a_then_b(if_status: str, then_new_status: str):
            """Set job to active if it was queued."""

            job = jobs_coll.find_one(job_id, projection={'status': 1})
            if job['status'] == if_status:
                self._log.info('Job %s became %s because one of its tasks %s changed '
                               'status to %s', job_id, then_new_status, task_id, new_task_status)
                self.api_set_job_status(job_id, then_new_status)

        if new_task_status == 'queued':
            # Re-queueing a task on a completed job should re-queue the job too.
            __job_status_if_a_then_b('completed', 'queued')
            return

        if new_task_status in {'cancel-requested', 'claimed-by-manager'}:
            # Cancelling a single task has no influence on the job itself, and
            # a task being claimed by the Manager doesn't change the job status either.
            return

        if new_task_status == 'canceled':
            # This could be the last cancel-requested task to go to 'canceled'.
            statuses = tasks_coll.distinct('status', {'job': job_id})
            if 'cancel-requested' not in statuses:
                self._log.info('Last task %s of job %s went from cancel-requested to canceled.',
                               task_id, job_id)
                self.api_set_job_status(job_id, 'canceled')
            return

        if new_task_status == 'failed':
            # Count the number of failed tasks. If it is 10% or more, fail the job.
            total_count = tasks_coll.count_documents({'job': job_id})
            fail_count = tasks_coll.count_documents({'job': job_id, 'status': 'failed'})
            fail_perc = fail_count / float(total_count) * 100
            if fail_perc >= TASK_FAIL_JOB_PERCENTAGE:
                self._log.warning('Failing job %s because %i of its %i tasks (%i%%) failed',
                                  job_id, fail_count, total_count, fail_perc)
                self.api_set_job_status(job_id, 'failed')
            else:
                self._log.warning('Task %s of job %s failed; '
                                  'only %i of its %i tasks failed (%i%%), so ignoring for now',
                                  task_id, job_id, fail_count, total_count, fail_perc)
                __job_status_if_a_then_b('queued', 'active')
            return

        if new_task_status in {'active', 'processing'}:
            job = jobs_coll.find_one(job_id, projection={'status': 1})
            if job['status'] != 'active':
                self._log.info('Job %s became active because one of its tasks %s changed '
                               'status to %s', job_id, task_id, new_task_status)
                self.api_set_job_status(job_id, 'active')
            return

        if new_task_status == 'completed':
            # Maybe all tasks are completed, which should complete the job.
            statuses = tasks_coll.distinct('status', {'job': job_id})
            if statuses == ['completed']:
                self._log.info('All tasks (last one was %s) of job %s are completed, '
                               'setting job to completed.',
                               task_id, job_id)
                self.api_set_job_status(job_id, 'completed')
            else:
                __job_status_if_a_then_b('queued', 'active')
            return

        self._log.warning('Task %s of job %s obtained status %s, '
                          'which we do not know how to handle.',
                          task_id, job_id, new_task_status)

    def web_set_job_status(self, job_id, new_status):
        """Web-level call to updates the job status."""
        from .sdk import Job

        api = pillar_api()
        job = Job({'_id': job_id})
        job.patch({'op': 'set-job-status',
                   'status': new_status}, api=api)

    def api_set_job_status(self, job_id, new_status,
                           *, now: datetime.datetime = None) -> pymongo.results.UpdateResult:
        """API-level call to updates the job status."""

        self._log.info('Setting job %s status to "%s"', job_id, new_status)

        jobs_coll = current_flamenco.db('jobs')
        curr_job = jobs_coll.find_one({'_id': job_id}, projection={'status': 1})
        old_status = curr_job['status']

        result = current_flamenco.update_status('jobs', job_id, new_status, now=now)
        self.handle_job_status_change(job_id, old_status, new_status)

        return result

    def handle_job_status_change(self, job_id, old_status, new_status):
        """Updates task statuses based on this job status transition."""

        query = None
        to_status = None
        if new_status in {'completed', 'canceled'}:
            # Nothing to do; this will happen as a response to all tasks receiving this status.
            return
        elif new_status == 'active':
            # Nothing to do; this happens when a task gets started, which has nothing to
            # do with other tasks in the job.
            return
        elif new_status in {'cancel-requested', 'failed'}:
            # Directly cancel any task that might run in the future, but is not touched by
            # a manager yet.
            current_flamenco.update_status_q(
                'tasks',
                {'job': job_id, 'status': 'queued'},
                'canceled')
            # Request cancel of any task that might run on the manager.
            cancelreq_result = current_flamenco.update_status_q(
                'tasks',
                {'job': job_id, 'status': {'$in': ['active', 'claimed-by-manager']}},
                'cancel-requested')

            # Update the activity of all the tasks we just cancelled (or requested cancellation),
            # so that users can tell why they were cancelled.
            current_flamenco.task_manager.api_set_activity(
                {'job': job_id,
                 'status': {'$in': ['cancel-requested', 'canceled']},
                 'activity': {'$exists': False}},
                'Server cancelled this task because the job got status %r.' % new_status
            )

            # If the new status is cancel-requested, and no tasks were marked as cancel-requested,
            # we can directly transition the job to 'canceled', without waiting for more task
            # updates.
            if new_status == 'cancel-requested' and cancelreq_result.modified_count == 0:
                self._log.info('handle_job_status_change(%s, %s, %s): no cancel-requested tasks, '
                               'so transitioning directly to canceled',
                               job_id, old_status, new_status)
                self.api_set_job_status(job_id, 'canceled')
            return
        elif new_status == 'queued':
            if old_status == 'under-construction':
                # Nothing to do, the job compiler has just finished its work; the tasks have
                # already been set to 'queued' status.
                self._log.debug('Ignoring job status change %r -> %r', old_status, new_status)
                return

            if old_status == 'completed':
                # Re-queue all tasks except cancel-requested; those should remain
                # untouched; changing their status is only allowed by managers, to avoid
                # race conditions.
                query = {'status': {'$ne': 'cancel-requested'}}
            else:
                # Re-queue any non-completed task. Cancel-requested tasks should also be
                # untouched; changing their status is only allowed by managers, to avoid
                # race conditions.
                query = {'status': {'$nin': ['completed', 'cancel-requested']}}
            to_status = 'queued'

        if query is None:
            self._log.debug('Job %s status change from %s to %s has no effect on tasks.',
                            job_id, old_status, new_status)
            return
        if to_status is None:
            self._log.error('Job %s status change from %s to %s has to_status=None, aborting.',
                            job_id, old_status, new_status)
            return

        # Update the tasks.
        query['job'] = job_id

        current_flamenco.update_status_q('tasks', query, to_status)

    def archive_job(self, job: dict):
        """Initiates job archival by creating a Celery task for it."""

        from flamenco.celery import job_archival

        job_id = job['_id']
        job_status = job['status']

        if job_status in ARCHIVE_JOB_STATES:
            msg = f'Job {job_id} cannot be archived, it has status {job_status}'
            self._log.info(msg)
            raise wz_exceptions.UnprocessableEntity(msg)

        # Store current job status in a special key so that it can be restored before
        # writing to the archive ZIP file as JSON.
        jobs_coll = current_flamenco.db('jobs')
        jobs_coll.update_one({'_id': job_id},
                             {'$set': {'pre_archive_status': job_status}})

        # Immediately set job status to 'archiving', as this should be reflected ASAP in the
        # database + web interface, rather than waiting for a Celery Worker to pick it up.
        self.api_set_job_status(job_id, 'archiving')

        self._log.info('Creating Celery background task for archival of job %s', job_id)
        job_archival.archive_job.delay(str(job_id))
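
The failure threshold in update_job_after_task_status_change() is easiest to see with concrete numbers. A standalone sketch of the same computation; the 10% value is an assumption taken from the comment in that method.

TASK_FAIL_JOB_PERCENTAGE = 10  # assumed value, per the comment in the method


def job_should_fail(total_count: int, fail_count: int) -> bool:
    """Mirrors the failure decision in update_job_after_task_status_change()."""
    fail_perc = fail_count / float(total_count) * 100
    return fail_perc >= TASK_FAIL_JOB_PERCENTAGE


assert job_should_fail(total_count=20, fail_count=2)      # 10% -> fail the job
assert not job_should_fail(total_count=20, fail_count=1)  # 5% -> keep the job going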
Exemplo n.º 9
0
class API:
    # The remote URL and credentials are separate. This way we can log the
    # URL that is used in requests without worrying about leaking creds.
    remote_url: str = attr.ib(validator=attr.validators.instance_of(str))
    """URL of the remote SVNMan API.
    
    Should probably end in '/api/'.
    """

    username: str = attr.ib(validator=attr.validators.instance_of(str))
    """Username for authenticating ourselves with the API."""
    password: str = attr.ib(validator=attr.validators.instance_of(str),
                            repr=False)
    """Password for authenticating ourselves with the API."""

    _log = attrs_extra.log('%s.Remote' % __name__)
    _session = requests.Session()

    def __attrs_post_init__(self):
        from requests.adapters import HTTPAdapter

        # Mounting on '/' never matches absolute http(s) URLs, so mount per scheme.
        self._session.mount('http://', HTTPAdapter(max_retries=10))
        self._session.mount('https://', HTTPAdapter(max_retries=10))

    def _request(self, method: str, rel_url: str,
                 **kwargs) -> requests.Response:
        """Performs a HTTP request on the API server."""

        from urllib.parse import urljoin

        abs_url = urljoin(self.remote_url, rel_url)
        self._log.getChild('request').info('%s %s', method, abs_url)

        auth = (self.username,
                self.password) if self.username or self.password else None
        return self._session.request(method, abs_url, auth=auth, **kwargs)

    def _raise_for_status(self, resp: requests.Response):
        """Raises the appropriate exception for the given response."""

        if resp.status_code < 400:
            return

        exc_class = exceptions.http_error_map[resp.status_code]
        raise exc_class(resp.text)

    def fetch_repo(self, repo_id: str) -> RepoDescription:
        """Fetches repository information from the remote."""

        resp = self._request('GET', f'repo/{repo_id}')
        self._raise_for_status(resp)

        return RepoDescription(**resp.json())

    def create_repo(self, create_repo: CreateRepo) -> str:
        """Creates a new repository with the given ID.

        Note that the repository ID may be changed by the SVNMan;
        always use the repo ID as returned by this function.

        :param create_repo: info required by the API
        :raises svnman.exceptions.RepoAlreadyExists:
        :returns: the repository ID as returned by the SVNMan.
        """

        self._log.info('Creating repository %r', create_repo)
        resp = self._request('POST', 'repo', json=attr.asdict(create_repo))
        if resp.status_code == requests.codes.conflict:
            raise exceptions.RepoAlreadyExists(create_repo.repo_id)
        self._raise_for_status(resp)

        repo_info = resp.json()
        return repo_info['repo_id']

    def modify_access(self, repo_id: str,
                      grant: typing.List[typing.Tuple[str, str]],
                      revoke: typing.List[str]):
        """Modifies user access to the repository.

        Does not return anything; no exception means execution was OK.

        :param repo_id: the repository ID
        :param grant: list of (username, password) tuples. The passwords should be BCrypt-hashed.
        :param revoke: list of usernames.
        """

        # Replace the hash type indicator, as Apache only gets BCrypt
        # when using the 2y marker.
        def changehash(p):
            if p[:4] in HASH_TYPES_TO_REPLACE:
                return f'$2y${p[4:]}'
            return p

        grants = [{'username': u, 'password': changehash(p)} for u, p in grant]

        self._log.info(
            'Modifying access rules for repository %r: grants=%s revokes=%s',
            repo_id, [u for u, p in grant], revoke)

        resp = self._request('POST',
                             f'repo/{repo_id}/access',
                             json={
                                 'grant': grants,
                                 'revoke': revoke,
                             })
        self._raise_for_status(resp)

    def delete_repo(self, repo_id: str):
        """Deletes a repository, cannot be undone through the API."""

        self._log.info('Deleting repository %r', repo_id)
        resp = self._request('DELETE', f'repo/{repo_id}')
        self._raise_for_status(resp)
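
Putting the client together: construct it with the remote URL and credentials, create a repository, and always continue with the ID the SVNMan returns. A minimal usage sketch; the URL, the credentials and the CreateRepo fields shown are placeholders, not a documented calling convention.

api = API(remote_url='https://svn.example.com/api/',
          username='pillar',
          password='s3cret')

new_repo = CreateRepo(repo_id='my-project')  # other fields omitted/assumed
try:
    repo_id = api.create_repo(new_repo)
except exceptions.RepoAlreadyExists:
    repo_id = new_repo.repo_id

# Always use the ID as returned by the SVNMan.
repo_info = api.fetch_repo(repo_id)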
Exemplo n.º 10
0
class BlenderRenderProgressive(AbstractJobCompiler):
    """Progressive Blender render job.

    Creates a render task for each Cycles sample chunk, and creates merge
    tasks to merge those render outputs into progressively refining output.

    Intermediate files are created in a subdirectory of the render output path.
    """

    _log = attrs_extra.log('%s.BlenderRenderProgressive' % __name__)

    REQUIRED_SETTINGS = ('blender_cmd', 'filepath', 'render_output', 'frames',
                         'chunk_size', 'format', 'cycles_sample_count',
                         'cycles_num_chunks')

    def _compile(self, job):
        import math

        self._log.info('Compiling job %s', job['_id'])
        self.validate_job_settings(job)
        self.job_settings = job['settings']

        # The render output contains a filename pattern, most likely '######' or
        # something similar. This has to be removed, so that we end up with
        # the directory that will contain the frames.
        self.render_output = PurePath(job['settings']['render_output'])
        self.render_path = self.render_output.parent
        self.intermediate_path = self.render_path.with_name(
            self.render_path.name + '__intermediate')

        destroy_interm_task_id = self._make_destroy_intermediate_task(job)
        task_count = 1

        cycles_sample_count = int(self.job_settings['cycles_sample_count'])
        self.cycles_num_chunks = int(self.job_settings['cycles_num_chunks'])
        sample_count_per_chunk = int(
            math.ceil(float(cycles_sample_count) / self.cycles_num_chunks))

        next_merge_task_deps = []
        prev_samples_to = 0
        for cycles_chunk_idx in range(
                int(self.job_settings['cycles_num_chunks'])):
            # Compute the Cycles sample range for this chunk (chunk_idx in base-0), in base-1.
            cycles_samples_from = cycles_chunk_idx * sample_count_per_chunk + 1
            cycles_samples_to = min(
                (cycles_chunk_idx + 1) * sample_count_per_chunk,
                cycles_sample_count)

            # Create render tasks for each frame chunk. Only this function uses the base-0
            # chunk/sample numbers, so we also convert to the base-1 numbers that Blender
            # uses.
            render_task_ids = self._make_progressive_render_tasks(
                job,
                'render-smpl%i-%i-frm%%s' %
                (cycles_samples_from, cycles_samples_to),
                destroy_interm_task_id,
                cycles_chunk_idx + 1,
                cycles_samples_from,
                cycles_samples_to,
                task_priority=-cycles_chunk_idx * 10,
            )
            task_count += len(render_task_ids)

            # Create progressive image merge tasks, based on previous list of render tasks
            # and the just-created list.
            if cycles_chunk_idx == 0:
                # Nothing to merge yet, just copy the first renders.
                publish_task_id = self._make_publish_first_chunk_task(
                    job,
                    render_task_ids,
                    cycles_samples_from,
                    cycles_samples_to,
                )
                task_count += 1
                next_merge_task_deps = len(render_task_ids) * [publish_task_id]
            else:
                # Both merge and render tasks should have same number of frame chunks.
                assert len(next_merge_task_deps) == len(render_task_ids)
                merge_task_ids = self._make_merge_tasks(
                    job,
                    'merge-to-smpl%i-frm%%s' % cycles_samples_to,
                    cycles_chunk_idx + 1,
                    next_merge_task_deps,
                    render_task_ids,
                    cycles_samples_to1=prev_samples_to,
                    cycles_samples_from2=cycles_samples_from,
                    cycles_samples_to2=cycles_samples_to,
                    task_priority=-cycles_chunk_idx * 10 - 1,
                )
                task_count += len(merge_task_ids)
                next_merge_task_deps = merge_task_ids
            prev_samples_to = cycles_samples_to

        self._log.info('Created %i tasks for job %s', task_count, job['_id'])

    def validate_job_settings(self, job):
        """Ensure that the job uses format=EXR."""
        super().validate_job_settings(job)

        from flamenco import exceptions

        render_format = job['settings']['format']
        if render_format.upper() != 'EXR':
            raise exceptions.JobSettingError(
                'Job %s must use format="EXR", not %r' %
                (job['_id'], render_format))

        # This is quite a limitation, but it makes it a lot simpler for our
        # code to predict the filename that Blender will use.
        render_output = job['settings']['render_output']
        if not render_output.endswith('######') or render_output.endswith(
                '#######'):
            raise exceptions.JobSettingError(
                'Setting "render_output" must end in exactly 6 "#" marks.')

    def _make_destroy_intermediate_task(self, job: dict) -> ObjectId:
        """Removes the entire intermediate directory."""

        cmd = commands.RemoveTree(path=str(self.intermediate_path))
        task_id = self._create_task(job, [cmd],
                                    'destroy-preexisting-intermediate',
                                    'file-management')
        return task_id

    def _make_publish_first_chunk_task(self, job: dict,
                                       parents: typing.List[ObjectId],
                                       cycles_samples_from: int,
                                       cycles_samples_to: int) -> ObjectId:
        """Publishes the first cycles-chunk of renders."""

        cmds: typing.List[commands.AbstractCommand] = [
            commands.MoveOutOfWay(src=str(self.render_path))
        ]

        src_path = self._render_output(cycles_samples_from, cycles_samples_to)
        src_fmt = str(src_path).replace('######', '%06i.exr')
        dest_fmt = str(self.render_output).replace('######', '%06i.exr')

        for chunk_frames in self._iter_frame_chunks():
            for frame in chunk_frames:
                cmds.append(
                    commands.CopyFile(
                        src=src_fmt % frame,
                        dest=dest_fmt % frame,
                    ))

        task_id = self._create_task(job,
                                    cmds,
                                    'publish-first-chunk',
                                    'file-management',
                                    parents=parents)
        return task_id

    def _make_progressive_render_tasks(self, job, name_fmt, parents,
                                       cycles_chunk_idx, cycles_samples_from,
                                       cycles_samples_to, task_priority):
        """Creates the render tasks for this job.

        :param parents: either a list of parents, one for each task, or a
            single parent used for all tasks.
        :param cycles_chunk_idx: base-1 sample chunk index

        :returns: created task IDs, one render task per frame chunk.
        :rtype: list
        """

        from bson import ObjectId
        from flamenco.utils import iter_frame_range, frame_range_merge

        job_settings = job['settings']

        task_ids = []
        frame_chunk_iter = iter_frame_range(job_settings['frames'],
                                            job_settings['chunk_size'])
        for chunk_idx, chunk_frames in enumerate(frame_chunk_iter):
            frame_range = frame_range_merge(chunk_frames)
            frame_range_bstyle = frame_range_merge(chunk_frames,
                                                   blender_style=True)

            name = name_fmt % frame_range

            render_output = self._render_output(cycles_samples_from,
                                                cycles_samples_to)

            task_cmds = [
                commands.BlenderRenderProgressive(
                    blender_cmd=job_settings['blender_cmd'],
                    filepath=job_settings['filepath'],
                    format=job_settings.get('format'),
                    # Don't render to actual render output, but to an intermediate file.
                    render_output=str(render_output),
                    frames=frame_range_bstyle,
                    cycles_num_chunks=self.cycles_num_chunks,
                    cycles_chunk=cycles_chunk_idx,
                    cycles_samples_from=cycles_samples_from,
                    cycles_samples_to=cycles_samples_to,
                )
            ]

            if isinstance(parents, list):
                parent_task_id = parents[chunk_idx]
            else:
                parent_task_id = parents

            if not isinstance(parent_task_id, ObjectId):
                raise TypeError(
                    'parents should be list of ObjectIds or ObjectId, not %s' %
                    parents)

            task_id = self._create_task(job,
                                        task_cmds,
                                        name,
                                        'blender-render',
                                        parents=[parent_task_id],
                                        priority=task_priority)
            task_ids.append(task_id)

        return task_ids

    def _render_output(self, cycles_samples_from,
                       cycles_samples_to) -> PurePath:
        """Intermediate render output path, with ###### placeholder for the frame nr"""
        render_fname = 'render-smpl-%04i-%04i-frm-######' % (
            cycles_samples_from, cycles_samples_to)
        render_output = self.intermediate_path / render_fname
        return render_output

    def _merge_output(self, cycles_samples_to) -> PurePath:
        """Intermediate merge output path, with ###### placeholder for the frame nr"""
        merge_fname = 'merge-smpl-%04i-frm-######' % cycles_samples_to
        merge_output = self.intermediate_path / merge_fname
        return merge_output

    def _iter_frame_chunks(self) -> typing.Iterable[typing.List[int]]:
        """Iterates over the frame chunks"""
        from flamenco.utils import iter_frame_range

        yield from iter_frame_range(self.job_settings['frames'],
                                    self.job_settings['chunk_size'])

    def _make_merge_tasks(self, job, name_fmt, cycles_chunk_idx, parents1,
                          parents2, cycles_samples_to1, cycles_samples_from2,
                          cycles_samples_to2, task_priority):
        """Creates merge tasks for each chunk, consisting of merges for each frame.

        :param cycles_chunk_idx: base-1 sample chunk index

        """

        # Merging cannot happen unless we have at least two chunks
        assert cycles_chunk_idx >= 2

        from flamenco.utils import frame_range_merge

        task_ids = []

        weight1 = cycles_samples_to1
        weight2 = cycles_samples_to2 - cycles_samples_from2 + 1

        # Replace Blender formatting with Python formatting in render output path
        if cycles_chunk_idx == 2:
            # The first merge takes a render output as input1, subsequent ones take merge outputs.
            # Merging only happens from Cycles chunk 2 (it needs two inputs, hence 2 chunks).
            input1 = self._render_output(1, cycles_samples_to1)
        else:
            input1 = self._merge_output(cycles_samples_to1)
        input2 = self._render_output(cycles_samples_from2, cycles_samples_to2)
        output = self._merge_output(cycles_samples_to2)

        # Construct format strings from the paths.
        input1_fmt = str(input1).replace('######', '%06i.exr')
        input2_fmt = str(input2).replace('######', '%06i.exr')
        output_fmt = str(output).replace('######', '%06i.exr')
        final_dest_fmt = str(self.render_output).replace('######', '%06i.exr')

        for chunk_idx, chunk_frames in enumerate(self._iter_frame_chunks()):
            # Create a merge command for every frame in the chunk.
            task_cmds = []
            for framenr in chunk_frames:
                intermediate = output_fmt % framenr
                task_cmds.append(
                    commands.MergeProgressiveRenders(
                        input1=input1_fmt % framenr,
                        input2=input2_fmt % framenr,
                        output=intermediate,
                        weight1=weight1,
                        weight2=weight2,
                    ))
                task_cmds.append(
                    commands.CopyFile(
                        src=intermediate,
                        dest=final_dest_fmt % framenr,
                    ))

            name = name_fmt % frame_range_merge(chunk_frames)

            parent1 = parents1[chunk_idx]
            parent2 = parents2[chunk_idx]

            task_id = self._create_task(job,
                                        task_cmds,
                                        name,
                                        'exr-merge',
                                        parents=[parent1, parent2],
                                        priority=task_priority)
            task_ids.append(task_id)

        return task_ids
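
The sample-chunk arithmetic in _compile() converts a base-0 chunk index into Blender's base-1, inclusive sample range. A standalone sketch of the same computation, mirroring that method.

import math


def cycles_sample_ranges(sample_count: int, num_chunks: int):
    """Yields (samples_from, samples_to) per chunk, base-1 and inclusive."""
    per_chunk = int(math.ceil(sample_count / num_chunks))
    for chunk_idx in range(num_chunks):
        yield (chunk_idx * per_chunk + 1,
               min((chunk_idx + 1) * per_chunk, sample_count))


# 250 samples in 4 chunks of up to ceil(250/4) = 63 samples each:
assert list(cycles_sample_ranges(250, 4)) == [
    (1, 63), (64, 126), (127, 189), (190, 250)]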
Exemplo n.º 11
0
class OrgManager:
    """Organization manager.

    Performs actions on an Organization. Does *NOT* test user permissions -- the caller
    is responsible for that.
    """

    _log = attrs_extra.log('%s.OrgManager' % __name__)

    def create_new_org(self,
                       name: str,
                       admin_uid: bson.ObjectId,
                       seat_count: int,
                       *,
                       org_roles: typing.Iterable[str] = None) -> dict:
        """Creates a new Organization.

        Returns the new organization document.
        """

        assert isinstance(admin_uid, bson.ObjectId)

        org_doc = {
            'name': name,
            'admin_uid': admin_uid,
            'seat_count': seat_count,
        }

        if org_roles:
            org_doc['org_roles'] = list(org_roles)

        r, _, _, status = current_app.post_internal('organizations', org_doc)
        if status != 201:
            self._log.error(
                'Error creating organization; status should be 201, not %i: %s',
                status, r)
            raise ValueError(
                f'Unable to create organization, status code {status}')

        org_doc.update(r)
        return org_doc

    def assign_users(self, org_id: bson.ObjectId,
                     emails: typing.List[str]) -> dict:
        """Assigns users to the organization.

        Checks the seat count and throws a NotEnoughSeats exception when the
        seat count is not sufficient to assign the requested users.

        Users are looked up by email address, and known users are
        automatically mapped.

        :returns: the new organization document.
        """

        self._log.info('Adding %i new members to organization %s', len(emails),
                       org_id)

        users_coll = current_app.db('users')
        existing_user_docs = list(
            users_coll.find({'email': {
                '$in': emails
            }},
                            projection={
                                '_id': 1,
                                'email': 1
                            }))
        unknown_users = set(emails) - {
            user['email']
            for user in existing_user_docs
        }
        existing_users = {user['_id'] for user in existing_user_docs}

        return self._assign_users(org_id, unknown_users, existing_users)

    def assign_single_user(self, org_id: bson.ObjectId, *,
                           user_id: bson.ObjectId) -> dict:
        """Assigns a single, known user to the organization.

        :returns: the new organization document.
        """

        self._log.info('Adding new member %s to organization %s', user_id,
                       org_id)
        return self._assign_users(org_id, set(), {user_id})

    def _assign_users(self, org_id: bson.ObjectId,
                      unknown_users: typing.Set[str],
                      existing_users: typing.Set[bson.ObjectId]) -> dict:

        if self._log.isEnabledFor(logging.INFO):
            self._log.info('  - found users: %s',
                           ', '.join(str(uid) for uid in existing_users))
            self._log.info('  - unknown users: %s', ', '.join(unknown_users))

        org_doc = self._get_org(org_id)

        # Compute the new members.
        members = set(org_doc.get('members') or []) | existing_users
        unknown_members = set(org_doc.get('unknown_members')
                              or []) | unknown_users

        # Make sure we don't exceed the current seat count.
        new_seat_count = len(members) + len(unknown_members)
        if new_seat_count > org_doc['seat_count']:
            self._log.warning(
                'assign_users(%s, ...): Trying to increase seats to %i, '
                'but org only has %i seats.', org_id, new_seat_count,
                org_doc['seat_count'])
            raise NotEnoughSeats(org_id, org_doc['seat_count'], new_seat_count)

        # Update the organization.
        org_doc['members'] = list(members)
        org_doc['unknown_members'] = list(unknown_members)

        r, _, _, status = current_app.put_internal(
            'organizations', remove_private_keys(org_doc), _id=org_id)
        if status != 200:
            self._log.error(
                'Error updating organization; status should be 200, not %i: %s',
                status, r)
            raise ValueError(
                f'Unable to update organization, status code {status}')
        org_doc.update(r)

        # Update the roles for the affected members
        for uid in existing_users:
            self.refresh_roles(uid)

        return org_doc

    def assign_admin(self, org_id: bson.ObjectId, *, user_id: bson.ObjectId):
        """Assigns a user as admin user for this organization."""

        assert isinstance(org_id, bson.ObjectId)
        assert isinstance(user_id, bson.ObjectId)

        org_coll = current_app.db('organizations')
        users_coll = current_app.db('users')

        if users_coll.count_documents({'_id': user_id}) == 0:
            raise ValueError('User not found')

        self._log.info('Updating organization %s, setting admin user to %s',
                       org_id, user_id)
        org_coll.update_one({'_id': org_id}, {'$set': {'admin_uid': user_id}})

    def remove_user(self,
                    org_id: bson.ObjectId,
                    *,
                    user_id: bson.ObjectId = None,
                    email: str = None) -> dict:
        """Removes a user from the organization.

        The user can be identified by either user ID or email.

        Returns the new organization document.
        """

        users_coll = current_app.db('users')

        assert user_id or email

        # Look up the email address if it wasn't given. This ensures removal
        # also works when the email accidentally ended up in the unknown_members list.
        if email is None:
            user_doc = users_coll.find_one(user_id, projection={'email': 1})
            if user_doc is not None:
                email = user_doc['email']

        # See if we know this user.
        if user_id is None:
            user_doc = users_coll.find_one({'email': email},
                                           projection={'_id': 1})
            if user_doc is not None:
                user_id = user_doc['_id']

        if user_id and not users_coll.count_documents({'_id': user_id}):
            raise wz_exceptions.UnprocessableEntity('User does not exist')

        self._log.info('Removing user %s / %s from organization %s', user_id,
                       email, org_id)

        org_doc = self._get_org(org_id)

        # Compute the new members.
        if user_id:
            members = set(org_doc.get('members') or []) - {user_id}
            org_doc['members'] = list(members)

        if email:
            unknown_members = set(org_doc.get('unknown_members') or []) - {email}
            org_doc['unknown_members'] = list(unknown_members)

        r, _, _, status = current_app.put_internal(
            'organizations', remove_private_keys(org_doc), _id=org_id)
        if status != 200:
            self._log.error(
                'Error updating organization; status should be 200, not %i: %s',
                status, r)
            raise ValueError(
                f'Unable to update organization, status code {status}')
        org_doc.update(r)

        # Update the roles for the affected member.
        if user_id:
            self.refresh_roles(user_id)

        return org_doc

    def _get_org(self, org_id: bson.ObjectId, *, projection=None):
        """Returns the organization, or raises a ValueError."""

        assert isinstance(org_id, bson.ObjectId)

        org_coll = current_app.db('organizations')
        org = org_coll.find_one(org_id, projection=projection)
        if org is None:
            raise ValueError(f'Organization {org_id} not found')
        return org

    def refresh_all_user_roles(self, org_id: bson.ObjectId):
        """Refreshes the roles of all members."""

        assert isinstance(org_id, bson.ObjectId)

        org = self._get_org(org_id, projection={'members': 1})
        members = org.get('members')
        if not members:
            self._log.info(
                'Organization %s has no members, nothing to refresh.', org_id)
            return

        for uid in members:
            self.refresh_roles(uid)

    def refresh_roles(self, user_id: bson.ObjectId) -> typing.Set[str]:
        """Refreshes the user's roles to own roles + organizations' roles.

        :returns: the applied set of roles.
        """

        assert isinstance(user_id, bson.ObjectId)

        from pillar.api.service import do_badger

        self._log.info('Refreshing roles for user %s', user_id)

        org_coll = current_app.db('organizations')
        tokens_coll = current_app.db('tokens')

        def aggr_roles(coll, match: dict) -> typing.Set[str]:
            query = coll.aggregate([{
                '$match': match
            }, {
                '$project': {
                    'org_roles': 1
                }
            }, {
                '$unwind': {
                    'path': '$org_roles'
                }
            }, {
                '$group': {
                    '_id': None,
                    'org_roles': {
                        '$addToSet': '$org_roles'
                    },
                }
            }])

            # If the user has no organizations/tokens at all, the query will have no results.
            try:
                org_roles_doc = query.next()
            except StopIteration:
                return set()
            return set(org_roles_doc['org_roles'])

        # Join all organization-given roles and roles from the tokens collection.
        org_roles = aggr_roles(org_coll, {'members': user_id})
        self._log.debug('Organization-given roles for user %s: %s', user_id,
                        org_roles)
        token_roles = aggr_roles(tokens_coll, {
            'user': user_id,
            'expire_time': {
                "$gt": utcnow()
            },
        })
        self._log.debug('Token-given roles for user %s: %s', user_id,
                        token_roles)
        org_roles.update(token_roles)

        users_coll = current_app.db('users')
        user_doc = users_coll.find_one(user_id, projection={'roles': 1})
        if not user_doc:
            self._log.warning(
                'Trying to refresh roles of non-existent user %s, ignoring',
                user_id)
            return set()

        all_user_roles = set(user_doc.get('roles') or [])
        existing_org_roles = {
            role
            for role in all_user_roles if role.startswith('org-')
        }

        grant_roles = org_roles - all_user_roles
        revoke_roles = existing_org_roles - org_roles

        if grant_roles:
            do_badger('grant', roles=grant_roles, user_id=user_id)
        if revoke_roles:
            do_badger('revoke', roles=revoke_roles, user_id=user_id)

        return all_user_roles.union(grant_roles) - revoke_roles

    def user_is_admin(self, org_id: bson.ObjectId) -> bool:
        """Returns whether the currently logged in user is the admin of the organization."""

        from pillar.api.utils.authentication import current_user_id

        uid = current_user_id()
        if uid is None:
            return False

        org = self._get_org(org_id, projection={'admin_uid': 1})
        return org.get('admin_uid') == uid

    def unknown_member_roles(self, member_email: str) -> typing.Set[str]:
        """Returns the set of organization roles for this user.

        Assumes the user is not yet known, i.e. part of the unknown_members lists.
        """

        org_coll = current_app.db('organizations')

        # Aggregate all org-given roles for this user.
        query = org_coll.aggregate([{
            '$match': {
                'unknown_members': member_email
            }
        }, {
            '$project': {
                'org_roles': 1
            }
        }, {
            '$unwind': {
                'path': '$org_roles'
            }
        }, {
            '$group': {
                '_id': None,
                'org_roles': {
                    '$addToSet': '$org_roles'
                },
            }
        }])

        # If the user has no organizations at all, the query will have no results.
        try:
            org_roles_doc = query.next()
        except StopIteration:
            return set()

        return set(org_roles_doc['org_roles'])

    def make_member_known(self, member_uid: bson.ObjectId, member_email: str):
        """Moves the given member from the unknown_members to the members lists."""

        # This uses a direct PyMongo query rather than using Eve's put_internal,
        # to prevent simultaneous updates from dropping users.

        org_coll = current_app.db('organizations')
        for org in org_coll.find({'unknown_members': member_email}):
            self._log.info(
                'Updating organization %s, marking member %s/%s as known',
                org['_id'], member_uid, member_email)
            org_coll.update_one({'_id': org['_id']}, {
                '$addToSet': {
                    'members': member_uid
                },
                '$pull': {
                    'unknown_members': member_email
                }
            })

    def org_members(
            self, member_string_ids: typing.Iterable[str]) -> typing.List[dict]:
        """Returns the user documents of the organization members.

        This is a workaround to provide membership information for
        organizations without giving 'mortal' users access to /api/users.
        """
        from pillar.api.utils import str2id

        if not member_string_ids:
            return []

        member_ids = [str2id(uid) for uid in member_string_ids]
        users_coll = current_app.db('users')
        users = users_coll.find({'_id': {
            '$in': member_ids
        }},
                                projection={
                                    '_id': 1,
                                    'full_name': 1,
                                    'email': 1,
                                    'avatar': 1
                                })
        return list(users)

    def user_has_organizations(self, user_id: bson.ObjectId) -> bool:
        """Returns True iff the user has anything to do with organizations.

        That is, if the user is admin for and/or member of any organization.
        """

        org_coll = current_app.db('organizations')

        org_count = org_coll.count_documents(
            {'$or': [{
                'admin_uid': user_id
            }, {
                'members': user_id
            }]})

        return bool(org_count)

    def user_is_unknown_member(self, member_email: str) -> bool:
        """Return True iff the email is an unknown member of some org."""

        org_coll = current_app.db('organizations')
        org_count = org_coll.count_documents({'unknown_members': member_email})
        return bool(org_count)

    def roles_for_ip_address(self, remote_addr: str) -> typing.Set[str]:
        """Find the roles given to the user via org IP range definitions."""

        from . import ip_ranges

        org_coll = current_app.db('organizations')
        try:
            q = ip_ranges.query(remote_addr)
        except ValueError as ex:
            self._log.warning(
                'Invalid remote address %s, ignoring IP-based roles: %s',
                remote_addr, ex)
            return set()

        orgs = org_coll.find(
            {'ip_ranges': q},
            projection={'org_roles': True},
        )
        return set(role for org in orgs for role in org.get('org_roles', []))

    def roles_for_request(self) -> typing.Set[str]:
        """Find roles for user via the request's remote IP address."""

        try:
            remote_addr = flask.request.access_route[0]
        except IndexError:
            return set()

        if not remote_addr:
            return set()

        roles = self.roles_for_ip_address(remote_addr)
        self._log.debug('Roles for IP address %s: %s', remote_addr, roles)

        return roles
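
# A minimal, self-contained sketch of the set arithmetic OrgManager relies on:
# the seat-count rule from _assign_users() and the grant/revoke computation from
# refresh_roles(). The helper names and sample values are invented for illustration.

def would_exceed_seats(org_doc: dict, new_member_ids: set, new_unknown_emails: set) -> bool:
    """Known and unknown members together may not exceed the seat count."""
    members = set(org_doc.get('members') or []) | new_member_ids
    unknown = set(org_doc.get('unknown_members') or []) | new_unknown_emails
    return len(members) + len(unknown) > org_doc['seat_count']

org = {'members': ['uid1'], 'unknown_members': ['a@example.com'], 'seat_count': 3}
assert not would_exceed_seats(org, {'uid2'}, set())          # 3 seats, 3 used
assert would_exceed_seats(org, {'uid2'}, {'b@example.com'})  # would need 4

def roles_delta(user_roles: set, org_given_roles: set) -> tuple:
    """Only 'org-' prefixed roles are managed here, so other roles are never revoked."""
    existing_org_roles = {role for role in user_roles if role.startswith('org-')}
    return (org_given_roles - user_roles,          # roles to grant
            existing_org_roles - org_given_roles)  # roles to revoke

grant, revoke = roles_delta({'subscriber', 'org-flamenco'}, {'org-cloud'})
assert grant == {'org-cloud'} and revoke == {'org-flamenco'}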
Example no. 12
@attr.s
class TaskManager(object):
    _log = attrs_extra.log('%s.TaskManager' % __name__)

    def collection(self) -> pymongo.collection.Collection:
        """Returns the Mongo database collection."""
        from flamenco import current_flamenco

        return current_flamenco.db('tasks')

    def api_create_task(self, job, commands, name, parents=None, priority=50,
                        status='queued', *, task_type: str) -> bson.ObjectId:
        """Creates a task in MongoDB for the given job, executing commands.

        Returns the ObjectId of the created task.
        """

        task = {
            'job': job['_id'],
            'manager': job['manager'],
            'user': job['user'],
            'name': name,
            'status': status,
            'job_type': job['job_type'],
            'task_type': task_type,
            'commands': [cmd.to_dict() for cmd in commands],
            'job_priority': job['priority'],
            'priority': priority,
            'project': job['project'],
        }
        # Insertion of None parents is not supported
        if parents:
            task['parents'] = parents

        self._log.info('Creating task %s for manager %s, user %s',
                       name, job['manager'], job['user'])

        r, _, _, status = current_app.post_internal('flamenco_tasks', task)
        if status != 201:
            self._log.error('Error %i creating task %s: %s',
                            status, task, r)
            raise wz_exceptions.InternalServerError('Unable to create task')

        return r['_id']

    def tasks_for_job(self, job_id, status=None, *,
                      page=1, max_results=250,
                      extra_where: dict = None):
        from .sdk import Task

        api = pillar_api()

        where = {'job': str(job_id)}
        if extra_where:
            where.update(extra_where)

        payload = {
            'where': where,
            'sort': [
                ('priority', -1),
                ('_id', 1),
            ],
            'max_results': max_results,
            'page': page,
        }
        if status:
            payload['where']['status'] = status

        tasks = Task.all(payload, api=api)
        self._log.debug(
            'tasks_for_job: where=%s  -> %i tasks in total, fetched page %i (%i per page)',
            payload['where'], tasks['_meta']['total'], page, max_results)
        return tasks

    def tasks_for_project(self, project_id):
        """Returns the tasks for the given project.

        :returns: {'_items': [task, task, ...], '_meta': {Eve metadata}}
        """
        from .sdk import Task

        api = pillar_api()
        try:
            tasks = Task.all({
                'where': {
                    'project': project_id,
                }}, api=api)
        except ResourceNotFound:
            return {'_items': [], '_meta': {'total': 0}}

        return tasks

    def web_set_task_status(self, task_id, new_status):
        """Web-level call to updates the task status."""
        from .sdk import Task

        api = pillar_api()
        task = Task({'_id': task_id})
        task.patch({'op': 'set-task-status',
                    'status': new_status}, api=api)

    def api_set_task_status_for_job(self, job_id: bson.ObjectId, from_status: str, to_status: str,
                                    *, now: datetime.datetime = None):
        """Updates the task status for all tasks of a job that have a particular status."""

        self._log.info('Flipping all tasks of job %s from status %r to %r',
                       job_id, from_status, to_status)

        from flamenco import current_flamenco

        current_flamenco.update_status_q('tasks',
                                         {'job': job_id, 'status': from_status},
                                         to_status,
                                         now=now)

    def api_set_activity(self, task_query: dict, new_activity: str):
        """Updates the activity for all tasks that match the query."""

        import uuid
        from bson import tz_util

        update = {
            'activity': new_activity,
            '_etag': uuid.uuid4().hex,
            '_updated': datetime.datetime.now(tz=tz_util.utc),
        }

        tasks_coll = self.collection()
        tasks_coll.update_many(task_query, {'$set': update})

    def api_find_job_enders(self, job_id):
        """Returns a list of tasks that could be the last tasks of a job.

        In other words, returns all tasks that are not a parent of other tasks.

        :returns: list of task IDs
        :rtype: list
        """

        tasks_coll = self.collection()

        # Get the distinct set of tasks used as parents.
        parent_tasks = tasks_coll.aggregate([
            {'$match': {'job': job_id}},
            {'$project': {'parents': 1}},
            {'$unwind': {'path': '$parents'}},
            {'$group': {'_id': '$parents'}},
        ])
        parent_ids = [t['_id'] for t in parent_tasks]

        # Get all the tasks that do not have such an ID.
        tasks = tasks_coll.find({'job': job_id,
                                 '_id': {'$nin': parent_ids}},
                                projection={'_id': 1})

        tids = [t['_id'] for t in tasks]
        return tids

    def api_delete_tasks_for_job(self, job_id: bson.ObjectId):
        """Deletes all tasks for a given job.

        NOTE: this breaks references in the task log database.
        """

        from pymongo.results import DeleteResult

        self._log.info('Deleting all tasks of job %s', job_id)
        tasks_coll = self.collection()
        delres: DeleteResult = tasks_coll.delete_many({'job': job_id})
        self._log.info('Deleted %i tasks of job %s', delres.deleted_count, job_id)
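
# Two minimal sketches for TaskManager above, both invented for illustration.
# First, the 'job ender' query in plain Python: a task can end the job iff no
# other task lists it as a parent. Second, a hypothetical helper that walks
# tasks_for_job() page by page using the Eve-style '_meta' metadata.

def find_job_enders(tasks):
    """Plain-Python equivalent of api_find_job_enders()."""
    parent_ids = {pid for task in tasks for pid in task.get('parents', [])}
    return [task['_id'] for task in tasks if task['_id'] not in parent_ids]

tasks = [
    {'_id': 'render-1'},
    {'_id': 'render-2'},
    {'_id': 'merge', 'parents': ['render-1', 'render-2']},
]
assert find_job_enders(tasks) == ['merge']

def iter_all_tasks(task_manager, job_id, per_page=250):
    """Yields every task of the job, fetching one page at a time."""
    page = 1
    while True:
        batch = task_manager.tasks_for_job(job_id, page=page, max_results=per_page)
        yield from batch['_items']
        if page * per_page >= batch['_meta']['total']:
            break
        page += 1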
Example no. 13
@attr.s
class BlenderRender(AbstractJobCompiler):
    """Basic Blender render job."""
    _log = attrs_extra.log('%s.BlenderRender' % __name__)

    REQUIRED_SETTINGS = ('blender_cmd', 'filepath', 'render_output', 'frames',
                         'chunk_size')

    def _compile(self, job):
        self._log.info('Compiling job %s', job['_id'])
        self.validate_job_settings(job)

        # The render path contains a filename pattern, most likely '######' or
        # something similar. This has to be removed, so that we end up with
        # the directory that will contain the frames.
        self.render_output = pathlib.PurePath(job['settings']['render_output'])
        self.final_dir = self.render_output.parent
        self.render_dir = self.final_dir.with_name(self.final_dir.name +
                                                   '__intermediate')

        render_tasks = self._make_render_tasks(job)
        self._make_move_to_final_task(job, render_tasks)

        task_count = len(render_tasks) + 1
        self._log.info('Created %i tasks for job %s', task_count, job['_id'])

    def _make_move_to_final_task(
            self, job,
            parent_task_ids: typing.List[bson.ObjectId]) -> bson.ObjectId:
        """Creates a MoveToFinal command to back up existing frames, and wraps it in a task.

        :returns: the ObjectId of the created task.
        """

        cmd = commands.MoveToFinal(
            src=str(self.render_dir),
            dest=str(self.final_dir),
        )

        task_id = self._create_task(job, [cmd],
                                    'move-to-final',
                                    'file-management',
                                    parents=parent_task_ids)
        return task_id

    def _make_render_tasks(self, job) -> typing.List[bson.ObjectId]:
        """Creates the render tasks for this job.

        :returns: the list of task IDs.
        """
        from flamenco.utils import iter_frame_range, frame_range_merge

        job_settings = job['settings']

        task_ids = []
        for chunk_frames in iter_frame_range(job_settings['frames'],
                                             job_settings['chunk_size']):
            frame_range = frame_range_merge(chunk_frames)
            frame_range_bstyle = frame_range_merge(chunk_frames,
                                                   blender_style=True)

            task_cmds = [
                commands.BlenderRender(
                    blender_cmd=job_settings['blender_cmd'],
                    filepath=job_settings['filepath'],
                    format=job_settings.get('format'),
                    render_output=str(self.render_dir /
                                      self.render_output.name),
                    frames=frame_range_bstyle)
            ]

            name = 'blender-render-%s' % frame_range
            task_ids.append(
                self._create_task(job, task_cmds, name, 'blender-render'))

        return task_ids
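
# The two flamenco.utils helpers used above do the frame-range bookkeeping.
# Below is a rough, illustrative reimplementation for the simple case of one
# contiguous range; the real utilities also accept ranges such as '1-10,20-25'
# and a Blender-style output flag.

def iter_contiguous_chunks(start: int, end: int, chunk_size: int):
    """Yields lists of frame numbers, each at most chunk_size long."""
    frames = list(range(start, end + 1))
    for i in range(0, len(frames), chunk_size):
        yield frames[i:i + chunk_size]

def merge_contiguous(chunk: list) -> str:
    """Formats a contiguous chunk as 'first-last' (or just 'first')."""
    return '%i-%i' % (chunk[0], chunk[-1]) if len(chunk) > 1 else str(chunk[0])

assert [merge_contiguous(c) for c in iter_contiguous_chunks(1, 10, 4)] \
    == ['1-4', '5-8', '9-10']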
Example no. 14
@attr.s
class AbstractJobCompiler(object, metaclass=abc.ABCMeta):
    task_manager = attr.ib(cmp=False, hash=False)
    job_manager = attr.ib(cmp=False, hash=False)
    _log = attrs_extra.log('%s.AbstractJobCompiler' % __name__)

    REQUIRED_SETTINGS = []

    def compile(self, job: dict):
        """Compiles the job into a list of tasks.

        Calls self.task_manager.create_task(...) to create the task in the database.
        """

        if not isinstance(job.get('_id'), bson.ObjectId):
            raise TypeError("job['_id'] should be an ObjectId, not %s" %
                            job.get('_id'))

        self._compile(job)
        self._flip_status(job)

    @abc.abstractmethod
    def _compile(self, job: dict):
        """Compiles the job into a list of tasks.

        Implement this in a subclass. Ensure that self._create_task(...) is used to create the
        tasks. This is important to prevent race conditions between job compilation and the Manager
        fetching tasks.
        """

    def _flip_status(self, job: dict):
        """Flips the job & tasks status from 'under-construction' to 'queued'."""

        import datetime
        from bson import tz_util

        # Flip all tasks for this job from 'under-construction' to 'queued', and do the same
        # with the job. This must all happen using a single '_updated' timestamp to prevent
        # race conditions.
        now = datetime.datetime.now(tz=tz_util.utc)

        # handle 'start paused' flag
        if job.get('start_paused', False):
            new_status = 'paused'
        else:
            new_status = 'queued'

        self.task_manager.api_set_task_status_for_job(job['_id'],
                                                      'under-construction',
                                                      new_status,
                                                      now=now)
        self.job_manager.api_set_job_status(job['_id'], new_status, now=now)

    def _create_task(self, job, commands, name, task_type,
                     **kwargs) -> bson.ObjectId:
        """Creates an under-construction task.

        Use this to construct tasks, rather than calling self.task_manager.api_create_task directly.
        This is important to prevent race conditions between job compilation and the Manager
        fetching tasks.
        """

        return self.task_manager.api_create_task(job,
                                                 commands,
                                                 name,
                                                 status='under-construction',
                                                 task_type=task_type,
                                                 **kwargs)

    def validate_job_settings(self, job):
        """Raises an exception if required settings are missing.

        :raises: flamenco.exceptions.JobSettingError
        """
        from pillarsdk import Resource

        job_settings = job['settings']
        if isinstance(job_settings, Resource):
            job_settings = job_settings.to_dict()

        missing = [
            key for key in self.REQUIRED_SETTINGS if key not in job_settings
        ]

        if not missing:
            return

        from flamenco import exceptions
        job_id = job.get('_id', '')
        if job_id:
            job_id = ' %s' % job_id
        if len(missing) == 1:
            setting = 'setting'
        else:
            setting = 'settings'

        raise exceptions.JobSettingError('Job%s is missing required %s: %s' %
                                         (job_id, setting, ', '.join(missing)))
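
# A minimal, self-contained sketch of the compile() flow above: both managers
# are stubbed out (no database involved) and OneTaskCompiler stands in for a
# real job compiler. All names here are invented for illustration. Note how
# every task is created 'under-construction' and only flipped to 'queued' at
# the end, so the Manager never sees a half-built job.

import bson

class _StubTaskManager:
    def api_create_task(self, job, commands, name, *, task_type, **kwargs):
        print('created task %r (%s) with status %r'
              % (name, task_type, kwargs.get('status')))
        return bson.ObjectId()

    def api_set_task_status_for_job(self, job_id, from_status, to_status, *, now=None):
        print('tasks of job %s: %s -> %s' % (job_id, from_status, to_status))

class _StubJobManager:
    def api_set_job_status(self, job_id, new_status, *, now=None):
        print('job %s -> %s' % (job_id, new_status))

class OneTaskCompiler(AbstractJobCompiler):
    def _compile(self, job):
        self._create_task(job, [], 'the-only-task', 'debug')

OneTaskCompiler(task_manager=_StubTaskManager(),
                job_manager=_StubJobManager()).compile({'_id': bson.ObjectId()})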
Example no. 15
@attr.s
class BlenderRender(AbstractBlenderJobCompiler):
    """Basic Blender render job."""
    _log = attrs_extra.log('%s.BlenderRender' % __name__)

    REQUIRED_SETTINGS = ('filepath', 'render_output', 'frames', 'chunk_size')

    def validate_job_settings(self, job):
        super().validate_job_settings(job)

        if hasattr(job, 'to_dict'):
            job = job.to_dict()

        filepath = job['settings']['filepath']
        if not filepath.lower().endswith('.blend'):
            raise exceptions.JobSettingError(
                f'filepath should end in .blend, but is {filepath!r}')

        fps = job['settings'].get('fps')
        if fps is not None and not isinstance(fps, (int, float)):
            raise exceptions.JobSettingError(
                f'Job {job["_id"]} has non-numerical "fps" setting {fps!r}')

        rna_overrides = job['settings'].get('rna_overrides') or []
        if not all(isinstance(override, str) for override in rna_overrides):
            raise exceptions.JobSettingError(
                f'Job {job["_id"]} has non-string element in '
                f'"rna_overrides" setting {rna_overrides!r}')

    def _compile(self, job):
        self._log.info('Compiling job %s', job['_id'])
        self.validate_job_settings(job)

        rna_overrides_task_id = self._make_rna_overrides_task(job)

        # The render path contains a filename pattern, most likely '######' or
        # something similar. This has to be removed, so that we end up with
        # the directory that will contain the frames.
        self.render_output = pathlib.PurePath(job['settings']['render_output'])
        self.final_dir = self.render_output.parent
        self.render_dir = intermediate_path(job, self.final_dir)

        render_tasks, parent_tasks = self._make_render_tasks(
            job, rna_overrides_task_id)
        create_video_task = self._make_create_video_task(job, parent_tasks)

        if create_video_task is None:
            final_parents = parent_tasks
        else:
            final_parents = [create_video_task]
        self._make_move_to_final_task(job, final_parents)

        task_count = len(render_tasks) + 1 + (create_video_task is not None)
        self._log.info('Created %i tasks for job %s', task_count, job['_id'])

    def _make_move_to_final_task(
            self, job,
            parent_task_ids: typing.List[bson.ObjectId]) -> bson.ObjectId:
        """Creates a MoveToFinal command to back up existing frames, and wraps it in a task.

        :returns: the ObjectId of the created task.
        """

        cmd = commands.MoveToFinal(
            src=str(self.render_dir),
            dest=str(self.final_dir),
        )

        task_id = self._create_task(job, [cmd],
                                    'move-to-final',
                                    'file-management',
                                    parents=parent_task_ids)
        return task_id

    def _make_create_video_task(self, job, parent_task_ids: typing.List[bson.ObjectId]) \
            -> typing.Optional[bson.ObjectId]:
        """Creates a CreateVideo command to render a video, and wraps it in a task.

        :returns: the ObjectId of the created task, or None if this task should not
            be created for this job.
        """

        job_id: bson.ObjectId = job['_id']
        job_settings = job['settings']
        if hasattr(job_settings, 'to_dict'):
            # Convert from PillarSDK Resource to a dictionary.
            job_settings = job_settings.to_dict()

        # Check whether we should create this task at all.
        images_or_video = job_settings.get('images_or_video', '-not set-')
        if images_or_video != 'images':
            self._log.debug(
                'Not creating create-video task for job %s with images_or_video=%s',
                job_id, images_or_video)
            return None

        try:
            fps = job_settings['fps']
        except KeyError:
            self._log.debug(
                'Not creating create-video task for job %s without fps setting',
                job_id)
            return None

        try:
            output_file_extension = job_settings['output_file_extension']
        except KeyError:
            self._log.debug(
                'Not creating create-video task for job %s without '
                'output_file_extension setting', job_id)
            return None

        # Check the Manager to see if the task type we need is supported at all.
        manager_id: bson.ObjectId = job.get('manager')
        if not manager_id:
            self._log.error(
                'Job %s is not assigned to a manager; not creating create-video task',
                job_id)
            return None
        manager = current_flamenco.db('managers').find_one(
            manager_id, projection={'worker_task_types': 1})
        if not manager:
            self._log.error(
                'Job %s has non-existent manager %s; not creating create-video task',
                job_id, manager_id)
            return None
        if 'worker_task_types' not in manager:
            self._log.info(
                'Manager %s for job %s has no known worker task types; '
                'not creating create-video task', manager_id, job_id)
            return None
        if 'video-encoding' not in manager['worker_task_types']:
            self._log.info(
                'Manager %s for job %s does not support the video-encoding task type; '
                'not creating create-video task', manager_id, job_id)
            return None

        blendfile = pathlib.Path(job_settings['filepath'])
        stem = blendfile.stem.replace('.flamenco', '')
        outfile = self.render_dir / f'{stem}-{job_settings["frames"]}.mkv'

        cmd = commands.CreateVideo(
            input_files=str(self.render_dir / f'*{output_file_extension}'),
            output_file=str(outfile),
            fps=fps,
        )

        task_id = self._create_task(job, [cmd],
                                    'create-video',
                                    'video-encoding',
                                    parents=parent_task_ids)
        return task_id

    def _make_render_tasks(self, job, parent_task_id: typing.Optional[bson.ObjectId]) \
            -> typing.Tuple[typing.List[bson.ObjectId], typing.List[bson.ObjectId]]:
        """Creates the render tasks for this job.

        :returns: two lists of task IDs: (all tasks, parent tasks for next command)
        """
        from flamenco.utils import iter_frame_range, frame_range_merge

        job_settings = job['settings']
        parent_task_ids = [parent_task_id] if parent_task_id else None

        task_ids = []
        for chunk_frames in iter_frame_range(job_settings['frames'],
                                             job_settings['chunk_size']):
            frame_range = frame_range_merge(chunk_frames)
            frame_range_bstyle = frame_range_merge(chunk_frames,
                                                   blender_style=True)

            task_cmds = [
                commands.BlenderRender(
                    blender_cmd=job_settings.get('blender_cmd', '{blender}'),
                    filepath=job_settings['filepath'],
                    format=job_settings.get('format'),
                    render_output=str(self.render_dir /
                                      self.render_output.name),
                    frames=frame_range_bstyle)
            ]

            name = 'blender-render-%s' % frame_range
            task_ids.append(
                self._create_task(job,
                                  task_cmds,
                                  name,
                                  'blender-render',
                                  parents=parent_task_ids))

        # Every render task is also a parent of the next step.
        return task_ids, task_ids

    def insert_rna_overrides_task(self, job: dict) -> bson.ObjectId:
        """Inject a new RNA Overrides task into an existing job.

        Returns the new task ID.
        """
        return self._insert_rna_overrides_task(job, {})
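
# A minimal sketch of the gating logic in _make_create_video_task() above,
# reduced to a pure function: the task is only worth creating when the job
# renders images, has 'fps' and 'output_file_extension' settings, and its
# Manager advertises the 'video-encoding' task type. The function name is
# invented for illustration.

def should_create_video_task(job_settings: dict, manager: dict) -> bool:
    if job_settings.get('images_or_video') != 'images':
        return False
    if 'fps' not in job_settings or 'output_file_extension' not in job_settings:
        return False
    return 'video-encoding' in (manager or {}).get('worker_task_types', [])

assert should_create_video_task(
    {'images_or_video': 'images', 'fps': 24, 'output_file_extension': '.mkv'},
    {'worker_task_types': ['blender-render', 'video-encoding']})
assert not should_create_video_task(
    {'images_or_video': 'video', 'fps': 24, 'output_file_extension': '.mkv'},
    {'worker_task_types': ['video-encoding']})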
Example no. 16
@attr.s
class ManagerManager(object):
    _log = attrs_extra.log('%s.ManagerManager' % __name__)

    def create_manager(self, project, manager_type=None, parent=None):
        """Creates a new manager, owned by the current user.

        :rtype: pillarsdk.Node
        """

        from pillar.web.jinja import format_undertitle

        api = pillar_api()
        node_type = project.get_node_type(node_type_manager['name'])
        if not node_type:
            raise ValueError('Project %s not set up for Flamenco' %
                             project._id)

        node_props = dict(
            name='New manager',
            project=project['_id'],
            user=flask_login.current_user.objectid,
            node_type=node_type['name'],
            properties={
                'status': node_type['dyn_schema']['status']['default'],
            },
        )

        if manager_type:
            node_props['name'] = format_undertitle(manager_type)
            node_props['properties']['manager_type'] = manager_type
        if parent:
            node_props['parent'] = parent

        manager = pillarsdk.Node(node_props)
        manager.create(api=api)
        return manager

    def edit_manager(self, manager_id, **fields):
        """Edits a manager.

        :type manager_id: str
        :type fields: dict
        :rtype: pillarsdk.Node
        """

        api = pillar_api()
        manager = pillarsdk.Node.find(manager_id, api=api)

        manager._etag = fields.pop('_etag')
        manager.name = fields.pop('name')
        manager.description = fields.pop('description')
        manager.properties.status = fields.pop('status')
        manager.properties.manager_type = fields.pop('manager_type',
                                                     '').strip() or None

        users = fields.pop('users', None)
        manager.properties.assigned_to = {'users': users or []}

        self._log.info('Saving manager %s', manager.to_dict())

        if fields:
            self._log.warning(
                'edit_manager(%r, ...) called with unknown fields %r; ignoring them.',
                manager_id, fields)

        manager.update(api=api)
        return manager

    def delete_manager(self, manager_id, etag):
        api = pillar_api()

        self._log.info('Deleting manager %s', manager_id)
        manager = pillarsdk.Node({'_id': manager_id, '_etag': etag})
        manager.delete(api=api)

    def managers_for_user(self, user_id):
        """Returns the managers for the given user.

        :returns: {'_items': [manager, manager, ...], '_meta': {Eve metadata}}
        """

        api = pillar_api()

        # TODO: also include managers assigned to any of the user's groups.
        managers = pillarsdk.Node.all(
            {
                'where': {
                    'properties.assigned_to.users': user_id,
                    'node_type': node_type_manager['name'],
                }
            },
            api=api)

        return managers

    def managers_for_project(self, project_id):
        """Returns the managers for the given project.

        :returns: {'_items': [manager, manager, ...], '_meta': {Eve metadata}}
        """

        api = pillar_api()
        managers = pillarsdk.Node.all(
            {
                'where': {
                    'project': project_id,
                    'node_type': node_type_manager['name'],
                }
            },
            api=api)
        return managers

    def api_manager_for_shortcode(self, shortcode):
        """Returns the manager for the given shortcode.

        :returns: the manager Node, or None if not found.
        """

        db = flask.current_app.db()
        manager = db['nodes'].find_one({
            'properties.shortcode': shortcode,
            'node_type': node_type_manager['name'],
        })

        return manager
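
# An illustrative sketch of the pop-and-warn pattern used by edit_manager():
# known fields are consumed with dict.pop(), so whatever remains afterwards must
# be unknown and is logged instead of silently applied. split_known_fields and
# the sample values are invented for illustration.

def split_known_fields(fields: dict, known: tuple) -> tuple:
    """Returns ({known field: value}, {unknown field: value})."""
    fields = dict(fields)  # don't mutate the caller's dict
    known_values = {key: fields.pop(key) for key in known if key in fields}
    return known_values, fields

known, unknown = split_known_fields(
    {'name': 'Render farm', 'status': 'active', 'bogus': 42},
    known=('name', 'description', 'status'))
assert known == {'name': 'Render farm', 'status': 'active'}
assert unknown == {'bogus': 42}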
Example no. 17
@attr.s
class BlenderVideoChunks(blender_render.AbstractBlenderJobCompiler):
    """Render video as chunks, then use ffmpeg to merge.

    Creates a render task for each frame chunk, and then merges the output
    files with ffmpeg to produce the final video.

    Intermediary files are created in a subdirectory of the render output path.
    """

    _log = attrs_extra.log('%s.BlenderVideoChunks' % __name__)
    REQUIRED_SETTINGS = ('filepath', 'render_output', 'frames', 'chunk_size',
                         'output_file_extension', 'images_or_video', 'fps',
                         'extract_audio')

    def validate_job_settings(self, job):
        super().validate_job_settings(job)

        if hasattr(job, 'to_dict'):
            job = job.to_dict()

        img_or_vid = job['settings']['images_or_video']
        if img_or_vid != 'video':
            raise exceptions.JobSettingError(
                f'Job {job["_id"]} is rendering {img_or_vid}, but job type requires video'
            )

        extract_audio = job['settings']['extract_audio']
        if not isinstance(extract_audio, bool):
            raise exceptions.JobSettingError(
                f'Job {job["_id"]} setting "extract_audio" is {extract_audio!r},'
                f' expected a boolean')

    def _compile(self, job):
        self._log.info('Compiling job %s', job['_id'])
        self.validate_job_settings(job)

        # For this job type, the filename in the render output is irrelevant.
        self.final_output_dir = PurePath(
            job['settings']['render_output']).parent
        self.frames_dir = self.final_output_dir / 'frames'

        self.audio_path = self.frames_dir / 'audio.aac'
        self.video_path = self.frames_dir / 'video.mkv'
        self.muxed_path = self.frames_dir / 'muxed.mkv'

        # Determine final output file.
        blendfile = PurePath(job['settings']['filepath'])
        output_file_extension = job['settings']['output_file_extension']
        stem = blendfile.stem.replace('.flamenco', '')
        now = datetime.datetime.now()
        outfname = f'{now:%Y_%m_%d}-{stem}{output_file_extension}'
        self.final_output_path = self.final_output_dir / outfname

        # Construct the tasks.
        moow_tid = self._make_moow_task(job)
        rna_overrides_task_id = self._make_rna_overrides_task(job, moow_tid)
        render_tasks, parent_tasks = self._make_render_tasks(
            job, rna_overrides_task_id or moow_tid)

        audio_tid = self._make_extract_audio_task(job, [moow_tid])
        video_tid = self._make_concat_video_task(job, parent_tasks)

        final_parent_tid = video_tid
        if audio_tid is not None:
            final_parent_tid = self._make_mux_audio_task(
                job, [audio_tid, video_tid])
        self._make_move_with_counter_task(job, [final_parent_tid])

        task_count = len(render_tasks) + 3
        if audio_tid is not None:
            task_count += 2
        self._log.info('Created %i tasks for job %s', task_count, job['_id'])

    def _make_moow_task(self, job) -> ObjectId:
        """Make the move-out-of-way task."""

        cmd = commands.MoveOutOfWay(src=str(self.frames_dir))
        return self._create_task(job, [cmd], 'move-out-of-way',
                                 'file-management')

    def _make_render_tasks(self, job, render_parent_tid: ObjectId) \
            -> typing.Tuple[typing.List[ObjectId], typing.List[ObjectId]]:
        """Creates the render tasks for this job.

        :returns: two lists of task IDs: (all tasks, parent tasks for the next step)
        """
        from flamenco.utils import iter_frame_range, frame_range_merge

        job_settings = job['settings']

        task_ids = []
        parent_task_ids = []

        for chunk_frames in iter_frame_range(job_settings['frames'],
                                             job_settings['chunk_size']):
            frame_range = frame_range_merge(chunk_frames)
            frame_range_bstyle = frame_range_merge(chunk_frames,
                                                   blender_style=True)

            first_frame = chunk_frames[0]
            last_frame = chunk_frames[-1]
            chunk_name = 'chunk-%05d-%05d' % (first_frame, last_frame)
            render_output = self.frames_dir / chunk_name / '######.png'

            # Export to PNG frames.
            task_cmds = [
                commands.BlenderRender(
                    blender_cmd=job_settings.get('blender_cmd', '{blender}'),
                    filepath=job_settings['filepath'],
                    format='PNG',
                    render_output=str(render_output),
                    frames=frame_range_bstyle,
                )
            ]
            name = 'frame-chunk-%s' % frame_range
            render_task_id = self._create_task(job,
                                               task_cmds,
                                               name,
                                               'blender-render',
                                               parents=[render_parent_tid])
            task_ids.append(render_task_id)

            # Encode PNG frames to video.
            file_extension = job_settings['output_file_extension']
            task_cmds = [
                commands.CreateVideo(
                    input_files=str(render_output.with_name('*.png')),
                    output_file=str(self.frames_dir /
                                    (chunk_name + file_extension)),
                    fps=job_settings['fps'],
                )
            ]
            name = 'video-chunk-%s' % frame_range
            encoding_task_id = self._create_task(job,
                                                 task_cmds,
                                                 name,
                                                 'video-encoding',
                                                 parents=[render_task_id])
            task_ids.append(encoding_task_id)
            parent_task_ids.append(encoding_task_id)

        return task_ids, parent_task_ids

    def _make_concat_video_task(self, job, parent_task_ids: typing.List[ObjectId]) \
            -> ObjectId:
        """Creates a MergeVideos command to merge the separate video chunks.

        :returns: the ObjectId of the created task.
        """

        job_settings = job['settings']
        output_file_extension = job_settings['output_file_extension']

        cmd = commands.ConcatenateVideos(
            input_files=str(self.frames_dir /
                            f'chunk-*{output_file_extension}'),
            output_file=str(self.video_path),
        )

        return self._create_task(job, [cmd],
                                 'concatenate-videos',
                                 'video-encoding',
                                 parents=parent_task_ids)

    def _make_extract_audio_task(self, job, parent_task_ids: typing.List[ObjectId]) \
            -> typing.Optional[ObjectId]:
        job_settings = job['settings']
        if not job_settings.get('extract_audio', False):
            return None

        from flamenco.utils import frame_range_start_end

        # BIG FAT ASSUMPTION that the frame range is continuous.
        frame_start, frame_end = frame_range_start_end(job_settings['frames'])

        flac_file = self.audio_path.with_suffix('.flac')
        cmd = commands.BlenderRenderAudio(
            blender_cmd=job_settings.get('blender_cmd', '{blender}'),
            filepath=job_settings['filepath'],
            render_output=str(flac_file),
            frame_start=frame_start,
            frame_end=frame_end,
        )
        extract_tid = self._create_task(job, [cmd],
                                        'render-audio',
                                        'blender-render',
                                        parents=parent_task_ids)

        cmd = commands.EncodeAudio(
            input_file=str(flac_file),
            output_file=str(self.audio_path),
            # Hard-coded for now:
            codec='aac',
            bitrate='192k',
        )
        return self._create_task(job, [cmd],
                                 'encode-audio',
                                 'video-encoding',
                                 parents=[extract_tid])

    def _make_mux_audio_task(self, job, parent_task_ids: typing.List[ObjectId]) \
            -> ObjectId:

        cmd = commands.MuxAudio(
            audio_file=str(self.audio_path),
            video_file=str(self.video_path),
            output_file=str(self.muxed_path),
        )
        return self._create_task(job, [cmd],
                                 'mux-audio-video',
                                 'video-encoding',
                                 parents=parent_task_ids)

    def _make_move_with_counter_task(self, job, parent_task_ids: typing.List[ObjectId]) \
            -> ObjectId:
        cmd = commands.MoveWithCounter(
            src=str(self.muxed_path),
            dest=str(self.final_output_path),
        )
        return self._create_task(job, [cmd],
                                 'move-with-counter',
                                 'file-management',
                                 parents=parent_task_ids)

    def insert_rna_overrides_task(self, job: dict) -> ObjectId:
        """Inject a new RNA Overrides task into an existing job.

        Returns the new task ID.
        """
        return self._insert_rna_overrides_task(job,
                                               {'name': 'move-out-of-way'})
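
# Two closing sketches for BlenderVideoChunks, both invented for illustration.
# First, the per-chunk naming scheme from _make_render_tasks() as a pure
# function; PurePosixPath keeps the assertions platform-independent.

from pathlib import PurePosixPath

def chunk_outputs(chunk_frames, frames_dir, file_extension):
    """Returns (render output pattern, encoded video chunk) for one chunk."""
    chunk_name = 'chunk-%05d-%05d' % (chunk_frames[0], chunk_frames[-1])
    render_output = frames_dir / chunk_name / '######.png'
    video_output = frames_dir / (chunk_name + file_extension)
    return render_output, video_output

render_out, video_out = chunk_outputs([1, 2, 3], PurePosixPath('/render/frames'), '.mkv')
assert str(render_out) == '/render/frames/chunk-00001-00003/######.png'
assert str(video_out) == '/render/frames/chunk-00001-00003.mkv'

# Second, the task graph _compile() wires up when extract_audio is enabled,
# written as a map from task name to its parents.

TASK_PARENTS = {
    'move-out-of-way': [],
    'rna-overrides': ['move-out-of-way'],                      # optional
    'frame-chunk-*': ['rna-overrides'],                        # one per chunk
    'video-chunk-*': ['frame-chunk-*'],                        # one per chunk
    'render-audio': ['move-out-of-way'],
    'encode-audio': ['render-audio'],
    'concatenate-videos': ['video-chunk-*'],
    'mux-audio-video': ['encode-audio', 'concatenate-videos'],
    'move-with-counter': ['mux-audio-video'],
}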