Example #1
    def upload_file(cls, uri, fobj):
        """
        Given a file-like object, copy it to the user's specified
        destination path (locally). We have to copy, since the encoded
        file resides in a temporary file that is deleted once the file-like
        object is garbage collected.

        :param str uri: The URI to copy the finished encoding to.
        :param file fobj: The file-like object of the finished encoding.
        :rtype: str
        :returns: The path to the final destination for the encoding.
        """
        # This is the path to the temp file that the encoding was saved to.
        tempfile_path = fobj.name
        # This is the path of the eventual destination file.
        outfile_path = cls._get_path_from_uri(uri)

        logger.debug("FileBackend.upload_file(): "\
                     "Copying tempfile '%s' to outfile %s" % (
            tempfile_path, outfile_path
        ))

        # The temp file will be deleted once the encoding is done, so make
        # a copy where the user requested the final file to end up.
        shutil.copyfile(tempfile_path, outfile_path)

        logger.debug("FileBackend.upload_file(): Copy complete.")

        return outfile_path
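
A minimal usage sketch of the method above. The import path and the file:// URI handling are assumptions, not shown in the example:

# Hypothetical usage sketch; the import path and URI layout are assumed.
import tempfile

from media_nommer.core.storage_backends.file_backend import FileBackend

# Pretend this temp file holds a finished encoding.
encoded = tempfile.NamedTemporaryFile(mode='w+b')
encoded.write('fake encoded payload')
encoded.flush()

# Copy the temp file to the destination named by the URI.
final_path = FileBackend.upload_file('file:///tmp/output.mp4', encoded)
print final_path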
Example #2
    def download_file(cls, uri, fobj):
        """
        Given a URI, download the file to the ``fobj`` file-like object.

        :param str uri: The URI of a file to download.
        :param file fobj: A file-like object to download the file to.
        :rtype: file
        :returns: A file handle to the downloaded file.
        """
        # Breaks the URI into usable components.
        values = get_values_from_media_uri(uri)

        conn = cls._get_aws_s3_connection(values['username'],
                                          values['password'])
        bucket = conn.get_bucket(values['host'])
        key = bucket.get_key(values['path'])

        logger.debug("S3Backend.download_file(): " \
                     "Downloading: %s" % uri)

        try:
            key.get_contents_to_file(fobj)
        except AttributeError:
            # Raised by ResumableDownloadHandler in boto when the given S3
            # key can't be found.
            message = "The specified input file cannot be found."
            raise InfileNotFoundException(message)

        logger.debug("S3Backend.download_file(): " \
                     "Download of %s completed." % uri)
        return fobj
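
A hedged usage sketch, assuming S3Backend is imported; the s3:// URI layout with embedded credentials (key:secret@bucket/path) is inferred from the username/password/host/path fields that get_values_from_media_uri() returns:

import tempfile

# Hypothetical URI; the credentials, bucket, and key are placeholders.
uri = 's3://ACCESS_KEY:SECRET_KEY@my-bucket/inputs/source.avi'
fobj = tempfile.NamedTemporaryFile(mode='w+b')
S3Backend.download_file(uri, fobj)
fobj.seek(0)  # Rewind before reading the downloaded bytes.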
Example #3
    def abandon_stale_jobs(cls):
        """
        On rare occasions, nommers crash so hard that no ``ERROR`` state change
        is made, and the job just gets stuck in a permanent unfinished state
        (``DOWNLOADING``, ``ENCODING``, ``UPLOADING``, etc). Rather than hang
        on to these indefinitely, abandon them by setting their state to
        ``ABANDONED``.

        The threshold for which jobs are considered abandoned is configurable
        via the
        :py:data:`FEEDERD_ABANDON_INACTIVE_JOBS_THRESH <media_nommer.conf.settings.FEEDERD_ABANDON_INACTIVE_JOBS_THRESH>`
        setting.
        """
        logger.debug("JobCache.abandon_stale_jobs(): "\
                     "Looking for stale jobs.")
        for id, job in cls.get_cached_jobs().items():
            if not job.is_finished():
                now_dtime = datetime.datetime.now()
                last_mod = job.last_modified_dtime

                tdelta = now_dtime - last_mod
                inactive_seconds = total_seconds(tdelta)

                if inactive_seconds >= settings.FEEDERD_ABANDON_INACTIVE_JOBS_THRESH:
                    cls.remove_job(job)
                    job.set_job_state('ABANDONED', job.job_state_details)
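
The snippet calls a total_seconds() helper rather than the timedelta method, presumably because that method only appeared in Python 2.7. A plausible equivalent, assuming the helper mirrors the stdlib formula:

def total_seconds(tdelta):
    # Same formula as datetime.timedelta.total_seconds(), which is
    # unavailable before Python 2.7.
    return (tdelta.microseconds +
            (tdelta.seconds + tdelta.days * 24 * 3600) * 10 ** 6) / 10.0 ** 6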
Example #4
    def refresh_jobs_with_state_changes(cls):
        """
        Looks at the state SQS queue specified by the
        :py:data:`SQS_JOB_STATE_CHANGE_QUEUE_NAME <media_nommer.conf.settings.SQS_JOB_STATE_CHANGE_QUEUE_NAME>`
        setting and refreshes any jobs that have changed. This simply reloads
        the job's details from SimpleDB_.
        
        :rtype: ``list`` of :py:class:`EncodingJob <media_nommer.core.job_state_backend.EncodingJob>`
        :returns: A list of changed :py:class:`EncodingJob` objects.
        """
        logger.debug("JobCache.refresh_jobs_with_state_changes(): " \
                     "Checking state change queue.")
        changed_jobs = JobStateBackend.pop_state_changes_from_queue(10)

        if changed_jobs:
            logger.info("Job state changes found: %s" % changed_jobs)
            for job in changed_jobs:
                if cls.is_job_cached(job):
                    current_state = cls.get_job(job).job_state
                    new_state = job.job_state

                    if current_state != new_state:
                        logger.info("* Job state changed %s: %s -> %s" % (
                            job.unique_id,
                            # Current job state in cache
                            current_state,
                            # New incoming job state
                            new_state,
                        ))
                        cls.update_job(job)
        return changed_jobs
Example #5
    def set_job_state(self, job_state, details=None):
        """
        Sets the job's state and saves it to the backend. Sends a notification
        to :doc:`../feederd` to re-load the job's data from SimpleDB_ via
        SQS_.
        
        :param str job_state: The state to set the job to.
        :keyword str details: Any details to go along with whatever
            job state this job is in. For example, if job_state is `ERROR`,
            this keyword might contain an error message.
        """
        if job_state not in JobStateBackend.JOB_STATES:
            raise Exception('Invalid job state: %s' % job_state)

        logger.debug("EncodingJob.set_job_state(): " \
                     "Setting job state on %s to %s" % (
                        self.unique_id, job_state))

        self.job_state = job_state
        self.job_state_details = details

        # Write the changes to the backend.
        self.save()
        # Announce a change in state, if the backend supports such a thing.
        self._send_state_change_notification()
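
A sketch of the intended call pattern, with the encoding step itself as a hypothetical placeholder:

try:
    encode(job)  # Hypothetical encoding call; not part of this API.
except Exception, exc:
    # Record the failure; set_job_state() saves to SimpleDB and
    # notifies feederd over SQS.
    job.set_job_state('ERROR', details=str(exc))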
Example #6
    def download_source_file(self):
        """
        Download the source file to a temporary file.
        """
        self.wrapped_set_job_state('DOWNLOADING')

        # This is the remote path.
        file_uri = self.job.source_path
        logger.debug("BaseNommer.download_source_file(): " \
                     "Attempting to download %s" % file_uri)
        # Figure out which backend to use for the protocol in the URI.
        storage = get_backend_for_uri(file_uri)
        # Create a temporary file which will be auto deleted when
        # garbage collected.
        fobj = tempfile.NamedTemporaryFile(mode='w+b', delete=True)
        # Using the correct backend, download the file to the given
        # file-like object.
        storage.download_file(file_uri, fobj)
        # flush and fsync to force writing to the file object. Doesn't always
        # happen otherwise.
        fobj.flush()
        os.fsync(fobj.fileno())

        logger.debug("BaseNommer.download_source_file(): " \
                     "Downloaded %s to %s" % (file_uri, fobj.name))

        # As soon as this fobj is garbage collected, it is closed(). Be
        # careful to continue its existence if you need it.
        return fobj
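
Given the warning in the final comment, callers have to keep the returned object referenced for as long as the temp file is needed. A sketch of that pattern, with run_encode() as a placeholder:

fobj = nommer.download_source_file()
try:
    # Keep fobj referenced for the whole encode; the temp file is
    # removed as soon as the object is garbage collected.
    run_encode(fobj)  # Hypothetical encoding step.
finally:
    fobj.close()  # Explicitly discard the temp file when done.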
Example #7
    def save(self):
        """
        Serializes and saves the job to SimpleDB_. In the case of a newly
        instantiated job, also handles queueing the job up into the new job
        queue.
        
        :rtype: str
        :returns: The unique ID of the job.
        """
        # Is this a new job that needs creation?
        is_new_job = not self.unique_id
        # Generate this once so our microseconds stay the same from
        # creation time to updated time.
        now_dtime = datetime.datetime.now()

        if is_new_job:
            # This serves as the "FK" equivalent.
            self.unique_id = self._generate_unique_job_id()
            # Create the item in the domain.
            job = JobStateBackend._get_sdb_job_state_domain().new_item(self.unique_id)
            # Start populating values.
            self.creation_dtime = now_dtime
            self.job_state = 'PENDING'
        else:
            # Retrieve the existing item for the job.
            job = JobStateBackend._get_sdb_job_state_domain().get_item(self.unique_id)
            if job is None:
                msg = 'EncodingJob.save(): ' \
                      'No match found in DB for ID: %s' % self.unique_id
                raise Exception(msg)

        if self.job_state_details and isinstance(self.job_state_details,
                                                 basestring):
            # Get within AWS's limitations. We'll assume that the error message
            # is probably near the tail end of the output (hopefully). Not
            # a great assumption, but it'll have to do.
            self.job_state_details = self.job_state_details[-1023:]

        job['unique_id'] = self.unique_id
        job['source_path'] = self.source_path
        job['dest_path'] = self.dest_path
        job['nommer'] = '%s.%s' % (self.nommer.__class__.__module__,
                                   self.nommer.__class__.__name__)
        job['job_options'] = json.dumps(self.job_options)
        job['job_state'] = self.job_state
        job['job_state_details'] = self.job_state_details
        job['notify_url'] = self.notify_url
        job['last_modified_dtime'] = now_dtime
        job['creation_dtime'] = self.creation_dtime

        logger.debug("EncodingJob.save(): Item pre-save values: %s" % job)

        job.save()

        if is_new_job:
            logger.debug("EncodingJob.save(): Enqueueing new job: %s" % self.unique_id)
            sqs_message = Message(body=job['unique_id'])
            JobStateBackend._get_sqs_new_job_queue().write(sqs_message)

        return job['unique_id']
Example #8
    def set_job_state(self, job_state, details=None):
        """
        Sets the job's state and saves it to the backend. Sends a notification
        to :doc:`../feederd` to re-load the job's data from SimpleDB_ via
        SQS_.
        
        :param str job_state: The state to set the job to.
        :keyword str details: Any details to go along with whatever
            job state this job is in. For example, if job_state is `ERROR`,
            this keyword might contain an error message.
        """
        if job_state not in JobStateBackend.JOB_STATES:
            raise Exception('Invalid job state: %s' % job_state)

        logger.debug("EncodingJob.set_job_state(): " \
                     "Setting job state on %s to %s" % (
                        self.unique_id, job_state))

        self.job_state = job_state
        self.job_state_details = details

        # Write the changes to the backend.
        self.save()
        # Announce a change in state, if the backend supports such a thing.
        self._send_state_change_notification()
Example #9
    def abandon_stale_jobs(cls):
        """
        On rare occasions, nommers crash so hard that no ``ERROR`` state change
        is made, and the job just gets stuck in a permanent unfinished state
        (``DOWNLOADING``, ``ENCODING``, ``UPLOADING``, etc). Rather than hang
        on to these indefinitely, abandon them by setting their state to
        ``ABANDONED``.

        The threshold for which jobs are considered abandoned is configurable
        via the
        :py:data:`FEEDERD_ABANDON_INACTIVE_JOBS_THRESH <media_nommer.conf.settings.FEEDERD_ABANDON_INACTIVE_JOBS_THRESH>`
        setting.
        """
        logger.debug("JobCache.abandon_stale_jobs(): "\
                     "Looking for stale jobs.")
        for id, job in cls.get_cached_jobs().items():
            if not job.is_finished():
                now_dtime = datetime.datetime.now()
                last_mod = job.last_modified_dtime

                tdelta = now_dtime - last_mod
                inactive_seconds = total_seconds(tdelta)

                if inactive_seconds >= settings.FEEDERD_ABANDON_INACTIVE_JOBS_THRESH:
                    cls.remove_job(job)
                    job.set_job_state('ABANDONED', job.job_state_details)
Example #10
    def download_source_file(self):
        """
        Download the source file to a temporary file.
        """
        self.wrapped_set_job_state('DOWNLOADING')

        # This is the remote path.
        file_uri = self.job.source_path
        logger.debug("BaseNommer.download_source_file(): " \
                     "Attempting to download %s" % file_uri)
        # Figure out which backend to use for the protocol in the URI.
        storage = get_backend_for_uri(file_uri)
        # Create a temporary file which will be auto deleted when
        # garbage collected.
        fobj = tempfile.NamedTemporaryFile(mode='w+b', delete=True)
        # Using the correct backend, download the file to the given
        # file-like object.
        storage.download_file(file_uri, fobj)
        # flush and fsync to force writing to the file object. Doesn't always
        # happen otherwise.
        fobj.flush()
        os.fsync(fobj.fileno())

        logger.debug("BaseNommer.download_source_file(): " \
                     "Downloaded %s to %s" % (file_uri, fobj.name))

        # As soon as this fobj is garbage collected, it is closed(). Be
        # careful to continue its existence if you need it.
        return fobj
Example #11
    def download_file(cls, uri, fobj):
        """
        Given a URI, download the file to the ``fobj`` file-like object.

        :param str uri: The URI of a file to download.
        :param file fobj: A file-like object to download the file to.
        :rtype: file
        :returns: A file handle to the downloaded file.
        """
        # Breaks the URI into usable components.
        values = get_values_from_media_uri(uri)

        conn = cls._get_aws_s3_connection(values['username'],
                                          values['password'])
        bucket = conn.get_bucket(values['host'])
        key = bucket.get_key(values['path'])

        logger.debug("S3Backend.download_file(): " \
                     "Downloading: %s" % uri)

        try:
            key.get_contents_to_file(fobj)
        except AttributeError:
            # Raised by ResumableDownloadHandler in boto when the given S3
            # key can't be found.
            message = "The specified input file cannot be found."
            raise InfileNotFoundException(message)

        logger.debug("S3Backend.download_file(): " \
                     "Download of %s completed." % uri)
        return fobj
Example #12
 def _send_state_change_notification(self):
     """
     Send a message to a state change SQS that lets feederd know to
     re-load the job from memory.
     """
     logger.debug("EncodingJob._send_state_change_notification(): " \
                  "Sending job state change for %s" % self.unique_id)
     sqs_message = Message()
     sqs_message.set_body(self.unique_id)
     JobStateBackend._get_sqs_state_change_queue().write(sqs_message)
Example #13
 def _send_state_change_notification(self):
     """
     Send a message to a state change SQS that lets feederd know to
     re-load the job from memory.
     """
     logger.debug("EncodingJob._send_state_change_notification(): " \
                  "Sending job state change for %s" % self.unique_id)
     sqs_message = Message()
     sqs_message.set_body(self.unique_id)
     JobStateBackend._get_sqs_state_change_queue().write(sqs_message)
Example #14
    def upload_to_destination(self, fobj):
        """
        Upload the output file to the destination specified by the user.
        """
        self.wrapped_set_job_state('UPLOADING')

        file_uri = self.job.dest_path
        logger.debug("BaseNommer.upload_to_destination(): " \
                     "Attempting to upload %s to %s" % (fobj.name, file_uri))
        storage = get_backend_for_uri(file_uri)
        storage.upload_file(file_uri, fobj)
        logger.debug("BaseNommer.upload_to_destination(): " \
                     "Finished uploading %s to %s" % (fobj.name, file_uri))
Example #15
    def upload_to_destination(self, fobj):
        """
        Upload the output file to the destination specified by the user.
        """
        self.wrapped_set_job_state('UPLOADING')

        file_uri = self.job.dest_path
        logger.debug("BaseNommer.upload_to_destination(): " \
                     "Attempting to upload %s to %s" % (fobj.name, file_uri))
        storage = get_backend_for_uri(file_uri)
        storage.upload_file(file_uri, fobj)
        logger.debug("BaseNommer.upload_to_destination(): " \
                     "Finished uploading %s to %s" % (fobj.name, file_uri))
Example #16
def register_tasks():
    """
    Registers all tasks. Called by the :doc:`../feederd` Twisted_ plugin.
    """
    task.LoopingCall(task_check_for_job_state_changes).start(
        settings.FEEDERD_JOB_STATE_CHANGE_CHECK_INTERVAL, now=False)

    task.LoopingCall(task_prune_jobs).start(
        settings.FEEDERD_PRUNE_JOBS_INTERVAL, now=True)

    # Only register the instance auto-spawning if enabled.
    if settings.FEEDERD_ALLOW_EC2_LAUNCHES:
        logger.debug("feederd will automatically scale EC2 instances.")
        task.LoopingCall(task_manage_ec2_instances).start(
            settings.FEEDERD_AUTO_SCALE_INTERVAL, now=False)
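
For reference, a stripped-down sketch of the Twisted LoopingCall pattern used above; the task body is a placeholder:

from twisted.internet import task, reactor

def heartbeat():
    # Placeholder body; the real tasks poll SQS, prune jobs, etc.
    print "tick"

# Call heartbeat() every 10 seconds; now=False skips the immediate
# first call, just like the job state change task above.
task.LoopingCall(heartbeat).start(10, now=False)
reactor.run()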
Example #17
class HTTPBackend(BaseStorageBackend):
    """
    Abstracts access to HTTP via the common set of file storage backend methods.

    .. note:: ``upload_file`` is not implemented yet, not sure how
        it should work.
    """
    @classmethod
    def download_file(cls, uri, fobj):
        """
        Given a URI, download the file to the ``fobj`` file-like object.
        
        :param str uri: The URI of a file to download.
        :param file fobj: A file-like object to download the file to.
        :rtype: file
        :returns: A file handle to the downloaded file.
        """
        request = urllib2.Request(uri)

        try:
            download = urllib2.urlopen(request)
        except urllib2.URLError, e:
            message = "The specified input file cannot be found: %s" % e
            raise InfileNotFoundException(message)

        fobj.write(download.read())

        logger.debug("HTTPBackend.download_file(): " \
                     "Download of %s completed." % uri)
        return fobj
Example #18
def register_tasks():
    """
    Registers all tasks. Called by the :doc:`../feederd` Twisted_ plugin.
    """
    task.LoopingCall(task_check_for_job_state_changes).start(
                            settings.FEEDERD_JOB_STATE_CHANGE_CHECK_INTERVAL,
                            now=False)

    task.LoopingCall(task_prune_jobs).start(
                            settings.FEEDERD_PRUNE_JOBS_INTERVAL,
                            now=False)

    # Only register the instance auto-spawning if enabled.
    if settings.FEEDERD_ALLOW_EC2_LAUNCHES:
        logger.debug("feederd will automatically scale EC2 instances.")
        task.LoopingCall(task_manage_ec2_instances).start(
                            settings.FEEDERD_AUTO_SCALE_INTERVAL, now=False)
Example #19
    def __assemble_ffmpeg_cmd_list(self,
                                   encoding_pass_options,
                                   infile_obj,
                                   outfile_obj,
                                   is_two_pass=False,
                                   is_second_pass=False):
        """
        Assembles a command list that subprocess.Popen() will use within
        self.__run_ffmpeg().
        
        :param file infile_obj: A file-like object for input.
        :param file outfile_obj: A file-like object to store the output.
        :rtype: list
        :returns: A list to be passed to subprocess.Popen().
        """
        #ffmpeg [[infile options][-i infile]]... {[outfile options] outfile}...
        ffmpeg_cmd = ['ffmpeg', '-y']

        # Form the ffmpeg infile and outfile options from the options
        # stored in the SimpleDB domain.
        if encoding_pass_options.has_key('infile_options'):
            infile_opts = encoding_pass_options['infile_options']
            self.__append_inout_opts_to_cmd_list(infile_opts, ffmpeg_cmd)

        # Specify infile
        ffmpeg_cmd += ['-i', infile_obj.name]

        if encoding_pass_options.has_key('outfile_options'):
            outfile_opts = encoding_pass_options['outfile_options']
            self.__append_inout_opts_to_cmd_list(outfile_opts, ffmpeg_cmd)

        if is_two_pass and not is_second_pass:
            # First pass of a 2-pass encoding.
            ffmpeg_cmd.append('/dev/null')
        else:
            # Second pass of a 2-pass encoding, or one-pass.
            ffmpeg_cmd.append(outfile_obj.name)

        logger.debug("FFmpegNommer.__run_ffmpeg(): Command to run: %s" %
                     ' '.join(ffmpeg_cmd))

        return ffmpeg_cmd
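
The __append_inout_opts_to_cmd_list() helper is not shown in these examples. A hypothetical reconstruction, assuming the options are stored as a mapping of ffmpeg flag names to values:

    def __append_inout_opts_to_cmd_list(self, opts, ffmpeg_cmd):
        # Hypothetical sketch: flatten an options mapping such as
        # {'vcodec': 'libx264', 'b': '512k'} into
        # ['-vcodec', 'libx264', '-b', '512k'] in place.
        for key, value in opts.items():
            ffmpeg_cmd.append('-%s' % key)
            if value is not None:
                ffmpeg_cmd.append(str(value))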
Example #20
    def __assemble_ffmpeg_cmd_list(self, encoding_pass_options, infile_obj,
                                   outfile_obj, is_two_pass=False,
                                   is_second_pass=False):
        """
        Assembles a command list that subprocess.Popen() will use within
        self.__run_ffmpeg() to run ffmpeg.
        
        :param file infile_obj: A file-like object for input.
        :param file outfile_obj: A file-like object to store the output.
        :rtype: list
        :returns: A list to be passed to subprocess.Popen().
        """
        #ffmpeg [[infile options][-i infile]]... {[outfile options] outfile}...
        ffmpeg_cmd = ['ffmpeg', '-y']

        # Form the ffmpeg infile and outfile options from the options
        # stored in the SimpleDB domain.
        if encoding_pass_options.has_key('infile_options'):
            infile_opts = encoding_pass_options['infile_options']
            self.__append_inout_opts_to_cmd_list(infile_opts, ffmpeg_cmd)

        # Specify infile
        ffmpeg_cmd += ['-i', infile_obj.name]

        if encoding_pass_options.has_key('outfile_options'):
            outfile_opts = encoding_pass_options['outfile_options']
            self.__append_inout_opts_to_cmd_list(outfile_opts, ffmpeg_cmd)

        if is_two_pass and not is_second_pass:
            # First pass of a 2-pass encoding.
            ffmpeg_cmd.append('/dev/null')
        else:
            # Second pass of a 2-pass encoding, or one-pass.
            ffmpeg_cmd.append(outfile_obj.name)

        logger.debug(
            "FFmpegNommer.__run_ffmpeg(): Command to run: %s" % ' '.join(
                ffmpeg_cmd
            )
        )

        return ffmpeg_cmd
Example #21
    def refresh_jobs_with_state_changes(cls):
        """
        Looks at the state SQS queue specified by the
        :py:data:`SQS_JOB_STATE_CHANGE_QUEUE_NAME <media_nommer.conf.settings.SQS_JOB_STATE_CHANGE_QUEUE_NAME>`
        setting and refreshes any jobs that have changed. This simply reloads
        the job's details from SimpleDB_.

        :rtype: ``list`` of :py:class:`EncodingJob <media_nommer.core.job_state_backend.EncodingJob>`
        :returns: A list of changed :py:class:`EncodingJob` objects.
        """
        logger.debug("JobCache.refresh_jobs_with_state_changes(): " \
                    "Checking state change queue.")
        # Pops up to 10 jobs that we think may have changed state. There
        # are some false alarms in here, which brings us to...
        popped_changed_jobs = JobStateBackend.pop_state_changes_from_queue(10)
        # A temporary list that stores the jobs that actually changed.
        # This is what the method ultimately returns.
        changed_jobs = []

        if popped_changed_jobs:
            logger.debug("Potential job state changes found: %s" %
                         popped_changed_jobs)
            for job in popped_changed_jobs:
                if cls.is_job_cached(job):
                    current_state = cls.get_job(job).job_state
                    new_state = job.job_state

                    if current_state != new_state:
                        logger.info("* Job state changed %s: %s -> %s" % (
                            job.unique_id,
                            # Current job state in cache
                            current_state,
                            # New incoming job state
                            new_state,
                        ))
                        cls.update_job(job)
                        # This one actually changed, append this for returning.
                        changed_jobs.append(job)
                        if new_state == 'ERROR':
                            logger.error('Error trace from ec2nommerd:')
                            logger.error(job.job_state_details)
        return changed_jobs
Example #22
    def refresh_jobs_with_state_changes(cls):
        """
        Looks at the state SQS queue specified by the
        :py:data:`SQS_JOB_STATE_CHANGE_QUEUE_NAME <media_nommer.conf.settings.SQS_JOB_STATE_CHANGE_QUEUE_NAME>`
        setting and refreshes any jobs that have changed. This simply reloads
        the job's details from SimpleDB_.

        :rtype: ``list`` of :py:class:`EncodingJob <media_nommer.core.job_state_backend.EncodingJob>`
        :returns: A list of changed :py:class:`EncodingJob` objects.
        """
        logger.debug("JobCache.refresh_jobs_with_state_changes(): " \
                    "Checking state change queue.")
        # Pops up to 10 jobs that we think may have changed state. There
        # are some false alarms in here, which brings us to...
        popped_changed_jobs = JobStateBackend.pop_state_changes_from_queue(10)
        # A temporary list that stores the jobs that actually changed.
        # This is what the method ultimately returns.
        changed_jobs = []

        if popped_changed_jobs:
            logger.debug("Potential job state changes found: %s" % popped_changed_jobs)
            for job in popped_changed_jobs:
                if cls.is_job_cached(job):
                    current_state = cls.get_job(job).job_state
                    new_state = job.job_state

                    if current_state != new_state:
                        logger.info("* Job state changed %s: %s -> %s" % (
                            job.unique_id,
                            # Current job state in cache
                            current_state,
                            # New incoming job state
                            new_state,
                        ))
                        cls.update_job(job)
                        # This one actually changed, append this for returning.
                        changed_jobs.append(job)
                        if new_state == 'ERROR':
                            logger.error('Error trace from ec2nommerd:')
                            logger.error(job.job_state_details)
        return changed_jobs
Example #23
    def __assemble_qtfaststart_cmd_list(self, outfile_obj):
        """
        Assembles a command list that subprocess.Popen() will use within
        self.__run_ffmpeg() to run qtfaststart.

        :param file outfile_obj: A file-like object to store the output.
        :rtype: list
        :returns: A list to be passed to subprocess.Popen().
        """
        qtf_cmd = [
            settings.NOMMERD_QTFASTSTART_BIN_PATH,
            outfile_obj.name
        ]

        logger.debug(
            "FFmpegNommer.__run_ffmpeg(): Command to run: %s" % ' '.join(
                qtf_cmd
            )
        )

        return qtf_cmd
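
The assembled list is presumably then handed to subprocess.Popen(); that step is not shown in these examples, but a minimal sketch would be:

import subprocess

process = subprocess.Popen(qtf_cmd, stdout=subprocess.PIPE,
                           stderr=subprocess.STDOUT)
output = process.communicate()[0]
if process.returncode != 0:
    # Hypothetical error handling; the real nommer presumably moves
    # the job to an ERROR state instead.
    raise Exception('qtfaststart failed: %s' % output)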
Example #24
    def upload_file(cls, uri, fobj):
        """
        Given a file-like object, upload it to the specified URI.

        :param str uri: The URI to upload the file to.
        :param file fobj: The file-like object to populate the S3 key from.
        :rtype: :py:class:`boto.s3.key.Key`
        :returns: The newly set boto key.
        """
        # Breaks the URI into usable components.
        values = get_values_from_media_uri(uri)
        logger.debug("S3Backend.upload_file(): Received: %s" % values)

        conn = cls._get_aws_s3_connection(values['username'],
                                          values['password'])
        bucket = conn.create_bucket(values['host'])
        key = bucket.new_key(values['path'])

        logger.debug("S3Backend.upload_file(): "\
                     "Settings contents of '%s' key from %s" % (
            values['path'], fobj.name))
        key.set_contents_from_filename(fobj.name)

        logger.debug("S3Backend.upload_file(): Upload complete.")
        return key
Example #25
def task_check_for_new_jobs():
    """
    Looks at the number of currently active threads and compares it against the 
    :py:data:`MAX_ENCODING_JOBS_PER_EC2_INSTANCE <media_nommer.conf.settings.MAX_ENCODING_JOBS_PER_EC2_INSTANCE>` 
    setting. If we are under the max, fire up another thread for encoding 
    additional job(s). 
    
    The interval at which :doc:`../ec2nommerd` checks for new jobs is 
    determined by the 
    :py:data:`NOMMERD_NEW_JOB_CHECK_INTERVAL <media_nommer.conf.settings.NOMMERD_NEW_JOB_CHECK_INTERVAL>`
    setting.
    
    Calls :py:func:`threaded_encode_job` for any jobs to encode.
    """
    num_active_threads = NodeStateManager.get_num_active_threads()
    max_threads = settings.MAX_ENCODING_JOBS_PER_EC2_INSTANCE
    num_jobs_to_pop = max(0, max_threads - num_active_threads)

    if num_jobs_to_pop > 0:
        # We have more room for encoding threads, determine how many.
        logger.debug("task_check_for_new_jobs: " \
                     "Popping up to %d new jobs." % num_jobs_to_pop)
        # This is an iterable of BaseEncodingJob sub-classed instances for
        # each job returned from the queue.
        jobs = JobStateBackend.pop_new_jobs_from_queue(num_jobs_to_pop)
        if jobs:
            logger.debug("* Popped %d jobs from the queue." % len(jobs))

        for job in jobs:
            # For each job returned, render in another thread.
            logger.debug("* Starting encoder thread for job: %s" % job.unique_id)
            reactor.callInThread(threaded_encode_job, job)
Example #26
    def upload_file(cls, uri, fobj):
        """
        Given a file-like object, upload it to the specified URI.

        :param str uri: The URI to upload the file to.
        :param file fobj: The file-like object to populate the S3 key from.
        :rtype: :py:class:`boto.s3.key.Key`
        :returns: The newly set boto key.
        """
        # Breaks the URI into usable components.
        values = get_values_from_media_uri(uri)
        logger.debug("S3Backend.upload_file(): Received: %s" % values)

        conn = cls._get_aws_s3_connection(values['username'],
                                          values['password'])
        bucket = conn.create_bucket(values['host'])
        key = bucket.new_key(values['path'])

        logger.debug("S3Backend.upload_file(): "\
                     "Settings contents of '%s' key from %s" % (
            values['path'], fobj.name))
        key.set_contents_from_filename(fobj.name)

        logger.debug("S3Backend.upload_file(): Upload complete.")
        return key
Example #27
    def download_file(cls, uri, fobj):
        """
        Given a URI, open said file from the local file system.
        
        :param str uri: The URI of a file to open.
        :param file fobj: This is unused in this backend, but here for
            the sake of consistency.
        :rtype: file
        :returns: A file handle to the given file.
        """
        infile_path = cls._get_path_from_uri(uri)
        if not os.path.exists(infile_path):
            message = "The specified input file cannot be found: %s" % infile_path
            raise InfileNotFoundException(message)

        logger.debug("FileBackend.download_file(): " \
                     "Opening of %s completed." % uri)

        infile = open(infile_path, 'rb')
        # Inefficient, but meh. We'll come up with something more clever later.
        for line in infile:
            fobj.write(line)

        return fobj
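
The line-by-line copy the comment flags as inefficient could be done in fixed-size chunks with shutil.copyfileobj; a sketch of that alternative:

import shutil

infile = open(infile_path, 'rb')
try:
    # Chunked, binary-safe copy instead of iterating over "lines".
    shutil.copyfileobj(infile, fobj)
finally:
    infile.close()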
Example #28
    def spawn_if_needed(cls):
        """
        Spawns additional EC2 instances if needed.
        
        :rtype: :py:class:`boto.ec2.instance.Reservation` or ``None``
        :returns: If instances are spawned, return a boto Reservation
            object. If no instances are spawned, ``None`` is returned.
        """
        instances = cls.get_instances()
        num_instances = len(instances)
        logger.debug("EC2InstanceManager.spawn_if_needed(): " \
                     "Current active instances: %d" % num_instances)

        if num_instances >= settings.MAX_NUM_EC2_INSTANCES:
            # No more instances, no spawning allowed.
            return

        unfinished_jobs = JobStateBackend.get_unfinished_jobs()
        num_unfinished_jobs = len(unfinished_jobs)
        logger.debug("EC2InstanceManager.spawn_if_needed(): " \
                     "Current unfinished jobs: %d" % num_unfinished_jobs)

        if num_unfinished_jobs == 0:
            # No unfinished jobs, no need to go any further.
            return

        job_capacity = num_instances * settings.MAX_ENCODING_JOBS_PER_EC2_INSTANCE

        if job_capacity == 0:
            # Don't factor in overflow thresh or anything if we have no
            # instances or capacity.
            cap_plus_thresh = 0
        else:
            cap_plus_thresh = job_capacity + settings.JOB_OVERFLOW_THRESH

        logger.debug("EC2InstanceManager.spawn_if_needed(): " \
                     "Job capacity (%d w/ thresh): %d" % (job_capacity,
                                                         cap_plus_thresh))

        is_over_capacity = num_unfinished_jobs >= cap_plus_thresh
        # Disregard the overflow thresh if there are jobs but no instances.
        if is_over_capacity or num_instances == 0:
            overage = num_unfinished_jobs - job_capacity
            if job_capacity > 0:
                # Only factor the overflow threshold in when we have capacity
                # available in some form.
                overage -= settings.JOB_OVERFLOW_THRESH

            if overage <= 0:
                # Adding in the overflow thresh brought this under the
                # overage level. No need for spawning instances.
                return None

            logger.info("EC2InstanceManager.spawn_if_needed(): " \
                         "Observed labor shortage of: %d" % overage)

            # Raw # of instances needing to be spawned.
            num_new_instances = overage / settings.MAX_ENCODING_JOBS_PER_EC2_INSTANCE
            # At this point, we know there's an overage, even with the overflow
            # thresh factored in (if there is at least one EC2 instance
            # already running).
            num_new_instances = max(num_new_instances, 1)
            # Also don't spawn more than the max configured instances.
            num_new_instances = min(num_new_instances,
                                    settings.MAX_NUM_EC2_INSTANCES)

            # The boto Reservation object. Its 'instances' attribute is the
            # important bit.
            if num_new_instances > 0:
                return cls.spawn_instances(num_new_instances)
        # No new instances.
        return None
Example #29
    def spawn_if_needed(cls):
        """
        Spawns additional EC2 instances if needed.
        
        :rtype: :py:class:`boto.ec2.instance.Reservation` or ``None``
        :returns: If instances are spawned, return a boto Reservation
            object. If no instances are spawned, ``None`` is returned.
        """
        instances = cls.get_instances()
        num_instances = len(instances)
        logger.debug("EC2InstanceManager.spawn_if_needed(): " \
                     "Current active instances: %d" % num_instances)

        if num_instances >= settings.MAX_NUM_EC2_INSTANCES:
            # No more instances, no spawning allowed.
            return

        unfinished_jobs = JobStateBackend.get_unfinished_jobs()
        num_unfinished_jobs = len(unfinished_jobs)
        logger.debug("EC2InstanceManager.spawn_if_needed(): " \
                     "Current unfinished jobs: %d" % num_unfinished_jobs)

        if num_unfinished_jobs == 0:
            # No unfinished jobs, no need to go any further.
            return

        job_capacity = num_instances * settings.MAX_ENCODING_JOBS_PER_EC2_INSTANCE

        if job_capacity == 0:
            # Don't factor in overflow thresh or anything if we have no
            # instances or capacity.
            cap_plus_thresh = 0
        else:
            cap_plus_thresh = job_capacity + settings.JOB_OVERFLOW_THRESH

        logger.debug("EC2InstanceManager.spawn_if_needed(): " \
                     "Job capacity (%d w/ thresh): %d" % (job_capacity,
                                                         cap_plus_thresh))

        is_over_capacity = num_unfinished_jobs >= cap_plus_thresh
        # Disregard the overflow thresh if there are jobs but no instances.
        if is_over_capacity or num_instances == 0:
            overage = num_unfinished_jobs - job_capacity
            if job_capacity > 0:
                # Only factor the overflow threshold in when we have capacity
                # available in some form.
                overage -= settings.JOB_OVERFLOW_THRESH
            logger.info("EC2InstanceManager.spawn_if_needed(): " \
                         "Observed labor shortage of: %d" % overage)

            # Raw # of instances needing to be spawned.
            num_new_instances = overage / settings.MAX_ENCODING_JOBS_PER_EC2_INSTANCE
            # At this point, we know there's an overage, even with the overflow
            # thresh factored in (if there is at least one EC2 instance
            # already running).
            num_new_instances = max(num_new_instances, 1)
            # Also don't spawn more than the max configured instances.
            num_new_instances = min(num_new_instances, settings.MAX_NUM_EC2_INSTANCES)

            # The boto Reservation object. Its 'instances' attribute is the
            # important bit.
            if num_new_instances > 0:
                return cls.spawn_instances(num_new_instances)
        # No new instances.
        return None
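
To make the capacity arithmetic above concrete, a worked example with made-up setting values:

# Hypothetical values, not real settings.
num_instances = 2          # currently running EC2 instances
jobs_per_instance = 4      # MAX_ENCODING_JOBS_PER_EC2_INSTANCE
overflow_thresh = 3        # JOB_OVERFLOW_THRESH
num_unfinished_jobs = 15

job_capacity = num_instances * jobs_per_instance       # 8
cap_plus_thresh = job_capacity + overflow_thresh       # 11; 15 >= 11,
                                                       # so we are over capacity
overage = num_unfinished_jobs - job_capacity - overflow_thresh  # 4
num_new_instances = max(overage / jobs_per_instance, 1)         # 4 / 4 = 1
# One new instance gets spawned (capped at MAX_NUM_EC2_INSTANCES).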
Example #30
    def save(self):
        """
        Serializes and saves the job to SimpleDB_. In the case of a newly
        instantiated job, also handles queueing the job up into the new job
        queue.
        
        :rtype: str
        :returns: The unique ID of the job.
        """
        # Is this a new job that needs creation?
        is_new_job = not self.unique_id
        # Generate this once so our microseconds stay the same from
        # creation time to updated time.
        now_dtime = datetime.datetime.now()

        if is_new_job:
            # This serves as the "FK" equivalent.
            self.unique_id = self._generate_unique_job_id()
            # Create the item in the domain.
            job = JobStateBackend._get_sdb_job_state_domain().new_item(
                self.unique_id)
            # Start populating values.
            self.creation_dtime = now_dtime
            self.job_state = 'PENDING'
        else:
            # Retrieve the existing item for the job.
            job = JobStateBackend._get_sdb_job_state_domain().get_item(
                self.unique_id)
            if job is None:
                msg = 'EncodingJob.save(): ' \
                      'No match found in DB for ID: %s' % self.unique_id
                raise Exception(msg)

        if self.job_state_details and isinstance(self.job_state_details,
                                                 basestring):
            # Get within AWS's limitations. We'll assume that the error message
            # is probably near the tail end of the output (hopefully). Not
            # a great assumption, but it'll have to do.
            self.job_state_details = self.job_state_details[-1023:]

        job['unique_id'] = self.unique_id
        job['source_path'] = self.source_path
        job['dest_path'] = self.dest_path
        job['nommer'] = '%s.%s' % (self.nommer.__class__.__module__,
                                   self.nommer.__class__.__name__)
        job['job_options'] = json.dumps(self.job_options)
        job['job_state'] = self.job_state
        job['job_state_details'] = self.job_state_details
        job['notify_url'] = self.notify_url
        job['last_modified_dtime'] = now_dtime
        job['creation_dtime'] = self.creation_dtime

        logger.debug("EncodingJob.save(): Item pre-save values: %s" % job)

        job.save()

        if is_new_job:
            logger.debug("EncodingJob.save(): Enqueueing new job: %s" %
                         self.unique_id)
            sqs_message = Message(body=job['unique_id'])
            JobStateBackend._get_sqs_new_job_queue().write(sqs_message)

        return job['unique_id']