Exemplo n.º 1
0
def clean_job_for_quality(batch_job_dict, fallback=True):
    """
    Removes jobs from the batch description that are not suitable for the master
    video's quality. It applies only for encoding jobs!

    Encoding jobs flagged 'enforce' are always kept; every other encoding job
    is kept only if assure_quality() accepts it for the input file. Jobs of
    any other mode are always kept. If no encoding job is kept at all, the
    encoding jobs flagged 'fallback' are used instead.

    @param batch_job_dict: the dict containing the batch description. Its
    'jobs' entry is replaced in place.
    @type batch_job_dict: dict
    @param fallback: if true, encoding jobs flagged 'fallback' are collected
    and used when no encoding job passes the quality check
    @type fallback: bool
    @return: the cleaned dict (the very object that was passed in)
    @rtype: dict
    """
    survived_jobs = []
    fallback_jobs = []
    other_jobs = []
    for job in batch_job_dict["jobs"]:
        ## Only encoding jobs are subject to the quality check
        if job["mode"] != "encode":
            other_jobs.append(job)
            continue
        ## A fallback job is remembered, but may still survive on its own
        if getval(job, "fallback") and fallback:
            fallback_jobs.append(job)
        ## Enforced jobs bypass the quality check
        if getval(job, "enforce"):
            survived_jobs.append(job)
            continue
        profile = None
        if getval(job, "profile"):
            profile = get_encoding_profile(job["profile"])
        if assure_quality(
            input_file=batch_job_dict["input"],
            aspect=chose2("aspect", job, profile),
            target_width=chose2("width", job, profile),
            target_height=chose2("height", job, profile),
            target_bitrate=chose2("videobitrate", job, profile),
        ):
            survived_jobs.append(job)
    ## Fallback jobs are only used if nothing else survived
    if survived_jobs:
        new_jobs = survived_jobs + other_jobs
    else:
        new_jobs = fallback_jobs + other_jobs
    ## The former debugging dump of locals() to stdout was removed here
    batch_job_dict["jobs"] = new_jobs
    return batch_job_dict
Exemplo n.º 2
0
def clean_job_for_quality(batch_job_dict, fallback=True):
    """
    Removes jobs from the batch description that are not suitable for the master
    video's quality. It applies only for encoding jobs!

    Jobs whose mode is not 'encode' are always kept. An encoding job is kept
    if it is flagged 'enforce' or if assure_quality() accepts it. When no
    encoding job is kept, the encoding jobs flagged 'fallback' take their
    place.

    @param batch_job_dict: the dict containing the batch description; its
    'jobs' list is overwritten with the cleaned list
    @type batch_job_dict: dict
    @param fallback: set to False to ignore the 'fallback' flag of the jobs
    @type fallback: bool
    @return: the cleaned dict (same object as batch_job_dict)
    @rtype: dict
    """
    survived_jobs = []
    fallback_jobs = []
    other_jobs = []
    for job in batch_job_dict['jobs']:
        if job['mode'] == 'encode':
            if getval(job, 'fallback') and fallback:
                fallback_jobs.append(job)
            if getval(job, 'enforce'):
                ## Enforced jobs are never checked against the master
                survived_jobs.append(job)
            else:
                profile = None
                if getval(job, 'profile'):
                    profile = get_encoding_profile(job['profile'])
                quality_ok = assure_quality(
                    input_file=batch_job_dict['input'],
                    aspect=chose2('aspect', job, profile),
                    target_width=chose2('width', job, profile),
                    target_height=chose2('height', job, profile),
                    target_bitrate=chose2('videobitrate', job, profile))
                if quality_ok:
                    survived_jobs.append(job)
        else:
            other_jobs.append(job)
    ## Use the fallback jobs only when no encoding job survived.
    ## (A leftover debugging dump of locals() to stdout was dropped here.)
    encoding_jobs = survived_jobs if survived_jobs else fallback_jobs
    batch_job_dict['jobs'] = encoding_jobs + other_jobs
    return batch_job_dict
Exemplo n.º 3
0
def clean_job_for_quality(batch_job_dict, fallback=True):
    """
    Removes jobs from the batch description that are not suitable for the master
    video's quality. It applies only for encoding jobs!

    The resulting job list consists of the kept encoding jobs followed by all
    non-encoding jobs. An encoding job is kept when it is flagged 'enforce' or
    when assure_quality() approves it; if none is kept, the encoding jobs
    flagged 'fallback' are used in their place.

    @param batch_job_dict: the dict containing the batch description (modified
    in place: its 'jobs' entry is replaced)
    @type batch_job_dict: dict
    @param fallback: whether to honour the 'fallback' flag of encoding jobs
    @type fallback: bool
    @return: the cleaned dict, i.e. batch_job_dict itself
    @rtype: dict
    """
    def passes_quality_check(job):
        """ Asks assure_quality() whether the job suits the input video
        """
        profile = None
        if getval(job, 'profile'):
            profile = get_encoding_profile(job['profile'])
        return assure_quality(
            input_file=batch_job_dict['input'],
            aspect=chose2('aspect', job, profile),
            target_width=chose2('width', job, profile),
            target_height=chose2('height', job, profile),
            target_bitrate=chose2('videobitrate', job, profile))

    survived_jobs = []
    fallback_jobs = []
    other_jobs = []
    for job in batch_job_dict['jobs']:
        if job['mode'] != 'encode':
            other_jobs.append(job)
            continue
        if getval(job, 'fallback') and fallback:
            fallback_jobs.append(job)
        ## 'enforce' short-circuits, so enforced jobs are never checked
        if getval(job, 'enforce') or passes_quality_check(job):
            survived_jobs.append(job)
    if not survived_jobs:
        ## Nothing suits the master: resort to the fallback jobs
        survived_jobs = fallback_jobs
    ## NOTE: the debugging dump of locals() to stdout was removed
    batch_job_dict['jobs'] = survived_jobs + other_jobs
    return batch_job_dict
Exemplo n.º 4
0
def encode_video(input_file, output_file,
                 acodec=None, vcodec=None,
                 abitrate=None, vbitrate=None,
                 resolution=None,
                 passes=1,
                 special=None, specialfirst=None, specialsecond=None,
                 metadata=None,
                 width=None, height=None, aspect=None,
                 profile=None,
                 update_fnc=task_update_progress,
                 message_fnc=write_message
                 ):
    """ Starts an ffmpeg encoding process based on the given parameters.
    The encoding is run as a subprocess. The progress of the subprocess is
    continuously written to the given messaging functions. In a normal case,
    these should be the ones of BibTask.

    @param input_file: Path to the input video.
    @type input_file: string

    @param output_file: Path to the output file. If no other parameters are
    given than input and output files, FFmpeg tries to auto-discover the right
    codecs for the given file extension. In this case, every other aspect like
    resolution and bitrates will be the same as in the input video.
    @type output_file: string

    @param acodec: The audio codec to use. This must be an available codec of
    libavcodec within FFmpeg.
    @type acodec: string

    @param vcodec: The video codec to use. This must be an available codec of
    libavcodec within FFmpeg.
    @type vcodec: string

    @param abitrate: Bitrate of the audio stream. In bit/s.
    @type abitrate: int

    @param vbitrate: Bitrate of the video stream. In bit/s.
    @type vbitrate: int

    @param resolution: Fixed size of the frames in the transcoded video.
    FFmpeg notation: 'WxH' or preset like 'vga'. See also 'width'
    @type resolution: string

    @param passes: Number of encoding passes. Either 1 or 2. If a profile is
    given, this value is replaced by the 'passes' setting of the profile
    (1 if the profile does not define it).
    @type passes: int

    @param special: Additional FFmpeg parameters. Only used for single pass
    encodings.
    @type special: string

    @param specialfirst: Additional FFmpeg parameters for the first pass of a
    two pass encoding.
    @type specialfirst: string

    @param specialsecond: Additional FFmpeg parameters for the second pass of
    a two pass encoding.
    @type specialsecond: string

    @param metadata: Metadata that should be added to the transcoded video.
    This must be a dictionary. As with metadata in FFmpeg, there is no
    guarantee that the metadata specified in the dictionary will really be added
    to the file, because it will largely depend on the container format and its
    supported fields.
    @type metadata: dict

    @param width: Instead of giving a fixed resolution, you can use width and
    height as dimensional constraints. The algorithm will try to preserve the
    original aspect and fit the new frame size into the given dimensions.
    @type width: int

    @param height: see 'width'
    @type height: int

    @param aspect: A float representing the aspect ratio of the video:
    4:3 equals 1.33 and 16:9 equals 1.77.
    This is a fallback in case the algorithm fails to determine the real aspect
    ratio from the video. See also 'width'
    @type aspect: float or "4:3" like string

    @param profile: A profile to use. The priority is on the parameters
    directly given to the function (with the exception of 'passes').
    @type profile: string

    @param update_fnc: A function called to display or log the encoding
    status. This function must accept a string.
    @type update_fnc: function

    @param message_fnc: A function to log important messages or errors.
    This function must accept a string.
    @type message_fnc: function

    @return: A true value (1) if the encoding was successful, a false value
    (0) if not
    @rtype: int
    """

    def encode():
        """ Subfunction to run the actual encoding of the current pass.
        Returns 1 on success and 0 on any kind of failure.
        """
        ## Start process
        process = subprocess.Popen(command,
                                   stderr=log_file_handle,
                                   close_fds=True)
        ## While the process is running
        time.sleep(1)
        while process.poll() is None:
            # Update the status in bibsched
            update_status()
            time.sleep(4)
        returncode = process.returncode
        ## If everything went fine
        if returncode == 0:
            message_fnc("Encoding of %s done" % output_file)
            update_fnc("Encoding of %s done" % output_file)
            return 1
        ## If the process was terminated (SIGTERM)
        if returncode == -15:
            # Encoding was terminated by system
            message_fnc("FFMPEG was terminated")
            update_fnc("  FFMPEG was terminated")
            return 0
        ## Every other exit status is an error during encoding. Formerly only
        ## the status 1 was handled and all others silently returned None.
        update_fnc("  An FFMPEG error has appeared, see log")
        message_fnc("An FFMPEG error has appeared encoding %s" % output_file)
        message_fnc("Command was: %s" % ' '.join(command))
        message_fnc("Last lines of the FFmpeg log:")
        ## Open the logfile again and read (at most) its last 10000 bytes
        with open(log_file_name, 'rb') as log_tail_handle:
            size = os.fstat(log_tail_handle.fileno())[6]
            log_tail_handle.seek(-min(size, 10000), 2)
            lastlines = log_tail_handle.read().splitlines()[-5:]
        for line in lastlines:
            message_fnc(line)
        return 0

    def build_command(nofpass=1):
        """ Builds the ffmpeg command according to the function params
        """
        def insert(key, value):
            """ Shortcut for inserting parameters into the arg list.
            Everything is inserted right before the last element, which is
            the output target of the command.
            """
            base_args.insert(-1, key)
            base_args.insert(-1, value)

        ## Determine base command arguments from the pass to run
        base_args = None
        if passes == 1:
            base_args = [CFG_PATH_FFMPEG, '-y', '-i', input_file, output_file]
        elif passes == 2:
            if nofpass == 1:
                base_args = [CFG_PATH_FFMPEG, '-y', '-i', input_file,
                             '-pass', '1', '-passlogfile', pass_log_file,
                             '-an', '-f', 'rawvideo', '/dev/null']
            elif nofpass == 2:
                base_args = [CFG_PATH_FFMPEG, '-y', '-i', input_file,
                             '-pass', '2', '-passlogfile',
                             pass_log_file, output_file]
        ## Insert additional arguments
        if acodec is not None:
            insert('-acodec', acodec)
        if vcodec is not None:
            insert('-vcodec', vcodec)
        if abitrate is not None:
            insert('-b:a', str(abitrate))
        if vbitrate is not None:
            insert('-b:v', str(vbitrate))

        ## If a resolution is given
        if resolution:
            insert('-s', resolution)
        ## If not, you can give width and height and generate the resolution
        else:
            ## Use our new function to get the size of the input
            nresolution = determine_resolution_preserving_aspect(input_file,
                                                                 width,
                                                                 height,
                                                                 aspect)
            insert('-s', nresolution)
        ## Metadata additions
        if isinstance(metadata, dict):
            ## build metadata arguments for ffmpeg
            for key, value in metadata.items():
                if value is not None:
                    meta_arg = (
                        CFG_BIBENCODE_FFMPEG_METADATA_ARGUMENT % (key, value)
                        )
                    insert("-metadata", meta_arg)
        ## Special argument additions
        special_args = None
        if passes == 1:
            special_args = special
        elif passes == 2:
            if nofpass == 1:
                special_args = specialfirst
            elif nofpass == 2:
                special_args = specialsecond
        if special_args is not None:
            for val in special_args.split():
                base_args.insert(-1, val)
        return base_args

    def update_status():
        """ Parses the encoding status and updates the task in bibsched
        """

        def graphical(value):
            """ Converts a percentage value to a nice graphical representation
            """
            ## If the given value is a valid percentage
            if 0 <= value <= 100:
                ## This is to get nice, aligned output in bibsched
                oval = str(value).zfill(3)
                ## Floor division: the bar length must be an integer
                blocks = value // 10
                return (
                    "[" + "#" * blocks + " " * (10 - blocks) +
                    "][%d/%d] %s%%" % (nofpass, passes, oval)
                    )
            else:
                ## Sometimes the parsed values from FFMPEG are totally off.
                ## Or maybe needed values are not avail. for the given video.
                ## In this case there is no estimate.
                return "[  no est. ][%d/%d]     " % (nofpass, passes)

        ## init variables
        time_string = '0.0'
        percentage_done = -1
        ## try to read the encoding log
        try:
            filehandle = open(log_file_name, 'rb')
        except IOError:
            message_fnc("Error opening %s" % log_file_name)
            update_fnc("Could not open encoding log")
            return
        try:
            ## Check the size of the file before reading from the end
            size = os.path.getsize(log_file_name)
            if not size:
                return
            ## Go to the end of the log
            filehandle.seek(-min(10000, size), 2)
            lines = filehandle.read().splitlines()
        finally:
            ## Also closes the handle on the early return for an empty log
            filehandle.close()

        ## try to parse the status, the most recent line comes first
        for line in reversed(lines):
            time_match = CFG_BIBENCODE_FFMPEG_ENCODE_TIME.match(line)
            if time_match:
                time_string = time_match.groups()[0]
                break
        try:
            percentage_done = int(timecode_to_seconds(time_string) / total_seconds * 100)
        except Exception:
            ## Best effort only, e.g. the duration is unknown (0.0 seconds)
            percentage_done = -1
        ## Now update the bibsched progress
        opath, ofile = os.path.split(output_file)
        if len(opath) > 8:
            opath = "..." + opath[-8:]
        ohint = opath + '/' + ofile
        update_fnc(graphical(percentage_done) + " > " + ohint)

    #------------------#
    # PROFILE HANDLING #
    #------------------#

    if profile:
        profile = get_encoding_profile(profile)
        acodec = chose(acodec, 'audiocodec', profile)
        vcodec = chose(vcodec, 'videocodec', profile)
        abitrate = chose(abitrate, 'audiobitrate', profile)
        vbitrate = chose(vbitrate, 'videobitrate', profile)
        resolution = chose(resolution, 'resolution', profile)
        ## NOTE: unlike the other settings, the profile wins over the argument
        passes = getval(profile, 'passes', 1)
        special = chose(special, 'special', profile)
        specialfirst = chose(specialfirst, 'special_firstpass', profile)
        specialsecond = chose(specialsecond, 'special_secondpass', profile)
        metadata = chose(metadata, 'metadata', profile)
        width = chose(width, 'width', profile)
        height = chose(height, 'height', profile)
        aspect = chose(aspect, 'aspect', profile)

    #----------#
    # ENCODING #
    #----------#

    ## Mark Task as stoppable
    # task_sleep_now_if_required()

    ## The duration is only needed for the progress estimate
    tech_metadata = ffprobe_metadata(input_file)
    try:
        total_seconds = float(tech_metadata['format']['duration'])
    except Exception:
        total_seconds = 0.0

    ## Run the encoding
    pass_log_file = CFG_BIBENCODE_FFMPEG_PASSLOGFILE_PREFIX % (
                    os.path.splitext(os.path.split(input_file)[1])[0],
                    str(uuid.uuid4()))
    no_error = True
    ## For every encoding pass to do
    for nofpass in range(1, passes + 1):
        ## A failed pass stops the encoding
        if not no_error:
            break
        ## Create Logfiles
        log_file_name = _filename_log(output_file, nofpass)
        try:
            log_file_handle = open(log_file_name, 'w')
        except IOError:
            message_fnc("Error creating %s" % log_file_name)
            update_fnc("Error creating logfile")
            return 0
        try:
            ## Build command for FFMPEG
            command = build_command(nofpass)
            ## Start encoding, result will define to continue or not to
            no_error = encode()
        finally:
            ## Do not leak one log file handle per pass
            log_file_handle.close()
    ## !!! Status Update
    return no_error
Exemplo n.º 5
0
def process_batch_job(batch_job_file):
    """ Processes a batch job description dictionary

    The batch description is decoded and sanitised, the master video is
    located or added, every job of the description is run (transcoding or
    frame extraction), the BibDoc and MARC of the record are fixed and
    finally the notifications are sent.

    @param batch_job_file: a fullpath to a batch job file
    @type batch_job_file: string
    @return: 1. NOTE(review): 1 is returned on every path that does not
    raise, including a missing master and failed jobs; failures are only
    reported through the notifications. Confirm before relying on it.
    @rtype: int
    @raise Exception: if the record does not exist or if no container
    extension is defined for an encoding job
    """
    global _BATCH_STEPS
    global _BATCH_STEP

    def upload_marcxml_file(marcxml):
        """ Creates a temporary marcxml file and sends it to bibupload
        """
        xml_filename = 'bibencode_'+ str(batch_job['recid']) + '_' + str(uuid.uuid4()) + '.xml'
        xml_filename = os.path.join(invenio.config.CFG_TMPSHAREDDIR, xml_filename)
        with open(xml_filename, 'w') as xml_file:
            xml_file.write(marcxml)
        targs = ['-c', xml_filename]
        task_low_level_submission('bibupload', 'bibencode', *targs)

    #---------#
    # GENERAL #
    #---------#

    _task_write_message("----------- Handling Master -----------")

    ## Check the validity of the batch file here
    batch_job = json_decode_file(batch_job_file)

    ## Sanitise batch description and raise errors
    batch_job = sanitise_batch_job(batch_job)

    ## Check if the record exists
    if record_exists(batch_job['recid']) < 1:
        raise Exception("Record not found")

    recdoc = BibRecDocs(batch_job['recid'])

    #--------------------#
    # UPDATE FROM MASTER #
    #--------------------#

    ## We want to add new stuff to the video's record, using the master as input
    if getval(batch_job, 'update_from_master'):
        found_master = False
        bibdocs = recdoc.list_bibdocs()
        for bibdoc in bibdocs:
            bibdocfiles = bibdoc.list_all_files()
            for bibdocfile in bibdocfiles:
                comment = bibdocfile.get_comment()
                description = bibdocfile.get_description()
                subformat = bibdocfile.get_subformat()
                ## Criteria missing in the batch description always match
                m_comment = getval(batch_job, 'bibdoc_master_comment', comment)
                m_description = getval(batch_job, 'bibdoc_master_description', description)
                m_subformat = getval(batch_job, 'bibdoc_master_subformat', subformat)
                if (comment == m_comment and
                    description == m_description and
                    subformat == m_subformat):
                    found_master = True
                    batch_job['input'] = bibdocfile.get_full_path()
                    ## Get the aspect of the video from the record
                    try:
                        ## Assumes pbcore metadata mapping.
                        ## Formerly the hard-coded record 124 was queried.
                        batch_job['aspect'] = get_fieldvalues(batch_job['recid'], CFG_BIBENCODE_ASPECT_RATIO_MARC_FIELD)[0]
                    except IndexError:
                        pass
                    break
            if found_master:
                break
        if not found_master:
            _task_write_message("Video master for record %d not found"
                          % batch_job['recid'])
            task_update_progress("Video master for record %d not found"
                                 % batch_job['recid'])
            ## Maybe send an email?
            return 1

    ## Clean the job to do no upscaling etc
    if getval(batch_job, 'assure_quality'):
        batch_job = clean_job_for_quality(batch_job)

    _BATCH_STEPS = len(batch_job['jobs'])

    ## Generate the docname from the input filename's name or given name
    bibdoc_video_docname, bibdoc_video_extension = decompose_file(batch_job['input'])[1:]
    if not bibdoc_video_extension or getval(batch_job, 'bibdoc_master_extension'):
        bibdoc_video_extension = getval(batch_job, 'bibdoc_master_extension')
    if getval(batch_job, 'bibdoc_master_docname'):
        bibdoc_video_docname = getval(batch_job, 'bibdoc_master_docname')

    write_message("Creating BibDoc for %s" % bibdoc_video_docname)
    ## If the bibdoc exists, receive it
    if bibdoc_video_docname in recdoc.get_bibdoc_names():
        bibdoc_video = recdoc.get_bibdoc(bibdoc_video_docname)
    ## Create a new bibdoc if it does not exist
    else:
        bibdoc_video = recdoc.add_bibdoc(docname=bibdoc_video_docname)

    ## Get the directory of the newly created bibdoc to copy stuff there
    bibdoc_video_directory = bibdoc_video.get_base_dir()

    #--------#
    # MASTER #
    #--------#
    if not getval(batch_job, 'update_from_master'):
        if getval(batch_job, 'add_master'):
            ## Generate the right name for the master
            ## The master should be hidden first and then renamed
            ## when it is really available
            ## !!! FIX !!!
            _task_write_message("Adding %s master to the BibDoc"
                          % bibdoc_video_docname)
            master_format = compose_format(
                                    bibdoc_video_extension,
                                    getval(batch_job, 'bibdoc_master_subformat', 'master')
                                    )
            ## If a file of the same format is there, something is wrong, remove it!
            ## it might be caused by a previous corrupted submission etc.
            if bibdoc_video.format_already_exists_p(master_format):
                bibdoc_video.delete_file(master_format, 1)
            bibdoc_video.add_file_new_format(
                    batch_job['input'],
                    version=1,
                    description=getval(batch_job, 'bibdoc_master_description'),
                    comment=getval(batch_job, 'bibdoc_master_comment'),
                    docformat=master_format
                    )

    #-----------#
    # JOBS LOOP #
    #-----------#

    ## 1 as long as every job succeeded, 0 as soon as one job failed
    return_code = 1

    for job in batch_job['jobs']:

        _task_write_message("----------- Job %s of %s -----------"
                           % (_BATCH_STEP, _BATCH_STEPS))

        ## Try to substitute docname with master docname
        if getval(job, 'bibdoc_docname'):
            job['bibdoc_docname'] = Template(job['bibdoc_docname']).safe_substitute({'bibdoc_master_docname': bibdoc_video_docname})

        #-------------#
        # TRANSCODING #
        #-------------#

        if job['mode'] == 'encode':

            ## Skip the job if assure_quality is not set and marked as fallback
            if not getval(batch_job, 'assure_quality') and getval(job, 'fallback'):
                continue

            if getval(job, 'profile'):
                profile = get_encoding_profile(job['profile'])
            else:
                profile = None
            ## We need an extension defined for the video container
            bibdoc_video_extension = getval(job, 'extension',
                                            getval(profile, 'extension'))
            if not bibdoc_video_extension:
                raise Exception("No container/extension defined")
            ## Get the docname and subformat
            bibdoc_video_subformat = getval(job, 'bibdoc_subformat')
            bibdoc_slave_video_docname = getval(job, 'bibdoc_docname', bibdoc_video_docname)
            ## The subformat is incompatible with ffmpegs name convention
            ## We do the encoding without and rename it afterwards
            bibdoc_video_fullpath = compose_file(
                                                 bibdoc_video_directory,
                                                 bibdoc_slave_video_docname,
                                                 bibdoc_video_extension
                                                 )
            _task_write_message("Transcoding %s to %s;%s" % (bibdoc_slave_video_docname,
                                bibdoc_video_extension,
                                bibdoc_video_subformat))
            ## We encode now directly into the bibdocs directory
            encoding_result = encode_video(
                 input_file=batch_job['input'],
                 output_file=bibdoc_video_fullpath,
                 acodec=getval(job, 'audiocodec'),
                 vcodec=getval(job, 'videocodec'),
                 ## The two bitrates were formerly passed swapped
                 abitrate=getval(job, 'audiobitrate'),
                 vbitrate=getval(job, 'videobitrate'),
                 resolution=getval(job, 'resolution'),
                 passes=getval(job, 'passes', 1),
                 special=getval(job, 'special'),
                 specialfirst=getval(job, 'specialfirst'),
                 specialsecond=getval(job, 'specialsecond'),
                 metadata=getval(job, 'metadata'),
                 width=getval(job, 'width'),
                 height=getval(job, 'height'),
                 aspect=getval(batch_job, 'aspect'), # Aspect for every job
                 profile=getval(job, 'profile'),
                 update_fnc=_task_update_overall_status,
                 message_fnc=_task_write_message
                 )
            ## Any false result (also None) marks the whole batch as failed
            if not encoding_result:
                return_code = 0
            ## only on success
            if encoding_result:
                ## Rename it, adding the subformat
                os.rename(bibdoc_video_fullpath,
                          compose_file(bibdoc_video_directory,
                                       bibdoc_video_extension,
                                       bibdoc_video_subformat,
                                       1,
                                       bibdoc_slave_video_docname)
                          )
                #bibdoc_video._build_file_list()
                bibdoc_video.touch()
                bibdoc_video._sync_to_db()
                bibdoc_video_format = compose_format(bibdoc_video_extension,
                                                     bibdoc_video_subformat)
                if getval(job, 'bibdoc_comment'):
                    bibdoc_video.set_comment(getval(job, 'bibdoc_comment'),
                                              bibdoc_video_format)
                if getval(job, 'bibdoc_description'):
                    bibdoc_video.set_description(getval(job, 'bibdoc_description'),
                                                 bibdoc_video_format)

        #------------#
        # EXTRACTION #
        #------------#

        # if there are multiple extraction jobs, all the produced files
        # with the same name will be in the same bibdoc! Make sure that
        # you use different subformats or docname templates to avoid
        # conflicts.

        if job['mode'] == 'extract':
            if getval(job, 'profile'):
                profile = get_extract_profile(job['profile'])
            else:
                profile = {}
            bibdoc_frame_subformat = getval(job, 'bibdoc_subformat')
            _task_write_message("Extracting frames to temporary directory")
            tmpdir = invenio.config.CFG_TMPDIR + "/" + str(uuid.uuid4())
            os.mkdir(tmpdir)
            ## The temporary directory is removed even if something raises
            try:
                #Move this to the batch description
                bibdoc_frame_docname = getval(job, 'bibdoc_docname', bibdoc_video_docname)
                tmpfname = (tmpdir + "/" + bibdoc_frame_docname + '.'
                            + getval(profile, 'extension',
                            getval(job, 'extension', 'jpg')))
                extraction_result = extract_frames(input_file=batch_job['input'],
                               output_file=tmpfname,
                               size=getval(job, 'size'),
                               positions=getval(job, 'positions'),
                               numberof=getval(job, 'numberof'),
                               width=getval(job, 'width'),
                               height=getval(job, 'height'),
                               aspect=getval(batch_job, 'aspect'),
                               profile=getval(job, 'profile'),
                               update_fnc=_task_update_overall_status,
                               )
                if not extraction_result:
                    return_code = 0

                ## only on success:
                if extraction_result:
                    ## for every filename in the directory, create a bibdoc that
                    ## contains all sizes of the frame from the two directories
                    files = os.listdir(tmpdir)
                    for filename in files:
                        ## The docname was altered by BibEncode extract through substitution
                        ## Retrieve it from the filename again
                        bibdoc_frame_docname, bibdoc_frame_extension = os.path.splitext(filename)
                        _task_write_message("Creating new bibdoc for %s" % bibdoc_frame_docname)
                        ## If the bibdoc exists, receive it
                        if bibdoc_frame_docname in recdoc.get_bibdoc_names():
                            bibdoc_frame = recdoc.get_bibdoc(bibdoc_frame_docname)
                        ## Create a new bibdoc if it does not exist
                        else:
                            bibdoc_frame = recdoc.add_bibdoc(docname=bibdoc_frame_docname)

                        ## The filename including path from tmpdir
                        fname = os.path.join(tmpdir, filename)

                        bibdoc_frame_format = compose_format(bibdoc_frame_extension, bibdoc_frame_subformat)
                        ## Same as with the master, if the format already exists,
                        ## override it, because something went wrong before
                        if bibdoc_frame.format_already_exists_p(bibdoc_frame_format):
                            bibdoc_frame.delete_file(bibdoc_frame_format, 1)
                        _task_write_message("Adding %s jpg;%s to BibDoc"
                                      % (bibdoc_frame_docname,
                                         getval(job, 'bibdoc_subformat')))
                        bibdoc_frame.add_file_new_format(
                                        fname,
                                        version=1,
                                        description=getval(job, 'bibdoc_description'),
                                        comment=getval(job, 'bibdoc_comment'),
                                        docformat=bibdoc_frame_format)
            finally:
                ## Remove the temporary folders
                _task_write_message("Removing temporary directory")
                shutil.rmtree(tmpdir)

        _BATCH_STEP = _BATCH_STEP + 1

    #-----------------#
    # FIX BIBDOC/MARC #
    #-----------------#

    _task_write_message("----------- Handling MARCXML -----------")

    ## Fix the BibDoc for all the videos previously created
    _task_write_message("Updating BibDoc of %s" % bibdoc_video_docname)
    bibdoc_video._build_file_list()

    ## Fix the MARC
    _task_write_message("Fixing MARC")
    cli_fix_marc({}, [batch_job['recid']], False)

    if getval(batch_job, 'collection'):
        ## Make the record visible by moving in from the collection
        marcxml = ("<record><controlfield tag=\"001\">%d</controlfield>"
                   "<datafield tag=\"980\" ind1=\" \" ind2=\" \">"
                   "<subfield code=\"a\">%s</subfield></datafield></record>"
                   ) % (batch_job['recid'], batch_job['collection'])
        upload_marcxml_file(marcxml)

    #---------------------#
    # ADD MASTER METADATA #
    #---------------------#

    if getval(batch_job, 'add_master_metadata'):
        _task_write_message("Adding master metadata")
        pbcore = pbcore_metadata(input_file = getval(batch_job, 'input'),
                                 pbcoreIdentifier = batch_job['recid'],
                                 aspect_override = getval(batch_job, 'aspect'))
        marcxml = format(pbcore, CFG_BIBENCODE_PBCORE_MARC_XSLT)
        upload_marcxml_file(marcxml)

    #------------------#
    # ADD MARC SNIPPET #
    #------------------#

    if getval(batch_job, 'marc_snippet'):
        with open(getval(batch_job, 'marc_snippet')) as marc_snippet:
            marcxml = marc_snippet.read()
        upload_marcxml_file(marcxml)

    #--------------#
    # DELETE INPUT #
    #--------------#

    if getval(batch_job, 'delete_input'):
        _task_write_message("Deleting input file")
        ## only if successful (the check was formerly inverted and deleted
        ## the input only after a failure)
        if return_code:
            # only if input matches pattern
            if getval(batch_job, 'delete_input_pattern', '') in getval(batch_job, 'input'):
                try:
                    os.remove(getval(batch_job, 'input'))
                except OSError:
                    ## Best effort: a remaining input file is not fatal
                    _task_write_message("Could not delete input file")

    #--------------#
    # NOTIFICATION #
    #--------------#

    ## Send Notification emails on errors
    if not return_code:
        if getval(batch_job, 'notify_user'):
            _notify_error_user(getval(batch_job, 'notify_user'),
                               getval(batch_job, 'submission_filename', batch_job['input']),
                               getval(batch_job, 'recid'),
                               getval(batch_job, 'submission_title', ""))
            _task_write_message("Notify user because of an error")
        if getval(batch_job, 'notify_admin'):
            _task_write_message("Notify admin because of an error")
            ## Byte and unicode strings both count as an address where the
            ## interpreter distinguishes them
            try:
                string_types = basestring
            except NameError:
                string_types = str
            admin_address = getval(batch_job, 'notify_admin')
            ## Formerly the parenthesis was misplaced, type(a == b) was
            ## always true and the default recipient was never used
            if isinstance(admin_address, string_types):
                _notify_error_admin(batch_job, admin_address)
            else:
                _notify_error_admin(batch_job)
    else:
        if getval(batch_job, 'notify_user'):
            _task_write_message("Notify user because of success")
            _notify_success_user(getval(batch_job, 'notify_user'),
                               getval(batch_job, 'submission_filename', batch_job['input']),
                               getval(batch_job, 'recid'),
                               getval(batch_job, 'submission_title', ""))
    return 1
Exemplo n.º 6
0
def process_batch_job(batch_job_file):
    """ Processes a batch job description dictionary

    The batch description is decoded and sanitised, then the following steps
    are run in order: optionally locate the master file in the record
    ('update_from_master'), optionally drop unsuitable encoding jobs
    ('assure_quality'), optionally attach the master ('add_master'), run every
    'encode' / 'extract' job, fix BibDoc and MARC, upload optional MARCXML
    (collection, master metadata, snippet), optionally delete the input file
    and finally send notifications.

    @param batch_job_file: a fullpath to a batch job file
    @type batch_job_file: string
    @return: always 1. Failures of individual jobs are not reflected in the
        return value; they only select the error notifications. 1 is also
        returned early when 'update_from_master' is set and no master is found.
    @rtype: int
    @raise Exception: if the record does not exist, or if an encoding job has
        no container extension defined (neither in the job nor its profile)
    """
    def upload_marcxml_file(marcxml):
        """ Creates a temporary marcxml file and sends it to bibupload
        """
        xml_filename = 'bibencode_' + str(batch_job['recid']) + '_' + str(
            uuid.uuid4()) + '.xml'
        xml_filename = os.path.join(invenio.config.CFG_TMPSHAREDDIR,
                                    xml_filename)
        ## The context manager closes the file even if the write fails
        with open(xml_filename, 'w') as xml_file:
            xml_file.write(marcxml)
        targs = ['-c', xml_filename]
        task_low_level_submission('bibupload', 'bibencode', *targs)

    #---------#
    # GENERAL #
    #---------#

    _task_write_message("----------- Handling Master -----------")

    ## Check the validity of the batch file here
    batch_job = json_decode_file(batch_job_file)

    ## Sanitise batch description and raise errors
    batch_job = sanitise_batch_job(batch_job)

    ## Check if the record exists
    if record_exists(batch_job['recid']) < 1:
        raise Exception("Record not found")

    recdoc = BibRecDocs(batch_job['recid'])

    #--------------------#
    # UPDATE FROM MASTER #
    #--------------------#

    ## We want to add new stuff to the video's record, using the master as input
    if getval(batch_job, 'update_from_master'):
        found_master = False
        bibdocs = recdoc.list_bibdocs()
        for bibdoc in bibdocs:
            bibdocfiles = bibdoc.list_all_files()
            for bibdocfile in bibdocfiles:
                comment = bibdocfile.get_comment()
                description = bibdocfile.get_description()
                subformat = bibdocfile.get_subformat()
                ## A criterion missing from the batch description defaults to
                ## the file's own value, so it always matches
                m_comment = getval(batch_job, 'bibdoc_master_comment', comment)
                m_description = getval(batch_job, 'bibdoc_master_description',
                                       description)
                m_subformat = getval(batch_job, 'bibdoc_master_subformat',
                                     subformat)
                if (comment == m_comment and description == m_description
                        and subformat == m_subformat):
                    found_master = True
                    batch_job['input'] = bibdocfile.get_full_path()
                    ## Get the aspect from the record that is being processed
                    try:
                        ## Assumes pbcore metadata mapping
                        batch_job['aspect'] = get_fieldvalues(
                            batch_job['recid'],
                            CFG_BIBENCODE_ASPECT_RATIO_MARC_FIELD)[0]
                    except IndexError:
                        ## No aspect stored in the record, go on without it
                        pass
                    break
            if found_master:
                break
        if not found_master:
            _task_write_message("Video master for record %d not found" %
                                batch_job['recid'])
            task_update_progress("Video master for record %d not found" %
                                 batch_job['recid'])
            ## Maybe send an email?
            return 1

    ## Clean the job to do no upscaling etc
    if getval(batch_job, 'assure_quality'):
        batch_job = clean_job_for_quality(batch_job)

    global _BATCH_STEPS
    _BATCH_STEPS = len(batch_job['jobs'])

    ## Generate the docname from the input filename's name or given name
    bibdoc_video_docname, bibdoc_video_extension = decompose_file(
        batch_job['input'])[1:]
    if not bibdoc_video_extension or getval(batch_job,
                                            'bibdoc_master_extension'):
        bibdoc_video_extension = getval(batch_job, 'bibdoc_master_extension')
    if getval(batch_job, 'bibdoc_master_docname'):
        bibdoc_video_docname = getval(batch_job, 'bibdoc_master_docname')

    write_message("Creating BibDoc for %s" % bibdoc_video_docname)
    ## If the bibdoc exists, receive it
    if bibdoc_video_docname in recdoc.get_bibdoc_names():
        bibdoc_video = recdoc.get_bibdoc(bibdoc_video_docname)
    ## Create a new bibdoc if it does not exist
    else:
        bibdoc_video = recdoc.add_bibdoc(docname=bibdoc_video_docname)

    ## Get the directory of the newly created bibdoc to copy stuff there
    bibdoc_video_directory = bibdoc_video.get_base_dir()

    #--------#
    # MASTER #
    #--------#
    if not getval(batch_job, 'update_from_master'):
        if getval(batch_job, 'add_master'):
            ## Generate the right name for the master
            ## The master should be hidden first and then renamed
            ## when it is really available
            ## !!! FIX !!!
            _task_write_message("Adding %s master to the BibDoc" %
                                bibdoc_video_docname)
            master_format = compose_format(
                bibdoc_video_extension,
                getval(batch_job, 'bibdoc_master_subformat', 'master'))
            ## If a file of the same format is there, something is wrong, remove it!
            ## it might be caused by a previous corrupted submission etc.
            if bibdoc_video.format_already_exists_p(master_format):
                bibdoc_video.delete_file(master_format, 1)
            bibdoc_video.add_file_new_format(
                batch_job['input'],
                version=1,
                description=getval(batch_job, 'bibdoc_master_description'),
                comment=getval(batch_job, 'bibdoc_master_comment'),
                docformat=master_format)

    #-----------#
    # JOBS LOOP #
    #-----------#

    ## Stays truthy only as long as every job result and-ed into it is truthy
    return_code = 1
    global _BATCH_STEP

    for job in batch_job['jobs']:

        _task_write_message("----------- Job %s of %s -----------" %
                            (_BATCH_STEP, _BATCH_STEPS))

        ## Try to substitute docname with master docname
        if getval(job, 'bibdoc_docname'):
            job['bibdoc_docname'] = Template(
                job['bibdoc_docname']).safe_substitute(
                    {'bibdoc_master_docname': bibdoc_video_docname})

        #-------------#
        # TRANSCODING #
        #-------------#

        if job['mode'] == 'encode':

            ## Skip the job if assure_quality is not set and marked as fallback
            if not getval(batch_job, 'assure_quality') and getval(
                    job, 'fallback'):
                continue

            if getval(job, 'profile'):
                profile = get_encoding_profile(job['profile'])
            else:
                profile = None
            ## We need an extension defined for the video container
            bibdoc_video_extension = getval(job, 'extension',
                                            getval(profile, 'extension'))
            if not bibdoc_video_extension:
                raise Exception("No container/extension defined")
            ## Get the docname and subformat
            bibdoc_video_subformat = getval(job, 'bibdoc_subformat')
            bibdoc_slave_video_docname = getval(job, 'bibdoc_docname',
                                                bibdoc_video_docname)
            ## The subformat is incompatible with ffmpegs name convention
            ## We do the encoding without and rename it afterwards
            bibdoc_video_fullpath = compose_file(bibdoc_video_directory,
                                                 bibdoc_slave_video_docname,
                                                 bibdoc_video_extension)
            _task_write_message(
                "Transcoding %s to %s;%s" %
                (bibdoc_slave_video_docname, bibdoc_video_extension,
                 bibdoc_video_subformat))
            ## We encode now directly into the bibdocs directory
            encoding_result = encode_video(
                input_file=batch_job['input'],
                output_file=bibdoc_video_fullpath,
                acodec=getval(job, 'audiocodec'),
                vcodec=getval(job, 'videocodec'),
                ## Audio bitrate goes to abitrate, video bitrate to vbitrate
                abitrate=getval(job, 'audiobitrate'),
                vbitrate=getval(job, 'videobitrate'),
                resolution=getval(job, 'resolution'),
                passes=getval(job, 'passes', 1),
                special=getval(job, 'special'),
                specialfirst=getval(job, 'specialfirst'),
                specialsecond=getval(job, 'specialsecond'),
                metadata=getval(job, 'metadata'),
                width=getval(job, 'width'),
                height=getval(job, 'height'),
                aspect=getval(batch_job, 'aspect'),  # Aspect for every job
                profile=getval(job, 'profile'),
                update_fnc=_task_update_overall_status,
                message_fnc=_task_write_message)
            return_code &= encoding_result
            ## only on success
            if encoding_result:
                ## Rename it, adding the subformat
                os.rename(
                    bibdoc_video_fullpath,
                    compose_file(bibdoc_video_directory,
                                 bibdoc_video_extension,
                                 bibdoc_video_subformat, 1,
                                 bibdoc_slave_video_docname))
                #bibdoc_video._build_file_list()
                bibdoc_video.touch()
                bibdoc_video._sync_to_db()
                bibdoc_video_format = compose_format(bibdoc_video_extension,
                                                     bibdoc_video_subformat)
                if getval(job, 'bibdoc_comment'):
                    bibdoc_video.set_comment(getval(job, 'bibdoc_comment'),
                                             bibdoc_video_format)
                if getval(job, 'bibdoc_description'):
                    bibdoc_video.set_description(
                        getval(job, 'bibdoc_description'), bibdoc_video_format)

        #------------#
        # EXTRACTION #
        #------------#

        # if there are multiple extraction jobs, all the produced files
        # with the same name will be in the same bibdoc! Make sure that
        # you use different subformats or docname templates to avoid
        # conflicts.

        if job['mode'] == 'extract':
            if getval(job, 'profile'):
                profile = get_extract_profile(job['profile'])
            else:
                profile = {}
            bibdoc_frame_subformat = getval(job, 'bibdoc_subformat')
            _task_write_message("Extracting frames to temporary directory")
            tmpdir = invenio.config.CFG_TMPDIR + "/" + str(uuid.uuid4())
            os.mkdir(tmpdir)
            ## The temporary directory is removed in the finally clause, also
            ## when the extraction or the attachment of a frame raises
            try:
                #Move this to the batch description
                bibdoc_frame_docname = getval(job, 'bibdoc_docname',
                                              bibdoc_video_docname)
                tmpfname = (
                    tmpdir + "/" + bibdoc_frame_docname + '.' +
                    getval(profile, 'extension',
                           getval(job, 'extension', 'jpg')))
                extraction_result = extract_frames(
                    input_file=batch_job['input'],
                    output_file=tmpfname,
                    size=getval(job, 'size'),
                    positions=getval(job, 'positions'),
                    numberof=getval(job, 'numberof'),
                    width=getval(job, 'width'),
                    height=getval(job, 'height'),
                    aspect=getval(batch_job, 'aspect'),
                    profile=getval(job, 'profile'),
                    update_fnc=_task_update_overall_status,
                )
                return_code &= extraction_result

                ## only on success:
                if extraction_result:
                    ## for every filename in the directory, get or create the
                    ## bibdoc named after it and attach the frame to it
                    files = os.listdir(tmpdir)
                    for filename in files:
                        ## The docname was altered by BibEncode extract through substitution
                        ## Retrieve it from the filename again
                        bibdoc_frame_docname, bibdoc_frame_extension = os.path.splitext(
                            filename)
                        _task_write_message("Creating new bibdoc for %s" %
                                            bibdoc_frame_docname)
                        ## If the bibdoc exists, receive it
                        if bibdoc_frame_docname in recdoc.get_bibdoc_names():
                            bibdoc_frame = recdoc.get_bibdoc(
                                bibdoc_frame_docname)
                        ## Create a new bibdoc if it does not exist
                        else:
                            bibdoc_frame = recdoc.add_bibdoc(
                                docname=bibdoc_frame_docname)

                        ## The filename including path from tmpdir
                        fname = os.path.join(tmpdir, filename)

                        bibdoc_frame_format = compose_format(
                            bibdoc_frame_extension, bibdoc_frame_subformat)
                        ## Same as with the master, if the format already exists,
                        ## override it, because something went wrong before
                        if bibdoc_frame.format_already_exists_p(
                                bibdoc_frame_format):
                            bibdoc_frame.delete_file(bibdoc_frame_format, 1)
                        _task_write_message("Adding %s jpg;%s to BibDoc" %
                                            (bibdoc_frame_docname,
                                             getval(job, 'bibdoc_subformat')))
                        bibdoc_frame.add_file_new_format(
                            fname,
                            version=1,
                            description=getval(job, 'bibdoc_description'),
                            comment=getval(job, 'bibdoc_comment'),
                            docformat=bibdoc_frame_format)
            finally:
                ## Remove the temporary folders
                _task_write_message("Removing temporary directory")
                shutil.rmtree(tmpdir)

        _BATCH_STEP = _BATCH_STEP + 1

    #-----------------#
    # FIX BIBDOC/MARC #
    #-----------------#

    _task_write_message("----------- Handling MARCXML -----------")

    ## Fix the BibDoc for all the videos previously created
    _task_write_message("Updating BibDoc of %s" % bibdoc_video_docname)
    bibdoc_video._build_file_list()

    ## Fix the MARC
    _task_write_message("Fixing MARC")
    cli_fix_marc({}, [batch_job['recid']], False)

    if getval(batch_job, 'collection'):
        ## Make the record visible by moving in from the collection
        marcxml = ("<record><controlfield tag=\"001\">%d</controlfield>"
                   "<datafield tag=\"980\" ind1=\" \" ind2=\" \">"
                   "<subfield code=\"a\">%s</subfield></datafield></record>"
                   ) % (batch_job['recid'], batch_job['collection'])
        upload_marcxml_file(marcxml)

    #---------------------#
    # ADD MASTER METADATA #
    #---------------------#

    if getval(batch_job, 'add_master_metadata'):
        _task_write_message("Adding master metadata")
        pbcore = pbcore_metadata(input_file=getval(batch_job, 'input'),
                                 pbcoreIdentifier=batch_job['recid'],
                                 aspect_override=getval(batch_job, 'aspect'))
        marcxml = format(pbcore, CFG_BIBENCODE_PBCORE_MARC_XSLT)
        upload_marcxml_file(marcxml)

    #------------------#
    # ADD MARC SNIPPET #
    #------------------#

    if getval(batch_job, 'marc_snippet'):
        ## The context manager closes the snippet even if reading fails
        with open(getval(batch_job, 'marc_snippet')) as marc_snippet:
            marcxml = marc_snippet.read()
        upload_marcxml_file(marcxml)

    #--------------#
    # DELETE INPUT #
    #--------------#

    if getval(batch_job, 'delete_input'):
        _task_write_message("Deleting input file")
        # only if successful: return_code is truthy when no job failed
        if return_code:
            # only if input matches pattern
            if getval(batch_job, 'delete_input_pattern',
                      '') in getval(batch_job, 'input'):
                try:
                    os.remove(getval(batch_job, 'input'))
                except OSError:
                    ## Best effort: a failed removal must not fail the task
                    pass

    #--------------#
    # NOTIFICATION #
    #--------------#

    ## Send Notification emails on errors
    if not return_code:
        if getval(batch_job, 'notify_user'):
            _notify_error_user(
                getval(batch_job, 'notify_user'),
                getval(batch_job, 'submission_filename', batch_job['input']),
                getval(batch_job, 'recid'),
                getval(batch_job, 'submission_title', ""))
            _task_write_message("Notify user because of an error")
        notify_admin = getval(batch_job, 'notify_admin')
        if notify_admin:
            _task_write_message("Notify admin because of an error")
            ## A string value is passed on as the second argument; any other
            ## truthy value (e.g. True) calls the notifier without it.
            ## type(u'') also covers unicode strings on Python 2.
            if isinstance(notify_admin, (str, type(u''))):
                _notify_error_admin(batch_job, notify_admin)
            else:
                _notify_error_admin(batch_job)
    else:
        if getval(batch_job, 'notify_user'):
            _task_write_message("Notify user because of success")
            _notify_success_user(
                getval(batch_job, 'notify_user'),
                getval(batch_job, 'submission_filename', batch_job['input']),
                getval(batch_job, 'recid'),
                getval(batch_job, 'submission_title', ""))
    return 1