Exemplo n.º 1
0
    def _save_data_elements(self):
        """
        Pickle every data element in the element map to a file under the
        root directory.

        The element map lock is held for the whole operation. For each
        ``(uuid, element)`` pair the element's temporary files are cleaned,
        a nested directory is derived from the element's MD5 string, and the
        element is pickled into a file named via ``SERIAL_FILE_TEMPLATE``.
        """
        with self._element_map_lock:
            self._log.debug("Serializing data elements into: %s",
                            self._root_dir)
            for elem_uuid, element in self._element_map.iteritems():
                # Drop any temporary files the element may have generated
                # before it is written out.
                element.clean_temp()

                checksum = element.md5()
                # Every segment returned by ``partition_string`` becomes one
                # nested directory level under the root.
                # NOTE(review): an earlier comment here said the trailing
                #     chunk is left off, but no segment is dropped -- confirm
                #     which behavior is intended.
                dir_segments = partition_string(checksum, self._md5_chunk)
                target_dir = os.path.join(self._root_dir, *dir_segments)
                if not os.path.isdir(target_dir):
                    safe_create_dir(target_dir)

                file_name = self.SERIAL_FILE_TEMPLATE % (str(elem_uuid),
                                                         checksum)
                with open(os.path.join(target_dir, file_name), 'wb') as ofile:
                    cPickle.dump(element, ofile)
            self._log.debug("Serializing data elements -- Done")
Exemplo n.º 2
0
    def _containing_dir(self, uuid):
        """
        Resolve the directory under the root that holds content for ``uuid``.

        When ``self._uuid_chunk`` is falsy, no sub-directory storage is
        configured and the root directory itself is returned. Otherwise the
        root directory is joined with every segment that ``partition_string``
        produces for ``uuid`` and ``self._uuid_chunk``.
        """
        if self._uuid_chunk:
            segments = partition_string(uuid, self._uuid_chunk)
            return osp.join(self._root_dir, *segments)

        # No sub-directory storage configured
        return self._root_dir
Exemplo n.º 3
0
    def __init__(self,
                 type_name,
                 uuid,
                 save_dir,
                 subdir_split=None,
                 pickle_protocol=-1):
        """
        Initialize a file-backed classification element.

        :param type_name: Type of classification. This is usually the name of
            the classifier that generated this result.
        :type type_name: str

        :param uuid: UUID for this classification.
        :type uuid: collections.Hashable

        :param save_dir: Directory to save this element's contents under. The
            path is user-expanded and made absolute, so a relative path is
            interpreted relative to the current working directory.
        :type save_dir: str | unicode

        :param subdir_split: If a positive integer, the pickle file is stored
            in nested sub-directories under ``save_dir``. The value is passed
            to ``partition_string`` along with the stringified UUID (dashes
            removed, as would appear for a ``uuid.UUID`` instance), and the
            first ``subdir_split - 1`` resulting segments are used as
            directory names.
        :type subdir_split: None | int

        :param pickle_protocol: Pickling protocol to use. Defaults to -1.
        :type pickle_protocol: int

        """
        super(FileClassificationElement, self).__init__(type_name, uuid)

        self.save_dir = osp.abspath(osp.expanduser(save_dir))
        self.pickle_protocol = pickle_protocol

        # Work out which directory the pickle file lives in.
        self.subdir_split = subdir_split
        container_dir = self.save_dir
        if subdir_split and int(subdir_split) > 0:
            n_splits = int(subdir_split)
            self.subdir_split = n_splits
            uuid_str = str(uuid).replace('-', '')
            segments = partition_string(uuid_str, n_splits)
            # The last segment is left out so that we don't end up with a
            # large number of directories holding a single element each.
            container_dir = osp.join(self.save_dir, *segments[:n_splits - 1])

        file_name = "%s.%s.classification.pickle" % (self.type_name,
                                                     str(self.uuid))
        self.filepath = osp.join(container_dir, file_name)
Exemplo n.º 4
0
    def _containing_dir(self, uuid):
        """
        Compute the directory that contains items stored for the given UUID.

        Returns the root directory unchanged when ``self._uuid_chunk`` is
        falsy (no sub-directory storage configured). Otherwise returns the
        root directory extended by all segments ``partition_string`` yields
        for ``uuid`` with ``self._uuid_chunk``.
        """
        chunking = self._uuid_chunk
        if not chunking:
            # No sub-directory storage configured
            return self._root_dir

        sub_dirs = partition_string(uuid, chunking)
        return osp.join(self._root_dir, *sub_dirs)
Exemplo n.º 5
0
    def __init__(self, type_name, uuid, save_dir, subdir_split=None,
                 pickle_protocol=-1):
        """
        Set up a classification element whose contents are stored in a file.

        :param type_name: Type of classification. This is usually the name of
            the classifier that generated this result.
        :type type_name: str

        :param uuid: UUID for this classification.
        :type uuid: collections.Hashable

        :param save_dir: Directory to save this element's contents under.
            User-expansion is applied and the path is made absolute, so a
            relative path is taken relative to the current working directory.
        :type save_dir: str | unicode

        :param subdir_split: If a positive integer, store the pickle file in
            nested sub-directories of ``save_dir``. The stringified UUID, with
            any dashes removed, is handed to ``partition_string`` together
            with this value; the first ``subdir_split - 1`` segments become
            the directory names.
        :type subdir_split: None | int

        :param pickle_protocol: Pickling protocol to use. Defaults to -1.
        :type pickle_protocol: int

        """
        super(FileClassificationElement, self).__init__(type_name, uuid)

        self.save_dir = osp.abspath(osp.expanduser(save_dir))
        self.pickle_protocol = pickle_protocol

        # Saving components
        self.subdir_split = subdir_split
        use_subdirs = subdir_split and int(subdir_split) > 0
        if use_subdirs:
            split_count = int(subdir_split)
            self.subdir_split = split_count
            parts = partition_string(str(uuid).replace('-', ''), split_count)
            # All but the last split segment are used, so we don't create a
            # whole bunch of directories with a single element in them.
            leading = parts[:split_count - 1]
            out_dir = osp.join(self.save_dir, *leading)
        else:
            out_dir = self.save_dir

        pickle_name = "%s.%s.classification.pickle" \
                      % (self.type_name, str(self.uuid))
        self.filepath = osp.join(out_dir, pickle_name)
Exemplo n.º 6
0
    def _get_checkpoint_dir(self, data):
        """
        Get, creating it if needed, the checkpoint directory for a data
        element.

        The directory lives under ``self._work_dir`` and is nested according
        to the segments ``partition_string`` produces for the stringified
        element UUID with an argument of 10.

        :param data: Data element
        :type data: smqtk.representation.DataElement

        :return: directory path
        :rtype: str

        """
        uuid_segments = partition_string(str(data.uuid()), 10)
        checkpoint_dir = osp.join(self._work_dir, *uuid_segments)
        file_utils.safe_create_dir(checkpoint_dir)
        return checkpoint_dir
Exemplo n.º 7
0
    def _get_checkpoint_dir(self, data):
        """
        Get, creating it if needed, the checkpoint directory for a data
        element.

        The directory lives under ``self._work_dir`` and is nested according
        to the segments ``partition_string`` produces for the element's MD5
        string with an argument of 8.

        :param data: Data element
        :type data: smqtk.data_rep.DataElement

        :return: directory path
        :rtype: str

        """
        md5_segments = partition_string(data.md5(), 8)
        checkpoint_dir = osp.join(self._work_dir, *md5_segments)
        safe_create_dir(checkpoint_dir)
        return checkpoint_dir
Exemplo n.º 8
0
    def _get_checkpoint_dir(self, data):
        """
        Return the directory holding checkpoint material for ``data``,
        making sure it exists first.

        The path is ``self._work_dir`` followed by one directory level per
        segment returned from ``partition_string`` for the stringified
        element UUID with an argument of 10.

        :param data: Data element
        :type data: smqtk.representation.DataElement

        :return: directory path
        :rtype: str

        """
        element_key = str(data.uuid())
        nested = partition_string(element_key, 10)
        dir_path = osp.join(self._work_dir, *nested)
        file_utils.safe_create_dir(dir_path)
        return dir_path
Exemplo n.º 9
0
    def _containing_dir(self, uuid):
        """
        Resolve the directory under the root that holds content for ``uuid``.

        When ``self._uuid_chunk`` is falsy the root directory itself is
        returned. Otherwise the stringified UUID is split with
        ``partition_string`` and all but the final segment are used as nested
        directory names under the root.
        """
        if self._uuid_chunk:
            # TODO(paul.tunison): Modify uuid string if too short for the set
            #     UUID chunk. e.g. if uuid is the integer 1 and chunk size is
            #     10, we should convert the stringified result to be at least
            #     length 10? Do this in _fp_for_uuid method?
            segments = partition_string(str(uuid), self._uuid_chunk)
            # The last segment is not used as a directory level.
            return osp.join(self._root_dir, *segments[:-1])

        # No sub-directory storage configured
        return self._root_dir
Exemplo n.º 10
0
    def _containing_dir(self, uuid):
        """
        Compute the directory that contains items stored for the given UUID.

        Returns the root directory unchanged when ``self._uuid_chunk`` is
        falsy. Otherwise ``str(uuid)`` is partitioned via
        ``partition_string`` and every segment except the last becomes a
        directory level beneath the root.
        """
        chunking = self._uuid_chunk
        if not chunking:
            # No sub-directory storage configured
            return self._root_dir

        # TODO(paul.tunison): Modify uuid string if too short for the set
        #     UUID chunk. e.g. if uuid is the integer 1 and chunk size is 10,
        #     we should convert the stringified result to be at least length
        #     10? Do this in _fp_for_uuid method?
        all_parts = partition_string(str(uuid), chunking)
        dir_parts = all_parts[:-1]
        return osp.join(self._root_dir, *dir_parts)
Exemplo n.º 11
0
    def __init__(self, type_str, uuid, save_dir, subdir_split=None):
        """
        Initialize a file-backed descriptor element.

        :param type_str: Type of descriptor. This is usually the name of the
            content descriptor that generated this vector.
        :type type_str: str

        :param uuid: UUID for this descriptor.
        :type uuid: collections.Hashable

        :param save_dir: Directory to save this element's contents under. The
            path is user-expanded and made absolute, so a relative path is
            interpreted relative to the current working directory.
        :type save_dir: str | unicode

        :param subdir_split: If an integer greater than 1, the vector file is
            stored in nested sub-directories of ``save_dir`` derived from our
            ``uuid``. The value is passed to ``partition_string`` with the
            stringified UUID (dashes stripped, as would appear for a
            ``uuid.UUID`` instance); the last resulting segment is left off
            when building the directory path (hence the >1 requirement).
        :type subdir_split: None | int

        """
        super(DescriptorFileElement, self).__init__(type_str, uuid)
        self._save_dir = osp.abspath(osp.expanduser(save_dir))
        self._subdir_split = subdir_split

        # Generate filepath from parameters
        target_dir = self._save_dir
        if self._subdir_split and int(self._subdir_split) > 1:
            uuid_str = str(self.uuid()).replace('-', '')
            segments = partition_string(uuid_str, int(self._subdir_split))
            target_dir = osp.join(self._save_dir, *segments[:-1])

        file_name = "%s.%s.vector.npy" % (self.type(), str(self.uuid()))
        self._vec_filepath = osp.join(target_dir, file_name)
Exemplo n.º 12
0
    def __init__(self, type_str, uuid, save_dir, subdir_split=None):
        """
        Initialize a file-backed descriptor element.

        :param type_str: Type of descriptor. This is usually the name of the
            content descriptor that generated this vector.
        :type type_str: str

        :param uuid: UUID for this descriptor.
        :type uuid: collections.Hashable

        :param save_dir: Directory to save this element's contents under. The
            path is user-expanded and made absolute, so a relative path is
            interpreted relative to the current working directory.
        :type save_dir: str | unicode

        :param subdir_split: If a positive integer, the vector file is stored
            in nested sub-directories of ``save_dir``. The value is passed to
            ``partition_string`` with the stringified UUID (dashes removed, as
            would appear for a ``uuid.UUID`` instance) and every resulting
            segment becomes a directory level.
        :type subdir_split: None | int

        """
        super(DescriptorFileElement, self).__init__(type_str, uuid)

        self._save_dir = osp.abspath(osp.expanduser(save_dir))

        # Saving components
        self._subdir_split = subdir_split
        target_dir = self._save_dir
        if subdir_split and int(subdir_split) > 0:
            uuid_str = str(uuid).replace('-', '')
            segments = partition_string(uuid_str, int(subdir_split))
            target_dir = osp.join(self._save_dir, *segments)

        file_name = "%s.%s.vector.npy" % (self._type_label, str(uuid))
        self._vec_filepath = osp.join(target_dir, file_name)
Exemplo n.º 13
0
    def __init__(self, type_str, uuid, save_dir, subdir_split=None):
        """
        Set up a descriptor element whose vector is stored in a file.

        :param type_str: Type of descriptor. This is usually the name of the
            content descriptor that generated this vector.
        :type type_str: str

        :param uuid: UUID for this descriptor.
        :type uuid: collections.Hashable

        :param save_dir: Directory to save this element's contents under.
            User-expansion is applied and the path is made absolute, so a
            relative path is taken relative to the current working directory.
        :type save_dir: str | unicode

        :param subdir_split: If a positive integer, store the vector file in
            nested sub-directories of ``save_dir``. The stringified UUID, with
            any dashes removed, is handed to ``partition_string`` together
            with this value, and each resulting segment is one directory
            level.
        :type subdir_split: None | int

        """
        super(DescriptorFileElement, self).__init__(type_str, uuid)

        self._save_dir = osp.abspath(osp.expanduser(save_dir))

        # Saving components
        self._subdir_split = subdir_split
        use_subdirs = subdir_split and int(subdir_split) > 0
        if use_subdirs:
            dashless_uuid = str(uuid).replace('-', '')
            parts = partition_string(dashless_uuid, int(subdir_split))
            out_dir = osp.join(self._save_dir, *parts)
        else:
            out_dir = self._save_dir

        vec_name = "%s.%s.vector.npy" % (self._type_label, str(uuid))
        self._vec_filepath = osp.join(out_dir, vec_name)
Exemplo n.º 14
0
def ffmpeg_extract_frame_map(working_dir,
                             video_filepath,
                             second_offset=0,
                             second_interval=0,
                             max_duration=0,
                             frames=(),
                             output_image_ext="png",
                             parallel=None,
                             ffmpeg_exe='ffmpeg'):
    """
    Return a mapping of video frame index to image file in the given output
    format.

    If frames requested have not yet been extracted (based on what's contained
    in the specified output directory), they are done now. This means that this
    method could take a little time to complete if there are many frames in the
    video file and this is the first time this is being called.

    This may return an empty dictionary if no frame indices result from the
    specified, or default, constraints.

    Extracted frames are cached in a directory structure under the provided
    ``working_dir`` directory path: ``<working_dir>/VideoFrameExtraction``.
    Frames are extracted into separate directories based on the SHA1 checksum of
    the video file.

    :raises RuntimeError: No frames were extracted.

    :param working_dir: Working directory for frame extraction to occur in.
    :type working_dir: str

    :param video_filepath: Path to the video to extract frames from.
    :type video_filepath: str

    :param second_offset: Seconds into the video to start extracting
    :type second_offset: float

    :param second_interval: Number of seconds between extracted frames
    :type second_interval: float

    :param max_duration: Maximum number of seconds worth of extracted frames
        (starting from the specified offset). If <=0, we extract until the end
        of the video.
    :type max_duration: float

    :param frames: Specific exact frame numbers within the video to extract.
        Providing explicit frames causes offset, interval and duration
        parameters to be ignored and only the frames specified here to be
        extracted and returned.
    :type frames: collections.Iterable[int]

    :param output_image_ext: Extension to use for output images.
    :type output_image_ext: str

    :param parallel: Number of processes to use for frame extraction. This is
        None by default, meaning that all available cores/threads are used.
    :type parallel: int or None

    :param ffmpeg_exe: ffmpeg executable to use for frame extraction. By
        default, we attempt to use what is available of the PATH.
    :type ffmpeg_exe: str or unicode

    :return: Map of frame-to-filepath for requested video frames
    :rtype: dict[int, str]

    """
    log = logging.getLogger('smqtk.utils.video_utils.extract_frame_map')

    video_md = get_metadata_info(video_filepath)

    # Hash the video incrementally inside a context manager: the file handle
    # is always closed and the whole video never has to sit in memory at once.
    sha1 = hashlib.sha1()
    with open(video_filepath, 'rb') as video_file:
        for chunk in iter(lambda: video_file.read(1024 * 1024), b''):
            sha1.update(chunk)
    video_sha1sum = sha1.hexdigest()

    # 40 hex chars split into chunks of 4
    sha1_parts = string_utils.partition_string(video_sha1sum, 10)
    frame_output_dir = os.path.join(working_dir, "VideoFrameExtraction",
                                    *sha1_parts)
    file_utils.safe_create_dir(frame_output_dir)

    def filename_for_frame(frame, ext):
        """
        Standard filename for a given frame number and image extension.
        """
        return "%08d.%s" % (frame, ext.lstrip('.'))

    def iter_frames_for_interval():
        """
        Generator yielding frame numbers from the input video that match the
        offset/interval/duration query parameters. Indices yielded are 0-based
        (i.e. first frame is 0, not 1).

        The first frame (derived from ``second_offset``) is always yielded;
        subsequent frames are yielded while they fall before the cutoff frame.

        :rtype: collections.Iterable[int]

        """
        _log = logging.getLogger('smqtk.utils.video_utils.extract_frame_map'
                                 '.iter_frames_for_interval')
        num_frames = int(video_md.fps * video_md.duration)
        first_frame = second_offset * video_md.fps
        _log.debug("First frame: %f", first_frame)
        if max_duration > 0:
            cutoff_frame = min(float(num_frames),
                               (max_duration + second_offset) * video_md.fps)
        else:
            cutoff_frame = float(num_frames)
        _log.debug("Cutoff frame: %f", cutoff_frame)
        if second_interval:
            incr = second_interval * video_md.fps
        else:
            incr = 1.0
        _log.debug("Frame increment: %f", incr)

        # Interpolate. The first frame is truncated to an integer index just
        # like every subsequent frame so that all keys of the returned map
        # are ints (the product above is usually a float).
        yield int(first_frame)
        next_frm = first_frame + incr
        while next_frm < cutoff_frame:
            _log.debug("-- adding frame: %f", next_frm)
            yield int(next_frm)
            next_frm += incr

    # noinspection PyShadowingNames
    def extract_frames(frames_to_process):
        """
        Extract specific frames from the input video file using ffmpeg. If not
        all frames could be extracted, we return what we were able to extract.

        Frames are first written into a ``.TMP`` sub-directory of the frame
        output directory and then renamed to their final paths.

        :param frames_to_process: Mapping of frame-number:filepath pairs to
            extract from the input video.
        :type frames_to_process: dict[int,str or unicode]

        :return: List of frames that were successfully extracted.
        :rtype: list[int]

        """
        _log = logging.getLogger('smqtk.utils.video_utils.extract_frame_map'
                                 '.extract_frames')

        # Setup temp extraction directory
        tmp_extraction_dir = os.path.join(frame_output_dir, ".TMP")
        if os.path.isdir(tmp_extraction_dir):
            _log.debug("Existing temp director found, removing and starting "
                       "over")
            shutil.rmtree(tmp_extraction_dir, ignore_errors=True)
        os.makedirs(tmp_extraction_dir)

        p = multiprocessing.Pool(parallel)
        # Mapping of frame to (result, temp_filepath)
        #: :type: dict[int, (AsyncResult, str)]
        rmap = {}
        for f in frames_to_process:
            tfp = os.path.join(tmp_extraction_dir,
                               filename_for_frame(f, output_image_ext))
            # Timestamp (seconds) of the frame within the video.
            t = f / video_md.fps
            rmap[f] = (p.apply_async(ffmpeg_extract_frame,
                                     args=(t, video_filepath, tfp,
                                           ffmpeg_exe)), tfp)
        p.close()
        # Check for failures
        extracted_frames = []
        for f, ofp in six.iteritems(frames_to_process):
            r, tfp = rmap[f]
            r.get()  # wait for finish
            if not os.path.isfile(tfp):
                _log.warning("Failed to generated file for frame %d", f)
            else:
                extracted_frames.append(f)
                os.rename(tfp, ofp)
        p.join()
        del p

        os.removedirs(tmp_extraction_dir)
        _log.debug("Frame extraction complete")

        return extracted_frames

    # Determine frames to extract from video
    extract_indices = set()
    if frames:
        log.debug("Only extracting specified frames: %s", frames)
        extract_indices.update(frames)
    else:
        log.debug(
            "Determining frames needed for specification: "
            "offset: %f, interval: %f, max_duration: %f", second_offset,
            second_interval, max_duration)
        extract_indices.update(iter_frames_for_interval())

    if not extract_indices:
        return {}

    # frame/filename map that will be returned based on requested frames
    frame_map = dict((i,
                      os.path.join(frame_output_dir,
                                   filename_for_frame(i, output_image_ext)))
                     for i in extract_indices)

    ###
    # Acquire a file-base lock in output directory so that we don't conflict
    # with another process extracting frames to the same directory.
    #
    # NOTE: This method is prone to starvation if many processes are trying
    #       to extract to the same video frames, but not yet probably due to
    #       existing use cases.
    #
    lock_file = os.path.join(frame_output_dir, '.lock')
    log.debug("Acquiring file lock in '%s'...", frame_output_dir)
    while not file_utils.exclusive_touch(lock_file):
        # This is sufficiently small to be fine grained, but not so small to
        # burn the CPU.
        time.sleep(0.01)
    log.debug("Acquiring file lock -> Acquired!")

    try:
        ###
        # Determine frames to actually extract base on existing files (if any)
        #
        #: :type: dict[int, str]
        frames_to_process = {}
        existing_frames = []
        for i, img_file in sorted(frame_map.items()):
            if not os.path.isfile(img_file):
                log.debug('frame %d needs processing', i)
                frames_to_process[i] = img_file
            else:
                existing_frames.append(i)

        ###
        # Extract needed frames via hook function that provides
        # implementation.
        #
        if frames_to_process:
            frames_extracted = extract_frames(frames_to_process)

            if (len(existing_frames) + len(frames_extracted)) == 0:
                raise RuntimeError("Failed to extract any frames for video")

        return frame_map
    finally:
        # Always release the lock, even when extraction raised.
        os.remove(lock_file)
Exemplo n.º 15
0
 def _containing_dir(self, uuid):
     """
     Compute the directory that contains items stored for the given UUID.

     The result is the root directory extended by one level per segment
     that ``partition_string`` yields for ``uuid`` and ``self._uuid_chunk``.
     """
     sub_dirs = partition_string(uuid, self._uuid_chunk)
     return osp.join(self._root_dir, *sub_dirs)
Exemplo n.º 16
0
def ffmpeg_extract_frame_map(
    working_dir,
    video_filepath,
    second_offset=0,
    second_interval=0,
    max_duration=0,
    frames=(),
    output_image_ext="png",
    parallel=None,
    ffmpeg_exe="ffmpeg",
):
    """
    Return a mapping of video frame index to image file in the given output
    format.

    If frames requested have not yet been extracted (based on what's contained
    in the specified output directory), they are done now. This means that this
    method could take a little time to complete if there are many frames in the
    video file and this is the first time this is being called.

    This may return an empty dictionary if no frame indices result from the
    specified, or default, constraints.

    Extracted frames are cached in a directory structure under the provided
    ``working_dir`` directory path: ``<working_dir>/VideoFrameExtraction``.
    Frames are extracted into separate directories based on the SHA1 checksum of
    the video file.

    :raises RuntimeError: No frames were extracted.

    :param working_dir: Working directory for frame extraction to occur in.
    :type working_dir: str

    :param video_filepath: Path to the video to extract frames from.
    :type video_filepath: str

    :param second_offset: Seconds into the video to start extracting
    :type second_offset: float

    :param second_interval: Number of seconds between extracted frames
    :type second_interval: float

    :param max_duration: Maximum number of seconds worth of extracted frames
        (starting from the specified offset). If <=0, we extract until the end
        of the video.
    :type max_duration: float

    :param frames: Specific exact frame numbers within the video to extract.
        Providing explicit frames causes offset, interval and duration
        parameters to be ignored and only the frames specified here to be
        extracted and returned.
    :type frames: collections.Iterable[int]

    :param output_image_ext: Extension to use for output images.
    :type output_image_ext: str

    :param parallel: Number of processes to use for frame extraction. This is
        None by default, meaning that all available cores/threads are used.
    :type parallel: int or None

    :param ffmpeg_exe: ffmpeg executable to use for frame extraction. By
        default, we attempt to use what is available of the PATH.
    :type ffmpeg_exe: str or unicode

    :return: Map of frame-to-filepath for requested video frames
    :rtype: dict of (int, str)

    """
    log = logging.getLogger("smqtk.utils.video_utils.extract_frame_map")

    video_md = get_metadata_info(video_filepath)

    # Hash the video incrementally inside a context manager: the file handle
    # is always closed and the whole video never has to sit in memory at once.
    sha1 = hashlib.sha1()
    with open(video_filepath, "rb") as video_file:
        for chunk in iter(lambda: video_file.read(1024 * 1024), b""):
            sha1.update(chunk)
    video_sha1sum = sha1.hexdigest()

    # 40 hex chars split into chunks of 4
    sha1_parts = string_utils.partition_string(video_sha1sum, 10)
    frame_output_dir = os.path.join(working_dir, "VideoFrameExtraction", *sha1_parts)
    file_utils.safe_create_dir(frame_output_dir)

    def filename_for_frame(frame, ext):
        """
        Standard filename for a given frame number and image extension.
        """
        return "%08d.%s" % (frame, ext.lstrip("."))

    def iter_frames_for_interval():
        """
        Generator yielding frame numbers from the input video that match the
        offset/interval/duration query parameters. Indices yielded are 0-based
        (i.e. first frame is 0, not 1).

        The first frame (derived from ``second_offset``) is always yielded;
        subsequent frames are yielded while they fall before the cutoff frame.

        :rtype: collections.Iterable[int]

        """
        _log = logging.getLogger("smqtk.utils.video_utils.extract_frame_map.iter_frames_for_interval")
        num_frames = int(video_md.fps * video_md.duration)
        first_frame = second_offset * video_md.fps
        _log.debug("First frame: %f", first_frame)
        if max_duration > 0:
            cutoff_frame = min(float(num_frames), (max_duration + second_offset) * video_md.fps)
        else:
            cutoff_frame = float(num_frames)
        _log.debug("Cutoff frame: %f", cutoff_frame)
        if second_interval:
            incr = second_interval * video_md.fps
        else:
            incr = 1.0
        _log.debug("Frame increment: %f", incr)

        # Interpolate. The first frame is truncated to an integer index just
        # like every subsequent frame so that all keys of the returned map
        # are ints (the product above is usually a float).
        yield int(first_frame)
        next_frm = first_frame + incr
        while next_frm < cutoff_frame:
            _log.debug("-- adding frame: %f", next_frm)
            yield int(next_frm)
            next_frm += incr

    def extract_frames(frames_to_process):
        """
        Extract specific frames from the input video file using ffmpeg. If not
        all frames could be extracted, we return what we were able to extract.

        Frames are first written into a ``.TMP`` sub-directory of the frame
        output directory and then renamed to their final paths.

        :param frames_to_process: Mapping of frame-number:filepath pairs to
            extract from the input video.
        :type frames_to_process: dict[int,str or unicode]

        :return: List of frames that were successfully extracted.
        :rtype: list[int]

        """
        _log = logging.getLogger("smqtk.utils.video_utils.extract_frame_map.extract_frames")

        # Setup temp extraction directory
        tmp_extraction_dir = os.path.join(frame_output_dir, ".TMP")
        if os.path.isdir(tmp_extraction_dir):
            _log.debug("Existing temp director found, removing and starting over")
            shutil.rmtree(tmp_extraction_dir, ignore_errors=True)
        os.makedirs(tmp_extraction_dir)

        p = multiprocessing.Pool(parallel)
        # Mapping of frame to (result, temp_filepath)
        #: :type: dict of (int, (AsyncResult, str))
        rmap = {}
        for f in frames_to_process:
            tfp = os.path.join(tmp_extraction_dir, filename_for_frame(f, output_image_ext))
            # Timestamp (seconds) of the frame within the video.
            t = f / video_md.fps
            rmap[f] = (p.apply_async(ffmpeg_extract_frame, args=(t, video_filepath, tfp, ffmpeg_exe)), tfp)
        p.close()
        # Check for failures
        extracted_frames = []
        # ``items()`` rather than the Python-2-only ``iteritems()`` so this
        # runs under both Python 2 and 3.
        for f, ofp in frames_to_process.items():
            r, tfp = rmap[f]
            r.get()  # wait for finish
            if not os.path.isfile(tfp):
                _log.warning("Failed to generated file for frame %d", f)
            else:
                extracted_frames.append(f)
                os.rename(tfp, ofp)
        p.join()
        del p

        os.removedirs(tmp_extraction_dir)
        _log.debug("Frame extraction complete")

        return extracted_frames

    # Determine frames to extract from video
    extract_indices = set()
    if frames:
        log.debug("Only extracting specified frames: %s", frames)
        extract_indices.update(frames)
    else:
        log.debug(
            "Determining frames needed for specification: offset: %f, interval: %f, max_duration: %f",
            second_offset,
            second_interval,
            max_duration,
        )
        extract_indices.update(iter_frames_for_interval())

    if not extract_indices:
        return {}

    # frame/filename map that will be returned based on requested frames
    frame_map = dict(
        (i, os.path.join(frame_output_dir, filename_for_frame(i, output_image_ext))) for i in extract_indices
    )

    ###
    # Acquire a file-base lock in output directory so that we don't conflict
    # with another process extracting frames to the same directory.
    #
    # NOTE: This method is prone to starvation if many processes are trying
    #       to extract to the same video frames, but not yet probably due to
    #       existing use cases.
    #
    lock_file = os.path.join(frame_output_dir, ".lock")
    log.debug("Acquiring file lock in '%s'...", frame_output_dir)
    while not file_utils.exclusive_touch(lock_file):
        # This is sufficiently small to be fine grained, but not so small to
        # burn the CPU.
        time.sleep(0.01)
    log.debug("Acquiring file lock -> Acquired!")

    try:
        ###
        # Determine frames to actually extract base on existing files (if any)
        #
        #: :type: dict[int, str]
        frames_to_process = {}
        existing_frames = []
        for i, img_file in sorted(frame_map.items()):
            if not os.path.isfile(img_file):
                log.debug("frame %d needs processing", i)
                frames_to_process[i] = img_file
            else:
                existing_frames.append(i)

        ###
        # Extract needed frames via hook function that provides
        # implementation.
        #
        if frames_to_process:
            frames_extracted = extract_frames(frames_to_process)

            if (len(existing_frames) + len(frames_extracted)) == 0:
                raise RuntimeError("Failed to extract any frames for video")

        return frame_map
    finally:
        # Always release the lock, even when extraction raised.
        os.remove(lock_file)