Exemplo n.º 1
0
    def __init__(self, id_vector, bg_clip_ids, feature_mat, kernel_mat,
                 rw_lock=None):
        """ Initialize this FeatureMemory object

        This class must be used with numpy ndarray and matrix classes for shared
        memory purposes.

        NOTE: Arrays and matrices given here must own their data (their
        ``base`` attribute must be None). This is currently required in order
        to resize them later when updating with new feature vectors.

        TODO: Allow kernel matrix to be optional, causing it to be built from
        the provided feature matrix (not a recommended action).

        :param id_vector: (numpy) Array of clip IDs. This is used as the map
            from an index position to the clip ID it is associated with in the
            feature and kernel matrices.
        :type id_vector: ndarray of int
        :param bg_clip_ids: Set of clip IDs that are to be treated as background
            clip IDs. A mutable set is stored as given (not copied); a
            frozenset is copied into a new mutable set.
        :type bg_clip_ids: set of int
        :param feature_mat: (numpy) Matrix of features for clip IDs. Features
            should be stored vertically, i.e. each row is a feature for a
            particular clip ID (id_vector being the index-to-clipID map).
        :type feature_mat: matrix of double
        :param kernel_mat: (numpy) Matrix detailing the distances between
            feature vectors. This must be a square matrix with as many rows as
            ``feature_mat``.
        :type kernel_mat: matrix of double
        :param rw_lock: Optional ReadWriteLock for this instance to use. If not
            provided, we will create our own.
        :type rw_lock: None or ReadWriteLock

        :raises AssertionError: If ``bg_clip_ids`` is not a set/frozenset or
            ``rw_lock`` is given but is not a ReadWriteLock.
        :raises ValueError: If a given array/matrix does not own its data, or
            the kernel shape conflicts with the feature matrix.

        """
        assert isinstance(bg_clip_ids, (set, frozenset)), \
            "Background clip IDs not given as a set or frozenset!"

        # ``base`` is None only for arrays that own their memory. Checked in
        # the same order as the parameters are listed.
        for arg_name, arr in (("id_vector", id_vector),
                              ("feature_mat", feature_mat),
                              ("kernel_mat", kernel_mat)):
            # noinspection PyUnresolvedReferences
            if arr.base is not None:
                raise ValueError("Given ``%s`` does not own its data! It "
                                 "will not be transformable later." % arg_name)

        # The kernel should be square and should be the same size as the feature
        # matrix's number of rows (unique stored clip features).
        if not (kernel_mat.shape[0] == kernel_mat.shape[1] == feature_mat.shape[0]):
            raise ValueError("The distance kernel matrix provided is either "
                             "misshapen or conflicts with the dimensions of "
                             "the provided feature matrix. (kernel matrix "
                             "shape: %s, num feature vectors: %d)"
                             % (kernel_mat.shape, feature_mat.shape[0]))

        self._log.debug("Lock given: %s", rw_lock)
        # Compare against None explicitly so that a valid lock object that
        # happens to evaluate as falsy is not silently replaced.
        if rw_lock is not None:
            assert isinstance(rw_lock, ReadWriteLock), \
                "Not given a value ReadWriteLock instance!"
            self._rw_lock = rw_lock
        else:
            self._log.debug("No lock given, creating a new ReadWriteLock")
            self._rw_lock = ReadWriteLock()

        # The background ID collection is mutated in place later on (clip IDs
        # are added/discarded), so an immutable frozenset must be converted.
        if isinstance(bg_clip_ids, frozenset):
            bg_clip_ids = set(bg_clip_ids)

        self._id_vector = id_vector
        self._bg_clip_ids = bg_clip_ids
        self._feature_mat = feature_mat
        self._kernel_mat = kernel_mat

        # Helper structure mapping clipIDs to their row index
        self._cid2idx_map = dict((cid, idx) for idx, cid
                                 in enumerate(self._id_vector))
Exemplo n.º 2
0
    def __init__(self,
                 row_id_index_map,
                 col_id_index_map,
                 kernel_mat,
                 bg_clip_ids=None,
                 rw_lock=None):
        """
        Initialize the kernel matrix. The initialization values will more than
        likely be proxies to np.matrix objects.

        The ``bg_clip_ids`` set may be given when this kernel matrix is to be
        a square, symmetric kernel and activates the use of the
        ``symmetric_submatrix`` method. This set must list clip IDs that are
        to be considered "background" IDs, or clips that are to always be
        considered negative. These clip IDs must be included in symmetric
        sub-matrices.

        :param row_id_index_map: Array of clip IDs associated to row indices.
            Contents will be treated as ints.
        :type row_id_index_map: ndarray of int
        :param col_id_index_map: Array of clip IDs associated to column
            indices. Contents will be treated as ints.
        :type col_id_index_map: ndarray of int
        :param kernel_mat: Kernel data matrix.
        :type kernel_mat: matrix
        :param bg_clip_ids: Optional set of clip IDs that should be considered
            "background" videos. Contents must be convertible to int. The set
            is stored as given (not copied).
        :type bg_clip_ids: None or set of int or frozenset of int
        :param rw_lock: Read-Write lock for data provided. This should be
            provided if any of the data is shared with other objects/
            sources. If this is given None (default), then a lock is created.
        :type rw_lock: ReadWriteLock or None

        :raises AssertionError: If an index map length does not match the
            kernel shape, ``bg_clip_ids`` is neither None nor a set/frozenset,
            or ``rw_lock`` is given but is not a ReadWriteLock.
        :raises ValueError: If an element of ``bg_clip_ids`` cannot be
            converted to an int.

        """
        # TODO: Possibly add checks for the id arrays like there is for the
        #       bgclipid array (int-able contents)
        assert row_id_index_map.shape[0] == kernel_mat.shape[0], \
            "Length of row index map and kernel row count did not match! " \
            "(row index map: %d, kernel row count: %d)" \
            % (row_id_index_map.shape[0], kernel_mat.shape[0])
        assert col_id_index_map.shape[0] == kernel_mat.shape[1], \
            "Length of col index map and kernel col count did not match! " \
            "(col index map: %d, kernel col count: %d)" \
            % (col_id_index_map.shape[0], kernel_mat.shape[1])

        self._row_id_index_map = row_id_index_map
        self._col_id_index_map = col_id_index_map
        self._kernel = kernel_mat

        assert ((bg_clip_ids is None)
                or isinstance(bg_clip_ids, (set, frozenset))), \
            "Must either given None or a set for the bg_clip_ids " \
            "vector. Got: %s" % type(bg_clip_ids)
        self._bg_cid_set = bg_clip_ids
        if bg_clip_ids is not None:
            # Only checking convertibility; the converted values are not kept.
            try:
                for cid in bg_clip_ids:
                    int(cid)
            except Exception:
                raise ValueError("Not all of the contents of of bg_clip_ids "
                                 "could be treated as ints!")

        # Compare against None explicitly so that a valid lock object that
        # happens to evaluate as falsy is not silently replaced.
        if rw_lock is not None:
            assert isinstance(rw_lock, ReadWriteLock), \
                "Did not receive valid istance of RW Lock. Got '%s'" \
                % type(rw_lock)
            self._rw_lock = rw_lock
        else:
            self._rw_lock = ReadWriteLock()
Exemplo n.º 3
0
class FeatureMemory (object):
    """
    Class for encapsulating and managing feature and kernel matrices for
    different feature types
    """

    @classmethod
    def construct_from_files(cls, id_vector_file, bg_flags_file,
                             feature_mat_file, kernel_mat_file, rw_lock=None):
        """ Initialize FeatureMemory object from file sources.

        All four files are read with ``numpy.load``.

        :param id_vector_file: File containing the clip ID values in the order
            in which they associate to the rows of the feature and kernel
            matrices.
        :type id_vector_file: str
        :param bg_flags_file: File containing one flag per row index, where a
            truthy flag marks the clip ID at that index as a background clip.
        :type bg_flags_file: str
        :param feature_mat_file: File containing the feature matrix (loaded as
            an array, converted to matrix on load).
        :type feature_mat_file: str
        :param kernel_mat_file: File containing the kernel matrix (loaded as
            an array, converted to matrix on load).
        :type kernel_mat_file: str
        :param rw_lock: Optional lock handed through to the constructor.
        :type rw_lock: None or ReadWriteLock
        :return: Instance of this class constructed with the data provided in
            the given files.
        :rtype: FeatureMemory

        """
        # Wrapping the loaded data in a new array/matrix makes a copy, giving
        # objects whose ``base`` is None as the constructor requires.
        clip_ids = np.array(np.load(id_vector_file))
        bg_flags = np.array(np.load(bg_flags_file))
        # noinspection PyCallingNonCallable
        feature_mat = np.matrix(np.load(feature_mat_file))
        # noinspection PyCallingNonCallable
        kernel_mat = np.matrix(np.load(kernel_mat_file))

        bg_clips = set(clip_ids[i] for i, f in enumerate(bg_flags) if f)

        # Construct through ``cls`` so subclasses get instances of themselves.
        return cls(clip_ids, bg_clips, feature_mat, kernel_mat,
                   rw_lock=rw_lock)

    @property
    def _log(self):
        return logging.getLogger('.'.join([self.__module__,
                                           self.__class__.__name__]))

    def __init__(self, id_vector, bg_clip_ids, feature_mat, kernel_mat,
                 rw_lock=None):
        """ Initialize this FeatureMemory object

        This class must be used with numpy ndarray and matrix classes for shared
        memory purposes.

        NOTE: Arrays and matrices given here must own their data! This is
        currently required in order to resize them later when updating with new
        feature vectors. A ValueError will be thrown if an given array/matrix
        does not own its data.

        TODO: Allow kernel matrix to be optional, causing it to be built from
        the provided feature matrix (not a recommended action).

        :param id_vector: (numpy) Array of clip IDs. This is used as the map
            from an index position to the clip ID its associated with in the
            kernel and distance kernel matrices.
        :type id_vector: ndarray of int
        :param bg_clip_ids: Set of clip IDs that are to be treated as background
            clip IDs.
        :type bg_clip_ids: set of int
        :param feature_mat: (numpy) Matrix of features for clip IDs. Features
            should be stored vertically, i.e. Each row is a feature for a
            particular clip ID (id_vector being the index-to-clipID map).
        :type feature_mat: matrix of double
        :param kernel_mat: (numpy) Matrix detailing the distances between
            feature vectors. This must be a square, symmetric matrix.
        :type kernel_mat: matrix of double
        :param rw_lock: Optional ReadWriteLock for this instance to use. If not
            provided, we will create our own.
        :type rw_lock: None or ReadWriteLock

        """
        # assert isinstance(id_vector, (ndarray, ArrayProxy)), \
        #     "ID vector not given as a numpy.ndarray!"
        assert isinstance(bg_clip_ids, (set, frozenset)), \
            "Background ID vector not a numpy.ndarray!"
        # assert isinstance(feature_mat, (matrix, MatrixProxy)), \
        #     "Kernel matrix not a numpy.matrix!"
        # assert isinstance(kernel_mat, (matrix, MatrixProxy)), \
        #     "Distance kernel not a numpy.matrix!"

        # noinspection PyUnresolvedReferences
        # -> base IS a member of the matrix class...
        if id_vector.base is not None:
            raise ValueError("Given ``id_vector`` does not own its data! It "
                             "will not be transformable later.")
        elif feature_mat.base is not None:
            raise ValueError("Given ``feature_mat`` does not own its data! It "
                             "will not be transformable later.")
        elif kernel_mat.base is not None:
            raise ValueError("Given ``kernel_mat`` does not own its data! It "
                             "will not be transformable later.")

        # The kernel should be square and should be the same size as the feature
        # matrix's number or rows (unique stored clip features).
        if not (kernel_mat.shape[0] == kernel_mat.shape[1] == feature_mat.shape[0]):
            raise ValueError("The distance kernel matrix provided is either "
                             "misshapen or conflicts with the dimensions of "
                             "the provided feature matrix. (kernel matrix "
                             "shape: %s, num feature vectors: %d"
                             % (kernel_mat.shape, feature_mat.shape[0]))

        self._log.debug("Lock given: %s", rw_lock)
        if rw_lock:
            assert isinstance(rw_lock, ReadWriteLock), \
                "Not given a value ReadWriteLock instance!"
            self._rw_lock = rw_lock
        else:
            self._log.debug("Falling back on bad lock given (given: %s)",
                            type(rw_lock))
            self._rw_lock = ReadWriteLock()

        self._id_vector = id_vector
        self._bg_clip_ids = bg_clip_ids
        self._feature_mat = feature_mat
        self._kernel_mat = kernel_mat

        # Helper structure mapping clipIDs to their row index
        self._cid2idx_map = dict((cid, idx) for idx, cid
                                 in enumerate(self._id_vector))

    @staticmethod
    def _histogram_intersection_distance(a, b):
        """
        Calculates distance between two vectors using histogram intersection.

        Non-branching version of the histogram intersection algorithm.

        :param a: A vector in array form.
        :type a: ndarray
        :param b: A vector in array form.
        :type b: ndarray

        :return: Histogram Intersection (HI) distance scalar
        :rtype: double

        """
        # noinspection PyUnresolvedReferences
        return (a + b - np.abs(a - b)).sum() * 0.5

    def get_ids(self):
        """
        NOTE: NOT THREAD SAFE. Use the returned structure only in conjunction
        with this object's lock when in a parallel environment to prevent
        possible memory corruption.

        :return: Ordered vector of clip IDs along the row-edge of this object's
            feature matrix and along both edges of the kernel matrix.
        :rtype: numpy.core.multiarray.ndarray

        """
        return self._id_vector

    def get_bg_ids(self):
        """
        NOTE: NOT THREAD SAFE. Use the returned structure only in conjunction
        with this object's lock when in a parallel environment to prevent
        possible memory corruption.

        :return: Ordered vector of clip IDs that we are treating as background
            clips.
        :rtype: ndarray

        """
        return frozenset(self._bg_clip_ids)

    def get_feature_matrix(self):
        """
        NOTE: NOT THREAD SAFE. Use the returned structure only in conjunction
        with this object's lock when in a parallel environment to prevent
        possible memory corruption.

        :return: Matrix recording feature vectors for a feature type. See the
            id vector for row-wise index-to-clipID association.
        :rtype: numpy.matrixlib.defmatrix.matrix

        """
        return self._feature_mat

    def get_kernel_matrix(self):
        """
        NOTE: NOT THREAD SAFE. Use the returned structure only in conjunction
        with this object's lock when in a parallel environment to prevent
        possible memory corruption.

        :return: Symmetric matrix detailing the distances between any two clip
            ID features. Distances are computed via histogram intersection.
        :rtype: matrix

        """
        return self._kernel_mat

    def get_lock(self):
        """
        :return: a reference to this object's read/write lock.
        :rtype: ReadWriteLock

        """
        return self._rw_lock

    def get_distance_kernel(self):
        """
        Build a DistanceKernel over this feature's current state.

        The kernel is handed this object's own ID vector (for both the row and
        column maps), kernel matrix, background ID set and read/write lock, so
        it refers to the same objects rather than copies. Construction happens
        while holding the read lock.

        :return: This feature distance kernel.
        :rtype: DistanceKernel

        """
        lock = self._rw_lock
        with lock.read_lock():
            ids = self._id_vector
            kernel = DistanceKernel(ids, ids, self._kernel_mat,
                                    self._bg_clip_ids, lock)
        return kernel

    def get_feature(self, *clip_id_or_ids):
        """
        Return a matrix where each row is the feature vector for one of the
        given clip IDs. The given sequence of clip IDs acts as the
        index-to-clipID map for the returned matrix's rows. If repeat clip IDs
        are provided in the input, there will be repeat feature vectors in the
        returned matrix.

        :raises AssertionError: If any given ID is not an integer (Python or
            numpy integer types are both accepted).
        :raises KeyError: If a given clip ID is not represented in the current
            matrix.

        :param clip_id_or_ids: One or more integer clip IDs to retrieve the
            feature vectors for.
        :type clip_id_or_ids: tuple of int

        :return: NxM matrix, where N is the number of clip IDs requested and M
            is the length of a feature vector for this feature.
        :rtype: np.matrix

        """
        import numbers

        # ``numbers.Integral`` also covers numpy integer scalars, which is what
        # iterating this object's own ID vector produces.
        assert all(isinstance(e, numbers.Integral) for e in clip_id_or_ids), \
            "Not given an integer or a valid iterable over integers!"

        with self._rw_lock.read_lock():
            # rows = num of IDs given, cols = width of feature matrix
            with SimpleTimer("Allocating return matrix", self._log.debug):
                # noinspection PyUnresolvedReferences
                # -> matrix class DOES have ``dtype`` property...
                ret_mat = matrix(ndarray((len(clip_id_or_ids),
                                          self._feature_mat.shape[1]),
                                         self._feature_mat.dtype))
            # The allocation above is uninitialized; every row is filled here.
            for i, cid in enumerate(clip_id_or_ids):
                feature_idx = self._cid2idx_map[cid]
                ret_mat[i, :] = self._feature_mat[feature_idx, :]
            return ret_mat

    # noinspection PyUnresolvedReferences,PyCallingNonCallable
    def update(self, clip_id, feature_vec=None, is_background=False, timeout=None):
        """
        Update this feature with a feature vector associated with a clip ID. If
        clip ID is already in the feature matrix, we replace the current vector
        with the given one.

        Either way, the distance kernel is updated with either a new row/column,
        or updating relevant slots in the existing distance kernel.

        :raise ValueError: if the given feature vector is not compatible with
            our feature vector.
        :raise RuntimeError: If a timeout is given and the underlying write lock
            doesn't acquire in that amount of time.

        :param clip_id: The ID of the clip the given ``feature_vec`` represents.
        :type clip_id: int
        :param feature_vec: Feature vector associated to the given clip ID.
        :type feature_vec: ndarray
        :param is_background: Flag declaring that this clip ID represents a
            background feature.
        :type is_background: bool
        :param timeout: Timeout seconds for the underlying write lock to acquire
            before a RuntimeError is thrown.
        :type timeout: None or int or float

        """
        with self._rw_lock.write_lock(timeout):
            clip_id = int(clip_id)
            if feature_vec is not None and \
                    not (feature_vec.ndim == 1
                         and len(feature_vec) == self._feature_mat.shape[1]):
                raise ValueError("Given feature vector not compatible "
                                 "(dimensionality or length does not match)")

            # Update the given feature vector and kernel distances
            if self._cid2idx_map.get(clip_id, None) is not None:
                # In all cases, update the background status of the clip
                if is_background:
                    self._bg_clip_ids.add(clip_id)
                else:
                    self._bg_clip_ids.discard(clip_id)

                # If we were given a new feature vector, update entries
                if feature_vec is not None:
                    idx = self._cid2idx_map[clip_id]
                    self._feature_mat[idx] = feature_vec
                    new_dist = np.mat(tuple(
                        self._histogram_intersection_distance(feature_vec, fv)
                        for fv in self._feature_mat
                    ))
                    self._kernel_mat[idx, :] = new_dist
                    self._kernel_mat[:, idx] = new_dist

            # Given a new feature to add.
            else:
                if feature_vec is None:
                    raise ValueError("Update given a new clip ID, but no "
                                     "feature vector provided.")

                # Update internal feature matrix with added vector
                self._cid2idx_map[clip_id] = self._id_vector.size
                self._id_vector.resize((self._id_vector.size + 1,),
                                       refcheck=False)
                self._id_vector[-1] = clip_id

                if is_background:
                    self._bg_clip_ids.add(clip_id)

                # noinspection PyUnresolvedReferences
                if self._feature_mat.base is not None:
                    raise RuntimeError("Feature matrix does not own its data")
                # Since we're only adding a new row, this resize does not affect
                # the positioning of the existing data.
                # noinspection PyUnresolvedReferences
                self._feature_mat.resize((self._feature_mat.shape[0] + 1,
                                          self._feature_mat.shape[1]),
                                         refcheck=False
                                         )
                self._feature_mat[-1, :] = feature_vec

                # Need to add a new row AND column to the distance kernel.
                if self._kernel_mat.base is not None:
                    raise RuntimeError("kernel matrix does not own its data")
                assert self._kernel_mat.shape[0] == self._kernel_mat.shape[1], \
                    "kernel matrix is not symmetric for some reason???"
                # noinspection PyPep8Naming
                # -> because I like ``N`` better...
                N = self._kernel_mat.shape[0]
                kernel_copy = np.matrix(self._kernel_mat)
                self._kernel_mat.resize((N+1, N+1), refcheck=False)
                self._kernel_mat[:N, :N] = kernel_copy
                del kernel_copy

                # Computing new feature distance (histogram intersection). Only
                # need to compute this once because of HI being being
                # commutative and the kernel matrix being symmetric.
                dist_vec = np.mat(tuple(
                    self._histogram_intersection_distance(feature_vec, fv)
                    for fv in self._feature_mat
                ))
                self._kernel_mat[-1, :] = dist_vec
                self._kernel_mat[:, -1] = dist_vec.T
Exemplo n.º 4
0
class DistanceKernel(object):
    """
    Feature Distance Kernel object.

    This class allows the kernel to either be symmetric or not. If it is
    symmetric, the ``symmetric_submatrix`` function becomes available.

    Intended to be used with ProxyManager proxy objects (given at
    construction)

    MONKEY PATCHING:
    When using this object directly (not using the ProxyManager stuff) and
    sending it over pipes, the ReadWriteLock needs to be monkey patched out (the
    multiprocessing.Condition variable doesn't play nicely). Need to set an
    instance of a DummyRWLock to the DistanceKernel._rw_lock property. For
    example:

        ...
        dk = ...
        dk._rw_lock = DummyRWLock()
        <send dk into a pipe>
        ...

    """
    @classmethod
    def construct_symmetric_from_files(cls,
                                       id_vector_file,
                                       kernel_mat_file,
                                       bg_flags_file=None):
        """
        Construct a symmetric DistanceKernel object. A background flags file
        denotes the clip IDs that are to be treated as background clips
        (required to activate the symmetric_submatrix function). Such a
        DistanceKernel is usually used with event learning and should be
        provided a background flags file also.

        :param id_vector_file: Text file, read with numpy.loadtxt(...),
            containing clip ID values in the order in which they associate to
            the rows of the kernel matrix.
        :type id_vector_file: str
        :param kernel_mat_file: File containing the kernel matrix, read with
            numpy.load(...) and converted to a matrix on load.
        :type kernel_mat_file: str
        :param bg_flags_file: Optional text file, read with
            numpy.loadtxt(...), where each index maps a row index of the kernel
            to whether or not the associated clip ID should be considered a
            background video or not.
        :type bg_flags_file: str
        :return: Symmetric DistanceKernel constructed with the data provided in
            the provided files.
        :rtype: DistanceKernel

        """
        # ``ndmin=1`` keeps a file holding a single value as a 1-element
        # vector instead of a 0-dimensional array.
        clip_ids = np.array(np.loadtxt(id_vector_file), ndmin=1)
        # noinspection PyCallingNonCallable
        kernel_mat = np.matrix(np.load(kernel_mat_file))

        if bg_flags_file is not None:
            bg_flags = np.array(np.loadtxt(bg_flags_file), ndmin=1)
            # The constructor only accepts a set/frozenset of background IDs.
            bg_clips = set(int(clip_ids[i])
                           for i, e in enumerate(bg_flags) if e)
        else:
            bg_clips = None

        # Construct through ``cls`` so subclasses get instances of themselves.
        return cls(clip_ids, clip_ids, kernel_mat, bg_clips)

    @classmethod
    def construct_asymmetric_from_files(cls, row_ids_file, col_ids_file,
                                        kernel_mat_file):
        """
        Construct an asymmetric DistanceKernel object, usually used for archive
        searches.

        No option for providing background clip IDs as asymmetric kernels are
        NOT used for learning purposes.

        :param row_ids_file: Text file, read with numpy.loadtxt(...),
            containing clip ID values in the order in which they associate to
            the rows of the given kernel matrix.
        :type row_ids_file: str
        :param col_ids_file: Text file, read with numpy.loadtxt(...),
            containing clip ID values in the order in which they associate to
            the columns of the given kernel matrix.
        :type col_ids_file: str
        :param kernel_mat_file: File containing the kernel matrix, read with
            numpy.load(...) and converted to a matrix on load.
        :type kernel_mat_file: str
        :return: Asymmetric DistanceKernel constructed with the data provided in
            the provided files.
        :rtype: DistanceKernel

        """
        # ``ndmin=1`` keeps a file holding a single value as a 1-element
        # vector instead of a 0-dimensional array.
        row_cids = np.array(np.loadtxt(row_ids_file), ndmin=1)
        col_cids = np.array(np.loadtxt(col_ids_file), ndmin=1)
        # noinspection PyCallingNonCallable
        kernel_mat = np.matrix(np.load(kernel_mat_file))
        # Construct through ``cls`` so subclasses get instances of themselves.
        return cls(row_cids, col_cids, kernel_mat)

    @property
    def _log(self):
        return logging.getLogger('.'.join(
            [self.__module__, self.__class__.__name__]))

    def __init__(self,
                 row_id_index_map,
                 col_id_index_map,
                 kernel_mat,
                 bg_clip_ids=None,
                 rw_lock=None):
        """
        Initialize the kernel matrix. The initialization values will more than
        likely be proxies to np.matrix objects.

        The ``bg_clip_ids`` set may be given when this kernel matrix is to be
        a square, symmetric kernel and activates the use of the
        ``symmetric_submatrix`` method. This set must list clip IDs that are
        to be considered "background" IDs, or clips that are to always be
        considered negative. These clip IDs must be included in symmetric
        sub-matrices.

        :param row_id_index_map: Array of clip IDs associated to row indices.
            Contents will be treated as ints.
        :type row_id_index_map: ndarray of int
        :param col_id_index_map: Array of clip IDs associated to column
            indices. Contents will be treated as ints.
        :type col_id_index_map: ndarray of int
        :param kernel_mat: Kernel data matrix.
        :type kernel_mat: matrix
        :param bg_clip_ids: Optional set of clip IDs that should be considered
            "background" videos. Contents must be convertible to int. The set
            is stored as given (not copied).
        :type bg_clip_ids: None or set of int or frozenset of int
        :param rw_lock: Read-Write lock for data provided. This should be
            provided if any of the data is shared with other objects/
            sources. If this is given None (default), then a lock is created.
        :type rw_lock: ReadWriteLock or None

        :raises AssertionError: If an index map length does not match the
            kernel shape, ``bg_clip_ids`` is neither None nor a set/frozenset,
            or ``rw_lock`` is given but is not a ReadWriteLock.
        :raises ValueError: If an element of ``bg_clip_ids`` cannot be
            converted to an int.

        """
        # TODO: Possibly add checks for the id arrays like there is for the
        #       bgclipid array (int-able contents)
        assert row_id_index_map.shape[0] == kernel_mat.shape[0], \
            "Length of row index map and kernel row count did not match! " \
            "(row index map: %d, kernel row count: %d)" \
            % (row_id_index_map.shape[0], kernel_mat.shape[0])
        assert col_id_index_map.shape[0] == kernel_mat.shape[1], \
            "Length of col index map and kernel col count did not match! " \
            "(col index map: %d, kernel col count: %d)" \
            % (col_id_index_map.shape[0], kernel_mat.shape[1])

        self._row_id_index_map = row_id_index_map
        self._col_id_index_map = col_id_index_map
        self._kernel = kernel_mat

        assert ((bg_clip_ids is None)
                or isinstance(bg_clip_ids, (set, frozenset))), \
            "Must either given None or a set for the bg_clip_ids " \
            "vector. Got: %s" % type(bg_clip_ids)
        self._bg_cid_set = bg_clip_ids
        if bg_clip_ids is not None:
            # Only checking convertibility; the converted values are not kept.
            try:
                for cid in bg_clip_ids:
                    int(cid)
            except Exception:
                raise ValueError("Not all of the contents of of bg_clip_ids "
                                 "could be treated as ints!")

        # Compare against None explicitly so that a valid lock object that
        # happens to evaluate as falsy is not silently replaced.
        if rw_lock is not None:
            assert isinstance(rw_lock, ReadWriteLock), \
                "Did not receive valid istance of RW Lock. Got '%s'" \
                % type(rw_lock)
            self._rw_lock = rw_lock
        else:
            self._rw_lock = ReadWriteLock()

    def get_lock(self):
        """
        Expose the read/write lock stored on this instance.

        :return: The lock object held in ``self._rw_lock`` (the same object,
            not a copy).
        :rtype: ReadWriteLock

        """
        rw_lock = self._rw_lock
        return rw_lock

    def row_id_map(self):
        """
        Fetch the row index-to-clipID map while holding this object's read
        lock.

        The stored object itself is returned (no copy is made).

        :return: Row index-to-clipID map
        :rtype: ndarray

        """
        lock = self.get_lock()
        with lock.read_lock():
            row_map = self._row_id_index_map
        return row_map

    def col_id_map(self):
        """
        Fetch the column index-to-clipID map while holding this object's read
        lock.

        The stored object itself is returned (no copy is made).

        :return: Column index-to-clipID map
        :rtype: ndarray

        """
        lock = self.get_lock()
        with lock.read_lock():
            col_map = self._col_id_index_map
        return col_map

    def get_kernel_matrix(self):
        """
        Fetch the underlying kernel matrix while holding this object's read
        lock.

        RETURNED OBJECT IS NOT THREAD/PROCESS SAFE. The stored matrix itself
        is handed back (no copy is made), so once retrieved it may still be
        modified by another thread/process.

        :return: The underlying kernel matrix.
        :rtype: matrix

        """
        lock = self.get_lock()
        with lock.read_lock():
            kernel = self._kernel
        return kernel

    def get_background_ids(self):
        """
        Return an immutable snapshot of the background clip IDs, taken while
        holding this object's read lock.

        A new frozenset is built on every call. When no background set was
        stored (``self._bg_cid_set`` is None) an empty frozenset is returned,
        so the result is never None.

        :return: The background clip IDs as a frozenset (possibly empty).
        :rtype: frozenset

        """
        with self.get_lock().read_lock():
            bg_ids = self._bg_cid_set
            if bg_ids is None:
                return frozenset()
            return frozenset(bg_ids)

    def is_symmetric(self):
        """
        Tell whether the row and column index-to-clipID maps are identical,
        i.e. the same clip IDs in the same order along both kernel axes.

        :return: A true value if both ID maps have the same shape and equal
            contents element-wise, a false value otherwise.
        :rtype: bool

        """
        with self._rw_lock.read_lock():
            row_ids = self._row_id_index_map
            col_ids = self._col_id_index_map

            # Compare shapes first: element-wise ``==`` between arrays of
            # different shapes may hand back a plain bool instead of an array,
            # in which case calling ``.all()`` on the result is not possible.
            if row_ids.shape != col_ids.shape:
                return False

            # noinspection PyUnresolvedReferences
            return (row_ids == col_ids).all()

    def symmetric_submatrix(self, *clip_ids):
        """
        Return a symmetric sub NxN matrix of the total distance kernel based on
        the clip IDs provided. The background clips will always be included in
        the matrix if this DistanceKernel was constructed with a set of
        background clip IDs.

        Clip IDs provided will be assumed non-background, or positive
        event examples. If the clip ID of a background video is provided as an
        argument, it is flagged as non-background in the returned
        index-to-is-background list.

        Rows/columns of the result follow the order of the kernel's row ID map,
        not the order in which IDs were passed. If an ID occurs more than once
        in the row ID map, only its first occurrence is used.

        Note: The matrix returned will always be a new instance and not set up
        to use shared memory. When directly used with shared memory objects, it
        will be passed by value, not by reference.

        :raise RuntimeError: If the kernel is not symmetric (see
            ``is_symmetric``).
        :raise ValueError: If any given clip ID cannot be converted with
            ``int()``.
        :raise AssertionError: If any given clip ID is not present in the row
            ID map.

        :param clip_ids: Integer clip IDs (positional arguments) to include in
            the returned matrix. The returned matrix will also contain all
            background clip IDs.
        :type clip_ids: Iterable of int
        :return: The index-to-clipID map (list), the index-to-is-background map
            (list) and the symmetric NxN submatrix, where N is the number of
            clip IDs provided as arguments plus the number of background IDs,
            minus the overlap between those two sets.
        :rtype: list of int, list of bool, numpy.matrixlib.defmatrix.matrix

        """
        with self._rw_lock.read_lock():
            with SimpleTimer("Checking inputs", self._log.debug):
                if not self.is_symmetric():
                    raise RuntimeError("Cannot get a symmetric sub-matrix if "
                                       "the kernel is not square!")

                # Only trap ordinary conversion errors. A bare ``except`` would
                # also swallow KeyboardInterrupt / SystemExit and re-label them
                # as a ValueError.
                try:
                    clip_ids = [int(e) for e in clip_ids]
                except Exception:
                    raise ValueError("Not all clip IDs could be used as ints!")

                # Set form of the requested IDs: used for the coverage check
                # here and for constant-time membership tests further down.
                given_cids = set(clip_ids)

                id_diff = given_cids.difference(self._row_id_index_map)
                assert not id_diff, \
                    "Not all clip IDs provided are represented in this " \
                    "distance kernel matrix! (difference: %s)" \
                    % id_diff
                del id_diff

            with SimpleTimer("Computing union of BG clips and provided IDs",
                             self._log.debug):
                # A missing background set (None) is allowed; the sub-matrix
                # then only covers the IDs given as arguments.
                if self._bg_cid_set is not None:
                    all_cids = self._bg_cid_set.union(clip_ids)
                else:
                    all_cids = given_cids

            # Walk the kernel's row ID map once so the selected IDs come out in
            # the same relative order as the kernel matrix edges. ``seen_cids``
            # keeps only the first occurrence of each ID without a linear scan
            # of the output list.
            focus_indices = []
            focus_clipids = []
            focus_id2isbg = []
            seen_cids = set()
            for idx, cid in enumerate(self._row_id_index_map):
                if (cid in all_cids) and (cid not in seen_cids):
                    seen_cids.add(cid)
                    focus_indices.append(idx)
                    focus_clipids.append(cid)
                    # IDs provided as arguments are considered non-background,
                    # even if the ID is in the background set. All other IDs
                    # in the union must then come from the background set.
                    focus_id2isbg.append(cid not in given_cids)

            # Select the focus rows first, then the matching columns.
            ret_mat = self._kernel[focus_indices, :][:, focus_indices]
            return focus_clipids, focus_id2isbg, ret_mat

    # noinspection PyPep8Naming
    def extract_rows(self, *clipID_or_IDs):
        """
        Find and return the v-stacked distance vectors, in kernel row order
        (i.e. not in the order given as arguments), of the kernel rows matching
        the given clip IDs.

        Note: The matrix returned will always be a new instance and not set up
        to use shared memory. When directly used with shared memory objects, it
        will be passed by value, not by reference.

        :raise ValueError: If any given clip ID cannot be converted with
            ``int()``.
        :raise AssertionError: If any given clip ID is not present in the row
            ID map.

        :param clipID_or_IDs: One or more integer clip IDs, passed as
            positional arguments, of which to get the distance vectors for.
            Repeated IDs are collapsed.
        :type clipID_or_IDs: int

        :return: The row-wise index-to-clipID map (tuple), the column-wise
            index-to-clipID map (tuple), and the KxL shape matrix, where K is
            the number of kernel rows whose clip ID was requested, and L is the
            width (columns) of the distance kernel.
        :rtype: tuple of int, tuple of int, matrix

        """
        with self._rw_lock.read_lock():
            with SimpleTimer("Checking inputs", self._log.debug):
                # ``except ... as ...`` is valid on Python 2.6+ and Python 3;
                # the comma form is a SyntaxError on Python 3.
                try:
                    clipID_or_IDs = frozenset(int(e) for e in clipID_or_IDs)
                except Exception as ex:
                    raise ValueError("Not all clip IDs could be used as ints: "
                                     "%s" % str(ex))

                id_diff = clipID_or_IDs.difference(self._row_id_index_map)
                assert not id_diff, \
                    "Not all clip IDs provided are represented in this " \
                    "distance kernel matrix! (difference: %s)" \
                    % id_diff
                del id_diff

            # Reorder the given clip IDs so that they are in the same relative
            # order as the kernel matrix edge order
            with SimpleTimer("Creating focus index/cid sequence",
                             self._log.debug):
                focus_row_indices = []
                focus_row_clipids = []
                for idx, cid in enumerate(self._row_id_index_map):
                    # Every matching row is kept: if an ID appears more than
                    # once in the row ID map, each occurrence is returned.
                    if cid in clipID_or_IDs:
                        focus_row_indices.append(idx)
                        focus_row_clipids.append(cid)

            with SimpleTimer("Cropping kernel to focus range",
                             self._log.debug):
                return (tuple(focus_row_clipids),
                        tuple(self._col_id_index_map),
                        self._kernel[focus_row_indices, :])
Exemplo n.º 5
0
    def __init__(self,
                 id_vector,
                 bg_clip_ids,
                 feature_mat,
                 kernel_mat,
                 rw_lock=None):
        """ Initialize this FeatureMemory object

        This class must be used with numpy ndarray and matrix classes for shared
        memory purposes.

        NOTE: Arrays and matrices given here must own their data (their
        ``base`` attribute must be None)! This is currently required in order
        to resize them later when updating with new feature vectors.

        TODO: Allow kernel matrix to be optional, causing it to be built from
        the provided feature matrix (not a recommended action).

        :raise AssertionError: If ``bg_clip_ids`` is not a set or frozenset, or
            if a truthy ``rw_lock`` is not a ReadWriteLock instance.
        :raise ValueError: If a given array/matrix does not own its data, or if
            the kernel matrix is not square with a side length equal to the
            number of rows of the feature matrix.

        :param id_vector: (numpy) Array of clip IDs. This is used as the map
            from an index position to the clip ID its associated with in the
            kernel and distance kernel matrices.
        :type id_vector: ndarray of int
        :param bg_clip_ids: Set of clip IDs that are to be treated as background
            clip IDs.
        :type bg_clip_ids: set of int
        :param feature_mat: (numpy) Matrix of features for clip IDs. Features
            should be stored vertically, i.e. Each row is a feature for a
            particular clip ID (id_vector being the index-to-clipID map).
        :type feature_mat: matrix of double
        :param kernel_mat: (numpy) Matrix detailing the distances between
            feature vectors. This must be a square, symmetric matrix.
        :type kernel_mat: matrix of double
        :param rw_lock: Optional ReadWriteLock for this instance to use. If not
            provided (or falsy), we will create our own.
        :type rw_lock: None or ReadWriteLock

        """
        # NOTE: id_vector, feature_mat and kernel_mat are not type-checked
        # here; only their ``base`` attribute and shapes are inspected below.
        assert isinstance(bg_clip_ids, (set, frozenset)), \
            "Background clip IDs must be given as a set or frozenset! " \
            "Got: %s" % type(bg_clip_ids)

        # Each input must own its buffer (``base`` is None). The first
        # offender, checked in argument order, is the one reported.
        # noinspection PyUnresolvedReferences
        # -> base IS a member of the matrix class...
        for arg_name, arg_value in (("id_vector", id_vector),
                                    ("feature_mat", feature_mat),
                                    ("kernel_mat", kernel_mat)):
            if arg_value.base is not None:
                raise ValueError("Given ``%s`` does not own its data! It "
                                 "will not be transformable later." % arg_name)

        # The kernel should be square and should be the same size as the feature
        # matrix's number of rows (unique stored clip features).
        if not (kernel_mat.shape[0] == kernel_mat.shape[1] ==
                feature_mat.shape[0]):
            raise ValueError("The distance kernel matrix provided is either "
                             "misshapen or conflicts with the dimensions of "
                             "the provided feature matrix. (kernel matrix "
                             "shape: %s, num feature vectors: %d)" %
                             (kernel_mat.shape, feature_mat.shape[0]))

        self._log.debug("Lock given: %s", rw_lock)
        if rw_lock:
            assert isinstance(rw_lock, ReadWriteLock), \
                "Not given a valid ReadWriteLock instance!"
            self._rw_lock = rw_lock
        else:
            # Reached for None as well as for any other falsy value.
            self._log.debug("Falling back on bad lock given (given: %s)",
                            type(rw_lock))
            self._rw_lock = ReadWriteLock()

        # The given objects are stored as-is (no copies are made).
        self._id_vector = id_vector
        self._bg_clip_ids = bg_clip_ids
        self._feature_mat = feature_mat
        self._kernel_mat = kernel_mat

        # Helper structure mapping clipIDs to their row index
        self._cid2idx_map = dict(
            (cid, idx) for idx, cid in enumerate(self._id_vector))
Exemplo n.º 6
0
class FeatureMemory(object):
    """
    Class for encapsulating and managing feature and kernel matrices for
    different feature types.

    An instance stores an ordered clip ID vector, a set of background clip
    IDs, a feature matrix (one row per clip ID) and a square kernel matrix,
    plus a read/write lock that ``get_feature``, ``get_distance_kernel`` and
    ``update`` acquire. The plain ``get_*`` accessors do not take the lock.
    """
    @classmethod
    def construct_from_files(cls,
                             id_vector_file,
                             bg_flags_file,
                             feature_mat_file,
                             kernel_mat_file,
                             rw_lock=None):
        """ Initialize FeatureMemory object from file sources.

        Every file is read with ``numpy.load``, so each must be in a format
        that function accepts (e.g. the output of ``numpy.save``).

        :param id_vector_file: File containing the clip ID values in the order
            in which they associate to the rows of the kernel matrix.
        :type id_vector_file: str
        :param bg_flags_file: File containing a sequence of flags where each
            index maps a row index of the kernel to whether or not the
            associated clip ID should be considered a background video.
        :type bg_flags_file: str
        :param feature_mat_file: File containing the feature matrix (loaded as
            an ndarray, converted to matrix on load).
        :type feature_mat_file: str
        :param kernel_mat_file: File containing the kernel matrix (loaded as
            an ndarray, converted to matrix on load).
        :type kernel_mat_file: str
        :param rw_lock: Optional ReadWriteLock passed through to the
            constructor.
        :type rw_lock: None or ReadWriteLock
        :return: Instance of the class this method was called on, constructed
            with the data provided in the given files.
        :rtype: FeatureMemory

        """
        clip_ids = np.array(np.load(id_vector_file))
        bg_flags = np.array(np.load(bg_flags_file))
        # noinspection PyCallingNonCallable
        feature_mat = np.matrix(np.load(feature_mat_file))
        # noinspection PyCallingNonCallable
        kernel_mat = np.matrix(np.load(kernel_mat_file))

        # Background clips are the IDs whose flag at the same index is truthy.
        bg_clips = set(clip_ids[i] for i, f in enumerate(bg_flags) if f)

        # Use ``cls`` (not the hard-coded class name) so that subclasses
        # calling this constructor get an instance of their own type.
        return cls(clip_ids,
                   bg_clips,
                   feature_mat,
                   kernel_mat,
                   rw_lock=rw_lock)

    @property
    def _log(self):
        """
        :return: Logger named ``<module>.<class name>`` for this instance.
        :rtype: logging.Logger
        """
        return logging.getLogger('.'.join(
            [self.__module__, self.__class__.__name__]))

    def __init__(self,
                 id_vector,
                 bg_clip_ids,
                 feature_mat,
                 kernel_mat,
                 rw_lock=None):
        """ Initialize this FeatureMemory object

        This class must be used with numpy ndarray and matrix classes for shared
        memory purposes.

        NOTE: Arrays and matrices given here must own their data (their
        ``base`` attribute must be None)! This is currently required in order
        to resize them later when updating with new feature vectors.

        NOTE: A frozenset passes the ``bg_clip_ids`` type check, but
        ``update`` calls ``add``/``discard`` on the stored object, which a
        frozenset does not provide.

        TODO: Allow kernel matrix to be optional, causing it to be built from
        the provided feature matrix (not a recommended action).

        :raise AssertionError: If ``bg_clip_ids`` is not a set or frozenset, or
            if a truthy ``rw_lock`` is not a ReadWriteLock instance.
        :raise ValueError: If a given array/matrix does not own its data, or if
            the kernel matrix is not square with a side length equal to the
            number of rows of the feature matrix.

        :param id_vector: (numpy) Array of clip IDs. This is used as the map
            from an index position to the clip ID its associated with in the
            kernel and distance kernel matrices.
        :type id_vector: ndarray of int
        :param bg_clip_ids: Set of clip IDs that are to be treated as background
            clip IDs.
        :type bg_clip_ids: set of int
        :param feature_mat: (numpy) Matrix of features for clip IDs. Features
            should be stored vertically, i.e. Each row is a feature for a
            particular clip ID (id_vector being the index-to-clipID map).
        :type feature_mat: matrix of double
        :param kernel_mat: (numpy) Matrix detailing the distances between
            feature vectors. This must be a square, symmetric matrix.
        :type kernel_mat: matrix of double
        :param rw_lock: Optional ReadWriteLock for this instance to use. If not
            provided (or falsy), we will create our own.
        :type rw_lock: None or ReadWriteLock

        """
        # NOTE: id_vector, feature_mat and kernel_mat are not type-checked
        # here; only their ``base`` attribute and shapes are inspected below.
        assert isinstance(bg_clip_ids, (set, frozenset)), \
            "Background clip IDs must be given as a set or frozenset! " \
            "Got: %s" % type(bg_clip_ids)

        # Each input must own its buffer (``base`` is None). The first
        # offender, checked in argument order, is the one reported.
        # noinspection PyUnresolvedReferences
        # -> base IS a member of the matrix class...
        for arg_name, arg_value in (("id_vector", id_vector),
                                    ("feature_mat", feature_mat),
                                    ("kernel_mat", kernel_mat)):
            if arg_value.base is not None:
                raise ValueError("Given ``%s`` does not own its data! It "
                                 "will not be transformable later." % arg_name)

        # The kernel should be square and should be the same size as the feature
        # matrix's number of rows (unique stored clip features).
        if not (kernel_mat.shape[0] == kernel_mat.shape[1] ==
                feature_mat.shape[0]):
            raise ValueError("The distance kernel matrix provided is either "
                             "misshapen or conflicts with the dimensions of "
                             "the provided feature matrix. (kernel matrix "
                             "shape: %s, num feature vectors: %d)" %
                             (kernel_mat.shape, feature_mat.shape[0]))

        self._log.debug("Lock given: %s", rw_lock)
        if rw_lock:
            assert isinstance(rw_lock, ReadWriteLock), \
                "Not given a valid ReadWriteLock instance!"
            self._rw_lock = rw_lock
        else:
            # Reached for None as well as for any other falsy value.
            self._log.debug("Falling back on bad lock given (given: %s)",
                            type(rw_lock))
            self._rw_lock = ReadWriteLock()

        # The given objects are stored as-is (no copies are made).
        self._id_vector = id_vector
        self._bg_clip_ids = bg_clip_ids
        self._feature_mat = feature_mat
        self._kernel_mat = kernel_mat

        # Helper structure mapping clipIDs to their row index
        self._cid2idx_map = dict(
            (cid, idx) for idx, cid in enumerate(self._id_vector))

    @staticmethod
    def _histogram_intersection_distance(a, b):
        """
        Calculates distance between two vectors using histogram intersection.

        Non-branching version of the histogram intersection algorithm:
        ``a + b - |a - b|`` equals ``2 * min(a, b)`` element-wise, so half of
        its sum is the sum of the element-wise minimums.

        :param a: A vector in array form.
        :type a: ndarray
        :param b: A vector in array form.
        :type b: ndarray

        :return: Histogram Intersection (HI) distance scalar
        :rtype: double

        """
        # noinspection PyUnresolvedReferences
        return (a + b - np.abs(a - b)).sum() * 0.5

    def get_ids(self):
        """
        NOTE: NOT THREAD SAFE. Use the returned structure only in conjunction
        with this object's lock when in a parallel environment to prevent
        possible memory corruption.

        :return: Ordered vector of clip IDs along the row-edge of this object's
            feature matrix and along both edges of the kernel matrix. This is
            the stored object itself, not a copy.
        :rtype: numpy.core.multiarray.ndarray

        """
        return self._id_vector

    def get_bg_ids(self):
        """
        NOTE: NOT THREAD SAFE. This accessor does not acquire this object's
        lock; take the lock yourself when in a parallel environment.

        :return: New frozenset holding the clip IDs that we are currently
            treating as background clips.
        :rtype: frozenset

        """
        return frozenset(self._bg_clip_ids)

    def get_feature_matrix(self):
        """
        NOTE: NOT THREAD SAFE. Use the returned structure only in conjunction
        with this object's lock when in a parallel environment to prevent
        possible memory corruption.

        :return: Matrix recording feature vectors for a feature type. See the
            id vector for row-wise index-to-clipID association. This is the
            stored object itself, not a copy.
        :rtype: numpy.matrixlib.defmatrix.matrix

        """
        return self._feature_mat

    def get_kernel_matrix(self):
        """
        NOTE: NOT THREAD SAFE. Use the returned structure only in conjunction
        with this object's lock when in a parallel environment to prevent
        possible memory corruption.

        :return: Symmetric matrix detailing the distances between any two clip
            ID features. Distances are computed via histogram intersection.
            This is the stored object itself, not a copy.
        :rtype: matrix

        """
        return self._kernel_mat

    def get_lock(self):
        """
        :return: a reference to this object's read/write lock.
        :rtype: ReadWriteLock

        """
        return self._rw_lock

    def get_distance_kernel(self):
        """
        DistanceKernel object constructed from this feature's current state.

        The ID vector is passed for both the row and column maps, and the
        kernel matrix, background ID set and lock are handed over by
        reference (no copies are made here).

        :return: This feature distance kernel.
        :rtype: DistanceKernel

        """
        with self._rw_lock.read_lock():
            return DistanceKernel(self._id_vector, self._id_vector,
                                  self._kernel_mat, self._bg_clip_ids,
                                  self._rw_lock)

    def get_feature(self, *clip_id_or_ids):
        """
        Return the a matrix where each row is the feature vector for one or more
        clip IDs. The given list of clip IDs given acts as the index-to-clipID
        map for the returned matrix's rows. If repeat clip IDs are provided in
        the input, there will be repeat feature vectors in the returned matrix.

        :raise AssertionError: If any argument is not an ``int`` instance.
        :raise KeyError: If a given clip ID is not represented in the current
            matrix (the lookup in the clipID-to-row-index map fails).

        :param clip_id_or_ids: One or more integer clip IDs to retrieve the
            feature vectors for.
        :type clip_id_or_ids: tuple of int

        :return: NxM matrix, where N is the number of clip IDs requested and M
            is the length of a feature vector for this vector.
        :rtype: np.matrix

        """
        assert all(isinstance(e, int) for e in clip_id_or_ids), \
            "Not given an integer or a valid iterable over integers!"

        with self._rw_lock.read_lock():
            # rows = num of IDs given, cols = width of feature matrix
            with SimpleTimer("Allocating return matrix", self._log.debug):
                # The fresh ndarray is uninitialized; every row is filled in
                # by the loop below.
                # noinspection PyUnresolvedReferences
                # -> matrix class DOES have ``dtype`` property...
                ret_mat = matrix(
                    ndarray((len(clip_id_or_ids), self._feature_mat.shape[1]),
                            self._feature_mat.dtype))
            for i, cid in enumerate(clip_id_or_ids):
                feature_idx = self._cid2idx_map[cid]
                ret_mat[i, :] = self._feature_mat[feature_idx, :]
            return ret_mat

    # noinspection PyUnresolvedReferences,PyCallingNonCallable
    def update(self,
               clip_id,
               feature_vec=None,
               is_background=False,
               timeout=None):
        """
        Update this feature with a feature vector associated with a clip ID. If
        clip ID is already in the feature matrix, we replace the current vector
        with the given one (when one is given) and refresh its background
        status.

        Either way, the distance kernel is updated with either a new row/column,
        or updating relevant slots in the existing distance kernel.

        When adding a new clip ID, all ownership/shape preconditions are
        checked before anything is modified, so a failed precondition leaves
        this object unchanged.

        :raise ValueError: if the given feature vector is not compatible with
            our feature vector, or if a new clip ID is given without a feature
            vector.
        :raise RuntimeError: If a timeout is given and the underlying write lock
            doesn't acquire in that amount of time, or if the feature or
            kernel matrix does not own its data when a new clip ID is added.

        :param clip_id: The ID of the clip the given ``feature_vec`` represents.
        :type clip_id: int
        :param feature_vec: Feature vector associated to the given clip ID.
        :type feature_vec: ndarray
        :param is_background: Flag declaring that this clip ID represents a
            background feature.
        :type is_background: bool
        :param timeout: Timeout seconds for the underlying write lock to acquire
            before a RuntimeError is thrown.
        :type timeout: None or int or float

        """
        with self._rw_lock.write_lock(timeout):
            clip_id = int(clip_id)
            if feature_vec is not None and \
                    not (feature_vec.ndim == 1
                         and len(feature_vec) == self._feature_mat.shape[1]):
                raise ValueError("Given feature vector not compatible "
                                 "(dimensionality or length does not match)")

            # Update the given feature vector and kernel distances
            if clip_id in self._cid2idx_map:
                # In all cases, update the background status of the clip
                if is_background:
                    self._bg_clip_ids.add(clip_id)
                else:
                    self._bg_clip_ids.discard(clip_id)

                # If we were given a new feature vector, update entries
                if feature_vec is not None:
                    idx = self._cid2idx_map[clip_id]
                    self._feature_mat[idx] = feature_vec
                    # Distances from the new vector to every stored vector
                    # (including itself, now that it has been stored).
                    new_dist = np.asmatrix(
                        tuple(
                            self._histogram_intersection_distance(
                                feature_vec, fv) for fv in self._feature_mat))
                    self._kernel_mat[idx, :] = new_dist
                    self._kernel_mat[:, idx] = new_dist

            # Given a new feature to add.
            else:
                if feature_vec is None:
                    raise ValueError("Update given a new clip ID, but no "
                                     "feature vector provided.")

                # Check every precondition BEFORE touching any state, so a
                # failure cannot leave the ID vector / index map / background
                # set out of sync with the matrices.
                # noinspection PyUnresolvedReferences
                if self._feature_mat.base is not None:
                    raise RuntimeError("Feature matrix does not own its data")
                if self._kernel_mat.base is not None:
                    raise RuntimeError("kernel matrix does not own its data")
                assert self._kernel_mat.shape[0] == self._kernel_mat.shape[1], \
                    "kernel matrix is not symmetric for some reason???"

                # Append the new clip ID; its row index is the old vector size.
                new_idx = self._id_vector.size
                self._id_vector.resize((new_idx + 1, ), refcheck=False)
                self._id_vector[-1] = clip_id
                self._cid2idx_map[clip_id] = new_idx

                if is_background:
                    self._bg_clip_ids.add(clip_id)

                # Since we're only adding a new row, this resize does not affect
                # the positioning of the existing data.
                # noinspection PyUnresolvedReferences
                self._feature_mat.resize((self._feature_mat.shape[0] + 1,
                                          self._feature_mat.shape[1]),
                                         refcheck=False)
                self._feature_mat[-1, :] = feature_vec

                # Need to add a new row AND column to the distance kernel.
                # Adding a column changes the row stride, so the old values
                # are copied out first and written back into the top-left
                # NxN corner after the in-place resize.
                # noinspection PyPep8Naming
                # -> because I like ``N`` better...
                N = self._kernel_mat.shape[0]
                kernel_copy = np.matrix(self._kernel_mat)
                self._kernel_mat.resize((N + 1, N + 1), refcheck=False)
                self._kernel_mat[:N, :N] = kernel_copy
                del kernel_copy

                # Computing new feature distance (histogram intersection). Only
                # need to compute this once because of HI being
                # commutative and the kernel matrix being symmetric.
                dist_vec = np.asmatrix(
                    tuple(
                        self._histogram_intersection_distance(feature_vec, fv)
                        for fv in self._feature_mat))
                self._kernel_mat[-1, :] = dist_vec
                self._kernel_mat[:, -1] = dist_vec.T
Exemplo n.º 7
0
class DistanceKernel (object):
    """
    Feature Distance Kernel object.

    Wraps a kernel (distance) matrix together with the index-to-clipID maps
    of its rows and columns. The kernel may be symmetric or not. When the row
    and column ID maps are identical, ``symmetric_submatrix`` may be used.

    Intended to be used with ProxyManager proxy objects (given at
    construction)

    MONKEY PATCHING:
    When using this object directly (not using the ProxyManager stuff) and
    sending it over pipes, the ReadWriteLock needs to be monkey patched out (the
    multiprocessing.Condition variable doesn't play nicely). Need to set an
    instance of a DummyRWLock to the DistanceKernel._rw_lock property. For
    example:

        ...
        dk = ...
        dk._rw_lock = DummyRWLock()
        <send dk into a pipe>
        ...

    """

    @staticmethod
    def _bg_ids_from_flags(clip_ids, bg_flags):
        """
        Collect the clip IDs whose background flag is set.

        :param clip_ids: Sequence of clip IDs, index-aligned with ``bg_flags``.
        :type clip_ids: sequence of number
        :param bg_flags: Sequence of truth-y/false-y flags, one per clip ID.
        :type bg_flags: sequence
        :raises ValueError: The two sequences differ in length.
        :return: Set of the (int-cast) clip IDs flagged as background.
        :rtype: set of int

        """
        if len(clip_ids) != len(bg_flags):
            raise ValueError("Clip ID vector and background flag vector "
                             "differ in length (%d != %d)"
                             % (len(clip_ids), len(bg_flags)))
        # The constructor only accepts a set/frozenset of int-able values, so
        # build exactly that instead of an ndarray.
        return set(int(cid) for cid, flag in zip(clip_ids, bg_flags) if flag)

    @classmethod
    def construct_symmetric_from_files(cls, id_vector_file, kernel_mat_file,
                                       bg_flags_file=None):
        """
        Construct a symmetric DistanceKernel object from files.

        The same clip ID vector is used for both the rows and the columns of
        the kernel. A background flags file may optionally be given to denote
        the clip IDs that are to be treated as background clips. Such a
        DistanceKernel is usually used with event learning and should be
        provided a background flags file also.

        :param id_vector_file: File containing the numpy.savetxt(...) output of
            clip ID values in the order in which they associate to the rows of
            the kernel matrix.
        :type id_vector_file: str
        :param kernel_mat_file: File containing the kernel matrix as saved by
            numpy.save(...) (saved as an ndarray, converted to matrix on load).
        :type kernel_mat_file: str
        :param bg_flags_file: Optional file containing output of
            numpy.savetxt(...) where each index maps a row index of the kernel
            to whether or not the associated clip ID should be considered a
            background video or not.
        :type bg_flags_file: str
        :raises ValueError: The background flag vector and the clip ID vector
            differ in length.
        :return: Symmetric DistanceKernel constructed with the data provided in
            the provided files.
        :rtype: DistanceKernel

        """
        # atleast_1d: loadtxt yields a 0-d array for a single-value file, which
        # can neither be iterated nor has a ``shape[0]``.
        clip_ids = np.atleast_1d(np.loadtxt(id_vector_file))
        # noinspection PyCallingNonCallable
        kernel_mat = np.matrix(np.load(kernel_mat_file))

        if bg_flags_file is not None:
            bg_flags = np.atleast_1d(np.loadtxt(bg_flags_file))
            bg_clips = cls._bg_ids_from_flags(clip_ids, bg_flags)
        else:
            bg_clips = None

        return cls(clip_ids, clip_ids, kernel_mat, bg_clips)

    @classmethod
    def construct_asymmetric_from_files(cls, row_ids_file, col_ids_file,
                                        kernel_mat_file):
        """
        Construct an asymmetric DistanceKernel object, usually used for archive
        searches.

        No option for providing background clip IDs as asymmetric kernels are
        NOT used for learning purposes.

        :param row_ids_file: File containing the numpy.savetxt(...) output of
            clip ID values in the order in which they associate to the rows of
            the given kernel matrix.
        :type row_ids_file: str
        :param col_ids_file: File containing the numpy.savetxt(...) output of
            clip ID values in the order in which they associate to the columns
            of the given kernel matrix.
        :type col_ids_file: str
        :param kernel_mat_file: File containing the kernel matrix as saved by
            numpy.save(...) (saved as an ndarray, converted to matrix on load).
        :type kernel_mat_file: str
        :return: Asymmetric DistanceKernel constructed with the data provided in
            the provided files.
        :rtype: DistanceKernel

        """
        # atleast_1d: see construct_symmetric_from_files.
        row_cids = np.atleast_1d(np.loadtxt(row_ids_file))
        col_cids = np.atleast_1d(np.loadtxt(col_ids_file))
        # noinspection PyCallingNonCallable
        kernel_mat = np.matrix(np.load(kernel_mat_file))
        return cls(row_cids, col_cids, kernel_mat)

    @property
    def _log(self):
        """
        :return: Logger named after this object's module and class.
        :rtype: logging.Logger
        """
        return logging.getLogger('.'.join([self.__module__,
                                           self.__class__.__name__]))

    def __init__(self, row_id_index_map, col_id_index_map, kernel_mat,
                 bg_clip_ids=None, rw_lock=None):
        """
        Initialize the distance kernel around the given ID maps and matrix.

        The initialization values will more than likely be proxies to
        np.matrix objects. They are stored by reference, not copied.

        ``bg_clip_ids`` may be given when this kernel matrix is to be a
        square, symmetric kernel used through the ``symmetric_submatrix``
        method. It must be a set of the clip IDs (NOT a vector of boolean
        flags) that are to be considered "background" IDs, i.e. clips that are
        always treated as negatives. ``symmetric_submatrix`` always includes
        these IDs in the sub-matrices it returns.

        :param row_id_index_map: Array of clip IDs associated to row indices.
            Contents will be treated as ints.
        :type row_id_index_map: ndarray of int
        :param col_id_index_map: Array of clip IDs associated to column
            indices. Contents will be treated as ints.
        :type col_id_index_map: ndarray of int
        :param kernel_mat: Kernel data matrix.
        :type kernel_mat: matrix
        :param bg_clip_ids: Optional set of background clip IDs. Contents must
            be convertible to int.
        :type bg_clip_ids: None or set of int or frozenset of int
        :param rw_lock: Read-Write lock for data provided. This should be
            provided if the any of the data is shared with other objects/
            sources. If this is given None (default), then a lock is created.
        :type rw_lock: ReadWriteLock or None
        :raises AssertionError: The ID map lengths do not match the kernel
            shape, ``bg_clip_ids`` is not None/set/frozenset, or ``rw_lock``
            is not a ReadWriteLock.
        :raises ValueError: An element of ``bg_clip_ids`` cannot be converted
            to an int.

        """
        # TODO: Possibly add checks for the id arrays like there is for the
        #       bgclipid array (int-able contents)
        assert row_id_index_map.shape[0] == kernel_mat.shape[0], \
            "Length of row index map and kernel row count did not match! " \
            "(row index map: %d, kernel row count: %d)" \
            % (row_id_index_map.shape[0], kernel_mat.shape[0])
        assert col_id_index_map.shape[0] == kernel_mat.shape[1], \
            "Length of col index map and kernel col count did not match! " \
            "(col index map: %d, kernel col count: %d)" \
            % (col_id_index_map.shape[0], kernel_mat.shape[1])

        self._row_id_index_map = row_id_index_map
        self._col_id_index_map = col_id_index_map
        self._kernel = kernel_mat

        assert ((bg_clip_ids is None)
                or isinstance(bg_clip_ids, (set, frozenset))), \
            "Must either given None or a set for the bg_clip_ids " \
            "vector. Got: %s" % type(bg_clip_ids)
        self._bg_cid_set = bg_clip_ids
        if bg_clip_ids is not None:
            # Validation only; the converted values are intentionally
            # discarded and the set is stored as given.
            try:
                for e in bg_clip_ids:
                    int(e)
            except Exception:
                raise ValueError("Not all of the contents of of bg_clip_ids "
                                 "could be treated as ints!")

        # Compare against None explicitly: the documented "create one for me"
        # sentinel is None, not any false-y object.
        if rw_lock is not None:
            assert isinstance(rw_lock, ReadWriteLock), \
                "Did not receive valid istance of RW Lock. Got '%s'" \
                % type(rw_lock)
            self._rw_lock = rw_lock
        else:
            self._rw_lock = ReadWriteLock()

    def get_lock(self):
        """
        :return: This object's read/write lock.
        :rtype: ReadWriteLock
        """
        return self._rw_lock

    def row_id_map(self):
        """
        :return: Row index-to-clipID map
        :rtype: ndarray
        """
        with self.get_lock().read_lock():
            return self._row_id_index_map

    def col_id_map(self):
        """
        :return: Column index-to-clipID map
        :rtype: ndarray
        """
        with self.get_lock().read_lock():
            return self._col_id_index_map

    def get_kernel_matrix(self):
        """
        RETURNED OBJECTS NOT THREAD/PROCESS SAFE. The stored matrix itself is
        returned (not a copy), so once retrieved it may be modified by another
        thread/process.

        :return: The underlying kernel matrix.
        :rtype: matrix

        """
        with self.get_lock().read_lock():
            return self._kernel

    def get_background_ids(self):
        """
        RETURNED OBJECTS NOT THREAD/PROCESS SAFE

        :return: A frozen copy of the set of background clip IDs. This is an
            empty frozenset if there was no background set initialized.
        :rtype: frozenset

        """
        with self.get_lock().read_lock():
            if self._bg_cid_set is None:
                return frozenset()
            return frozenset(self._bg_cid_set)

    def is_symmetric(self):
        """
        :return: True if the clip IDs along the row and column axes are the
            same and in the same order (starting from [0,0] and moving
            outwards). Only the ID maps are compared, not the kernel values.
        :rtype: bool

        """
        with self._rw_lock.read_lock():
            # Doing shape equality short circuit because the return value of
            # numpy.array equality changes depending on this condition, meaning
            # the use of the ...all() member function on the result is not
            # universally possible (i.e. when it returns a bool value when
            # shapes are not equal).

            # noinspection PyUnresolvedReferences
            return (self._row_id_index_map.shape == self._col_id_index_map.shape
                    and
                    (self._row_id_index_map == self._col_id_index_map).all())

    def symmetric_submatrix(self, *clip_ids):
        """
        Return a symmetric sub NxN matrix of the total distance kernel based on
        the clip IDs provided. The background clips will always be included in
        the matrix if this DistanceKernel was constructed with a set of
        background clip IDs.

        Clip IDs provided will be assumed non-background, or positive
        event examples. If the clip ID of a background video is provided as an
        argument, we will reconsider it as a non-background video in the
        returned index-to-is-background mapping.

        Note: The matrix returned will always be a new instance and not set up
        to use shared memory. When directly used with shared memory objects, it
        will be passed by value, not by reference.

        :param clip_ids: Integer clip IDs to include in the returned matrix. The
            returned matrix will contain all background clip IDs.
        :type clip_ids: Iterable of int
        :raises RuntimeError: The row and column ID maps of this kernel are
            not identical.
        :raises ValueError: A given clip ID cannot be converted to an int.
        :raises AssertionError: A given clip ID is not present in this kernel.
        :return: The index-to-clipID map (list), the index-to-is-background map
            (list) and the symmetric NxN submatrix, where N is the number of
            clip IDs provided as arguments plus the number of background IDs,
            minus the overlap between those two sets. All three are in kernel
            row order.
        :rtype: list of int, list of bool, numpy.matrixlib.defmatrix.matrix

        """
        with self._rw_lock.read_lock():
            with SimpleTimer("Checking inputs", self._log.debug):
                if not self.is_symmetric():
                    raise RuntimeError("Cannot get a symmetric sub-matrix if "
                                       "the kernel is not square!")
                # NOTE: A background ID set is deliberately NOT required here;
                # without one only the requested IDs are returned.

                try:
                    clip_ids = [int(e) for e in clip_ids]
                except Exception:
                    raise ValueError("Not all clip IDs could be used as ints!")

                # Set form of the request for O(1) membership tests below.
                requested = frozenset(clip_ids)
                id_diff = requested.difference(self._row_id_index_map)
                assert not id_diff, \
                    "Not all clip IDs provided are represented in this " \
                    "distance kernel matrix! (difference: %s)" \
                    % id_diff
                del id_diff

            with SimpleTimer("Computing union of BG clips and provided IDs",
                             self._log.debug):
                if self._bg_cid_set is not None:
                    all_cids = self._bg_cid_set.union(requested)
                else:
                    all_cids = set(requested)

            # Walk the kernel edge once so the output is in the same relative
            # order as the kernel matrix edges. Only the first occurrence of
            # an ID is taken.
            focus_indices = []
            focus_clipids = []
            focus_id2isbg = []
            seen = set()
            for idx, cid in enumerate(self._row_id_index_map):
                if cid in all_cids and cid not in seen:
                    seen.add(cid)
                    focus_indices.append(idx)
                    focus_clipids.append(cid)
                    # IDs provided as arguments are considered non-background
                    # even if they are in the background set. All other IDs
                    # in the union must then be from the background set.
                    focus_id2isbg.append(cid not in requested)

            ret_mat = self._kernel[focus_indices, :][:, focus_indices]
            return focus_clipids, focus_id2isbg, ret_mat

    # noinspection PyPep8Naming
    def extract_rows(self, *clipID_or_IDs):
        """
        Find and return the v-stacked distance vectors, in kernel row order
        (i.e. not in the order given as arguments), of the kernel rows matching
        the given clip IDs.

        Note: The matrix returned will always be a new instance and not set up
        to use shared memory. When directly used with shared memory objects, it
        will be passed by value, not by reference.

        :param clipID_or_IDs: The integer clip ID or IDs of which to get the
            distance vectors for.
        :type clipID_or_IDs: int or Iterable of int
        :raises ValueError: A given clip ID cannot be converted to an int.
        :raises AssertionError: A given clip ID is not present in the row ID
            map of this kernel.
        :return: The row-wise index-to-clipID map (tuple), the column-wise
            index-to-clipID map (tuple), and the KxL shape matrix, where K is
            the number of kernel rows matching the given clip IDs, and L is the
            width (columns) of the distance kernel.
        :rtype: tuple of int, tuple of int, matrix

        """
        with self._rw_lock.read_lock():
            with SimpleTimer("Checking inputs", self._log.debug):
                try:
                    clipID_or_IDs = frozenset(int(e) for e in clipID_or_IDs)
                except Exception as ex:
                    raise ValueError("Not all clip IDs could be used as ints: "
                                     "%s" % str(ex))

                id_diff = clipID_or_IDs.difference(self._row_id_index_map)
                assert not id_diff, \
                    "Not all clip IDs provided are represented in this " \
                    "distance kernel matrix! (difference: %s)" \
                    % id_diff
                del id_diff

            # Reorder the given clip IDs so that they are in the same relative
            # order as the kernel matrix edge order. Every matching row is
            # taken, i.e. repeated IDs in the row map are not de-duplicated.
            with SimpleTimer("Creating focus index/cid sequence", self._log.debug):
                focus_row_indices = []
                focus_row_clipids = []
                for idx, cid in enumerate(self._row_id_index_map):
                    if cid in clipID_or_IDs:
                        focus_row_indices.append(idx)
                        focus_row_clipids.append(cid)

            with SimpleTimer("Cropping kernel to focus range", self._log.debug):
                return (
                    tuple(focus_row_clipids),
                    tuple(self._col_id_index_map),
                    self._kernel[focus_row_indices, :]
                )
Exemplo n.º 8
0
    def __init__(self, row_id_index_map, col_id_index_map, kernel_mat,
                 bg_clip_ids=None, rw_lock=None):
        """
        Initialize the distance kernel around the given ID maps and matrix.

        The initialization values will more than likely be proxies to
        np.matrix objects. They are stored by reference, not copied.

        ``bg_clip_ids`` may be given when this kernel matrix is to be a
        square, symmetric kernel used through the ``symmetric_submatrix``
        method. It must be a set of the clip IDs (NOT a vector of boolean
        flags) that are to be considered "background" IDs, i.e. clips that are
        always treated as negatives.

        :param row_id_index_map: Array of clip IDs associated to row indices.
            Contents will be treated as ints.
        :type row_id_index_map: ndarray of int
        :param col_id_index_map: Array of clip IDs associated to column
            indices. Contents will be treated as ints.
        :type col_id_index_map: ndarray of int
        :param kernel_mat: Kernel data matrix.
        :type kernel_mat: matrix
        :param bg_clip_ids: Optional set of background clip IDs. Contents must
            be convertible to int.
        :type bg_clip_ids: None or set of int or frozenset of int
        :param rw_lock: Read-Write lock for data provided. This should be
            provided if the any of the data is shared with other objects/
            sources. If this is given None (default), then a lock is created.
        :type rw_lock: ReadWriteLock or None
        :raises AssertionError: The ID map lengths do not match the kernel
            shape, ``bg_clip_ids`` is not None/set/frozenset, or ``rw_lock``
            is not a ReadWriteLock.
        :raises ValueError: An element of ``bg_clip_ids`` cannot be converted
            to an int.

        """
        # TODO: Possibly add checks for the id arrays like there is for the
        #       bgclipid array (int-able contents)
        assert row_id_index_map.shape[0] == kernel_mat.shape[0], \
            "Length of row index map and kernel row count did not match! " \
            "(row index map: %d, kernel row count: %d)" \
            % (row_id_index_map.shape[0], kernel_mat.shape[0])
        assert col_id_index_map.shape[0] == kernel_mat.shape[1], \
            "Length of col index map and kernel col count did not match! " \
            "(col index map: %d, kernel col count: %d)" \
            % (col_id_index_map.shape[0], kernel_mat.shape[1])

        self._row_id_index_map = row_id_index_map
        self._col_id_index_map = col_id_index_map
        self._kernel = kernel_mat

        assert ((bg_clip_ids is None)
                or isinstance(bg_clip_ids, (set, frozenset))), \
            "Must either given None or a set for the bg_clip_ids " \
            "vector. Got: %s" % type(bg_clip_ids)
        self._bg_cid_set = bg_clip_ids
        if bg_clip_ids is not None:
            # Validation only; the converted values are intentionally
            # discarded and the set is stored as given.
            try:
                for e in bg_clip_ids:
                    int(e)
            except Exception:
                raise ValueError("Not all of the contents of of bg_clip_ids "
                                 "could be treated as ints!")

        # Compare against None explicitly: the documented "create one for me"
        # sentinel is None, not any false-y object.
        if rw_lock is not None:
            assert isinstance(rw_lock, ReadWriteLock), \
                "Did not receive valid istance of RW Lock. Got '%s'" \
                % type(rw_lock)
            self._rw_lock = rw_lock
        else:
            self._rw_lock = ReadWriteLock()