Exemplo n.º 1
0
    def dense(self,
              filename='',
              pyr_scale=0.5,
              levels=3,
              winsize=15,
              iterations=3,
              poly_n=5,
              poly_sigma=1.2,
              flags=0,
              skip_empty=False):
        """
        Renders a dense optical flow video of the input video file using `cv2.calcOpticalFlowFarneback()`.
        For more details about the parameters consult the cv2 documentation.

        The first frame of the input is used as the initial reference frame, so the
        output contains one frame fewer than the input.

        Parameters
        ----------
        - filename : str, optional

            Path to the input video file. If not specified the video file pointed to by the MgObject is used.
        - pyr_scale : float, optional

            Default is 0.5. Passed on to `cv2.calcOpticalFlowFarneback()`.
        - levels : int, optional

            Default is 3. Passed on to `cv2.calcOpticalFlowFarneback()`.
        - winsize : int, optional

            Default is 15. Passed on to `cv2.calcOpticalFlowFarneback()`.
        - iterations : int, optional

            Default is 3. Passed on to `cv2.calcOpticalFlowFarneback()`.
        - poly_n : int, optional

            Default is 5. Passed on to `cv2.calcOpticalFlowFarneback()`.
        - poly_sigma : float, optional

            Default is 1.2. Passed on to `cv2.calcOpticalFlowFarneback()`.
        - flags : int, optional

            Default is 0. Passed on to `cv2.calcOpticalFlowFarneback()`.
        - skip_empty : bool, optional

            Default is `False`. If `True`, repeats the last non-empty frame in the output when
            an empty (all-black) flow frame is encountered.

        Outputs
        -------
        - `filename`_flow_dense.avi

        Returns
        -------
        - MgObject

            A new MgObject pointing to the output '_flow_dense' video file.

        Raises
        ------
        - IOError

            If no frame can be read from the input video.
        """

        if filename == '':
            filename = self.filename

        of, fex = os.path.splitext(filename)

        # Convert to avi if the input is not avi - necessary for cv2 compatibility on all platforms
        if fex != '.avi':
            convert_to_avi(of + fex)
            fex = '.avi'
            filename = of + fex

        destination_video = of + '_flow_dense' + fex

        vidcap = cv2.VideoCapture(filename)
        out = None

        try:
            # The first frame is the reference for the first flow computation.
            ret, first_frame = vidcap.read()
            if not ret:
                raise IOError('Could not read any frames from ' + filename)

            fourcc = cv2.VideoWriter_fourcc(*'MJPG')

            fps = int(vidcap.get(cv2.CAP_PROP_FPS))
            width = int(vidcap.get(cv2.CAP_PROP_FRAME_WIDTH))
            height = int(vidcap.get(cv2.CAP_PROP_FRAME_HEIGHT))
            length = int(vidcap.get(cv2.CAP_PROP_FRAME_COUNT))

            pb = MgProgressbar(total=length,
                               prefix='Rendering dense optical flow video:')

            out = cv2.VideoWriter(destination_video, fourcc, fps,
                                  (width, height))

            prev_frame = cv2.cvtColor(first_frame, cv2.COLOR_BGR2GRAY)

            # HSV canvas: hue encodes flow direction, value encodes flow
            # magnitude, saturation is kept at maximum.
            hsv = np.zeros_like(first_frame)
            hsv[..., 1] = 255

            # Last non-empty rendered frame; stays None until one was seen so
            # that leading empty frames can never hit an unbound variable.
            prev_rgb = None

            ii = 0

            while True:
                ret, frame2 = vidcap.read()
                if not ret:
                    break

                next_frame = cv2.cvtColor(frame2, cv2.COLOR_BGR2GRAY)

                flow = cv2.calcOpticalFlowFarneback(prev_frame, next_frame,
                                                    None, pyr_scale, levels,
                                                    winsize, iterations,
                                                    poly_n, poly_sigma, flags)

                mag, ang = cv2.cartToPolar(flow[..., 0], flow[..., 1])
                # OpenCV stores 8-bit hue in the range 0..180, hence the / 2.
                hsv[..., 0] = ang * 180 / np.pi / 2
                hsv[..., 2] = cv2.normalize(mag, None, 0, 255, cv2.NORM_MINMAX)
                rgb = cv2.cvtColor(hsv, cv2.COLOR_HSV2BGR)

                is_empty = not np.any(rgb)

                if skip_empty and is_empty and prev_rgb is not None:
                    out.write(prev_rgb.astype(np.uint8))
                else:
                    out.write(rgb.astype(np.uint8))

                if not is_empty:
                    prev_rgb = rgb

                prev_frame = next_frame

                pb.progress(ii)
                ii += 1

            pb.progress(length)

        finally:
            # Always free the decoder and flush the writer, even on errors.
            vidcap.release()
            if out is not None:
                out.release()

        if self.has_audio:
            source_audio = extract_wav(of + fex)
            embed_audio_in_video(source_audio, destination_video)
            os.remove(source_audio)

        return musicalgestures.MgObject(destination_video,
                                        color=self.color,
                                        returned_by_process=True)
Exemplo n.º 2
0
def history_cv2(self, filename='', history_length=10, weights=1):
    """
    This function creates a video where each frame is a weighted average of
    the current frame and the n previous frames, where n is determined by
    `history_length`. Based on cv2.

    The sum is always divided by the total of all weights, also while fewer
    than `history_length` frames have been collected. The very first frame is
    written unchanged.

    Parameters
    ----------
    - filename : str, optional

        Path to the input video file. If not specified the video file pointed to by the MgObject is used.
    - history_length : int, optional

        Default is 10. Number of frames to be saved in the history tail.
    - weights : int, float, list or tuple, optional

        Default is 1. A single number is the weight of the current frame, all
        history frames get weight 1. A sequence sets the weights one by one:
        item 0 belongs to the current frame, item 1 to the oldest frame in the
        history, and so on. Items beyond `history_length + 1` are ignored,
        missing items default to 1.

    Outputs
    -------
    - `filename`_history.avi

    Returns
    -------
    - MgObject
        A new MgObject pointing to the output '_history' video file.

    Raises
    ------
    - ValueError
        If `history_length` is negative.
    - TypeError
        If `weights` is neither a number nor a list/tuple.
    """
    from collections import deque

    if history_length < 0:
        raise ValueError('history_length must not be negative.')

    if filename == '':
        filename = self.filename

    of, fex = os.path.splitext(filename)

    # The file that is written is also the file that is returned and that
    # receives the audio track.
    destination_video = of + '_history' + fex

    # weights_map[0] weights the current frame, weights_map[i + 1] weights
    # the i-th frame of the history (oldest first).
    weights_map = [1] * (history_length + 1)

    if isinstance(weights, (int, float)):
        weights_map[0] = weights
    elif isinstance(weights, (list, tuple)):
        for ind, weight in enumerate(weights[:history_length + 1]):
            weights_map[ind] = weight
    else:
        raise TypeError('weights must be a number or a list of numbers.')

    # Only the weights that are really applied take part in the normalization.
    denominator = sum(weights_map)

    video = cv2.VideoCapture(filename)
    out = None

    try:
        fourcc = cv2.VideoWriter_fourcc(*'MJPG')

        fps = int(video.get(cv2.CAP_PROP_FPS))
        width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
        length = int(video.get(cv2.CAP_PROP_FRAME_COUNT))

        pb = MgProgressbar(total=length, prefix='Rendering history video:')

        out = cv2.VideoWriter(destination_video, fourcc, fps, (width, height))

        # A bounded deque drops the oldest frame by itself and also copes
        # with history_length == 0.
        history = deque(maxlen=history_length)

        ii = 0

        while True:
            ret, frame = video.read()
            if not ret:
                break

            if self.color == False:
                frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

            frame = np.array(frame).astype(np.float64)

            if len(history) > 0:
                history_total = frame * weights_map[0] / denominator
                for ind, old_frame in enumerate(history):
                    history_total += old_frame * weights_map[ind + 1] / denominator
            else:
                # Nothing to blend with yet: pass the frame through.
                history_total = frame

            history.append(frame)

            total = history_total.astype(np.uint64).astype(np.uint8)

            if self.color == False:
                # The writer was opened for colour frames.
                total = cv2.cvtColor(total, cv2.COLOR_GRAY2BGR)

            out.write(total)

            pb.progress(ii)
            ii += 1

        pb.progress(length)

    finally:
        # Always free the decoder and flush the writer, even on errors.
        video.release()
        if out is not None:
            out.release()

    if self.has_audio:
        source_audio = extract_wav(self.of + self.fex)
        embed_audio_in_video(source_audio, destination_video)
        os.remove(source_audio)

    return musicalgestures.MgObject(destination_video,
                                    color=self.color,
                                    returned_by_process=True)
Exemplo n.º 3
0
    def sparse(self,
               filename='',
               corner_max_corners=100,
               corner_quality_level=0.3,
               corner_min_distance=7,
               corner_block_size=7,
               of_win_size=(15, 15),
               of_max_level=2,
               of_criteria=(cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT,
                            10, 0.03)):
        """
        Renders a sparse optical flow video of the input video file using `cv2.calcOpticalFlowPyrLK()`.
        `cv2.goodFeaturesToTrack()` is used for the corner estimation.
        For more details about the parameters consult the cv2 documentation.

        The corners are detected once, in the first frame of the input, and are
        then tracked from frame to frame. The tracks are accumulated and drawn
        on top of every following frame, so the output contains one frame fewer
        than the input.

        Parameters
        ----------
        - filename : str, optional

            Path to the input video file. If not specified the video file pointed to by the MgObject is used.
        - corner_max_corners : int, optional

            Default is 100. Passed on to `cv2.goodFeaturesToTrack()` as `maxCorners`.
        - corner_quality_level : float, optional

            Default is 0.3. Passed on to `cv2.goodFeaturesToTrack()` as `qualityLevel`.
        - corner_min_distance : int, optional

            Default is 7. Passed on to `cv2.goodFeaturesToTrack()` as `minDistance`.
        - corner_block_size : int, optional

            Default is 7. Passed on to `cv2.goodFeaturesToTrack()` as `blockSize`.
        - of_win_size : tuple (int, int), optional

            Default is (15, 15). Passed on to `cv2.calcOpticalFlowPyrLK()` as `winSize`.
        - of_max_level : int, optional

            Default is 2. Passed on to `cv2.calcOpticalFlowPyrLK()` as `maxLevel`.
        - of_criteria : optional

            Default is `(cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, 10, 0.03)`.
            Passed on to `cv2.calcOpticalFlowPyrLK()` as `criteria`.

        Outputs
        -------
        - `filename`_flow_sparse.avi

        Returns
        -------
        - MgObject

            A new MgObject pointing to the output '_flow_sparse' video file.

        Raises
        ------
        - IOError

            If no frame can be read from the input video.
        """

        if filename == '':
            filename = self.filename

        of, fex = os.path.splitext(filename)

        # Convert to avi if the input is not avi - necessary for cv2 compatibility on all platforms
        if fex != '.avi':
            convert_to_avi(of + fex)
            fex = '.avi'
            filename = of + fex

        destination_video = of + '_flow_sparse' + fex

        # params for ShiTomasi corner detection
        feature_params = dict(maxCorners=corner_max_corners,
                              qualityLevel=corner_quality_level,
                              minDistance=corner_min_distance,
                              blockSize=corner_block_size)

        # Parameters for lucas kanade optical flow
        lk_params = dict(winSize=of_win_size,
                         maxLevel=of_max_level,
                         criteria=of_criteria)

        vidcap = cv2.VideoCapture(filename)
        out = None

        try:
            # Take first frame and find corners in it
            ret, old_frame = vidcap.read()
            if not ret:
                raise IOError('Could not read any frames from ' + filename)

            fourcc = cv2.VideoWriter_fourcc(*'MJPG')

            fps = int(vidcap.get(cv2.CAP_PROP_FPS))
            width = int(vidcap.get(cv2.CAP_PROP_FRAME_WIDTH))
            height = int(vidcap.get(cv2.CAP_PROP_FRAME_HEIGHT))
            length = int(vidcap.get(cv2.CAP_PROP_FRAME_COUNT))

            pb = MgProgressbar(total=length,
                               prefix='Rendering sparse optical flow video:')

            out = cv2.VideoWriter(destination_video, fourcc, fps,
                                  (width, height))

            old_gray = cv2.cvtColor(old_frame, cv2.COLOR_BGR2GRAY)
            p0 = cv2.goodFeaturesToTrack(old_gray, mask=None, **feature_params)
            if p0 is None:
                # No corner was found: keep rendering, just without tracks.
                p0 = np.empty((0, 1, 2), dtype=np.float32)

            # Create some random colors - one per detected corner, because
            # corner_max_corners may allow more than 100 of them.
            color = np.random.randint(0, 255, (max(len(p0), 100), 3))

            # Create a mask image for drawing purposes
            mask = np.zeros_like(old_frame)

            ii = 0

            while True:
                ret, frame = vidcap.read()
                if not ret:
                    break

                frame_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

                # Convert once per frame and before drawing, so the markers
                # are not wiped by a later conversion.
                if self.color == False:
                    frame = cv2.cvtColor(frame_gray, cv2.COLOR_GRAY2BGR)

                # calculate optical flow (only while there are points left)
                p1, st = None, None
                if len(p0) > 0:
                    p1, st, _ = cv2.calcOpticalFlowPyrLK(old_gray, frame_gray,
                                                         p0, None, **lk_params)

                if p1 is not None:
                    # Select good points
                    good_new = p1[st == 1]
                    good_old = p0[st == 1]

                    # draw the tracks
                    for i, (new, old) in enumerate(zip(good_new, good_old)):
                        # The drawing functions need integer pixel positions.
                        a, b = (int(v) for v in np.rint(new.ravel()))
                        c, d = (int(v) for v in np.rint(old.ravel()))
                        track_color = color[i].tolist()
                        mask = cv2.line(mask, (a, b), (c, d), track_color, 2)
                        frame = cv2.circle(frame, (a, b), 5, track_color, -1)

                    # Only the successfully tracked points are followed further
                    p0 = good_new.reshape(-1, 1, 2)

                img = cv2.add(frame, mask)

                out.write(img.astype(np.uint8))

                # Now update the previous frame
                old_gray = frame_gray

                pb.progress(ii)
                ii += 1

            pb.progress(length)

        finally:
            # Always free the decoder and flush the writer, even on errors.
            vidcap.release()
            if out is not None:
                out.release()

        if self.has_audio:
            source_audio = extract_wav(of + fex)
            embed_audio_in_video(source_audio, destination_video)
            os.remove(source_audio)

        return musicalgestures.MgObject(destination_video,
                                        color=self.color,
                                        returned_by_process=True)
Exemplo n.º 4
0
    def dense(
            self,
            filename='',
            pyr_scale=0.5,
            levels=3,
            winsize=15,
            iterations=3,
            poly_n=5,
            poly_sigma=1.2,
            flags=0,
            skip_empty=False):
        """
        Renders a dense optical flow video of the input video file using `cv2.calcOpticalFlowFarneback()`. The description of the matching parameters are taken from the cv2 documentation.

        The first frame of the input is used as the initial reference frame, so the output contains one frame fewer than the input.

        Args:
            filename (str, optional): Path to the input video file. If not specified the video file pointed to by the MgObject is used. Defaults to ''.
            pyr_scale (float, optional): Specifies the image scale (<1) to build pyramids for each image. `pyr_scale=0.5` means a classical pyramid, where each next layer is twice smaller than the previous one. Defaults to 0.5.
            levels (int, optional): The number of pyramid layers including the initial image. `levels=1` means that no extra layers are created and only the original images are used. Defaults to 3.
            winsize (int, optional): The averaging window size. Larger values increase the algorithm robustness to image noise and give more chances for fast motion detection, but yield more blurred motion field. Defaults to 15.
            iterations (int, optional): The number of iterations the algorithm does at each pyramid level. Defaults to 3.
            poly_n (int, optional): The size of the pixel neighborhood used to find polynomial expansion in each pixel. Larger values mean that the image will be approximated with smoother surfaces, yielding more robust algorithm and more blurred motion field, typically `poly_n=5` or `poly_n=7`. Defaults to 5.
            poly_sigma (float, optional): The standard deviation of the Gaussian that is used to smooth derivatives used as a basis for the polynomial expansion. For `poly_n=5`, you can set `poly_sigma=1.1`, for `poly_n=7`, a good value would be `poly_sigma=1.5`. Defaults to 1.2.
            flags (int, optional): Operation flags that can be a combination of the following: - **OPTFLOW_USE_INITIAL_FLOW** uses the input flow as an initial flow approximation. - **OPTFLOW_FARNEBACK_GAUSSIAN** uses the Gaussian `winsize` x `winsize` filter instead of a box filter of the same size for optical flow estimation. Usually, this option gives a more accurate flow than with a box filter, at the cost of lower speed. Normally, `winsize` for a Gaussian window should be set to a larger value to achieve the same level of robustness. Defaults to 0.
            skip_empty (bool, optional): If True, repeats the last non-empty frame in the output when an empty (all-black) flow frame is encountered. Defaults to False.

        Outputs:
            `filename`_flow_dense.avi

        Returns:
            MgObject: A new MgObject pointing to the output '_flow_dense' video file.

        Raises:
            IOError: If no frame can be read from the input video.
        """

        if filename == '':
            filename = self.filename

        of, fex = os.path.splitext(filename)

        # Convert to avi if the input is not avi - necessary for cv2 compatibility on all platforms
        if fex != '.avi':
            convert_to_avi(of + fex)
            fex = '.avi'
            filename = of + fex

        destination_video = of + '_flow_dense' + fex

        vidcap = cv2.VideoCapture(filename)
        out = None

        try:
            # The first frame is the reference for the first flow computation.
            ret, first_frame = vidcap.read()
            if not ret:
                raise IOError('Could not read any frames from ' + filename)

            fourcc = cv2.VideoWriter_fourcc(*'MJPG')

            fps = int(vidcap.get(cv2.CAP_PROP_FPS))
            width = int(vidcap.get(cv2.CAP_PROP_FRAME_WIDTH))
            height = int(vidcap.get(cv2.CAP_PROP_FRAME_HEIGHT))
            length = int(vidcap.get(cv2.CAP_PROP_FRAME_COUNT))

            pb = MgProgressbar(
                total=length, prefix='Rendering dense optical flow video:')

            out = cv2.VideoWriter(destination_video,
                                  fourcc, fps, (width, height))

            prev_frame = cv2.cvtColor(first_frame, cv2.COLOR_BGR2GRAY)

            # HSV canvas: hue encodes flow direction, value encodes flow
            # magnitude, saturation is kept at maximum.
            hsv = np.zeros_like(first_frame)
            hsv[..., 1] = 255

            # Last non-empty rendered frame, None until one was seen.
            prev_rgb = None

            ii = 0

            while True:
                ret, frame2 = vidcap.read()
                if not ret:
                    break

                next_frame = cv2.cvtColor(frame2, cv2.COLOR_BGR2GRAY)

                flow = cv2.calcOpticalFlowFarneback(
                    prev_frame, next_frame, None, pyr_scale, levels, winsize, iterations, poly_n, poly_sigma, flags)

                mag, ang = cv2.cartToPolar(flow[..., 0], flow[..., 1])
                # OpenCV stores 8-bit hue in the range 0..180, hence the / 2.
                hsv[..., 0] = ang*180/np.pi/2
                hsv[..., 2] = cv2.normalize(mag, None, 0, 255, cv2.NORM_MINMAX)
                rgb = cv2.cvtColor(hsv, cv2.COLOR_HSV2BGR)

                is_empty = not np.any(rgb)

                if skip_empty and is_empty and prev_rgb is not None:
                    out.write(prev_rgb.astype(np.uint8))
                else:
                    out.write(rgb.astype(np.uint8))

                if not is_empty:
                    prev_rgb = rgb

                prev_frame = next_frame

                pb.progress(ii)
                ii += 1

            pb.progress(length)

        finally:
            # Always free the decoder and flush the writer, even on errors.
            vidcap.release()
            if out is not None:
                out.release()

        if self.has_audio:
            source_audio = extract_wav(of + fex)
            embed_audio_in_video(source_audio, destination_video)
            os.remove(source_audio)

        return musicalgestures.MgObject(destination_video, color=self.color, returned_by_process=True)
Exemplo n.º 5
0
    def sparse(
            self,
            filename='',
            corner_max_corners=100,
            corner_quality_level=0.3,
            corner_min_distance=7,
            corner_block_size=7,
            of_win_size=(15, 15),
            of_max_level=2,
            of_criteria=(cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, 10, 0.03)):
        """
        Renders a sparse optical flow video of the input video file using `cv2.calcOpticalFlowPyrLK()`. `cv2.goodFeaturesToTrack()` is used for the corner estimation. The description of the matching parameters are taken from the cv2 documentation.

        The corners are detected once, in the first frame of the input, and are then tracked from frame to frame. The tracks are accumulated and drawn on top of every following frame, so the output contains one frame fewer than the input.

        Args:
            filename (str, optional): Path to the input video file. If not specified the video file pointed to by the MgObject is used. Defaults to ''.
            corner_max_corners (int, optional): Maximum number of corners to return. If there are more corners than are found, the strongest of them is returned. `maxCorners <= 0` implies that no limit on the maximum is set and all detected corners are returned. Defaults to 100.
            corner_quality_level (float, optional): Parameter characterizing the minimal accepted quality of image corners. The parameter value is multiplied by the best corner quality measure, which is the minimal eigenvalue (see cornerMinEigenVal in cv2 docs) or the Harris function response (see cornerHarris in cv2 docs). The corners with the quality measure less than the product are rejected. For example, if the best corner has the quality measure = 1500, and the qualityLevel=0.01, then all the corners with the quality measure less than 15 are rejected. Defaults to 0.3.
            corner_min_distance (int, optional): Minimum possible Euclidean distance between the returned corners. Defaults to 7.
            corner_block_size (int, optional): Size of an average block for computing a derivative covariation matrix over each pixel neighborhood. See cornerEigenValsAndVecs in cv2 docs. Defaults to 7.
            of_win_size (tuple, optional): Size of the search window at each pyramid level. Defaults to (15, 15).
            of_max_level (int, optional): 0-based maximal pyramid level number. If set to 0, pyramids are not used (single level), if set to 1, two levels are used, and so on. If pyramids are passed to input then the algorithm will use as many levels as pyramids have but no more than `maxLevel`. Defaults to 2.
            of_criteria (tuple, optional): Specifies the termination criteria of the iterative search algorithm (after the specified maximum number of iterations criteria.maxCount or when the search window moves by less than criteria.epsilon). Defaults to (cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, 10, 0.03).

        Outputs:
            `filename`_flow_sparse.avi

        Returns:
            MgObject: A new MgObject pointing to the output '_flow_sparse' video file.

        Raises:
            IOError: If no frame can be read from the input video.
        """

        if filename == '':
            filename = self.filename

        of, fex = os.path.splitext(filename)

        # Convert to avi if the input is not avi - necessary for cv2 compatibility on all platforms
        if fex != '.avi':
            convert_to_avi(of + fex)
            fex = '.avi'
            filename = of + fex

        destination_video = of + '_flow_sparse' + fex

        # params for ShiTomasi corner detection
        feature_params = dict(maxCorners=corner_max_corners,
                              qualityLevel=corner_quality_level,
                              minDistance=corner_min_distance,
                              blockSize=corner_block_size)

        # Parameters for lucas kanade optical flow
        lk_params = dict(winSize=of_win_size,
                         maxLevel=of_max_level,
                         criteria=of_criteria)

        vidcap = cv2.VideoCapture(filename)
        out = None

        try:
            # Take first frame and find corners in it
            ret, old_frame = vidcap.read()
            if not ret:
                raise IOError('Could not read any frames from ' + filename)

            fourcc = cv2.VideoWriter_fourcc(*'MJPG')

            fps = int(vidcap.get(cv2.CAP_PROP_FPS))
            width = int(vidcap.get(cv2.CAP_PROP_FRAME_WIDTH))
            height = int(vidcap.get(cv2.CAP_PROP_FRAME_HEIGHT))
            length = int(vidcap.get(cv2.CAP_PROP_FRAME_COUNT))

            pb = MgProgressbar(
                total=length, prefix='Rendering sparse optical flow video:')

            out = cv2.VideoWriter(destination_video,
                                  fourcc, fps, (width, height))

            old_gray = cv2.cvtColor(old_frame, cv2.COLOR_BGR2GRAY)
            p0 = cv2.goodFeaturesToTrack(old_gray, mask=None, **feature_params)
            if p0 is None:
                # No corner was found: keep rendering, just without tracks.
                p0 = np.empty((0, 1, 2), dtype=np.float32)

            # Create some random colors - one per detected corner, because
            # corner_max_corners may allow more than 100 of them.
            color = np.random.randint(0, 255, (max(len(p0), 100), 3))

            # Create a mask image for drawing purposes
            mask = np.zeros_like(old_frame)

            ii = 0

            while True:
                ret, frame = vidcap.read()
                if not ret:
                    break

                frame_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

                # Convert once per frame and before drawing, so the markers
                # are not wiped by a later conversion.
                if self.color == False:
                    frame = cv2.cvtColor(frame_gray, cv2.COLOR_GRAY2BGR)

                # calculate optical flow (only while there are points left)
                p1, st = None, None
                if len(p0) > 0:
                    p1, st, _ = cv2.calcOpticalFlowPyrLK(
                        old_gray, frame_gray, p0, None, **lk_params)

                if p1 is not None:
                    # Select good points
                    good_new = p1[st == 1]
                    good_old = p0[st == 1]

                    # draw the tracks
                    for i, (new, old) in enumerate(zip(good_new, good_old)):
                        # The drawing functions need integer pixel positions.
                        a, b = (int(v) for v in np.rint(new.ravel()))
                        c, d = (int(v) for v in np.rint(old.ravel()))
                        track_color = color[i].tolist()
                        mask = cv2.line(mask, (a, b), (c, d), track_color, 2)
                        frame = cv2.circle(
                            frame, (a, b), 5, track_color, -1)

                    # Only the successfully tracked points are followed further
                    p0 = good_new.reshape(-1, 1, 2)

                img = cv2.add(frame, mask)

                out.write(img.astype(np.uint8))

                # Now update the previous frame
                old_gray = frame_gray

                pb.progress(ii)
                ii += 1

            pb.progress(length)

        finally:
            # Always free the decoder and flush the writer, even on errors.
            vidcap.release()
            if out is not None:
                out.release()

        if self.has_audio:
            source_audio = extract_wav(of + fex)
            embed_audio_in_video(source_audio, destination_video)
            os.remove(source_audio)

        return musicalgestures.MgObject(destination_video, color=self.color, returned_by_process=True)
Exemplo n.º 6
0
def mg_motion(self,
              filtertype='Regular',
              thresh=0.05,
              blur='None',
              kernel_size=5,
              inverted_motionvideo=False,
              inverted_motiongram=False,
              unit='seconds',
              equalize_motiongram=True,
              save_plot=True,
              save_data=True,
              data_format="csv",
              save_motiongrams=True,
              save_video=True):
    """
    Finds the difference in pixel value from one frame to the next in an input video, 
    and saves the frames into a new video. Describes the motion in the recording.

    Parameters
    ----------
    - filtertype : {'Regular', 'Binary', 'Blob'}, optional

        `Regular` turns all values below `thresh` to 0.
        `Binary` turns all values below `thresh` to 0, above `thresh` to 1.
        `Blob` removes individual pixels with erosion method.
    - thresh : float, optional

        A number in the range of 0 to 1. Default is 0.05.
        Eliminates pixel values less than given threshold.
    - blur : {'None', 'Average'}, optional

        `Average` to apply a 10px * 10px blurring filter, `None` otherwise.
    - kernel_size : int, optional

        Default is 5. Size of structuring element.
    - inverted_motionvideo : bool, optional

        Default is `False`. If `True`, inverts colors of the motion video.
    - inverted_motiongram : bool, optional

        Default is `False`. If `True`, inverts colors of the motiongrams.
    - unit : {'seconds', 'samples'}, optional

        Unit in QoM plot.
    - equalize_motiongram : bool, optional

        Default is `True`. If `True`, converts the motiongrams to hsv-color 
        space and flattens the value channel (v).
    - save_plot : bool, optional

        Default is `True`. If `True`, outputs motion-plot.
    - save_data : bool, optional

        Default is `True`. If `True`, outputs motion-data.
    - data_format : {'csv', 'tsv', 'txt'}, optional

        Specifies format of motion-data.
    - save_motiongrams : bool, optional

        Default is `True`. If `True`, outputs motiongrams.
    - save_video : bool, optional

        Default is `True`. If `True`, outputs the motion video.

    Outputs
    -------
    - `filename`_motion.avi

        A video of the absolute difference between consecutive frames in the source video. 
    - `filename`_motion_com_qom.png

        A plot describing the centroid of motion and the quantity of motion in the source video.
    - `filename`_mgx.png

        A horizontal motiongram of the source video.
    - `filename`_mgy.png

        A vertical motiongram of the source video.
    - `filename`_motion.csv

        A text file containing the quantity of motion and the centroid of motion for each frame 
        in the source video with timecodes in milliseconds. Available formats: csv, tsv, txt.

    Returns
    -------
    - MgObject 

        A new MgObject pointing to the output '_motion' video file. If `save_video=False`, it 
        returns an MgObject pointing to the input video file.
    """

    if save_plot | save_data | save_motiongrams | save_video:

        # self.blur = blur
        # self.thresh = thresh
        # self.filtertype = filtertype
        of, fex = self.of, self.fex

        # Convert to avi if the input is not avi - necesarry for cv2 compatibility on all platforms
        if fex != '.avi':
            convert_to_avi(of + fex)
            fex = '.avi'
            filename = of + fex

        vidcap = cv2.VideoCapture(of + fex)
        ret, frame = vidcap.read()

        if save_video:
            fourcc = cv2.VideoWriter_fourcc(*'MJPG')
            out = cv2.VideoWriter(of + '_motion' + fex, fourcc, self.fps,
                                  (self.width, self.height))

        if save_motiongrams:
            gramx = np.zeros([1, self.width, 3])
            gramy = np.zeros([self.height, 1, 3])
        if save_data | save_plot:
            time = np.array([])  # time in ms
            qom = np.array([])  # quantity of motion
            com = np.array([])  # centroid of motion

        ii = 0

        pgbar_text = 'Rendering motion' + ", ".join(
            np.array(["-video", "-grams", "-plots", "-data"])[np.array(
                [save_video, save_motiongrams, save_plot, save_data])]) + ":"

        pb = MgProgressbar(total=self.length, prefix=pgbar_text)

        if self.color == False:
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            if save_motiongrams:
                gramx = np.zeros([1, self.width])
                gramy = np.zeros([self.height, 1])

        while (vidcap.isOpened()):
            if blur.lower() == 'average':
                prev_frame = cv2.blur(frame, (10, 10))
            elif blur.lower() == 'none':
                prev_frame = frame

            ret, frame = vidcap.read()
            if ret == True:
                if blur.lower() == 'average':
                    # The higher these numbers the more blur you get
                    frame = cv2.blur(frame, (10, 10))

                if self.color == False:
                    frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

                frame = np.array(frame)
                frame = frame.astype(np.int32)

                if self.color == True:
                    motion_frame_rgb = np.zeros([self.height, self.width, 3])

                    for i in range(frame.shape[2]):
                        motion_frame = (np.abs(frame[:, :, i] -
                                               prev_frame[:, :, i])).astype(
                                                   np.uint8)
                        motion_frame = filter_frame(motion_frame, filtertype,
                                                    thresh, kernel_size)
                        motion_frame_rgb[:, :, i] = motion_frame

                    if save_motiongrams:
                        movement_y = np.mean(motion_frame_rgb, axis=1).reshape(
                            self.height, 1, 3)
                        movement_x = np.mean(motion_frame_rgb,
                                             axis=0).reshape(1, self.width, 3)
                        gramy = np.append(gramy, movement_y, axis=1)
                        gramx = np.append(gramx, movement_x, axis=0)

                else:
                    motion_frame = (np.abs(frame - prev_frame)).astype(
                        np.uint8)
                    motion_frame = filter_frame(motion_frame, filtertype,
                                                thresh, kernel_size)

                    if save_motiongrams:
                        movement_y = np.mean(motion_frame,
                                             axis=1).reshape(self.height, 1)
                        movement_x = np.mean(motion_frame,
                                             axis=0).reshape(1, self.width)
                        gramy = np.append(gramy, movement_y, axis=1)
                        gramx = np.append(gramx, movement_x, axis=0)

                if self.color == False:
                    motion_frame = cv2.cvtColor(motion_frame,
                                                cv2.COLOR_GRAY2BGR)
                    motion_frame_rgb = motion_frame

                if save_video:
                    if inverted_motionvideo:
                        out.write(
                            cv2.bitwise_not(motion_frame_rgb.astype(np.uint8)))
                    else:
                        out.write(motion_frame_rgb.astype(np.uint8))

                if save_plot | save_data:
                    combite, qombite = centroid(
                        motion_frame_rgb.astype(np.uint8), self.width,
                        self.height)
                    if ii == 0:
                        time = frame2ms(ii, self.fps)
                        com = combite.reshape(1, 2)
                        qom = qombite
                    else:
                        time = np.append(time, frame2ms(ii, self.fps))
                        com = np.append(com, combite.reshape(1, 2), axis=0)
                        qom = np.append(qom, qombite)
            else:
                pb.progress(self.length)
                break

            pb.progress(ii)
            ii += 1

        if save_motiongrams:
            if self.color == False:
                # Normalize before converting to uint8 to keep precision
                gramx = gramx / gramx.max() * 255
                gramy = gramy / gramy.max() * 255
                gramx = cv2.cvtColor(gramx.astype(np.uint8),
                                     cv2.COLOR_GRAY2BGR)
                gramy = cv2.cvtColor(gramy.astype(np.uint8),
                                     cv2.COLOR_GRAY2BGR)

            gramx = (gramx - gramx.min()) / (gramx.max() - gramx.min()) * 255.0
            gramy = (gramy - gramy.min()) / (gramy.max() - gramy.min()) * 255.0

            if equalize_motiongram:
                gramx = gramx.astype(np.uint8)
                gramx_hsv = cv2.cvtColor(gramx, cv2.COLOR_BGR2HSV)
                gramx_hsv[:, :, 2] = cv2.equalizeHist(gramx_hsv[:, :, 2])
                gramx = cv2.cvtColor(gramx_hsv, cv2.COLOR_HSV2BGR)

                gramy = gramy.astype(np.uint8)
                gramy_hsv = cv2.cvtColor(gramy, cv2.COLOR_BGR2HSV)
                gramy_hsv[:, :, 2] = cv2.equalizeHist(gramy_hsv[:, :, 2])
                gramy = cv2.cvtColor(gramy_hsv, cv2.COLOR_HSV2BGR)

            if inverted_motiongram:
                cv2.imwrite(of + '_mgx.png',
                            cv2.bitwise_not(gramx.astype(np.uint8)))
                cv2.imwrite(of + '_mgy.png',
                            cv2.bitwise_not(gramy.astype(np.uint8)))
            else:
                cv2.imwrite(of + '_mgx.png', gramx.astype(np.uint8))
                cv2.imwrite(of + '_mgy.png', gramy.astype(np.uint8))

        if save_data:
            save_txt(of, time, com, qom, self.width, self.height, data_format)

        if save_plot:
            plot_motion_metrics(of, self.fps, com, qom, self.width,
                                self.height, unit)

        vidcap.release()
        if save_video:
            out.release()
            destination_video = of + '_motion' + fex
            if self.has_audio:
                source_audio = extract_wav(of + fex)
                embed_audio_in_video(source_audio, destination_video)
                os.remove(source_audio)
            return musicalgestures.MgObject(destination_video,
                                            color=self.color,
                                            returned_by_process=True)
        else:
            return musicalgestures.MgObject(of + fex,
                                            color=self.color,
                                            returned_by_process=True)

    else:
        print("Nothing to render. Exiting...")
        return musicalgestures.MgObject(of + fex, returned_by_process=True)
Exemplo n.º 7
0
def mg_motionhistory(self,
                     history_length=10,
                     kernel_size=5,
                     filtertype='Regular',
                     thresh=0.05,
                     blur='None',
                     inverted_motionhistory=False):
    """
    Finds the difference in pixel value from one frame to the next in an input video,
    and saves the difference frame to a history tail. The history frames and the
    current difference frame are averaged with equal weights to show the history of
    motion.

    Parameters
    ----------
    - history_length : int, optional

        Default is 10. Number of frames to be saved in the history tail.
        Values of 0 or less disable the tail, so only the current difference
        frame is rendered.
    - kernel_size : int, optional

        Default is 5. Size of structuring element.
    - filtertype : {'Regular', 'Binary', 'Blob'}, optional

        `Regular` turns all values below `thresh` to 0.
        `Binary` turns all values below `thresh` to 0, above `thresh` to 1.
        `Blob` removes individual pixels with erosion method.
    - thresh : float, optional

        A number in the range of 0 to 1. Default is 0.05.
        Eliminates pixel values less than given threshold.
    - blur : {'None', 'Average'}, optional

        `Average` to apply a 10px * 10px blurring filter, `None` otherwise.
        The comparison is case-insensitive; any other value is treated as `None`.
    - inverted_motionhistory : bool, optional

        Default is `False`. If `True`, inverts colors of the motionhistory video.

    Outputs
    -------
    - `filename`_motionhistory.avi

    Returns
    -------
    - MgObject

        A new MgObject pointing to the output '_motionhistory' video file.
    """
    from collections import deque

    enhancement = 1  # This can be adjusted to higher number to make motion more visible. Use with caution to not make it overflow.
    self.filtertype = filtertype
    self.thresh = thresh
    self.blur = blur

    use_blur = self.blur.lower() == 'average'
    grayscale = not self.color

    # One path for the writer, the audio embedding and the returned object,
    # so they can never point at different files.
    destination_video = self.of + '_motionhistory' + self.fex

    vidcap = cv2.VideoCapture(self.of + self.fex)
    ret, frame = vidcap.read()
    fourcc = cv2.VideoWriter_fourcc(*'MJPG')
    out = cv2.VideoWriter(destination_video, fourcc, self.fps,
                          (self.width, self.height))

    ii = 0
    # A bounded deque drops the oldest frame automatically once it is full
    # and, unlike list.pop(0), also copes with a tail length of 0.
    history = deque(maxlen=max(int(history_length), 0))

    try:
        if grayscale:
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

        while vidcap.isOpened():
            # NOTE: with blur enabled the previous frame is blurred here a
            # second time (it was already blurred when it was read below).
            prev_frame = cv2.blur(frame, (10, 10)) if use_blur else frame

            ret, frame = vidcap.read()
            if not ret:
                mg_progressbar(self.length, self.length,
                               'Rendering motion history video:', 'Complete')
                break

            if use_blur:
                # The higher these numbers the more blur you get
                frame = cv2.blur(frame, (10, 10))

            if grayscale:
                frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

            # Work in float so the frame difference cannot wrap around.
            frame = np.array(frame).astype(np.float64)

            if grayscale:
                frame_diff = np.abs(frame - prev_frame).astype(np.float64)
                motion_frame = filter_frame(frame_diff, self.filtertype,
                                            self.thresh, kernel_size)
            else:
                # Filter every colour channel independently.
                motion_frame = np.zeros([self.height, self.width, 3])
                for i in range(frame.shape[2]):
                    channel_diff = np.abs(frame[:, :, i] -
                                          prev_frame[:, :, i]).astype(
                                              np.float64)
                    motion_frame[:, :, i] = filter_frame(
                        channel_diff, self.filtertype, self.thresh,
                        kernel_size)

            # Equal-weight average of the current frame and the history tail.
            weight = len(history) + 1
            motion_history = motion_frame / weight
            for past_frame in history:
                motion_history += past_frame / weight

            history.append(motion_frame)
            motion_history = motion_history.astype(np.uint64)

            output_frame = motion_history.astype(np.uint8)
            if grayscale:
                output_frame = cv2.cvtColor(output_frame, cv2.COLOR_GRAY2BGR)
            output_frame = enhancement * output_frame

            if inverted_motionhistory:
                output_frame = cv2.bitwise_not(output_frame)
            out.write(output_frame)

            ii += 1
            mg_progressbar(ii, self.length, 'Rendering motion history video:',
                           'Complete')
    finally:
        # Always free the capture and flush the writer, even on errors.
        vidcap.release()
        out.release()

    source_audio = extract_wav(self.of + self.fex)
    embed_audio_in_video(source_audio, destination_video)
    os.remove(source_audio)

    return musicalgestures.MgObject(destination_video,
                                    color=self.color,
                                    returned_by_process=True)
Exemplo n.º 8
0
def pose(self,
         model='mpi',
         device='cpu',
         threshold=0.1,
         downsampling_factor=4,
         save_data=True,
         data_format='csv',
         save_video=True):
    """
    Renders a video with the pose estimation (aka. "keypoint detection" or "skeleton tracking") overlaid on it. Outputs the predictions in a text file (default format is csv). Uses models from the [openpose](https://github.com/CMU-Perceptual-Computing-Lab/openpose) project.

    Args:
        model (str, optional): 'mpi' loads the model trained on the Multi-Person Dataset (MPII), 'coco' loads one trained on the COCO dataset. The MPII model outputs 15 points, while the COCO model produces 18 points. Unrecognized values fall back to 'mpi'. Defaults to 'mpi'.
        device (str, optional): Sets the backend to use for the neural network ('cpu' or 'gpu'). Unrecognized values fall back to 'cpu'. Defaults to 'cpu'.
        threshold (float, optional): The normalized confidence threshold that decides whether we keep or discard a predicted point. Discarded points get substituted with (0, 0) in the output data. Defaults to 0.1.
        downsampling_factor (int, optional): Decides how much we downsample the video before we pass it to the neural network. For example `downsampling_factor=4` means that the input to the network is one-fourth the resolution of the source video. Heavier downsampling reduces rendering time but produces lower quality pose estimation. Defaults to 4.
        save_data (bool, optional): Whether we save the predicted pose data to a file. Defaults to True.
        data_format (str or list, optional): Specifies format of pose-data. Accepted values are 'csv', 'tsv' and 'txt'. For multiple output formats, use list, eg. ['csv', 'txt']. Unsupported formats fall back to 'csv'. Defaults to 'csv'.
        save_video (bool, optional): Whether we save the video with the estimated pose overlaid on it. Defaults to True.

    Outputs:
        `filename`_pose.avi: The source video with pose overlay.
        `filename`_pose.`data_format`: A text file containing the normalized x and y coordinates of each keypoints (such as head, left shoulder, right shoulder, etc) for each frame in the source video with timecodes in milliseconds. Available formats: csv, tsv, txt.

    Returns:
        MgObject: An MgObject pointing to the output '_pose' video, or to the source video if `save_video` is False or the model weights are missing and not downloaded.
    """

    def _save_single_file(of, model, data, data_format):
        """
        Helper function to export pose estimation data as a textfile using pandas.
        """
        coco_table = [
            'Nose', 'Neck', 'Right Shoulder', 'Right Elbow', 'Right Wrist',
            'Left Shoulder', 'Left Elbow', 'Left Wrist', 'Right Hip',
            'Right Knee', 'Right Ankle', 'Left Hip', 'Left Knee',
            'Left Ankle', 'Right Eye', 'Left Eye', 'Right Ear', 'Left Ear'
        ]
        mpi_table = [
            'Head', 'Neck', 'Right Shoulder', 'Right Elbow', 'Right Wrist',
            'Left Shoulder', 'Left Elbow', 'Left Wrist', 'Right Hip',
            'Right Knee', 'Right Ankle', 'Left Hip', 'Left Knee',
            'Left Ankle', 'Chest'
        ]
        keypoint_names = mpi_table if model.lower() == 'mpi' else coco_table

        headers = ['Time']
        for name in keypoint_names:
            headers.extend((name + ' X', name + ' Y'))

        data_format = data_format.lower()
        df = pd.DataFrame(data=data, columns=headers)

        delimiters = {'tsv': '\t', 'txt': ' '}
        if data_format in delimiters:
            delimiter = delimiters[data_format]
            with open(of + '_pose.' + data_format, 'wb') as f:
                # The header keeps a trailing delimiter so the file layout
                # stays identical to what earlier versions produced.
                header_line = ''.join(head + delimiter
                                      for head in headers) + '\n'
                f.write(header_line.encode())
                # Integer timecode followed by one float per coordinate.
                fmt_list = ['%d'] + ['%.15f'] * (len(keypoint_names) * 2)
                np.savetxt(f, df.values, delimiter=delimiter, fmt=fmt_list)
        else:
            if data_format != 'csv':
                print(
                    f"Invalid data format: '{data_format}'.\nFalling back to '.csv'."
                )
            df.to_csv(of + '_pose.csv', index=None)

    def _save_pose_data(of, model, data, data_format):
        """
        Helper function to export pose estimation data as textfile(s).
        """
        if isinstance(data_format, str):
            _save_single_file(of, model, data, data_format)

        elif isinstance(data_format, list):
            # Lower-casing before de-duplicating avoids writing the same
            # file twice for e.g. ['csv', 'CSV'].
            formats = {item.lower() for item in data_format}
            if formats <= {"csv", "tsv", "txt"}:
                for item in sorted(formats):
                    _save_single_file(of, model, data, item)
            else:
                print(
                    f"Unsupported formats in {data_format}.\nFalling back to '.csv'."
                )
                _save_single_file(of, model, data, "csv")

    module_path = os.path.abspath(os.path.dirname(musicalgestures.__file__))

    model_configs = {
        'mpi': {
            'proto': '/pose/mpi/pose_deploy_linevec_faster_4_stages.prototxt',
            'weights': '/pose/mpi/pose_iter_160000.caffemodel',
            'n_points': 15,
            'pairs': [[0, 1], [1, 2], [2, 3], [3, 4], [1, 5], [5, 6], [6, 7],
                      [1, 14], [14, 8], [8, 9], [9, 10], [14, 11], [11, 12],
                      [12, 13]],
        },
        'coco': {
            'proto': '/pose/coco/pose_deploy_linevec.prototxt',
            'weights': '/pose/coco/pose_iter_440000.caffemodel',
            'n_points': 18,
            'pairs': [[1, 0], [1, 2], [1, 5], [2, 3], [3, 4], [5, 6], [6, 7],
                      [1, 8], [8, 9], [9, 10], [1, 11], [11, 12], [12, 13],
                      [0, 14], [0, 15], [14, 16], [15, 17]],
        },
    }

    model_key = model.lower()
    if model_key not in model_configs:
        print(f'Unrecognized model "{model}", switching to default (mpi).')
        model_key = 'mpi'
    model = model_key

    # Looking everything up from one table guarantees that the fallback
    # model also gets its keypoint count and skeleton definition.
    config = model_configs[model]
    protoFile = module_path + config['proto']
    weightsFile = module_path + config['weights']
    nPoints = config['n_points']
    POSE_PAIRS = config['pairs']

    # Check if .caffemodel file exists, download if necessary
    if not os.path.exists(weightsFile):
        print(
            'Could not find weights file. Do you want to download it (~200MB)? (y/n)'
        )
        answer = input()
        if answer.lower() == 'n':
            print('Ok. Exiting...')
            return musicalgestures.MgObject(self.filename,
                                            color=self.color,
                                            returned_by_process=True)
        elif answer.lower() == 'y':
            download_model(model)
        else:
            print(f'Unrecognized answer "{answer}". Exiting...')
            return musicalgestures.MgObject(self.filename,
                                            color=self.color,
                                            returned_by_process=True)

    # Read the network into Memory
    net = cv2.dnn.readNetFromCaffe(protoFile, weightsFile)
    if device == "gpu":
        net.setPreferableBackend(cv2.dnn.DNN_BACKEND_CUDA)
        net.setPreferableTarget(cv2.dnn.DNN_TARGET_CUDA)
    else:
        if device != "cpu":
            print(
                f'Unrecognized device "{device}", switching to default (cpu).')
        # DNN_TARGET_CPU is a target constant, so it goes to
        # setPreferableTarget; the backend is left at its default.
        net.setPreferableTarget(cv2.dnn.DNN_TARGET_CPU)

    of, fex = os.path.splitext(self.filename)

    if fex != '.avi':
        convert_to_avi(of + fex)
        fex = '.avi'
        filename = of + fex
    else:
        filename = self.filename

    # No frame is read ahead of the loop, so frame 0 is processed too and
    # the timecodes line up with the source video.
    vidcap = cv2.VideoCapture(filename)

    fps = int(vidcap.get(cv2.CAP_PROP_FPS))
    width = int(vidcap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(vidcap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    length = int(vidcap.get(cv2.CAP_PROP_FRAME_COUNT))

    inWidth = int(roundup(width / downsampling_factor, 2))
    inHeight = int(roundup(height / downsampling_factor, 2))

    pb = MgProgressbar(total=length, prefix='Rendering pose estimation video:')

    destination_video = of + '_pose' + fex
    out = None
    if save_video:
        fourcc = cv2.VideoWriter_fourcc(*'MJPG')
        out = cv2.VideoWriter(destination_video, fourcc, fps, (width, height))

    ii = 0
    data = []

    try:
        while vidcap.isOpened():
            ret, frame = vidcap.read()
            if not ret:
                pb.progress(length)
                break

            inpBlob = cv2.dnn.blobFromImage(frame,
                                            1.0 / 255, (inWidth, inHeight),
                                            (0, 0, 0),
                                            swapRB=False,
                                            crop=False)
            net.setInput(inpBlob)
            output = net.forward()

            H = output.shape[2]
            W = output.shape[3]
            points = []

            for i in range(nPoints):
                # confidence map of corresponding body's part.
                probMap = output[0, i, :, :]

                # Find global maxima of the probMap.
                minVal, prob, minLoc, point = cv2.minMaxLoc(probMap)

                # Scale the point to fit on the original image
                x = (width * point[0]) / W
                y = (height * point[1]) / H

                # Low-confidence detections are stored as None.
                points.append((int(x), int(y)) if prob > threshold else None)

            if save_data:
                time = frame2ms(ii, fps)
                # Normalize to 0..1; discarded points are written as (0, 0).
                normalized = [[point[0] / width, point[1] / height]
                              if point is not None else [0, 0]
                              for point in points]
                data.append(
                    [time, *itertools.chain.from_iterable(normalized)])

            for partA, partB in POSE_PAIRS:
                if points[partA] and points[partB]:
                    cv2.line(frame,
                             points[partA],
                             points[partB], (0, 255, 255),
                             2,
                             lineType=cv2.LINE_AA)
                    cv2.circle(frame,
                               points[partA],
                               4, (0, 0, 255),
                               thickness=-1,
                               lineType=cv2.FILLED)
                    cv2.circle(frame,
                               points[partB],
                               4, (0, 0, 255),
                               thickness=-1,
                               lineType=cv2.FILLED)

            if out is not None:
                out.write(frame.astype(np.uint8))

            pb.progress(ii)
            ii += 1
    finally:
        # Always free the capture and flush the writer, even on errors.
        vidcap.release()
        if out is not None:
            out.release()

    if save_video and self.has_audio:
        source_audio = extract_wav(of + fex)
        embed_audio_in_video(source_audio, destination_video)
        os.remove(source_audio)

    if save_data:
        _save_pose_data(of, model, data, data_format)

    if save_video:
        return musicalgestures.MgObject(destination_video,
                                        color=self.color,
                                        returned_by_process=True)

    # No overlay video was rendered, so point back at the source video.
    return musicalgestures.MgObject(of + fex,
                                    color=self.color,
                                    returned_by_process=True)