Example no. 1
def test_series_unclosed():
    im1 = imageio.imread("imageio:chelsea.png")
    ims1 = [im1, im1 * 0.8, im1 * 0.5]

    fname = os.path.join(test_dir, "chelseam.bsdf")
    w = imageio.get_writer(fname)
    for im in ims1:
        w.append_data(im)
    w._close = lambda: None  # nope, leave stream open
    w.close()

    # read non-streaming, reads all frames on opening (but skips over blobs)
    r = imageio.get_reader(fname)
    assert r.get_length() == 3  # not np.inf because not streaming

    # read streaming and get all
    r = imageio.get_reader(fname, random_access=False)
    assert r.get_length() == np.inf
    #
    ims2 = [im for im in r]
    assert len(ims2) == 3 and all(np.all(ims1[i] == ims2[i]) for i in range(3))

    # read streaming and read one
    r = imageio.get_reader(fname, random_access=False)
    assert r.get_length() == np.inf
    #
    assert np.all(ims1[2] == r.get_data(2))
Example no. 2
def test_writer_pixelformat_size_verbose(tmpdir):
    # Check that video pixel format and size get written as expected.
    need_internet()
    # Make sure verbose option works and that default pixelformat is yuv420p
    tmpf = tmpdir.join('test.mp4')
    W = imageio.get_writer(str(tmpf), ffmpeg_log_level='debug')
    nframes = 4  # Number of frames in video
    for i in range(nframes):
        # Use size divisible by 16 or it gets changed.
        W.append_data(np.zeros((64, 64, 3), np.uint8))
    W.close()

    # Check that video is correct size & default output video pixel format
    # is correct
    W = imageio.get_reader(str(tmpf))
    assert len(W) == nframes
    assert "64x64" in W._stderr_catcher.header
    assert "yuv420p" in W._stderr_catcher.header

    # Now check that macroblock size gets turned off if requested
    W = imageio.get_writer(str(tmpf), macro_block_size=None,
                           ffmpeg_log_level='debug')
    for i in range(nframes):
        W.append_data(np.zeros((100, 106, 3), np.uint8))
    W.close()
    W = imageio.get_reader(str(tmpf))
    assert len(W) == nframes
    assert "106x100" in W._stderr_catcher.header
    assert "yuv420p" in W._stderr_catcher.header

    # Now double check values different than default work
    W = imageio.get_writer(str(tmpf), macro_block_size=4,
                           ffmpeg_log_level='debug')
    for i in range(nframes):
        W.append_data(np.zeros((64, 65, 3), np.uint8))
    W.close()
    W = imageio.get_reader(str(tmpf))
    assert len(W) == nframes
    assert "68x64" in W._stderr_catcher.header
    assert "yuv420p" in W._stderr_catcher.header

    # Now check that the macroblock works as expected for the default of 16
    W = imageio.get_writer(str(tmpf), ffmpeg_log_level='debug')
    for i in range(nframes):
        W.append_data(np.zeros((111, 140, 3), np.uint8))
    W.close()
    W = imageio.get_reader(str(tmpf))
    assert len(W) == nframes
    # Check for warning message with macroblock
    assert "144x112" in W._stderr_catcher.header
    assert "yuv420p" in W._stderr_catcher.header
Example no. 3
def test_writer_pixelformat_size_verbose(tmpdir):
    # Check that video pixel format and size get written as expected.

    # Make sure verbose option works and that default pixelformat is yuv420p
    tmpf = tmpdir.join("test.mp4")
    W = imageio.get_writer(str(tmpf), ffmpeg_log_level="warning")
    nframes = 4  # Number of frames in video
    for i in range(nframes):
        # Use size divisible by 16 or it gets changed.
        W.append_data(np.zeros((64, 64, 3), np.uint8))
    W.close()

    # Check that video is correct size & default output video pixel format
    # is correct
    W = imageio.get_reader(str(tmpf))
    assert W.count_frames() == nframes
    assert W._meta["size"] == (64, 64)
    assert "yuv420p" == W._meta["pix_fmt"]

    # Now check that macroblock size gets turned off if requested
    W = imageio.get_writer(str(tmpf), macro_block_size=1, ffmpeg_log_level="warning")
    for i in range(nframes):
        W.append_data(np.zeros((100, 106, 3), np.uint8))
    W.close()
    W = imageio.get_reader(str(tmpf))
    assert W.count_frames() == nframes
    assert W._meta["size"] == (106, 100)
    assert "yuv420p" == W._meta["pix_fmt"]

    # Now double check values different than default work
    W = imageio.get_writer(str(tmpf), macro_block_size=4, ffmpeg_log_level="warning")
    for i in range(nframes):
        W.append_data(np.zeros((64, 65, 3), np.uint8))
    W.close()
    W = imageio.get_reader(str(tmpf))
    assert W.count_frames() == nframes
    assert W._meta["size"] == (68, 64)
    assert "yuv420p" == W._meta["pix_fmt"]

    # Now check that the macroblock works as expected for the default of 16
    W = imageio.get_writer(str(tmpf), ffmpeg_log_level="debug")
    for i in range(nframes):
        W.append_data(np.zeros((111, 140, 3), np.uint8))
    W.close()
    W = imageio.get_reader(str(tmpf))
    assert W.count_frames() == nframes
    # Check for warning message with macroblock
    assert W._meta["size"] == (144, 112)
    assert "yuv420p" == W._meta["pix_fmt"]
def main_testing():
    '''cd /Users/kentchiu/Night_Graden/Project/2016_MIT/Auto_HOG_SVM/'''
    from common_tool_agent.common_func import non_max_suppression
    from common_tool_agent.conf import Conf
    from common_tool_agent.descriptor_agent.hog import HOG
    from common_tool_agent.detect import ObjectDetector
    from sklearn.externals import joblib
    from skimage.io import imread
    from skimage.io import imshow as show
    from common_tool_agent.common_func import auto_resized
    import argparse 
    import numpy as np
    import cv2
    import os
    import imageio

    # load vid : )
    vid = imageio.get_reader('/Users/kentchiu/MIT_Vedio/2016-01-21/10.167.10.158_01_20160121082638418_1.mp4')

    #conf = Conf('conf_hub/conf_001.json')
    conf = Conf('conf_hub/conf_001.json')

    # loading model
    clf = joblib.load(conf['model_ph'])

    # initialize feature container
    hog = HOG(orientations=conf["orientations"],
              pixelsPerCell=tuple(conf["pixels_per_cell"]),
              cellsPerBlock=tuple(conf["cells_per_block"]),
              normalize=conf["normalize"])
    # initialize the object detector
    od = ObjectDetector(clf, hog)
Example no. 5
def default_loader(path):
    reader = imageio.get_reader(path)
    video = np.zeros((reader._meta['nframes'], reader._meta['size'][1],
                      reader._meta['size'][0]), dtype=np.uint8)
    for i, im in enumerate(reader):
        video[i, :, :] = im.mean(2)
    return video
Example no. 6
    def _select_features(self, video_file):
        '''
        Select a sequence of frames from video_file and return them as
        a Tensor.
        '''
        video_reader = imageio.get_reader(video_file, 'ffmpeg')
        num_frames   = len(video_reader)
        if self.sequence_length > num_frames:
            raise ValueError('Sequence length {} is larger then the total number of frames {} in {}.'.format(self.sequence_length, num_frames, video_file))

        # select which sequence frames to use.
        step = 1
        expanded_sequence = self.sequence_length
        if num_frames > 2*self.sequence_length:
            step = 2
            expanded_sequence = 2*self.sequence_length

        seq_start = int(num_frames/2) - int(expanded_sequence/2)
        if self.is_training:
            seq_start = randint(0, num_frames - expanded_sequence)

        frame_range = [seq_start + step*i for i in range(self.sequence_length)]            
        video_frames = []
        for frame_index in frame_range:
            video_frames.append(self._read_frame(video_reader.get_data(frame_index)))
        
        return np.stack(video_frames, axis=1)
Example no. 7
def split_data(groups, file_ext):
    '''
    Split the data at random for train, eval and test set.
    '''
    group_count = len(groups)
    indices = np.arange(group_count)

    np.random.seed(0) # Make it deterministic.
    np.random.shuffle(indices)

    # 80% training and 20% test.
    train_count = int(0.8 * group_count)
    test_count  = group_count - train_count

    train = []
    test  = []

    for i in range(train_count):
        group = groups[indices[i]]
        video_files = os.listdir(group[0])
        for video_file in video_files:
            video_file_path = os.path.join(group[0], video_file)
            if os.path.isfile(video_file_path):
                video_file_path = os.path.abspath(video_file_path)
                ext = os.path.splitext(video_file_path)[1]
                if (ext == file_ext):
                    # make sure we have enough frames and the file isn't corrupt
                    video_reader = imageio.get_reader(video_file_path, 'ffmpeg')                    
                    if len(video_reader) >= 16:
                        train.append([video_file_path, group[1]])

    for i in range(train_count, train_count + test_count):
        group = groups[indices[i]]
        video_files = os.listdir(group[0])
        for video_file in video_files:
            video_file_path = os.path.join(group[0], video_file)
            if os.path.isfile(video_file_path):
                video_file_path = os.path.abspath(video_file_path)
                ext = os.path.splitext(video_file_path)[1]
                if (ext == file_ext):
                    # make sure we have enough frames and the file isn't corrupt
                    video_reader = imageio.get_reader(video_file_path, 'ffmpeg')
                    if len(video_reader) >= 16:
                        test.append([video_file_path, group[1]])

    return train, test
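A brief usage sketch for split_data follows; the folder paths and labels are hypothetical, assuming each group is a (directory, label) pair as the loops above expect.

# Hypothetical usage: scan two labelled folders for .avi clips with >= 16 frames.
groups = [('data/walking', 0), ('data/jogging', 1)]  # placeholder (folder, label) pairs
train_set, test_set = split_data(groups, '.avi')
print(len(train_set), 'training clips,', len(test_set), 'test clips')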
def background_substraction(filepath):
    #fgbg = cv2.createBackgroundSubtractorMOG()
    fgbg = cv2.createBackgroundSubtractorMOG2()
    # training with frames
    vid = imageio.get_reader(filepath)
    for i in range(100):
        fgmask = fgbg.apply(vid.get_data(i))
    cv2.imshow('frame',fgmask)
    return 
Example no. 9
 def _configure(self, camera_num=0):
     self.camera_num = camera_num
     reader = imageio.get_reader('<video{}>'.format(self.camera_num))
     self.metadata = reader.get_meta_data()
     reader.close()
     
     s = self.metadata['size']
     self.output.spec['shape'] = (s[1], s[0], 3,)
     self.output.spec['sample_rate'] = float(self.metadata['fps'])
Example no. 10
def viedeo_to_ascii(filename):
    reader = imageio.get_reader(filename)
    frames = []
    bar = progressbar.ProgressBar()

    for im in bar(reader):
        image = Image.fromarray(im)
        frames += [pic2ascii(image)]
    return frames
def configuration_2():
    vid = imageio.get_reader('/Users/kentchiu/MIT_Vedio/2016-01-21/10.167.10.158_01_20160121082638418_2.mp4')
    conf = Conf('conf_hub/conf_pureScrewDriver_2.json')
    clf = joblib.load(conf['model_ph'])
    # initialize feature container
    hog = HOG(orientations=conf["orientations"],
              pixelsPerCell=tuple(conf["pixels_per_cell"]),
              cellsPerBlock=tuple(conf["cells_per_block"]),
              normalize=conf["normalize"])
    # initialize the object detector
    od = ObjectDetector(clf, hog)
Example no. 12
def bounds2video(bounds_file,video_in, video_out, subsampleRate, speedup):
    """
        This function reads a .csv file with bounds and creates a split video based on them.
    """

    # reading the bounds
    bounds = pd.read_csv(bounds_file, index_col = None)

    fig = plt.figure(figsize = (15,10))

    # setting up the video reader and writer
    vid_in = imageio.get_reader(video_in,'ffmpeg')
    vid_out = imageio.get_writer(video_out,fps = 30/subsampleRate*speedup)

    # create a binary indicator for where the scenes are
    frame_idx = np.arange(0,len(vid_in),subsampleRate)
    binary = np.zeros((len(frame_idx),))

    print(subsampleRate)

    for lb,ub in zip(bounds['LB'],bounds['UB']):
        binary[round(lb/subsampleRate*10):round(ub/subsampleRate*10)] = 1




    for i in range(len(frame_idx)):
    # for i in range(100):

        print(i)


        im = vid_in.get_data(frame_idx[i])
        plt.subplot(211)
        plt.imshow(im, aspect = 'equal')
        plt.axis('off')
        plt.title('Raw Video')


        plt.subplot(212)
        plt.imshow(im*int(binary[i]), aspect = 'equal')
        plt.axis('off')
        plt.title('Static Scenes')


        # convert the plot
        fig.canvas.draw()
        fig_data = np.fromstring(fig.canvas.tostring_rgb(),dtype = np.uint8, sep='')
        fig_data = fig_data.reshape(fig.canvas.get_width_height()[::-1] + (3,))

        # vid_out.append_data(fig_data.astype(np.dtype('uint8')))
        vid_out.append_data(fig_data)
        # vid_out.append_data(im)
        plt.clf()

    vid_in.close()
    vid_out.close()
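A minimal call sketch for the function above; the file names, subsample rate, and speedup are placeholders, not values from the original source. The bounds file is expected to hold 'LB' and 'UB' columns.

# Hypothetical call: split 'input.mp4' according to 'bounds.csv',
# sampling every 10th frame and speeding playback up 5x.
bounds2video('bounds.csv', 'input.mp4', 'scenes_split.mp4',
             subsampleRate=10, speedup=5)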
Example no. 13
    def __init__(self, filename, **kwargs):
        if imageio is None:
            raise ImportError('The ImageIOReader requires imageio to work.')
        self.reader = imageio.get_reader(filename, **kwargs)
        self.filename = filename
        self._len = self.reader.get_length()

        first_frame = self.get_frame(0)
        self._shape = first_frame.shape
        self._dtype = first_frame.dtype
Example no. 14
def test_overload_fps():

    need_internet()

    # Native
    r = imageio.get_reader("imageio:cockatoo.mp4")
    assert r.count_frames() == 280  # native
    assert int(r._meta["fps"] * r._meta["duration"] + 0.5) == 280
    ims = [im for im in r]
    assert len(ims) == 280
    # imageio.mimwrite('~/parot280.gif', ims[:30])

    # Less
    r = imageio.get_reader("imageio:cockatoo.mp4", fps=8)
    # assert r.count_frames() == 112  # cant :(
    assert int(r._meta["fps"] * r._meta["duration"] + 0.5) == 112  # note the mismatch
    ims = [im for im in r]
    assert len(ims) == 114
    # imageio.mimwrite('~/parot112.gif', ims[:30])

    # More
    r = imageio.get_reader("imageio:cockatoo.mp4", fps=24)
    # assert r.count_frames() == 336  # cant :(
    ims = [im for im in r]
    assert int(r._meta["fps"] * r._meta["duration"] + 0.5) == 336
    assert len(ims) == 336
    # imageio.mimwrite('~/parot336.gif', ims[:30])

    # Do we calculate nframes correctly? To be fair, the reader won't try to
    # read beyond how many frames it thinks it has, but this at least
    # makes sure that this works.
    for fps in (8.0, 8.02, 8.04, 8.06, 8.08):
        r = imageio.get_reader("imageio:cockatoo.mp4", fps=fps)
        n = int(r._meta["fps"] * r._meta["duration"] + 0.5)
        i = 0
        try:
            while True:
                r.get_next_data()
                i += 1
        except (StopIteration, IndexError):
            pass
        # print(r._meta['duration'], r._meta['fps'], r._meta['duration'] * fps, r._meta['nframes'], n)
        assert n - 2 <= i <= n + 2
Example no. 15
def test_grab_simulated():
    # Hard to test for real, if only because it's only fully supported on
    # Windows, but we can monkey patch so we can test all the imageio bits.

    imageio.plugins.grab.BaseGrabFormat._ImageGrab = FakeImageGrab
    imageio.plugins.grab.BaseGrabFormat._pillow_imported = True
    _plat = sys.platform
    sys.platform = "win32"

    try:

        im = imageio.imread("<screen>")
        assert im.shape == (8, 8, 3)

        reader = imageio.get_reader("<screen>")
        im1 = reader.get_data(0)
        im2 = reader.get_data(0)
        im3 = reader.get_data(1)
        assert im1.shape == (8, 8, 3)
        assert im2.shape == (8, 8, 3)
        assert im3.shape == (8, 8, 3)

        im = imageio.imread("<clipboard>")
        assert im.shape == (9, 9, 3)

        reader = imageio.get_reader("<clipboard>")
        im1 = reader.get_data(0)
        im2 = reader.get_data(0)
        im3 = reader.get_data(1)
        assert im1.shape == (9, 9, 3)
        assert im2.shape == (9, 9, 3)
        assert im3.shape == (9, 9, 3)

        # Grabbing from clipboard can fail if there is no image data to grab
        FakeImageGrab.has_clipboard = False
        with raises(RuntimeError):
            im = imageio.imread("<clipboard>")

    finally:
        sys.platform = _plat
        imageio.plugins.grab.BaseGrabFormat._ImageGrab = None
        imageio.plugins.grab.BaseGrabFormat._pillow_imported = False
        FakeImageGrab.has_clipboard = True
Example no. 16
 def __init__(self, filename, n_frames=None):
     self.reader = imageio.get_reader(filename,  'ffmpeg')
     self.fps = self.reader.get_meta_data()['fps']
     vid_shape = self.reader.get_data(0).shape[:2]
     self.vid_shape = np.array(vid_shape)
     self.center = self.vid_shape/2
     if n_frames:
         assert(n_frames < self.reader.get_length())
         self.n_frames = n_frames
     else:
         self.n_frames = self.reader.get_length() - 1
Example no. 17
def imageio_gif_importer(filepath, asset=None, normalise=True, **kwargs):
    r"""
    Imports GIF images using the freeimagemulti plugin from the imageio library.
    Returns a :map:`LazyList` that gives lazy access to the GIF on a per-frame
    basis.

    Parameters
    ----------
    filepath : `Path`
        Absolute filepath of the video.
    asset : `object`, optional
        An optional asset that may help with loading. This is unused for this
        implementation.
    normalise : `bool`, optional
        If ``True``, normalise between 0.0 and 1.0 and convert to float. If
        ``False`` just return whatever imageio imports.
    \**kwargs : `dict`, optional
        Any other keyword arguments.

    Returns
    -------
    image : :map:`LazyList`
        A :map:`LazyList` containing :map:`Image` or subclasses per frame
        of the GIF.
    """
    import imageio

    reader = imageio.get_reader(str(filepath), format='gif', mode='I')

    def imageio_to_menpo(imio_reader, index):
        pixels = imio_reader.get_data(index)
        pixels = channels_to_front(pixels)

        if pixels.shape[0] == 4:
            # If normalise is False, then we return the alpha as an extra
            # channel, which can be useful if the alpha channel has semantic
            # meanings!
            if normalise:
                p = normalize_pixels_range(pixels[:3])
                return MaskedImage(p, mask=pixels[-1].astype(np.bool),
                                   copy=False)
            else:
                return Image(pixels, copy=False)

        # Assumed not to have an Alpha channel
        if normalise:
            return Image(normalize_pixels_range(pixels), copy=False)
        else:
            return Image(pixels, copy=False)

    index_callable = partial(imageio_to_menpo, reader)
    ll = LazyList.init_from_index_callable(index_callable,
                                           reader.get_length())
    return ll
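A hedged usage sketch; 'animation.gif' is a placeholder path, and only standard LazyList indexing is assumed.

# Hypothetical usage: lazily wrap a GIF and materialise its first frame.
frames = imageio_gif_importer('animation.gif')
print('number of frames:', len(frames))
first_frame = frames[0]        # decoded lazily on first access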
Example no. 18
    def set_videos(self, videofiles, videotimes = None):
        
        self.cv_image_widgets = [ ]
        self.grid_changing =False
        
        self.videofiles = videofiles
        self.videotimes = videotimes
        if self.videotimes is None:
            self.videotimes = [None]*len(videofiles)

        all = [ ]
        for i, vid in enumerate(self.videofiles):
            name = '{} {}'.format(i, os.path.basename(vid))
            all.append({ 'name': name, 'type' : 'group', 'children' : param_by_channel})
        self.paramVideos = pg.parametertree.Parameter.create(name='Videos', type='group', children=all)
        
        self.allParams = pg.parametertree.Parameter.create(name = 'all param', type = 'group', children = [self.paramGlobal,self.paramVideos  ])
        
        self.paramControler = VideoViewerControler(viewer = self)
        
        
        #~ self.captures = [ ]
        self.videos = [ ]
        self.video_length = [ ]
        self.video_fps = [ ]
        for i, vid in enumerate(self.videofiles):
            #~ cap = cv2.VideoCapture(vid)
            #~ self.captures.append(cap)
            #~ self.video_length.append(cap.get(cv2.cv.CV_CAP_PROP_FRAME_COUNT))
            #~ self.video_fps.append(cap.get(cv2.cv.CV_CAP_PROP_FPS))
            
            if mode =='imageio':
                v  = imageio.get_reader(vid, format = 'avi', mode = 'I')
                self.videos.append(v)
                self.video_length.append(v.get_meta_data()['nframes'])
                self.video_fps.append(v.get_meta_data()['fps'])
            elif mode =='skimage':
                #~ v  = skimage.io.Video(source = vid, backend = 'gstreamer')
                v  = skimage.io.Video(source = vid, backend = 'opencv')
                self.videos.append(v)
                self.video_length.append(v.frame_count())
                #~ self.video_fps.append(25.)
                self.video_fps.append(float(v.duration())/v.frame_count())
            
            
            
            
        #~ print self.video_fps, self.video_length
        self.create_grid()
        
        self.paramVideos.sigTreeStateChanged.connect(self.create_grid)
        self.paramGlobal.param('nb_column').sigValueChanged.connect(self.create_grid)
        
        self.proxy = pg.SignalProxy(self.allParams.sigTreeStateChanged, rateLimit=5, delay=0.1, slot=self.refresh)
Example no. 19
def test_writer_ffmpeg_params(tmpdir):
    need_internet()
    # Test optional ffmpeg_params with a valid option
    tmpf = tmpdir.join('test.mp4')
    W = imageio.get_writer(str(tmpf), ffmpeg_params=['-vf', 'scale=320:240'])
    for i in range(10):
        W.append_data(np.zeros((100, 100, 3), np.uint8))
    W.close()
    W = imageio.get_reader(str(tmpf))
    # Check that the optional argument scaling worked.
    assert "320x240" in W._stderr_catcher.header
Example no. 20
def test_writer_wmv(tmpdir):
    # WMV has different default codec, make sure it works.
    tmpf = tmpdir.join("test.wmv")
    W = imageio.get_writer(str(tmpf), ffmpeg_params=["-v", "info"])
    for i in range(10):
        W.append_data(np.zeros((100, 100, 3), np.uint8))
    W.close()

    W = imageio.get_reader(str(tmpf))
    # Check that default encoder is msmpeg4 for wmv
    assert W._meta["codec"].startswith("msmpeg4")
Example no. 21
def test_writer_ffmpeg_params(tmpdir):
    # Test optional ffmpeg_params with a valid option
    # Also put in an image size that is not divisible by the macroblock size
    # to check that the -vf scale option overrides it.
    tmpf = tmpdir.join("test.mp4")
    W = imageio.get_writer(str(tmpf), ffmpeg_params=["-vf", "scale=320:240"])
    for i in range(10):
        W.append_data(np.zeros((100, 100, 3), np.uint8))
    W.close()
    W = imageio.get_reader(str(tmpf))
    # Check that the optional argument scaling worked.
    assert W._meta["size"] == (320, 240)
Example no. 22
def test_random_access():

    im1 = imageio.imread("imageio:chelsea.png")
    ims1 = [im1, im1 * 0.8, im1 * 0.5]

    fname = os.path.join(test_dir, "chelseam.bsdf")
    imageio.mimsave(fname, ims1)

    r = imageio.get_reader(fname)

    for i in (1, 0, 2, 0, 1, 2):
        assert np.all(ims1[i] == r.get_data(i))
Example no. 23
def test_writer_wmv(tmpdir):
    need_internet()
    # WMV has different default codec, make sure it works.
    tmpf = tmpdir.join('test.wmv')
    W = imageio.get_writer(str(tmpf), ffmpeg_params=['-v', 'info'])
    for i in range(10):
        W.append_data(np.zeros((100, 100, 3), np.uint8))
    W.close()

    W = imageio.get_reader(str(tmpf))
    # Check that default encoder is msmpeg4 for wmv
    assert "msmpeg4" in W._stderr_catcher.header
    def convert_to_gif(inputpath, target_format):
        """Reference: http://imageio.readthedocs.io/en/latest/examples.html#convert-a-movie"""
        outputpath = os.path.splitext(inputpath)[0] + target_format

        reader = imageio.get_reader(inputpath)
        fps = reader.get_meta_data()['fps']

        writer = imageio.get_writer(outputpath, fps=fps)
        for i, im in enumerate(reader):
            sys.stdout.write("\rframe {0}".format(i))
            sys.stdout.flush()
            writer.append_data(im)
        writer.close()
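A short usage sketch, assuming the helper above is reachable at module level and that 'clip.mp4' exists; both assumptions are illustrative only.

# Hypothetical call: writes 'clip.gif' next to the input, copying frames at the source fps.
convert_to_gif('clip.mp4', '.gif')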
Example no. 25
def test_writer_file_properly_closed(tmpdir):
    # Test to catch if file is correctly closed.
    # Otherwise it won't play in most players. This seems to occur on Windows.
    tmpf = tmpdir.join("test.mp4")
    W = imageio.get_writer(str(tmpf))
    for i in range(12):
        W.append_data(np.zeros((100, 100, 3), np.uint8))
    W.close()
    W = imageio.get_reader(str(tmpf))
    # If Duration: N/A reported by ffmpeg, then the file was not
    # correctly closed.
    # This will cause the file to not be readable in many players.
    assert 1.1 < W._meta["duration"] < 1.3
Example no. 26
def test_writer_ffmpeg_params(tmpdir):
    need_internet()
    # Test optional ffmpeg_params with a valid option
    # Also put in an image size that is not divisible by the macroblock size
    # to check that the -vf scale option overrides it.
    tmpf = tmpdir.join('test.mp4')
    W = imageio.get_writer(str(tmpf), ffmpeg_params=['-vf', 'scale=320:240'])
    for i in range(10):
        W.append_data(np.zeros((100, 100, 3), np.uint8))
    W.close()
    W = imageio.get_reader(str(tmpf))
    # Check that the optional argument scaling worked.
    assert "320x240" in W._stderr_catcher.header
Example no. 27
    def __init__(self, path_to_file, **kwargs):
        self.debug = kwargs['debug'] if 'debug' in kwargs else False
        self.paths = {'input': path_to_file,
                      'output': self._get_output_directory(path_to_file)}
        self.options = self._get_options(kwargs)
        self.tracker = FrameTracker(self.options['blocksize'], self.options['grid'])

        try:
            self.video = imageio.get_reader(self.paths['input'])
        except IOError:
            self.exit('video not found')

        self.frame_maps = []
        self.consecutive = 0
Example no. 28
def loadImageIOVideo(path):
    if not os.path.exists(path):
        print("ERROR: Video path not found: %s"%path)
        return None
    import imageio
    videoReader = imageio.get_reader(path, 'ffmpeg')
    NFrames = videoReader.get_length()
    F0 = videoReader.get_data(0)
    IDims = F0.shape
    I = np.zeros((NFrames, F0.size))
    I[0, :] = np.array(F0.flatten(), dtype = np.float32)/255.0
    for i in range(1, NFrames):
        I[i, :] = np.array(videoReader.get_data(i).flatten(), dtype = np.float32)/255.0
    return (I, IDims)
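A brief usage sketch; 'clip.mp4' is a placeholder. Because frames come back flattened, IDims is needed to restore the original (height, width, channels) shape.

# Hypothetical usage: reload the first frame in its original shape.
result = loadImageIOVideo('clip.mp4')
if result is not None:
    I, IDims = result
    frame0 = I[0].reshape(IDims)   # float values in [0, 1]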
Example no. 29
def test_reverse_read(tmpdir):
    # Ensure we can read a file in reverse without error.

    tmpf = tmpdir.join("test_vid.mp4")
    W = imageio.get_writer(str(tmpf))
    for i in range(120):
        W.append_data(np.zeros((64, 64, 3), np.uint8))
    W.close()

    W = imageio.get_reader(str(tmpf))
    for i in range(W.count_frames() - 1, 0, -1):
        print("reading", i)
        W.get_data(i)
    W.close()
Example no. 30
def test_writer_pixelformat_verbose(tmpdir):
    need_internet()
    # Make sure verbose option works and that default pixelformat is yuv420p
    tmpf = tmpdir.join('test.mp4')
    W = imageio.get_writer(str(tmpf), ffmpeg_log_level='debug')
    for i in range(10):
        W.append_data(np.zeros((100, 100, 3), np.uint8))
    W.close()

    # Check that video is correct size & default output video pixel format
    # is correct
    W = imageio.get_reader(str(tmpf))
    assert "100x100" in W._stderr_catcher.header
    assert "yuv420p" in W._stderr_catcher.header
Example no. 31
def getRawVideo(filename):
    filename, ext = os.path.splitext(filename)
    raw_filename = Path(filename + "_raw" + ext)
    if raw_filename.exists():
        return imageio.get_reader(raw_filename)
    return imageio.get_reader(filename + ext)
Example no. 32
import os
import imageio
from PIL import Image

T = 10
#reader = imageio.get_reader('video.avi','ffmpeg')
reader = imageio.get_reader('mv_test.mp4', 'ffmpeg')
for i, im in enumerate(reader):
    if (i % T == 0):
        x = i / T
        #imageio.imwrite(str(i)+'.png',im[:, :, 1])
        #print('i===',i)
        #print('im===',im)
        imageio.imwrite(
            '/home/aa/Desktop/everybody_dance_now_pytorch/datasets/train/train_B/'
            + '%05d' % x + '.png', im[:, :, :])
# Long-exposure (avg) simulation from video. Usage: vid_long_expose_avg.py source_video target_image
import sys
import numpy as np
import imageio
filename = sys.argv[1]
vid = imageio.get_reader(filename)
#vidout = imageio.get_writer("test.mp4", format=None, mode='I', fps=60)
target = vid.get_data(0).astype('Float32')
total = 0
last_frame = len(vid)
for i, img in enumerate(vid):
    target += img
    total += 1
    #vidout.append_data(target)
    percent = int((i + 1) / last_frame * 100)
    sys.stdout.write("\r[" + ('#' * int(percent / 4)) +
                     ('.' * (25 - int(percent / 4))) +
                     ("] %d" % percent + "%" + " (%d" % i +
                      "/%d" % last_frame + ")"))
    sys.stdout.flush()
target = target / (total * 255)
vid.close()
#vidout.close()
imageio.imwrite(sys.argv[2], target)
        while detections[0, i, j, 0] >= 0.6:
            point = (detections[0, i, j, 1:] * scale).numpy()
            cv.rectangle(frame, (int(point[0]), int(point[1])),
                         (int(point[2]), int(point[3])), (255, 0, 0), 2)
            cv.putText(frame, labelmap[i - 1], (int(point[0]), int(point[1])),
                       cv.FONT_HERSHEY_SIMPLEX, 2, (255, 255, 255), 2,
                       cv.LINE_AA)
            j += 1
    return frame


#Creating the SSD neural network
net = build_ssd('test')
net.load_state_dict(
    torch.load('ssd300_mAP_77.43_v2.pth',
               map_location=lambda storage, loc: storage)
)  # We get the weights of the neural network from another one that is pretrained (ssd300_mAP_77.43_v2.pth).

#Creating Transformation
transform = BaseTransform(net.size, (104 / 256.0, 117 / 256.0, 123 / 256.0))

# Doing some Object Detection on a video
reader = imageio.get_reader('one.mp4')
fps = reader.get_meta_data()['fps']
writer = imageio.get_writer('one_output.mp4', fps=fps)
for i, frame in enumerate(reader):
    detect(frame, net.eval(), transform)
    writer.append_data(frame)
    print(i)
writer.close()
def process_video(args):

    device = 'cpu' if args.cpu else 'cuda'
    fa = face_alignment.FaceAlignment(face_alignment.LandmarksType._2D,
                                      flip_input=False,
                                      device=device)
    video = imageio.get_reader(args.inp)

    trajectories = []
    previous_frame = None
    fps = video.get_meta_data()['fps']
    commands = []
    try:
        for i, frame in tqdm(enumerate(video)):
            frame_shape = frame.shape
            bboxes = extract_bbox(frame, fa)
            ## For each trajectory check the criterion
            not_valid_trajectories = []
            valid_trajectories = []

            for trajectory in trajectories:
                tube_bbox = trajectory[0]
                intersection = 0
                for bbox in bboxes:
                    intersection = max(
                        intersection,
                        bb_intersection_over_union(tube_bbox, bbox))
                if intersection > args.iou_with_initial:
                    valid_trajectories.append(trajectory)
                else:
                    not_valid_trajectories.append(trajectory)

            commands += compute_bbox_trajectories(not_valid_trajectories, fps,
                                                  frame_shape, args)
            trajectories = valid_trajectories

            ## Assign bbox to trajectories, create new trajectories
            for bbox in bboxes:
                intersection = 0
                current_trajectory = None
                for trajectory in trajectories:
                    tube_bbox = trajectory[0]
                    current_intersection = bb_intersection_over_union(
                        tube_bbox, bbox)
                    if intersection < current_intersection and current_intersection > args.iou_with_initial:
                        intersection = bb_intersection_over_union(
                            tube_bbox, bbox)
                        current_trajectory = trajectory

                ## Create new trajectory
                if current_trajectory is None:
                    trajectories.append([bbox, bbox, i, i])
                else:
                    current_trajectory[3] = i
                    current_trajectory[1] = join(current_trajectory[1], bbox)

    except IndexError as e:
        raise (e)

    commands += compute_bbox_trajectories(trajectories, fps, frame_shape, args)
    return commands
#!/usr/bin/python
# vim: set fileencoding=utf-8 :
############################################
#haochen.wang
#2018.01.23
############################################
import multiprocessing
import time
# import pylab
import imageio
import skimage
from PIL import Image
import os

video_path = [
    'students003.avi', 'crowds_zara01.avi', 'crowds_zara02.avi',
    'Arxiepiskopi_flock.avi'
]
for video in video_path:
    vid = imageio.get_reader(video, 'ffmpeg')
    save_path = video.split('.')[0]
    if not os.path.exists(save_path):
        os.makedirs(save_path)
    for i, img in enumerate(vid):
        opencvImg = skimage.img_as_ubyte(img, True)
        # From here on it's back to the familiar OpenCV routine
        img = Image.fromarray(opencvImg)
        img.save(save_path + '/' + str(i).zfill(6) + '.jpg')
        if i % 1000 == 0:
            print('now its video:' + video + '    frame:' + str(i))
def generate_tfrecord(movie):
    if movie not in MOVIE_SET:
        return
    segments = os.listdir(input_dir+movie)
    segments.sort()
    os.mkdir(output_dir + movie)
    for segment in segments:
        segment_no_ext = segment.split(".")[0]
        output_file = os.path.join(output_dir, movie, "%s.tfrecord" % segment_no_ext)
        input_file = os.path.join(input_dir, movie, segment)
        with tf.python_io.TFRecordWriter(output_file) as writer:
             # Read and resize all video frames, np.uint8 of size [N,H,W,3]
             video = imageio.get_reader(input_file)
             #frame_gen = video.iter_data()
             #frame_list = []
             #for frame in frame_gen:
             #    resized_frame = cv2.resize(frame, (400,400))
             #    frame_list.append(resized_frame)
               
             #frames = np.stack(frame_list, axis=0)
             vidinfo = video.get_meta_data()
             # vidfps = vidinfo['fps']
             # vid_W, vid_H = vidinfo['size']
             no_frames = vidinfo['nframes']-1 # last frames seem to be bugged
 
             f_timesteps, f_H, f_W, f_C = [32, 400, 400, 3]
 
             slope = (no_frames-1) / float(f_timesteps - 1)
             indices = (slope * np.arange(f_timesteps)).astype(np.int64)
 
             frames = np.zeros([f_timesteps, f_H, f_W, f_C], np.uint8)
 
             timestep = 0
             # for vid_idx in range(no_frames):
             for vid_idx in indices.tolist():
                 frame = video.get_data(vid_idx)
                 # frame = frame[:,:,::-1] #opencv reads bgr, i3d trained with rgb
                 reshaped = cv2.resize(frame, (f_W, f_H))
                 frames[timestep, :, :, :] = reshaped
                 timestep += 1
 
             video.close()

             if "%s.%s" % (movie, segment_no_ext) not in ANNOS:
                 continue

             # get labels
             labels_np, rois_np, no_det, segment_key = get_labels_wrapper(movie, segment_no_ext)


           
             features = {}
             features['num_frames']  = _int64_feature(frames.shape[0])
             features['height']      = _int64_feature(frames.shape[1])
             features['width']       = _int64_feature(frames.shape[2])
             features['channels']    = _int64_feature(frames.shape[3])
             #features['class_label'] = _int64_feature(example['class_id'])
             #features['class_text']  = _bytes_feature(tf.compat.as_bytes(example['class_label']))
             #features['filename']    = _bytes_feature(tf.compat.as_bytes(example['video_id']))
             #features['filename']    = _bytes_feature(tf.compat.as_bytes(fname))
             features['movie']    = _bytes_feature(tf.compat.as_bytes(movie))
             features['segment']    = _bytes_feature(tf.compat.as_bytes(segment_no_ext))
           
             # Compress the frames using JPG and store in as a list of strings in 'frames'
             encoded_frames = [tf.compat.as_bytes(cv2.imencode(".jpg", frame)[1].tobytes())
                               for frame in frames]
             features['frames'] = _bytes_list_feature(encoded_frames)
             
             #labels
             features['no_det'] = _int64_feature(no_det)
             features['segment_key'] = _bytes_feature(segment_key)
             features['labels'] = _int64_feature(labels_np.reshape([-1]).tolist())
             features['rois'] = _float_feature(rois_np.reshape([-1]).tolist())
           
             tfrecord_example = tf.train.Example(features=tf.train.Features(feature=features))
             writer.write(tfrecord_example.SerializeToString())
             #tqdm.write("Output file %s written!" % output_file)
             print("Output file %s written!" % output_file)
Example no. 38
#
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.
#

import imageio
import os
from os import listdir
from os.path import isfile, join 
from PIL import Image
import PIL   

os.mkdir("clevrer")
os.mkdir("clevrer/train")
mypath = 'video_train/'

outerfolders = [f for f in listdir(mypath)]

for folder_1 in outerfolders:
	new_path = mypath  + folder_1 + "/"
	files = [f for f in listdir(new_path) if isfile(join(new_path, f))]
	for file in files:
	    print(file)
	    path = new_path+file
	    vid = imageio.get_reader(path,  'ffmpeg')
	    new_new_path = "clevrer/train/"+file.split(".")[0].split("_")[1]+"/"
	    os.mkdir(new_new_path)
	    for i, im in enumerate(vid):
	        im = Image.fromarray(im)
	        im = im.save(new_new_path+"/"+str(i)+".png") 
Example no. 39
    imgurFilename = submission.url.split('/').pop().split('.')[0] + '.mp4'
    newUrl = submission.url.split('m/')[0] + 'm/' + imgurFilename
    print('url:',newUrl,'imgurFilename:', imgurFilename)
    # download gif
    if '//i.imgur.com/' in submission.url:

        if '?' in imgurFilename:
            imgurFilename = imgurFilename[:imgurFilename.find('?')]
        localFileName = 'reddit_%s' % imgurFilename
        download_image(newUrl, localFileName)

    revFileName = localFileName.split('.')[0] + '_reversed.mp4'

    # REVERSE GIF & SAVE
    vid = imageio.get_reader(localFileName, 'ffmpeg')
    revVid = imageio.get_writer(revFileName, 'ffmpeg',fps=27,quality=7,macro_block_size=None)
    print('frames:',len(vid))
    percent = '|' # if (len(vid) - i) % 10 == 0: print(len(vid) - i,'/',len(vid))
    for i in range(len(vid)-1,0,-1):
        revVid.append_data(vid.get_data(i))

    vid.close()
    revVid.close()

    # UPLOAD GIF
    url = "https://api.imgur.com/3/upload.json"
    headers = {
        "Authorization": "Bearer " + appconfig.imgur['auth']
    }
Example no. 40
                        # Add bbox to the image
                        label = '%s %.2f' % (self.classes[int(cls)], conf)
                        plot_one_box([x1, y1, x2, y2],
                                     img,
                                     label=label,
                                     color=self.colors[int(cls)])
            if self.debug_show:
                img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
                cv2.imshow('show', img)
                cv2.waitKey(1)
        return human_bound_rects


if __name__ == '__main__':

    cam = imageio.get_reader('<video0>', size=(1280, 720), fps=30)

    # cam.set(cv2.CAP_PROP_SETTINGS, 0)

    hd = Yolov3HumanDetector(debug_show=True)

    while True:
        img = cam.get_next_data()
        # frame = cv2.cvtColor(frame, cv2.COLOR_RGB2GRAY)
        hd.push_frame(img)
        rt = hd.detect()
        if len(rt) > 0:
            print('found human!')
        else:
            print('no found')
        cv2.waitKey(1000 // 30)
Example no. 41
import imageio
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import trackviz.static

# load data
tracks = pd.read_csv('sample_data/ant_tracking_res.csv').rename(
    columns={'frame': 't'})
image = imageio.get_reader('sample_data/ant_dataset.mp4').get_next_data()

# create labels
trackid = tracks['trackid'].unique()
label = np.random.choice(['a', 'b', 'c'], size=len(trackid))
labels = pd.DataFrame(dict(trackid=trackid, label=label))

# plot
fig, ax = trackviz.static.trajectory_2d(tracks,
                                        image=image,
                                        labels=labels,
                                        color='label',
                                        cbar=True,
                                        scale=0.8)

fig.savefig('output/static_2d_color_labels.png')
# plt.show()
Example no. 42
import numpy as np
from videogaze_model import VideoGaze
import cv2
import math
from sklearn import metrics

#Loading the model
model = VideoGaze(bs=batch_size, side=20)
checkpoint = torch.load('model.pth.tar')
model.load_state_dict(checkpoint['state_dict'])
#model.cuda()
cudnn.benchmark = True

#Reading input video
video_name = 'video_test.mp4'
vid = imageio.get_reader(video_name, 'ffmpeg')
fps = vid.get_meta_data()['fps']
frame_list = []
for i, im in enumerate(vid):
    frame_list.append(im)

print('Frame List Created')

#Loading the features for tracking
p_image = face_recognition.load_image_file("face.jpg")
p_encoding = face_recognition.face_encodings(p_image)[0]

trans = transforms.ToTensor()

#Reading video
vid = imageio.get_reader(video_name, 'ffmpeg')
Example no. 43
def permutate(im):
    # Convert the image (256,256,3) to the familiar [1,3,256,256] format
    return torch.tensor(im[np.newaxis].astype(np.float32)).permute(0, 3, 1, 2)


image_path = './input/input.jpg'
video_path = './input/bakamitai_template.mp4'
temp_out_path = './temp/baka_mitai_no_sound.mp4'
music_path = './input/bakamitai_sound_clip.mp3'
final_out_path = './output/output.mp4'
cpu = True

source_image = imageio.imread(image_path)
# Should fix memory error by feeding frame by frame as opposed to the whole video at once
driving_video = imageio.get_reader(video_path)

# Resize image to 256x256
source_image = resize(source_image, (256, 256))[..., :3]

fps = driving_video.get_meta_data()['fps']
ttl = driving_video.get_meta_data()['duration'] * fps
relative = True
print(fps, ttl)

generator, kp_detector = load_checkpoints(
    config_path='config/vox-adv-256.yaml',
    checkpoint_path='./data/checkpoints/vox-adv-cpk.pth.tar',
    cpu=cpu)

# Create Video Writer for output
                        type=int,
                        default=None,
                        help="Set frame to start from.")

    parser.add_argument("--cpu",
                        dest="cpu",
                        action="store_true",
                        help="cpu mode.")

    parser.set_defaults(relative=False)
    parser.set_defaults(adapt_scale=False)

    opt = parser.parse_args()

    source_image = imageio.imread(opt.source_image)
    reader = imageio.get_reader(opt.driving_video)
    fps = reader.get_meta_data()['fps']
    driving_video = []
    try:
        for im in reader:
            driving_video.append(im)
    except RuntimeError:
        pass
    reader.close()

    source_image = resize(source_image, (256, 256))[..., :3]
    driving_video = [
        resize(frame, (256, 256))[..., :3] for frame in driving_video
    ]
    generator, kp_detector = load_checkpoints(config_path=opt.config,
                                              checkpoint_path=opt.checkpoint,
angrypath = '../../workspace/micro-expression/data/angry/'
happypath = '../../workspace/micro-expression/data/happy/'
disgustpath = '../../workspace/micro-expression/data/disgust/'

i, j, k = 0, 0, 0

eye_training_list = []
nose_training_list = []

directorylisting = os.listdir(angrypath)
for video in directorylisting:
    eye_frames = []
    nose_mouth_frames = []
    videopath = angrypath + video
    print(videopath)
    loadedvideo = imageio.get_reader(videopath, 'ffmpeg')
    framerange = [x + 72 for x in range(96)]
    for frame in framerange:
        image = loadedvideo.get_data(frame)
        landmarks = get_landmark(image)
        numpylandmarks = numpy.asarray(landmarks)
        eye_image = image[numpylandmarks[19][1]:numpylandmarks[1][1],
                          numpylandmarks[1][0]:numpylandmarks[15][0]]
        eye_image = cv2.resize(eye_image, (32, 32),
                               interpolation=cv2.INTER_AREA)
        eye_image = cv2.cvtColor(eye_image, cv2.COLOR_BGR2GRAY)
        nose_mouth_image = image[numpylandmarks[2][1]:numpylandmarks[6][1],
                                 numpylandmarks[2][0]:numpylandmarks[14][0]]
        nose_mouth_image = cv2.resize(nose_mouth_image, (32, 32),
                                      interpolation=cv2.INTER_AREA)
        nose_mouth_image = cv2.cvtColor(nose_mouth_image, cv2.COLOR_BGR2GRAY)
    y = net(x)
    detections = y.data
    scale = torch.Tensor([width, height, width, height])
    # detections = [batch, number of classes, number of coincidence, (Score, x0, y0, x1, y1)]
    for i in range(detections.size(1)):
        j = 0
        while detections[0,i,j,0] >=0.6:
            pt = (detections[0,i,j,1:]*scale).numpy()
            cv2.rectangle(frame, (int(pt[0]), int(pt[1])), (int(pt[2]), int(pt[3])), (255,0,0), 2)
            cv2.putText(frame, labelmap[i - 1], (int(pt[0]), int(pt[1])), cv2.FONT_HERSHEY_SIMPLEX, 2, (255,255,255),2,cv2.LINE_AA)
            j += 1
    return frame


            
# Creating the SSD neural network
net = build_ssd('test')
net.load_state_dict(torch.load('ssd300_mAP_77.43_v2.pth', map_location = lambda storage, loc:storage))


# Creating the transformation
transform = BaseTransform(net.size, (104/256.0, 117/256.0, 123/256.0))

reader = imageio.get_reader('epic-horses.mp4')
fps = reader.get_meta_data()['fps']
writer = imageio.get_writer('outputhw.mp4', fps = fps)
for i,frame in enumerate(reader):
    frame = detect(frame,net.eval(), transform)
    writer.append_data(frame)
    print(i)
writer.close()
Example no. 47
from collections import defaultdict
from io import StringIO
from matplotlib import pyplot as plt
from PIL import Image

# This is needed since the notebook is stored in the object_detection folder.
sys.path.append("..")
from object_detection.utils import ops as utils_ops

if StrictVersion(tf.__version__) < StrictVersion('1.12.0'):
    raise ImportError(
        'Please upgrade your TensorFlow installation to v1.12.*.')

import imageio

reader = imageio.get_reader('video/1.mp4')
fps = reader.get_meta_data()['fps']
writer = imageio.get_writer('video/output1.mp4', fps=fps)

# ## Object detection imports
# Here are the imports from the object detection module.

# In[3]:

from utils import label_map_util

from utils import visualization_utils as vis_util

# # Model preparation

# ## Variables
                '/home/monark/LEARNINGS/Projects/SDC/sdrcc/training_data/' +
                folder)):
        # steer-1.csv --> steering angle (direction), action and timestamp
        data_stack = pd.read_csv(
            '/home/monark/LEARNINGS/Projects/SDC/sdrcc/training_data/' +
            folder + '/steer-1.csv')

        # start-ts-1.pkl --> start time of video recording
        with open(
                '/home/monark/LEARNINGS/Projects/SDC/sdrcc/training_data/' +
                folder + '/start-ts-1.pkl', 'rb') as f:
            start_time = pickle.load(f)

        # load mp4 video
        filename = '/home/monark/LEARNINGS/Projects/SDC/sdrcc/training_data/' + folder + '/video1.mp4'
        vid = imageio.get_reader(filename, 'ffmpeg')

        # extract start and end time for each action
        angle_df = pd.DataFrame(
            columns=['angle', 'start_timestamp', 'end_timestamp'])
        stack = Stack()
        for i in range(len(data_stack)):
            angle, action, timestamp = data_stack.loc[i][
                'angle'], data_stack.loc[i]['action'], data_stack.loc[i][
                    'timestamp']
            if action == 'pressed':
                stack.push([angle, timestamp])
            elif action == 'released':
                start_ts = stack.pop()[1]
                end_ts = timestamp
                angle_df.loc[len(angle_df)] = [angle, start_ts, end_ts]
def align_new_audio_to_video(source_video, target_dialogue, new_video_name, verbose=False, profile_time=False):
    """Dynamic programming reference - "A Maximum Likelihood Stereo Algorithm"
    by Ingemar J. Cox, Sunita L. Hingorani, Satish B. Rao
    (https://pdfs.semanticscholar.org/b232/e3426e0014389ea05132ea8d08789dcc0566.pdf)
    """
    
    if profile_time:
        import time
        times = {}
        start_time = time.time()
    
    # READ SOURCE VIDEO
    if verbose:
        print("Reading source video", source_video)
    video_reader = imageio.get_reader(source_video) 
    video_fps = video_reader.get_meta_data()['fps']
    
    if profile_time:
        source_video_dur = video_reader.get_meta_data()['duration']
        video_read_time = time.time()
        times['00_video_read'] = video_read_time - start_time
    
    # READ SOURCE AUDIO
    # Convert video's audio into a .wav file
    if verbose:
        print("Writing source video's audio as /tmp/audio.wav")
    ret = subprocess.call(['ffmpeg', '-loglevel', 'error', '-i', source_video, '-y', '-codec:a', 'pcm_s16le', '-ac', '1', '/tmp/audio.wav'])
    
    if profile_time:
        source_audio_write_time = time.time()
        times['01_source_audio_write'] = source_audio_write_time - video_read_time
    
    # Read the .wav file
    if verbose:
        print("Reading source video's audio - /tmp/audio.wav")
    source_audio_fs, source_audio = scipy.io.wavfile.read('/tmp/audio.wav')
    if len(source_audio.shape) > 1:
        source_audio = source_audio[:, 0]
    
    if profile_time:
        source_audio_read_time = time.time()
        times['02_source_audio_read'] = source_audio_read_time - source_audio_write_time    
    
    # READ TARGET AUDIO
    # Check file type
    file_type = os.path.splitext(target_dialogue)[-1]
    # If file type is not .wav, convert it to .wav and read that
    if file_type != '.wav':
        if verbose:
            print("Target dialogue not a .wav file! Given:", target_dialogue)
            print("Converting target dialogue file into .wav - /tmp/audio.wav")
        ret = subprocess.call(['ffmpeg', '-loglevel', 'error', '-i', target_dialogue, '-y', '-codec:a', 'pcm_s16le', '-ac', '1', '/tmp/audio.wav'])
        target_dialogue = '/tmp/audio.wav'
    
    # Read the target .wav file
    if verbose:
        print("Reading target audio", target_dialogue)
    target_audio_fs, target_audio = scipy.io.wavfile.read(target_dialogue)
    if len(target_audio.shape) > 1:
        target_audio = target_audio[:, 0]
    
    if profile_time:
        target_audio_dur = len(target_audio) / target_audio_fs
        target_audio_read_time = time.time()
        times['03_target_audio'] = target_audio_read_time - source_audio_read_time
    
    # EXTRACT MFCC FEATURES
    frame_length = 0.025
    frame_stride = 0.010
    num_cepstral = 13
    num_filters = 40
    if verbose:
        print("Converting source and target audio into MFCC features with frame_length", frame_length,
              ", frame_stride", frame_stride, ", num_cepstral", num_cepstral, ", num_filters", num_filters)
    # Extract MFCC features of source audio
    source_audio_mfcc = speechpy.feature.mfcc(source_audio, sampling_frequency=source_audio_fs,
                                              frame_length=frame_length, frame_stride=frame_stride,
                                              num_cepstral=num_cepstral, num_filters=num_filters)
    # Extract MFCC features of target audio
    target_audio_mfcc = speechpy.feature.mfcc(target_audio, sampling_frequency=target_audio_fs,
                                              frame_length=frame_length, frame_stride=frame_stride,
                                              num_cepstral=num_cepstral, num_filters=num_filters)
    
    if profile_time:
        mfcc_extract_time = time.time()
        times['04_MFCC_extract'] = mfcc_extract_time - target_audio_read_time
    
    # DO DYNAMIC PROGRAMMING BETWEEN THE SOURCE AND TARGET AUDIO MFCC FRAMES
    if verbose:
        print("Doing dynamic programming between source and target audio")
    mapped_target_audio_frames_of_source_audio_frames, \
        mapped_source_audio_frames_of_target_audio_frames = dynamic_programming(source_audio_mfcc, target_audio_mfcc)
    
    if profile_time:
        dp_time = time.time()
        times['05_dynamic_programming'] = dp_time - mfcc_extract_time
    
    # CONVERT AUDIO MAPPING TO VIDEO MAPPING, i.e. mapped_source_video_frames_of_target_video_frames
    if verbose:
        print("Converting mapped_source_audio_frames_of_target_audio_frames into mapped_source_video_frames_of_target_video_frames")
    # Get source videos frames of the target audio frames
    mapped_source_video_frames_of_target_audio_frames = mapped_source_audio_frames_of_target_audio_frames * frame_stride * video_fps
    # Calculate the number of target video frames (from the number of audio frames and fps)
    num_of_target_video_frames = round( len(target_audio_mfcc) * frame_stride * video_fps )
    # Make a linear mapping from the target audio frames to target video frames
    target_audio_frames_idx_of_target_video_frames = np.round(np.linspace(0,
                                                                          len(target_audio_mfcc)-1,
                                                                          num_of_target_video_frames)).astype(int)
    # Select the source video frames corresponding to each target video frame
    mapped_source_video_frames_of_target_video_frames = np.floor(mapped_source_video_frames_of_target_audio_frames[target_audio_frames_idx_of_target_video_frames]).astype(int)
    
    if profile_time:
        convert_audio_map_to_video_map_time = time.time()
        times['06_audio_map_to_video_map'] = convert_audio_map_to_video_map_time - dp_time
    
    # MAKE NEW VIDEO
    
    if verbose:
        print("Making new video", new_video_name)
    
    # Read video
    source_frames = []
    for frame in video_reader:
        source_frames.append(frame)
    
    if profile_time:
        read_source_video_frames_time = time.time()
        times['07_read_source_video_frames'] = read_source_video_frames_time - convert_audio_map_to_video_map_time
    
    # Note new frames
    new_frames = []
    for source_frame_number in mapped_source_video_frames_of_target_video_frames:
        new_frames.append(source_frames[int(source_frame_number)])
    
    # Save new video
    if os.path.splitext(new_video_name)[-1] != '.mp4':
        new_video_name += '.mp4'
        if verbose:
            print("new_video_name not mp4! Modified to", new_video_name)
    
    if verbose:
        print("Writing mp4 of new video frames /tmp/video.mp4")
    imageio.mimwrite('/tmp/video.mp4', new_frames, fps=video_fps)
     
    if profile_time:
        save_new_frames_time = time.time()
        times['08_save_new_frames'] = save_new_frames_time - read_source_video_frames_time
    
    if verbose:
        print("Writing new video with source_video frames and target dialogue", new_video_name)
    command = ['ffmpeg', '-loglevel', 'error',
               '-i', '/tmp/video.mp4', '-i', target_dialogue, '-y',
               '-vcodec', 'libx264', '-preset', 'ultrafast', '-profile:v', 'main',
               '-acodec', 'aac', '-strict', '-2',
               new_video_name]
    ret = subprocess.call(command)
    
    if verbose:
        print("Done!")
    
    if profile_time:
        new_video_write_time = time.time()
        times['09_new_video_write'] = new_video_write_time - save_new_frames_time
        print("Source video duration:", source_video_dur, "seconds")
        print("Target audio duration:", target_audio_dur, "seconds")
        for key in sorted(times.keys()):
            print("{0:30s}: {1:.02f} seconds".format(key, times[key]))
Example no. 50
def extract_features(video_file):
    print(video_file)
    video_name = os.path.splitext(video_file)[0]
    if args.structure == 'tsn':  # create the video folder if the data structure is TSN
        if not os.path.isdir(path_output + video_name + '/'):
            os.makedirs(path_output + video_name + '/')

    num_exist_files = len(os.listdir(path_output + video_name + '/'))

    frames_tensor = []
    # print(class_name)
    if args.input_type == 'video':
        reader = imageio.get_reader(path_input + class_name + '/' + video_file)

        #--- collect list of frame tensors
        id_frame = 0  # defined up front so the except-branch message cannot raise a NameError
        try:
            for t, im in enumerate(reader):
                if np.sum(im.shape) != 0:
                    id_frame = t + 1
                    frames_tensor.append(
                        im2tensor(im))  # include data pre-processing
        except RuntimeError:
            print(Back.RED + 'Could not read frame', id_frame + 1, 'from',
                  video_file)
    elif args.input_type == 'frames':
        list_frames = os.listdir(path_input + class_name + '/' + video_file)
        list_frames.sort()

        # --- collect list of frame tensors
        id_frame = 0  # defined up front so the except-branch message cannot raise a NameError
        try:
            for t in range(len(list_frames)):
                im = imageio.imread(path_input + class_name + '/' +
                                    video_file + '/' + list_frames[t])
                if np.sum(im.shape) != 0:
                    id_frame = t + 1
                    frames_tensor.append(
                        im2tensor(im))  # include data pre-processing
        except RuntimeError:
            print(Back.RED + 'Could not read frame', id_frame + 1, 'from',
                  video_file)

    #--- divide the list into two parts: major (can be divided by batch size) & the rest (will be padded with dummy tensors)
    num_frames = len(frames_tensor)
    if num_frames == num_exist_files:  # skip if the features are already saved
        return

    num_major = num_frames // args.batch_size * args.batch_size
    num_rest = num_frames - num_major

    # add dummy tensor to make total size == batch_size*N
    num_dummy = args.batch_size - num_rest
    for i in range(num_dummy):
        frames_tensor.append(torch.zeros_like(frames_tensor[0]))

    #--- extract video features
    features = torch.Tensor()

    for t in range(0, num_frames + num_dummy, args.batch_size):
        frames_batch = frames_tensor[t:t + args.batch_size]
        features_batch = extract_frame_feature_batch(frames_batch)
        features = torch.cat((features, features_batch))

    features = features[:num_frames]  # remove the dummy part

    #--- save the frame-level feature vectors to files
    for t in range(features.size(0)):
        id_frame = t + 1
        id_frame_name = str(id_frame).zfill(5)
        if args.structure == 'tsn':
            filename = path_output + video_name + '/' + 'img_' + id_frame_name + feature_in_type
        elif args.structure == 'imagenet':
            filename = path_output + class_name + '/' + video_name + '_' + id_frame_name + feature_in_type
        else:
            raise NameError(Back.RED + 'not valid data structure')

        if not os.path.exists(filename):
            torch.save(
                features[t].clone(), filename
            )  # if no clone(), the size of features[t] will be the same as features
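As a quick illustration of the padding arithmetic used above, here is a minimal sketch with made-up batch size and frame count:

batch_size = 8
num_frames = 21

num_major = num_frames // batch_size * batch_size   # 16: the part that fills whole batches
num_rest = num_frames - num_major                   # 5: leftover frames
num_dummy = batch_size - num_rest                   # 3: dummies appended to complete the last batch

assert (num_frames + num_dummy) % batch_size == 0
# after extraction, features[:num_frames] drops the dummy rows again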
Exemplo n.º 51
0
	points_found_1 = computeCornerResponse(img_1,MIN_RADIUS_POINTS,NUM_POINTS_TO_FIND,HARRIS_TRESHOLD_RELATIVE)
	descriptors_1 = extractDescriptors(img_1,points_found_1,DESCRIPTOR_SHAPE_XY)

	points_found_2 = computeCornerResponse(img_2,MIN_RADIUS_POINTS,NUM_POINTS_TO_FIND,HARRIS_TRESHOLD_RELATIVE)
	descriptors_2 = extractDescriptors(img_2,points_found_2,DESCRIPTOR_SHAPE_XY)

	points_1, points_2, match_score = defineCorrespondences(descriptors_1,points_found_1,descriptors_2,points_found_2,RUSSIAN_GRANDMA_TRESHOLD)

	return match_score


if __name__ == '__main__':


	target_filename = 'tar.mp4'
	target_video_reader = imageio.get_reader(target_filename,  'mp4')
	target_fps = target_video_reader.get_meta_data()['fps']
	tar_num_frames = target_video_reader.get_meta_data()['nframes']
	print(target_video_reader.get_meta_data())

	target_frames,target_indexes = getLowResolution(target_video_reader,INIT_TAR_FRAME,END_TAR_FRAME,0.1)

	saveFrames("out_low.mp4",target_frames,target_fps)
	
	self_probabilities, self_N = getProbabilities(target_frames,target_indexes, target_video_reader, target_fps)

	figure, axis = plt.subplots(ncols=1)
	axis.imshow(trip(self_probabilities/np.max(self_probabilities)), vmin=0, vmax=1, interpolation="nearest")
	plt.show(block=True)

	true_tar_frames = target_frames[self_N:len(target_frames)-self_N-1]
    for i in range(detections.size(1)):  # iterate through classes
        j = 0
        while detections[0, i, j, 0] >= 0.6:
            pt = (detections[0, i, j, 1:] * scale).numpy()
            cv2.rectangle(frame, (int(pt[0]), int(pt[1])),
                          (int(pt[2]), int(pt[3])), (255, 0, 0), 2)
            cv2.putText(frame, labelmap[i - 1], (int(pt[0]), int(pt[1])),
                        cv2.FONT_HERSHEY_SIMPLEX, 2, (255, 255, 255), 2,
                        cv2.LINE_AA)
            j += 1
    return frame


# Creating the SSD neural network
net = build_ssd('test')
net.load_state_dict(
    torch.load('ssd300_mAP_77.43_v2.pth',
               map_location=lambda storage, loc: storage))

# Creating the transformation
transform = BaseTransform(net.size, (104 / 256.0, 117 / 256.0, 123 / 256.0))

# Doing some Object Detection on a video
reader = imageio.get_reader('funny_dog.mp4')
fps = reader.get_meta_data()['fps']
writer = imageio.get_writer('output.mp4', fps=fps)
for i, frame in enumerate(reader):
    frame = detect(frame, net.eval(), transform)
    writer.append_data(frame)
    print(i)
writer.close()
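Only the drawing loop of detect() is shown in this snippet. A hedged sketch of what its top presumably looks like in this style of SSD example follows; the preprocessing details (and BaseTransform returning an (image, boxes, labels) tuple) are assumptions, not the original code.

import torch

def detect(frame, net, transform):
    height, width = frame.shape[:2]
    frame_t = transform(frame)[0]                    # assumed: resized/normalized float image
    x = torch.from_numpy(frame_t).permute(2, 0, 1)   # HWC -> CHW
    x = x.unsqueeze(0)                               # add a batch dimension
    detections = net(x).data                         # assumed shape: [batch, classes, top_k, 5]
    scale = torch.Tensor([width, height, width, height])
    # ...followed by the per-class drawing loop shown above, which returns `frame`
    return frame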
Exemplo n.º 53
0
video_list = {}
video_path = os.path.dirname(os.path.abspath(__file__)) + "/animations/"

video_list_name = [
    'idle', 'go_left', 'back_left', 'idle_left', 'go_right', 'back_right',
    'idle_right', 'go_left', 'back_left', 'idle_left', 'go_attentive',
    'back_attentive', 'idle_attentive', 'go_laugh', 'back_laugh', 'idle_laugh',
    'go_angry', 'back_angry', 'idle_angry', 'go_sad', 'back_sad', 'idle_sad',
    'go_confused', 'back_confused', 'idle_confused'
]

for video_name in video_list_name:

    try:
        video_mp4 = imageio.get_reader(video_path + video_name + ".gif")
        video_list.update({video_name: video_mp4})

    except FileNotFoundError:
        print("Error: Wrong path, cannot find " + video_name + ".gif")
        sys.exit(1)

# --------------------- Functions and variables for ROS logic  ----------------------- #

newRosCommandFlag = False
newRosCommand = ""
last_expression = "IDLE"

#Define the expressions accepted through rosservice
expressions_list = [
    'IDLE', 'LOOK_LEFT', 'LOOK_RIGHT', 'HAPPY', 'ANGRY', 'SAD', 'ATTENTIVE',
Exemplo n.º 54
0
def GenerateVideo():
    globs = globals()
    disable_widget_names = [
        "source_preview", "driving_preview", "generate_preview",
        'source_button', 'driving_button', 'saveto_button'
    ]
    for widget in disable_widget_names:
        globs[widget].configure(state='disabled')
    generate_button = globs['generate_button']
    generate_button.grid_remove()
    progress_var = globs['progress_var']
    progress_bar = globs['progress_bar']
    progress_bar.grid()
    progress_label = globs['progress_label']
    progress_label.grid()

    source_address, driving_address, generated_address = globs[
        "source_address"], globs["driving_address"], globs["generated_address"]
    #Load source image and driving video
    source_image = imageio.imread(source_address)
    reader = imageio.get_reader(driving_address)

    #Resize image and video to 256x256
    source_image = resize(source_image, (256, 256))[..., :3]

    fps = reader.get_meta_data()['fps']
    driving_video = []
    try:
        for im in reader:
            driving_video.append(im)
    except RuntimeError:
        pass
    reader.close()

    driving_video = [
        resize(frame, (256, 256))[..., :3] for frame in driving_video
    ]

    predictions = make_animation(source_image,
                                 driving_video,
                                 generator,
                                 kp_detector,
                                 relative=True,
                                 adapt_movement_scale=True,
                                 cpu=True,
                                 progress_var=progress_var,
                                 progress_label=progress_label)
    # relative and adapt_movement_scale can be changed to obtain different results

    # Save resulting video
    imageio.mimsave(generated_address,
                    [img_as_ubyte(frame) for frame in predictions],
                    fps=fps)

    # View generated video
    message = messagebox.askquestion(title="Generation successfull.",
                                     message="Video at:\n" +
                                     generated_address +
                                     "\n\nView generated video?",
                                     icon="question")
    if 'yes' in message:
        os.startfile(generated_address)

    globs = globals()
    for widget in disable_widget_names:
        globs[widget].configure(state='normal')
    generate_button.grid()
    progress_var.set(0)
    progress_bar.grid_remove()
    progress_label.grid_remove()
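One detail worth noting: skimage's resize returns float frames in [0, 1], which is why img_as_ubyte is applied before mimsave. A minimal, self-contained sketch with random data and made-up sizes:

import numpy as np
from skimage.transform import resize
from skimage import img_as_ubyte

frame = np.random.randint(0, 255, (480, 640, 3), dtype=np.uint8)
small = resize(frame, (256, 256))[..., :3]   # float64 values in [0, 1]
back = img_as_ubyte(small)                   # uint8 values in [0, 255], ready for a video writer

print(small.dtype, back.dtype)               # float64 uint8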
Exemplo n.º 55
0
                                          minNeighbors=5)

    for (x, y, w, h) in faces:
        roi_gray = gray[y:y + h, x:x + w]

        id_, conf = recognizer.predict(roi_gray)
        # As I understood from the tutorial video, there is no definitive rule for the
        # confidence value, so these bounds are used as a heuristic.
        # Raising the value above 60 gave better matches in testing, though the
        # reasoning behind the exact numbers is still not fully clear to me.
        if conf >= 45 and conf <= 85:
            # if a face is recognized, draw the name onto the image
            font = cv2.FONT_HERSHEY_SIMPLEX
            name = labels[id_]
            color = (255, 255, 255)
            stroke = 2
            cv2.putText(frame, name, (x, y - 10), font, 1, color, stroke,
                        cv2.LINE_AA)
            color = (0, 0, 255)
            cv2.rectangle(frame, (x, y), (x + w, y + h), color, 2)
    return frame


# read the video and write it back out frame by frame
reader = imageio.get_reader('deneme.avi')
fps = reader.get_meta_data()['fps']
writer = imageio.get_writer('output.avi', fps=fps)
for i, frame in enumerate(reader):
    frame = detect(frame)
    writer.append_data(frame)
    print(i)
writer.close()
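The recognizer, labels and gray variables used inside detect() are not shown in this snippet. A hedged sketch of the setup such examples typically rely on is below; the filenames and cascade are assumptions, and cv2.face requires opencv-contrib-python.

import pickle
import cv2

# assumed setup, not shown in the snippet above
face_cascade = cv2.CascadeClassifier(
    cv2.data.haarcascades + "haarcascade_frontalface_default.xml")
recognizer = cv2.face.LBPHFaceRecognizer_create()       # needs opencv-contrib-python
recognizer.read("trainer.yml")                          # hypothetical filename from a training script
with open("labels.pickle", "rb") as f:
    labels = {v: k for k, v in pickle.load(f).items()}  # invert name->id into id->name

# inside detect(), `gray` and `faces` are presumably obtained with something like:
# gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
# faces = face_cascade.detectMultiScale(gray, scaleFactor=1.5, minNeighbors=5)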
Exemplo n.º 56
0
import matplotlib.animation as animation
from skimage.transform import resize
import warnings
import sys

warnings.filterwarnings("ignore")

imagename = sys.argv[1]
#print(imagename)
videoname = sys.argv[2]

# Put image name here
source_image = imageio.imread(imagename)

# Put video name here
reader = imageio.get_reader(videoname)

#Resize image and video to 256x256

source_image = resize(source_image, (256, 256))[..., :3]

fps = reader.get_meta_data()['fps']
driving_video = []
try:
    for im in reader:
        driving_video.append(im)
except RuntimeError:
    pass
reader.close()

driving_video = [resize(frame, (256, 256))[..., :3] for frame in driving_video]
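This snippet is cut off after the preprocessing. A hedged sketch of the step that usually follows, mirroring Exemplo n.º 54 above, would look roughly like the lines below; generator and kp_detector are assumed to have been loaded elsewhere, and the output filename is hypothetical.

from skimage import img_as_ubyte

predictions = make_animation(source_image, driving_video,
                             generator, kp_detector,
                             relative=True, adapt_movement_scale=True)
imageio.mimsave("generated.mp4",                     # hypothetical output name
                [img_as_ubyte(frame) for frame in predictions],
                fps=fps)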
interval=10


path='./UCF-101/'
file_names = os.listdir(path)
print(file_names)
f_feature=open('ucf_features'+str(interval)+'.txt','w')
f_imgs=open('ucf_imgs'+str(interval)+'.txt','w')
num=0
for file_name in file_names:
    new_path=path+file_name+'/'
    for v_name in os.listdir(new_path):
        print(num)
        num+=1
        v_path=new_path+v_name
        video= imageio.get_reader(v_path)
        for im in enumerate(video):
            if im[0]==5:
                img5=Image.fromarray(np.uint8(im[1]))
            if im[0]==(5+interval):
                img30=Image.fromarray(np.uint8(im[1]))
                feature30=layer1(tran(img30).unsqueeze(0))
                feature30=feature30.detach().numpy()
                feature30 = feature30.squeeze(0)  # squeeze returns a new array; assign it back
                img5.save('./ucf_imgs'+str(interval)+'/a'+str(num)+'.jpg')
                f_imgs.write('/ucf_imgs'+str(interval)+'/a'+str(num)+'.jpg\n')
                np.save('./ucf_f'+str(interval)+'/a'+str(num)+'.npy',feature30)
                f_feature.write('/ucf_f'+str(interval)+'/a'+str(num)+'.npy\n')
            if im[0]==10:
                img10=Image.fromarray(np.uint8(im[1]))
            if im[0]==(10+interval):
Exemplo n.º 58
0
        def on_refresh():
            while layout.count() > 1:
                layout.takeAt(1).widget().deleteLater()

            camera_readers = {}
            for device in get_all_camera_devices():
                try:
                    camera_readers[device] = imageio.get_reader(
                        "<{}>".format(os.path.basename(device)))
                except:
                    pass

            serial_readers = {}
            for port in get_all_serial_ports():
                try:
                    baudrate = int(subprocess.check_output(
                        "stty < {}".format(port), shell=True).split()[1])
                    serial_readers[port] = serial.Serial(
                        port, baudrate=baudrate, timeout=1)
                except:
                    continue

            time.sleep(1)

            layout.addRow(create_toolbox_header("Camera"))

            for device, reader in camera_readers.items():
                try:
                    image = reader.get_next_data()
                except:
                    continue
                finally:
                    reader.close()
                image = QImage(
                    image.tobytes(), image.shape[1], image.shape[0],
                    QImage.Format_RGB888)
                image = image.scaledToHeight(100)
                pixmap = QPixmap(image)

                canvas = QLabel()
                layout.addRow(create_expanding_label(device), canvas)
                canvas.setFixedHeight(100)
                canvas.setPixmap(pixmap)

            layout.addRow(create_toolbox_header("ROS"))

            for topic in get_all_ros_topics():
                try:
                    # grab one message from the topic and keep a short text preview
                    message = subprocess.check_output(
                        ["rostopic", "echo", "-n", "1", topic]).decode("UTF-8")[:20]
                except:
                    continue

                output = QLabel()
                layout.addRow(output)
                output.setText(message)

            layout.addRow(create_toolbox_header("Serial"))

            for port, reader in serial_readers.items():
                try:
                    line = reader.readline().decode("UTF-8")[:20]
                except:
                    continue
                finally:
                    reader.close()

                output = QLabel()
                layout.addRow(port, output)
                output.setText(line)
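get_all_camera_devices, get_all_serial_ports and get_all_ros_topics are not defined in this snippet; plausible stand-ins (Linux-specific and purely illustrative) could look like this:

import glob
import subprocess

def get_all_camera_devices():
    # video capture devices exposed by V4L2
    return sorted(glob.glob("/dev/video*"))

def get_all_serial_ports():
    # common USB serial device nodes
    return sorted(glob.glob("/dev/ttyUSB*") + glob.glob("/dev/ttyACM*"))

def get_all_ros_topics():
    try:
        return subprocess.check_output(["rostopic", "list"]).decode("UTF-8").split()
    except Exception:
        return []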
    scale = torch.Tensor([width, height, width, height])

    for i in range(detections.size(1)):
        j = 0
        while detections[0, i, j, 0] >= 0.7:
            pt = (detections[0, i, j, 1:] * scale).numpy()
            cv2.rectangle(frame, (int(pt[0]), int(pt[1])),
                          (int(pt[2]), int(pt[3])), (255, 0, 0), 2)
            cv2.putText(frame, labelmap[i - 1], (int(pt[0]), int(pt[1])),
                        cv2.FONT_HERSHEY_SIMPLEX, 2, (255, 255, 255), 2,
                        cv2.LINE_AA)
            j = j + 1
    return frame


net = build_ssd('test')
net.load_state_dict(
    torch.load('ssd300_mAP_77.43_v2.pth',
               map_location=lambda storage, loc: storage))

transform = BaseTransform(net.size, (104 / 256.0, 117 / 256.0, 123 / 256.0))

reader = imageio.get_reader('video')
fps = reader.get_meta_data()['fps']
writer = imageio.get_writer('output.mp4', fps=fps)
for i, frame in enumerate(reader):
    frame = detect(frame, net.eval(), transform)
    writer.append_data(frame)
    print(i)
writer.close()
def create_tf_example(segment_key):
    movie, segment_no_ext = segment_key.split('.')
    segment = '%s.mp4' % segment_no_ext
    input_file = os.path.join(input_dir, movie, segment)
    # Read and resize all video frames, np.uint8 of size [N,H,W,3]
    video = imageio.get_reader(input_file)
    #frame_gen = video.iter_data()
    #frame_list = []
    #for frame in frame_gen:
    #    resized_frame = cv2.resize(frame, (400,400))
    #    frame_list.append(resized_frame)
      
    #frames = np.stack(frame_list, axis=0)
    vidinfo = video.get_meta_data()
    # vidfps = vidinfo['fps']
    # vid_W, vid_H = vidinfo['size']
    no_frames = vidinfo['nframes']-1 # last frames seem to be bugged
 
    f_timesteps, f_H, f_W, f_C = [32, 400, 400, 3]
 
    slope = (no_frames-1) / float(f_timesteps - 1)
    indices = (slope * np.arange(f_timesteps)).astype(np.int64)
 
    frames = np.zeros([f_timesteps, f_H, f_W, f_C], np.uint8)
 
    timestep = 0
    # for vid_idx in range(no_frames):
    for vid_idx in indices.tolist():
        frame = video.get_data(vid_idx)
        # frame = frame[:,:,::-1] #opencv reads bgr, i3d trained with rgb
        reshaped = cv2.resize(frame, (f_W, f_H))
        frames[timestep, :, :, :] = reshaped
        timestep += 1
 
    video.close()

    if "%s.%s" % (movie, segment_no_ext) not in ANNOS:
        return None

    # get labels
    labels_np, rois_np, no_det, segment_key = get_labels_wrapper(movie, segment_no_ext)


    
    features = {}
    features['num_frames']  = _int64_feature(frames.shape[0])
    features['height']      = _int64_feature(frames.shape[1])
    features['width']       = _int64_feature(frames.shape[2])
    features['channels']    = _int64_feature(frames.shape[3])
    #features['class_label'] = _int64_feature(example['class_id'])
    #features['class_text']  = _bytes_feature(tf.compat.as_bytes(example['class_label']))
    #features['filename']    = _bytes_feature(tf.compat.as_bytes(example['video_id']))
    #features['filename']    = _bytes_feature(tf.compat.as_bytes(fname))
    features['movie']    = _bytes_feature(tf.compat.as_bytes(movie))
    features['segment']    = _bytes_feature(tf.compat.as_bytes(str(segment_no_ext)))
    
    ## Compress the frames using JPG and store in as a list of strings in 'frames'
    encoded_frames = [tf.compat.as_bytes(cv2.imencode(".jpg", frame)[1].tobytes())
                      for frame in frames]
    #encoded_frames = [ frame.tobytes() for frame in frames]
    features['frames'] = _bytes_list_feature(encoded_frames)
    
    #labels
    features['no_det'] = _int64_feature(no_det)
    features['segment_key'] = _bytes_feature(str(segment_key))
    features['labels'] = _int64_feature(labels_np.reshape([-1]).tolist())
    features['rois'] = _float_feature(rois_np.reshape([-1]).tolist())
    
    tfrecord_example = tf.train.Example(features=tf.train.Features(feature=features))
    return tfrecord_example
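The index arithmetic that picks the 32 timesteps is easy to sanity-check in isolation; a minimal sketch with a made-up source frame count:

import numpy as np

no_frames = 120         # made-up source frame count
f_timesteps = 32

slope = (no_frames - 1) / float(f_timesteps - 1)
indices = (slope * np.arange(f_timesteps)).astype(np.int64)

print(len(indices))              # 32
print(indices[0], indices[-1])   # starts at 0, ends at (or just below) no_frames - 1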