# Shared imports for the test snippets below; the module paths assume
# rikai's public ``rikai.spark.functions`` and ``rikai.types`` modules.
import os
from pathlib import Path

import pandas as pd
import pandas.testing as pdt
from pyspark.sql import Row, SparkSession
from pyspark.sql.functions import col, lit

from rikai.spark.functions import (
    scene_detect,
    spectrogram_image,
    video_metadata,
    video_to_images,
)
from rikai.types import Image, Segment, VideoStream, YouTubeVideo


def test_spectrogram_image(spark: SparkSession):
    """Test generating a spectrogram image from YouTubeVideo/VideoStream
    video types."""
    videostream_df = spark.createDataFrame(
        [
            (
                VideoStream(
                    uri=os.path.abspath(
                        os.path.join(
                            os.path.dirname(__file__),
                            "..",
                            "assets",
                            "big_buck_bunny_short.mp4",
                        )
                    )
                ),
            ),
        ],
        ["video"],
    )
    youtube_df = spark.createDataFrame(
        [(YouTubeVideo(vid="rUWxSEwctFU"),)],
        ["video"],
    )
    videostream_df = videostream_df.withColumn(
        "spectrogram", spectrogram_image(col("video"))
    )
    youtube_df = youtube_df.withColumn(
        "spectrogram", spectrogram_image(col("video"))
    )
    videostream_sample = videostream_df.first()["spectrogram"]
    youtube_sample = youtube_df.first()["spectrogram"]
    assert type(videostream_sample) == Image
    assert type(youtube_sample) == Image

def test_spectrogram_image(
    spark: SparkSession, tmp_path: Path, asset_path: Path
):
    """Test generating a spectrogram image from YouTubeVideo/VideoStream
    video types."""
    video = VideoStream(str(asset_path / "big_buck_bunny_short.mp4"))
    s1 = (
        spark.createDataFrame([(video,)], ["video"])
        .withColumn(
            "spectrogram",
            spectrogram_image(col("video"), lit(str(tmp_path / "s1.jpg"))),
        )
        .first()["spectrogram"]
    )
    assert type(s1) == Image

    yt = YouTubeVideo(vid="rUWxSEwctFU")
    s2 = (
        spark.createDataFrame([(yt,)], ["video"])
        .withColumn(
            "spectrogram",
            spectrogram_image(col("video"), lit(str(tmp_path / "s2.jpg"))),
        )
        .first()["spectrogram"]
    )
    assert type(s2) == Image

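# The fixture-based tests in this file depend on ``spark`` and ``asset_path``
# pytest fixtures defined elsewhere in the suite. Below is a minimal
# conftest.py sketch; the fixture bodies are assumptions (hypothetical),
# mirroring the "../assets" layout that the fixture-free tests resolve by
# hand, and the real fixtures may differ (e.g. by registering rikai's UDTs).
from pathlib import Path

import pytest
from pyspark.sql import SparkSession


@pytest.fixture
def asset_path() -> Path:
    # Assumed layout: test assets live in a sibling "assets" directory.
    return (Path(__file__).parent / ".." / "assets").resolve()


@pytest.fixture(scope="session")
def spark() -> SparkSession:
    # Plain local session, for illustration only.
    return SparkSession.builder.master("local[1]").getOrCreate()
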
def test_video_to_images(
    spark: SparkSession, tmp_path: Path, asset_path: Path
):
    """Test extracting video frames from YouTubeVideo/VideoStream types
    into a list of Image assets."""
    sample_rate = 2
    max_samples = 10

    video = VideoStream(str(asset_path / "big_buck_bunny_short.mp4"))
    df1 = spark.createDataFrame(
        [(video, Segment(0, 20))], ["video", "segment"]
    )
    output_dir = tmp_path / "videostream_test"
    output_dir.mkdir(parents=True)
    df1 = df1.withColumn(
        "images",
        video_to_images(
            col("video"),
            lit(str(output_dir)),
            col("segment"),
            lit(sample_rate),
            lit(max_samples),
        ),
    )

    df2 = spark.createDataFrame(
        [(YouTubeVideo(vid="rUWxSEwctFU"), Segment(0, 20))],
        ["video", "segment"],
    )
    output_dir = tmp_path / "youtube_test"
    output_dir.mkdir(parents=True)
    df2 = df2.withColumn(
        "images",
        video_to_images(
            col("video"),
            lit(str(output_dir)),
            col("segment"),
            lit(sample_rate),
            lit(max_samples),
        ),
    )

    videostream_sample = df1.first()["images"]
    youtube_sample = df2.first()["images"]
    assert (
        type(videostream_sample) == list
        and type(videostream_sample[0]) == Image
        and len(videostream_sample) == max_samples
    )
    assert (
        type(youtube_sample) == list
        and type(youtube_sample[0]) == Image
        and len(youtube_sample) == max_samples
    )

def test_video_to_images(spark: SparkSession):
    """Test extracting video frames from YouTubeVideo/VideoStream types
    into a list of Image assets."""
    sample_rate = 2
    max_samples = 10
    videostream_df = spark.createDataFrame(
        [
            (
                VideoStream(
                    uri=os.path.abspath(
                        os.path.join(
                            os.path.dirname(__file__),
                            "..",
                            "assets",
                            "big_buck_bunny_short.mp4",
                        )
                    )
                ),
                Segment(0, 20),
            ),
        ],
        ["video", "segment"],
    )
    youtube_df = spark.createDataFrame(
        [(YouTubeVideo(vid="rUWxSEwctFU"), Segment(0, 20))],
        ["video", "segment"],
    )
    videostream_df = videostream_df.withColumn(
        "images",
        video_to_images(
            col("video"), col("segment"), lit(sample_rate), lit(max_samples)
        ),
    )
    youtube_df = youtube_df.withColumn(
        "images",
        video_to_images(
            col("video"), col("segment"), lit(sample_rate), lit(max_samples)
        ),
    )
    videostream_sample = videostream_df.first()["images"]
    youtube_sample = youtube_df.first()["images"]
    assert (
        type(videostream_sample) == list
        and type(videostream_sample[0]) == Image
        and len(videostream_sample) == max_samples
    )
    assert (
        type(youtube_sample) == list
        and type(youtube_sample[0]) == Image
        and len(youtube_sample) == max_samples
    )

def test_scene_detect(spark: SparkSession, asset_path: Path):
    """Test scene detection on a VideoStream against the expected scene
    boundaries."""
    video = VideoStream(str(asset_path / "big_buck_bunny_short.mp4"))
    df = spark.createDataFrame([(video,)], ["video"])
    result = [
        r.asDict(True)
        for r in df.withColumn("scenes", scene_detect("video")).first()[
            "scenes"
        ]
    ]
    expected = [
        {
            "start": {"frame_num": 0, "frame_pos_sec": 0.0},
            "end": {"frame_num": 300, "frame_pos_sec": 10.010000228881836},
        }
    ]
    for rs, xp in zip(result, expected):
        pdt.assert_frame_equal(pd.DataFrame(rs), pd.DataFrame(xp))

def test_video_metadata(spark: SparkSession, asset_path: Path):
    """Test extracting ffprobe metadata, including the error path for a
    bad URI."""
    video = VideoStream(str(asset_path / "big_buck_bunny_short.mp4"))
    result = (
        spark.createDataFrame([(video,)], ["video"])
        .select(video_metadata(col("video")).alias("meta"))
        .first()["meta"]
        .asDict()
    )
    expected = {
        "width": 640,
        "height": 360,
        "num_frames": 300,
        "duration": 10.010000228881836,
        "bit_rate": 415543,
        "frame_rate": 30,
        "codec": "h264",
        "size": 736613,
        "_errors": None,
    }
    pdt.assert_series_equal(pd.Series(result), pd.Series(expected))

    # A URI that does not resolve should surface the ffprobe error struct.
    video = "bad_uri"
    result = (
        spark.createDataFrame([(video,)], ["video"])
        .select(video_metadata(col("video")).alias("meta"))
        .first()["meta"]
        .asDict()
    )
    err = result["_errors"].asDict()
    assert err["message"].startswith("ffprobe error")
    assert "bad_uri: No such file or directory" in err["stderr"]

def test_videostream(spark, tmpdir):
    df = spark.createDataFrame(
        [Row(VideoStream("uri1")), Row(VideoStream("uri2"))]
    )
    _check_roundtrip(spark, df, tmpdir)

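# ``_check_roundtrip`` is a shared helper defined elsewhere in the suite. A
# minimal sketch, assuming it verifies that a DataFrame survives a Parquet
# write/read roundtrip; only the name comes from the test above, the body
# here is an assumption:
def _check_roundtrip(spark: SparkSession, df, tmpdir):
    path = str(tmpdir) + "/roundtrip.parquet"
    # Write the DataFrame out and read it back through the same session.
    df.write.mode("overwrite").parquet(path)
    restored = spark.read.parquet(path)
    # Parquet gives no ordering guarantee, so compare order-insensitively.
    assert sorted(map(str, df.collect())) == sorted(
        map(str, restored.collect())
    )
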
def deserialize(self, datum) -> "VideoStream":
    # pylint: disable=import-outside-toplevel
    from rikai.types import VideoStream

    return VideoStream(datum[0])

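# ``deserialize`` above is one half of a Spark UserDefinedType for
# VideoStream. Below is a sketch of the surrounding UDT, assuming the
# serialized form is a single-field struct holding the URI (consistent with
# ``datum[0]`` above); the class and field names are illustrative, not
# rikai's actual definitions:
from pyspark.sql.types import (
    StringType,
    StructField,
    StructType,
    UserDefinedType,
)


class VideoStreamType(UserDefinedType):
    """Hypothetical sketch of a VideoStream UDT."""

    @classmethod
    def sqlType(cls) -> StructType:
        # One-field struct: deserialize() reads the URI back as datum[0].
        return StructType([StructField("uri", StringType(), False)])

    def serialize(self, obj) -> tuple:
        # Persist only the URI string.
        return (obj.uri,)
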
def test_videostream(self):
    df = self.spark.createDataFrame(
        [Row(VideoStream("uri1")), Row(VideoStream("uri2"))]
    )
    self._check_roundtrip(df)