Exemplo n.º 1
0
def test_spectrogram_image(spark: SparkSession):
    """Check that ``spectrogram_image`` yields an ``Image`` per video type.

    For each source (a local ``VideoStream`` asset and a ``YouTubeVideo``)
    a single-row DataFrame with a ``video`` column is created, the
    ``spectrogram_image`` UDF is applied to that column, and the
    ``spectrogram`` value of the first row is inspected.
    """
    # The sample clip is resolved relative to this test module:
    # <module dir>/../assets/big_buck_bunny_short.mp4
    asset_uri = os.path.abspath(
        os.path.join(
            os.path.dirname(__file__),
            "..",
            "assets",
            "big_buck_bunny_short.mp4",
        )
    )
    sources = [
        ("videostream", VideoStream(uri=asset_uri)),
        # NOTE(review): presumably needs network access to YouTube -- confirm.
        ("youtube", YouTubeVideo(vid="rUWxSEwctFU")),
    ]

    for label, video in sources:
        sample = (
            spark.createDataFrame([(video,)], ["video"])
            .withColumn("spectrogram", spectrogram_image(col("video")))
            .first()["spectrogram"]
        )
        # isinstance() is the idiomatic type check; the label is used as the
        # assertion message so a failure names the offending source.
        assert isinstance(sample, Image), label
Exemplo n.º 2
0
def test_spectrogram_image(
    spark: SparkSession, tmp_path: Path, asset_path: Path
):
    """Check that ``spectrogram_image`` yields an ``Image`` per video type.

    For each source (a local ``VideoStream`` asset and a ``YouTubeVideo``)
    a single-row DataFrame is created and ``spectrogram_image`` is applied
    to its ``video`` column together with a per-source path under
    ``tmp_path`` (presumably the output location of the rendered image --
    confirm against the UDF). The first row's ``spectrogram`` value must be
    an ``Image``.
    """
    sources = [
        (
            "s1.jpg",
            VideoStream(str(asset_path / "big_buck_bunny_short.mp4")),
        ),
        # NOTE(review): presumably needs network access to YouTube -- confirm.
        ("s2.jpg", YouTubeVideo(vid="rUWxSEwctFU")),
    ]

    for filename, video in sources:
        output_uri = str(tmp_path / filename)
        spectrogram = (
            spark.createDataFrame([(video,)], ["video"])
            .withColumn(
                "spectrogram",
                spectrogram_image(col("video"), lit(output_uri)),
            )
            .first()["spectrogram"]
        )
        # isinstance() is the idiomatic type check; the file name is used as
        # the assertion message so a failure names the offending source.
        assert isinstance(spectrogram, Image), filename
Exemplo n.º 3
0
def test_video_to_images(
    spark: SparkSession, tmp_path: Path, asset_path: Path
):
    """Check that ``video_to_images`` returns a list of ``Image`` values.

    For each source (a local ``VideoStream`` asset and a ``YouTubeVideo``)
    a single-row DataFrame with ``video`` and ``segment`` columns is
    created, a fresh directory under ``tmp_path`` is made, and
    ``video_to_images`` is applied with that directory, the segment,
    ``sample_rate`` and ``max_samples``. The first row's ``images`` value
    must be a list of exactly ``max_samples`` ``Image`` objects.
    """
    sample_rate = 2
    max_samples = 10
    sources = [
        (
            "videostream_test",
            VideoStream(str(asset_path / "big_buck_bunny_short.mp4")),
        ),
        # NOTE(review): presumably needs network access to YouTube -- confirm.
        ("youtube_test", YouTubeVideo(vid="rUWxSEwctFU")),
    ]

    for dirname, video in sources:
        output_dir = tmp_path / dirname
        output_dir.mkdir(parents=True)
        images = (
            spark.createDataFrame(
                [(video, Segment(0, 20))], ["video", "segment"]
            )
            .withColumn(
                "images",
                video_to_images(
                    col("video"),
                    lit(str(output_dir)),
                    col("segment"),
                    lit(sample_rate),
                    lit(max_samples),
                ),
            )
            .first()["images"]
        )
        # Separate asserts, ordered so that an empty result fails on the
        # length check instead of raising IndexError, and so that every
        # element is type-checked rather than only the first one.
        assert isinstance(images, list), dirname
        assert len(images) == max_samples, dirname
        assert all(isinstance(image, Image) for image in images), dirname
Exemplo n.º 4
0
def test_video_to_images(spark: SparkSession):
    """Check that ``video_to_images`` returns a list of ``Image`` values.

    For each source (a local ``VideoStream`` asset and a ``YouTubeVideo``)
    a single-row DataFrame with ``video`` and ``segment`` columns is
    created and ``video_to_images`` is applied with the segment,
    ``sample_rate`` and ``max_samples``. The first row's ``images`` value
    must be a list of exactly ``max_samples`` ``Image`` objects.
    """
    sample_rate = 2
    max_samples = 10
    # The sample clip is resolved relative to this test module:
    # <module dir>/../assets/big_buck_bunny_short.mp4
    asset_uri = os.path.abspath(
        os.path.join(
            os.path.dirname(__file__),
            "..",
            "assets",
            "big_buck_bunny_short.mp4",
        )
    )
    sources = [
        ("videostream", VideoStream(uri=asset_uri)),
        # NOTE(review): presumably needs network access to YouTube -- confirm.
        ("youtube", YouTubeVideo(vid="rUWxSEwctFU")),
    ]

    for label, video in sources:
        images = (
            spark.createDataFrame(
                [(video, Segment(0, 20))], ["video", "segment"]
            )
            .withColumn(
                "images",
                video_to_images(
                    col("video"),
                    col("segment"),
                    lit(sample_rate),
                    lit(max_samples),
                ),
            )
            .first()["images"]
        )
        # Separate asserts, ordered so that an empty result fails on the
        # length check instead of raising IndexError, and so that every
        # element is type-checked rather than only the first one.
        assert isinstance(images, list), label
        assert len(images) == max_samples, label
        assert all(isinstance(image, Image) for image in images), label
Exemplo n.º 5
0
def test_scene_detect(spark: SparkSession, asset_path: Path):
    """Check the scenes ``scene_detect`` reports for the sample clip.

    ``scene_detect`` is applied to the ``video`` column of a single-row
    DataFrame; each entry of the first row's ``scenes`` value is converted
    to a nested dict with ``asDict(True)`` and compared with the expected
    single scene covering frames 0 to 300.
    """
    video = VideoStream(str(asset_path / "big_buck_bunny_short.mp4"))
    df = spark.createDataFrame([(video,)], ["video"])
    scenes = df.withColumn("scenes", scene_detect("video")).first()["scenes"]
    result = [scene.asDict(True) for scene in scenes]
    expected = [{
        "start": {
            "frame_num": 0,
            "frame_pos_sec": 0.0
        },
        "end": {
            "frame_num": 300,
            "frame_pos_sec": 10.010000228881836
        },
    }]
    # zip() stops at the shorter input, so without this check an empty or
    # truncated result would make the loop below pass vacuously.
    assert len(result) == len(expected)
    for rs, xp in zip(result, expected):
        pdt.assert_frame_equal(pd.DataFrame(rs), pd.DataFrame(xp))
Exemplo n.º 6
0
def test_video_metadata(spark: SparkSession, asset_path: Path):
    """Check ``video_metadata`` for a valid clip and for an unusable URI.

    The valid clip must produce the expected metadata fields with
    ``_errors`` set to ``None``; the plain string ``"bad_uri"`` must produce
    an ``_errors`` entry carrying an ffprobe message and stderr text.
    """

    def _metadata_dict(video):
        # Single-row frame -> select the UDF output as "meta" -> plain dict.
        frame = spark.createDataFrame([(video,)], ["video"])
        projected = frame.select(video_metadata(col("video")).alias("meta"))
        return projected.first()["meta"].asDict()

    # Happy path: the bundled sample clip.
    clip = VideoStream(str(asset_path / "big_buck_bunny_short.mp4"))
    actual = _metadata_dict(clip)
    wanted = {
        "width": 640,
        "height": 360,
        "num_frames": 300,
        "duration": 10.010000228881836,
        "bit_rate": 415543,
        "frame_rate": 30,
        "codec": "h264",
        "size": 736613,
        "_errors": None,
    }
    pdt.assert_series_equal(pd.Series(actual), pd.Series(wanted))

    # Failure path: a bare string that does not point at any file.
    failure = _metadata_dict("bad_uri")
    err = failure["_errors"].asDict()
    assert err["message"].startswith("ffprobe error")
    assert "bad_uri: No such file or directory" in err["stderr"]
Exemplo n.º 7
0
def test_videostream(spark, tmpdir):
    """Build a two-row ``VideoStream`` DataFrame and pass it to
    ``_check_roundtrip`` along with the session and temp directory."""
    rows = [Row(VideoStream(uri)) for uri in ("uri1", "uri2")]
    frame = spark.createDataFrame(rows)
    _check_roundtrip(spark, frame, tmpdir)
Exemplo n.º 8
0
    def deserialize(self, datum) -> "VideoStream":
        """Build a ``VideoStream`` from the first element of ``datum``.

        Only ``datum[0]`` is read; it is handed to the ``VideoStream``
        constructor as its single positional argument.
        """
        # Function-scope import (the pylint warning is silenced on purpose);
        # presumably this avoids an import cycle with rikai.types -- confirm.
        from rikai.types import VideoStream  # pylint: disable=import-outside-toplevel

        first_field = datum[0]
        return VideoStream(first_field)
Exemplo n.º 9
0
 def test_videostream(self):
     """Build a two-row ``VideoStream`` DataFrame and pass it to
     ``self._check_roundtrip``."""
     rows = [Row(VideoStream(uri)) for uri in ("uri1", "uri2")]
     frame = self.spark.createDataFrame(rows)
     self._check_roundtrip(frame)