def test_doesnt_have_subtitles(ucache: URLCache) -> None:
    """Fetch a YouTube URL that has no captions and inspect the cache entry.

    Checks that the response carries no subtitles and no HTML summary, and
    that the cache directory holds ``metadata.json`` but neither
    ``data/subtitles.srt`` nor ``html_summary.html``.
    """
    response = ucache.get(youtube_without_cc)

    # the response URL is the 'corrected' preprocessed one, so it should
    # differ from the raw URL that was requested
    assert response.url != youtube_without_cc

    # the video id must be parseable from the raw URL
    video_id = get_yt_video_id(youtube_without_cc)
    assert video_id == "xvQUiX26RfE"

    assert response.data is not None
    # the site-specific extractor deletes the HTML summary for youtube
    assert response.html_summary is None
    assert "subtitles" not in response.data

    # locate the on-disk cache directory for the preprocessed URL
    normalized_url = ucache.preprocess_url(youtube_without_cc)
    cache_dir = ucache.summary_cache.dir_cache.get(normalized_url)

    subtitles_path = os.path.join(cache_dir, "data", "subtitles.srt")
    metadata_path = os.path.join(cache_dir, "metadata.json")
    summary_path = os.path.join(cache_dir, "html_summary.html")

    assert not os.path.exists(subtitles_path)
    assert os.path.exists(metadata_path)
    # the summary files are removed on purpose, since they're somewhat useless
    assert not os.path.exists(summary_path)
def test_youtube_has_subtitles(ucache: URLCache) -> None:
    """Fetch a YouTube URL that has captions and verify they reach the disk.

    The URL must not be cached beforehand; after the fetch, the subtitles
    must be present both in the returned ``Summary`` and in
    ``data/subtitles.srt`` inside the cache directory.
    """
    expected_snippet = "trade-off between space"

    # the subtitles should be downloaded to a file by this fetch
    assert not ucache.in_cache(youtube_with_cc)
    result: Summary = ucache.get(youtube_with_cc)
    assert ucache.in_cache(youtube_with_cc)

    assert isinstance(result, Summary)
    assert result is not None
    assert result.data is not None
    assert "subtitles" in result.data
    assert expected_snippet in result.data["subtitles"]

    # the corresponding cache directory must exist at the expected location
    dir_cache = ucache.summary_cache.dir_cache
    assert isinstance(dir_cache, DirCache)
    normalized_url = ucache.preprocess_url(youtube_with_cc)
    cache_dir = dir_cache.get(normalized_url)
    assert cache_dir.endswith(
        "data/2/c/7/6284b2f664f381372fab3276449b2/000")

    srt_path = Path(cache_dir) / "data" / "subtitles.srt"
    assert srt_path.exists()
    # the subtitle text must be stored in the cache dir as well
    assert expected_snippet in srt_path.read_text()
def test_youtube_preprocessor(ucache: URLCache) -> None:
    """Check that ``preprocess_url`` rewrites the raw URL to the watch URL."""
    canonical_url = "https://www.youtube.com/watch?v=xvQUiX26RfE"

    # the raw fixture URL must start out different from the canonical form,
    # otherwise the second assertion would pass trivially
    assert youtube_without_cc != canonical_url

    processed_url = ucache.preprocess_url(youtube_without_cc)
    assert processed_url == canonical_url