def initial_data(watch_html: str) -> dict:
    """Extract the ytInitialData json from the watch_html page.

    This mostly contains metadata necessary for rendering the page on-load,
    such as video information, copyright notices, etc.

    Two assignment forms are tried in order: the ``window['ytInitialData']``
    indexed assignment first, then the bare ``ytInitialData = ...`` form.
    The first pattern for which ``parse_for_object`` succeeds wins.

    @param watch_html: Html of the watch page
    @return: The object produced by ``parse_for_object`` for the first
        pattern that parses successfully (a parsed object, not raw text).
    @raise RegexMatchError: If ``parse_for_object`` raises HTMLParseError
        for every known pattern.
    """
    patterns = [
        r"window\[['\"]ytInitialData['\"]]\s*=\s*",
        r"ytInitialData\s*=\s*",
    ]
    for pattern in patterns:
        try:
            return parse_for_object(watch_html, pattern)
        except HTMLParseError:
            # This pattern did not yield an object; try the next one.
            continue
    raise RegexMatchError(caller='initial_data', pattern='initial_data_pattern')
def test_invalid_start():
    """A start pattern that matches nothing in the html raises HTMLParseError."""
    html = 'test = {}'
    unmatched_pattern = r'invalid_regex'
    with pytest.raises(HTMLParseError):
        parse_for_object(html, unmatched_pattern)
def test_parse_context_closer_in_string_value():
    """A ``};`` sequence inside a string value must not end the object early."""
    html = 'test = {"foo": "};"};'
    expected = {'foo': '};'}
    assert parse_for_object(html, r'test\s*=\s*') == expected
def test_parse_simple_object():
    """An object holding an empty list and an empty nested object is parsed."""
    html = 'test = {"foo": [], "bar": {}};'
    expected = {'foo': [], 'bar': {}}
    assert parse_for_object(html, r'test\s*=\s*') == expected
def test_parse_empty_object_with_trailing_characters():
    """Characters after the closing brace (here ``;``) are ignored."""
    html = 'test = {};'
    parsed = parse_for_object(html, r'test\s*=\s*')
    expected = {}
    assert parsed == expected
def test_parse_simple_empty_object():
    """An empty object with nothing following it parses to an empty dict."""
    html = 'test = {}'
    start_pattern = r'test\s*=\s*'
    assert parse_for_object(html, start_pattern) == {}