def test_markdown_render_doesnt_allow_any_explicit_anchors(self):
    """Raw ``<a>`` tags are stripped no matter which URL scheme they use."""
    for href in ('http://example.com/', 'https://example.com/', 'javascript:foo'):
        rendered = markdown.render('<a href="%s">foo</a>' % href)
        assert rendered == '<p>foo</p>\n'
def test_markdown_render_discards_scripts(self):
    """Script tags are dropped, and tag-smuggling can't reassemble one."""
    # A plain <script> element disappears; only its text content survives.
    rendered = markdown.render('Example <script>alert(1);</script>')
    assert rendered == '<p>Example alert(1);</p>\n'
    # Nested-tag trick: stripping the inner tags must not leave a valid
    # <script> tag behind.
    sneaky = '<sc<script>ript>alert(123)</sc</script>ript>'
    assert '<script>' not in markdown.render(sneaky)
def test_markdown_image_src_filtering(self):
    """Image URLs are filtered by scheme and attribute values are escaped."""
    # Nice data: quotes inside attribute values must come out entity-escaped
    # (&quot; / &#39;) — raw quotes inside a quoted attribute would make the
    # expected HTML itself malformed.
    expected = '<p><img src="http:&quot;foo&quot;" /></p>\n'
    assert markdown.render('![](http:"foo")') == expected
    expected = ('<p><img src="https://example.org/" alt="&quot;bar&quot;"'
                ' title="&#39;title&#39;" /></p>\n')
    assert markdown.render('!["bar"](https://example.org/ "\'title\'")') == expected
    # Naughty data: a javascript: image URL is not rendered as an image at all.
    expected = '<p>![foo](javascript:foo)</p>\n'
    assert markdown.render('![foo](javascript:foo)') == expected
def test_markdown_autolink_filtering(self):
    """Autolinks (<url>) only work for whitelisted schemes."""
    # Nice data: http, https and xmpp autolink; the URL is HTML-escaped.
    for url in ('http://a', "https://b?x&y", 'xmpp:c'):
        expected = '<p><a href="{0}">{0}</a></p>\n'.format(escape(url))
        actual = markdown.render('<%s>' % url)
        assert actual == expected
    # Naughty data: a javascript: autolink is rendered as escaped plain text.
    expected = '<p>&lt;javascript:foo&gt;</p>\n'
    assert markdown.render('<javascript:foo>') == expected
    # Entity-encoding the link must not smuggle it past the filter.
    link = 'javascript:0'
    # Hexadecimal HTML character references: '&#x..;', not '&x..;'.
    encoded_link = ''.join('&#x{0:x};'.format(ord(c)) for c in link)
    html = markdown.render('<%s>' % encoded_link)
    assert link not in html
def test_markdown_link_filtering(self):
    """Link URLs are filtered by scheme; text and title are escaped."""
    # Nice data: single quotes in the link text and title come out as &#39;.
    # (The original expected string here was corrupted — a raw quote inside
    # the literal terminated it early, which wasn't even valid Python.)
    for url in ('http://a', 'https://b', 'xmpp:c'):
        expected = '<p><a href="{0}" title="bar&#39;">&#39;foo</a></p>\n'.format(url)
        actual = markdown.render("['foo](%s \"bar'\")" % url)
        assert actual == expected
    # Naughty data: non-whitelisted schemes aren't rendered as links.
    html = markdown.render('[foo](javascript:xss)')
    assert html == '<p>[foo](javascript:xss)</p>\n'
    html = markdown.render('[foo](unknown:bar)')
    assert html == '<p>[foo](unknown:bar)</p>\n'
    # Attribute-breakout attempts must not survive into the output.
    html = markdown.render('[" xss><xss>]("><xss>)')
    assert '<xss>' not in html
    assert '" xss' not in html
    html = markdown.render('[" xss><xss>](https:"><xss>)')
    assert '<xss>' not in html
    assert '" xss' not in html
def test_markdown_render_renders_https_links(self):
    """https:// URLs work both as explicit links and as autolinks."""
    url = 'https://example.com/'
    explicit = markdown.render('[foo](%s)' % url)
    assert explicit == '<p><a href="%s">foo</a></p>\n' % url
    auto = markdown.render('<%s>' % url)
    assert auto == '<p><a href="{0}">{0}</a></p>\n'.format(url)
def test_markdown_render_escapes_scripts(self):
    """Script tags are stripped; SmartyPants turns straight quotes into
    ``&ldquo;``/``&rdquo;`` entities (the literal curly quotes in the old
    expected string were the result of entity-unescaping corruption)."""
    expected = '<p>Example alert &ldquo;hi&rdquo;;</p>\n'
    actual = markdown.render('Example <script>alert "hi";</script>')
    assert expected == actual
def test_markdown_render_does_render(self):
    # Sanity check: plain text comes back wrapped in a paragraph.
    assert markdown.render('Example') == "<p>Example</p>\n"
def test_markdown_render_no_intra_emphasis(self):
    # Underscores inside words must not be parsed as emphasis markers.
    text = 'Examples like this_one and this other_one.'
    assert markdown.render(text) == '<p>%s</p>\n' % text
def load_i18n(canonical_host, canonical_scheme, project_root, tell_sentry):
    """Load the translation catalogs, locale metadata and localized docs.

    Mutates the module-level ``LOCALES`` mapping in place (and prunes the
    Babel locale-data cache) and returns a dict with the rendered ``docs``,
    the sorted ``lang_list``, the ``locales`` mapping and the DNS-resolvable
    ``subdomains``.
    """
    def compute_percentage(it, total):
        # Recurses into tuples so each plural-form variant counts separately.
        return sum(
            (compute_percentage(s, len(s)) if isinstance(s, tuple) else 1)
            for s in it if s
        ) / total

    # Load the base locales
    localeDir = os.path.join(project_root, 'i18n', 'core')
    locales = LOCALES
    source_strings = {}
    for file in os.listdir(localeDir):
        try:
            parts = file.split(".")
            if not (len(parts) == 2 and parts[1] == "po"):
                continue  # skip anything that isn't a `<lang>.po` file
            lang = parts[0]
            with open(os.path.join(localeDir, file), 'rb') as f:
                l = Locale(lang)
                c = l.catalog = read_po(f)
                # presumably interns source strings shared across catalogs
                # to save memory — helper not visible here, confirm upstream
                share_source_strings(c, source_strings)
                c.plural_func = get_function_from_rule(c.plural_expr)
                replace_unused_singulars(c)
                # Completion counts translated, non-fuzzy messages only.
                l.completion = compute_percentage(
                    (m.string for m in c if m.id and not m.fuzzy), len(c))
                if l.completion == 0:
                    continue  # drop locales without a single translated string
                else:
                    locales[lang.lower()] = l
                try:
                    l.countries = make_sorted_dict(COUNTRIES, l.territories)
                except KeyError:
                    # Babel has no localized territory names: fall back.
                    l.countries = COUNTRIES
                try:
                    l.languages_2 = make_sorted_dict(LANGUAGES_2, l.languages)
                except KeyError:
                    l.languages_2 = LANGUAGES_2
        except Exception as e:
            # A single broken catalog shouldn't prevent startup.
            tell_sentry(e)
    del source_strings

    # Load the variants
    # A territory variant (e.g. `fr_CA`) reuses its parent's catalog.
    for loc_id in babel.localedata.locale_identifiers():
        if loc_id in locales:
            continue
        i = loc_id.rfind('_')
        if i == -1:
            continue
        base = locales.get(loc_id[:i])
        if base:
            l = locales[loc_id.lower()] = Locale.parse(loc_id)
            l.catalog = base.catalog
            l.completion = base.completion
            l.countries = base.countries
            l.languages_2 = base.languages_2

    # Unload the Babel data that we no longer need
    # We load a lot of data to populate the LANGUAGE_NAMES dict, we don't want
    # to keep it all in RAM.
    used_data_dict_addresses = set(id(l._data._data) for l in locales.values())
    for key, data_dict in list(babel.localedata._cache.items()):
        if id(data_dict) not in used_data_dict_addresses:
            del babel.localedata._cache[key]

    # Prepare a unique and sorted list for use in the language switcher
    loc_url = canonical_scheme + '://%s.' + canonical_host
    domain, port = (canonical_host.split(':') + [None])[:2]
    port = int(port) if port else socket.getservbyname(canonical_scheme, 'tcp')
    subdomains = {
        # Only advertise language subdomains that actually resolve in DNS.
        l.subdomain: loc_url % l.subdomain for l in locales.values()
        if not l.territory and resolve(l.subdomain + '.' + domain, port)
    }
    lang_list = sorted(
        ((l.completion, l.language, l.language_name.title(), loc_url % l.subdomain)
         for l in set(locales.values())
         if not l.territory and l.completion > 0.5),
        # Most complete first, then alphabetically by language code.
        key=lambda t: (-t[0], t[1]),
    )

    # Add year-less date format
    year_re = re.compile(r'(^y+[^a-zA-Z]+|[^a-zA-Z]+y+$)')
    for l in locales.values():
        short_format = l.date_formats['short'].pattern
        # The year field is expected at either end of the short pattern.
        assert short_format[0] == 'y' or short_format[-1] == 'y', (
            l.language, short_format)
        l.date_formats['short_yearless'] = year_re.sub('', short_format)

    # Add aliases
    for k, v in list(locales.items()):
        locales.setdefault(ALIASES.get(k, k), v)
        locales.setdefault(ALIASES_R.get(k, k), v)
    for k, v in list(locales.items()):
        locales.setdefault(k.split('_', 1)[0], v)

    # Add universal strings
    # These strings don't need to be translated, but they have to be in the catalogs
    # so that they're counted as translated.
    for l in locales.values():
        l.catalog.add("PayPal", "PayPal")

    # Patch the locales to look less formal
    locales['fr'].currency_formats['standard'] = parse_pattern(
        '#,##0.00\u202f\xa4')
    locales['fr'].currencies['USD'] = 'dollar états-unien'

    # Load the markdown files
    docs = {}
    heading_re = re.compile(r'^(#+ )', re.M)
    for path in find_files(os.path.join(project_root, 'i18n'), '*.md'):
        d, b = os.path.split(path)
        doc = os.path.basename(d)
        lang = b[:-3]  # strip the `.md` extension
        with open(path, 'rb') as f:
            md = f.read().decode('utf8')
        if md.startswith('# '):
            # Drop the top-level title and demote the remaining headings.
            md = '\n'.join(md.split('\n')[1:]).strip()
            md = heading_re.sub(r'##\1', md)
        docs.setdefault(doc, {}).__setitem__(lang, markdown.render(md))

    return {
        'docs': docs, 'lang_list': lang_list, 'locales': locales,
        'subdomains': subdomains
    }
def load_i18n(canonical_host, canonical_scheme, project_root, tell_sentry):
    """Load the translation catalogs, locale metadata and localized docs.

    Mutates the module-level ``LOCALES`` mapping in place and returns a dict
    with ``docs``, ``lang_list``, ``locales`` and ``subdomains``.
    """
    # Load the locales
    localeDir = os.path.join(project_root, 'i18n', 'core')
    locales = LOCALES
    source_strings = {}
    for file in os.listdir(localeDir):
        try:
            parts = file.split(".")
            if not (len(parts) == 2 and parts[1] == "po"):
                continue  # skip anything that isn't a `<lang>.po` file
            lang = parts[0]
            with open(os.path.join(localeDir, file), 'rb') as f:
                l = locales[lang.lower()] = Locale(lang)
                c = l.catalog = read_po(f)
                # presumably interns source strings shared across catalogs —
                # helper not visible here, confirm upstream
                share_source_strings(c, source_strings)
                c.plural_func = get_function_from_rule(c.plural_expr)
                replace_unused_singulars(c)
                try:
                    l.countries = make_sorted_dict(COUNTRIES, l.territories)
                except KeyError:
                    # Babel has no localized territory names: fall back.
                    l.countries = COUNTRIES
                try:
                    l.languages_2 = make_sorted_dict(LANGUAGES_2, l.languages)
                except KeyError:
                    l.languages_2 = LANGUAGES_2
        except Exception as e:
            # A single broken catalog shouldn't prevent startup.
            tell_sentry(e, {})
    del source_strings

    # Prepare a unique and sorted list for use in the language switcher
    # `percent` recurses into tuples so plural-form variants count separately.
    percent = lambda l, total: sum((percent(s, len(s)) if isinstance(s, tuple) else 1) for s in l if s) / total
    for l in list(locales.values()):
        if l.language == 'en':
            l.completion = 1  # English is the source language
            continue
        l.completion = percent([m.string for m in l.catalog if m.id and not m.fuzzy], len(l.catalog))
        if l.completion == 0:
            del locales[l.language]  # drop fully-untranslated locales
    loc_url = canonical_scheme+'://%s.'+canonical_host
    domain, port = (canonical_host.split(':') + [None])[:2]
    port = int(port) if port else socket.getservbyname(canonical_scheme, 'tcp')
    subdomains = {
        # Only advertise language subdomains that actually resolve in DNS.
        l.subdomain: loc_url % l.subdomain for l in locales.values()
        if resolve(l.subdomain + '.' + domain, port)
    }
    lang_list = sorted(
        (
            (l.completion, l.language, l.language_name.title(), loc_url % l.subdomain)
            for l in set(locales.values()) if l.completion > 0.5
        ),
        # Most complete first, then alphabetically by language code.
        key=lambda t: (-t[0], t[1]),
    )

    # Add year-less date format
    year_re = re.compile(r'(^y+[^a-zA-Z]+|[^a-zA-Z]+y+$)')
    for l in locales.values():
        short_format = l.date_formats['short'].pattern
        # The year field is expected at either end of the short pattern.
        assert short_format[0] == 'y' or short_format[-1] == 'y', (l.language, short_format)
        l.date_formats['short_yearless'] = year_re.sub('', short_format)

    # Add aliases
    for k, v in list(locales.items()):
        locales.setdefault(ALIASES.get(k, k), v)
        locales.setdefault(ALIASES_R.get(k, k), v)
    for k, v in list(locales.items()):
        locales.setdefault(k.split('_', 1)[0], v)

    # Patch the locales to look less formal
    locales['fr'].currency_formats['standard'] = parse_pattern('#,##0.00\u202f\xa4')
    locales['fr'].currencies['USD'] = 'dollar états-unien'

    # Load the markdown files
    docs = {}
    heading_re = re.compile(r'^(#+ )', re.M)
    for path in find_files(os.path.join(project_root, 'i18n'), '*.md'):
        d, b = os.path.split(path)
        doc = os.path.basename(d)
        lang = b[:-3]  # strip the `.md` extension
        with open(path, 'rb') as f:
            md = f.read().decode('utf8')
        if md.startswith('# '):
            # Drop the top-level title and demote the remaining headings.
            md = '\n'.join(md.split('\n')[1:]).strip()
            md = heading_re.sub(r'##\1', md)
        docs.setdefault(doc, {}).__setitem__(lang, markdown.render(md))

    return {'docs': docs, 'lang_list': lang_list, 'locales': locales, 'subdomains': subdomains}
def test_markdown_render_escapes_javascript_links(self):
    """javascript: URLs are never rendered as links."""
    # An explicit markdown link stays literal text.
    expected = '<p>[foo](javascript:foo)</p>\n'
    assert markdown.render('[foo](javascript:foo)') == expected
    # An autolink attempt is emitted as escaped text — the angle brackets
    # must come out as &lt;/&gt;, not as a raw (invalid) HTML tag.
    expected = '<p>&lt;javascript:foo&gt;</p>\n'
    assert markdown.render('<javascript:foo>') == expected
def load_i18n(canonical_host, canonical_scheme, project_root, tell_sentry):
    """Load the translation catalogs, locale metadata and localized docs.

    Mutates the module-level ``LOCALES`` mapping in place and returns a dict
    with ``docs``, ``lang_list`` and ``locales``.
    """
    # Load the locales
    localeDir = os.path.join(project_root, 'i18n', 'core')
    locales = LOCALES
    for file in os.listdir(localeDir):
        try:
            parts = file.split(".")
            if not (len(parts) == 2 and parts[1] == "po"):
                continue  # skip anything that isn't a `<lang>.po` file
            lang = parts[0]
            # NOTE(review): text-mode open relies on the default encoding
            # being suitable for the .po files — confirm, or open in 'rb'.
            with open(os.path.join(localeDir, file)) as f:
                l = locales[lang.lower()] = Locale(lang)
                c = l.catalog = read_po(f)
                c.plural_func = get_function_from_rule(c.plural_expr)
                try:
                    l.countries = make_sorted_dict(COUNTRIES, l.territories)
                except KeyError:
                    # Babel has no localized territory names: fall back.
                    l.countries = COUNTRIES
                try:
                    l.languages_2 = make_sorted_dict(LANGUAGES_2, l.languages)
                except KeyError:
                    l.languages_2 = LANGUAGES_2
        except Exception as e:
            # Report the broken catalog; may re-raise depending on config.
            tell_sentry(e, {}, allow_reraise=True)

    # Prepare a unique and sorted list for use in the language switcher
    # `percent` recurses into tuples so plural-form variants count separately.
    percent = lambda l: sum(
        (percent(s) if isinstance(s, tuple) else 1) for s in l if s) / len(l)
    for l in locales.values():
        if l.language == 'en':
            l.completion = 1  # English is the source language
            continue
        l.completion = percent([m.string for m in l.catalog if m.id])
    loc_url = canonical_scheme + '://%s.' + canonical_host
    lang_list = sorted(
        ((l.completion, l.language, l.language_name.title(), loc_url % l.language)
         for l in set(locales.values()) if l.completion),
        # Most complete first, then alphabetically by language code.
        key=lambda t: (-t[0], t[1]),
    )

    # Add aliases
    for k, v in list(locales.items()):
        locales.setdefault(ALIASES.get(k, k), v)
        locales.setdefault(ALIASES_R.get(k, k), v)
    for k, v in list(locales.items()):
        locales.setdefault(k.split('_', 1)[0], v)

    # Patch the locales to look less formal
    locales['fr'].currency_formats[None] = parse_pattern('#,##0.00\u202f\xa4')
    locales['fr'].currency_symbols['USD'] = '$'

    # Load the markdown files
    docs = {}
    heading_re = re.compile(r'^(#+ )', re.M)
    for path in find_files(os.path.join(project_root, 'i18n'), '*.md'):
        d, b = os.path.split(path)
        doc = os.path.basename(d)
        lang = b[:-3]  # strip the `.md` extension
        with open(path, 'rb') as f:
            md = f.read().decode('utf8')
        if md.startswith('# '):
            # Drop the top-level title and demote the remaining headings.
            md = '\n'.join(md.split('\n')[1:]).strip()
            md = heading_re.sub(r'##\1', md)
        docs.setdefault(doc, {}).__setitem__(lang, markdown.render(md))

    return {'docs': docs, 'lang_list': lang_list, 'locales': locales}
def load_i18n(canonical_host, canonical_scheme, project_root, tell_sentry):
    """Load the translation catalogs, locale metadata and localized docs.

    Mutates the module-level ``LOCALES`` mapping in place and returns a dict
    with ``docs``, ``lang_list`` and ``locales``.
    """
    # Load the locales
    localeDir = os.path.join(project_root, 'i18n', 'core')
    locales = LOCALES
    for file in os.listdir(localeDir):
        try:
            parts = file.split(".")
            if not (len(parts) == 2 and parts[1] == "po"):
                continue  # skip anything that isn't a `<lang>.po` file
            lang = parts[0]
            # NOTE(review): text-mode open relies on the default encoding
            # being suitable for the .po files — confirm, or open in 'rb'.
            with open(os.path.join(localeDir, file)) as f:
                l = locales[lang.lower()] = Locale(lang)
                c = l.catalog = read_po(f)
                c.plural_func = get_function_from_rule(c.plural_expr)
                try:
                    l.countries = make_sorted_dict(COUNTRIES, l.territories)
                except KeyError:
                    # Babel has no localized territory names: fall back.
                    l.countries = COUNTRIES
                try:
                    l.languages_2 = make_sorted_dict(LANGUAGES_2, l.languages)
                except KeyError:
                    l.languages_2 = LANGUAGES_2
        except Exception as e:
            # Report the broken catalog; may re-raise depending on config.
            tell_sentry(e, {}, allow_reraise=True)

    # Prepare a unique and sorted list for use in the language switcher
    for l in locales.values():
        # Completion = fraction of messages with a non-empty translation.
        strings = [m.string for m in l.catalog]
        l.completion = sum(1 for s in strings if s) / len(strings)
    loc_url = canonical_scheme+'://%s.'+canonical_host
    lang_list = sorted(
        (
            (l.completion, l.language, l.language_name.title(), loc_url % l.language)
            for l in set(locales.values())
        ),
        # Most complete first, then alphabetically by language code.
        key=lambda t: (-t[0], t[1]),
    )

    # Add aliases
    for k, v in list(locales.items()):
        locales.setdefault(ALIASES.get(k, k), v)
        locales.setdefault(ALIASES_R.get(k, k), v)
    for k, v in list(locales.items()):
        locales.setdefault(k.split('_', 1)[0], v)

    # Patch the locales to look less formal
    locales['fr'].currency_formats[None] = parse_pattern('#,##0.00\u202f\xa4')
    locales['fr'].currency_symbols['USD'] = '$'

    # Load the markdown files
    docs = {}
    heading_re = re.compile(r'^(#+ )', re.M)
    for path in find_files(os.path.join(project_root, 'i18n'), '*.md'):
        d, b = os.path.split(path)
        doc = os.path.basename(d)
        lang = b[:-3]  # strip the `.md` extension
        with open(path, 'rb') as f:
            md = f.read().decode('utf8')
        if md.startswith('# '):
            # Drop the top-level title and demote the remaining headings.
            md = '\n'.join(md.split('\n')[1:]).strip()
            md = heading_re.sub(r'##\1', md)
        docs.setdefault(doc, {}).__setitem__(lang, markdown.render(md))

    return {'docs': docs, 'lang_list': lang_list, 'locales': locales}
def test_markdown_render_autolinks(self):
    # A bare URL in plain text is turned into an anchor automatically.
    url = 'http://google.com/'
    rendered = markdown.render(url)
    assert rendered == '<p><a href="{0}">{0}</a></p>\n'.format(url)
def load_i18n(canonical_host, canonical_scheme, project_root, tell_sentry):
    """Load the translation catalogs, locale metadata and localized docs.

    Mutates the module-level ``LOCALES`` mapping in place and returns a dict
    with ``docs``, ``lang_list``, ``locales`` and ``subdomains``.
    """
    # Load the locales
    localeDir = os.path.join(project_root, 'i18n', 'core')
    locales = LOCALES
    source_strings = {}
    for file in os.listdir(localeDir):
        try:
            parts = file.split(".")
            if not (len(parts) == 2 and parts[1] == "po"):
                continue  # skip anything that isn't a `<lang>.po` file
            lang = parts[0]
            # NOTE(review): text-mode open relies on the default encoding
            # being suitable for the .po files — confirm, or open in 'rb'.
            with open(os.path.join(localeDir, file)) as f:
                l = locales[lang.lower()] = Locale(lang)
                c = l.catalog = read_po(f)
                # presumably interns source strings shared across catalogs —
                # helper not visible here, confirm upstream
                share_source_strings(c, source_strings)
                c.plural_func = get_function_from_rule(c.plural_expr)
                replace_unused_singulars(c)
                try:
                    l.countries = make_sorted_dict(COUNTRIES, l.territories)
                except KeyError:
                    # Babel has no localized territory names: fall back.
                    l.countries = COUNTRIES
                try:
                    l.languages_2 = make_sorted_dict(LANGUAGES_2, l.languages)
                except KeyError:
                    l.languages_2 = LANGUAGES_2
        except Exception as e:
            # A single broken catalog shouldn't prevent startup.
            tell_sentry(e, {})
    del source_strings

    # Prepare a unique and sorted list for use in the language switcher
    # `percent` recurses into tuples so plural-form variants count separately.
    percent = lambda l, total: sum((percent(s, len(s)) if isinstance(s, tuple) else 1) for s in l if s) / total
    for l in list(locales.values()):
        if l.language == 'en':
            l.completion = 1  # English is the source language
            continue
        l.completion = percent(
            [m.string for m in l.catalog if m.id and not m.fuzzy], len(l.catalog))
        if l.completion == 0:
            del locales[l.language]  # drop fully-untranslated locales
    loc_url = canonical_scheme + '://%s.' + canonical_host
    domain, port = (canonical_host.split(':') + [None])[:2]
    port = int(port) if port else socket.getservbyname(canonical_scheme, 'tcp')
    subdomains = {
        # Only advertise language subdomains that actually resolve in DNS.
        k: loc_url % k for k in locales if resolve(k + '.' + domain, port)
    }
    lang_list = sorted(
        ((l.completion, l.language, l.language_name.title(), loc_url % l.language)
         for l in set(locales.values()) if l.completion > 0.5),
        # Most complete first, then alphabetically by language code.
        key=lambda t: (-t[0], t[1]),
    )

    # Add year-less date format
    year_re = re.compile(r'(^y+[^a-zA-Z]+|[^a-zA-Z]+y+$)')
    for l in locales.values():
        short_format = l.date_formats['short'].pattern
        # The year field is expected at either end of the short pattern.
        assert short_format[0] == 'y' or short_format[-1] == 'y', (
            l.language, short_format)
        l.date_formats['short_yearless'] = year_re.sub('', short_format)

    # Add aliases
    for k, v in list(locales.items()):
        locales.setdefault(ALIASES.get(k, k), v)
        locales.setdefault(ALIASES_R.get(k, k), v)
    for k, v in list(locales.items()):
        locales.setdefault(k.split('_', 1)[0], v)

    # Patch the locales to look less formal
    locales['fr'].currency_formats['standard'] = parse_pattern(
        '#,##0.00\u202f\xa4')
    locales['fr'].currency_symbols['USD'] = '$'
    locales['fr'].currencies['USD'] = 'dollar états-unien'

    # Load the markdown files
    docs = {}
    heading_re = re.compile(r'^(#+ )', re.M)
    for path in find_files(os.path.join(project_root, 'i18n'), '*.md'):
        d, b = os.path.split(path)
        doc = os.path.basename(d)
        lang = b[:-3]  # strip the `.md` extension
        with open(path, 'rb') as f:
            md = f.read().decode('utf8')
        if md.startswith('# '):
            # Drop the top-level title and demote the remaining headings.
            md = '\n'.join(md.split('\n')[1:]).strip()
            md = heading_re.sub(r'##\1', md)
        docs.setdefault(doc, {}).__setitem__(lang, markdown.render(md))

    return {
        'docs': docs, 'lang_list': lang_list, 'locales': locales,
        'subdomains': subdomains
    }