def pre_save(self, model_instance, add):
    """Render the markdown source value into HTML on the rendered field.

    Returns the (unchanged) source value so the field machinery stores it;
    the rendered HTML is written onto ``model_instance`` as a side effect.
    """
    value = super().pre_save(model_instance, add)
    if not self.rendered_field:
        return value

    dirty = markdown(text=value, extensions=EXTENSIONS,
                     extension_configs=EXTENSION_CONFIGS)

    if self.validator.sanitize:
        # Build the filter list once; an empty list is equivalent to
        # passing no filters at all.
        filters = []
        if self.validator.linkify:
            filters.append(
                partial(LinkifyFilter,
                        callbacks=[format_link, blacklist_link]))
        cleaner = bleach.Cleaner(tags=self.validator.allowed_tags,
                                 attributes=self.validator.allowed_attrs,
                                 filters=filters)
        setattr(model_instance, self.rendered_field, cleaner.clean(dirty))
    else:
        # danger! the rendered HTML is stored without sanitization
        setattr(model_instance, self.rendered_field, dirty)
    return value
def _get_cleaner():
    """Build a bleach Cleaner that linkifies text with our link callbacks."""
    return bleach.Cleaner(
        tags=ALLOWED_TAGS,
        attributes=ALLOWED_ATTRIBUTES,
        filters=[partial(LinkifyFilter,
                         callbacks=[_linkify_target_blank, _linkify_rel])],
    )
def clean_html(input):
    """Replace html tags from user input, see utils.test for examples.

    Keeps a small allowlist of tags/attributes; all other tags are
    stripped (replaced with their content).  Links are linkified with the
    ``nofollow`` callback and empty links are removed.
    """
    ok_tags = [
        u"a", u"img", u"strong", u"b", u"em", u"i", u"u", u"ul", u"li",
        u"p", u"br", u"blockquote", u"code"
    ]
    ok_attributes = {
        u"a": [u"href", u"rel"],
        u"img": [u"src", u"alt", u"title"]
    }
    # If input contains a link in the format <http://...> convert it to
    # < http:// ... >, because otherwise bleach recognizes it as a tag and
    # breaks the link.  Raw strings are required here: "\<" and "\>" are
    # invalid string escapes and raise SyntaxWarning on modern Python.
    input = re.sub(r"\<(http\S+?)\>", r'< \1 >', input)
    cleaner = bleach.Cleaner(filters=[
        EmptyLinkFilter,
        partial(bleach.linkifier.LinkifyFilter, callbacks=[nofollow]),
    ],
        attributes=ok_attributes,
        tags=ok_tags,
        strip=True)
    return cleaner.clean(input)
def get_cleaner(**serializer_kwargs: bool):
    """Create a tag-stripping Cleaner with custom serializer options.

    :param serializer_kwargs: options:

        - alphabetical_attributes
        - escape_lt_in_attrs
        - escape_rcdata
        - inject_meta_charset
        - minimize_boolean_attributes
        - omit_optional_tags
        - quote_attr_values
        - quote_char
        - resolve_entities
        - sanitize
        - space_before_trailing_solidus
        - strip_whitespace
        - use_best_quote_char
        - use_trailing_solidus

    :type serializer_kwargs: Dict[str, bool]
    :rtype: bleach.Cleaner
    """
    cleaner = bleach.Cleaner([], strip=True)
    for option, enabled in serializer_kwargs.items():
        # Reject anything html5lib's serializer would silently ignore.
        if option not in HTMLSerializer.options:
            raise ValueError(
                'Parameter %s is not a valid option for HTMLSerializer'
                % option)
        setattr(cleaner.serializer, option, enabled)
    return cleaner
def __call__(self, stream, source_tooltip=False, wrap=True):
    """Render a markdown `stream` to sanitized, Jinja-safe HTML."""
    if not stream:
        return ''

    # Rewrite <http://...> autolinks as markdown links so bleach does not
    # parse the angle brackets as a tag.
    stream = RE_AUTOLINK.sub(r'[\g<1>](\g<1>)', stream)

    # Markdown -> HTML.
    html = self.markdown(stream)

    # Linkify callbacks, optionally adding the source tooltip.
    callbacks = [nofollow_callback]
    if source_tooltip:
        callbacks.append(source_tooltip_callback)

    config = current_app.config
    linkifier = partial(LinkifyFilter,
                        skip_tags=['pre'],
                        parse_email=False,
                        callbacks=callbacks)
    html = bleach.Cleaner(
        tags=config['MD_ALLOWED_TAGS'],
        attributes=config['MD_ALLOWED_ATTRIBUTES'],
        styles=config['MD_ALLOWED_STYLES'],
        protocols=config['MD_ALLOWED_PROTOCOLS'],
        strip_comments=False,
        filters=[linkifier],
    ).clean(html)

    if wrap:
        html = '<div class="markdown">{0}</div>'.format(html.strip())

    # `Markup` marks the result as safe for Jinja.
    return Markup(html)
def add_post(content):
    """Add a post to the 'database' with the current timestamp.

    Fixes over the previous version:

    - the content was bleached twice (``Cleaner().clean`` followed by
      ``bleach.clean``, which are equivalent), double-escaping entities
      such as ``&lt;`` into ``&amp;lt;`` — one pass is correct;
    - the connection is now closed even when the insert fails.
    """
    db = psycopg2.connect(database="forum")
    try:
        c = db.cursor()
        # Sanitize once; bleach.clean() is Cleaner().clean() with defaults.
        bleached_content = bleach.clean(content)
        # Parameterized query — never interpolate user content into SQL.
        c.execute("insert into posts values (%s)", (bleached_content, ))
        db.commit()
    finally:
        db.close()
def markdownify(text):
    """Render markdown `text` to HTML, sanitized per Django settings."""
    # Bleach settings, falling back to bleach's own defaults when unset.
    whitelist_tags = getattr(settings, 'MARKDOWNIFY_WHITELIST_TAGS',
                             bleach.sanitizer.ALLOWED_TAGS)
    whitelist_attrs = getattr(settings, 'MARKDOWNIFY_WHITELIST_ATTRS',
                              bleach.sanitizer.ALLOWED_ATTRIBUTES)
    whitelist_styles = getattr(settings, 'MARKDOWNIFY_WHITELIST_STYLES',
                               bleach.sanitizer.ALLOWED_STYLES)
    whitelist_protocols = getattr(settings, 'MARKDOWNIFY_WHITELIST_PROTOCOLS',
                                  bleach.sanitizer.ALLOWED_PROTOCOLS)

    # Markdown settings.
    strip = getattr(settings, 'MARKDOWNIFY_STRIP', True)
    extensions = getattr(settings, 'MARKDOWNIFY_MARKDOWN_EXTENSIONS', [])

    # Optional linkify filter for the cleaner.
    linkify = None
    if getattr(settings, 'MARKDOWNIFY_LINKIFY_TEXT', True):
        linkify = [partial(
            bleach.linkifier.LinkifyFilter,
            callbacks=getattr(settings, 'MARKDOWNIFY_LINKIFY_CALLBACKS',
                              None),
            skip_tags=getattr(settings, 'MARKDOWNIFY_LINKIFY_SKIP_TAGS',
                              None),
            parse_email=getattr(settings, 'MARKDOWNIFY_LINKIFY_PARSE_EMAIL',
                                False),
        )]

    # Markdown -> HTML; tolerate None input.
    html = markdown.markdown(text or "", extensions=extensions)

    # Sanitize unless explicitly disabled.
    if getattr(settings, 'MARKDOWNIFY_BLEACH', True):
        html = bleach.Cleaner(
            tags=whitelist_tags,
            attributes=whitelist_attrs,
            styles=whitelist_styles,
            protocols=whitelist_protocols,
            strip=strip,
            filters=linkify,
        ).clean(html)

    return mark_safe(html)
def _get_cleaner():
    """Return the module-level Cleaner, building it lazily on first call."""
    global cleaner  # pylint: disable=global-statement
    if cleaner is None:
        cleaner = bleach.Cleaner(
            tags=ALLOWED_TAGS,
            attributes=ALLOWED_ATTRIBUTES,
            filters=[partial(LinkifyFilter,
                             callbacks=[_linkify_target_blank,
                                        _linkify_rel])],
        )
    return cleaner
def clean_localized_string(self):
    """Sanitize the localized string, normalizing all links on the way.

    Allowed tags/attributes are kept; everything else is escaped.
    """
    # All links (text and markup) go through the bounce + nofollow
    # callbacks.
    linkifier = partial(
        bleach.linkifier.LinkifyFilter,
        callbacks=[linkify_bounce_url_callback, bleach.callbacks.nofollow])
    return bleach.Cleaner(
        tags=self.allowed_tags,
        attributes=self.allowed_attributes,
        filters=[linkifier],
    ).clean(str(self.localized_string))
def _get_cleaner():
    """Lazily build and cache the module-level bleach Cleaner."""
    global _cleaner
    if not _cleaner:
        extra_tags = [
            # headers
            "h1", "h2", "h3", "h4", "h5", "h6",
            # blocks
            "div", "p", "pre", "hr", "center",
            # inline nodes
            "span", "br", "sub", "sup", "s", "del", "ins", "small",
            # images
            "figure", "img", "figcaption", _iframe_secret_tag,
            # tables
            "table", "tr", "td", "th", "tbody",
        ]
        extra_attributes = {
            "h1": ["id"],
            "h2": ["id"],
            "h3": ["id"],
            "h4": ["id"],
            "h5": ["id"],
            "h6": ["id"],
            "table": ["class"],
            "div": ["class", "style"],
            "td": ["colspan"],
            "span": ["class", "translate", "id"],
            _iframe_secret_tag: ["class", "src"],
            "figure": ["class", _ngclick_secret_tag],
            "img": ["src", "class", "alt", "img-id"],
        }
        # Merge the extras into a copy of bleach's defaults without
        # mutating the shared default lists.
        allowed_attributes = dict(bleach.ALLOWED_ATTRIBUTES)
        for tag, attrs in extra_attributes.items():
            allowed_attributes[tag] = allowed_attributes.get(tag, []) + attrs
        _cleaner = bleach.Cleaner(tags=bleach.ALLOWED_TAGS + extra_tags,
                                  attributes=allowed_attributes,
                                  styles=bleach.ALLOWED_STYLES + ["clear"],
                                  protocols=bleach.ALLOWED_PROTOCOLS,
                                  strip=False,
                                  strip_comments=True)
    return _cleaner
def get_cleaner(self):
    """
    This method will help lowering the strictness level of
    `bleach.Cleaner`.  It does so by redefining the safe values we're
    currently using and considering safe in the platform.
    """
    return bleach.Cleaner(tags=self._get_allowed_tags(),
                          attributes=self._get_allowed_attributes(),
                          styles=self._get_allowed_styles())
def markdown(s: str) -> str:
    """Render CommonMark `s` to sanitized HTML.

    Shortcodes are commented out before rendering; bare links are turned
    into anchors by the LinkifyFilter.
    """
    tainted_html = CommonMark.commonmark(shortcodes.comment_shortcodes(s))
    # Cleaner that also parses bare links (see filters).
    cleaner = bleach.Cleaner(tags=ALLOWED_TAGS,
                             attributes=ALLOWED_ATTRIBUTES,
                             styles=ALLOWED_STYLES,
                             strip_comments=False,
                             filters=[bleach.linkifier.LinkifyFilter])
    return cleaner.clean(tainted_html)
def markdownify(text):
    """Convert markdown `text` to HTML, bleaching it unless disabled.

    `text` may be None or empty; it is coerced to "" so that
    ``markdown.markdown()`` does not raise — matching the other
    markdownify variants, which already pass ``text or ""``.
    """
    # Convert markdown to html.
    html = markdown.markdown(text or "", extensions=extensions)

    # Sanitize html if wanted.
    if getattr(settings, 'MARKDOWNIFY_BLEACH', True):
        cleaner = bleach.Cleaner(
            tags=whitelist_tags,
            attributes=whitelist_attrs,
            styles=whitelist_styles,
            protocols=whitelist_protocols,
            strip=strip,
            filters=linkify,
        )
        html = cleaner.clean(html)

    return mark_safe(html)
def __init__(self, config_name='default', strip=False, strip_comments=True,
             **kwargs):
    """Build a bleach Cleaner from the CKEditor `allowedContent` config.

    Tags come from the config keys (the '*' wildcard entry is ignored);
    per-tag attributes are kept unless blacklisted.
    """
    super().__init__(config_name=config_name, **kwargs)
    conf = settings.CKEDITOR_CONFIGS[config_name]

    tags = []
    attrs = {}
    for tag, props in conf['allowedContent'].items():
        if tag == '*':
            continue
        tags.append(tag)
        if isinstance(props, dict) and 'attributes' in props:
            # Keep only attributes not blacklisted for this tag.
            attrs[tag] = [
                attr for attr in props['attributes']
                if (tag, attr) not in self._attribute_blacklist
            ]

    self._cleaner = bleach.Cleaner(tags=tags,
                                   attributes=attrs,
                                   styles=self._styles,
                                   protocols=self._protocols,
                                   strip=strip,
                                   strip_comments=strip_comments)
def linkify_and_sanitize_html(html: str,
                              context: Optional[HTMLSanitizationContext] = None
                              ) -> str:
    """Use bleach and html5lib filters to linkify and sanitize HTML."""
    # Don't linkify text inside these tags.
    tildes_linkifier = partial(LinkifyFilter, skip_tags=["code", "pre"])

    allowed_attributes = ALLOWED_HTML_ATTRIBUTES_DEFAULT
    if context:
        # Merge in the attribute overrides for the current context.
        allowed_attributes = {
            **allowed_attributes,
            **ALLOWED_HTML_ATTRIBUTES_OVERRIDES.get(context, {}),
        }

    return bleach.Cleaner(
        tags=ALLOWED_HTML_TAGS,
        attributes=allowed_attributes,
        protocols=ALLOWED_LINK_PROTOCOLS,
        filters=[tildes_linkifier],
    ).clean(html)
def markdownify(text, custom_settings="default"):
    """Render markdown to sanitized HTML using MARKDOWNIFY settings.

    Old-style flat ``MARKDOWNIFY_*`` settings take precedence; otherwise
    the named entry of the ``MARKDOWNIFY`` dict is used (empty config when
    neither exists).
    """
    legacy_keys = (
        'WHITELIST_TAGS',
        'WHITELIST_ATTRS',
        'WHITELIST_STYLES',
        'WHITELIST_PROTOCOLS',
        'STRIP',
        'MARKDOWN_EXTENSIONS',
        'LINKIFY_TEXT',
        'BLEACH',
    )
    if any(getattr(settings, f"MARKDOWNIFY_{key}", None)
           for key in legacy_keys):
        markdownify_settings = legacy()
    else:
        try:
            markdownify_settings = settings.MARKDOWNIFY[custom_settings]
        except (AttributeError, KeyError):
            markdownify_settings = {}

    # Bleach settings.
    whitelist_tags = markdownify_settings.get(
        'WHITELIST_TAGS', bleach.sanitizer.ALLOWED_TAGS)
    whitelist_attrs = markdownify_settings.get(
        'WHITELIST_ATTRS', bleach.sanitizer.ALLOWED_ATTRIBUTES)
    whitelist_styles = markdownify_settings.get(
        'WHITELIST_STYLES', bleach.sanitizer.ALLOWED_STYLES)
    whitelist_protocols = markdownify_settings.get(
        'WHITELIST_PROTOCOLS', bleach.sanitizer.ALLOWED_PROTOCOLS)

    # Markdown settings.
    strip = markdownify_settings.get('STRIP', True)
    extensions = markdownify_settings.get('MARKDOWN_EXTENSIONS', [])

    # Optional linkify filter.
    linkify = None
    linkify_text = markdownify_settings.get('LINKIFY_TEXT',
                                            {"PARSE_URLS": True})
    if linkify_text.get("PARSE_URLS"):
        linkify = [partial(bleach.linkifier.LinkifyFilter,
                           callbacks=linkify_text.get('CALLBACKS', []),
                           skip_tags=linkify_text.get('SKIP_TAGS', []),
                           parse_email=linkify_text.get('PARSE_EMAIL',
                                                        False))]

    # Markdown -> HTML; tolerate None input.
    html = markdown.markdown(text or "", extensions=extensions)

    # Sanitize unless explicitly disabled.
    if markdownify_settings.get("BLEACH", True):
        html = bleach.Cleaner(tags=whitelist_tags,
                              attributes=whitelist_attrs,
                              styles=whitelist_styles,
                              protocols=whitelist_protocols,
                              strip=strip,
                              filters=linkify).clean(html)

    return mark_safe(html)
import bleach
import markdown as md
from bleach.linkifier import LinkifyFilter
from django import template

# Tags that survive sanitization; anything else is escaped by bleach.
allowed_tags = [
    'a', 'abbr', 'acronym', 'b', 'blockquote', 'code', 'em', 'h1', 'h2',
    'h3', 'h4', 'h5', 'h6', 'i', 'li', 'ol', 'p', 'pre', 'strong', 'ul'
]

register = template.Library()

# Module-level cleaner: allowlisted tags plus automatic linkification.
cleaner = bleach.Cleaner(tags=allowed_tags, filters=[LinkifyFilter])


@register.filter(is_safe=True)
def markdown(value):
    # Render markdown then sanitize; falsy input renders as "".
    if not value:
        return ""
    return cleaner.clean(md.markdown(value))


@register.tag()
def markdownify(parser, token):
    # Block tag: collect template nodes up to {% endmarkdownify %}.
    nodelist = parser.parse(('endmarkdownify', ))
    parser.delete_first_token()
    return Markdownify(nodelist)


class Markdownify(template.Node):
    # NOTE(review): only __init__ is visible in this chunk; a render()
    # method is presumably defined beyond it — confirm before relying on
    # this node.
    def __init__(self, nodelist):
        self.nodelist = nodelist
# Django from django import template from django.template.defaultfilters import stringfilter from django.utils.safestring import mark_safe # Third Party Libraries import bleach import markdown # Local from ..models import Recipient, Room register = template.Library() cleaner = bleach.Cleaner(tags=bleach.ALLOWED_TAGS + ["p", "div", "br"], strip=True) def linkify_callback(attrs, new=False): attrs[(None, "target")] = "_blank" attrs[(None, "rel")] = "noopener noreferrer nofollow" return attrs @register.filter @stringfilter def as_markdown(text): try: return mark_safe( html.unescape( bleach.linkify(cleaner.clean(markdown.markdown(text)),
TLD_REGEX = bleach.linkifier.build_url_re(tlds=ALLOWED_TLDS, protocols=ALLOWED_PROTOCOLS) def link_callback(attrs, new=False): attrs[None, "target"] = "_blank" return attrs CLEANER = bleach.Cleaner( tags=ALLOWED_TAGS, attributes=ALLOWED_ATTRIBUTES, protocols=ALLOWED_PROTOCOLS, filters=[ partial( bleach.linkifier.LinkifyFilter, url_re=TLD_REGEX, parse_email=True, skip_tags=["pre", "code"], callbacks=bleach.linkifier.DEFAULT_CALLBACKS + [link_callback], ) ], ) NO_LINKS_CLEANER = bleach.Cleaner( tags=copy(ALLOWED_TAGS)[1:], attributes=ALLOWED_ATTRIBUTES, protocols=ALLOWED_PROTOCOLS, strip=True, ) md = markdown.Markdown(extensions=[ "markdown.extensions.nl2br",
from flask_login import current_user
from markupsafe import Markup
from sqlalchemy import desc
from werkzeug.exceptions import HTTPException
from sqlalchemy.orm import Query

from .custom_json import CustomJSONEncoder
from .database import db, Base
from .objects import Game, Mod, Featured, ModVersion, ReferralEvent, DownloadEvent, FollowEvent
from .search import search_mods

TRUE_STR = ('true', 'yes', 'on')
# Paragraph separator: a blank line in either Unix or Windows form.
PARAGRAPH_PATTERN = re.compile('\n\n|\r\n\r\n')

# Shared cleaner: markdown-safe tags/attributes plus linkification.
cleaner = bleach.Cleaner(tags=bleach_allowlist.markdown_tags,
                         attributes=bleach_allowlist.markdown_attrs,
                         filters=[bleach.linkifier.LinkifyFilter])


def first_paragraphs(text: str) -> str:
    """Return the first three paragraphs of `text`, re-joined."""
    paragraphs = PARAGRAPH_PATTERN.split(text)
    return '\n\n'.join(paragraphs[:3])


def many_paragraphs(text: str) -> bool:
    """Whether `text` contains more than three paragraphs."""
    return len(PARAGRAPH_PATTERN.split(text)) > 3


def sanitize_text(text: str) -> Markup:
    """Clean `text` with the shared cleaner and mark the result safe."""
    return Markup(cleaner.clean(text))
# Link-enabled variant of the tag allowlist.
_allowed_tags_with_links=_allowed_tags+["a"]
_allowed_attributes={'a': ['href', 'title']}
_allowed_protocols=['http', 'https']

#filter to make all links show domain on hover
def _url_on_hover(attrs, new=False):
    # NOTE(review): recent bleach versions key linkify-callback attrs by
    # (namespace, name) tuples, i.e. attrs[(None, "href")]; plain string
    # keys would raise KeyError there — confirm the installed bleach
    # version accepts this form.
    attrs["title"]=urlparse(attrs["href"]).netloc
    return attrs

_callback_functions=bleach.linkifier.DEFAULT_CALLBACKS+[_url_on_hover]

# Cleaner used when link generation is disabled.
_clean_wo_links = bleach.Cleaner(tags=_allowed_tags,
                                 attributes=_allowed_attributes,
                                 protocols=_allowed_protocols,
                                 )
# NOTE(review): bleach invokes each filter as filter(source=...); this
# zero-argument lambda ignores that argument and would raise TypeError
# when the cleaner runs.  It likely should be
# partial(LinkifyFilter, skip_tags=["pre"], parse_email=False,
#         callbacks=_callback_functions) — confirm and fix.
_clean_w_links = bleach.Cleaner(tags=_allowed_tags,
                                attributes=_allowed_attributes,
                                protocols=_allowed_protocols,
                                filters=[lambda:LinkifyFilter(skip_tags=["pre"],
                                                              parse_email=False,
                                                              callbacks=_callback_functions)
                                         ]
                                )

def sanitize(text, linkgen=False):
    # NOTE(review): only the linkgen branch is visible in this chunk; the
    # non-linkgen path presumably follows beyond it.
    if linkgen:
        return _clean_w_links.clean(text)
# (Ruqqus already forces its own https) new_url = ParseResult(scheme="https", netloc=parsed_url.netloc, path=parsed_url.path, params=parsed_url.params, query=parsed_url.query, fragment=parsed_url.fragment) attrs[(None, "href")] = urlunparse(new_url) return attrs _clean_wo_links = bleach.Cleaner( tags=_allowed_tags, attributes=_allowed_attributes, protocols=_allowed_protocols, ) _clean_w_links = bleach.Cleaner(tags=_allowed_tags_with_links, attributes=_allowed_attributes, protocols=_allowed_protocols, filters=[ partial(LinkifyFilter, skip_tags=["pre"], parse_email=False, callbacks=[a_modify]) ]) _clean_bio = bleach.Cleaner(tags=_allowed_tags_in_bio, attributes=_allowed_attributes, protocols=_allowed_protocols,
for _, token, next in self.slider(): if token["type"] == "StartTag" and token["name"] in self.elements: if (next["type"] == "Character" and next["data"] == "" or next["type"] == "EmptyTag"): remove = True else: remove = False yield token elif not remove: remove = False yield token cleaner = bleach.Cleaner( tags=bleach.ALLOWED_TAGS + ["p", "div", "br"], strip=True, filters=[whitespace.Filter, RemoveEmptyFilter], ) def linkify_callback(attrs, new=False): attrs[(None, "target")] = "_blank" attrs[(None, "rel")] = "noopener noreferrer nofollow" return attrs def clean_html_content(value): try: return bleach.linkify(cleaner.clean(value), [linkify_callback]) if value else "" except (ValueError, TypeError):
# Very permissive HTML cleaner.
# NOTE(review): this allowlist includes <style>, <iframe>, <form>/<input>
# and a global style/class/id attribute set — far more permissive than
# typical bleach configurations; combined, these may enable phishing or
# CSS-based attacks.  Confirm this level of trust is intentional.
domestos = bleach.Cleaner(
    tags=[
        "a", "abbr", "address", "area", "article", "aside", "audio", "b",
        "bdi", "bdo", "blockquote", "body", "br", "button", "canvas",
        "caption", "cite", "code", "col", "colgroup", "data", "datalist",
        "dd", "del", "details", "dfn", "div", "dl", "dt", "em",
        "fieldset", "figcaption", "figure", "footer", "form", "h1", "h2",
        "h3", "h4", "h5", "h6", "header", "hr", "i", "iframe", "img",
        "input", "ins", "kbd", "label", "legend", "li", "map", "mark",
        "meter", "nav", "noscript", "ol", "optgroup", "option", "output",
        "p", "picture", "pre", "progress", "q", "rp", "rt", "ruby", "s",
        "samp", "section", "select", "small", "source", "span", "strong",
        "style", "svg", "sub", "summary", "sup", "table", "tbody", "td",
        "textarea", "tfoot", "th", "thead", "time", "tr", "track", "u",
        "ul", "var", "video", "wbr"
    ],
    attributes={
        # Global attributes allowed on every tag.
        '*': [
            'style', 'dir', 'class', 'id', 'lang', 'tabindex', 'title',
            'translate'
        ],
        'a': [
            'download', 'href', 'hreflang', 'media', 'referrerpolicy',
            'rel', 'target', 'type'
        ],
        'area': [
            'alt', 'coords', 'download', 'href', 'hreflang', 'media',
            'rel', 'shape', 'target', 'type'
        ],
        'audio': ['autoplay', 'controls', 'loop', 'muted', 'preload',
                  'src'],
        'bdo': ['dir'],
        'blockquote': ['cite'],
        'button': [
            'autofocus', 'disabled', 'form', 'formaction', 'formenctype',
            'formmethod', 'formnovalidate', 'formtarget', 'name', 'type',
            'value'
        ],
        'canvas': ['height', 'width'],
        'col': ['span'],
        'colgroup': ['span'],
        'data': ['value'],
        'del': ['cite', 'datetime'],
        'details': ['open'],
        'fieldset': ['disabled', 'form', 'name'],
        'form': [
            'accept-charset', 'action', 'autocomplete', 'enctype',
            'method', 'name', 'novalidate', 'rel', 'target'
        ],
        'iframe': [
            'allow', 'allowfullscreen', 'height', 'name',
            'referrerpolicy', 'sandbox', 'src', 'srcdoc', 'width'
        ],
        'img': [
            'alt', 'crossorigin', 'height', 'ismap', 'longdesc',
            'referrerpolicy', 'sizes', 'src', 'srcset', 'usemap', 'width'
        ],
        'input': [
            'accept', 'alt', 'autocomplete', 'autofocus', 'checked',
            'dirname', 'disabled', 'form', 'formaction', 'formenctype',
            'formmethod', 'formnovalidate', 'formtarget', 'height',
            'list', 'max', 'maxlength', 'min', 'minlength', 'multiple',
            'name', 'pattern', 'placeholder', 'readonly', 'required',
            'size', 'src', 'step', 'type', 'value', 'width'
        ],
        'ins': ['cite', 'datetime'],
        'label': ['for', 'form'],
        'li': ['value'],
        'map': ['name'],
        'meter': ['form', 'high', 'low', 'max', 'min', 'optimum',
                  'value'],
        'ol': ['reversed', 'start', 'type'],
        'optgroup': ['disabled', 'label'],
        'option': ['disabled', 'label', 'selected', 'value'],
        'output': ['for', 'form', 'name'],
        'progress': ['max', 'value'],
        'q': ['cite'],
        'select': [
            'autofocus', 'disabled', 'form', 'multiple', 'name',
            'required', 'size'
        ],
        'source': ['media', 'sizes', 'src', 'srcset', 'type'],
        'style': ['media', 'type'],
        'td': ['colspan', 'headers', 'rowspan'],
        'textarea': [
            'autofocus', 'cols', 'dirname', 'disabled', 'form',
            'maxlength', 'name', 'placeholder', 'readonly', 'required',
            'rows', 'wrap'
        ],
        'th': ['abbr', 'colspan', 'headers', 'rowspan', 'scope'],
        'time': ['datetime'],
        'track': ['default', 'kind', 'label', 'src', 'srclang'],
        'video': [
            'autoplay', 'controls', 'height', 'loop', 'muted', 'poster',
            'preload', 'src', 'width'
        ]
    },
    # CSS properties permitted inside style="" attributes.
    # NOTE(review): "@keyframes" and "@media" are at-rules, not property
    # names — bleach matches property names here, so these two entries
    # are inert.  Confirm they can be dropped.
    styles=[
        "align-content", "align-items", "align-self", "animation",
        "animation-delay", "animation-direction", "animation-duration",
        "animation-fill-mode", "animation-iteration-count",
        "animation-name", "animation-play-state",
        "animation-timing-function", "backface-visibility", "background",
        "background-attachment", "background-blend-mode",
        "background-clip", "background-color", "background-image",
        "background-origin", "background-position", "background-repeat",
        "background-size", "border", "border-bottom",
        "border-bottom-color", "border-bottom-left-radius",
        "border-bottom-right-radius", "border-bottom-style",
        "border-bottom-width", "border-collapse", "border-color",
        "border-image", "border-image-outset", "border-image-repeat",
        "border-image-slice", "border-image-source",
        "border-image-width", "border-left", "border-left-color",
        "border-left-style", "border-left-width", "border-radius",
        "border-right", "border-right-color", "border-right-style",
        "border-right-width", "border-spacing", "border-style",
        "border-top", "border-top-color", "border-top-left-radius",
        "border-top-right-radius", "border-top-style",
        "border-top-width", "border-width", "bottom",
        "box-decoration-break", "box-shadow", "box-sizing",
        "break-after", "break-before", "break-inside", "caption-side",
        "caret-color", "clear", "clip", "color", "column-count",
        "column-fill", "column-gap", "column-rule", "column-rule-color",
        "column-rule-style", "column-rule-width", "column-span",
        "column-width", "columns", "cursor", "direction", "display",
        "empty-cells", "filter", "flex", "flex-basis", "flex-direction",
        "flex-flow", "flex-grow", "flex-shrink", "flex-wrap", "float",
        "font", "font-family", "font-feature-settings", "font-kerning",
        "font-language-override", "font-size", "font-size-adjust",
        "font-stretch", "font-style", "font-synthesis", "font-variant",
        "font-variant-alternates", "font-variant-caps",
        "font-variant-east-asian", "font-variant-ligatures",
        "font-variant-numeric", "font-variant-position", "font-weight",
        "grid", "grid-area", "grid-auto-columns", "grid-auto-flow",
        "grid-auto-rows", "grid-column", "grid-column-end",
        "grid-column-gap", "grid-column-start", "grid-gap", "grid-row",
        "grid-row-end", "grid-row-gap", "grid-row-start",
        "grid-template", "grid-template-areas", "grid-template-columns",
        "grid-template-rows", "hanging-punctuation", "height", "hyphens",
        "image-rendering", "isolation", "justify-content", "@keyframes",
        "left", "letter-spacing", "line-break", "line-height",
        "list-style", "list-style-image", "list-style-position",
        "list-style-type", "margin", "margin-bottom", "margin-left",
        "margin-right", "margin-top", "max-height", "max-width",
        "@media", "min-height", "min-width", "mix-blend-mode",
        "object-fit", "object-position", "opacity", "order", "outline",
        "outline-color", "outline-offset", "outline-style",
        "outline-width", "overflow", "overflow-wrap", "overflow-x",
        "overflow-y", "padding", "padding-bottom", "padding-left",
        "padding-right", "padding-top", "perspective",
        "perspective-origin", "pointer-events", "position", "quotes",
        "resize", "right", "scroll-behavior", "tab-size", "table-layout",
        "text-align", "text-align-last", "text-combine-upright",
        "text-decoration", "text-decoration-color",
        "text-decoration-line", "text-decoration-style", "text-indent",
        "text-justify", "text-orientation", "text-overflow",
        "text-shadow", "text-transform", "text-underline-position",
        "top", "transform", "transform-origin", "transform-style",
        "transition", "transition-delay", "transition-duration",
        "transition-property", "transition-timing-function",
        "unicode-bidi", "user-select", "vertical-align", "visibility",
        "white-space", "width", "word-break", "word-spacing", "word-wrap",
        "writing-mode", "z-index"
    ])
#Force https for all external links in comments # (Ruqqus already forces its own https) new_url=ParseResult(scheme="https", netloc=parsed_url.netloc, path=parsed_url.path, params=parsed_url.params, query=parsed_url.query, fragment=parsed_url.fragment) attrs[(None, "href")]=urlunparse(new_url) return attrs _clean_wo_links = bleach.Cleaner(tags=_allowed_tags, attributes=_allowed_attributes, protocols=_allowed_protocols, ) _clean_w_links = bleach.Cleaner(tags=_allowed_tags, attributes=_allowed_attributes, protocols=_allowed_protocols, filters=[partial(LinkifyFilter, skip_tags=["pre"], parse_email=False , callbacks=[nofollow] ) ] ) def sanitize(text, linkgen=False):
from .kerbdown import EmbedInlineProcessor

TRUE_STR = ('true', 'yes', 'on')
# Paragraph separator: a blank line in Unix or Windows form.
PARAGRAPH_PATTERN = re.compile('\n\n|\r\n\r\n')


def allow_iframe_attr(tagname: str, attrib: str, val: str) -> bool:
    """Permit iframe attributes from the embed allowlist.

    The ``src`` value must start with one of the known embed prefixes;
    any other attribute just has to be in IFRAME_ATTRIBS.
    """
    if attrib == 'src':
        return any(val.startswith(prefix)
                   for prefix in EmbedInlineProcessor.IFRAME_SRC_PREFIXES)
    return attrib in EmbedInlineProcessor.IFRAME_ATTRIBS


# Markdown allowlist plus tightly-restricted iframes, with linkification.
cleaner = bleach.Cleaner(
    tags=bleach_allowlist.markdown_tags + ['iframe'],
    attributes={  # type: ignore[arg-type]
        **bleach_allowlist.markdown_attrs,
        'iframe': allow_iframe_attr
    },
    filters=[bleach.linkifier.LinkifyFilter])


def first_paragraphs(text: Optional[str]) -> str:
    """Return the first three paragraphs of `text` ('' for falsy input)."""
    if not text:
        return ''
    return '\n\n'.join(PARAGRAPH_PATTERN.split(text)[0:3])


def many_paragraphs(text: str) -> bool:
    """Whether `text` contains more than three paragraphs."""
    return len(PARAGRAPH_PATTERN.split(text)) > 3


def sanitize_text(text: str) -> Markup:
    """Clean `text` with the module cleaner and mark the result safe."""
    return Markup(cleaner.clean(text))
# -*- coding: utf-8 -*- import bleach from html.parser import HTMLParser from .spaces import ( remove_continuous_spaces, ) h = HTMLParser() tags_cleaner = bleach.Cleaner( tags=[], attributes=[], strip=True, filters=[] ) def convert_html_to_text(text, merge_continuous_spaces=True): if not text: return text # clean html entity first text = h.unescape(text) # clean ALL html tags text = tags_cleaner.clean(text) if merge_continuous_spaces: text = remove_continuous_spaces(text)