def handle_starttag(self, tag: str, attrs: List[Tuple[str, str]]) -> None:
    """Process one HTML start tag.

    Three kinds of tag are acted on:

    * tags listed in ``self.link_types``: the URL attribute named there is
      read, everything from the first ``#`` on is removed, and each callable
      in ``self.link_procs`` is called with
      ``(self.message.base_uri, target, tag, title)``;
    * ``base``: ``self.message.base_uri`` is replaced by ``href``;
    * ``meta`` with ``http-equiv="content-type"``: the ``charset`` parameter
      of ``content``, when present, replaces
      ``self.message.character_encoding``.

    Attributes written without a value (``<a title>``) arrive with a value
    of ``None`` (this is how the standard library's ``HTMLParser`` reports
    them). They are treated like absent attributes instead of raising
    ``AttributeError`` on ``None.strip()`` / ``None.lower()``.

    Args:
        tag: Name of the tag being opened.
        attrs: ``(name, value)`` pairs; ``value`` may be ``None``.
    """
    attr_d = dict(attrs)
    # `or ''` covers both a missing attribute and a valueless one (None).
    title = (attr_d.get('title') or '').strip()
    if tag in self.link_types:
        url_attr, rels = self.link_types[tag]
        # An empty `rels` means "any rel"; otherwise the rel must be listed.
        if rels and attr_d.get("rel") not in rels:
            return
        target = attr_d.get(url_attr)
        if not target:
            return
        # Drop the fragment; the result may be empty (e.g. href="#top").
        target = target.partition('#')[0]
        for proc in self.link_procs:
            proc(self.message.base_uri, target, tag, title)
    elif tag == 'base':
        href = attr_d.get('href')
        # A valueless href must not overwrite the base URI with None.
        if href is not None:
            self.message.base_uri = href
    elif tag == 'meta' and (attr_d.get('http-equiv') or '').lower() == 'content-type':
        ct = attr_d.get('content')
        if not ct:
            return
        # Only the parameters after the first ';' matter here.
        params = ct.partition(";")[2]
        param_dict = {}
        for param in headers.split_string(params, rfc7231.parameter, r"\s*;\s*"):
            try:
                # unquote_string stays inside the try so that a ValueError
                # from it is handled the same way as a missing '='.
                a, v = param.split("=", 1)
                param_dict[a.lower()] = headers.unquote_string(v)
            except ValueError:
                param_dict[param.lower()] = None
        self.message.character_encoding = param_dict.get(
            'charset', self.message.character_encoding)
def handle_starttag(self, tag: str, attrs: List[Tuple[str, str]]) -> None:
    """Handle an HTML start tag: collect links, base URI and declared charset.

    * If ``tag`` is a key of ``self.link_types``, the configured URL
      attribute is read, its fragment (from the first ``#``) is removed, and
      every callable in ``self.link_procs`` receives
      ``(self.message.base_uri, target, tag, title)``. When the entry lists
      ``rel`` values, the tag's ``rel`` must be one of them.
    * If ``tag`` is ``base``, ``href`` becomes ``self.message.base_uri``.
    * If ``tag`` is ``meta`` with ``http-equiv="content-type"``, a
      ``charset`` parameter in ``content`` becomes
      ``self.message.character_encoding``.

    A valueless attribute (``<a title>``) is reported with value ``None``
    (as the standard library's ``HTMLParser`` does). Such attributes are
    handled as if absent, so they no longer raise ``AttributeError``.

    Args:
        tag: Name of the tag being opened.
        attrs: ``(name, value)`` pairs; ``value`` may be ``None``.
    """
    attr_d = dict(attrs)
    # A valueless title is None; fall back to '' before stripping.
    title = (attr_d.get('title') or '').strip()
    if tag in self.link_types:
        url_attr, rels = self.link_types[tag]
        rel_ok = not rels or attr_d.get("rel") in rels
        target = attr_d.get(url_attr) if rel_ok else None
        if target:
            # Keep only the part before the fragment identifier.
            target = target.split('#', 1)[0]
            for proc in self.link_procs:
                proc(self.message.base_uri, target, tag, title)
    elif tag == 'base':
        href = attr_d.get('href')
        # Ignore <base href> without a value rather than storing None.
        if href is not None:
            self.message.base_uri = href
    elif tag == 'meta' and (attr_d.get('http-equiv') or '').lower() == 'content-type':
        ct = attr_d.get('content')
        if ct:
            # The media type before the first ';' is not needed here.
            _, _, params = ct.partition(";")
            param_dict = {}
            for param in headers.split_string(params, rfc7231.parameter, r"\s*;\s*"):
                try:
                    # Both the split and the unquote stay guarded, so any
                    # ValueError records the bare parameter with no value.
                    a, v = param.split("=", 1)
                    param_dict[a.lower()] = headers.unquote_string(v)
                except ValueError:
                    param_dict[param.lower()] = None
            self.message.character_encoding = param_dict.get(
                'charset', self.message.character_encoding)
def handle_starttag(self, tag, attrs):
    """Process one HTML start tag.

    * Tags that are keys of ``self.link_types``: the configured URL
      attribute is read, its fragment (from the first ``#``) is removed, and
      each callable in ``self.link_procs`` is called with
      ``(self.base, target, tag, title)``. When the entry lists ``rel``
      values, the tag's ``rel`` must be one of them.
    * ``base``: ``self.base`` is replaced by ``href``.
    * ``meta`` with ``http-equiv="content-type"``: a ``charset`` parameter
      in ``content`` replaces ``self.doc_enc``.

    Attributes written without a value (``<a title>``) arrive as ``None``
    (as the standard library's ``HTMLParser`` reports them). They are
    treated like absent attributes instead of raising ``AttributeError``.

    ``tag`` is the tag name; ``attrs`` is a list of ``(name, value)`` pairs.
    """
    attr_d = dict(attrs)
    # `or ''` covers both a missing attribute and a valueless one (None).
    title = (attr_d.get('title') or '').strip()
    if tag in self.link_types:
        url_attr, rels = self.link_types[tag]
        # An empty `rels` means "any rel"; otherwise the rel must be listed.
        if rels and attr_d.get("rel") not in rels:
            return
        target = attr_d.get(url_attr)
        if not target:
            return
        # Drop the fragment; the result may be empty (e.g. href="#top").
        target = target.partition('#')[0]
        for proc in self.link_procs:
            proc(self.base, target, tag, title)
    elif tag == 'base':
        href = attr_d.get('href')
        # A valueless href must not overwrite the base with None.
        if href is not None:
            self.base = href
    elif tag == 'meta' and \
            (attr_d.get('http-equiv') or '').lower() == 'content-type':
        ct = attr_d.get('content')
        if not ct:
            return
        # Only the parameters after the first ';' matter here.
        params = ct.partition(";")[2]
        param_dict = {}
        # Raw string: "\s" in a plain literal is an invalid escape sequence.
        for param in rh.split_string(params, syntax.PARAMETER, r"\s*;\s*"):
            try:
                # unquote_string stays inside the try so that a ValueError
                # from it is handled the same way as a missing '='.
                a, v = param.split("=", 1)
                param_dict[a.lower()] = rh.unquote_string(v)
            except ValueError:
                param_dict[param.lower()] = None
        self.doc_enc = param_dict.get('charset', self.doc_enc)
def test_split_string(self):
    """Check ``headers.split_string`` on comma-separated quoted strings.

    Every case gives the input string, the expected list of items, and the
    ``item`` and ``split`` arguments passed to ``split_string``. The case
    index is included in the failure message.
    """
    cases = [
        ('"abc", "def"',
         ['"abc"', '"def"'],
         rfc7230.quoted_string, r"\s*,\s*"),
        # Backslash escapes inside the quotes must not split the item.
        (r'"\"ab", "c\d"',
         [r'"\"ab"', r'"c\d"'],
         rfc7230.quoted_string, r"\s*,\s*"),
    ]
    for index, (raw, wanted, item, split) in enumerate(cases):
        # Re-run the initializer of self.red before each case.
        self.red.__init__()
        got = headers.split_string(str(raw), item, split)
        failure_note = "[%s] %s != %s" % (index, str(wanted), str(got))
        self.assertEqual(wanted, got, failure_note)
def test_split_string(self):
    """Check ``headers.split_string`` on comma-separated quoted strings.

    Every case gives the input string, the expected list of items, and the
    ``item`` and ``split`` arguments passed to ``split_string``. The case
    index is included in the failure message.
    """
    cases = [
        ('"abc", "def"',
         ['"abc"', '"def"'],
         syntax.QUOTED_STRING, r"\s*,\s*"),
        # Backslash escapes inside the quotes must not split the item.
        (r'"\"ab", "c\d"',
         [r'"\"ab"', r'"c\d"'],
         syntax.QUOTED_STRING, r"\s*,\s*"),
    ]
    for index, (raw, wanted, item, split) in enumerate(cases):
        # Re-run the initializer of self.red before each case.
        self.red.__init__()
        # NOTE(review): `unicode` is a Python 2 builtin; confirm the target
        # interpreter before running this under Python 3.
        got = headers.split_string(unicode(raw), item, split)
        failure_note = "[%s] %s != %s" % (index, str(wanted), str(got))
        self.assertEqual(wanted, got, failure_note)