def handle_starttag(self, tag, attrs):
    """Inspect one start tag for links, a base URI, or a declared charset.

    tag   -- the tag name.
    attrs -- an iterable of (name, value) pairs; a value may be None when
             the attribute was written without one (e.g. ``<a title>``).

    * If ``tag`` is a key of ``self.link_types``, the attribute named by
      that entry is read, anything from the first '#' onwards is removed,
      and the result is handed to ``self.process_link(target, tag, title)``.
      Nothing is reported when the attribute is missing or empty.
    * ``<base>`` replaces ``self.base`` with its ``href`` when one is given.
    * ``<meta http-equiv="content-type">`` has the parameters of its
      ``content`` attribute parsed; a ``charset`` parameter replaces
      ``self.doc_enc``.
    """
    attr_d = dict(attrs)
    # ``.get(name, '')`` only covers a missing attribute; ``or ''`` also
    # covers one that is present with a None value, which would otherwise
    # raise AttributeError on .strip() / .lower().
    title = (attr_d.get('title') or '').strip()
    if tag in self.link_types:
        target = attr_d.get(self.link_types[tag], "")
        if target:
            # Drop the fragment identifier; a target that was only a
            # fragment is still reported, as an empty string.
            target = target.partition('#')[0]
            self.process_link(target, tag, title)
    elif tag == 'base':
        self.base = attr_d.get('href', self.base)
    elif tag == 'meta' and \
            (attr_d.get('http-equiv') or '').lower() == 'content-type':
        ct = attr_d.get('content', None)
        if ct:
            # Only the parameters after the first ';' are needed; the
            # media type itself is not used here.
            params = ct.partition(";")[2]
            param_dict = {}
            for param in rh.split_string(
                params, syntax.PARAMETER, r"\s*;\s*"
            ):
                try:
                    a, v = param.split("=", 1)
                    param_dict[a.lower()] = rh.unquote_string(v)
                except ValueError:
                    # No '=' in the parameter: record the bare name.
                    param_dict[param.lower()] = None
            self.doc_enc = param_dict.get('charset', self.doc_enc)
def handle_starttag(self, tag, attrs):
    """Handle a start tag: collect links, the base URI and the charset.

    tag   -- the tag name.
    attrs -- an iterable of (name, value) pairs; a value may be None when
             the attribute carries no value (e.g. ``<meta http-equiv>``).

    Tags listed in ``self.link_types`` have the attribute named there read,
    stripped of everything from the first '#', and passed to
    ``self.process_link(target, tag, title)``; a missing or empty attribute
    is skipped. A ``base`` tag updates ``self.base`` from its ``href``. A
    ``meta`` tag whose ``http-equiv`` is ``content-type`` (case-insensitive)
    has the parameters of its ``content`` value parsed, and a ``charset``
    parameter updates ``self.doc_enc``.
    """
    attr_d = dict(attrs)
    # A valueless attribute maps to None, which the '' default of .get()
    # does not replace; fall back to '' explicitly so .strip() / .lower()
    # cannot raise AttributeError.
    title = (attr_d.get('title') or '').strip()
    if tag in self.link_types:
        target = attr_d.get(self.link_types[tag], "")
        if target:
            # Remove the fragment part; a fragment-only target is still
            # passed on, as an empty string.
            target = target.partition('#')[0]
            self.process_link(target, tag, title)
    elif tag == 'base':
        self.base = attr_d.get('href', self.base)
    elif tag == 'meta' and \
            (attr_d.get('http-equiv') or '').lower() == 'content-type':
        ct = attr_d.get('content', None)
        if ct:
            # Everything after the first ';' is the parameter list; the
            # media type before it is not needed.
            params = ct.partition(";")[2]
            param_dict = {}
            for param in rh.split_string(params, syntax.PARAMETER, r"\s*;\s*"):
                try:
                    a, v = param.split("=", 1)
                    param_dict[a.lower()] = rh.unquote_string(v)
                except ValueError:
                    # Parameter without '=': keep its name with no value.
                    param_dict[param.lower()] = None
            self.doc_enc = param_dict.get('charset', self.doc_enc)
def test_split_string(self):
    # Table-driven check of rh.split_string. Each case is
    # (input text, expected items, item syntax, separator regex).
    cases = [
        ('"abc", "def"',
         ['"abc"', '"def"'],
         syntax.QUOTED_STRING,
         r"\s*,\s*"),
        (r'"\"ab", "c\d"',
         [r'"\"ab"', r'"c\d"'],
         syntax.QUOTED_STRING,
         r"\s*,\s*"),
    ]
    for case_no, (text, expected, item, split) in enumerate(cases):
        # Re-run the fixture's initialiser before each case
        # (presumably to reset its state -- confirm).
        self.red.__init__()
        actual = rh.split_string(unicode(text), item, split)
        # The case index in the message identifies the failing row.
        failure_msg = "[%s] %s != %s" % (case_no, str(expected), str(actual))
        self.assertEqual(expected, actual, failure_msg)
def test_split_string(self):
    # Exercises rh.split_string on comma-separated quoted strings,
    # including one input with backslash escapes inside the quotes.
    separator = r"\s*,\s*"
    inputs = ['"abc", "def"', r'"\"ab", "c\d"']
    wanted = [['"abc"', '"def"'], [r'"\"ab"', r'"c\d"']]
    for index, (raw, expected_items) in enumerate(zip(inputs, wanted)):
        item_syntax = syntax.QUOTED_STRING
        # Re-initialise the fixture object before every case
        # (presumably to start from fresh state -- confirm).
        self.red.__init__()
        got = rh.split_string(unicode(raw), item_syntax, separator)
        self.assertEqual(
            expected_items,
            got,
            # The index in the message shows which case failed.
            "[%s] %s != %s" % (index, str(expected_items), str(got)),
        )