Пример #1
0
 def handle_starttag(self, tag: str, attrs: List[Tuple[str, str]]) -> None:
     """
     Handle one start tag: report links, track <base>, sniff the charset.

     * If ``tag`` is a key of ``self.link_types`` (tag -> (URL attribute,
       accepted rel values)) and its ``rel`` is accepted -- or no rel
       values are required -- each callable in ``self.link_procs`` is
       called with ``(base_uri, target, tag, title)``. Any fragment is
       removed from the target first.
     * For ``<base>``, ``self.message.base_uri`` is replaced by ``href``.
     * For ``<meta http-equiv="content-type">``, a ``charset`` parameter in
       the ``content`` attribute replaces
       ``self.message.character_encoding``.

     Attributes written without a value (``<a title>``) may be delivered
     with a value of None (html.parser does this); they are treated like
     missing attributes instead of raising AttributeError.
     """
     attr_d = dict(attrs)
     # ``or ''`` guards against a None value from a valueless attribute.
     title = (attr_d.get('title') or '').strip()
     if tag in self.link_types:
         url_attr, rels = self.link_types[tag]
         if not rels or attr_d.get("rel", None) in rels:
             target = attr_d.get(url_attr, "")
             if target:
                 # Keep only the part before the fragment identifier.
                 target = target.partition('#')[0]
                 for proc in self.link_procs:
                     proc(self.message.base_uri, target, tag, title)
     elif tag == 'base':
         href = attr_d.get('href')
         # A missing or valueless href must not wipe out the known base.
         if href is not None:
             self.message.base_uri = href
     elif tag == 'meta' and (attr_d.get('http-equiv')
                             or '').lower() == 'content-type':
         ct = attr_d.get('content', None)
         if ct:
             # Only the parameters after the first ";" are of interest;
             # the media type itself is not used.
             params = ct.partition(";")[2]
             param_dict = {}
             for param in headers.split_string(params, rfc7231.parameter,
                                               r"\s*;\s*"):
                 try:
                     a, v = param.split("=", 1)
                     param_dict[a.lower()] = headers.unquote_string(v)
                 except ValueError:
                     # No "=" present: record the bare name without a value.
                     param_dict[param.lower()] = None
             self.message.character_encoding = param_dict.get(
                 'charset', self.message.character_encoding)
Пример #2
0
 def handle_starttag(self, tag: str, attrs: List[Tuple[str, str]]) -> None:
     """
     Dispatch on a start tag to collect links, the document base and the charset.

     Link tags: when ``tag`` is in ``self.link_types`` (which maps a tag to its URL
     attribute and a collection of accepted ``rel`` values; an empty collection accepts
     any), every callable in ``self.link_procs`` receives
     ``(self.message.base_uri, target, tag, title)`` with the fragment cut off the target.

     ``<base>``: ``self.message.base_uri`` is set from ``href``.

     ``<meta http-equiv="content-type">``: the ``charset`` parameter of ``content``,
     if present, is stored in ``self.message.character_encoding``.

     A valueless attribute such as ``<meta http-equiv>`` may carry None as its value
     (html.parser reports it that way); such values are handled like absent attributes
     rather than crashing on ``None.strip()`` / ``None.lower()``.
     """
     attr_d = dict(attrs)
     raw_title = attr_d.get('title')
     title = raw_title.strip() if raw_title is not None else ''
     if tag in self.link_types:
         url_attr, rels = self.link_types[tag]
         if not rels or attr_d.get("rel", None) in rels:
             target = attr_d.get(url_attr, "")
             if target:
                 # Strip the fragment; only the resource itself is reported.
                 target = target.partition('#')[0]
                 for proc in self.link_procs:
                     proc(self.message.base_uri, target, tag, title)
     elif tag == 'base':
         href = attr_d.get('href')
         if href is not None:  # ignore <base> with no (or a valueless) href
             self.message.base_uri = href
     elif tag == 'meta' and (attr_d.get('http-equiv') or '').lower() == 'content-type':
         ct = attr_d.get('content', None)
         if ct:
             # Everything after the first ";" holds the parameters; the media type is unused.
             params = ct.partition(";")[2]
             param_dict = {}
             for param in headers.split_string(params, rfc7231.parameter, r"\s*;\s*"):
                 try:
                     a, v = param.split("=", 1)
                     param_dict[a.lower()] = headers.unquote_string(v)
                 except ValueError:
                     # Parameter without "=": keep the name, no value.
                     param_dict[param.lower()] = None
             self.message.character_encoding = param_dict.get('charset',
                                                              self.message.character_encoding)
Пример #3
0
 def handle_starttag(self, tag, attrs):
     """
     Handle one start tag: report links, track <base>, sniff the charset.

     * Tags present in ``self.link_types`` (tag -> (URL attribute, accepted
       rel values)) whose ``rel`` is accepted -- or for which no rel values
       are required -- are passed to every callable in ``self.link_procs``
       as ``(self.base, target, tag, title)``, fragment removed.
     * ``<base>`` updates ``self.base`` from its ``href``.
     * ``<meta http-equiv="content-type">`` updates ``self.doc_enc`` from
       the ``charset`` parameter of its ``content`` attribute.

     Attribute values may be None when the attribute was written without a
     value (html.parser reports them that way); those are treated as
     missing instead of raising AttributeError.
     """
     attr_d = dict(attrs)
     # ``or ''`` guards against a None value from a valueless attribute.
     title = (attr_d.get('title') or '').strip()
     if tag in self.link_types:
         url_attr, rels = self.link_types[tag]
         if not rels or attr_d.get("rel", None) in rels:
             target = attr_d.get(url_attr, "")
             if target:
                 # Keep only the part before the fragment identifier.
                 target = target.partition('#')[0]
                 for proc in self.link_procs:
                     proc(self.base, target, tag, title)
     elif tag == 'base':
         href = attr_d.get('href')
         # A missing or valueless href must not wipe out the known base.
         if href is not None:
             self.base = href
     elif (tag == 'meta'
           and (attr_d.get('http-equiv') or '').lower() == 'content-type'):
         ct = attr_d.get('content', None)
         if ct:
             # Only the parameters after the first ";" matter here; the
             # media type itself is not used.
             params = ct.partition(";")[2]
             param_dict = {}
             # Raw string: "\s" is not a valid escape in a plain literal.
             for param in rh.split_string(
                 params, syntax.PARAMETER, r"\s*;\s*"
             ):
                 try:
                     a, v = param.split("=", 1)
                     param_dict[a.lower()] = rh.unquote_string(v)
                 except ValueError:
                     # No "=" present: record the bare name without a value.
                     param_dict[param.lower()] = None
             self.doc_enc = param_dict.get('charset', self.doc_enc)
Пример #4
0
 def test_split_string(self):
     """
     Check headers.split_string on comma-separated quoted strings.

     Each case is (input, expected items, item pattern, separator pattern);
     the second case contains backslash escapes inside the quoted strings.
     """
     cases = [
         ('"abc", "def"',
          ['"abc"', '"def"'],
          rfc7230.quoted_string,
          r"\s*,\s*"),
         (r'"\"ab", "c\d"',
          [r'"\"ab"', r'"c\d"'],
          rfc7230.quoted_string,
          r"\s*,\s*"),
     ]
     for case_num, (text, wanted, item, split) in enumerate(cases):
         # Re-run __init__ on self.red before every case.
         self.red.__init__()
         got = headers.split_string(str(text), item, split)
         failure_msg = "[%s] %s != %s" % (case_num, str(wanted), str(got))
         self.assertEqual(wanted, got, failure_msg)
Пример #5
0
 def test_split_string(self):
     """
     Check headers.split_string on comma-separated quoted strings.

     Each case is (input, expected items, item pattern, separator pattern);
     the second case contains backslash escapes inside the quoted strings.
     """
     # NOTE(review): ``unicode`` below is the Python 2 builtin; this test
     # presumably targets Python 2 -- confirm before porting.
     cases = [
         ('"abc", "def"',
          ['"abc"', '"def"'],
          syntax.QUOTED_STRING,
          r"\s*,\s*"),
         (r'"\"ab", "c\d"',
          [r'"\"ab"', r'"c\d"'],
          syntax.QUOTED_STRING,
          r"\s*,\s*"),
     ]
     for case_num, (text, wanted, item, split) in enumerate(cases):
         # Re-run __init__ on self.red before every case.
         self.red.__init__()
         got = headers.split_string(unicode(text), item, split)
         failure_msg = "[%s] %s != %s" % (case_num, str(wanted), str(got))
         self.assertEqual(wanted, got, failure_msg)