Пример #1
0
        def http_error_302(self, req, fp, code, msg, headers):
            """Handle a redirect, following it only for LinkedIn-pattern URLs.

            The redirect target is taken from the ``location`` header (or
            ``uri`` as a fallback), cleaned and resolved against the URL of
            *req*. Redirect-loop bookkeeping is kept in ``redirect_dict`` on
            the request objects.

            Returns:
                ``None`` when the response carries no redirect target or
                ``self.redirect_request`` declines the redirect; the result
                of ``self.parent.open`` when the new URL matches the LinkedIn
                pattern; otherwise a response built by
                ``_response.make_response`` with an empty body, the original
                headers, the new URL, and status ``200``/``'OK'``.

            Raises:
                urllib.error.HTTPError: when the same URL was visited
                    ``self.max_repeats`` times or ``self.max_redirections``
                    distinct redirects were already recorded.
            """
            # Code from mechanize._urllib2_fork.HTTPRedirectHandler:
            if 'location' in headers:
                newurl = headers.getheaders('location')[0]
            elif 'uri' in headers:
                newurl = headers.getheaders('uri')[0]
            else:
                return
            newurl = _rfc3986.clean_url(newurl, "latin-1")
            newurl = _rfc3986.urljoin(req.get_full_url(), newurl)

            new = self.redirect_request(req, fp, code, msg, headers, newurl)
            if new is None:
                return

            # Loop detection: the visit counter dict is shared between the
            # original request and every request derived from it.
            if hasattr(req, 'redirect_dict'):
                visited = new.redirect_dict = req.redirect_dict
                if (visited.get(newurl, 0) >= self.max_repeats or
                        len(visited) >= self.max_redirections):
                    raise urllib.error.HTTPError(req.get_full_url(), code,
                                                 self.inf_msg + msg, headers, fp)
            else:
                visited = new.redirect_dict = req.redirect_dict = {}
            visited[newurl] = visited.get(newurl, 0) + 1

            # Drain the redirect response; close it even if the read fails so
            # the underlying file object is not leaked.
            try:
                fp.read()
            finally:
                fp.close()

            # If the redirected URL doesn't match the LinkedIn pattern, do not
            # follow it: hand back an empty 200 response for that URL instead.
            # NOTE(review): the unanchored ``.*`` lets the pattern match
            # "www.linkedin.com" anywhere after the scheme (path or query
            # included), not only in the host -- confirm this is intended.
            new_url = new.get_full_url()
            if not re.search(r'^http(?:s)?\:\/\/.*www\.linkedin\.com', new_url):
                return _response.make_response('', headers.items(), new_url, 200, 'OK')
            return self.parent.open(new)
Пример #2
0
    def _test_link_encoding(self, factory):
        """Check the Link returned by find_link() for each declared charset.

        For both UTF-8 and latin-1, a mock response holding a single anchor
        (href, name and text all ending in ``&mdash;&#x2014;``) is served with
        a matching ``content-type`` charset and opened through a
        ``TestBrowser`` built with *factory*. The link returned by
        ``find_link()`` must equal a hand-built ``mechanize.Link``.
        """
        import mechanize
        from mechanize._rfc3986 import clean_url

        page_url = "http://example.com/"
        markup = (
            '<a href="http://example.com/foo/bar&mdash;&#x2014;.html"\n'
            '   name="name0&mdash;&#x2014;">blah&mdash;&#x2014;</a>\n'
        )
        for encoding in ("UTF-8", "latin-1"):
            browser = TestBrowser(factory=factory)
            content_type = "text/html%s" % ("; charset=%s" % encoding)
            response = MockResponse(
                page_url, markup, {"content-type": content_type})
            browser.add_handler(make_mock_handler()([("http_open", response)]))
            browser.open(page_url)

            # U+2014 twice, encoded in the declared charset; when the charset
            # cannot encode it, the raw entity text is expected instead.
            try:
                dashes = u"\u2014".encode(encoding) * 2
            except UnicodeError:
                dashes = '&mdash;&#x2014;'
            quoted_dashes = clean_url(dashes, encoding)

            # Link(base_url, url, text, tag, attrs)
            expected = mechanize.Link(
                page_url,
                "http://example.com/foo/bar%s.html" % quoted_dashes,
                "blah" + dashes,
                "a",
                [
                    ("href", "http://example.com/foo/bar%s.html" % dashes),
                    ("name", "name0%s" % dashes),
                ],
            )
            self.assertEqual(browser.find_link(), expected)
Пример #3
0
    def test_link_encoding(self):
        """Check the Link returned by find_link() for each declared charset.

        For both UTF-8 and latin-1, a mock response holding a single anchor
        (href, name and text all ending in ``&mdash;&#x2014;``) is served with
        a matching ``content-type`` charset and opened through a
        ``TestBrowser``. The expected link text and attributes are built from
        ``em_dash`` as ``unicode_type`` values; the expected URL is the
        ``clean_url`` form of the UTF-8 encoded dashes for the loop's
        encoding.
        """
        import mechanize
        from mechanize._rfc3986 import clean_url

        page_url = "http://example.com/"
        markup = (
            '<a href="http://example.com/foo/bar&mdash;&#x2014;.html"\n'
            '   name="name0&mdash;&#x2014;">blah&mdash;&#x2014;</a>\n'
        )
        for encoding in ("UTF-8", "latin-1"):
            browser = TestBrowser()
            response_headers = {
                "content-type": "text/html%s" % ("; charset=%s" % encoding),
            }
            response = MockResponse(page_url, markup, response_headers)
            browser.add_handler(make_mock_handler()([("http_open", response)]))
            browser.open(page_url)

            quoted_dashes = clean_url(em_dash.encode('utf-8') * 2, encoding)
            link_text = unicode_type("blah") + (em_dash * 2)
            href_value = unicode_type("http://example.com/foo/bar%s.html") % (
                em_dash * 2)
            name_value = unicode_type("name0") + em_dash + em_dash

            # Link(base_url, url, text, tag, attrs)
            expected = mechanize.Link(
                page_url,
                "http://example.com/foo/bar%s.html" % quoted_dashes,
                link_text,
                "a",
                [("href", href_value), ("name", name_value)],
            )
            self.assertEqual(browser.find_link(), expected)
Пример #4
0
    def _test_link_encoding(self, factory):
        """Verify find_link() against an expected Link under two charsets.

        Iterates over UTF-8 and latin-1. Each pass serves a one-anchor HTML
        page (href, name and text ending in ``&mdash;&#x2014;``) with the
        charset declared in ``content-type``, opens it with a ``TestBrowser``
        created from *factory*, and asserts that ``find_link()`` equals the
        ``mechanize.Link`` constructed here.
        """
        import mechanize
        from mechanize._rfc3986 import clean_url

        base = "http://example.com/"
        href_template = "http://example.com/foo/bar%s.html"
        body = (
            '<a href="http://example.com/foo/bar&mdash;&#x2014;.html"\n'
            '   name="name0&mdash;&#x2014;">blah&mdash;&#x2014;</a>\n'
        )
        for encoding in ("UTF-8", "latin-1"):
            charset_suffix = "; charset=%s" % encoding
            browser = TestBrowser(factory=factory)
            mock_response = MockResponse(
                base, body, {"content-type": "text/html%s" % charset_suffix})
            handler = make_mock_handler()([("http_open", mock_response)])
            browser.add_handler(handler)
            browser.open(base)

            # Expected dash pair: U+2014 encoded in the declared charset,
            # doubled; the entity text is used if encoding is impossible.
            try:
                dash_pair = u"\u2014".encode(encoding) * 2
            except UnicodeError:
                dash_pair = '&mdash;&#x2014;'
            escaped_pair = clean_url(dash_pair, encoding)

            # Positional Link arguments: base_url, url, text, tag, attrs
            attrs = [
                ("href", href_template % dash_pair),
                ("name", "name0%s" % dash_pair),
            ]
            wanted = mechanize.Link(
                base, href_template % escaped_pair, "blah" + dash_pair, "a",
                attrs)
            found = browser.find_link()
            self.assertEqual(found, wanted)