示例#1
0
    def test_redirect(self):
        from_url = "http://example.com/a.html"
        to_url = "http://example.com/b.html"
        h = HTTPRedirectHandler()
        o = h.parent = MockOpener()

        # ordinary redirect behaviour
        for code in 301, 302, 303, 307, "refresh":
            for data in None, "blah\nblah\n":
                method = getattr(h, "http_error_%s" % code)
                req = Request(from_url, data)
                req.add_header("Nonsense", "viking=withhold")
                req.add_unredirected_header("Spam", "spam")
                req.origin_req_host = "example.com"  # XXX
                try:
                    method(req, MockFile(), code, "Blah",
                           MockHeaders({"location": to_url}))
                except urllib2.HTTPError:
                    # 307 in response to POST requires user OK
                    self.assert_(code == 307 and data is not None)
                self.assert_(o.req.get_full_url() == to_url)
                try:
                    self.assert_(o.req.get_method() == "GET")
                except AttributeError:
                    self.assert_(not o.req.has_data())
                self.assert_(o.req.headers["Nonsense"] == "viking=withhold")
                self.assert_(not o.req.headers.has_key("Spam"))
                self.assert_(not o.req.unredirected_hdrs.has_key("Spam"))

        # loop detection
        def redirect(h, req, url=to_url):
            h.http_error_302(req, MockFile(), 302, "Blah",
                             MockHeaders({"location": url}))
        # Note that the *original* request shares the same record of
        # redirections with the sub-requests caused by the redirections.

        # detect infinite loop redirect of a URL to itself
        req = Request(from_url)
        req.origin_req_host = "example.com"
        count = 0
        try:
            while 1:
                redirect(h, req, "http://example.com/")
                count = count + 1
        except urllib2.HTTPError:
            # don't stop until max_repeats, because cookies may introduce state
            self.assert_(count == HTTPRedirectHandler.max_repeats)

        # detect endless non-repeating chain of redirects
        req = Request(from_url)
        req.origin_req_host = "example.com"
        count = 0
        try:
            while 1:
                redirect(h, req, "http://example.com/%d" % count)
                count = count + 1
        except urllib2.HTTPError:
            self.assert_(count == HTTPRedirectHandler.max_redirections)
示例#2
0
        def test_ftp(self):
            import ftplib, socket
            data = "rheum rhaponicum"
            h = NullFTPHandler(data)
            o = h.parent = MockOpener()

            for url, host, port, type_, dirs, filename, mimetype in [
                ("ftp://localhost/foo/bar/baz.html", "localhost",
                 ftplib.FTP_PORT, "I", ["foo",
                                        "bar"], "baz.html", "text/html"),
                    # XXXX Bug: FTPHandler tries to gethostbyname "localhost:80",
                    #  with the port still there.
                    #("ftp://localhost:80/foo/bar/",
                    # "localhost", 80, "D",
                    # ["foo", "bar"], "", None),
                    # XXXX bug: second use of splitattr() in FTPHandler should be
                    #  splitvalue()
                    #("ftp://localhost/baz.gif;type=a",
                    # "localhost", ftplib.FTP_PORT, "A",
                    # [], "baz.gif", "image/gif"),
            ]:
                r = h.ftp_open(Request(url))
                # ftp authentication not yet implemented by FTPHandler
                self.assert_(h.user == h.passwd == "")
                self.assert_(h.host == socket.gethostbyname(host))
                self.assert_(h.port == port)
                self.assert_(h.dirs == dirs)
                self.assert_(h.ftpwrapper.filename == filename)
                self.assert_(h.ftpwrapper.filetype == type_)
                headers = r.info()
                self.assert_(headers["Content-type"] == mimetype)
                self.assert_(int(headers["Content-length"]) == len(data))
示例#3
0
    def test_processors(self):
        # *_request / *_response methods get called appropriately
        o = OpenerDirector()
        meth_spec = [
            [("http_request", "return request"),
             ("http_response", "return response")],
            [("http_request", "return request"),
             ("http_response", "return response")],
        ]
        handlers = add_ordered_mock_handlers(o, meth_spec)

        req = Request("http://example.com/")
        r = o.open(req)
        # processor methods are called on *all* handlers that define them,
        # not just the first handler
        calls = [(handlers[0], "http_request"), (handlers[1], "http_request"),
                 (handlers[0], "http_response"),
                 (handlers[1], "http_response")]

        for i in range(len(o.calls)):
            handler, name, args, kwds = o.calls[i]
            if i < 2:
                # *_request
                self.assert_((handler, name) == calls[i])
                self.assert_(len(args) == 1)
                self.assert_(isinstance(args[0], Request))
            else:
                # *_response
                self.assert_((handler, name) == calls[i])
                self.assert_(len(args) == 2)
                self.assert_(isinstance(args[0], Request))
                # response from opener.open is None, because there's no
                # handler that defines http_open to handle it
                self.assert_(args[1] is None
                             or isinstance(args[1], MockResponse))
示例#4
0
    def test_http_error(self):
        # XXX http_error_default
        # http errors are a special case
        o = OpenerDirector()
        meth_spec = [
            [("http_open", "error 302")],
            [("http_error_400", "raise"), "http_open"],
            [("http_error_302", "return response"), "http_error_303",
             "http_error"],
            [("http_error_302")],
        ]
        handlers = add_ordered_mock_handlers(o, meth_spec)

        class Unknown:
            pass

        req = Request("http://example.com/")
        r = o.open(req)
        assert len(o.calls) == 2
        calls = [(handlers[0], "http_open", (req, )),
                 (handlers[2], "http_error_302", (req, Unknown, 302, "", {}))]
        for i in range(len(calls)):
            handler, method_name, args, kwds = o.calls[i]
            self.assert_((handler, method_name) == calls[i][:2])
            # check handler methods were called with expected arguments
            expected_args = calls[i][2]
            for j in range(len(args)):
                if expected_args[j] is not Unknown:
                    self.assert_(args[j] == expected_args[j])
示例#5
0
    def test_robots(self):
        # XXX useragent
        try:
            import robotparser
        except ImportError:
            return  # skip test
        else:
            from ClientCookie import HTTPRobotRulesProcessor
        rfpc = MockRobotFileParserClass()
        h = HTTPRobotRulesProcessor(rfpc)

        url = "http://example.com:80/foo/bar.html"
        req = Request(url)
        # first time: initialise and set up robots.txt parser before checking
        #  whether OK to fetch URL
        h.http_request(req)
        self.assert_(rfpc.calls == [
            "__call__",
            ("set_url", "http://example.com:80/robots.txt"),
            "read",
            ("can_fetch", "", url),
        ])
        # second time: just use existing parser
        rfpc.clear()
        req = Request(url)
        h.http_request(req)
        self.assert_(rfpc.calls == [
            ("can_fetch", "", url),
        ])
        # different URL on same server: same again
        rfpc.clear()
        url = "http://example.com:80/blah.html"
        req = Request(url)
        h.http_request(req)
        self.assert_(rfpc.calls == [
            ("can_fetch", "", url),
        ])
        # disallowed URL
        rfpc.clear()
        rfpc._can_fetch = False
        url = "http://example.com:80/rhubarb.html"
        req = Request(url)
        try:
            h.http_request(req)
        except urllib2.HTTPError, e:
            self.assert_(e.request == req)
            self.assert_(e.code == 403)
示例#6
0
    def test_referer(self):
        h = HTTPRefererProcessor()
        o = h.parent = MockOpener()

        # normal case
        url = "http://example.com/"
        req = Request(url)
        r = MockResponse(200, "OK", {}, "", url)
        newr = h.http_response(req, r)
        self.assert_(r is newr)
        self.assert_(h.referer == url)
        newreq = h.http_request(req)
        self.assert_(req is newreq)
        self.assert_(req.unredirected_hdrs["Referer"] == url)
        # don't clobber existing Referer
        ref = "http://set.by.user.com/"
        req.add_unredirected_header("Referer", ref)
        newreq = h.http_request(req)
        self.assert_(req is newreq)
        self.assert_(req.unredirected_hdrs["Referer"] == ref)
示例#7
0
    def test_referer(self):
        h = HTTPRefererProcessor()
        o = h.parent = MockOpener()

        # normal case
        url = "http://example.com/"
        req = Request(url)
        r = MockResponse(200, "OK", {}, "", url)
        newr = h.http_response(req, r)
        self.assert_(r is newr)
        self.assert_(h.referer == url)
        newreq = h.http_request(req)
        self.assert_(req is newreq)
        self.assert_(req.unredirected_hdrs["Referer"] == url)
        # don't clobber existing Referer
        ref = "http://set.by.user.com/"
        req.add_unredirected_header("Referer", ref)
        newreq = h.http_request(req)
        self.assert_(req is newreq)
        self.assert_(req.unredirected_hdrs["Referer"] == ref)
示例#8
0
    def test_refresh(self):
        # XXX processor constructor optional args
        h = HTTPRefreshProcessor()
        o = h.parent = MockOpener()

        req = Request("http://example.com/")
        headers = MockHeaders({"refresh": '0; url="http://example.com/foo/"'})
        r = MockResponse(200, "OK", headers, "")
        newr = h.http_response(req, r)
        self.assert_(o.proto == "http")
        self.assert_(o.args == (req, r, "refresh", "OK", headers))
示例#9
0
    def test_raise(self):
        # raising URLError stops processing of request
        o = OpenerDirector()
        meth_spec = [
            [("http_open", "raise")],
            [("http_open", "return self")],
        ]
        handlers = add_ordered_mock_handlers(o, meth_spec)

        req = Request("http://example.com/")
        self.assertRaises(urllib2.URLError, o.open, req)
        self.assert_(o.calls == [(handlers[0], "http_open", (req, ), {})])
示例#10
0
    def test_refresh(self):
        # XXX test processor constructor optional args
        h = HTTPRefreshProcessor(max_time=None, honor_time=False)

        for val in ['0; url="http://example.com/foo/"', "2"]:
            o = h.parent = MockOpener()
            req = Request("http://example.com/")
            headers = MockHeaders({"refresh": val})
            r = MockResponse(200, "OK", headers, "")
            newr = h.http_response(req, r)
            self.assertEqual(o.proto, "http")
            self.assertEqual(o.args, (req, r, "refresh", "OK", headers))
示例#11
0
    def test_http_equiv(self):
        h = HTTPEquivProcessor()
        o = h.parent = MockOpener()

        req = Request("http://example.com/")
        r = MockResponse(200, "OK", {"Foo": "Bar"},
                         '<html><head>'
                         '<meta http-equiv="Refresh" content="spam">'
                         '</head></html>')
        newr = h.http_response(req, r)
        headers = newr.info()
        self.assert_(headers["Refresh"] == "spam")
        self.assert_(headers["Foo"] == "Bar")
示例#12
0
    def test_cookies(self):
        cj = MockCookieJar()
        h = HTTPCookieProcessor(cj)
        o = h.parent = MockOpener()

        req = Request("http://example.com/")
        r = MockResponse(200, "OK", {}, "")
        newreq = h.http_request(req)
        self.assert_(cj.ach_req is req is newreq)
        self.assert_(req.origin_req_host == "example.com")
        self.assert_(cj.ach_u == False)
        newr = h.http_response(req, r)
        self.assert_(cj.ec_req is req)
        self.assert_(cj.ec_r is r is newr)
        self.assert_(cj.ec_u == False)
示例#13
0
    def test_errors(self):
        h = HTTPErrorProcessor()
        o = h.parent = MockOpener()

        url = "http://example.com/"
        req = Request(url)
        # 200 OK is passed through
        r = MockResponse(200, "OK", {}, "", url)
        newr = h.http_response(req, r)
        self.assert_(r is newr)
        self.assert_(not hasattr(o, "proto"))  # o.error not called
        # anything else calls o.error (and MockOpener returns None, here)
        r = MockResponse(201, "Created", {}, "", url)
        self.assert_(h.http_response(req, r) is None)
        self.assert_(o.proto == "http")  # o.error called
        self.assert_(o.args == (req, r, 201, "Created", {}))
示例#14
0
 def handle(self, fn_name, action, *args, **kwds):
     self.parent.calls.append((self, fn_name, args, kwds))
     if action is None:
         return None
     elif action == "return self":
         return self
     elif action == "return response":
         res = MockResponse(200, "OK", {}, "")
         return res
     elif action == "return request":
         return Request("http://blah/")
     elif startswith(action, "error"):
         code = int(action[-3:])
         res = MockResponse(200, "OK", {}, "")
         return self.parent.error("http", args[0], res, code, "", {})
     elif action == "raise":
         raise urllib2.URLError("blah")
     assert False
示例#15
0
    def test_request_upgrade(self):
        new_req_class = hasattr(urllib2.Request, "has_header")

        h = HTTPRequestUpgradeProcessor()
        o = h.parent = MockOpener()

        # urllib2.Request gets upgraded, unless it's the new Request
        # class from 2.4
        req = urllib2.Request("http://example.com/")
        newreq = h.http_request(req)
        if new_req_class:
            self.assert_(newreq is req)
        else:
            self.assert_(newreq is not req)
        if new_req_class:
            self.assert_(newreq.__class__ is not Request)
        else:
            self.assert_(newreq.__class__ is Request)
        # ClientCookie._urllib2_support.Request doesn't get upgraded
        req = Request("http://example.com/")
        newreq = h.http_request(req)
        self.assert_(newreq is req)
        self.assert_(newreq.__class__ is Request)
示例#16
0
    def test_handled(self):
        # handler returning non-None means no more handlers will be called
        o = OpenerDirector()
        meth_spec = [
            ["http_open", "ftp_open", "http_error_302"],
            ["ftp_open"],
            [("http_open", "return self")],
            [("http_open", "return self")],
        ]
        handlers = add_ordered_mock_handlers(o, meth_spec)

        req = Request("http://example.com/")
        r = o.open(req)
        # Second http_open gets called, third doesn't, since second returned
        # non-None.  Handlers without http_open never get any methods called
        # on them.
        # In fact, second mock handler returns self (instead of response),
        # which becomes the OpenerDirector's return value.
        self.assert_(r == handlers[2])
        calls = [(handlers[0], "http_open"), (handlers[2], "http_open")]
        for i in range(len(o.calls)):
            handler, name, args, kwds = o.calls[i]
            self.assert_((handler, name) == calls[i])
            self.assert_(args == (req, ))
示例#17
0
    def test_http(self):
        h = AbstractHTTPHandler()
        o = h.parent = MockOpener()

        url = "http://example.com/"
        for method, data in [("GET", None), ("POST", "blah")]:
            req = Request(url, data, {"Foo": "bar"})
            req.add_unredirected_header("Spam", "eggs")
            http = MockHTTPClass()
            r = h.do_open(http, req)

            # result attributes
            r.read
            r.readline  # wrapped MockFile methods
            r.info
            r.geturl  # addinfourl methods
            r.code, r.msg == 200, "OK"  # added from MockHTTPClass.getreply()
            hdrs = r.info()
            hdrs.get
            hdrs.has_key  # r.info() gives dict from .getreply()
            self.assert_(r.geturl() == url)

            self.assert_(http.host == "example.com")
            self.assert_(http.level == 0)
            self.assert_(http.method == method)
            self.assert_(http.selector == "/")
            http.req_headers.sort()
            self.assert_(
                http.req_headers == [("Connection",
                                      "close"), ("Foo", "bar"), ("Spam",
                                                                 "eggs")])
            self.assert_(http.data == data)

        # check socket.error converted to URLError
        http.raise_on_endheaders = True
        self.assertRaises(urllib2.URLError, h.do_open, http, req)

        # check adding of standard headers
        o.addheaders = [("Spam", "eggs")]
        for data in "", None:  # POST, GET
            req = Request("http://example.com/", data)
            r = MockResponse(200, "OK", {}, "")
            newreq = h.do_request_(req)
            if data is None:  # GET
                self.assert_(
                    not req.unredirected_hdrs.has_key("Content-length"))
                self.assert_(not req.unredirected_hdrs.has_key("Content-type"))
            else:  # POST
                # No longer true, due to workarouhd for buggy httplib
                # in Python versions < 2.4:
                #self.assert_(req.unredirected_hdrs["Content-length"] == "0")
                self.assert_(req.unredirected_hdrs["Content-type"] ==
                             "application/x-www-form-urlencoded")
            # XXX the details of Host could be better tested
            self.assert_(req.unredirected_hdrs["Host"] == "example.com")
            self.assert_(req.unredirected_hdrs["Spam"] == "eggs")

            # don't clobber existing headers
            req.add_unredirected_header("Content-length", "foo")
            req.add_unredirected_header("Content-type", "bar")
            req.add_unredirected_header("Host", "baz")
            req.add_unredirected_header("Spam", "foo")
            newreq = h.do_request_(req)
            self.assert_(req.unredirected_hdrs["Content-length"] == "foo")
            self.assert_(req.unredirected_hdrs["Content-type"] == "bar")
            self.assert_(req.unredirected_hdrs["Host"] == "baz")
            self.assert_(req.unredirected_hdrs["Spam"] == "foo")
示例#18
0
    def test_http(self):
        h = AbstractHTTPHandler()
        o = h.parent = MockOpener()

        url = "http://example.com/"
        for method, data in [("GET", None), ("POST", "blah")]:
            req = Request(url, data, {"Foo": "bar"})
            req.add_unredirected_header("Spam", "eggs")
            http = MockHTTPClass()
            r = h.do_open(http, req)

            # result attributes
            r.read; r.readline  # wrapped MockFile methods
            r.info; r.geturl  # addinfourl methods
            r.code, r.msg == 200, "OK"  # added from MockHTTPClass.getreply()
            hdrs = r.info()
            hdrs.get; hdrs.has_key  # r.info() gives dict from .getreply()
            self.assert_(r.geturl() == url)

            self.assert_(http.host == "example.com")
            self.assert_(http.level == 0)
            self.assert_(http.method == method)
            self.assert_(http.selector == "/")
            http.req_headers.sort()
            self.assert_(http.req_headers == [
                ("Connection", "close"),
                ("Foo", "bar"), ("Spam", "eggs")])
            self.assert_(http.data == data)

        # check socket.error converted to URLError
        http.raise_on_endheaders = True
        self.assertRaises(urllib2.URLError, h.do_open, http, req)

        # check adding of standard headers
        o.addheaders = [("Spam", "eggs")]
        for data in "", None:  # POST, GET
            req = Request("http://example.com/", data)
            r = MockResponse(200, "OK", {}, "")
            newreq = h.do_request_(req)
            if data is None:  # GET
                self.assert_(not req.unredirected_hdrs.has_key("Content-length"))
                self.assert_(not req.unredirected_hdrs.has_key("Content-type"))
            else:  # POST
                # No longer true, due to workarouhd for buggy httplib
                # in Python versions < 2.4:
                #self.assert_(req.unredirected_hdrs["Content-length"] == "0")
                self.assert_(req.unredirected_hdrs["Content-type"] ==
                             "application/x-www-form-urlencoded")
            # XXX the details of Host could be better tested
            self.assert_(req.unredirected_hdrs["Host"] == "example.com")
            self.assert_(req.unredirected_hdrs["Spam"] == "eggs")

            # don't clobber existing headers
            req.add_unredirected_header("Content-length", "foo")
            req.add_unredirected_header("Content-type", "bar")
            req.add_unredirected_header("Host", "baz")
            req.add_unredirected_header("Spam", "foo")
            newreq = h.do_request_(req)
            self.assert_(req.unredirected_hdrs["Content-length"] == "foo")
            self.assert_(req.unredirected_hdrs["Content-type"] == "bar")
            self.assert_(req.unredirected_hdrs["Host"] == "baz")
            self.assert_(req.unredirected_hdrs["Spam"] == "foo")
示例#19
0
    def test_redirect(self):
        from_url = "http://example.com/a.html"
        to_url = "http://example.com/b.html"
        h = HTTPRedirectHandler()
        o = h.parent = MockOpener()

        # ordinary redirect behaviour
        for code in 301, 302, 303, 307, "refresh":
            for data in None, "blah\nblah\n":
                method = getattr(h, "http_error_%s" % code)
                req = Request(from_url, data)
                req.add_header("Nonsense", "viking=withhold")
                req.add_unredirected_header("Spam", "spam")
                req.origin_req_host = "example.com"  # XXX
                try:
                    method(req, MockFile(), code, "Blah",
                           MockHeaders({"location": to_url}))
                except urllib2.HTTPError:
                    # 307 in response to POST requires user OK
                    self.assert_(code == 307 and data is not None)
                self.assert_(o.req.get_full_url() == to_url)
                try:
                    self.assert_(o.req.get_method() == "GET")
                except AttributeError:
                    self.assert_(not o.req.has_data())
                self.assert_(o.req.headers["Nonsense"] == "viking=withhold")
                self.assert_(not o.req.headers.has_key("Spam"))
                self.assert_(not o.req.unredirected_hdrs.has_key("Spam"))

        # loop detection
        def redirect(h, req, url=to_url):
            h.http_error_302(req, MockFile(), 302, "Blah",
                             MockHeaders({"location": url}))

        # Note that the *original* request shares the same record of
        # redirections with the sub-requests caused by the redirections.

        # detect infinite loop redirect of a URL to itself
        req = Request(from_url)
        req.origin_req_host = "example.com"
        count = 0
        try:
            while 1:
                redirect(h, req, "http://example.com/")
                count = count + 1
        except urllib2.HTTPError:
            # don't stop until max_repeats, because cookies may introduce state
            self.assert_(count == HTTPRedirectHandler.max_repeats)

        # detect endless non-repeating chain of redirects
        req = Request(from_url)
        req.origin_req_host = "example.com"
        count = 0
        try:
            while 1:
                redirect(h, req, "http://example.com/%d" % count)
                count = count + 1
        except urllib2.HTTPError:
            self.assert_(count == HTTPRedirectHandler.max_redirections)
示例#20
0
class HandlerTests(unittest.TestCase):

    if hasattr(sys,
               "version_info") and sys.version_info > (2, 1, 3, "final", 0):

        def test_ftp(self):
            import ftplib, socket
            data = "rheum rhaponicum"
            h = NullFTPHandler(data)
            o = h.parent = MockOpener()

            for url, host, port, type_, dirs, filename, mimetype in [
                ("ftp://localhost/foo/bar/baz.html", "localhost",
                 ftplib.FTP_PORT, "I", ["foo",
                                        "bar"], "baz.html", "text/html"),
                    # XXXX Bug: FTPHandler tries to gethostbyname "localhost:80",
                    #  with the port still there.
                    #("ftp://localhost:80/foo/bar/",
                    # "localhost", 80, "D",
                    # ["foo", "bar"], "", None),
                    # XXXX bug: second use of splitattr() in FTPHandler should be
                    #  splitvalue()
                    #("ftp://localhost/baz.gif;type=a",
                    # "localhost", ftplib.FTP_PORT, "A",
                    # [], "baz.gif", "image/gif"),
            ]:
                r = h.ftp_open(Request(url))
                # ftp authentication not yet implemented by FTPHandler
                self.assert_(h.user == h.passwd == "")
                self.assert_(h.host == socket.gethostbyname(host))
                self.assert_(h.port == port)
                self.assert_(h.dirs == dirs)
                self.assert_(h.ftpwrapper.filename == filename)
                self.assert_(h.ftpwrapper.filetype == type_)
                headers = r.info()
                self.assert_(headers["Content-type"] == mimetype)
                self.assert_(int(headers["Content-length"]) == len(data))

        def test_file(self):
            import time, rfc822, socket
            h = urllib2.FileHandler()
            o = h.parent = MockOpener()

            #TESTFN = test_support.TESTFN
            TESTFN = "test.txt"
            urlpath = sanepathname2url(os.path.abspath(TESTFN))
            towrite = "hello, world\n"
            try:
                fqdn = socket.gethostbyname(socket.gethostname())
            except socket.gaierror:
                fqdn = "localhost"
            for url in [
                    "file://localhost%s" % urlpath,
                    "file://%s" % urlpath,
                    "file://%s%s" %
                (socket.gethostbyname('localhost'), urlpath),
                    "file://%s%s" % (fqdn, urlpath)
            ]:
                f = open(TESTFN, "wb")
                try:
                    try:
                        f.write(towrite)
                    finally:
                        f.close()

                    r = h.file_open(Request(url))
                    try:
                        data = r.read()
                        headers = r.info()
                        newurl = r.geturl()
                    finally:
                        r.close()
                    stats = os.stat(TESTFN)
                    modified = rfc822.formatdate(stats.st_mtime)
                finally:
                    os.remove(TESTFN)
                self.assertEqual(data, towrite)
                self.assertEqual(headers["Content-type"], "text/plain")
                self.assertEqual(headers["Content-length"], "13")
                self.assertEqual(headers["Last-modified"], modified)

            for url in [
                    "file://localhost:80%s" % urlpath,
                    # XXXX bug: these fail with socket.gaierror, should be URLError
                    ##             "file://%s:80%s/%s" % (socket.gethostbyname('localhost'),
                    ##                                    os.getcwd(), TESTFN),
                    ##             "file://somerandomhost.ontheinternet.com%s/%s" %
                    ##             (os.getcwd(), TESTFN),
            ]:
                try:
                    f = open(TESTFN, "wb")
                    try:
                        f.write(towrite)
                    finally:
                        f.close()

                    self.assertRaises(urllib2.URLError, h.file_open,
                                      Request(url))
                finally:
                    os.remove(TESTFN)

            h = urllib2.FileHandler()
            o = h.parent = MockOpener()
            # XXXX why does // mean ftp (and /// mean not ftp!), and where
            #  is file: scheme specified?  I think this is really a bug, and
            #  what was intended was to distinguish between URLs like:
            # file:/blah.txt (a file)
            # file://localhost/blah.txt (a file)
            # file:///blah.txt (a file)
            # file://ftp.example.com/blah.txt (an ftp URL)
            for url, ftp in [
                ("file://ftp.example.com//foo.txt", True),
                ("file://ftp.example.com///foo.txt", False),
                    # XXXX bug: fails with OSError, should be URLError
                ("file://ftp.example.com/foo.txt", False),
            ]:
                req = Request(url)
                try:
                    h.file_open(req)
                # XXXX remove OSError when bug fixed
                except (urllib2.URLError, OSError):
                    self.assert_(not ftp)
                else:
                    self.assert_(o.req is req)
                    self.assertEqual(req.type, "ftp")

    def test_http(self):
        h = AbstractHTTPHandler()
        o = h.parent = MockOpener()

        url = "http://example.com/"
        for method, data in [("GET", None), ("POST", "blah")]:
            req = Request(url, data, {"Foo": "bar"})
            req.add_unredirected_header("Spam", "eggs")
            http = MockHTTPClass()
            r = h.do_open(http, req)

            # result attributes
            r.read
            r.readline  # wrapped MockFile methods
            r.info
            r.geturl  # addinfourl methods
            r.code, r.msg == 200, "OK"  # added from MockHTTPClass.getreply()
            hdrs = r.info()
            hdrs.get
            hdrs.has_key  # r.info() gives dict from .getreply()
            self.assert_(r.geturl() == url)

            self.assert_(http.host == "example.com")
            self.assert_(http.level == 0)
            self.assert_(http.method == method)
            self.assert_(http.selector == "/")
            http.req_headers.sort()
            self.assert_(
                http.req_headers == [("Connection",
                                      "close"), ("Foo", "bar"), ("Spam",
                                                                 "eggs")])
            self.assert_(http.data == data)

        # check socket.error converted to URLError
        http.raise_on_endheaders = True
        self.assertRaises(urllib2.URLError, h.do_open, http, req)

        # check adding of standard headers
        o.addheaders = [("Spam", "eggs")]
        for data in "", None:  # POST, GET
            req = Request("http://example.com/", data)
            r = MockResponse(200, "OK", {}, "")
            newreq = h.do_request_(req)
            if data is None:  # GET
                self.assert_(
                    not req.unredirected_hdrs.has_key("Content-length"))
                self.assert_(not req.unredirected_hdrs.has_key("Content-type"))
            else:  # POST
                # No longer true, due to workarouhd for buggy httplib
                # in Python versions < 2.4:
                #self.assert_(req.unredirected_hdrs["Content-length"] == "0")
                self.assert_(req.unredirected_hdrs["Content-type"] ==
                             "application/x-www-form-urlencoded")
            # XXX the details of Host could be better tested
            self.assert_(req.unredirected_hdrs["Host"] == "example.com")
            self.assert_(req.unredirected_hdrs["Spam"] == "eggs")

            # don't clobber existing headers
            req.add_unredirected_header("Content-length", "foo")
            req.add_unredirected_header("Content-type", "bar")
            req.add_unredirected_header("Host", "baz")
            req.add_unredirected_header("Spam", "foo")
            newreq = h.do_request_(req)
            self.assert_(req.unredirected_hdrs["Content-length"] == "foo")
            self.assert_(req.unredirected_hdrs["Content-type"] == "bar")
            self.assert_(req.unredirected_hdrs["Host"] == "baz")
            self.assert_(req.unredirected_hdrs["Spam"] == "foo")

    def test_request_upgrade(self):
        new_req_class = hasattr(urllib2.Request, "has_header")

        h = HTTPRequestUpgradeProcessor()
        o = h.parent = MockOpener()

        # urllib2.Request gets upgraded, unless it's the new Request
        # class from 2.4
        req = urllib2.Request("http://example.com/")
        newreq = h.http_request(req)
        if new_req_class:
            self.assert_(newreq is req)
        else:
            self.assert_(newreq is not req)
        if new_req_class:
            self.assert_(newreq.__class__ is not Request)
        else:
            self.assert_(newreq.__class__ is Request)
        # ClientCookie._urllib2_support.Request doesn't get upgraded
        req = Request("http://example.com/")
        newreq = h.http_request(req)
        self.assert_(newreq is req)
        self.assert_(newreq.__class__ is Request)

    def test_referer(self):
        h = HTTPRefererProcessor()
        o = h.parent = MockOpener()

        # normal case
        url = "http://example.com/"
        req = Request(url)
        r = MockResponse(200, "OK", {}, "", url)
        newr = h.http_response(req, r)
        self.assert_(r is newr)
        self.assert_(h.referer == url)
        newreq = h.http_request(req)
        self.assert_(req is newreq)
        self.assert_(req.unredirected_hdrs["Referer"] == url)
        # don't clobber existing Referer
        ref = "http://set.by.user.com/"
        req.add_unredirected_header("Referer", ref)
        newreq = h.http_request(req)
        self.assert_(req is newreq)
        self.assert_(req.unredirected_hdrs["Referer"] == ref)

    def test_errors(self):
        h = HTTPErrorProcessor()
        o = h.parent = MockOpener()

        url = "http://example.com/"
        req = Request(url)
        # 200 OK is passed through
        r = MockResponse(200, "OK", {}, "", url)
        newr = h.http_response(req, r)
        self.assert_(r is newr)
        self.assert_(not hasattr(o, "proto"))  # o.error not called
        # anything else calls o.error (and MockOpener returns None, here)
        r = MockResponse(201, "Created", {}, "", url)
        self.assert_(h.http_response(req, r) is None)
        self.assert_(o.proto == "http")  # o.error called
        self.assert_(o.args == (req, r, 201, "Created", {}))

    def test_robots(self):
        # XXX useragent
        try:
            import robotparser
        except ImportError:
            return  # skip test
        else:
            from ClientCookie import HTTPRobotRulesProcessor
        rfpc = MockRobotFileParserClass()
        h = HTTPRobotRulesProcessor(rfpc)

        url = "http://example.com:80/foo/bar.html"
        req = Request(url)
        # first time: initialise and set up robots.txt parser before checking
        #  whether OK to fetch URL
        h.http_request(req)
        self.assert_(rfpc.calls == [
            "__call__",
            ("set_url", "http://example.com:80/robots.txt"),
            "read",
            ("can_fetch", "", url),
        ])
        # second time: just use existing parser
        rfpc.clear()
        req = Request(url)
        h.http_request(req)
        self.assert_(rfpc.calls == [
            ("can_fetch", "", url),
        ])
        # different URL on same server: same again
        rfpc.clear()
        url = "http://example.com:80/blah.html"
        req = Request(url)
        h.http_request(req)
        self.assert_(rfpc.calls == [
            ("can_fetch", "", url),
        ])
        # disallowed URL
        rfpc.clear()
        rfpc._can_fetch = False
        url = "http://example.com:80/rhubarb.html"
        req = Request(url)
        try:
            h.http_request(req)
        except urllib2.HTTPError, e:
            self.assert_(e.request == req)
            self.assert_(e.code == 403)
        # new host: reload robots.txt (even though the host and port are
        #  unchanged, we treat this as a new host because
        #  "example.com" != "example.com:80")
        rfpc.clear()
        rfpc._can_fetch = True
        url = "http://example.com/rhubarb.html"
        req = Request(url)
        h.http_request(req)
        self.assert_(rfpc.calls == [
            "__call__",
            ("set_url", "http://example.com/robots.txt"),
            "read",
            ("can_fetch", "", url),
        ])
        # https url -> should fetch robots.txt from https url too
        rfpc.clear()
        url = "https://example.org/rhubarb.html"
        req = Request(url)
        h.http_request(req)
        self.assert_(rfpc.calls == [
            "__call__",
            ("set_url", "https://example.org/robots.txt"),
            "read",
            ("can_fetch", "", url),
        ])
示例#21
0
        def test_file(self):
            import time, rfc822, socket
            h = urllib2.FileHandler()
            o = h.parent = MockOpener()

            #TESTFN = test_support.TESTFN
            TESTFN = "test.txt"
            urlpath = sanepathname2url(os.path.abspath(TESTFN))
            towrite = "hello, world\n"
            try:
                fqdn = socket.gethostbyname(socket.gethostname())
            except socket.gaierror:
                fqdn = "localhost"
            for url in [
                    "file://localhost%s" % urlpath,
                    "file://%s" % urlpath,
                    "file://%s%s" %
                (socket.gethostbyname('localhost'), urlpath),
                    "file://%s%s" % (fqdn, urlpath)
            ]:
                f = open(TESTFN, "wb")
                try:
                    try:
                        f.write(towrite)
                    finally:
                        f.close()

                    r = h.file_open(Request(url))
                    try:
                        data = r.read()
                        headers = r.info()
                        newurl = r.geturl()
                    finally:
                        r.close()
                    stats = os.stat(TESTFN)
                    modified = rfc822.formatdate(stats.st_mtime)
                finally:
                    os.remove(TESTFN)
                self.assertEqual(data, towrite)
                self.assertEqual(headers["Content-type"], "text/plain")
                self.assertEqual(headers["Content-length"], "13")
                self.assertEqual(headers["Last-modified"], modified)

            for url in [
                    "file://localhost:80%s" % urlpath,
                    # XXXX bug: these fail with socket.gaierror, should be URLError
                    ##             "file://%s:80%s/%s" % (socket.gethostbyname('localhost'),
                    ##                                    os.getcwd(), TESTFN),
                    ##             "file://somerandomhost.ontheinternet.com%s/%s" %
                    ##             (os.getcwd(), TESTFN),
            ]:
                try:
                    f = open(TESTFN, "wb")
                    try:
                        f.write(towrite)
                    finally:
                        f.close()

                    self.assertRaises(urllib2.URLError, h.file_open,
                                      Request(url))
                finally:
                    os.remove(TESTFN)

            h = urllib2.FileHandler()
            o = h.parent = MockOpener()
            # XXXX why does // mean ftp (and /// mean not ftp!), and where
            #  is file: scheme specified?  I think this is really a bug, and
            #  what was intended was to distinguish between URLs like:
            # file:/blah.txt (a file)
            # file://localhost/blah.txt (a file)
            # file:///blah.txt (a file)
            # file://ftp.example.com/blah.txt (an ftp URL)
            for url, ftp in [
                ("file://ftp.example.com//foo.txt", True),
                ("file://ftp.example.com///foo.txt", False),
                    # XXXX bug: fails with OSError, should be URLError
                ("file://ftp.example.com/foo.txt", False),
            ]:
                req = Request(url)
                try:
                    h.file_open(req)
                # XXXX remove OSError when bug fixed
                except (urllib2.URLError, OSError):
                    self.assert_(not ftp)
                else:
                    self.assert_(o.req is req)
                    self.assertEqual(req.type, "ftp")