def test_cookies(self):
    import urllib2
    # this test page depends on cookies, and an http-equiv refresh
    #cj = CreateBSDDBCookieJar("/home/john/db.db")
    cj = CookieJar()
    handlers = [
        HTTPCookieProcessor(cj),
        HTTPRefreshProcessor(max_time=None, honor_time=False),
        HTTPEquivProcessor(),
        HTTPRedirectHandler(),  # needed for Refresh handling in 2.4.0
        # HTTPHandler(True),
        # HTTPRedirectDebugProcessor(),
        # HTTPResponseDebugProcessor(),
        ]

    o = build_opener(*handlers)
    try:
        install_opener(o)
        try:
            r = urlopen(urljoin(self.uri, "/cgi-bin/cookietest.cgi"))
        except urllib2.URLError, e:
            #print e.read()
            raise
        data = r.read()
        #print data
        self.assert_(data.find("Your browser supports cookies!") >= 0)
        self.assert_(len(cj) == 1)

        # test response.seek() (added by HTTPEquivProcessor)
        r.seek(0)
        samedata = r.read()
        r.close()
        self.assert_(samedata == data)
    finally:
        # restore the default opener so later tests are unaffected
        install_opener(None)

def check_no_seek(opener):
    r = opener.open(urljoin(self.uri, "test_fixtures/cctest2.txt"))
    self.assert_(not hasattr(r, "seek"))
    try:
        opener.open(urljoin(self.uri, "nonexistent"))
    except mechanize.HTTPError, exc:
        self.assert_(not hasattr(exc, "seek"))

def test_robots(self):
    plain_opener = self.build_opener([mechanize.HTTPRobotRulesProcessor])
    browser = self.make_browser()
    for opener in plain_opener, browser:
        opener.open(urljoin(self.uri, "robots"))
        self.assertRaises(
            mechanize.RobotExclusionError,
            opener.open, urljoin(self.uri, "norobots"))

def test_robots(self):
    plain_opener = mechanize.build_opener(mechanize.HTTPRobotRulesProcessor)
    browser = mechanize.Browser()
    for opener in plain_opener, browser:
        r = opener.open(urljoin(self.uri, "robots"))
        self.assertEqual(r.code, 200)
        self.assertRaises(
            mechanize.RobotExclusionError,
            opener.open, urljoin(self.uri, "norobots"))

def test_referer(self): br = self.make_browser() br.set_handle_refresh(True, honor_time=False) referer = urljoin(self.uri, "test_fixtures/referertest.html") info = urljoin(self.uri, "/cgi-bin/cookietest.cgi") r = br.open(info) self.assert_(referer not in r.get_data()) br.open(referer) r = br.follow_link(text="Here") self.assert_(referer in r.get_data())
def test_referer(self):
    br = mechanize.Browser()
    referer = urljoin(self.uri, "bits/referertest.html")
    info = urljoin(self.uri, "/cgi-bin/cookietest.cgi")

    r = br.open(info)
    self.assert_(referer not in r.get_data())

    br.open(referer)
    r = br.follow_link(text="Here")
    self.assert_(referer in r.get_data())

def test_referer(self): br = self.make_browser() br.set_handle_refresh(True, honor_time=False) referer = urljoin(self.uri, "test_fixtures/referertest.html") info = urljoin(self.uri, "/dynamic") r = br.open(info) self.assertNotIn(referer.encode('ascii'), r.get_data()) br.open(referer) r = br.follow_link(text="Here") self.assertIn(referer.encode('ascii'), r.get_data())
def check(opener, excs_also):
    r = opener.open(urljoin(self.uri, "test_fixtures/cctest2.txt"))
    data = r.read()
    r.seek(0)
    self.assertEqual(data, r.read(), r.get_data())
    try:
        opener.open(urljoin(self.uri, "nonexistent"))
    except mechanize.HTTPError, exc:
        data = exc.read()
        if excs_also:
            exc.seek(0)
            self.assertEqual(data, exc.read(), exc.get_data())

def test_cookies(self):
    # this test page depends on cookies, and an http-equiv refresh
    # cj = CreateBSDDBCookieJar("/home/john/db.db")
    cj = CookieJar()
    handlers = [
        HTTPCookieProcessor(cj),
        HTTPRefreshProcessor(max_time=None, honor_time=False),
        HTTPEquivProcessor(),
        HTTPRedirectHandler(),  # needed for Refresh handling in 2.4.0
        # HTTPHandler(True),
        # HTTPRedirectDebugProcessor(),
        # HTTPResponseDebugProcessor(),
        ]
    opener = self.build_opener(handlers)
    r = opener.open(urljoin(self.uri, "/cgi-bin/cookietest.cgi"))
    data = r.read()
    self.assert_(data.find("Your browser supports cookies!") >= 0)
    self.assertEquals(len(cj), 2)

    # test response.seek() (added by HTTPEquivProcessor)
    r.seek(0)
    samedata = r.read()
    r.close()
    self.assertEquals(samedata, data)

def test_urlretrieve(self): url = urljoin(self.uri, "/mechanize/") test_filename = "python.html" def check_retrieve(opener, filename, headers): self.assertEqual(headers.get('Content-Type'), 'text/html') f = open(filename) data = f.read() f.close() opener.close() from urllib import urlopen r = urlopen(url) self.assertEqual(data, r.read()) r.close() opener = mechanize.build_opener() verif = CallbackVerifier(self) filename, headers = opener.retrieve(url, test_filename, verif.callback) try: self.assertEqual(filename, test_filename) check_retrieve(opener, filename, headers) self.assert_(os.path.isfile(filename)) finally: os.remove(filename) opener = mechanize.build_opener() verif = CallbackVerifier(self) filename, headers = opener.retrieve(url, reporthook=verif.callback) check_retrieve(opener, filename, headers) # closing the opener removed the temporary file self.failIf(os.path.isfile(filename))
def test_404(self):
    br = self.make_browser()
    self.assertRaises(
        mechanize.HTTPError,
        br.open, urljoin(self.uri, "/does-not-exist"),
        )

def test_reload_read_incomplete(self):
    browser = self.make_browser()
    r1 = browser.open(urljoin(self.uri,
                              "test_fixtures/mechanize_reload_test.html"))
    # if we don't do anything and go straight to another page, most of the
    # last page's response won't be .read()...
    browser.open(urljoin(self.uri, "mechanize"))
    self.assert_(len(r1.get_data()) < 4097)  # we only .read() a little bit
    # ...so if we then go back, .follow_link() for a link near the end (a
    # few kb in, past the point that always gets read in HTML files because
    # of HEAD parsing) will only work if it causes a .reload()...
    r3 = browser.back()
    browser.follow_link(text="near the end")
    # ... good, no LinkNotFoundError, so we did reload.
    # we have .read() the whole file
    self.assertEqual(len(r3._seek_wrapper__cache.getvalue()), 4202)

def http_error_302(self, req, fp, code, msg, headers):
    # Code from mechanize._urllib2_fork.HTTPRedirectHandler:
    if 'location' in headers:
        newurl = headers.getheaders('location')[0]
    elif 'uri' in headers:
        newurl = headers.getheaders('uri')[0]
    else:
        return
    newurl = _rfc3986.clean_url(newurl, "latin-1")
    newurl = _rfc3986.urljoin(req.get_full_url(), newurl)

    new = self.redirect_request(req, fp, code, msg, headers, newurl)
    if new is None:
        return

    # loop detection
    if hasattr(req, 'redirect_dict'):
        visited = new.redirect_dict = req.redirect_dict
        if (visited.get(newurl, 0) >= self.max_repeats or
            len(visited) >= self.max_redirections):
            raise mechanize.HTTPError(req.get_full_url(), code,
                                      self.inf_msg + msg, headers, fp)
    else:
        visited = new.redirect_dict = req.redirect_dict = {}
    visited[newurl] = visited.get(newurl, 0) + 1

    fp.read()
    fp.close()

    # If the redirected URL is not on www.linkedin.com, don't follow it;
    # return an empty 200 response instead.
    new_url = new.get_full_url()
    if not re.search(r'^https?://.*www\.linkedin\.com', new_url):
        return _response.make_response('', headers.items(), new_url,
                                       200, 'OK')
    else:
        return self.parent.open(new)

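# A minimal sketch (names are hypothetical) showing one way to use the
# http_error_302 above: attach it to a HTTPRedirectHandler subclass and
# register that on a Browser, as the ObservingHandler tests below do.
class LinkedInOnlyRedirectHandler(mechanize.HTTPRedirectHandler):
    http_error_302 = http_error_302
    # the stock handler treats 301/303/307 like 302
    http_error_301 = http_error_303 = http_error_307 = http_error_302

br = mechanize.Browser()
br.add_handler(LinkedInOnlyRedirectHandler())
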
def test_seek_wrapper_class_name(self):
    opener = self.make_user_agent()
    opener.set_seekable_responses(True)
    try:
        opener.open(urljoin(self.uri, "nonexistent"))
    except mechanize.HTTPError, exc:
        self.assert_("HTTPError instance" in repr(exc))

def test_302_and_404(self):
    # the combination of 302 and 404 (/redirected is configured to redirect
    # to a non-existent URL /nonexistent) has caused problems in the past
    # due to accidental double-wrapping of the error response
    self.assertRaises(
        mechanize.HTTPError,
        self.browser.open, urljoin(self.uri, "/redirected"),
        )

def test_seekable_response_opener(self):
    opener = mechanize.OpenerFactory(
        mechanize.SeekableResponseOpener).build_opener()
    r = opener.open(urljoin(self.uri, "bits/cctest2.txt"))
    r.read()
    r.seek(0)
    self.assertEqual(r.read(), r.get_data(),
                     "Hello ClientCookie functional test suite.\n")

def test_seek_wrapper_class_name(self):
    opener = self.make_user_agent()
    opener.set_seekable_responses(True)
    rexec = ''
    try:
        opener.open(urljoin(self.uri, "nonexistent"))
    except mechanize.HTTPError as exc:
        rexec = repr(exc)
    self.assertIn("HTTPError instance", rexec)

def test_redirect_with_timeout(self):
    timeout_log = self._monkey_patch_socket()
    timeout = 10.
    # 301 redirect due to missing final '/'
    req = mechanize.Request(urljoin(self.test_uri, "test_fixtures"),
                            timeout=timeout)
    r = self.browser.open(req)
    self.assert_("GeneralFAQ.html" in r.read(2048))
    timeout_log.verify(timeout)

def test_retrieve_to_named_file(self):
    url = urljoin(self.uri, "/mechanize/")
    test_filename = os.path.join(self.make_temp_dir(), "python.html")
    opener = self.build_opener()
    verif = CallbackVerifier(self)
    filename, headers = opener.retrieve(url, test_filename, verif.callback)
    self.assertEqual(filename, test_filename)
    self._check_retrieve(url, filename, headers)
    self.assert_(os.path.isfile(filename))

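# _check_retrieve is not defined in this section; a minimal sketch modelled
# on the inline check_retrieve in test_urlretrieve above (assumption: the
# served content type is text/html, as asserted there):
def _check_retrieve(self, url, filename, headers):
    from urllib import urlopen
    self.assertEqual(headers.get('Content-Type'), 'text/html')
    f = open(filename)
    data = f.read()
    f.close()
    # the retrieved file should match what a plain urlopen returns
    r = urlopen(url)
    self.assertEqual(data, r.read())
    r.close()
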
def _mech_open(self, url, data=None, update_history=True, visit=None,
               timeout=_sockettimeout._GLOBAL_DEFAULT_TIMEOUT):
    try:
        url.get_full_url
    except AttributeError:
        # string URL -- convert to absolute URL if required
        scheme, authority = _rfc3986.urlsplit(url)[:2]
        if scheme is None:
            # relative URL
            if self._response is None:
                raise BrowserStateError("can't fetch relative reference: "
                                        "not viewing any document")
            url = _rfc3986.urljoin(self._response.geturl(), url)

    request = self._request(url, data, visit, timeout)
    visit = request.visit
    if visit is None:
        visit = True

    if visit:
        self._visit_request(request, update_history)

    success = True
    try:
        response = UserAgentBase.open(self, request, data)
    except urllib2.HTTPError as error:
        success = False
        if error.fp is None:  # not a response
            raise
        response = error
##     except (IOError, socket.error, OSError), error:
##         # Yes, urllib2 really does raise all these :-((
##         # See test_urllib2.py for examples of socket.gaierror and OSError,
##         # plus note that FTPHandler raises IOError.
##         # XXX I don't seem to have an example of exactly socket.error being
##         # raised, only socket.gaierror...
##         # I don't want to start fixing these here, though, since this is a
##         # subclass of OpenerDirector, and it would break old code.  Even in
##         # Python core, a fix would need some backwards-compat. hack to be
##         # acceptable.
##         raise

    if visit:
        self._set_response(response, False)
        response = copy.copy(self._response)
    elif response is not None:
        response = _response.upgrade_response(response)

    if not success:
        raise response
    return response

def test_open_novisit(self):
    def test_state(br):
        self.assert_(br.request is None)
        self.assert_(br.response() is None)
        self.assertRaises(mechanize.BrowserStateError, br.back)
    test_state(self.browser)
    # note this involves a redirect, which should itself be non-visiting
    r = self.browser.open_novisit(urljoin(self.uri, "bits"))
    test_state(self.browser)
    self.assert_("GeneralFAQ.html" in r.read(2048))

def test_urlretrieve(self):
    timeout_log = self._monkey_patch_socket()
    timeout = 10.
    url = urljoin(self.uri, "/mechanize/")
    verif = CallbackVerifier(self)
    filename, headers = mechanize.urlretrieve(url,
                                              reporthook=verif.callback,
                                              timeout=timeout)
    timeout_log.stop()
    self._check_retrieve(url, filename, headers)
    timeout_log.verify(timeout)

def test_redirect(self):
    # /redirected_good responds with a 302 redirect
    codes = []
    class ObservingHandler(mechanize.BaseHandler):
        def http_response(self, request, response):
            codes.append(response.code)
            return response
    self.browser.add_handler(ObservingHandler())
    r = self.browser.open(urljoin(self.uri, "redirected_good"))
    self.assertEqual(r.code, 200)
    self.assertIn(302, codes)
    self.assert_("GeneralFAQ.html" in r.read(2048))

def test_retrieve(self):
    # not passing an explicit filename downloads to a temporary file;
    # using a Request object instead of a URL works
    url = urljoin(self.uri, "/mechanize/")
    opener = self.build_opener()
    verif = CallbackVerifier(self)
    request = mechanize.Request(url)
    filename, headers = opener.retrieve(request, reporthook=verif.callback)
    self.assertEquals(request.visit, False)
    self._check_retrieve(url, filename, headers)
    opener.close()
    # closing the opener removed the temporary file
    self.failIf(os.path.isfile(filename))

def test_redirect(self): # 301 redirect due to missing final '/' codes = [] class ObservingHandler(mechanize.BaseHandler): def http_response(self, request, response): codes.append(response.code) return response self.browser.add_handler(ObservingHandler()) r = self.browser.open(urljoin(self.uri, "test_fixtures")) self.assertEqual(r.code, 200) self.assertTrue(301 in codes) self.assert_("GeneralFAQ.html" in r.read(2048))
def setUp(self): mechanize._testcase.TestCase.setUp(self) self.test_uri = urljoin(self.uri, "test_fixtures") self.server = self.get_cached_fixture("server") if self.no_proxies: old_opener_m = mechanize._opener._opener old_opener_u = urllib2._opener mechanize.install_opener(mechanize.build_opener( mechanize.ProxyHandler(proxies={}))) urllib2.install_opener(urllib2.build_opener( urllib2.ProxyHandler(proxies={}))) def revert_install(): mechanize.install_opener(old_opener_m) urllib2.install_opener(old_opener_u) self.add_teardown(revert_install)
def setUp(self): mechanize._testcase.TestCase.setUp(self) self.test_uri = urljoin(self.uri, "test_fixtures") self.server = self.get_cached_fixture("server") if self.no_proxies: old_opener_m = mechanize._opener._opener mechanize.install_opener( mechanize.build_opener(mechanize.ProxyHandler(proxies={}))) install_opener(build_opener(ProxyHandler(proxies={}))) def revert_install(): mechanize.install_opener(old_opener_m) install_opener(None) self.add_teardown(revert_install)
def test_open_novisit(self):
    def test_state(br):
        self.assert_(br.request is None)
        self.assert_(br.response() is None)
        self.assertRaises(mechanize.BrowserStateError, br.back)
    test_state(self.browser)
    uri = urljoin(self.uri, "test_fixtures")
    # note this involves a redirect, which should itself be non-visiting
    r = self.browser.open_novisit(uri)
    test_state(self.browser)
    self.assert_("GeneralFAQ.html" in r.read(2048))

    # Request argument instead of URL
    r = self.browser.open_novisit(mechanize.Request(uri))
    test_state(self.browser)
    self.assert_("GeneralFAQ.html" in r.read(2048))

def _mech_open(self, url, data=None, update_history=True, visit=None,
               timeout=_sockettimeout._GLOBAL_DEFAULT_TIMEOUT,
               headers=None):
    try:
        url.get_full_url
        if ' ' in url._Request__original:
            url._Request__original = url._Request__original.replace(
                ' ', '%20')
    except AttributeError:
        # string URL -- convert to absolute URL if required
        scheme, _authority = _rfc3986.urlsplit(url)[:2]
        if scheme is None:
            # relative URL
            if self._response is None:
                raise BrowserStateError(
                    "can't fetch relative reference: "
                    "not viewing any document")
            url = _rfc3986.urljoin(self._response.geturl(), url)
        if ' ' in url:
            url = url.replace(' ', '%20')

    request = self._request(url, data, visit, timeout)
    request.add_header(
        "User-agent",
        "Mozilla/4.0 (compatible; MSIE 7.0b; Windows NT 6.0)")
    if headers:
        for headerName, headerValue in headers.items():
            request.add_header(headerName, headerValue)

    visit = request.visit
    if visit is None:
        visit = True

    if visit:
        self._visit_request(request, update_history)

    success = True
    try:
        response = UserAgentBase.open(self, request, data)
    except urllib2.HTTPError, error:
        success = False
        if error.fp is None:  # not a response
            raise
        response = error

def test_redirect(self):
    # /redirected_good responds with a 302 redirect
    class ObservingHandler(mechanize.BaseHandler):
        def __init__(self):
            self.codes = []
        def http_response(self, request, response):
            self.codes.append(response.code)
            return response
    self.browser.add_handler(ObservingHandler())
    for br in self.browser, copy.copy(self.browser):
        r = br.open(urljoin(self.uri, "redirected_good"))
        self.assertEqual(r.code, 200)
        self.assert_("GeneralFAQ.html" in r.read(2048))
        self.assertEqual([[c for c in h.codes if c == 302]
                          for h in br.handlers_by_class(ObservingHandler)],
                         [[302]])

def _test_cookiejar(self, get_cookiejar, commit):
    cookiejar = get_cookiejar()
    br = mechanize.Browser()
    br.set_cookiejar(cookiejar)
    br.set_handle_refresh(False)
    url = urljoin(self.uri, "/cgi-bin/cookietest.cgi")
    # no cookie was set on the first request
    html = br.open(url).read()
    self.assertEquals(html.find("Your browser supports cookies!"), -1)
    self.assertEquals(len(cookiejar), 1)
    # ... but now we have the cookie
    html = br.open(url).read()
    self.assert_("Your browser supports cookies!" in html)
    commit(cookiejar)

    # should still have the cookie when we load afresh
    cookiejar = get_cookiejar()
    br.set_cookiejar(cookiejar)
    html = br.open(url).read()
    self.assert_("Your browser supports cookies!" in html)

def hidden_test_close_pickle_load(self):
    print ("Test test_close_pickle_load is expected to fail unless Python "
           "standard library patch http://python.org/sf/1144636 has been "
           "applied")
    import pickle

    b = self.make_browser()
    r = b.open(urljoin(self.uri, "test_fixtures/cctest2.txt"))
    r.read()

    r.close()
    r.seek(0)
    self.assertEqual(r.read(),
                     "Hello ClientCookie functional test suite.\n")

    HIGHEST_PROTOCOL = -1
    p = pickle.dumps(b, HIGHEST_PROTOCOL)
    b = pickle.loads(p)
    r = b.response()
    r.seek(0)
    self.assertEqual(r.read(),
                     "Hello ClientCookie functional test suite.\n")

def _test_cookiejar(self, make_cookiejar, commit):
    cookiejar = make_cookiejar()
    br = self.make_browser()
    # br.set_debug_http(True)
    br.set_cookiejar(cookiejar)
    br.set_handle_refresh(False)
    url = urljoin(self.uri, "/cgi-bin/cookietest.cgi")
    # no cookie was set on the first request
    html = br.open(url).read()
    self.assertEquals(html.find("Your browser supports cookies!"), -1)
    self.assertEquals(len(cookiejar), 2)
    # ... but now we have the cookie
    html = br.open(url).read()
    self.assertIn("Your browser supports cookies!", html)
    self.assertIn("Received session cookie", html)
    commit(cookiejar)

    # should still have the cookie when we load afresh
    cookiejar = make_cookiejar()
    br.set_cookiejar(cookiejar)
    html = br.open(url).read()
    self.assertIn("Your browser supports cookies!", html)
    self.assertNotIn("Received session cookie", html)

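# A hypothetical driver for _test_cookiejar above, using LWPCookieJar;
# the test name, filename, and commit function are assumptions.
def test_cookiejar_lwp(self):
    filename = os.path.join(self.make_temp_dir(), "lwp_cookies.dat")
    def make_cookiejar():
        return mechanize.LWPCookieJar(filename=filename)
    def commit(cookiejar):
        # persist cookies to disk so a fresh jar can reload them
        cookiejar.save()
    self._test_cookiejar(make_cookiejar, commit)
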
def refresh_request(seconds):
    uri = urljoin(self.uri, "/cgi-bin/cookietest.cgi")
    val = urllib.quote_plus('%d; url="%s"' % (seconds, self.uri))
    return uri + ("?refresh=%s" % val)

def operation():
    retrieve_fn(urljoin("file://", filename),
                os.path.join(temp_dir, "retrieved"))

def test_redirect(self): # 301 redirect due to missing final '/' r = self.browser.open(urljoin(self.uri, "bits")) self.assertEqual(r.code, 200) self.assert_("GeneralFAQ.html" in r.read(2048))
def test_basic_auth(self):
    uri = urljoin(self.uri, "basic_auth")
    self.assertRaises(mechanize.URLError, self.browser.open, uri)
    self.browser.add_password(uri, "john", "john")
    self.browser.open(uri)
    self.assertEqual(self.browser.title(), 'Basic Auth Protected Area')

def test_digest_auth(self):
    uri = urljoin(self.uri, "digest_auth")
    self.assertRaises(mechanize.URLError, self.browser.open, uri)
    self.browser.add_password(uri, "digestuser", "digestuser")
    self.browser.open(uri)
    self.assertEqual(self.browser.title(), 'Digest Auth Protected Area')