def base_request(self, method, container=None, name=None, prefix=None,
                 headers=None, proxy=None, contents=None, full_listing=None):
    """Issue a raw authenticated request against the storage URL.

    :param method: HTTP verb to use (e.g. 'GET', 'PUT', 'DELETE').
    :param container: optional container name, URL-quoted onto the path.
    :param name: optional object name, URL-quoted onto the path; when
                 absent, a JSON listing is requested instead.
    :param prefix: optional listing prefix (only meaningful for listings).
    :param headers: optional dict of extra request headers.
    :param proxy: optional proxy URL; installed as the process-wide
                  urllib2 opener (NOTE: affects later urlopen calls too).
    :param contents: optional request body.
    :param full_listing: accepted for interface compatibility; unused here.
    :returns: two-element list [None, parsed-JSON-body-or-None].
    """
    # Common request method
    url = self.url
    # BUG FIX: a mutable default argument ({}) was shared across calls,
    # so X-Auth-Token leaked between requests; build the dict per call.
    if headers is None:
        headers = {}
    if self.token:
        headers['X-Auth-Token'] = self.token
    if container:
        url = '%s/%s' % (url.rstrip('/'), quote(container))
    if name:
        url = '%s/%s' % (url.rstrip('/'), quote(name))
    else:
        # BUG FIX: '?format=json' (and '&prefix=') was appended even when
        # fetching a named object; listings only, matching the newer
        # variant of this method elsewhere in the project.
        url += '?format=json'
        if prefix:
            url += '&prefix=%s' % prefix
    if proxy:
        proxy = urlparse.urlparse(proxy)
        proxy = urllib2.ProxyHandler({proxy.scheme: proxy.netloc})
        opener = urllib2.build_opener(proxy)
        urllib2.install_opener(opener)
    req = urllib2.Request(url, headers=headers, data=contents)
    req.get_method = lambda: method
    # BUG FIX: the request was previously sent twice -- a bare
    # urllib2.urlopen(req) whose response was discarded, followed by the
    # one kept in 'conn'. Send it once.
    conn = urllib2.urlopen(req)
    body = conn.read()
    try:
        body_data = json.loads(body)
    except ValueError:
        body_data = None
    return [None, body_data]
def get_data(self, argv):
    # argv = {"urls" : [], "worker" : , }
    # Fetch the body of argv["url"], directly or through self.proxy,
    # bounded by an eventlet timeout. On HTTP/URL errors re-raises as a
    # generic Exception carrying the status code or "URLError".
    # NOTE(review): this excerpt appears truncated -- the outer try has
    # no visible except/finally, and error_code/content are set but not
    # returned within the visible span; confirm against the full file.
    content = None
    error_code = None
    self.logger.debug("start fetch " + argv["url"])
    try:
        url = argv["url"]
        try:
            # If the timeout fires, eventlet.Timeout(..., False) aborts
            # the block silently, leaving content as None.
            with eventlet.Timeout(self.timeout, False):
                headers = { "User-Agent":"Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1" }
                if self.proxy is None:
                    # Direct fetch, no proxy configured.
                    req = urllib2.Request(url, headers = headers)
                    res = urllib2.urlopen(req)
                    content = res.read()
                else:
                    # Proxied fetch: opener.addheaders wants a list of
                    # (name, value) pairs rather than a dict.
                    proxy_handler = urllib2.ProxyHandler(self.proxy)
                    opener = urllib2.build_opener(proxy_handler)
                    header_list = []
                    for header in headers:
                        header_list.append((header, headers[header]))
                    opener.addheaders = header_list
                    res = opener.open(url)
                    content = res.read()
        except urllib2.HTTPError, e:
            # Surface the HTTP status code to the caller.
            raise Exception(e.code)
        except urllib2.URLError, e:
            raise Exception("URLError")
def setUp(self):
    """Start a loopback proxy that demands digest auth and build an
    opener configured to authenticate against it."""
    # Teach the fake proxy which credentials and realm to accept.
    digest = FakeProxyHandler.digest_auth_handler
    digest.set_users({self.USER: self.PASSWD})
    digest.set_realm(self.REALM)
    # Launch the proxy thread and block until it is accepting connections.
    self.server = LoopbackHttpServerThread(self.PORT, FakeProxyHandler)
    self.server.start()
    self.server.ready.wait()
    # Route requests through the fake proxy and answer its digest challenge.
    proxy_handler = urllib2.ProxyHandler({"http": self.PROXY_URL})
    self._digest_auth_handler = urllib2.ProxyDigestAuthHandler()
    self.opener = urllib2.build_opener(proxy_handler, self._digest_auth_handler)
def test_proxy(self):
    """ProxyHandler must rewrite the request host to the configured proxy
    and route the open through the http_open handler."""
    director = OpenerDirector()
    director.add_handler(
        urllib2.ProxyHandler(dict(http="proxy.example.com:3128")))
    handlers = add_ordered_mock_handlers(
        director, [[("http_open", "return response")]])
    req = Request("http://acme.example.com/")
    # Before opening, the request still targets the origin host.
    self.assertEqual(req.get_host(), "acme.example.com")
    director.open(req)
    # After opening, ProxyHandler has redirected the host to the proxy.
    self.assertEqual(req.get_host(), "proxy.example.com:3128")
    self.assertEqual([(handlers[0], "http_open")],
                     [tup[0:2] for tup in director.calls])
def request(self, host, handler, request_body, verbose):
    '''Send xml-rpc request using proxy'''
    # We get a traceback if we don't have this attribute:
    self.verbose = verbose
    req = urllib2.Request('http://' + host + handler)
    req.add_data(request_body)
    # Note: 'Host' and 'Content-Length' are added automatically
    # b64encode yields the same characters as encodestring with the
    # embedded newlines already absent.
    credentials = base64.b64encode('%s:%s' % (self._username, self._password))
    req.add_header("Authorization", "Basic %s" % credentials)
    req.add_header('User-Agent', self.user_agent)
    req.add_header('Content-Type', 'text/xml')
    # An argument-less ProxyHandler picks the proxy up from the environment.
    opener = urllib2.build_opener(urllib2.ProxyHandler())
    return self.parse_response(opener.open(req))
def test_proxy_basic_auth(self):
    """A 407 challenge from the proxy must be answered with Basic
    credentials in the Proxy-authorization header."""
    opener = OpenerDirector()
    opener.add_handler(
        urllib2.ProxyHandler(dict(http="proxy.example.com:3128")))
    password_manager = MockPasswordManager()
    auth_handler = urllib2.ProxyBasicAuthHandler(password_manager)
    realm = "ACME Networks"
    # Mock server always replies 407 with a Basic challenge for the realm.
    challenge = 'Proxy-Authenticate: Basic realm="%s"\r\n\r\n' % realm
    http_handler = MockHTTPHandler(407, challenge)
    opener.add_handler(auth_handler)
    opener.add_handler(http_handler)
    self._test_basic_auth(
        opener, auth_handler, "Proxy-authorization", realm,
        http_handler, password_manager,
        "http://acme.example.com:3128/protected",
        "proxy.example.com:3128",
    )
def _install_opener(self):
    """Build self.opener, routing through an authenticated HTTP proxy
    when PROXY_HOST and PROXY_PORT are configured in self.settings;
    otherwise just wrap the cookie handler."""
    if has_valid_attr(self.settings, 'PROXY_HOST') and has_valid_attr(
            self.settings, 'PROXY_PORT'):
        proxy_info = {  # proxy information
            'user': getattr(self.settings, 'PROXY_USER', ''),
            'pass': getattr(self.settings, 'PROXY_PASS', ''),
            'host': getattr(self.settings, 'PROXY_HOST', ''),  # localhost
            'port': getattr(self.settings, 'PROXY_PORT', 80),
        }
        # BUG FIX: '%(port)d' raised TypeError whenever PROXY_PORT came
        # out of configuration as a string; '%s' accepts int or str.
        if proxy_info['user']:
            proxy_url = "http://%(user)s:%(pass)s@%(host)s:%(port)s" % proxy_info
        else:
            # BUG FIX: with no PROXY_USER the old format produced the
            # malformed URL 'http://:@host:port'; omit empty userinfo.
            proxy_url = "http://%(host)s:%(port)s" % proxy_info
        # build a new opener that uses a proxy requiring authorization
        proxy = urllib2.ProxyHandler({"http": proxy_url})
        self.opener = urllib2.build_opener(proxy, self.cookie_handler)
    else:
        self.opener = urllib2.build_opener(self.cookie_handler)
def proxyTest(self, row):
    # Probe whether the proxy described by *row* still answers.
    # Assumes row[0]=ip, row[1]=port, row[3]=supported protocols --
    # inferred from the indexing below, TODO confirm against the caller.
    proxy = row[0] + ":" + row[1]
    if 'HTTPS' in row[3]:
        proxies = {"https": "https://" + proxy}
    else:
        proxies = {"http": "http://" + proxy}
    ip = row[0]
    port = row[1]
    theProxy = urllib2.ProxyHandler(proxies)
    opener = urllib2.build_opener(theProxy)
    # NOTE(review): install_opener swaps the process-wide default opener,
    # so every later urllib2.urlopen call is affected, not just this one.
    urllib2.install_opener(opener)
    testResult = 'ok!'
    try:
        # Any failure (timeout, refused connection, HTTP error) marks the
        # proxy as dead and removes it via self.clean().
        webcode = urllib2.urlopen("https://www.fliggy.com/", timeout=10).getcode()
        #logger.info("Proxy %s is ok" % proxy)
    except Exception, e:
        #logger.warn("Proxy %s is nolonger ok" % proxy)
        self.clean(ip=ip, port=port)
        testResult = 'nolonger ok!'
    # NOTE(review): testResult is unused within this visible excerpt --
    # the function probably continues (logging/returning it) beyond here.
def handle404(self, reqorig, url, container, obj):
    """
    Return a webob.Response which fetches the thumbnail from the thumb
    host and returns it. Note also that the thumb host might write it out
    to Swift so it won't 404 next time.

    NOTE(review): this excerpt appears truncated -- 'upcopy' is opened
    but never consumed in the visible span, and only the HTTPError
    branch of the try is present; confirm against the full file.
    """
    # go to the thumb media store for unknown files
    reqorig.host = self.thumbhost
    # upload doesn't like our User-agent, otherwise we could call it
    # using urllib2.url()
    proxy_handler = urllib2.ProxyHandler({'http': self.thumbhost})
    redirect_handler = DumbRedirectHandler()
    opener = urllib2.build_opener(redirect_handler, proxy_handler)
    # Thumbor doesn't need (and doesn't like) the proxy
    thumbor_opener = urllib2.build_opener(redirect_handler)
    # Pass on certain headers from the caller squid to the scalers
    opener.addheaders = []
    if reqorig.headers.get('User-Agent') is not None:
        opener.addheaders.append(('User-Agent', reqorig.headers.get('User-Agent')))
    else:
        opener.addheaders.append(('User-Agent', self.user_agent))
    for header_to_pass in ['X-Forwarded-For', 'X-Forwarded-Proto',
                           'Accept', 'Accept-Encoding', 'X-Original-URI']:
        if reqorig.headers.get(header_to_pass) is not None:
            opener.addheaders.append((header_to_pass, reqorig.headers.get(header_to_pass)))
    # Both openers send the same forwarded headers.
    thumbor_opener.addheaders = opener.addheaders
    # At least in theory, we shouldn't be handing out links to originals
    # that we don't have (or in the case of thumbs, can't generate).
    # However, someone may have a formerly valid link to a file, so we
    # should do them the favor of giving them a 404.
    try:
        # break apach the url, url-encode it, and put it back together
        urlobj = list(urlparse.urlsplit(reqorig.url))
        # encode the URL but don't encode %s and /s
        urlobj[2] = urllib2.quote(urlobj[2], '%/')
        encodedurl = urlparse.urlunsplit(urlobj)
        # Thumbor never needs URL mangling and it needs a different host
        if self.thumborhost:
            thumbor_reqorig = reqorig.copy()
            thumbor_reqorig.host = self.thumborhost
            thumbor_urlobj = list(urlparse.urlsplit(thumbor_reqorig.url))
            thumbor_urlobj[2] = urllib2.quote(thumbor_urlobj[2], '%/')
            thumbor_encodedurl = urlparse.urlunsplit(thumbor_urlobj)
        # if sitelang, we're supposed to mangle the URL so that
        # http://upload.wm.o/wikipedia/commons/thumb/a/a2/Foo_.jpg/330px-Foo_.jpg
        # changes to
        # http://commons.wp.o/w/thumb_handler.php/a/a2/Foo_.jpg/330px-Foo_.jpg
        if self.backend_url_format == 'sitelang':
            match = re.match(
                r'^http://(?P<host>[^/]+)/(?P<proj>[^-/]+)/(?P<lang>[^/]+)/thumb/(?P<path>.+)',
                encodedurl)
            if match:
                proj = match.group('proj')
                lang = match.group('lang')
                # and here are all the legacy special cases, imported from thumb_handler.php
                if(proj == 'wikipedia'):
                    if(lang in ['meta', 'commons', 'internal', 'grants']):
                        proj = 'wikimedia'
                    if(lang in ['mediawiki']):
                        lang = 'www'
                        proj = 'mediawiki'
                hostname = '%s.%s.%s' % (lang, proj, self.tld)
                if(proj == 'wikipedia' and lang == 'sources'):
                    # yay special case
                    hostname = 'wikisource.%s' % self.tld
                # ok, replace the URL with just the part starting with thumb/
                # take off the first two parts of the path
                # (eg /wikipedia/commons/); make sure the string starts
                # with a /
                encodedurl = 'http://%s/w/thumb_handler.php/%s' % (
                    hostname, match.group('path'))
                # add in the X-Original-URI with the swift got (minus the hostname)
                opener.addheaders.append(
                    ('X-Original-URI', list(urlparse.urlsplit(reqorig.url))[2]))
            else:
                # ASSERT this code should never be hit since only thumbs
                # should call the 404 handler
                self.logger.warn("non-thumb in 404 handler! encodedurl = %s" % encodedurl)
                resp = webob.exc.HTTPNotFound('Unexpected error')
                return resp
        else:
            # log the result of the match here to test and make sure it's
            # sane before enabling the config
            match = re.match(
                r'^http://(?P<host>[^/]+)/(?P<proj>[^-/]+)/(?P<lang>[^/]+)/thumb/(?P<path>.+)',
                encodedurl)
            if match:
                proj = match.group('proj')
                lang = match.group('lang')
                self.logger.warn(
                    "sitelang match has proj %s lang %s encodedurl %s" % (
                        proj, lang, encodedurl))
            else:
                self.logger.warn("no sitelang match on encodedurl: %s" % encodedurl)
        # To turn thumbor off and have thumbnail traffic served by image scalers,
        # replace the line below with this one:
        # upcopy = opener.open(encodedurl)
        upcopy = thumbor_opener.open(thumbor_encodedurl)
    except urllib2.HTTPError, error:
        # copy the urllib2 HTTPError into a webob HTTPError class as-is
        class CopiedHTTPError(webob.exc.HTTPError):
            code = error.code
            title = error.msg

            def html_body(self, environ):
                return self.detail

            def __init__(self):
                super(CopiedHTTPError, self).__init__(
                    detail="".join(error.readlines()),
                    headers=error.hdrs.items())
        return CopiedHTTPError()
def base_request(self, method, container=None, name=None, prefix=None,
                 headers=None, proxy=None, contents=None, full_listing=None,
                 logger=None, additional_info=None):
    """Issue a raw authenticated request against the storage URL.

    :param method: HTTP verb to use (e.g. 'GET', 'PUT', 'DELETE').
    :param container: optional container name, URL-quoted onto the path.
    :param name: optional object name, URL-quoted onto the path; when
                 absent, a JSON listing (optionally prefix-filtered) is
                 requested instead.
    :param prefix: optional listing prefix (listings only).
    :param headers: optional dict of extra request headers.
    :param proxy: optional proxy URL; installed as the process-wide
                  urllib2 opener (NOTE: affects later urlopen calls too).
    :param contents: optional request body.
    :param full_listing: accepted for interface compatibility; unused here.
    :param logger: optional logger; when given, a one-line transaction
                   summary is emitted at debug level.
    :param additional_info: extra text appended to the debug summary.
    :returns: two-element list [None, parsed-JSON-body-or-None].
    """
    # Common request method
    trans_start = time()
    url = self.url
    if headers is None:
        headers = {}
    if self.token:
        headers['X-Auth-Token'] = self.token
    if container:
        url = '%s/%s' % (url.rstrip('/'), quote(container))
    if name:
        url = '%s/%s' % (url.rstrip('/'), quote(name))
    else:
        url += '?format=json'
        if prefix:
            url += '&prefix=%s' % prefix
    if proxy:
        proxy = urlparse.urlparse(proxy)
        proxy = urllib2.ProxyHandler({proxy.scheme: proxy.netloc})
        opener = urllib2.build_opener(proxy)
        urllib2.install_opener(opener)
    req = urllib2.Request(url, headers=headers, data=contents)
    req.get_method = lambda: method
    # BUG FIX: the request was previously sent twice -- a bare
    # urllib2.urlopen(req) whose response was discarded, immediately
    # followed by the one kept in 'conn'. Send it once.
    conn = urllib2.urlopen(req)
    body = conn.read()
    try:
        body_data = json.loads(body)
    except ValueError:
        body_data = None
    trans_stop = time()
    if logger:
        # Report the Content-Length we sent, if one was supplied.
        sent_content_length = 0
        for n, v in headers.items():
            nl = n.lower()
            if nl == 'content-length':
                try:
                    sent_content_length = int(v)
                    break
                except ValueError:
                    pass
        # Falsy fields are logged as '-', access-log style.
        logger.debug("-> " + " ".join(
            quote(str(x) if x else "-", ":/")
            for x in (
                strftime('%Y-%m-%dT%H:%M:%S', gmtime(trans_stop)),
                method,
                url,
                conn.getcode(),
                sent_content_length,
                conn.info()['content-length'],
                trans_start,
                trans_stop,
                trans_stop - trans_start,
                additional_info)))
    return [None, body_data]
def handle404(self, reqorig, url, container, obj):
    """
    Return a swob.Response which fetches the thumbnail from the thumb
    host and returns it. Note also that the thumb host might write it out
    to Swift so it won't 404 next time.
    """
    # go to the thumb media store for unknown files
    reqorig.host = self.thumbhost
    # upload doesn't like our User-agent, otherwise we could call it
    # using urllib2.url()
    proxy_handler = urllib2.ProxyHandler({'http': self.thumbhost})
    redirect_handler = DumbRedirectHandler()
    opener = urllib2.build_opener(redirect_handler, proxy_handler)
    # Thumbor doesn't need (and doesn't like) the proxy
    thumbor_opener = urllib2.build_opener(redirect_handler)
    # Pass on certain headers from the caller squid to the scalers
    opener.addheaders = []
    if reqorig.headers.get('User-Agent') is not None:
        opener.addheaders.append(
            ('User-Agent', reqorig.headers.get('User-Agent')))
    else:
        opener.addheaders.append(('User-Agent', self.user_agent))
    for header_to_pass in [
            'X-Forwarded-For', 'X-Forwarded-Proto', 'Accept',
            'Accept-Encoding', 'X-Original-URI'
    ]:
        if reqorig.headers.get(header_to_pass) is not None:
            opener.addheaders.append(
                (header_to_pass, reqorig.headers.get(header_to_pass)))
    # Both openers forward the same headers.
    thumbor_opener.addheaders = opener.addheaders
    # At least in theory, we shouldn't be handing out links to originals
    # that we don't have (or in the case of thumbs, can't generate).
    # However, someone may have a formerly valid link to a file, so we
    # should do them the favor of giving them a 404.
    try:
        # break apach the url, url-encode it, and put it back together
        urlobj = list(urlparse.urlsplit(reqorig.url))
        # encode the URL but don't encode %s and /s
        urlobj[2] = urllib2.quote(urlobj[2], '%/')
        encodedurl = urlparse.urlunsplit(urlobj)
        # Thumbor never needs URL mangling and it needs a different host
        if self.thumborhost:
            thumbor_reqorig = swob.Request(reqorig.environ.copy())
            thumbor_reqorig.host = self.thumborhost
            thumbor_urlobj = list(urlparse.urlsplit(thumbor_reqorig.url))
            thumbor_urlobj[2] = urllib2.quote(thumbor_urlobj[2], '%/')
            thumbor_encodedurl = urlparse.urlunsplit(thumbor_urlobj)
        # if sitelang, we're supposed to mangle the URL so that
        # http://upload.wm.o/wikipedia/commons/thumb/a/a2/Foo_.jpg/330px-Foo_.jpg
        # changes to
        # http://commons.wp.o/w/thumb_handler.php/a/a2/Foo_.jpg/330px-Foo_.jpg
        if self.backend_url_format == 'sitelang':
            match = re.match(
                r'^http://(?P<host>[^/]+)/(?P<proj>[^-/]+)/(?P<lang>[^/]+)/thumb/(?P<path>.+)',
                encodedurl)
            if match:
                proj = match.group('proj')
                lang = match.group('lang')
                # and here are all the legacy special cases, imported from thumb_handler.php
                if (proj == 'wikipedia'):
                    if (lang in ['meta', 'commons', 'internal', 'grants']):
                        proj = 'wikimedia'
                    if (lang in ['mediawiki']):
                        lang = 'www'
                        proj = 'mediawiki'
                hostname = '%s.%s.%s' % (lang, proj, self.tld)
                if (proj == 'wikipedia' and lang == 'sources'):
                    # yay special case
                    hostname = 'wikisource.%s' % self.tld
                # ok, replace the URL with just the part starting with thumb/
                # take off the first two parts of the path
                # (eg /wikipedia/commons/); make sure the string starts
                # with a /
                encodedurl = 'http://%s/w/thumb_handler.php/%s' % (
                    hostname, match.group('path'))
                # add in the X-Original-URI with the swift got (minus the hostname)
                opener.addheaders.append(
                    ('X-Original-URI', list(urlparse.urlsplit(reqorig.url))[2]))
            else:
                # ASSERT this code should never be hit since only thumbs
                # should call the 404 handler
                self.logger.warn(
                    "non-thumb in 404 handler! encodedurl = %s" % encodedurl)
                resp = swob.HTTPNotFound('Unexpected error')
                return resp
        else:
            # log the result of the match here to test and make sure it's
            # sane before enabling the config
            match = re.match(
                r'^http://(?P<host>[^/]+)/(?P<proj>[^-/]+)/(?P<lang>[^/]+)/thumb/(?P<path>.+)',
                encodedurl)
            if match:
                proj = match.group('proj')
                lang = match.group('lang')
                self.logger.warn(
                    "sitelang match has proj %s lang %s encodedurl %s"
                    % (proj, lang, encodedurl))
            else:
                self.logger.warn("no sitelang match on encodedurl: %s" %
                                 encodedurl)
        # To turn thumbor off and have thumbnail traffic served by image scalers,
        # replace the line below with this one:
        # upcopy = opener.open(encodedurl)
        upcopy = thumbor_opener.open(thumbor_encodedurl)
    except urllib2.HTTPError as error:
        # Wrap the urllib2 HTTPError into a swob HTTPException
        status = error.code
        if status not in swob.RESPONSE_REASONS:
            # Generic status description in case of unknown status reasons.
            status = "%s Error" % status
        return swob.HTTPException(status=status, body=error.msg,
                                  headers=error.hdrs.items())
    except urllib2.URLError as error:
        msg = 'There was a problem while contacting the thumbnailing service: %s' % \
              error.reason
        return swob.HTTPServiceUnavailable(msg)
    # get the Content-Type.
    uinfo = upcopy.info()
    c_t = uinfo.gettype()
    # Stream the upstream body straight through as the response body.
    resp = swob.Response(app_iter=upcopy, content_type=c_t)
    headers_whitelist = [
        'Content-Length', 'Content-Disposition', 'Last-Modified',
        'Accept-Ranges', 'XKey', 'Thumbor-Engine', 'Server',
        'Nginx-Request-Date', 'Nginx-Response-Date',
        'Thumbor-Processing-Time', 'Thumbor-Processing-Utime',
        'Thumbor-Request-Id', 'Thumbor-Request-Date'
    ]
    # add in the headers if we've got them
    for header in headers_whitelist:
        if (uinfo.getheader(header) != ''):
            resp.headers[header] = uinfo.getheader(header)
    # also add CORS; see also our CORS middleware
    resp.headers['Access-Control-Allow-Origin'] = '*'
    return resp