import json
import random
import urllib2


def spider(lon, lat, num):
    http_proxy = {'http': '116.28.109.64:808'}
    url = "https://mwx.mobike.com/mobike-api/rent/nearbyBikesInfo.do"
    payload = "?latitude=%s&longitude=%s&errMsg=getMapCenterLocation" % (lat, lon)
    url = url + payload
    headers = {
        'charset': "utf-8",
        'platform': "4",
        'referer': "https://servicewechat.com/wx40f112341ae33edb/1/",
        'content-type': "application/x-www-form-urlencoded",
        'user-agent': "MicroMessenger/6.5.4.1000 NetType/WIFI Language/zh_CN",
        'host': "mwx.mobike.com",
        'connection': "Keep-Alive",
        'accept-encoding': "gzip",
        'cache-control': "no-cache"
    }
    proxy = [
        '222.128.13.94:8081', '222.128.13.94:8081', '121.232.147.200:9000', '218.89.97.127:9000', '117.90.252.123:9000', '121.232.147.206:9000',
        '121.232.146.12:9000', '222.208.66.14:9000', '121.232.147.206:9000', '121.232.145.93:9000', '118.178.227.171:80', '121.31.103.138:8123',
        '110.73.34.186:8123', '121.31.152.122:8123', '116.28.109.64:808', '115.213.1.83:8998', '60.178.137.158:8081', '117.90.3.185:9000',
        '117.90.1.204:9000', '110.73.48.146:8123', '111.13.7.42:83', '118.178.227.171:80', '182.42.46.43:808', '111.74.56.249:9000',
        '111.74.56.249:9000', '122.96.59.105:82', '117.90.2.208:9000', '121.232.146.149:9000', '60.178.13.75:8081', '182.129.243.22:9000',
        '110.73.33.147:8123', '123.169.84.88:808', '120.27.49.85:8090', '122.193.14.114:82', '121.232.145.104:9000', '171.38.36.78:8123',
        '163.125.222.240:8118', '114.230.31.225:3128', '171.92.4.67:9000', '121.232.147.247:9000', '121.232.146.148:9000', '121.232.144.201:9000',
        '121.232.145.251:9000', '110.73.55.150:8123', '112.33.7.9:8081', '182.141.46.93:9000', '210.76.163.216:8118', '221.230.7.59:9000',
        '121.232.146.139:9000', '121.232.145.43:9000', '118.117.139.117:9000', '121.232.146.205:9000', '121.232.147.112:9000', '202.141.161.30:8118',
        '117.90.0.205:9000', '121.232.144.156:9000', '122.96.59.105:82', '122.96.59.106:80', '222.208.66.14:9000', '111.12.96.188:80',
        '121.31.152.178:8123', '59.66.202.98:1080', '110.73.4.93:8123', '110.73.40.102:8123', '118.117.136.82:9000', '121.232.145.5:9000',
        '110.73.0.233:8123', '111.74.56.247:9000', '118.117.137.188:9000', '110.73.3.197:8123', '117.90.5.166:9000', '113.140.25.4:81',
        '115.230.60.199:808', '123.169.91.85:808', '117.90.7.181:9000', '122.193.14.114:82', '121.232.144.234:9000', '171.215.237.225:9000',
        '121.232.146.50:9000', '121.31.101.137:8123', '182.129.242.219:9000', '182.90.106.23:8123', '121.232.144.209:9000', '117.90.7.142:9000',
        '121.232.144.158:9000', '171.215.226.246:9000', '121.232.146.181:9000', '117.90.3.61:9000', '121.232.147.222:9000', '121.232.145.29:9000',
        '111.74.56.244:9000', '111.13.7.42:81', '117.90.6.233:9000', '163.125.251.253:8118', '112.33.7.9:8081', '121.232.144.113:9000',
        '122.96.59.106:843', '110.72.16.89:8123', '183.154.215.0:9000', '171.92.32.87:9000', '182.129.240.160:9000', '121.232.194.184:9000',
        '125.72.125.14:808', '120.77.255.133:8088', '121.232.144.67:9000', '121.232.146.15:9000', '121.31.101.188:8123', '61.232.121.166:8123',
        '117.90.4.70:9000', '182.129.249.38:9000', '121.232.147.148:9000', '218.104.148.157:8080', '58.217.255.184:1080', '117.90.4.150:9000',
        '121.232.144.41:9000', '121.232.147.165:9000', '117.90.0.211:9000', '117.90.7.246:9000', '121.232.147.195:9000', '202.141.161.30:8118',
        '117.90.5.200:9000', '121.232.146.127:9000', '121.232.145.128:9000', '110.73.28.60:8123', '60.178.169.117:8081', '183.240.87.229:8080',
        '111.155.116.229:8123', '121.232.146.217:9000', '121.232.144.190:9000', '121.232.148.17:9000', '117.90.1.254:9000', '118.117.137.217:9000',
        '163.125.251.49:8118', '120.77.206.98:8118', '121.232.145.52:9000', '121.232.144.105:9000', '118.26.183.215:8080', '121.232.147.200:9000',
        '171.92.52.2:9000', '121.232.144.89:9000', '117.90.5.133:9000', '60.178.169.106:8081', '220.191.14.233:808', '118.117.136.55:9000',
        '117.90.2.28:9000', '121.232.144.236:9000', '121.232.144.237:9000', '117.90.0.98:9000', '117.90.2.65:9000', '121.232.147.56:9000',
        '182.141.42.29:9000', '121.232.144.141:9000', '121.232.145.28:9000', '210.44.213.63:1080', '60.178.170.66:8081', '121.232.147.96:9000',
        '171.37.170.153:8123', '139.196.121.161:80', '118.117.138.101:9000', '121.232.147.203:9000', '111.155.116.207:8123', '218.89.97.88:9000',
        '110.73.51.213:8123', '121.232.146.236:9000', '121.232.147.241:9000', '118.117.137.2:9000', '121.31.148.28:8123', '171.215.227.75:9000',
        '121.232.146.86:9000', '111.13.7.42:80', '110.73.31.223:8123', '121.232.146.161:9000', '117.90.4.61:9000', '111.155.116.215:8123',
        '123.169.90.147:808', '117.90.5.69:9000', '121.232.146.192:9000', '121.232.147.246:9000', '163.125.251.46:8118', '121.232.147.145:9000',
        '121.232.144.138:9000', '60.178.1.74:8081', '180.110.17.213:808', '120.83.99.212:808', '111.1.52.45:80', '61.157.198.66:8080',
        '121.232.146.75:9000', '121.232.147.164:9000', '121.232.147.56:9000', '114.99.21.133:808', '121.232.148.31:9000', '121.232.146.71:9000',
        '121.232.147.113:9000', '101.86.86.101:8118', '182.129.241.132:9000', '60.178.10.96:8081', '121.40.164.232:8118', '183.240.87.229:8080',
        '121.232.147.78:9000', '122.96.59.104:80', '121.232.146.55:9000', '114.238.42.105:808', '121.232.147.199:9000', '125.92.33.20:3128',
        '60.178.3.139:8081', '121.31.155.232:8123', '121.232.147.156:9000', '121.232.147.55:9000', '121.15.170.171:8080', '121.232.144.249:9000',
        '118.117.137.2:9000', '115.46.76.205:8123', '171.39.4.208:8123', '221.239.81.83:8118', '121.232.145.190:9000', '115.213.203.188:808',
        '221.192.134.92:8081', '125.67.74.248:9000', '182.129.240.154:9000', '121.232.147.197:9000', '171.92.53.59:9000', '222.187.20.51:8998',
        '183.207.176.252:1080', '110.72.30.1:8123', '121.232.145.115:9000', '122.96.59.106:81', '121.232.144.37:9000', '223.86.37.135:8998',
        '122.96.59.106:843', '210.76.163.216:8118', '210.44.213.63:1080', '121.232.148.43:9000', '59.49.129.60:8998', '49.86.62.100:808',
        '123.206.225.120:8888', '60.178.128.19:8081', '218.104.148.157:8080', '220.176.93.100:9000', '182.129.242.241:9000', '121.232.148.61:9000',
        '122.112.230.18:8080', '121.232.147.250:9000', '59.62.6.93:9000', '111.155.116.221:8123', '121.232.147.28:9000', '171.215.241.87:9000',
        '118.117.138.101:9000', '60.178.3.2:8081', '114.115.218.71:80', '114.115.218.143:8118', '121.232.144.82:9000', '183.147.22.6:9000',
        '202.121.96.33:8086', '121.232.147.205:9000', '125.117.132.239:9000', '125.67.74.248:9000', '121.232.145.151:9000', '121.232.145.2:9000',
        '139.196.121.161:80', '180.119.65.217:3128', '222.208.83.160:9000', '114.99.21.133:808', '121.13.55.162:8118', '180.119.65.25:3128',
        '121.31.147.77:8123', '122.112.230.18:8080', '121.232.147.34:9000', '123.169.90.229:808', '180.173.109.149:8118', '121.232.147.130:9000',
    ]
    # Pick a random proxy from the pool for this request
    # (len(proxy) - 1 instead of the fragile hardcoded index 269).
    temp = random.randint(0, len(proxy) - 1)
    http_proxy['http'] = proxy[temp]
    print proxy[temp]
    proxy_handler = urllib2.ProxyHandler(http_proxy)
    opener = urllib2.build_opener(proxy_handler)
    request = urllib2.Request(url, headers=headers)
    try:
        response = opener.open(request, timeout=1)
        info = response.read()
        print info
        info = json.loads(info)
        bikes = info['object']
        # for bike in bikes:
        #     print bike['distY']
    except Exception as ex:
        # Retry with another random proxy until the attempt budget is spent;
        # log each failing proxy to a file.
        num = num - 1
        if num < 0:
            print "die total"
        else:
            print "die once"
            with open('./baidu.txt', 'a') as fp:
                fp.write(http_proxy['http'] + '\n')
            spider(lon, lat, num)
    print 'ok'
def __init__(self, base_url):
    self._base_url = base_url.rstrip('/')
    self._proxy_handler = urllib2.ProxyHandler({})
def _send_request(self, method="GET", path=None, args=None, data=None, auth=False):
    """
    Send a request to the Wrike API

    @param method: the HTTP method
    @param path: the path relative to the repository URL
    @param data: the data to send
    @param auth: this is an authorization request
    """
    repository = self.repository

    # Request URL
    api = "oauth2/token" if auth else "api/v3"
    url = "/".join((repository.url.rstrip("/"), api))
    if path:
        url = "/".join((url, path.lstrip("/")))
    if args:
        url = "?".join((url, urllib.urlencode(args)))

    # Create the request
    req = urllib2.Request(url=url)
    handlers = []

    if not auth:
        # Install access token header
        access_token = self.access_token
        if not access_token:
            message = "Authorization failed: no access token"
            current.log.error(message)
            return None, message
        req.add_header("Authorization", "%s %s" % (self.token_type, access_token))

        # JSONify request data
        request_data = json.dumps(data) if data else ""
        if request_data:
            req.add_header("Content-Type", "application/json")
    else:
        # URL-encode request data for auth
        request_data = urllib.urlencode(data) if data else ""

    # Indicate that we expect JSON response
    req.add_header("Accept", "application/json")

    # Proxy handling
    config = repository.config
    proxy = repository.proxy or config.proxy or None
    if proxy:
        current.log.debug("using proxy=%s" % proxy)
        proxy_handler = urllib2.ProxyHandler({"https": proxy})
        handlers.append(proxy_handler)

    # Install all handlers
    if handlers:
        opener = urllib2.build_opener(*handlers)
        urllib2.install_opener(opener)

    # Execute the request
    response = None
    message = None
    try:
        if method == "POST":
            f = urllib2.urlopen(req, data=request_data)
        else:
            f = urllib2.urlopen(req)
    except urllib2.HTTPError, e:
        message = "HTTP %s: %s" % (e.code, e.reason)
def _post_multipart_stnd(self, host, selector,
                         fields, files,
                         ssl=False, port=80,
                         proxy_url=None, proxy_port=None):
    """ performs a multi-post to AGOL, Portal, or AGS using standard library
        Inputs:
           host - string - root url (no http:// or https://)
               ex: www.arcgis.com
           selector - string - everything after the host
               ex: /PWJUSsdoJDp7SgLj/arcgis/rest/services/GridIndexFeatures/FeatureServer/0/1/addAttachment
           fields - dictionary - additional parameters like token and format information
           files - tuple array - tuple with the file name type, filename, full path
           ssl - option to use SSL
           proxy_url - string - url to proxy server
           proxy_port - integer - port value if not on port 80
        Output:
           JSON response as dictionary
        Usage:
           import urlparse
           url = "http://sampleserver3.arcgisonline.com/ArcGIS/rest/services/SanFrancisco/311Incidents/FeatureServer/0/10261291"
           parsed_url = urlparse.urlparse(url)
           params = {"f":"json"}
           print _post_multipart(host=parsed_url.hostname,
                                 selector=parsed_url.path,
                                 files=files,
                                 fields=params)
    """
    content_type, body = self._encode_multipart_formdata(fields, files)
    if ssl:
        url = "https://%s%s" % (host, selector)
    else:
        url = "http://%s%s" % (host, selector)
    if proxy_url is not None:
        if proxy_port is None:
            proxy_port = 80
        proxies = {
            "http": "http://%s:%s" % (proxy_url, proxy_port),
            "https": "https://%s:%s" % (proxy_url, proxy_port)
        }
        proxy_support = urllib2.ProxyHandler(proxies)
        opener = urllib2.build_opener(proxy_support,
                                      urllib2.HTTPHandler(debuglevel=0))
        urllib2.install_opener(opener)
    request = urllib2.Request(url)
    request.add_header('User-agent', 'ArcREST')
    request.add_header('Content-type', content_type)
    request.add_header('Content-length', len(body))
    request.add_data(body)
    result = urllib2.urlopen(request).read()
    if result == "":
        return ""
    jres = json.loads(result)
    if 'error' in jres:
        if jres['error']['message'] == 'Request not made over ssl':
            if url.startswith('http://'):
                url = url.replace('http://', 'https://')
                return self._post_multipart(host, selector, fields, files,
                                            ssl=True, port=port,
                                            proxy_url=proxy_url,
                                            proxy_port=proxy_port)
    return self._unicode_convert(jres)
print "------------------------------------------------------------------" client = KunClient(host=HOST, port=PORT) # make instance #client.conn() # conn Method return 1, if success client.header.parse(REQMSG) # make initial Head 0.9 client.header.add("HTTP_PHONE_NUMBER", MDN) client.header.add("HTTP_MDN_INFO", MDN) client.header.add("Host", host) ######### cj = cookielib.CookieJar() cookie_handler = urllib2.HTTPCookieProcessor(cj) #proxy_support = urllib2.ProxyHandler({'http':'http://localhost:8080'}) proxy_support = urllib2.ProxyHandler({'http':'http://127.0.0.1:8080'}) http_handler = urllib2.HTTPHandler(debuglevel=1000) # DEBUG SHOW OPTION ON handlers = [ http_handler, proxy_support, cookie_handler ] #opener = urllib2.build_opener(handler, proxy_support, urllib2.HTTPCookieProcessor(cj)) opener = urllib2.build_opener(*handlers) #opener = urllib2.build_opener(http_handler, urllib2.HTTPCookieProcessor(cj)) urllib2.install_opener(opener) theurl = 'http://localhost:8080' #TEST PAGES theurl = 'http://m.knpu.org' theurl = 'http://m.naver.com' #naver require exact phone number #theurl = 'http://sting.xozen.com' #theurl = 'http://kbank.altou.com' theurl = 'http://www.magicn.com/'
""" from datetime import datetime import re from StringIO import StringIO import sys import urllib2 import zipfile RULE_TEMPLATE = r'alert udp $HOME_NET any -> $DNS_SERVERS 53 (msg:"DNS Query for a dynamic domain <domain_here>"; content:"|01 00 00 01 00 00 00 00 00 00|"; depth:10; offset:2; content:"<content_here>"; fast_pattern; nocase; distance:0; classtype:bad-unknown; sid:<sid_here>; rev:1;)' STARTING_SID = 1000000 HTTP_PROXY = 'http://127.0.0.1:3128' FILE_URL = 'http://www.malware-domains.com/files/dynamic_dns.zip' FILE = 'dynamic_dns.txt' RULES_FILE = 'local.rules' proxy_support = urllib2.ProxyHandler({'http': HTTP_PROXY}) opener = urllib2.build_opener(proxy_support) urllib2.install_opener(opener) try: url = urllib2.urlopen(FILE_URL) zf = zipfile.ZipFile(StringIO(url.read())) except: print "Couldn't connect" sys.exit(1) sid = STARTING_SID timestamp = datetime.utcnow() with open(RULES_FILE, 'w') as rules_fh: rules_fh.write('#autogenerated on ' + timestamp.isoformat('T') + 'Z' +
def request_file_content(self, url, timeout=20):
    # default value
    response_content = ""

    # if we are using a proxy server - read its configuration
    if self.config.USE_PROXY == "YES":
        proxy_dict = {
            "http": self.config.PROXY_SERVER,
            "https": self.config.PROXY_SERVER,
            "ftp": self.config.PROXY_SERVER
        }
        proxy = urllib2.ProxyHandler(proxy_dict)
        opener = urllib2.build_opener(proxy)
        urllib2.install_opener(opener)

    # build the request
    request = urllib2.Request(url)
    base64string = base64.encodestring(
        "%s:%s" % (self.config.API_ID, self.config.API_KEY)).replace('\n', '')
    request.add_header("Authorization", "Basic %s" % base64string)

    try:
        # open the connection to the URL
        if self.config.USE_CUSTOM_CA_FILE == "YES":
            response = urllib2.urlopen(request, timeout=timeout,
                                       cafile=self.config.CUSTOM_CA_FILE)
        else:
            response = urllib2.urlopen(request, timeout=timeout)

        # if we got a 200 OK response
        if response.code == 200:
            self.logger.info("Successfully downloaded file from URL %s" % url)
            # read the response content
            response_content = response.read()
        # if we got another response code
        else:
            self.logger.error(
                "Failed to download file %s. Response code is %s. Info is %s",
                url, response.code, response.info())
        # close the response
        response.close()
        # return the content string
        return response_content

    # handle 401/404/429 responses
    except urllib2.HTTPError, e:
        if e.code == 404:
            self.logger.error(
                "Could not find file %s. Response code is %s", url, e.code)
            return response_content
        elif e.code == 401:
            self.logger.error(
                "Authorization error - Failed to download file %s. Response code is %s",
                url, e.code)
            raise Exception("Authorization error")
        elif e.code == 429:
            self.logger.error(
                "Rate limit exceeded - Failed to download file %s. Response code is %s",
                url, e.code)
            raise Exception("Rate limit error")
        else:
            self.logger.error(
                "An error has occurred while opening a connection to %s. %s",
                url, str(e.code))
            raise Exception("Connection error")
def proxy(self):
    proxy = urllib2.ProxyHandler({'http': '127.0.0.1:8888'})
    opener = urllib2.build_opener(proxy)
    urllib2.install_opener(opener)
class SougouSpider:
    # counters (amount of images to fetch, start offset, current count)
    __amount = 0
    __start_amount = 0
    __counter = 0

    def __init__(self, totalPageNum=1, image_path='/data2/xijun.gong/jd_image_data'):
        """
        :param totalPageNum: number of pages to download
        :param image_path: directory where images are saved
        """
        self.download = download
        self.proxy_new = proxy.Proxy()
        self.image_path = image_path
        self.__amount = totalPageNum * maxImageNum + self.__start_amount
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.94 Safari/537.36',
            'Qunar-App': 'SQxTLo6t4k5HSnsykL7nuz9jL/2FvrM9QfppVEbOhQYxIS5tR6I/w3GIq9wpZLbur3Hw7W//Ec+nFnorxB7gTlSSND1Xrbaj3zmRkWAZUaiRm+djpINDhvsYOXlFZHlrQ0BPZ+uZRIn5xnSAfPTpW1xJehqHDr1769Xs0Ly8rZM='
        }

    def __getImg(self, sub_dest_dir, word='dame', extra_word='provice'):
        """
        :param word: the search keyword
        """
        _keyword = extra_word.encode('utf-8') + ' ' + word.encode('utf-8')
        # print _keyword
        search = urllib.pathname2url(str(_keyword))
        # pn: int, number of images fetched so far
        pn = self.__start_amount
        root_path = os.path.join(self.image_path, sub_dest_dir)
        try:
            if not os.path.exists(root_path):
                os.makedirs(root_path)
        except Exception, e:
            print e
        # guard against duplicate names: use the current image count as the counter
        self.__counter = len(os.listdir(root_path)) + 1
        while pn <= self.__amount and self.__counter <= self.__amount:
            url = 'http://pic.sogou.com/pics?query=' + search + '&mode=1&start=' \
                  + str(pn) + '&reqType=ajax&reqFrom=result&tn=1'
            # page = None
            try:
                # use a proxy
                proxy = urllib2.ProxyHandler(self.proxy_new.getProxyByTxt())
                opener = urllib2.build_opener(proxy)
                urllib2.install_opener(opener)
                _text = urllib2.Request(url, headers=self.headers)
                data = urllib2.urlopen(_text, timeout=50).read()
                # req = urllib2.Request(url=url, headers=self.headers)
                # page = urllib2.urlopen(req)
                # data = page.read()
                # parse the json
                # print data
                self.__saveImage(root_path, self.parseJson(data, 'thumbUrl'), word)
                pn += maxImageNum
            except Exception as e:
                print 'error while downloading image:', e, url
            finally:
                self.__counter = len(os.listdir(root_path)) + 1
                # if page is not None:
                #     page.close()
        print "image download:", word + " finished"
def request(url, close=True, redirect=True, error=False, proxy=None,
            post=None, headers=None, mobile=False, XHR=False, limit=None,
            referer=None, cookie=None, output='', timeout=None):
    try:
        handlers = []

        if timeout == '' or timeout == None:
            timeout = '30'

        if not proxy == None:
            handlers += [urllib2.ProxyHandler({'http': '%s' % (proxy)}),
                         urllib2.HTTPHandler]
            opener = urllib2.build_opener(*handlers)
            opener = urllib2.install_opener(opener)

        if output == 'cookie' or output == 'extended' or not close == True:
            cookies = cookielib.LWPCookieJar()
            handlers += [urllib2.HTTPHandler(),
                         urllib2.HTTPSHandler(),
                         urllib2.HTTPCookieProcessor(cookies)]
            opener = urllib2.build_opener(*handlers)
            opener = urllib2.install_opener(opener)

        try:
            if sys.version_info < (2, 7, 9):
                raise Exception()
            import ssl
            ssl_context = ssl.create_default_context()
            ssl_context.check_hostname = False
            ssl_context.verify_mode = ssl.CERT_NONE
            handlers += [urllib2.HTTPSHandler(context=ssl_context)]
            opener = urllib2.build_opener(*handlers)
            opener = urllib2.install_opener(opener)
        except:
            pass

        try:
            headers.update(headers)
        except:
            headers = {}

        if 'User-Agent' in headers:
            pass
        elif not mobile == True:
            #headers['User-Agent'] = agent()
            headers['User-Agent'] = cache.get(randomagent, 1)
        else:
            headers['User-Agent'] = 'Apple-iPhone/701.341'

        if 'Referer' in headers:
            pass
        elif referer == None:
            headers['Referer'] = '%s://%s/' % (urlparse.urlparse(url).scheme,
                                               urlparse.urlparse(url).netloc)
        else:
            headers['Referer'] = referer

        if not 'Accept-Language' in headers:
            headers['Accept-Language'] = 'en-US'

        if 'X-Requested-With' in headers:
            pass
        elif XHR == True:
            headers['X-Requested-With'] = 'XMLHttpRequest'

        if 'Cookie' in headers:
            pass
        elif not cookie == None:
            headers['Cookie'] = cookie

        if redirect == False:
            class NoRedirection(urllib2.HTTPErrorProcessor):
                def http_response(self, request, response):
                    return response

            opener = urllib2.build_opener(NoRedirection)
            opener = urllib2.install_opener(opener)

            try:
                del headers['Referer']
            except:
                pass

        request = urllib2.Request(url, data=post, headers=headers)

        try:
            response = urllib2.urlopen(request, timeout=int(timeout))
        except urllib2.HTTPError as response:
            if response.code == 503:
                cf_result = response.read(5242880)
                try:
                    encoding = response.info().getheader('Content-Encoding')
                except:
                    encoding = None
                if encoding == 'gzip':
                    cf_result = gzip.GzipFile(fileobj=StringIO.StringIO(cf_result)).read()

                if 'cf-browser-verification' in cf_result:
                    netloc = '%s://%s' % (urlparse.urlparse(url).scheme,
                                          urlparse.urlparse(url).netloc)
                    ua = headers['User-Agent']
                    cf = cache.get(cfcookie().get, 168, netloc, ua, timeout)
                    headers['Cookie'] = cf
                    request = urllib2.Request(url, data=post, headers=headers)
                    response = urllib2.urlopen(request, timeout=int(timeout))
                elif error == False:
                    return
            elif error == False:
                return

        if output == 'cookie':
            try:
                result = '; '.join(['%s=%s' % (i.name, i.value) for i in cookies])
            except:
                pass
            try:
                result = cf
            except:
                pass
            if close == True:
                response.close()
            return result

        elif output == 'geturl':
            result = response.geturl()
            if close == True:
                response.close()
            return result

        elif output == 'headers':
            result = response.headers
            if close == True:
                response.close()
            return result

        elif output == 'chunk':
            try:
                content = int(response.headers['Content-Length'])
            except:
                content = (2049 * 1024)
            if content < (2048 * 1024):
                return
            result = response.read(16 * 1024)
            if close == True:
                response.close()
            return result

        if limit == '0':
            result = response.read(224 * 1024)
        elif not limit == None:
            result = response.read(int(limit) * 1024)
        else:
            result = response.read(5242880)

        try:
            encoding = response.info().getheader('Content-Encoding')
        except:
            encoding = None
        if encoding == 'gzip':
            result = gzip.GzipFile(fileobj=StringIO.StringIO(result)).read()

        if 'sucuri_cloudproxy_js' in result:
            su = sucuri().get(result)
            headers['Cookie'] = su
            request = urllib2.Request(url, data=post, headers=headers)
            response = urllib2.urlopen(request, timeout=int(timeout))

            if limit == '0':
                result = response.read(224 * 1024)
            elif not limit == None:
                result = response.read(int(limit) * 1024)
            else:
                result = response.read(5242880)

            try:
                encoding = response.info().getheader('Content-Encoding')
            except:
                encoding = None
            if encoding == 'gzip':
                result = gzip.GzipFile(fileobj=StringIO.StringIO(result)).read()

        if output == 'extended':
            response_headers = response.headers
            response_code = str(response.code)
            try:
                cookie = '; '.join(['%s=%s' % (i.name, i.value) for i in cookies])
            except:
                pass
            try:
                cookie = cf
            except:
                pass
            if close == True:
                response.close()
            return (result, response_code, response_headers, headers, cookie)
        else:
            if close == True:
                response.close()
            return result
    except:
        return
import sys
import os
import urllib2
import glob
import FileAccess
import time

Path = sys.argv[1]
G_startYear = sys.argv[2]
G_endYear = sys.argv[3]

#proxy: http://autoproxy.cec.eu.int/proxy.pac
proxy_url = "http://*****:*****@147.67.138.13:8012"    #proxy BXL
proxy_urls = "http://*****:*****@147.67.117.13:8012"   #proxy LUX
proxy_support = urllib2.ProxyHandler({'https': proxy_urls})
opener = urllib2.build_opener(proxy_support)
urllib2.install_opener(opener)

dirUse = Path
dirLog = dirUse + '\\Log'
dirTXT = dirUse + '\\Input\\txt'
fichiersXML = glob.glob(dirUse + '\\Input\\xml\\*.xml')
dirFile = dirUse + '\\Input'


def getFileUN(startYear, endYear):
    nbrYear = int(endYear) - int(startYear) + 1
    code = 'K9tEO6K2x1gJoFnXR/qcd8gwzQgyXpkLcugnAN4Wj45g2jtfyj/9S6GqzH+KozrFerR4R4igrn717EjaBxQkgJKQts61M1U+dVxcdkRPZzGxClkhvNSLyxdp5OoXJ256L6xAvOpc/jJnvP0ZzLfeDsSN8CeXx+pvnTYCJbCbU/Y='
    cc = '??????'
    px = 'H0'
def fetch_url(module, url, data=None, headers=None, method=None,
              use_proxy=True, force=False, last_mod_time=None, timeout=10):
    '''
    Fetches a file from an HTTP/FTP server using urllib2
    '''

    if not HAS_URLLIB:
        module.fail_json(msg='urllib is not installed')
    if not HAS_URLLIB2:
        module.fail_json(msg='urllib2 is not installed')
    elif not HAS_URLPARSE:
        module.fail_json(msg='urlparse is not installed')

    r = None
    handlers = []
    info = dict(url=url)

    distribution = get_distribution()

    # Get validate_certs from the module params
    validate_certs = module.params.get('validate_certs', True)

    # FIXME: change the following to use the generic_urlparse function
    #        to remove the indexed references for 'parsed'
    parsed = urlparse.urlparse(url)
    if parsed[0] == 'https':
        if not HAS_SSL and validate_certs:
            if distribution == 'Redhat':
                module.fail_json(msg='SSL validation is not available in your version of python. You can use validate_certs=no, however this is unsafe and not recommended. You can also install python-ssl from EPEL')
            else:
                module.fail_json(msg='SSL validation is not available in your version of python. You can use validate_certs=no, however this is unsafe and not recommended')
        elif validate_certs:
            # do the cert validation
            netloc = parsed[1]
            if '@' in netloc:
                netloc = netloc.split('@', 1)[1]
            if ':' in netloc:
                hostname, port = netloc.split(':', 1)
            else:
                hostname = netloc
                port = 443
            # create the SSL validation handler and
            # add it to the list of handlers
            ssl_handler = SSLValidationHandler(module, hostname, port)
            handlers.append(ssl_handler)

    if parsed[0] != 'ftp':
        username = module.params.get('url_username', '')
        if username:
            password = module.params.get('url_password', '')
            netloc = parsed[1]
        elif '@' in parsed[1]:
            credentials, netloc = parsed[1].split('@', 1)
            if ':' in credentials:
                username, password = credentials.split(':', 1)
            else:
                username = credentials
                password = ''
            parsed = list(parsed)
            parsed[1] = netloc
            # reconstruct url without credentials
            url = urlparse.urlunparse(parsed)

        if username:
            passman = urllib2.HTTPPasswordMgrWithDefaultRealm()
            # this creates a password manager
            passman.add_password(None, netloc, username, password)
            # because we have put None at the start it will always
            # use this username/password combination for urls
            # for which `theurl` is a super-url
            authhandler = urllib2.HTTPBasicAuthHandler(passman)
            # create the AuthHandler
            handlers.append(authhandler)

    if not use_proxy:
        proxyhandler = urllib2.ProxyHandler({})
        handlers.append(proxyhandler)

    # pre-2.6 versions of python cannot use the custom https
    # handler, since the socket class is lacking this method
    if hasattr(socket, 'create_connection'):
        handlers.append(CustomHTTPSHandler)

    opener = urllib2.build_opener(*handlers)
    urllib2.install_opener(opener)

    if method:
        if method.upper() not in ('OPTIONS', 'GET', 'HEAD', 'POST', 'PUT',
                                  'DELETE', 'TRACE', 'CONNECT'):
            module.fail_json(msg='invalid HTTP request method; %s' % method.upper())
        request = RequestWithMethod(url, method.upper(), data)
    else:
        request = urllib2.Request(url, data)

    # add the custom agent header, to help prevent issues
    # with sites that block the default urllib agent string
    request.add_header('User-agent', module.params.get('http_agent'))

    # if we're ok with getting a 304, set the timestamp in the
    # header, otherwise make sure we don't get a cached copy
    if last_mod_time and not force:
        tstamp = last_mod_time.strftime('%a, %d %b %Y %H:%M:%S +0000')
        request.add_header('If-Modified-Since', tstamp)
    else:
        request.add_header('cache-control', 'no-cache')

    # user defined headers now, which may override things we've set above
    if headers:
        if not isinstance(headers, dict):
            module.fail_json("headers provided to fetch_url() must be a dict")
        for header in headers:
            request.add_header(header, headers[header])

    try:
        if sys.version_info < (2, 6, 0):
            # urlopen in python prior to 2.6.0 did not
            # have a timeout parameter
            r = urllib2.urlopen(request, None)
        else:
            r = urllib2.urlopen(request, None, timeout)
        info.update(r.info())
        info['url'] = r.geturl()  # The URL goes in too, because of redirects.
        info.update(dict(msg="OK (%s bytes)" % r.headers.get('Content-Length', 'unknown'),
                         status=200))
    except urllib2.HTTPError, e:
        info.update(dict(msg=str(e), status=e.code))
def getRegexParsed(regexs, url,
                   cookieJar=None,
                   forCookieJarOnly=False,
                   recursiveCall=False,
                   cachedPages={},
                   rawPost=False,
                   cookie_jar_file=None):
    #0,1,2 = URL, regexOnly, CookieJarOnly
    #cachedPages = {}
    #print 'url', url
    doRegexs = re.compile('\$doregex\[([^\]]*)\]').findall(url)
    #print 'doRegexs', doRegexs, regexs
    setresolved = True
    for k in doRegexs:
        if k in regexs:
            #print 'processing ', k
            m = regexs[k]
            #print m
            cookieJarParam = False
            if 'cookiejar' in m:  # so either create or reuse existing jar
                #print 'cookiejar exists', m['cookiejar']
                cookieJarParam = m['cookiejar']
                if '$doregex' in cookieJarParam:
                    cookieJar = getRegexParsed(regexs, m['cookiejar'], cookieJar,
                                               True, True, cachedPages)
                    cookieJarParam = True
                else:
                    cookieJarParam = True
            #print 'm[cookiejar]', m['cookiejar'], cookieJar
            if cookieJarParam:
                if cookieJar == None:
                    #print 'create cookie jar'
                    cookie_jar_file = None
                    if 'open[' in m['cookiejar']:
                        cookie_jar_file = m['cookiejar'].split('open[')[1].split(']')[0]
                        #print 'cookieJar from file name', cookie_jar_file

                    cookieJar = getCookieJar(cookie_jar_file)
                    #print 'cookieJar from file', cookieJar
                    if cookie_jar_file:
                        saveCookieJar(cookieJar, cookie_jar_file)
                    #import cookielib
                    #cookieJar = cookielib.LWPCookieJar()
                    #print 'cookieJar new', cookieJar
                elif 'save[' in m['cookiejar']:
                    cookie_jar_file = m['cookiejar'].split('save[')[1].split(']')[0]
                    complete_path = os.path.join(profile, cookie_jar_file)
                    #print 'complete_path', complete_path
                    saveCookieJar(cookieJar, cookie_jar_file)

            if m['page'] and '$doregex' in m['page']:
                pg = getRegexParsed(regexs, m['page'], cookieJar,
                                    recursiveCall=True, cachedPages=cachedPages)
                if len(pg) == 0:
                    pg = 'http://regexfailed'
                m['page'] = pg

            if 'setcookie' in m and m['setcookie'] and '$doregex' in m['setcookie']:
                m['setcookie'] = getRegexParsed(regexs, m['setcookie'], cookieJar,
                                                recursiveCall=True, cachedPages=cachedPages)
            if 'appendcookie' in m and m['appendcookie'] and '$doregex' in m['appendcookie']:
                m['appendcookie'] = getRegexParsed(regexs, m['appendcookie'], cookieJar,
                                                   recursiveCall=True, cachedPages=cachedPages)

            if 'post' in m and '$doregex' in m['post']:
                m['post'] = getRegexParsed(regexs, m['post'], cookieJar,
                                           recursiveCall=True, cachedPages=cachedPages)
                #print 'post is now', m['post']

            if 'rawpost' in m and '$doregex' in m['rawpost']:
                m['rawpost'] = getRegexParsed(regexs, m['rawpost'], cookieJar,
                                              recursiveCall=True, cachedPages=cachedPages,
                                              rawPost=True)
                #print 'rawpost is now', m['rawpost']

            if 'rawpost' in m and '$epoctime$' in m['rawpost']:
                m['rawpost'] = m['rawpost'].replace('$epoctime$', getEpocTime())
            if 'rawpost' in m and '$epoctime2$' in m['rawpost']:
                m['rawpost'] = m['rawpost'].replace('$epoctime2$', getEpocTime2())

            link = ''
            if m['page'] and m['page'] in cachedPages and not 'ignorecache' in m and forCookieJarOnly == False:
                #print 'using cache page', m['page']
                link = cachedPages[m['page']]
            else:
                if m['page'] and not m['page'] == '' and m['page'].startswith('http'):
                    if '$epoctime$' in m['page']:
                        m['page'] = m['page'].replace('$epoctime$', getEpocTime())
                    if '$epoctime2$' in m['page']:
                        m['page'] = m['page'].replace('$epoctime2$', getEpocTime2())
                    #print 'Ignoring Cache', m['page']
                    page_split = m['page'].split('|')
                    pageUrl = page_split[0]
                    header_in_page = None
                    if len(page_split) > 1:
                        header_in_page = page_split[1]

                    # proxy = urllib2.ProxyHandler({ ('https' ? proxytouse[:5]=="https":"http") : proxytouse})
                    # opener = urllib2.build_opener(proxy)
                    # urllib2.install_opener(opener)

                    # import urllib2
                    # print 'urllib2.getproxies', urllib2.getproxies()
                    current_proxies = urllib2.ProxyHandler(urllib2.getproxies())

                    #print 'getting pageUrl', pageUrl
                    req = urllib2.Request(pageUrl)
                    if 'proxy' in m:
                        proxytouse = m['proxy']
                        #print 'proxytouse', proxytouse
                        # urllib2.getproxies = lambda: {}
                        if pageUrl[:5] == "https":
                            proxy = urllib2.ProxyHandler({'https': proxytouse})
                            #req.set_proxy(proxytouse, 'https')
                        else:
                            proxy = urllib2.ProxyHandler({'http': proxytouse})
                            #req.set_proxy(proxytouse, 'http')
                        opener = urllib2.build_opener(proxy)
                        urllib2.install_opener(opener)

                    req.add_header('User-Agent',
                                   'Mozilla/5.0 (Windows NT 6.1; rv:14.0) Gecko/20100101 Firefox/14.0.1')
                    proxytouse = None

                    if 'referer' in m:
                        req.add_header('Referer', m['referer'])
                    if 'accept' in m:
                        req.add_header('Accept', m['accept'])
                    if 'agent' in m:
                        req.add_header('User-agent', m['agent'])
                    if 'x-req' in m:
                        req.add_header('X-Requested-With', m['x-req'])
                    if 'x-addr' in m:
                        req.add_header('x-addr', m['x-addr'])
                    if 'x-forward' in m:
                        req.add_header('X-Forwarded-For', m['x-forward'])
                    if 'setcookie' in m:
                        #print 'adding cookie', m['setcookie']
                        req.add_header('Cookie', m['setcookie'])
                    if 'appendcookie' in m:
                        #print 'appending cookie to cookiejar', m['appendcookie']
                        cookiestoApend = m['appendcookie']
                        cookiestoApend = cookiestoApend.split(';')
                        for h in cookiestoApend:
                            n, v = h.split('=')
                            w, n = n.split(':')
                            ck = cookielib.Cookie(version=0, name=n, value=v,
                                                  port=None, port_specified=False,
                                                  domain=w, domain_specified=False,
                                                  domain_initial_dot=False, path='/',
                                                  path_specified=True, secure=False,
                                                  expires=None, discard=True,
                                                  comment=None, comment_url=None,
                                                  rest={'HttpOnly': None}, rfc2109=False)
                            cookieJar.set_cookie(ck)
                    if 'origin' in m:
                        req.add_header('Origin', m['origin'])
                    if header_in_page:
                        header_in_page = header_in_page.split('&')
                        for h in header_in_page:
                            n, v = h.split('=')
                            req.add_header(n, v)

                    if not cookieJar == None:
                        #print 'cookieJarVal', cookieJar
                        cookie_handler = urllib2.HTTPCookieProcessor(cookieJar)
                        opener = urllib2.build_opener(cookie_handler,
                                                      urllib2.HTTPBasicAuthHandler(),
                                                      urllib2.HTTPHandler())
                        opener = urllib2.install_opener(opener)
                        #print 'noredirect', 'noredirect' in m
                        if 'noredirect' in m:
                            opener = urllib2.build_opener(cookie_handler, NoRedirection,
                                                          urllib2.HTTPBasicAuthHandler(),
                                                          urllib2.HTTPHandler())
                            opener = urllib2.install_opener(opener)
                    elif 'noredirect' in m:
                        opener = urllib2.build_opener(NoRedirection,
                                                      urllib2.HTTPBasicAuthHandler(),
                                                      urllib2.HTTPHandler())
                        opener = urllib2.install_opener(opener)

                    if 'connection' in m:
                        #print '..........................connection//////.', m['connection']
                        from keepalive import HTTPHandler
                        keepalive_handler = HTTPHandler()
                        opener = urllib2.build_opener(keepalive_handler)
                        urllib2.install_opener(opener)

                    #print 'after cookie jar'
                    post = None

                    if 'post' in m:
                        postData = m['post']
                        #if '$LiveStreamRecaptcha' in postData:
                        #    (captcha_challenge, catpcha_word, idfield) = processRecaptcha(m['page'], cookieJar)
                        #    if captcha_challenge:
                        #        postData = postData.replace('$LiveStreamRecaptcha', 'manual_recaptcha_challenge_field:' + captcha_challenge + ',recaptcha_response_field:' + catpcha_word + ',id:' + idfield)
                        splitpost = postData.split(',')
                        post = {}
                        for p in splitpost:
                            n = p.split(':')[0]
                            v = p.split(':')[1]
                            post[n] = v
                        post = urllib.urlencode(post)

                    if 'rawpost' in m:
                        post = m['rawpost']
                        #if '$LiveStreamRecaptcha' in post:
                        #    (captcha_challenge, catpcha_word, idfield) = processRecaptcha(m['page'], cookieJar)
                        #    if captcha_challenge:
                        #        post = post.replace('$LiveStreamRecaptcha', '&manual_recaptcha_challenge_field=' + captcha_challenge + '&recaptcha_response_field=' + catpcha_word + '&id=' + idfield)

                    link = ''
                    try:
                        if post:
                            response = urllib2.urlopen(req, post)
                        else:
                            response = urllib2.urlopen(req)
                        if response.info().get('Content-Encoding') == 'gzip':
                            from StringIO import StringIO
                            import gzip
                            buf = StringIO(response.read())
                            f = gzip.GzipFile(fileobj=buf)
                            link = f.read()
                        else:
                            link = response.read()

                        if 'proxy' in m and not current_proxies is None:
                            urllib2.install_opener(urllib2.build_opener(current_proxies))

                        link = javascriptUnEscape(link)
                        #print repr(link)
                        #print link  (this just prints the whole webpage to the LOG)

                        if 'includeheaders' in m:
                            #link += str(response.headers.get('Set-Cookie'))
                            link += '$$HEADERS_START$$:'
                            for b in response.headers:
                                link += b + ':' + response.headers.get(b) + '\n'
                            link += '$$HEADERS_END$$:'
                        #print link

                        response.close()
                    except:
                        pass
                    cachedPages[m['page']] = link
                    #print link
                    #print 'store link for', m['page'], forCookieJarOnly

                    if forCookieJarOnly:
                        return cookieJar  # do nothing
                elif m['page'] and not m['page'].startswith('http'):
                    if m['page'].startswith('$pyFunction:'):
                        val = doEval(m['page'].split('$pyFunction:')[1], '', cookieJar, m)
                        if forCookieJarOnly:
                            return cookieJar  # do nothing
                        link = val
                        link = javascriptUnEscape(link)
                    else:
                        link = m['page']

            if '$doregex' in m['expres']:
                m['expres'] = getRegexParsed(regexs, m['expres'], cookieJar,
                                             recursiveCall=True, cachedPages=cachedPages)

            if not m['expres'] == '':
                #print 'doing it ', m['expres']
                if '$LiveStreamCaptcha' in m['expres']:
                    val = askCaptcha(m, link, cookieJar)
                    #print 'url and val', url, val
                    url = url.replace("$doregex[" + k + "]", val)
                elif m['expres'].startswith('$pyFunction:') or '#$pyFunction' in m['expres']:
                    #print 'expeeeeeeeeeeeeeeeeeee', m['expres']
                    val = ''
                    if m['expres'].startswith('$pyFunction:'):
                        val = doEval(m['expres'].split('$pyFunction:')[1], link, cookieJar, m)
                    else:
                        val = doEvalFunction(m['expres'], link, cookieJar, m)
                    if 'ActivateWindow' in m['expres']:
                        return
                    if forCookieJarOnly:
                        return cookieJar  # do nothing
                    if 'listrepeat' in m:
                        listrepeat = m['listrepeat']
                        return listrepeat, eval(val), m, regexs, cookieJar

                    try:
                        url = url.replace(u"$doregex[" + k + "]", val)
                    except:
                        url = url.replace("$doregex[" + k + "]", val.decode("utf-8"))
                else:
                    if 'listrepeat' in m:
                        listrepeat = m['listrepeat']
                        ret = re.findall(m['expres'], link)
                        return listrepeat, ret, m, regexs

                    val = ''
                    if not link == '':
                        #print 'link', link
                        reg = re.compile(m['expres']).search(link)
                        try:
                            val = reg.group(1).strip()
                        except:
                            traceback.print_exc()
                    elif m['page'] == '' or m['page'] == None:
                        val = m['expres']

                    if rawPost:
                        #print 'rawpost'
                        val = urllib.quote_plus(val)
                    if 'htmlunescape' in m:
                        #val = urllib.unquote_plus(val)
                        import HTMLParser
                        val = HTMLParser.HTMLParser().unescape(val)
                    try:
                        url = url.replace("$doregex[" + k + "]", val)
                    except:
                        url = url.replace("$doregex[" + k + "]", val.decode("utf-8"))
                    #print 'ur', url
                    #return val
            else:
                url = url.replace("$doregex[" + k + "]", '')

    if '$epoctime$' in url:
        url = url.replace('$epoctime$', getEpocTime())
    if '$epoctime2$' in url:
        url = url.replace('$epoctime2$', getEpocTime2())
    if '$GUID$' in url:
        import uuid
        url = url.replace('$GUID$', str(uuid.uuid1()).upper())
    if '$get_cookies$' in url:
        url = url.replace('$get_cookies$', getCookiesString(cookieJar))

    if recursiveCall:
        return url
    #print 'final url', repr(url)
    if url == "":
        return
    else:
        return url, setresolved
        '-Dapplication.url=%s://localhost:%s' % (protocol, http_port),
        'play.modules.testrunner.FirePhoque'
    ]
    try:
        subprocess.call(java_cmd, env=os.environ)
    except OSError:
        print "Could not execute the headless browser. "
        sys.exit(-1)

    print "~"
    time.sleep(1)

    # Kill if exists
    http_port = app.readConf('http.port')
    try:
        proxy_handler = urllib2.ProxyHandler({})
        opener = urllib2.build_opener(proxy_handler)
        opener.open('%s://localhost:%s/@kill' % (protocol, http_port))
    except Exception, e:
        pass

    if os.path.exists(os.path.join(app.path, 'test-result/result.passed')):
        print "~ All tests passed"
        print "~"
        testspassed = True
    if os.path.exists(os.path.join(app.path, 'test-result/result.failed')):
        print "~ Some tests have failed. See file://%s for results" % test_result
        print "~"
        sys.exit(1)
#-*- coding:utf-8 -*-
"""
1. Proxy settings

By default, urllib2 uses the http_proxy environment variable to configure the
HTTP proxy. To control the proxy explicitly in a program, independent of the
environment variables, use a ProxyHandler.
"""
## Example 1
import urllib2

enable_proxy = True
proxy_handler = urllib2.ProxyHandler({'http': 'http://some-proxy.com:2222'})
null_proxy_handler = urllib2.ProxyHandler({})

if enable_proxy:
    opener = urllib2.build_opener(proxy_handler)
else:
    opener = urllib2.build_opener(null_proxy_handler)

urllib2.install_opener(opener)

# urllib2.install_opener(opener) sets urllib2's *global* opener. A better
# approach is to avoid install_opener and its global side effect, and instead
# call opener.open(url) directly in place of the global urlopen().

"""
2. Timeout settings

urllib2's API did not expose a timeout setting; to set a timeout you could
only change the global socket timeout.
"""
## Example 2
import urllib2
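# The original example 2 breaks off before showing the timeout code. A minimal
# sketch, assuming the pre-2.6 urllib2 described above (the URL is a
# placeholder):
import socket
import urllib2

# Old urllib2's urlopen() had no timeout argument, so the only knob was the
# process-wide socket default; it affects every socket created afterwards.
socket.setdefaulttimeout(10)
response = urllib2.urlopen('http://www.example.com')

# From Python 2.6 on, urlopen() accepts a per-call timeout instead:
response = urllib2.urlopen('http://www.example.com', timeout=10)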
                if continue_tests == True:
                    settings.IGNORE_ERR_MSG = True
                else:
                    raise SystemExit()
            response = False
        except urllib2.URLError, err:
            if "Connection refused" in err.reason:
                print "\n" + Back.RED + "(x) Critical: The target host is not responding." + \
                      " Please ensure that it is up and try again." + Style.RESET_ALL
                raise SystemExit()

    # Check if Tor is defined.
    elif menu.options.tor:
        try:
            proxy = urllib2.ProxyHandler({
                settings.PROXY_PROTOCOL:
                settings.PRIVOXY_IP + ":" + PRIVOXY_PORT
            })
            response = inject_cookie(url, vuln_parameter, payload, proxy)
        except urllib2.HTTPError, err:
            if settings.IGNORE_ERR_MSG == False:
                print "\n" + Back.RED + "(x) Error: " + str(err) + Style.RESET_ALL
                continue_tests = checks.continue_tests(err)
                if continue_tests == True:
                    settings.IGNORE_ERR_MSG = True
                else:
                    raise SystemExit()
            response = False
        except urllib2.URLError, err:
            if "Connection refused" in err.reason:
                print "\n" + Back.RED + "(x) Critical: The target host is not responding." + \
def run(self, obj, config):
    key = config.get('vt_api_key', '')
    url = config.get('vt_download_url', '')
    sizeLimit = config.get('size_limit', '')
    replace = config.get('replace_sample', False)
    do_triage = config.get('run_triage', False)

    user = self.current_task.user
    sample = Sample.objects(md5=obj.md5).first()
    if not sample:
        sample = Sample()
        sample.md5 = md5_digest
    self._info("Checking if binary already exists in CRITs.")
    sample.discover_binary()

    # if we already have this binary and don't have permission to replace
    if sample.filedata and replace == False:
        self._info("CRITs already has this binary. Enable the 'Replace' option to overwrite with data from VirusTotal.")
        self._add_result("Download Canceled", "Binary already exists in CRITs.")
        return

    if not user.has_access_to(SampleACL.WRITE):
        self._info("User does not have permission to add Samples to CRITs")
        self._add_result("Download Canceled",
                         "User does not have permission to add Samples to CRITs")
        return

    parameters = urllib.urlencode({"hash": obj.md5, "apikey": key})
    if settings.HTTP_PROXY:
        proxy = urllib2.ProxyHandler({'http': settings.HTTP_PROXY,
                                      'https': settings.HTTP_PROXY})
        opener = urllib2.build_opener(proxy)
        urllib2.install_opener(opener)
    try:
        req = url + "?" + parameters
        self._info("Requesting binary with md5 '{0}' from VirusTotal.".format(obj.md5))
        request = urllib2.Request(req)
        response = urllib2.urlopen(request)
        size = response.info().getheaders("Content-Length")[0]
        self._info("Binary size: {0} bytes".format(size))

        if int(size) > sizeLimit:  # Check if within size limit
            self._error("Binary size is {0} bytes, which is greater than maximum of {1} bytes. This limit can be changed in options.".format(size, sizeLimit))
            self._add_result("Download Aborted",
                             "Match found, but binary is larger than maximum size limit.")
            return

        data = response.read()
    except urllib2.HTTPError as e:
        if e.code == 404:
            self._info("No results were returned. Either VirusTotal does not have the requested binary, or the request URL is incorrect.")
            self._add_result("Not Found",
                             "Binary was not found in the VirusTotal database")
        elif e.code == 403:
            self._error("Download forbidden. {0}".format(e))
            self._add_result("Download Canceled",
                             "CRITs was forbidden from downloading the binary.")
        else:
            self._error("An HTTP Error occurred: {0}".format(e))
        return
    except Exception as e:
        logger.error("VirusTotal: Failed connection ({0})".format(e))
        self._error("Failed to get data from VirusTotal: {0}".format(e))
        return

    if data:  # Retrieved some data from VT
        if replace == True:
            try:
                self._info("Replace = True. Deleting any previous binary with md5 {0}".format(obj.md5))
                sample.filedata.delete()
            except Exception as e:
                logger.error("VirusTotal: Error deleting existing binary ({0})".format(e))
                self._error("Failed to delete existing binary")
        self._info("Adding new binary to CRITs.")
        try:
            handle_file(filename=obj.md5,
                        data=data,
                        source="VirusTotal",
                        reference="Binary downloaded from VT based on MD5",
                        user="******",
                        method="VirusTotal Download Service",
                        md5_digest=obj.md5)
        except Exception as e:
            logger.error("VirusTotal: Sample creation failed ({0})".format(e))
            self._error("Failed to create new Sample: {0}".format(e))
            return
        if do_triage:
            self._info("Running sample triage for data-reliant services.")
            sample.reload()
            run_triage(sample, user="******")
        self._add_result("Download Successful",
                         "Binary was successfully downloaded from VirusTotal")
    else:
        self._error("No data returned by VirusTotal.")
        username = sys.argv[count + 1]
    elif arg == '-U':
        userFile = sys.argv[count + 1]
    elif arg == '-p':
        password = sys.argv[count + 1]
    elif arg == '-P':
        passFile = sys.argv[count + 1]
    elif arg == '-v':
        verbose = 1
    elif arg == '-s':
        signal = sys.argv[count + 1]
    elif arg == '-g':
        agent = sys.argv[count + 1]
    elif arg == '-x':
        lstTmp = sys.argv[count + 1].split(':')
        proxyHandler = urllib2.ProxyHandler({lstTmp[0]: lstTmp[1] + ':' + lstTmp[2]})
        useProxy = 1
    elif arg == '-f':
        continues = 1
    count += 1

if (len(username) > 0 and len(password) > 0):
    mode = 1  # single user, single password
elif (len(username) > 0 and len(passFile) > 0):
    mode = 2  # single user, password list
elif (len(userFile) > 0 and len(password) > 0):
    mode = 3  # user list, single password
elif (len(userFile) > 0 and len(passFile) > 0):
    mode = 4  # user list, password list
#-*-coding:utf-8-*-
'''
By default, urllib2 uses the http_proxy environment variable to configure the
HTTP proxy. Some sites check how often a given IP visits within a time window
and block the IP when the count gets too high. To work around this you can
route requests through proxy servers and switch to a different proxy every so
often, so the site cannot tell who is really behind the requests.
The code below shows how to configure a proxy.
'''
import urllib2

enable_proxy = True
# use a proxy
proxy_handler = urllib2.ProxyHandler({'http': '180.168.179.193:8080'})
# no proxy
null_proxy_handler = urllib2.ProxyHandler({})

if enable_proxy:
    opener = urllib2.build_opener(proxy_handler)
else:
    opener = urllib2.build_opener(null_proxy_handler)

urllib2.install_opener(opener)
response = urllib2.urlopen('http://www.baidu.com')
print response.read()
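# Building on the rotation idea above, a minimal sketch that picks a fresh
# proxy per request through a dedicated opener, avoiding the global
# install_opener() side effect (the pool entries are placeholders):
import random
import urllib2

PROXY_POOL = ['180.168.179.193:8080', '116.28.109.64:808']

def fetch_via_random_proxy(url, timeout=10):
    proxy = random.choice(PROXY_POOL)
    # A per-call opener keeps the proxy choice local to this request.
    opener = urllib2.build_opener(urllib2.ProxyHandler({'http': proxy}))
    return opener.open(url, timeout=timeout).read()

print fetch_via_random_proxy('http://www.baidu.com')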
def get_data(host, query, idx, limit, debug, threshold=300, ckey=None,
             cert=None, das_headers=True):
    """Contact DAS server and retrieve data for given DAS query"""
    params = {'input': query, 'idx': idx, 'limit': limit}
    path = '/das/cache'
    pat = re.compile('http[s]{0,1}://')
    if not pat.match(host):
        msg = 'Invalid hostname: %s' % host
        raise Exception(msg)
    url = host + path
    client = '%s (%s)' % (DAS_CLIENT, os.environ.get('USER', ''))
    headers = {"Accept": "application/json", "User-Agent": client}
    encoded_data = urllib.urlencode(params, doseq=True)
    url += '?%s' % encoded_data
    req = urllib2.Request(url=url, headers=headers)
    if ckey and cert:
        ckey = fullpath(ckey)
        cert = fullpath(cert)
        http_hdlr = HTTPSClientAuthHandler(ckey, cert, debug)
    else:
        http_hdlr = urllib2.HTTPHandler(debuglevel=debug)
    proxy_handler = urllib2.ProxyHandler({})
    cookie_jar = cookielib.CookieJar()
    cookie_handler = urllib2.HTTPCookieProcessor(cookie_jar)
    opener = urllib2.build_opener(http_hdlr, proxy_handler, cookie_handler)
    fdesc = opener.open(req)
    data = fdesc.read()
    fdesc.close()

    pat = re.compile(r'^[a-z0-9]{32}')
    if data and isinstance(data, str) and pat.match(data) and len(data) == 32:
        pid = data
    else:
        pid = None
    iwtime = 2   # initial waiting time in seconds
    wtime = 20   # final waiting time in seconds
    sleep = iwtime
    time0 = time.time()
    while pid:
        params.update({'pid': data})
        encoded_data = urllib.urlencode(params, doseq=True)
        url = host + path + '?%s' % encoded_data
        req = urllib2.Request(url=url, headers=headers)
        try:
            fdesc = opener.open(req)
            data = fdesc.read()
            fdesc.close()
        except urllib2.HTTPError as err:
            return {"status": "fail", "reason": str(err)}
        if data and isinstance(data, str) and pat.match(data) and len(data) == 32:
            pid = data
        else:
            pid = None
        time.sleep(sleep)
        if sleep < wtime:
            sleep *= 2
        elif sleep == wtime:
            sleep = iwtime  # start new cycle
        else:
            sleep = wtime
        if (time.time() - time0) > threshold:
            reason = "client timeout after %s sec" % int(time.time() - time0)
            return {"status": "fail", "reason": reason}
    jsondict = json.loads(data)
    return jsondict
def _download_file_stnd(self, url, save_path, file_name=None,
                        param_dict=None, proxy_url=None, proxy_port=None):
    """ downloads a file """
    try:
        #if url.find("http://") > -1:
        #    url = url.replace("http://", "https://")
        if proxy_url is not None:
            if proxy_port is None:
                proxy_port = 80
            proxies = {
                "http": "http://%s:%s" % (proxy_url, proxy_port),
                "https": "https://%s:%s" % (proxy_url, proxy_port)
            }
            proxy_support = urllib2.ProxyHandler(proxies)
            opener = urllib2.build_opener(proxy_support,
                                          urllib2.HTTPHandler(debuglevel=0),
                                          AGOLRedirectHandler())
            urllib2.install_opener(opener)
        else:
            opener = urllib2.build_opener(urllib2.HTTPHandler(debuglevel=0),
                                          AGOLRedirectHandler())
            urllib2.install_opener(opener)
        if param_dict is not None:
            encoded_args = urllib.urlencode(param_dict)
            url = url + '/?' + encoded_args
        file_data = urllib2.urlopen(url)
        file_data.getcode()
        file_data.geturl()
        if file_name is None:
            url = file_data.geturl()
            a = file_data.info().getheader('Content-Disposition')
            if a is not None:
                a = a.strip()
                file_name = re.findall(r'filename=\"(.+?)\"', a)[0]
            else:
                file_name = os.path.basename(file_data.geturl().split('?')[0])
        if hasattr(file_data, "status") and \
           (int(file_data.status) >= 300 and int(file_data.status) < 400):
            self._download_file(url=file_data.geturl(),
                                save_path=save_path,
                                file_name=file_name,
                                proxy_url=self._proxy_url,
                                proxy_port=self._proxy_port)
            return save_path + os.sep + file_name
        if (file_data.info().getheader('Content-Length')):
            total_size = int(file_data.info().getheader('Content-Length').strip())
            downloaded = 0
            CHUNK = 4096
            with open(save_path + os.sep + file_name, 'wb') as out_file:
                while True:
                    chunk = file_data.read(CHUNK)
                    downloaded += len(chunk)
                    if not chunk:
                        break
                    out_file.write(chunk)
        elif file_data.headers.maintype == 'image':
            with open(save_path + os.sep + file_name, 'wb') as out_file:
                buf = file_data.read()
                out_file.write(buf)
        return save_path + os.sep + file_name
    except urllib2.HTTPError, e:
        print "HTTP Error:", e.code, url
        return False
def do_check():
    # Check if 'tor' is installed.
    requirment = "tor"
    requirments.do_check(requirment)

    # Check if 'privoxy' is installed.
    requirment = "privoxy"
    requirments.do_check(requirment)

    check_privoxy_proxy = True
    info_msg = "Testing Tor SOCKS proxy settings ("
    info_msg += settings.PRIVOXY_IP + ":" + PRIVOXY_PORT
    info_msg += ")... "
    sys.stdout.write(settings.print_info_msg(info_msg))
    sys.stdout.flush()
    try:
        privoxy_proxy = urllib2.ProxyHandler(
            {settings.SCHEME: settings.PRIVOXY_IP + ":" + PRIVOXY_PORT})
        opener = urllib2.build_opener(privoxy_proxy)
        urllib2.install_opener(opener)
    except:
        check_privoxy_proxy = False
        pass

    if check_privoxy_proxy:
        try:
            check_tor_page = opener.open("https://check.torproject.org/").read()
            found_ip = re.findall(r": <strong>" + "(.*)" + "</strong></p>", check_tor_page)
            if not "You are not using Tor" in check_tor_page:
                sys.stdout.write("[" + Fore.GREEN + " SUCCEED " + Style.RESET_ALL + "]\n")
                sys.stdout.flush()
                if menu.options.tor_check:
                    success_msg = "Tor connection is properly set. "
                else:
                    success_msg = ""
                success_msg += "Your ip address appears to be " + found_ip[0] + ".\n"
                sys.stdout.write(settings.print_success_msg(success_msg))
                warn_msg = "Increasing default value for option '--time-sec' to"
                warn_msg += " " + str(settings.TIMESEC) + " because switch '--tor' was provided."
                print settings.print_warning_msg(warn_msg)
            else:
                print "[" + Fore.RED + " FAILED " + Style.RESET_ALL + "]"
                if menu.options.tor_check:
                    err_msg = "It seems that your Tor connection is not properly set. "
                else:
                    err_msg = ""
                err_msg += "Can't establish connection with the Tor SOCKS proxy. "
                err_msg += "Please make sure that you have "
                err_msg += "Tor installed and running so "
                err_msg += "you could successfully use "
                err_msg += "switch '--tor'."
                print settings.print_critical_msg(err_msg)
                raise SystemExit()
        except urllib2.URLError, err_msg:
            print "[" + Fore.RED + " FAILED " + Style.RESET_ALL + "]"
            if menu.options.tor_check:
                err_msg = "It seems that your Tor connection is not properly set. "
            else:
                err_msg = ""
            err_msg = "Please make sure that you have "
            err_msg += "Tor installed and running so "
            err_msg += "you could successfully use "
            err_msg += "switch '--tor'."
            print settings.print_critical_msg(err_msg)
            raise SystemExit()
        except httplib.BadStatusLine, err_msg:
            print "[ " + Fore.RED + "FAILED" + Style.RESET_ALL + " ]"
            if len(err_msg.line) > 2:
                print err_msg.line, err_msg.message
            raise SystemExit()
def handler(proxy):
    proxy_handler = urllib2.ProxyHandler({'http': proxy, 'https': proxy})
    opener = urllib2.build_opener(proxy_handler)
    urllib2.install_opener(opener)
    return request
def urlopen_with_retry(req, proxy_all):
    current_proxy = random.choice(proxy_all)
    opener = urllib2.build_opener(urllib2.ProxyHandler({"http": current_proxy}))
    urllib2.install_opener(opener)
    return urllib2.urlopen(req)
def request(url, close=True, redirect=True, error=False, proxy=None,
            post=None, headers=None, mobile=False, XHR=False, limit=None,
            referer=None, cookie=None, output='', timeout='30',
            headRequest=False):
    try:
        handlers = []

        if not proxy is None:
            control.log("proxy: %s" % proxy)
            handlers += [urllib2.ProxyHandler(proxy), urllib2.HTTPHandler]

        if output == 'cookie' or output == 'extended' or output == 'cookiejar' or not close == True:
            cookies = cookielib.LWPCookieJar()
            handlers += [urllib2.HTTPHandler(),
                         urllib2.HTTPSHandler(),
                         urllib2.HTTPCookieProcessor(cookies)]

        try:
            if sys.version_info < (2, 7, 9):
                raise Exception()
            import ssl
            ssl_context = ssl.create_default_context()
            ssl_context.check_hostname = False
            ssl_context.verify_mode = ssl.CERT_NONE
            handlers += [urllib2.HTTPSHandler(context=ssl_context)]
        except:
            pass

        try:
            headers.update(headers)
        except:
            headers = {}

        if 'User-Agent' in headers:
            pass
        elif not mobile == True:
            headers['User-Agent'] = cache.get(randomagent, 1)
        else:
            headers['User-Agent'] = 'Apple-iPhone/701.341'

        if 'Referer' in headers:
            pass
        elif referer is None:
            headers['Referer'] = '%s://%s/' % (urlparse.urlparse(url).scheme,
                                               urlparse.urlparse(url).netloc)
        else:
            headers['Referer'] = referer

        if not 'Accept-Language' in headers:
            headers['Accept-Language'] = 'en-US'

        if 'X-Requested-With' in headers:
            pass
        elif XHR is True:
            headers['X-Requested-With'] = 'XMLHttpRequest'

        if 'Cookie' in headers:
            pass
        elif not cookie is None:
            headers['Cookie'] = printCookieDict(cookie) if isinstance(cookie, dict) else cookie

        if redirect is False:
            class NoRedirection(urllib2.HTTPErrorProcessor):
                def http_response(self, request, response):
                    return response

            handlers += [NoRedirection]

            try:
                del headers['Referer']
            except:
                pass

        if handlers:
            opener = urllib2.build_opener(*handlers)
            urllib2.install_opener(opener)
        else:
            urllib2.install_opener(None)

        request = urllib2.Request(url, data=post, headers=headers)
        rid = uuid.uuid4().hex
        control.log("Url request (%s): %s" % (rid, url))
        if headRequest:
            request.get_method = lambda: 'HEAD'
        try:
            response = urllib2.urlopen(request, timeout=int(timeout))
        except urllib2.HTTPError as response:
            if response.code == 503:
                cf_result = response.read(5242880)
                try:
                    encoding = response.info().getheader('Content-Encoding')
                except:
                    encoding = None
                if encoding == 'gzip':
                    cf_result = gzip.GzipFile(fileobj=StringIO.StringIO(cf_result)).read()

                if 'cf-browser-verification' in cf_result:
                    netloc = '%s://%s' % (urlparse.urlparse(url).scheme,
                                          urlparse.urlparse(url).netloc)
                    ua = headers['User-Agent']
                    cf = cache.get(cfcookie().get, 168, netloc, ua, timeout)
                    headers['Cookie'] = cf
                    request = urllib2.Request(url, data=post, headers=headers)
                    response = urllib2.urlopen(request, timeout=int(timeout))
                elif error is False:
                    control.log("Response error code (%s): %s" % (rid, response.code))
                    return
            elif response.code == 403:
                control.log("Response error code (%s): %s" % (rid, response.code))
                raise Exception("Permission Denied")
            elif error is True:
                control.log("Response error code (%s): %s" % (rid, response.code))
                raise Exception("ERROR (%s): %s" % (url, response.code))
            elif error is False:
                control.log("Response error code (%s): %s" % (rid, response.code))
                return

        control.log("Response code (%s): %s" % (rid, response.code))
        if response.code == 403:
            raise Exception("Permission Denied")
        elif response.code >= 400:
            raise Exception("Request Error (%s): %s" % (url, response.code))

        if output == 'cookie':
            try:
                result = '; '.join(['%s=%s' % (i.name, i.value) for i in cookies])
            except:
                pass
            try:
                result = cf
            except:
                pass
            if close is True:
                response.close()
            return result

        if output == 'cookiejar':
            cookies_dict = {}
            for i in cookies:
                cookies_dict[i.name] = i.value
            return cookies_dict

        elif output == 'geturl':
            result = response.geturl()
            if close is True:
                response.close()
            return result

        elif output == 'headers':
            result = response.headers
            if close is True:
                response.close()
            return result

        elif output == 'chunk':
            try:
                content = int(response.headers['Content-Length'])
            except:
                content = (2049 * 1024)
            if content < (2048 * 1024):
                return
            result = response.read(16 * 1024)
            if close is True:
                response.close()
            return result

        if limit == '0':
            result = response.read(224 * 1024)
        elif not limit is None:
            result = response.read(int(limit) * 1024)
        else:
            result = response.read(5242880)

        try:
            encoding = response.info().getheader('Content-Encoding')
        except:
            encoding = None
        if encoding == 'gzip':
            result = gzip.GzipFile(fileobj=StringIO.StringIO(result)).read()

        if 'sucuri_cloudproxy_js' in result:
            su = sucuri().get(result)
            headers['Cookie'] = su
            request = urllib2.Request(url, data=post, headers=headers)
            response = urllib2.urlopen(request, timeout=int(timeout))

            if limit == '0':
                result = response.read(224 * 1024)
            elif not limit is None:
                result = response.read(int(limit) * 1024)
            else:
                result = response.read(5242880)

            try:
                encoding = response.info().getheader('Content-Encoding')
            except:
                encoding = None
            if encoding == 'gzip':
                result = gzip.GzipFile(fileobj=StringIO.StringIO(result)).read()

        control.log("response (%s): %s" % (rid, result[:500]))
        if response.headers and response.headers.get('content-type') and \
           ('application/json' in response.headers.get('content-type') or
            'text/javascript' in response.headers.get('content-type')):
            return json.loads(result, object_pairs_hook=OrderedDict) if OrderedDict else json.loads(result)

        if output == 'extended':
            response_headers = response.headers
            response_code = str(response.code)
            try:
                cookie = '; '.join(['%s=%s' % (i.name, i.value) for i in cookies])
            except:
                pass
            try:
                cookie = cf
            except:
                pass
            if close is True:
                response.close()
            return result, response_code, response_headers, headers, cookie
        else:
            if close is True:
                response.close()
            return result
    except Exception, e:
        traceback.print_exc()
        control.log("Request ERROR: %s" % str(e))
        return
def get_content(toUrl, count):
    """
    Return the content of given url

    Args:
        toUrl: aim url
        count: index of this connect

    Return:
        content if success
        'FAIL' if fail
    """
    cf = ConfigParser.ConfigParser()
    cf.read("config.ini")
    cookie = cf.get("cookie", "cookie")
    headers = {
        'Cookie': cookie,
        'Host': 'www.zhihu.com',
        'Referer': 'http://www.zhihu.com/',
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36',
        # 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.103 Safari/537.36',
        'Accept-Encoding': 'gzip'
    }
    req = urllib2.Request(
        url=toUrl,
        headers=headers
    )
    proxy_all = [
        "10.25.170.247:5678",
        "10.25.171.82:5678",
        "10.47.114.111:5678",
        "10.47.54.77:5678",
        "10.25.60.218:5678",
        "10.47.54.180:5678",
        "10.47.54.115:5678",
        "10.47.106.138:5678"
    ]
    current_proxy = random.choice(proxy_all)
    try:
        opener = urllib2.build_opener(urllib2.ProxyHandler({"http": current_proxy}))
        urllib2.install_opener(opener)
        page = urllib2.urlopen(req, timeout=15)
        headers = page.info()
        content = page.read()
    # except Exception, e:
    #     if count % 1 == 0:
    #         print str(count) + ", Error: " + str(e) + " URL: " + toUrl
    #     return "FAIL"
    except urllib2.HTTPError, e:
        if e.code == 404:
            if count % 1 == 0:
                print str(count) + ", Error: " + str(e) + " URL: " + toUrl
            return "NO FOUND"
        else:
            try:
                page = urlopen_with_retry(req, proxy_all)
                headers = page.info()
                content = page.read()
            except Exception, e:
                if count % 1 == 0:
                    print str(count) + ", Error: " + str(e) + " URL: " + toUrl + " retry_fail"
                return "FAIL"
def __init__(self, url):
    self.ledger_url = url
    self.proxy_handler = urllib2.ProxyHandler({})
import os
import json
import urllib2
from bs4 import BeautifulSoup
import httplib

url = 'http://codeforces.com/api/contest.list?gym=false'

prx = open("proxy", 'r')
proxy = prx.readline()
proxy = proxy.split('\n', 1)
proxy = proxy[0]
if (len(proxy) != 0):
    print proxy
    proxy = urllib2.ProxyHandler({'http': proxy})
    opener = urllib2.build_opener(proxy)
    urllib2.install_opener(opener)

if os.path.exists('ProblemStatistics'):
    print "The directory 'ProblemStatistics' exists !!!"
else:
    os.makedirs('ProblemStatistics')

dict = {}
f1 = open("ProblemStatistics/CFRound.txt", "w")
f2 = open("ProblemStatistics/ErrorCF.txt", "w")
f3 = open("Asim.txt", "w")

response = urllib2.urlopen(url).read()
res = json.loads(response)
# Deprecated: now we have requests
#
# HTTP Client with cookie handling
# should be handy for bots...
#
# Programmed by WangLu
# Last changed: 2011.05.16

import cookielib
import time
import urllib
import urllib2

############# configuration
HTTP_PROXY = urllib2.ProxyHandler()
HTTP_HEADERS = {'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:2.0) Gecko/20100101 Firefox/4.0'}
HTTP_RETRY_COUNT = -1     # how many times to retry on failure, negative for always retry
HTTP_RETRY_INTERVAL = 5   # seconds


# Handle cookies, and auto-retry on error
class HTTPHandler():
    def __init__(self, proxy=HTTP_PROXY):
        self.cookie = cookielib.CookieJar()
        self.http = urllib2.build_opener(proxy, urllib2.HTTPCookieProcessor(self.cookie))
        self.last_request = None
        self.last_response = None

    # auto retry on error
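# As the deprecation note above says, requests now covers this pattern. A
# rough equivalent sketch (session-level cookies, headers and retries; the
# proxy URL and the retried status codes are assumptions, not part of the
# original):
import requests
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry

session = requests.Session()  # a Session keeps cookies across requests
session.headers.update({'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:2.0) Gecko/20100101 Firefox/4.0'})
session.proxies.update({'http': 'http://127.0.0.1:3128'})  # placeholder proxy

# Retry on connection errors and common 5xx responses, with backoff between tries.
retry = Retry(total=5, backoff_factor=5, status_forcelist=[500, 502, 503, 504])
session.mount('http://', HTTPAdapter(max_retries=retry))
session.mount('https://', HTTPAdapter(max_retries=retry))

response = session.get('http://www.example.com')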
def get_proxy_opener(url, user, password):
    password_mgr = urllib2.HTTPPasswordMgrWithDefaultRealm()
    password_mgr.add_password(None, url, user, password)
    proxy_handler = urllib2.ProxyHandler({'http': url})
    proxy_auth_handler = urllib2.ProxyBasicAuthHandler(password_mgr)
    return urllib2.build_opener(proxy_handler, proxy_auth_handler)
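# A hypothetical usage sketch for the helper above (proxy address and
# credentials are placeholders): ProxyBasicAuthHandler answers the proxy's
# 407 challenge with the stored credentials.
opener = get_proxy_opener('http://proxy.example.com:8080', 'alice', 's3cret')
print opener.open('http://www.example.com').read(200)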