import json
import random
import urllib2


def spider(lon, lat, num):
    http_proxy = {'http': '116.28.109.64:808'}
    url = "https://mwx.mobike.com/mobike-api/rent/nearbyBikesInfo.do"
    payload = "?latitude=%s&longitude=%s&errMsg=getMapCenterLocation" % (lat, lon)
    url = url + payload
    headers = {
        'charset': "utf-8",
        'platform': "4",
        'referer': "https://servicewechat.com/wx40f112341ae33edb/1/",
        'content-type': "application/x-www-form-urlencoded",
        'user-agent': "MicroMessenger/6.5.4.1000 NetType/WIFI Language/zh_CN",
        'host': "mwx.mobike.com",
        'connection': "Keep-Alive",
        'accept-encoding': "gzip",
        'cache-control': "no-cache"
    }
    proxy = [
        '222.128.13.94:8081', '222.128.13.94:8081', '121.232.147.200:9000', '218.89.97.127:9000', '117.90.252.123:9000', '121.232.147.206:9000',
        '121.232.146.12:9000', '222.208.66.14:9000', '121.232.147.206:9000', '121.232.145.93:9000', '118.178.227.171:80', '121.31.103.138:8123',
        '110.73.34.186:8123', '121.31.152.122:8123', '116.28.109.64:808', '115.213.1.83:8998', '60.178.137.158:8081', '117.90.3.185:9000',
        '117.90.1.204:9000', '110.73.48.146:8123', '111.13.7.42:83', '118.178.227.171:80', '182.42.46.43:808', '111.74.56.249:9000',
        '111.74.56.249:9000', '122.96.59.105:82', '117.90.2.208:9000', '121.232.146.149:9000', '60.178.13.75:8081', '182.129.243.22:9000',
        '110.73.33.147:8123', '123.169.84.88:808', '120.27.49.85:8090', '122.193.14.114:82', '121.232.145.104:9000', '171.38.36.78:8123',
        '163.125.222.240:8118', '114.230.31.225:3128', '171.92.4.67:9000', '121.232.147.247:9000', '121.232.146.148:9000', '121.232.144.201:9000',
        '121.232.145.251:9000', '110.73.55.150:8123', '112.33.7.9:8081', '182.141.46.93:9000', '210.76.163.216:8118', '221.230.7.59:9000',
        '121.232.146.139:9000', '121.232.145.43:9000', '118.117.139.117:9000', '121.232.146.205:9000', '121.232.147.112:9000', '202.141.161.30:8118',
        '117.90.0.205:9000', '121.232.144.156:9000', '122.96.59.105:82', '122.96.59.106:80', '222.208.66.14:9000', '111.12.96.188:80',
        '121.31.152.178:8123', '59.66.202.98:1080', '110.73.4.93:8123', '110.73.40.102:8123', '118.117.136.82:9000', '121.232.145.5:9000',
        '110.73.0.233:8123', '111.74.56.247:9000', '118.117.137.188:9000', '110.73.3.197:8123', '117.90.5.166:9000', '113.140.25.4:81',
        '115.230.60.199:808', '123.169.91.85:808', '117.90.7.181:9000', '122.193.14.114:82', '121.232.144.234:9000', '171.215.237.225:9000',
        '121.232.146.50:9000', '121.31.101.137:8123', '182.129.242.219:9000', '182.90.106.23:8123', '121.232.144.209:9000', '117.90.7.142:9000',
        '121.232.144.158:9000', '171.215.226.246:9000', '121.232.146.181:9000', '117.90.3.61:9000', '121.232.147.222:9000', '121.232.145.29:9000',
        '111.74.56.244:9000', '111.13.7.42:81', '117.90.6.233:9000', '163.125.251.253:8118', '112.33.7.9:8081', '121.232.144.113:9000',
        '122.96.59.106:843', '110.72.16.89:8123', '183.154.215.0:9000', '171.92.32.87:9000', '182.129.240.160:9000', '121.232.194.184:9000',
        '125.72.125.14:808', '120.77.255.133:8088', '121.232.144.67:9000', '121.232.146.15:9000', '121.31.101.188:8123', '61.232.121.166:8123',
        '117.90.4.70:9000', '182.129.249.38:9000', '121.232.147.148:9000', '218.104.148.157:8080', '58.217.255.184:1080', '117.90.4.150:9000',
        '121.232.144.41:9000', '121.232.147.165:9000', '117.90.0.211:9000', '117.90.7.246:9000', '121.232.147.195:9000', '202.141.161.30:8118',
        '117.90.5.200:9000', '121.232.146.127:9000', '121.232.145.128:9000', '110.73.28.60:8123', '60.178.169.117:8081', '183.240.87.229:8080',
        '111.155.116.229:8123', '121.232.146.217:9000', '121.232.144.190:9000', '121.232.148.17:9000', '117.90.1.254:9000', '118.117.137.217:9000',
        '163.125.251.49:8118', '120.77.206.98:8118', '121.232.145.52:9000', '121.232.144.105:9000', '118.26.183.215:8080', '121.232.147.200:9000',
        '171.92.52.2:9000', '121.232.144.89:9000', '117.90.5.133:9000', '60.178.169.106:8081', '220.191.14.233:808', '118.117.136.55:9000',
        '117.90.2.28:9000', '121.232.144.236:9000', '121.232.144.237:9000', '117.90.0.98:9000', '117.90.2.65:9000', '121.232.147.56:9000',
        '182.141.42.29:9000', '121.232.144.141:9000', '121.232.145.28:9000', '210.44.213.63:1080', '60.178.170.66:8081', '121.232.147.96:9000',
        '171.37.170.153:8123', '139.196.121.161:80', '118.117.138.101:9000', '121.232.147.203:9000', '111.155.116.207:8123', '218.89.97.88:9000',
        '110.73.51.213:8123', '121.232.146.236:9000', '121.232.147.241:9000', '118.117.137.2:9000', '121.31.148.28:8123', '171.215.227.75:9000',
        '121.232.146.86:9000', '111.13.7.42:80', '110.73.31.223:8123', '121.232.146.161:9000', '117.90.4.61:9000', '111.155.116.215:8123',
        '123.169.90.147:808', '117.90.5.69:9000', '121.232.146.192:9000', '121.232.147.246:9000', '163.125.251.46:8118', '121.232.147.145:9000',
        '121.232.144.138:9000', '60.178.1.74:8081', '180.110.17.213:808', '120.83.99.212:808', '111.1.52.45:80', '61.157.198.66:8080',
        '121.232.146.75:9000', '121.232.147.164:9000', '121.232.147.56:9000', '114.99.21.133:808', '121.232.148.31:9000', '121.232.146.71:9000',
        '121.232.147.113:9000', '101.86.86.101:8118', '182.129.241.132:9000', '60.178.10.96:8081', '121.40.164.232:8118', '183.240.87.229:8080',
        '121.232.147.78:9000', '122.96.59.104:80', '121.232.146.55:9000', '114.238.42.105:808', '121.232.147.199:9000', '125.92.33.20:3128',
        '60.178.3.139:8081', '121.31.155.232:8123', '121.232.147.156:9000', '121.232.147.55:9000', '121.15.170.171:8080', '121.232.144.249:9000',
        '118.117.137.2:9000', '115.46.76.205:8123', '171.39.4.208:8123', '221.239.81.83:8118', '121.232.145.190:9000', '115.213.203.188:808',
        '221.192.134.92:8081', '125.67.74.248:9000', '182.129.240.154:9000', '121.232.147.197:9000', '171.92.53.59:9000', '222.187.20.51:8998',
        '183.207.176.252:1080', '110.72.30.1:8123', '121.232.145.115:9000', '122.96.59.106:81', '121.232.144.37:9000', '223.86.37.135:8998',
        '122.96.59.106:843', '210.76.163.216:8118', '210.44.213.63:1080', '121.232.148.43:9000', '59.49.129.60:8998', '49.86.62.100:808',
        '123.206.225.120:8888', '60.178.128.19:8081', '218.104.148.157:8080', '220.176.93.100:9000', '182.129.242.241:9000', '121.232.148.61:9000',
        '122.112.230.18:8080', '121.232.147.250:9000', '59.62.6.93:9000', '111.155.116.221:8123', '121.232.147.28:9000', '171.215.241.87:9000',
        '118.117.138.101:9000', '60.178.3.2:8081', '114.115.218.71:80', '114.115.218.143:8118', '121.232.144.82:9000', '183.147.22.6:9000',
        '202.121.96.33:8086', '121.232.147.205:9000', '125.117.132.239:9000', '125.67.74.248:9000', '121.232.145.151:9000', '121.232.145.2:9000',
        '139.196.121.161:80', '180.119.65.217:3128', '222.208.83.160:9000', '114.99.21.133:808', '121.13.55.162:8118', '180.119.65.25:3128',
        '121.31.147.77:8123', '122.112.230.18:8080', '121.232.147.34:9000', '123.169.90.229:808', '180.173.109.149:8118', '121.232.147.130:9000',
    ]
    # Pick a random proxy from the pool for this request
    # (len(proxy) - 1 instead of the fragile hardcoded index 269).
    temp = random.randint(0, len(proxy) - 1)
    http_proxy['http'] = proxy[temp]
    print proxy[temp]
    proxy_handler = urllib2.ProxyHandler(http_proxy)
    opener = urllib2.build_opener(proxy_handler)
    request = urllib2.Request(url, headers=headers)
    try:
        response = opener.open(request, timeout=1)
        info = response.read()
        print info
        info = json.loads(info)
        bikes = info['object']
        # for bike in bikes:
        #     print bike['distY']
    except Exception as ex:
        # Retry with another random proxy until the attempt budget is spent;
        # log each failing proxy to a file.
        num = num - 1
        if num < 0:
            print "die total"
        else:
            print "die once"
            with open('./baidu.txt', 'a') as fp:
                fp.write(http_proxy['http'] + '\n')
            spider(lon, lat, num)
    print 'ok'
def __init__(self, base_url):
    self._base_url = base_url.rstrip('/')
    self._proxy_handler = urllib2.ProxyHandler({})
def _send_request(self, method="GET", path=None, args=None, data=None, auth=False):
    """
    Send a request to the Wrike API

    @param method: the HTTP method
    @param path: the path relative to the repository URL
    @param data: the data to send
    @param auth: this is an authorization request
    """
    repository = self.repository

    # Request URL
    api = "oauth2/token" if auth else "api/v3"
    url = "/".join((repository.url.rstrip("/"), api))
    if path:
        url = "/".join((url, path.lstrip("/")))
    if args:
        url = "?".join((url, urllib.urlencode(args)))

    # Create the request
    req = urllib2.Request(url=url)
    handlers = []

    if not auth:
        # Install access token header
        access_token = self.access_token
        if not access_token:
            message = "Authorization failed: no access token"
            current.log.error(message)
            return None, message
        req.add_header("Authorization", "%s %s" % (self.token_type, access_token))

        # JSONify request data
        request_data = json.dumps(data) if data else ""
        if request_data:
            req.add_header("Content-Type", "application/json")
    else:
        # URL-encode request data for auth
        request_data = urllib.urlencode(data) if data else ""

    # Indicate that we expect JSON response
    req.add_header("Accept", "application/json")

    # Proxy handling
    config = repository.config
    proxy = repository.proxy or config.proxy or None
    if proxy:
        current.log.debug("using proxy=%s" % proxy)
        proxy_handler = urllib2.ProxyHandler({"https": proxy})
        handlers.append(proxy_handler)

    # Install all handlers
    if handlers:
        opener = urllib2.build_opener(*handlers)
        urllib2.install_opener(opener)

    # Execute the request
    response = None
    message = None
    try:
        if method == "POST":
            f = urllib2.urlopen(req, data=request_data)
        else:
            f = urllib2.urlopen(req)
    except urllib2.HTTPError, e:
        message = "HTTP %s: %s" % (e.code, e.reason)
def _post_multipart_stnd(self, host, selector,
                         fields, files,
                         ssl=False, port=80,
                         proxy_url=None, proxy_port=None):
    """ performs a multi-post to AGOL, Portal, or AGS using standard library
        Inputs:
           host - string - root url (no http:// or https://)
               ex: www.arcgis.com
           selector - string - everything after the host
               ex: /PWJUSsdoJDp7SgLj/arcgis/rest/services/GridIndexFeatures/FeatureServer/0/1/addAttachment
           fields - dictionary - additional parameters like token and format information
           files - tuple array - tuple with the file name type, filename, full path
           ssl - option to use SSL
           proxy_url - string - url to proxy server
           proxy_port - integer - port value if not on port 80
        Output:
           JSON response as dictionary
        Usage:
           import urlparse
           url = "http://sampleserver3.arcgisonline.com/ArcGIS/rest/services/SanFrancisco/311Incidents/FeatureServer/0/10261291"
           parsed_url = urlparse.urlparse(url)
           params = {"f":"json"}
           print _post_multipart(host=parsed_url.hostname,
                                 selector=parsed_url.path,
                                 files=files,
                                 fields=params)
    """
    content_type, body = self._encode_multipart_formdata(fields, files)
    if ssl:
        url = "https://%s%s" % (host, selector)
    else:
        url = "http://%s%s" % (host, selector)
    if proxy_url is not None:
        if proxy_port is None:
            proxy_port = 80
        proxies = {
            "http": "http://%s:%s" % (proxy_url, proxy_port),
            "https": "https://%s:%s" % (proxy_url, proxy_port)
        }
        proxy_support = urllib2.ProxyHandler(proxies)
        opener = urllib2.build_opener(proxy_support,
                                      urllib2.HTTPHandler(debuglevel=0))
        urllib2.install_opener(opener)
    request = urllib2.Request(url)
    request.add_header('User-agent', 'ArcREST')
    request.add_header('Content-type', content_type)
    request.add_header('Content-length', len(body))
    request.add_data(body)
    result = urllib2.urlopen(request).read()
    if result == "":
        return ""
    jres = json.loads(result)
    if 'error' in jres:
        if jres['error']['message'] == 'Request not made over ssl':
            if url.startswith('http://'):
                url = url.replace('http://', 'https://')
                return self._post_multipart(host, selector, fields, files,
                                            ssl=True, port=port,
                                            proxy_url=proxy_url,
                                            proxy_port=proxy_port)
    return self._unicode_convert(jres)
print "------------------------------------------------------------------" client = KunClient(host=HOST, port=PORT) # make instance #client.conn() # conn Method return 1, if success client.header.parse(REQMSG) # make initial Head 0.9 client.header.add("HTTP_PHONE_NUMBER", MDN) client.header.add("HTTP_MDN_INFO", MDN) client.header.add("Host", host) ######### cj = cookielib.CookieJar() cookie_handler = urllib2.HTTPCookieProcessor(cj) #proxy_support = urllib2.ProxyHandler({'http':'http://localhost:8080'}) proxy_support = urllib2.ProxyHandler({'http':'http://127.0.0.1:8080'}) http_handler = urllib2.HTTPHandler(debuglevel=1000) # DEBUG SHOW OPTION ON handlers = [ http_handler, proxy_support, cookie_handler ] #opener = urllib2.build_opener(handler, proxy_support, urllib2.HTTPCookieProcessor(cj)) opener = urllib2.build_opener(*handlers) #opener = urllib2.build_opener(http_handler, urllib2.HTTPCookieProcessor(cj)) urllib2.install_opener(opener) theurl = 'http://localhost:8080' #TEST PAGES theurl = 'http://m.knpu.org' theurl = 'http://m.naver.com' #naver require exact phone number #theurl = 'http://sting.xozen.com' #theurl = 'http://kbank.altou.com' theurl = 'http://www.magicn.com/'
""" from datetime import datetime import re from StringIO import StringIO import sys import urllib2 import zipfile RULE_TEMPLATE = r'alert udp $HOME_NET any -> $DNS_SERVERS 53 (msg:"DNS Query for a dynamic domain <domain_here>"; content:"|01 00 00 01 00 00 00 00 00 00|"; depth:10; offset:2; content:"<content_here>"; fast_pattern; nocase; distance:0; classtype:bad-unknown; sid:<sid_here>; rev:1;)' STARTING_SID = 1000000 HTTP_PROXY = 'http://127.0.0.1:3128' FILE_URL = 'http://www.malware-domains.com/files/dynamic_dns.zip' FILE = 'dynamic_dns.txt' RULES_FILE = 'local.rules' proxy_support = urllib2.ProxyHandler({'http': HTTP_PROXY}) opener = urllib2.build_opener(proxy_support) urllib2.install_opener(opener) try: url = urllib2.urlopen(FILE_URL) zf = zipfile.ZipFile(StringIO(url.read())) except: print "Couldn't connect" sys.exit(1) sid = STARTING_SID timestamp = datetime.utcnow() with open(RULES_FILE, 'w') as rules_fh: rules_fh.write('#autogenerated on ' + timestamp.isoformat('T') + 'Z' +
def request_file_content(self, url, timeout=20):
    # default value
    response_content = ""

    # if we are using a proxy server - read its configuration
    if self.config.USE_PROXY == "YES":
        proxy_dict = {
            "http": self.config.PROXY_SERVER,
            "https": self.config.PROXY_SERVER,
            "ftp": self.config.PROXY_SERVER
        }
        proxy = urllib2.ProxyHandler(proxy_dict)
        opener = urllib2.build_opener(proxy)
        urllib2.install_opener(opener)

    # build the request
    request = urllib2.Request(url)
    base64string = base64.encodestring(
        "%s:%s" % (self.config.API_ID, self.config.API_KEY)).replace('\n', '')
    request.add_header("Authorization", "Basic %s" % base64string)

    try:
        # open the connection to the URL
        if self.config.USE_CUSTOM_CA_FILE == "YES":
            response = urllib2.urlopen(request, timeout=timeout,
                                       cafile=self.config.CUSTOM_CA_FILE)
        else:
            response = urllib2.urlopen(request, timeout=timeout)

        # if we got a 200 OK response
        if response.code == 200:
            self.logger.info("Successfully downloaded file from URL %s" % url)
            # read the response content
            response_content = response.read()
        # if we got another response code
        else:
            self.logger.error(
                "Failed to download file %s. Response code is %s. Info is %s",
                url, response.code, response.info())
        # close the response
        response.close()
        # return the content string
        return response_content

    # handle 401/404/429 responses
    except urllib2.HTTPError, e:
        if e.code == 404:
            self.logger.error(
                "Could not find file %s. Response code is %s", url, e.code)
            return response_content
        elif e.code == 401:
            self.logger.error(
                "Authorization error - Failed to download file %s. Response code is %s",
                url, e.code)
            raise Exception("Authorization error")
        elif e.code == 429:
            self.logger.error(
                "Rate limit exceeded - Failed to download file %s. Response code is %s",
                url, e.code)
            raise Exception("Rate limit error")
        else:
            self.logger.error(
                "An error has occurred while opening a connection to %s. %s",
                url, str(e.code))
            raise Exception("Connection error")
def proxy(self):
    proxy = urllib2.ProxyHandler({'http': '127.0.0.1:8888'})
    opener = urllib2.build_opener(proxy)
    urllib2.install_opener(opener)
class SougouSpider:
    # counters (amount of images to fetch, start offset, current count)
    __amount = 0
    __start_amount = 0
    __counter = 0

    def __init__(self, totalPageNum=1, image_path='/data2/xijun.gong/jd_image_data'):
        """
        :param totalPageNum: number of pages to download
        :param image_path: directory where images are saved
        """
        self.download = download
        self.proxy_new = proxy.Proxy()
        self.image_path = image_path
        self.__amount = totalPageNum * maxImageNum + self.__start_amount
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.94 Safari/537.36',
            'Qunar-App': 'SQxTLo6t4k5HSnsykL7nuz9jL/2FvrM9QfppVEbOhQYxIS5tR6I/w3GIq9wpZLbur3Hw7W//Ec+nFnorxB7gTlSSND1Xrbaj3zmRkWAZUaiRm+djpINDhvsYOXlFZHlrQ0BPZ+uZRIn5xnSAfPTpW1xJehqHDr1769Xs0Ly8rZM='
        }

    def __getImg(self, sub_dest_dir, word='dame', extra_word='provice'):
        """
        :param word: the search keyword
        """
        _keyword = extra_word.encode('utf-8') + ' ' + word.encode('utf-8')
        # print _keyword
        search = urllib.pathname2url(str(_keyword))
        # pn: int, number of images fetched so far
        pn = self.__start_amount
        root_path = os.path.join(self.image_path, sub_dest_dir)
        try:
            if not os.path.exists(root_path):
                os.makedirs(root_path)
        except Exception, e:
            print e
        # guard against duplicate names: use the current image count as the counter
        self.__counter = len(os.listdir(root_path)) + 1
        while pn <= self.__amount and self.__counter <= self.__amount:
            url = 'http://pic.sogou.com/pics?query=' + search + '&mode=1&start=' \
                  + str(pn) + '&reqType=ajax&reqFrom=result&tn=1'
            # page = None
            try:
                # use a proxy
                proxy = urllib2.ProxyHandler(self.proxy_new.getProxyByTxt())
                opener = urllib2.build_opener(proxy)
                urllib2.install_opener(opener)
                _text = urllib2.Request(url, headers=self.headers)
                data = urllib2.urlopen(_text, timeout=50).read()
                # req = urllib2.Request(url=url, headers=self.headers)
                # page = urllib2.urlopen(req)
                # data = page.read()
                # parse the json
                # print data
                self.__saveImage(root_path, self.parseJson(data, 'thumbUrl'), word)
                pn += maxImageNum
            except Exception as e:
                print 'error while downloading image:', e, url
            finally:
                self.__counter = len(os.listdir(root_path)) + 1
                # if page is not None:
                #     page.close()
        print "image download:", word + " finished"
def request(url, close=True, redirect=True, error=False, proxy=None,
            post=None, headers=None, mobile=False, XHR=False, limit=None,
            referer=None, cookie=None, output='', timeout=None):
    try:
        handlers = []

        if timeout == '' or timeout == None:
            timeout = '30'

        if not proxy == None:
            handlers += [urllib2.ProxyHandler({'http': '%s' % (proxy)}),
                         urllib2.HTTPHandler]
            opener = urllib2.build_opener(*handlers)
            opener = urllib2.install_opener(opener)

        if output == 'cookie' or output == 'extended' or not close == True:
            cookies = cookielib.LWPCookieJar()
            handlers += [urllib2.HTTPHandler(),
                         urllib2.HTTPSHandler(),
                         urllib2.HTTPCookieProcessor(cookies)]
            opener = urllib2.build_opener(*handlers)
            opener = urllib2.install_opener(opener)

        try:
            if sys.version_info < (2, 7, 9):
                raise Exception()
            import ssl
            ssl_context = ssl.create_default_context()
            ssl_context.check_hostname = False
            ssl_context.verify_mode = ssl.CERT_NONE
            handlers += [urllib2.HTTPSHandler(context=ssl_context)]
            opener = urllib2.build_opener(*handlers)
            opener = urllib2.install_opener(opener)
        except:
            pass

        try:
            headers.update(headers)
        except:
            headers = {}

        if 'User-Agent' in headers:
            pass
        elif not mobile == True:
            #headers['User-Agent'] = agent()
            headers['User-Agent'] = cache.get(randomagent, 1)
        else:
            headers['User-Agent'] = 'Apple-iPhone/701.341'

        if 'Referer' in headers:
            pass
        elif referer == None:
            headers['Referer'] = '%s://%s/' % (urlparse.urlparse(url).scheme,
                                               urlparse.urlparse(url).netloc)
        else:
            headers['Referer'] = referer

        if not 'Accept-Language' in headers:
            headers['Accept-Language'] = 'en-US'

        if 'X-Requested-With' in headers:
            pass
        elif XHR == True:
            headers['X-Requested-With'] = 'XMLHttpRequest'

        if 'Cookie' in headers:
            pass
        elif not cookie == None:
            headers['Cookie'] = cookie

        if redirect == False:
            class NoRedirection(urllib2.HTTPErrorProcessor):
                def http_response(self, request, response):
                    return response

            opener = urllib2.build_opener(NoRedirection)
            opener = urllib2.install_opener(opener)

            try:
                del headers['Referer']
            except:
                pass

        request = urllib2.Request(url, data=post, headers=headers)

        try:
            response = urllib2.urlopen(request, timeout=int(timeout))
        except urllib2.HTTPError as response:
            if response.code == 503:
                cf_result = response.read(5242880)
                try:
                    encoding = response.info().getheader('Content-Encoding')
                except:
                    encoding = None
                if encoding == 'gzip':
                    cf_result = gzip.GzipFile(fileobj=StringIO.StringIO(cf_result)).read()

                if 'cf-browser-verification' in cf_result:
                    netloc = '%s://%s' % (urlparse.urlparse(url).scheme,
                                          urlparse.urlparse(url).netloc)
                    ua = headers['User-Agent']
                    cf = cache.get(cfcookie().get, 168, netloc, ua, timeout)
                    headers['Cookie'] = cf
                    request = urllib2.Request(url, data=post, headers=headers)
                    response = urllib2.urlopen(request, timeout=int(timeout))
                elif error == False:
                    return
            elif error == False:
                return

        if output == 'cookie':
            try:
                result = '; '.join(['%s=%s' % (i.name, i.value) for i in cookies])
            except:
                pass
            try:
                result = cf
            except:
                pass
            if close == True:
                response.close()
            return result

        elif output == 'geturl':
            result = response.geturl()
            if close == True:
                response.close()
            return result

        elif output == 'headers':
            result = response.headers
            if close == True:
                response.close()
            return result

        elif output == 'chunk':
            try:
                content = int(response.headers['Content-Length'])
            except:
                content = (2049 * 1024)
            if content < (2048 * 1024):
                return
            result = response.read(16 * 1024)
            if close == True:
                response.close()
            return result

        if limit == '0':
            result = response.read(224 * 1024)
        elif not limit == None:
            result = response.read(int(limit) * 1024)
        else:
            result = response.read(5242880)

        try:
            encoding = response.info().getheader('Content-Encoding')
        except:
            encoding = None
        if encoding == 'gzip':
            result = gzip.GzipFile(fileobj=StringIO.StringIO(result)).read()

        if 'sucuri_cloudproxy_js' in result:
            su = sucuri().get(result)
            headers['Cookie'] = su
            request = urllib2.Request(url, data=post, headers=headers)
            response = urllib2.urlopen(request, timeout=int(timeout))

            if limit == '0':
                result = response.read(224 * 1024)
            elif not limit == None:
                result = response.read(int(limit) * 1024)
            else:
                result = response.read(5242880)

            try:
                encoding = response.info().getheader('Content-Encoding')
            except:
                encoding = None
            if encoding == 'gzip':
                result = gzip.GzipFile(fileobj=StringIO.StringIO(result)).read()

        if output == 'extended':
            response_headers = response.headers
            response_code = str(response.code)
            try:
                cookie = '; '.join(['%s=%s' % (i.name, i.value) for i in cookies])
            except:
                pass
            try:
                cookie = cf
            except:
                pass
            if close == True:
                response.close()
            return (result, response_code, response_headers, headers, cookie)
        else:
            if close == True:
                response.close()
            return result
    except:
        return
import sys
import os
import urllib2
import glob
import FileAccess
import time

Path = sys.argv[1]
G_startYear = sys.argv[2]
G_endYear = sys.argv[3]

#proxy: http://autoproxy.cec.eu.int/proxy.pac
proxy_url = "http://*****:*****@147.67.138.13:8012"    #proxy BXL
proxy_urls = "http://*****:*****@147.67.117.13:8012"   #proxy LUX
proxy_support = urllib2.ProxyHandler({'https': proxy_urls})
opener = urllib2.build_opener(proxy_support)
urllib2.install_opener(opener)

dirUse = Path
dirLog = dirUse + '\\Log'
dirTXT = dirUse + '\\Input\\txt'
fichiersXML = glob.glob(dirUse + '\\Input\\xml\\*.xml')
dirFile = dirUse + '\\Input'


def getFileUN(startYear, endYear):
    nbrYear = int(endYear) - int(startYear) + 1
    code = 'K9tEO6K2x1gJoFnXR/qcd8gwzQgyXpkLcugnAN4Wj45g2jtfyj/9S6GqzH+KozrFerR4R4igrn717EjaBxQkgJKQts61M1U+dVxcdkRPZzGxClkhvNSLyxdp5OoXJ256L6xAvOpc/jJnvP0ZzLfeDsSN8CeXx+pvnTYCJbCbU/Y='
    cc = '??????'
    px = 'H0'
def fetch_url(module, url, data=None, headers=None, method=None,
              use_proxy=True, force=False, last_mod_time=None, timeout=10):
    '''
    Fetches a file from an HTTP/FTP server using urllib2
    '''

    if not HAS_URLLIB:
        module.fail_json(msg='urllib is not installed')
    if not HAS_URLLIB2:
        module.fail_json(msg='urllib2 is not installed')
    elif not HAS_URLPARSE:
        module.fail_json(msg='urlparse is not installed')

    r = None
    handlers = []
    info = dict(url=url)

    distribution = get_distribution()

    # Get validate_certs from the module params
    validate_certs = module.params.get('validate_certs', True)

    # FIXME: change the following to use the generic_urlparse function
    #        to remove the indexed references for 'parsed'
    parsed = urlparse.urlparse(url)
    if parsed[0] == 'https':
        if not HAS_SSL and validate_certs:
            if distribution == 'Redhat':
                module.fail_json(msg='SSL validation is not available in your version of python. You can use validate_certs=no, however this is unsafe and not recommended. You can also install python-ssl from EPEL')
            else:
                module.fail_json(msg='SSL validation is not available in your version of python. You can use validate_certs=no, however this is unsafe and not recommended')
        elif validate_certs:
            # do the cert validation
            netloc = parsed[1]
            if '@' in netloc:
                netloc = netloc.split('@', 1)[1]
            if ':' in netloc:
                hostname, port = netloc.split(':', 1)
            else:
                hostname = netloc
                port = 443
            # create the SSL validation handler and
            # add it to the list of handlers
            ssl_handler = SSLValidationHandler(module, hostname, port)
            handlers.append(ssl_handler)

    if parsed[0] != 'ftp':
        username = module.params.get('url_username', '')
        if username:
            password = module.params.get('url_password', '')
            netloc = parsed[1]
        elif '@' in parsed[1]:
            credentials, netloc = parsed[1].split('@', 1)
            if ':' in credentials:
                username, password = credentials.split(':', 1)
            else:
                username = credentials
                password = ''
            parsed = list(parsed)
            parsed[1] = netloc
            # reconstruct url without credentials
            url = urlparse.urlunparse(parsed)

        if username:
            passman = urllib2.HTTPPasswordMgrWithDefaultRealm()
            # this creates a password manager
            passman.add_password(None, netloc, username, password)
            # because we have put None at the start it will always
            # use this username/password combination for urls
            # for which `theurl` is a super-url
            authhandler = urllib2.HTTPBasicAuthHandler(passman)
            # create the AuthHandler
            handlers.append(authhandler)

    if not use_proxy:
        proxyhandler = urllib2.ProxyHandler({})
        handlers.append(proxyhandler)

    # pre-2.6 versions of python cannot use the custom https
    # handler, since the socket class is lacking this method
    if hasattr(socket, 'create_connection'):
        handlers.append(CustomHTTPSHandler)

    opener = urllib2.build_opener(*handlers)
    urllib2.install_opener(opener)

    if method:
        if method.upper() not in ('OPTIONS', 'GET', 'HEAD', 'POST', 'PUT',
                                  'DELETE', 'TRACE', 'CONNECT'):
            module.fail_json(msg='invalid HTTP request method; %s' % method.upper())
        request = RequestWithMethod(url, method.upper(), data)
    else:
        request = urllib2.Request(url, data)

    # add the custom agent header, to help prevent issues
    # with sites that block the default urllib agent string
    request.add_header('User-agent', module.params.get('http_agent'))

    # if we're ok with getting a 304, set the timestamp in the
    # header, otherwise make sure we don't get a cached copy
    if last_mod_time and not force:
        tstamp = last_mod_time.strftime('%a, %d %b %Y %H:%M:%S +0000')
        request.add_header('If-Modified-Since', tstamp)
    else:
        request.add_header('cache-control', 'no-cache')

    # user defined headers now, which may override things we've set above
    if headers:
        if not isinstance(headers, dict):
            module.fail_json("headers provided to fetch_url() must be a dict")
        for header in headers:
            request.add_header(header, headers[header])

    try:
        if sys.version_info < (2, 6, 0):
            # urlopen in python prior to 2.6.0 did not
            # have a timeout parameter
            r = urllib2.urlopen(request, None)
        else:
            r = urllib2.urlopen(request, None, timeout)
        info.update(r.info())
        info['url'] = r.geturl()  # The URL goes in too, because of redirects.
        info.update(dict(msg="OK (%s bytes)" % r.headers.get('Content-Length', 'unknown'),
                         status=200))
    except urllib2.HTTPError, e:
        info.update(dict(msg=str(e), status=e.code))
def getRegexParsed(regexs, url,
                   cookieJar=None,
                   forCookieJarOnly=False,
                   recursiveCall=False,
                   cachedPages={},
                   rawPost=False,
                   cookie_jar_file=None):
    #0,1,2 = URL, regexOnly, CookieJarOnly
    #cachedPages = {}
    #print 'url', url
    doRegexs = re.compile('\$doregex\[([^\]]*)\]').findall(url)
    #print 'doRegexs', doRegexs, regexs
    setresolved = True
    for k in doRegexs:
        if k in regexs:
            #print 'processing ', k
            m = regexs[k]
            #print m
            cookieJarParam = False
            if 'cookiejar' in m:  # so either create or reuse existing jar
                #print 'cookiejar exists', m['cookiejar']
                cookieJarParam = m['cookiejar']
                if '$doregex' in cookieJarParam:
                    cookieJar = getRegexParsed(regexs, m['cookiejar'], cookieJar,
                                               True, True, cachedPages)
                    cookieJarParam = True
                else:
                    cookieJarParam = True
            #print 'm[cookiejar]', m['cookiejar'], cookieJar
            if cookieJarParam:
                if cookieJar == None:
                    #print 'create cookie jar'
                    cookie_jar_file = None
                    if 'open[' in m['cookiejar']:
                        cookie_jar_file = m['cookiejar'].split('open[')[1].split(']')[0]
                        #print 'cookieJar from file name', cookie_jar_file

                    cookieJar = getCookieJar(cookie_jar_file)
                    #print 'cookieJar from file', cookieJar
                    if cookie_jar_file:
                        saveCookieJar(cookieJar, cookie_jar_file)
                    #import cookielib
                    #cookieJar = cookielib.LWPCookieJar()
                    #print 'cookieJar new', cookieJar
                elif 'save[' in m['cookiejar']:
                    cookie_jar_file = m['cookiejar'].split('save[')[1].split(']')[0]
                    complete_path = os.path.join(profile, cookie_jar_file)
                    #print 'complete_path', complete_path
                    saveCookieJar(cookieJar, cookie_jar_file)

            if m['page'] and '$doregex' in m['page']:
                pg = getRegexParsed(regexs, m['page'], cookieJar,
                                    recursiveCall=True, cachedPages=cachedPages)
                if len(pg) == 0:
                    pg = 'http://regexfailed'
                m['page'] = pg

            if 'setcookie' in m and m['setcookie'] and '$doregex' in m['setcookie']:
                m['setcookie'] = getRegexParsed(regexs, m['setcookie'], cookieJar,
                                                recursiveCall=True, cachedPages=cachedPages)
            if 'appendcookie' in m and m['appendcookie'] and '$doregex' in m['appendcookie']:
                m['appendcookie'] = getRegexParsed(regexs, m['appendcookie'], cookieJar,
                                                   recursiveCall=True, cachedPages=cachedPages)

            if 'post' in m and '$doregex' in m['post']:
                m['post'] = getRegexParsed(regexs, m['post'], cookieJar,
                                           recursiveCall=True, cachedPages=cachedPages)
                #print 'post is now', m['post']

            if 'rawpost' in m and '$doregex' in m['rawpost']:
                m['rawpost'] = getRegexParsed(regexs, m['rawpost'], cookieJar,
                                              recursiveCall=True, cachedPages=cachedPages,
                                              rawPost=True)
                #print 'rawpost is now', m['rawpost']

            if 'rawpost' in m and '$epoctime$' in m['rawpost']:
                m['rawpost'] = m['rawpost'].replace('$epoctime$', getEpocTime())
            if 'rawpost' in m and '$epoctime2$' in m['rawpost']:
                m['rawpost'] = m['rawpost'].replace('$epoctime2$', getEpocTime2())

            link = ''
            if m['page'] and m['page'] in cachedPages and not 'ignorecache' in m and forCookieJarOnly == False:
                #print 'using cache page', m['page']
                link = cachedPages[m['page']]
            else:
                if m['page'] and not m['page'] == '' and m['page'].startswith('http'):
                    if '$epoctime$' in m['page']:
                        m['page'] = m['page'].replace('$epoctime$', getEpocTime())
                    if '$epoctime2$' in m['page']:
                        m['page'] = m['page'].replace('$epoctime2$', getEpocTime2())
                    #print 'Ignoring Cache', m['page']
                    page_split = m['page'].split('|')
                    pageUrl = page_split[0]
                    header_in_page = None
                    if len(page_split) > 1:
                        header_in_page = page_split[1]

                    # proxy = urllib2.ProxyHandler({ ('https' ? proxytouse[:5]=="https":"http") : proxytouse})
                    # opener = urllib2.build_opener(proxy)
                    # urllib2.install_opener(opener)

                    # import urllib2
                    # print 'urllib2.getproxies', urllib2.getproxies()
                    current_proxies = urllib2.ProxyHandler(urllib2.getproxies())

                    #print 'getting pageUrl', pageUrl
                    req = urllib2.Request(pageUrl)
                    if 'proxy' in m:
                        proxytouse = m['proxy']
                        #print 'proxytouse', proxytouse
                        # urllib2.getproxies = lambda: {}
                        if pageUrl[:5] == "https":
                            proxy = urllib2.ProxyHandler({'https': proxytouse})
                            #req.set_proxy(proxytouse, 'https')
                        else:
                            proxy = urllib2.ProxyHandler({'http': proxytouse})
                            #req.set_proxy(proxytouse, 'http')
                        opener = urllib2.build_opener(proxy)
                        urllib2.install_opener(opener)

                    req.add_header('User-Agent',
                                   'Mozilla/5.0 (Windows NT 6.1; rv:14.0) Gecko/20100101 Firefox/14.0.1')
                    proxytouse = None

                    if 'referer' in m:
                        req.add_header('Referer', m['referer'])
                    if 'accept' in m:
                        req.add_header('Accept', m['accept'])
                    if 'agent' in m:
                        req.add_header('User-agent', m['agent'])
                    if 'x-req' in m:
                        req.add_header('X-Requested-With', m['x-req'])
                    if 'x-addr' in m:
                        req.add_header('x-addr', m['x-addr'])
                    if 'x-forward' in m:
                        req.add_header('X-Forwarded-For', m['x-forward'])
                    if 'setcookie' in m:
                        #print 'adding cookie', m['setcookie']
                        req.add_header('Cookie', m['setcookie'])
                    if 'appendcookie' in m:
                        #print 'appending cookie to cookiejar', m['appendcookie']
                        cookiestoApend = m['appendcookie']
                        cookiestoApend = cookiestoApend.split(';')
                        for h in cookiestoApend:
                            n, v = h.split('=')
                            w, n = n.split(':')
                            ck = cookielib.Cookie(version=0, name=n, value=v,
                                                  port=None, port_specified=False,
                                                  domain=w, domain_specified=False,
                                                  domain_initial_dot=False, path='/',
                                                  path_specified=True, secure=False,
                                                  expires=None, discard=True,
                                                  comment=None, comment_url=None,
                                                  rest={'HttpOnly': None}, rfc2109=False)
                            cookieJar.set_cookie(ck)
                    if 'origin' in m:
                        req.add_header('Origin', m['origin'])
                    if header_in_page:
                        header_in_page = header_in_page.split('&')
                        for h in header_in_page:
                            n, v = h.split('=')
                            req.add_header(n, v)

                    if not cookieJar == None:
                        #print 'cookieJarVal', cookieJar
                        cookie_handler = urllib2.HTTPCookieProcessor(cookieJar)
                        opener = urllib2.build_opener(cookie_handler,
                                                      urllib2.HTTPBasicAuthHandler(),
                                                      urllib2.HTTPHandler())
                        opener = urllib2.install_opener(opener)
                        #print 'noredirect', 'noredirect' in m
                        if 'noredirect' in m:
                            opener = urllib2.build_opener(cookie_handler, NoRedirection,
                                                          urllib2.HTTPBasicAuthHandler(),
                                                          urllib2.HTTPHandler())
                            opener = urllib2.install_opener(opener)
                    elif 'noredirect' in m:
                        opener = urllib2.build_opener(NoRedirection,
                                                      urllib2.HTTPBasicAuthHandler(),
                                                      urllib2.HTTPHandler())
                        opener = urllib2.install_opener(opener)

                    if 'connection' in m:
                        #print '..........................connection//////.', m['connection']
                        from keepalive import HTTPHandler
                        keepalive_handler = HTTPHandler()
                        opener = urllib2.build_opener(keepalive_handler)
                        urllib2.install_opener(opener)

                    #print 'after cookie jar'
                    post = None

                    if 'post' in m:
                        postData = m['post']
                        #if '$LiveStreamRecaptcha' in postData:
                        #    (captcha_challenge, catpcha_word, idfield) = processRecaptcha(m['page'], cookieJar)
                        #    if captcha_challenge:
                        #        postData = postData.replace('$LiveStreamRecaptcha', 'manual_recaptcha_challenge_field:' + captcha_challenge + ',recaptcha_response_field:' + catpcha_word + ',id:' + idfield)
                        splitpost = postData.split(',')
                        post = {}
                        for p in splitpost:
                            n = p.split(':')[0]
                            v = p.split(':')[1]
                            post[n] = v
                        post = urllib.urlencode(post)

                    if 'rawpost' in m:
                        post = m['rawpost']
                        #if '$LiveStreamRecaptcha' in post:
                        #    (captcha_challenge, catpcha_word, idfield) = processRecaptcha(m['page'], cookieJar)
                        #    if captcha_challenge:
                        #        post = post.replace('$LiveStreamRecaptcha', '&manual_recaptcha_challenge_field=' + captcha_challenge + '&recaptcha_response_field=' + catpcha_word + '&id=' + idfield)

                    link = ''
                    try:
                        if post:
                            response = urllib2.urlopen(req, post)
                        else:
                            response = urllib2.urlopen(req)
                        if response.info().get('Content-Encoding') == 'gzip':
                            from StringIO import StringIO
                            import gzip
                            buf = StringIO(response.read())
                            f = gzip.GzipFile(fileobj=buf)
                            link = f.read()
                        else:
                            link = response.read()

                        if 'proxy' in m and not current_proxies is None:
                            urllib2.install_opener(urllib2.build_opener(current_proxies))

                        link = javascriptUnEscape(link)
                        #print repr(link)
                        #print link  (this just prints the whole webpage to the LOG)

                        if 'includeheaders' in m:
                            #link += str(response.headers.get('Set-Cookie'))
                            link += '$$HEADERS_START$$:'
                            for b in response.headers:
                                link += b + ':' + response.headers.get(b) + '\n'
                            link += '$$HEADERS_END$$:'
                        #print link

                        response.close()
                    except:
                        pass
                    cachedPages[m['page']] = link
                    #print link
                    #print 'store link for', m['page'], forCookieJarOnly

                    if forCookieJarOnly:
                        return cookieJar  # do nothing
                elif m['page'] and not m['page'].startswith('http'):
                    if m['page'].startswith('$pyFunction:'):
                        val = doEval(m['page'].split('$pyFunction:')[1], '', cookieJar, m)
                        if forCookieJarOnly:
                            return cookieJar  # do nothing
                        link = val
                        link = javascriptUnEscape(link)
                    else:
                        link = m['page']

            if '$doregex' in m['expres']:
                m['expres'] = getRegexParsed(regexs, m['expres'], cookieJar,
                                             recursiveCall=True, cachedPages=cachedPages)

            if not m['expres'] == '':
                #print 'doing it ', m['expres']
                if '$LiveStreamCaptcha' in m['expres']:
                    val = askCaptcha(m, link, cookieJar)
                    #print 'url and val', url, val
                    url = url.replace("$doregex[" + k + "]", val)
                elif m['expres'].startswith('$pyFunction:') or '#$pyFunction' in m['expres']:
                    #print 'expeeeeeeeeeeeeeeeeeee', m['expres']
                    val = ''
                    if m['expres'].startswith('$pyFunction:'):
                        val = doEval(m['expres'].split('$pyFunction:')[1], link, cookieJar, m)
                    else:
                        val = doEvalFunction(m['expres'], link, cookieJar, m)
                    if 'ActivateWindow' in m['expres']:
                        return
                    if forCookieJarOnly:
                        return cookieJar  # do nothing
                    if 'listrepeat' in m:
                        listrepeat = m['listrepeat']
                        return listrepeat, eval(val), m, regexs, cookieJar

                    try:
                        url = url.replace(u"$doregex[" + k + "]", val)
                    except:
                        url = url.replace("$doregex[" + k + "]", val.decode("utf-8"))
                else:
                    if 'listrepeat' in m:
                        listrepeat = m['listrepeat']
                        ret = re.findall(m['expres'], link)
                        return listrepeat, ret, m, regexs

                    val = ''
                    if not link == '':
                        #print 'link', link
                        reg = re.compile(m['expres']).search(link)
                        try:
                            val = reg.group(1).strip()
                        except:
                            traceback.print_exc()
                    elif m['page'] == '' or m['page'] == None:
                        val = m['expres']

                    if rawPost:
                        #print 'rawpost'
                        val = urllib.quote_plus(val)
                    if 'htmlunescape' in m:
                        #val = urllib.unquote_plus(val)
                        import HTMLParser
                        val = HTMLParser.HTMLParser().unescape(val)
                    try:
                        url = url.replace("$doregex[" + k + "]", val)
                    except:
                        url = url.replace("$doregex[" + k + "]", val.decode("utf-8"))
                    #print 'ur', url
                    #return val
            else:
                url = url.replace("$doregex[" + k + "]", '')

    if '$epoctime$' in url:
        url = url.replace('$epoctime$', getEpocTime())
    if '$epoctime2$' in url:
        url = url.replace('$epoctime2$', getEpocTime2())
    if '$GUID$' in url:
        import uuid
        url = url.replace('$GUID$', str(uuid.uuid1()).upper())
    if '$get_cookies$' in url:
        url = url.replace('$get_cookies$', getCookiesString(cookieJar))

    if recursiveCall:
        return url
    #print 'final url', repr(url)
    if url == "":
        return
    else:
        return url, setresolved
        '-Dapplication.url=%s://localhost:%s' % (protocol, http_port),
        'play.modules.testrunner.FirePhoque'
    ]
    try:
        subprocess.call(java_cmd, env=os.environ)
    except OSError:
        print "Could not execute the headless browser. "
        sys.exit(-1)

    print "~"
    time.sleep(1)

    # Kill if exists
    http_port = app.readConf('http.port')
    try:
        proxy_handler = urllib2.ProxyHandler({})
        opener = urllib2.build_opener(proxy_handler)
        opener.open('%s://localhost:%s/@kill' % (protocol, http_port))
    except Exception, e:
        pass

    if os.path.exists(os.path.join(app.path, 'test-result/result.passed')):
        print "~ All tests passed"
        print "~"
        testspassed = True
    if os.path.exists(os.path.join(app.path, 'test-result/result.failed')):
        print "~ Some tests have failed. See file://%s for results" % test_result
        print "~"
        sys.exit(1)
#-*- coding:utf-8 -*-
"""
1. Proxy settings

By default, urllib2 uses the http_proxy environment variable to configure the
HTTP proxy. To control the proxy explicitly in a program, independent of the
environment variables, use a ProxyHandler.
"""
## Example 1
import urllib2

enable_proxy = True
proxy_handler = urllib2.ProxyHandler({'http': 'http://some-proxy.com:2222'})
null_proxy_handler = urllib2.ProxyHandler({})

if enable_proxy:
    opener = urllib2.build_opener(proxy_handler)
else:
    opener = urllib2.build_opener(null_proxy_handler)

urllib2.install_opener(opener)

# urllib2.install_opener(opener) sets urllib2's *global* opener. A better
# approach is to avoid install_opener and its global side effect, and instead
# call opener.open(url) directly in place of the global urlopen().

"""
2. Timeout settings

urllib2's API did not expose a timeout setting; to set a timeout you could
only change the global socket timeout.
"""
## Example 2
import urllib2
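# The original example 2 breaks off before showing the timeout code. A minimal
# sketch, assuming the pre-2.6 urllib2 described above (the URL is a
# placeholder):
import socket
import urllib2

# Old urllib2's urlopen() had no timeout argument, so the only knob was the
# process-wide socket default; it affects every socket created afterwards.
socket.setdefaulttimeout(10)
response = urllib2.urlopen('http://www.example.com')

# From Python 2.6 on, urlopen() accepts a per-call timeout instead:
response = urllib2.urlopen('http://www.example.com', timeout=10)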
                if continue_tests == True:
                    settings.IGNORE_ERR_MSG = True
                else:
                    raise SystemExit()
            response = False
        except urllib2.URLError, err:
            if "Connection refused" in err.reason:
                print "\n" + Back.RED + "(x) Critical: The target host is not responding." + \
                      " Please ensure that it is up and try again." + Style.RESET_ALL
                raise SystemExit()

    # Check if Tor is defined.
    elif menu.options.tor:
        try:
            proxy = urllib2.ProxyHandler({
                settings.PROXY_PROTOCOL:
                settings.PRIVOXY_IP + ":" + PRIVOXY_PORT
            })
            response = inject_cookie(url, vuln_parameter, payload, proxy)
        except urllib2.HTTPError, err:
            if settings.IGNORE_ERR_MSG == False:
                print "\n" + Back.RED + "(x) Error: " + str(err) + Style.RESET_ALL
                continue_tests = checks.continue_tests(err)
                if continue_tests == True:
                    settings.IGNORE_ERR_MSG = True
                else:
                    raise SystemExit()
            response = False
        except urllib2.URLError, err:
            if "Connection refused" in err.reason:
                print "\n" + Back.RED + "(x) Critical: The target host is not responding." + \
def run(self, obj, config):
    key = config.get('vt_api_key', '')
    url = config.get('vt_download_url', '')
    sizeLimit = config.get('size_limit', '')
    replace = config.get('replace_sample', False)
    do_triage = config.get('run_triage', False)

    user = self.current_task.user
    sample = Sample.objects(md5=obj.md5).first()
    if not sample:
        sample = Sample()
        sample.md5 = md5_digest
    self._info("Checking if binary already exists in CRITs.")
    sample.discover_binary()

    # if we already have this binary and don't have permission to replace
    if sample.filedata and replace == False:
        self._info("CRITs already has this binary. Enable the 'Replace' option to overwrite with data from VirusTotal.")
        self._add_result("Download Canceled", "Binary already exists in CRITs.")
        return

    if not user.has_access_to(SampleACL.WRITE):
        self._info("User does not have permission to add Samples to CRITs")
        self._add_result("Download Canceled",
                         "User does not have permission to add Samples to CRITs")
        return

    parameters = urllib.urlencode({"hash": obj.md5, "apikey": key})
    if settings.HTTP_PROXY:
        proxy = urllib2.ProxyHandler({'http': settings.HTTP_PROXY,
                                      'https': settings.HTTP_PROXY})
        opener = urllib2.build_opener(proxy)
        urllib2.install_opener(opener)
    try:
        req = url + "?" + parameters
        self._info("Requesting binary with md5 '{0}' from VirusTotal.".format(obj.md5))
        request = urllib2.Request(req)
        response = urllib2.urlopen(request)
        size = response.info().getheaders("Content-Length")[0]
        self._info("Binary size: {0} bytes".format(size))

        if int(size) > sizeLimit:  # Check if within size limit
            self._error("Binary size is {0} bytes, which is greater than maximum of {1} bytes. This limit can be changed in options.".format(size, sizeLimit))
            self._add_result("Download Aborted",
                             "Match found, but binary is larger than maximum size limit.")
            return

        data = response.read()
    except urllib2.HTTPError as e:
        if e.code == 404:
            self._info("No results were returned. Either VirusTotal does not have the requested binary, or the request URL is incorrect.")
            self._add_result("Not Found",
                             "Binary was not found in the VirusTotal database")
        elif e.code == 403:
            self._error("Download forbidden. {0}".format(e))
            self._add_result("Download Canceled",
                             "CRITs was forbidden from downloading the binary.")
        else:
            self._error("An HTTP Error occurred: {0}".format(e))
        return
    except Exception as e:
        logger.error("VirusTotal: Failed connection ({0})".format(e))
        self._error("Failed to get data from VirusTotal: {0}".format(e))
        return

    if data:  # Retrieved some data from VT
        if replace == True:
            try:
                self._info("Replace = True. Deleting any previous binary with md5 {0}".format(obj.md5))
                sample.filedata.delete()
            except Exception as e:
                logger.error("VirusTotal: Error deleting existing binary ({0})".format(e))
                self._error("Failed to delete existing binary")
        self._info("Adding new binary to CRITs.")
        try:
            handle_file(filename=obj.md5,
                        data=data,
                        source="VirusTotal",
                        reference="Binary downloaded from VT based on MD5",
                        user="******",
                        method="VirusTotal Download Service",
                        md5_digest=obj.md5)
        except Exception as e:
            logger.error("VirusTotal: Sample creation failed ({0})".format(e))
            self._error("Failed to create new Sample: {0}".format(e))
            return
        if do_triage:
            self._info("Running sample triage for data-reliant services.")
            sample.reload()
            run_triage(sample, user="******")
        self._add_result("Download Successful",
                         "Binary was successfully downloaded from VirusTotal")
    else:
        self._error("No data returned by VirusTotal.")
        username = sys.argv[count + 1]
    elif arg == '-U':
        userFile = sys.argv[count + 1]
    elif arg == '-p':
        password = sys.argv[count + 1]
    elif arg == '-P':
        passFile = sys.argv[count + 1]
    elif arg == '-v':
        verbose = 1
    elif arg == '-s':
        signal = sys.argv[count + 1]
    elif arg == '-g':
        agent = sys.argv[count + 1]
    elif arg == '-x':
        lstTmp = sys.argv[count + 1].split(':')
        proxyHandler = urllib2.ProxyHandler({lstTmp[0]: lstTmp[1] + ':' + lstTmp[2]})
        useProxy = 1
    elif arg == '-f':
        continues = 1
    count += 1

if (len(username) > 0 and len(password) > 0):
    mode = 1  # single user, single password
elif (len(username) > 0 and len(passFile) > 0):
    mode = 2  # single user, password list
elif (len(userFile) > 0 and len(password) > 0):
    mode = 3  # user list, single password
elif (len(userFile) > 0 and len(passFile) > 0):
    mode = 4  # user list, password list
#-*-coding:utf-8-*-
'''
By default, urllib2 uses the http_proxy environment variable to configure the
HTTP proxy. Some sites check how often a given IP visits within a time window
and block the IP when the count gets too high. To work around this you can
route requests through proxy servers and switch to a different proxy every so
often, so the site cannot tell who is really behind the requests.
The code below shows how to configure a proxy.
'''
import urllib2

enable_proxy = True
# use a proxy
proxy_handler = urllib2.ProxyHandler({'http': '180.168.179.193:8080'})
# no proxy
null_proxy_handler = urllib2.ProxyHandler({})

if enable_proxy:
    opener = urllib2.build_opener(proxy_handler)
else:
    opener = urllib2.build_opener(null_proxy_handler)

urllib2.install_opener(opener)
response = urllib2.urlopen('http://www.baidu.com')
print response.read()
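# Building on the rotation idea above, a minimal sketch that picks a fresh
# proxy per request through a dedicated opener, avoiding the global
# install_opener() side effect (the pool entries are placeholders):
import random
import urllib2

PROXY_POOL = ['180.168.179.193:8080', '116.28.109.64:808']

def fetch_via_random_proxy(url, timeout=10):
    proxy = random.choice(PROXY_POOL)
    # A per-call opener keeps the proxy choice local to this request.
    opener = urllib2.build_opener(urllib2.ProxyHandler({'http': proxy}))
    return opener.open(url, timeout=timeout).read()

print fetch_via_random_proxy('http://www.baidu.com')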
def get_data(host, query, idx, limit, debug, threshold=300, ckey=None,
             cert=None, das_headers=True):
    """Contact DAS server and retrieve data for given DAS query"""
    params = {'input': query, 'idx': idx, 'limit': limit}
    path = '/das/cache'
    pat = re.compile('http[s]{0,1}://')
    if not pat.match(host):
        msg = 'Invalid hostname: %s' % host
        raise Exception(msg)
    url = host + path
    client = '%s (%s)' % (DAS_CLIENT, os.environ.get('USER', ''))
    headers = {"Accept": "application/json", "User-Agent": client}
    encoded_data = urllib.urlencode(params, doseq=True)
    url += '?%s' % encoded_data
    req = urllib2.Request(url=url, headers=headers)
    if ckey and cert:
        ckey = fullpath(ckey)
        cert = fullpath(cert)
        http_hdlr = HTTPSClientAuthHandler(ckey, cert, debug)
    else:
        http_hdlr = urllib2.HTTPHandler(debuglevel=debug)
    proxy_handler = urllib2.ProxyHandler({})
    cookie_jar = cookielib.CookieJar()
    cookie_handler = urllib2.HTTPCookieProcessor(cookie_jar)
    opener = urllib2.build_opener(http_hdlr, proxy_handler, cookie_handler)
    fdesc = opener.open(req)
    data = fdesc.read()
    fdesc.close()

    pat = re.compile(r'^[a-z0-9]{32}')
    if data and isinstance(data, str) and pat.match(data) and len(data) == 32:
        pid = data
    else:
        pid = None
    iwtime = 2   # initial waiting time in seconds
    wtime = 20   # final waiting time in seconds
    sleep = iwtime
    time0 = time.time()
    while pid:
        params.update({'pid': data})
        encoded_data = urllib.urlencode(params, doseq=True)
        url = host + path + '?%s' % encoded_data
        req = urllib2.Request(url=url, headers=headers)
        try:
            fdesc = opener.open(req)
            data = fdesc.read()
            fdesc.close()
        except urllib2.HTTPError as err:
            return {"status": "fail", "reason": str(err)}
        if data and isinstance(data, str) and pat.match(data) and len(data) == 32:
            pid = data
        else:
            pid = None
        time.sleep(sleep)
        if sleep < wtime:
            sleep *= 2
        elif sleep == wtime:
            sleep = iwtime  # start new cycle
        else:
            sleep = wtime
        if (time.time() - time0) > threshold:
            reason = "client timeout after %s sec" % int(time.time() - time0)
            return {"status": "fail", "reason": reason}
    jsondict = json.loads(data)
    return jsondict
def _download_file_stnd(self, url, save_path, file_name=None,
                        param_dict=None, proxy_url=None, proxy_port=None):
    """ downloads a file """
    try:
        #if url.find("http://") > -1:
        #    url = url.replace("http://", "https://")
        if proxy_url is not None:
            if proxy_port is None:
                proxy_port = 80
            proxies = {
                "http": "http://%s:%s" % (proxy_url, proxy_port),
                "https": "https://%s:%s" % (proxy_url, proxy_port)
            }
            proxy_support = urllib2.ProxyHandler(proxies)
            opener = urllib2.build_opener(proxy_support,
                                          urllib2.HTTPHandler(debuglevel=0),
                                          AGOLRedirectHandler())
            urllib2.install_opener(opener)
        else:
            opener = urllib2.build_opener(urllib2.HTTPHandler(debuglevel=0),
                                          AGOLRedirectHandler())
            urllib2.install_opener(opener)
        if param_dict is not None:
            encoded_args = urllib.urlencode(param_dict)
            url = url + '/?' + encoded_args
        file_data = urllib2.urlopen(url)
        file_data.getcode()
        file_data.geturl()
        if file_name is None:
            url = file_data.geturl()
            a = file_data.info().getheader('Content-Disposition')
            if a is not None:
                a = a.strip()
                file_name = re.findall(r'filename=\"(.+?)\"', a)[0]
            else:
                file_name = os.path.basename(file_data.geturl().split('?')[0])
        if hasattr(file_data, "status") and \
           (int(file_data.status) >= 300 and int(file_data.status) < 400):
            self._download_file(url=file_data.geturl(),
                                save_path=save_path,
                                file_name=file_name,
                                proxy_url=self._proxy_url,
                                proxy_port=self._proxy_port)
            return save_path + os.sep + file_name
        if (file_data.info().getheader('Content-Length')):
            total_size = int(file_data.info().getheader('Content-Length').strip())
            downloaded = 0
            CHUNK = 4096
            with open(save_path + os.sep + file_name, 'wb') as out_file:
                while True:
                    chunk = file_data.read(CHUNK)
                    downloaded += len(chunk)
                    if not chunk:
                        break
                    out_file.write(chunk)
        elif file_data.headers.maintype == 'image':
            with open(save_path + os.sep + file_name, 'wb') as out_file:
                buf = file_data.read()
                out_file.write(buf)
        return save_path + os.sep + file_name
    except urllib2.HTTPError, e:
        print "HTTP Error:", e.code, url
        return False
def do_check():
    # Check if 'tor' is installed.
    requirment = "tor"
    requirments.do_check(requirment)

    # Check if 'privoxy' is installed.
    requirment = "privoxy"
    requirments.do_check(requirment)

    check_privoxy_proxy = True
    info_msg = "Testing Tor SOCKS proxy settings ("
    info_msg += settings.PRIVOXY_IP + ":" + PRIVOXY_PORT
    info_msg += ")... "
    sys.stdout.write(settings.print_info_msg(info_msg))
    sys.stdout.flush()
    try:
        privoxy_proxy = urllib2.ProxyHandler(
            {settings.SCHEME: settings.PRIVOXY_IP + ":" + PRIVOXY_PORT})
        opener = urllib2.build_opener(privoxy_proxy)
        urllib2.install_opener(opener)
    except:
        check_privoxy_proxy = False
        pass

    if check_privoxy_proxy:
        try:
            check_tor_page = opener.open("https://check.torproject.org/").read()
            found_ip = re.findall(r": <strong>" + "(.*)" + "</strong></p>", check_tor_page)
            if not "You are not using Tor" in check_tor_page:
                sys.stdout.write("[" + Fore.GREEN + " SUCCEED " + Style.RESET_ALL + "]\n")
                sys.stdout.flush()
                if menu.options.tor_check:
                    success_msg = "Tor connection is properly set. "
                else:
                    success_msg = ""
                success_msg += "Your ip address appears to be " + found_ip[0] + ".\n"
                sys.stdout.write(settings.print_success_msg(success_msg))
                warn_msg = "Increasing default value for option '--time-sec' to"
                warn_msg += " " + str(settings.TIMESEC) + " because switch '--tor' was provided."
                print settings.print_warning_msg(warn_msg)
            else:
                print "[" + Fore.RED + " FAILED " + Style.RESET_ALL + "]"
                if menu.options.tor_check:
                    err_msg = "It seems that your Tor connection is not properly set. "
                else:
                    err_msg = ""
                err_msg += "Can't establish connection with the Tor SOCKS proxy. "
                err_msg += "Please make sure that you have "
                err_msg += "Tor installed and running so "
                err_msg += "you could successfully use "
                err_msg += "switch '--tor'."
                print settings.print_critical_msg(err_msg)
                raise SystemExit()
        except urllib2.URLError, err_msg:
            print "[" + Fore.RED + " FAILED " + Style.RESET_ALL + "]"
            if menu.options.tor_check:
                err_msg = "It seems that your Tor connection is not properly set. "
            else:
                err_msg = ""
            err_msg = "Please make sure that you have "
            err_msg += "Tor installed and running so "
            err_msg += "you could successfully use "
            err_msg += "switch '--tor'."
            print settings.print_critical_msg(err_msg)
            raise SystemExit()
        except httplib.BadStatusLine, err_msg:
            print "[ " + Fore.RED + "FAILED" + Style.RESET_ALL + " ]"
            if len(err_msg.line) > 2:
                print err_msg.line, err_msg.message
            raise SystemExit()
def handler(proxy):
    proxy_handler = urllib2.ProxyHandler({'http': proxy, 'https': proxy})
    opener = urllib2.build_opener(proxy_handler)
    urllib2.install_opener(opener)
    return request
def urlopen_with_retry(req, proxy_all):
    current_proxy = random.choice(proxy_all)
    opener = urllib2.build_opener(urllib2.ProxyHandler({"http": current_proxy}))
    urllib2.install_opener(opener)
    return urllib2.urlopen(req)
def request(url, close=True, redirect=True, error=False, proxy=None,
            post=None, headers=None, mobile=False, XHR=False, limit=None,
            referer=None, cookie=None, output='', timeout='30',
            headRequest=False):
    try:
        handlers = []

        if not proxy is None:
            control.log("proxy: %s" % proxy)
            handlers += [urllib2.ProxyHandler(proxy), urllib2.HTTPHandler]

        if output == 'cookie' or output == 'extended' or output == 'cookiejar' or not close == True:
            cookies = cookielib.LWPCookieJar()
            handlers += [urllib2.HTTPHandler(),
                         urllib2.HTTPSHandler(),
                         urllib2.HTTPCookieProcessor(cookies)]

        try:
            if sys.version_info < (2, 7, 9):
                raise Exception()
            import ssl
            ssl_context = ssl.create_default_context()
            ssl_context.check_hostname = False
            ssl_context.verify_mode = ssl.CERT_NONE
            handlers += [urllib2.HTTPSHandler(context=ssl_context)]
        except:
            pass

        try:
            headers.update(headers)
        except:
            headers = {}

        if 'User-Agent' in headers:
            pass
        elif not mobile == True:
            headers['User-Agent'] = cache.get(randomagent, 1)
        else:
            headers['User-Agent'] = 'Apple-iPhone/701.341'

        if 'Referer' in headers:
            pass
        elif referer is None:
            headers['Referer'] = '%s://%s/' % (urlparse.urlparse(url).scheme,
                                               urlparse.urlparse(url).netloc)
        else:
            headers['Referer'] = referer

        if not 'Accept-Language' in headers:
            headers['Accept-Language'] = 'en-US'

        if 'X-Requested-With' in headers:
            pass
        elif XHR is True:
            headers['X-Requested-With'] = 'XMLHttpRequest'

        if 'Cookie' in headers:
            pass
        elif not cookie is None:
            headers['Cookie'] = printCookieDict(cookie) if isinstance(cookie, dict) else cookie

        if redirect is False:
            class NoRedirection(urllib2.HTTPErrorProcessor):
                def http_response(self, request, response):
                    return response

            handlers += [NoRedirection]

            try:
                del headers['Referer']
            except:
                pass

        if handlers:
            opener = urllib2.build_opener(*handlers)
            urllib2.install_opener(opener)
        else:
            urllib2.install_opener(None)

        request = urllib2.Request(url, data=post, headers=headers)
        rid = uuid.uuid4().hex
        control.log("Url request (%s): %s" % (rid, url))
        if headRequest:
            request.get_method = lambda: 'HEAD'
        try:
            response = urllib2.urlopen(request, timeout=int(timeout))
        except urllib2.HTTPError as response:
            if response.code == 503:
                cf_result = response.read(5242880)
                try:
                    encoding = response.info().getheader('Content-Encoding')
                except:
                    encoding = None
                if encoding == 'gzip':
                    cf_result = gzip.GzipFile(fileobj=StringIO.StringIO(cf_result)).read()

                if 'cf-browser-verification' in cf_result:
                    netloc = '%s://%s' % (urlparse.urlparse(url).scheme,
                                          urlparse.urlparse(url).netloc)
                    ua = headers['User-Agent']
                    cf = cache.get(cfcookie().get, 168, netloc, ua, timeout)
                    headers['Cookie'] = cf
                    request = urllib2.Request(url, data=post, headers=headers)
                    response = urllib2.urlopen(request, timeout=int(timeout))
                elif error is False:
                    control.log("Response error code (%s): %s" % (rid, response.code))
                    return
            elif response.code == 403:
                control.log("Response error code (%s): %s" % (rid, response.code))
                raise Exception("Permission Denied")
            elif error is True:
                control.log("Response error code (%s): %s" % (rid, response.code))
                raise Exception("ERROR (%s): %s" % (url, response.code))
            elif error is False:
                control.log("Response error code (%s): %s" % (rid, response.code))
                return

        control.log("Response code (%s): %s" % (rid, response.code))
        if response.code == 403:
            raise Exception("Permission Denied")
        elif response.code >= 400:
            raise Exception("Request Error (%s): %s" % (url, response.code))

        if output == 'cookie':
            try:
                result = '; '.join(['%s=%s' % (i.name, i.value) for i in cookies])
            except:
                pass
            try:
                result = cf
            except:
                pass
            if close is True:
                response.close()
            return result

        if output == 'cookiejar':
            cookies_dict = {}
            for i in cookies:
                cookies_dict[i.name] = i.value
            return cookies_dict

        elif output == 'geturl':
            result = response.geturl()
            if close is True:
                response.close()
            return result

        elif output == 'headers':
            result = response.headers
            if close is True:
                response.close()
            return result

        elif output == 'chunk':
            try:
                content = int(response.headers['Content-Length'])
            except:
                content = (2049 * 1024)
            if content < (2048 * 1024):
                return
            result = response.read(16 * 1024)
            if close is True:
                response.close()
            return result

        if limit == '0':
            result = response.read(224 * 1024)
        elif not limit is None:
            result = response.read(int(limit) * 1024)
        else:
            result = response.read(5242880)

        try:
            encoding = response.info().getheader('Content-Encoding')
        except:
            encoding = None
        if encoding == 'gzip':
            result = gzip.GzipFile(fileobj=StringIO.StringIO(result)).read()

        if 'sucuri_cloudproxy_js' in result:
            su = sucuri().get(result)
            headers['Cookie'] = su
            request = urllib2.Request(url, data=post, headers=headers)
            response = urllib2.urlopen(request, timeout=int(timeout))

            if limit == '0':
                result = response.read(224 * 1024)
            elif not limit is None:
                result = response.read(int(limit) * 1024)
            else:
                result = response.read(5242880)

            try:
                encoding = response.info().getheader('Content-Encoding')
            except:
                encoding = None
            if encoding == 'gzip':
                result = gzip.GzipFile(fileobj=StringIO.StringIO(result)).read()

        control.log("response (%s): %s" % (rid, result[:500]))
        if response.headers and response.headers.get('content-type') and \
           ('application/json' in response.headers.get('content-type') or
            'text/javascript' in response.headers.get('content-type')):
            return json.loads(result, object_pairs_hook=OrderedDict) if OrderedDict else json.loads(result)

        if output == 'extended':
            response_headers = response.headers
            response_code = str(response.code)
            try:
                cookie = '; '.join(['%s=%s' % (i.name, i.value) for i in cookies])
            except:
                pass
            try:
                cookie = cf
            except:
                pass
            if close is True:
                response.close()
            return result, response_code, response_headers, headers, cookie
        else:
            if close is True:
                response.close()
            return result
    except Exception, e:
        traceback.print_exc()
        control.log("Request ERROR: %s" % str(e))
        return
def get_content(toUrl, count):
    """
    Return the content of given url

    Args:
        toUrl: aim url
        count: index of this connect

    Return:
        content if success
        'FAIL' if fail
    """
    cf = ConfigParser.ConfigParser()
    cf.read("config.ini")
    cookie = cf.get("cookie", "cookie")
    headers = {
        'Cookie': cookie,
        'Host': 'www.zhihu.com',
        'Referer': 'http://www.zhihu.com/',
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36',
        # 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.103 Safari/537.36',
        'Accept-Encoding': 'gzip'
    }
    req = urllib2.Request(
        url=toUrl,
        headers=headers
    )
    proxy_all = [
        "10.25.170.247:5678",
        "10.25.171.82:5678",
        "10.47.114.111:5678",
        "10.47.54.77:5678",
        "10.25.60.218:5678",
        "10.47.54.180:5678",
        "10.47.54.115:5678",
        "10.47.106.138:5678"
    ]
    current_proxy = random.choice(proxy_all)
    try:
        opener = urllib2.build_opener(urllib2.ProxyHandler({"http": current_proxy}))
        urllib2.install_opener(opener)
        page = urllib2.urlopen(req, timeout=15)
        headers = page.info()
        content = page.read()
    # except Exception, e:
    #     if count % 1 == 0:
    #         print str(count) + ", Error: " + str(e) + " URL: " + toUrl
    #     return "FAIL"
    except urllib2.HTTPError, e:
        if e.code == 404:
            if count % 1 == 0:
                print str(count) + ", Error: " + str(e) + " URL: " + toUrl
            return "NO FOUND"
        else:
            try:
                page = urlopen_with_retry(req, proxy_all)
                headers = page.info()
                content = page.read()
            except Exception, e:
                if count % 1 == 0:
                    print str(count) + ", Error: " + str(e) + " URL: " + toUrl + " retry_fail"
                return "FAIL"
def __init__(self, url):
    self.ledger_url = url
    self.proxy_handler = urllib2.ProxyHandler({})
import os
import json
import urllib2
from bs4 import BeautifulSoup
import httplib

url = 'http://codeforces.com/api/contest.list?gym=false'

prx = open("proxy", 'r')
proxy = prx.readline()
proxy = proxy.split('\n', 1)
proxy = proxy[0]
if (len(proxy) != 0):
    print proxy
    proxy = urllib2.ProxyHandler({'http': proxy})
    opener = urllib2.build_opener(proxy)
    urllib2.install_opener(opener)

if os.path.exists('ProblemStatistics'):
    print "The directory 'ProblemStatistics' exists !!!"
else:
    os.makedirs('ProblemStatistics')

dict = {}
f1 = open("ProblemStatistics/CFRound.txt", "w")
f2 = open("ProblemStatistics/ErrorCF.txt", "w")
f3 = open("Asim.txt", "w")

response = urllib2.urlopen(url).read()
res = json.loads(response)
# Deprecated: now we have requests
#
# HTTP Client with cookie handling
# should be handy for bots...
#
# Programmed by WangLu
# Last changed: 2011.05.16

import cookielib
import time
import urllib
import urllib2

############# configuration
HTTP_PROXY = urllib2.ProxyHandler()
HTTP_HEADERS = {'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:2.0) Gecko/20100101 Firefox/4.0'}
HTTP_RETRY_COUNT = -1     # how many times to retry on failure, negative for always retry
HTTP_RETRY_INTERVAL = 5   # seconds


# Handle cookies, and auto-retry on error
class HTTPHandler():
    def __init__(self, proxy=HTTP_PROXY):
        self.cookie = cookielib.CookieJar()
        self.http = urllib2.build_opener(proxy, urllib2.HTTPCookieProcessor(self.cookie))
        self.last_request = None
        self.last_response = None

    # auto retry on error
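# As the deprecation note above says, requests now covers this pattern. A
# rough equivalent sketch (session-level cookies, headers and retries; the
# proxy URL and the retried status codes are assumptions, not part of the
# original):
import requests
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry

session = requests.Session()  # a Session keeps cookies across requests
session.headers.update({'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:2.0) Gecko/20100101 Firefox/4.0'})
session.proxies.update({'http': 'http://127.0.0.1:3128'})  # placeholder proxy

# Retry on connection errors and common 5xx responses, with backoff between tries.
retry = Retry(total=5, backoff_factor=5, status_forcelist=[500, 502, 503, 504])
session.mount('http://', HTTPAdapter(max_retries=retry))
session.mount('https://', HTTPAdapter(max_retries=retry))

response = session.get('http://www.example.com')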
def get_proxy_opener(url, user, password):
    password_mgr = urllib2.HTTPPasswordMgrWithDefaultRealm()
    password_mgr.add_password(None, url, user, password)
    proxy_handler = urllib2.ProxyHandler({'http': url})
    proxy_auth_handler = urllib2.ProxyBasicAuthHandler(password_mgr)
    return urllib2.build_opener(proxy_handler, proxy_auth_handler)
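# A hypothetical usage sketch for the helper above (proxy address and
# credentials are placeholders): ProxyBasicAuthHandler answers the proxy's
# 407 challenge with the stored credentials.
opener = get_proxy_opener('http://proxy.example.com:8080', 'alice', 's3cret')
print opener.open('http://www.example.com').read(200)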