Example #1
def mtgox_websocket_connect(s, proxytype=None, proxy=None):
	# Python 2: perform the legacy (pre-RFC 6455 draft) WebSocket handshake
	# with the MtGox feed, optionally tunnelling through an HTTP or SOCKS4a
	# proxy. Proxies and recv_line come from the surrounding module.
	host = 'websocket.mtgox.com'
	port = 80

	if proxytype=='http':
		s.connect(proxy)
		Proxies.do_http_connect(s,host,port)
	elif proxytype=='socks4a':
		s.connect(proxy)
		Proxies.do_socks4a(s,host,port)
	else:
		s.connect((host,port))

	querylines = [
		'GET /mtgox HTTP/1.1',
		'Upgrade: WebSocket',
		'Connection: Upgrade',
		'Host: websocket.mtgox.com',
		'Origin: null',
	]
	s.sendall('\r\n'.join(querylines) + '\r\n\r\n')  # sendall: send() may write only part of the buffer

	lines = []
	while True:
		line = recv_line(s)
		if line=='': break
		lines.append(line)

	if lines[0] != 'HTTP/1.1 101 Web Socket Protocol Handshake':
		raise Exception('server did not say "101" (said %s)'%repr(lines[0]))

	return s
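
A minimal usage sketch (the SOCKS proxy address is hypothetical, and Proxies and recv_line must be importable from the snippet's module):

import socket

s = socket.socket()
mtgox_websocket_connect(s, proxytype='socks4a', proxy=('127.0.0.1', 9050))
# After the handshake, pre-RFC 6455 draft frames (a 0x00 byte, a UTF-8
# payload, then a 0xFF byte) are read from and written to the socket directly.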
Example #2
def do_https_query(url, postdata=None, reqheaders=None, digest=None, digesttype='sha256', proxy=None, proxytype=None, timeout=None):
	# Python 2: fetch an HTTPS URL over a raw pyOpenSSL connection, optionally
	# through a proxy, optionally pinning the server certificate to a known
	# digest. socket, signal, urlparse, OpenSSL, Proxies and do_http_query
	# come from the surrounding module.
	scheme, netloc, path, query, fragment = urlparse.urlsplit(url)
	if scheme != 'https':
		raise Exception('not an HTTPS query')

	if len(netloc.split('@'))>2:
		raise Exception('username/password not supported')

	hp = netloc.split(':')
	if len(hp)>2 or len(hp)<1:
		raise Exception('netloc must be hostname:port')

	host = hp[0]
	if len(hp)>=2:
		port = int(hp[1])
	else:
		port = 443

	if timeout is not None:
		signal.alarm(timeout)  # the caller must have installed a SIGALRM handler

	s = socket.socket()

	if proxytype is not None:
		s.connect(proxy)
		Proxies.do_proxy_connect(s,proxytype,host,port)
	else:
		s.connect((host,port))

	ctx = OpenSSL.SSL.Context(OpenSSL.SSL.TLSv1_METHOD)
	c = OpenSSL.SSL.Connection(ctx,s)
	c.set_connect_state()
	c.do_handshake()

	if timeout is not None:
		signal.alarm(0)

	if digest is not None:
		# Certificate pinning: compare the presented certificate's digest
		# against the expected fingerprint instead of validating a CA chain.
		cert = c.get_peer_certificate()
		digestgot = cert.digest(digesttype)
		if digestgot != digest:
			raise Exception('server certificate mismatch (%s)' % digestgot)
	
	result = do_http_query(c, host, path + ('?' + query if query else ''), postdata, headers=reqheaders)

	c.close()
	return result
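
A usage sketch with certificate pinning (the URL and fingerprint are placeholders; the timeout only takes effect if the caller installed a SIGALRM handler):

result = do_https_query(
	'https://example.com/api',  # placeholder URL
	digest='AA:BB:...',         # expected sha256 fingerprint of the server cert (placeholder)
	digesttype='sha256',
	timeout=30)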
Example #3
    def crawl_job(self):
        # Crawl every station URL, rotating the proxy every num_prox requests,
        # and count how many stations returned a full record.
        count = 0
        now = datetime.now()
        current_time = now.strftime("%H:%M %d-%m-%Y")
        print('\n====== Starting Crawling at : ' + current_time +
              ' ==============================\n')

        count_prox = 1
        proxy = None  # remains None when proxy rotation is disabled

        if self.num_prox > 0:
            proxy = prox.Proxies(number_of_proxies=1).getProxiesAllInOne()[0]

        for url in self.stations_url:
            # Get name
            name = url.split('stations')[-1].replace('/', '')
            # Get Proxy { ip: xxxx , port: xxxxx}
            if self.num_prox > 0 and count_prox % self.num_prox == 0:
                print("Searching for new proxy")
                proxy = prox.Proxies(
                    number_of_proxies=1).getProxiesAllInOne()[0]

            count_prox += 1
            # create Instance for crawl
            cs = cS.Crawl_Station(url, proxy=proxy)
            # Get Data
            data = cs.getInfo()
            if len(data) >= 6:  # treat a record with at least six fields as a successful crawl
                count += 1
            #print(data)
            data['TimeCrawled'] = now.strftime("%H:%M")
            self.writeCSV(name, data)
            #time.sleep(5)

        now = datetime.now()
        current_time = now.strftime("%H:%M %d-%m-%Y")
        print('\n====== Crawling Ended at ' + current_time +
              ' ==============================\n')
        self.writeLogFile(current_time, count)
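
The rotate-every-N-requests pattern above, isolated into a self-contained sketch (fetch_proxy is a stand-in for prox.Proxies(number_of_proxies=1).getProxiesAllInOne()[0]):

def rotating_proxies(fetch_proxy, every_n):
    # Yield the current proxy indefinitely, fetching a fresh one
    # every every_n uses.
    proxy = fetch_proxy()
    count = 1
    while True:
        if count % every_n == 0:
            proxy = fetch_proxy()
        count += 1
        yield proxy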
Example #4
# Fragment: request headers for the Lagou mobile API. The imports (requests,
# json, Proxies) and the timeStamp / time1 values are defined earlier in the
# original script and are not shown in this fragment.
headers = {
    "Cookie":
    "_ga=GA1.2.841469794.1541152606; user_trace_token=20181102175657-a2701865-de85-11e8-8368-525400f775ce; LGUID=20181102175657-a2701fbd-de85-11e8-8368-525400f775ce; index_location_city=%E5%B9%BF%E5%B7%9E; _gid=GA1.2.311675459.1542615716; _ga=GA1.3.841469794.1541152606; Hm_lvt_4233e74dff0ae5bd0a3d81c6ccf756e6=1542634073,1542634080,1542634122,1542634128; JSESSIONID=ABAAABAAAGCABCC1B87E5C12282CECED77A736D4CD7FA8A; X_HTTP_TOKEN=aae2d9e96d6a68f72d98ab409a933460; sensorsdata2015jssdkcross=%7B%22distinct_id%22%3A%221672c5c65c01c7-0e8e56366a6cce-3a3a5c0e-2073600-1672c5c65c3bf%22%2C%22%24device_id%22%3A%221672c5c65c01c7-0e8e56366a6cce-3a3a5c0e-2073600-1672c5c65c3bf%22%7D; sajssdk_2015_cross_new_user=1; _gat=1; LGSID=20181119231628-167f7db1-ec0e-11e8-a76a-525400f775ce; PRE_UTM=; PRE_HOST=; PRE_SITE=https%3A%2F%2Fm.lagou.com%2Fsearch.html; PRE_LAND=https%3A%2F%2Fm.lagou.com%2Fjobs%2F5219979.html; Hm_lpvt_4233e74dff0ae5bd0a3d81c6ccf756e6={timeStamp}; LGRID={time}-1c458fde-ec0e-11e8-895f-5254005c3644"
    .format(timeStamp=timeStamp, time=time1),
    "Referer":
    "https://m.lagou.com/search.html",
    "User-Agent":
    "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36",
    "X-Requested-With":
    "XMLHttpRequest",
}

city = "广州"  # Guangzhou
positionName = "python"
# pageNo = "1"
pageSize = "15"
proxies = Proxies()


def get_detail_url(pageNo, proxies):
    # Query one page of the mobile search API and yield a job-detail URL for
    # every position in the result list.
    base_url = "https://m.lagou.com/search.json?city={city}&positionName={positionName}&pageNo={pageNo}&" \
               "pageSize={pageSize}".format(city=city, positionName=positionName, pageNo=pageNo, pageSize=pageSize)
    res = requests.get(base_url, headers=headers, proxies=proxies)
    content = res.content.decode()
    dict1 = json.loads(content)
    # print(dict1)
    list1 = dict1['content']['data']['page']['result']
    for i in list1:
        yield "https://m.lagou.com/jobs/{}.html".format(i['positionId'])
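
A usage sketch (requests expects proxies as a dict like {'https': 'http://host:port'}; the project's Proxies() object is assumed to be compatible):

for page in range(1, 4):  # first three result pages
    for detail_url in get_detail_url(page, proxies):
        print(detail_url)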


# Fields to extract: position title, salary, work location, years of experience, education requirement, company name, job description