Example #1
 def crawl_shop_all_item(self):
     agentIp = Utils.GetAgentIp()
     header = {'ip': agentIp}
     shop_id = -1
     # Fetch the shop homepage first to discover the shop id
     url = self.shop_url
     print(url)
     ok, response = Html_Downloader.Download_Html(url, {}, header)
     if ok:
         html = etree.HTML(response.text.encode('utf-8'))
         if html is not None and html.xpath("//header[@id='mp-header']"):
             # The shop id is embedded in the header's mdv-cfg attribute,
             # e.g. mdv-cfg="{'shopId':'247506881'}"
             mdv_cfg = html.xpath("//header[@id='mp-header']")[0].get("mdv-cfg")
             if "shopId" in mdv_cfg.split(':')[0]:
                 shop_id = mdv_cfg.split(':')[1]
                 shop_id = shop_id.replace("'}", "").replace("'", "")
     url = "{shop_url}/shop/shop_auction_search.do?sort=d&p=1&page_size=90&from=h5&shop_id={shop_id}&ajson=1&_tm_source=tmallsearch&orderType=hotsell_desc".format(
         shop_url=self.shop_url, shop_id=shop_id)
     print(url)
     ok, response = Html_Downloader.Download_Html(url, {}, header)
     if not ok:
         # Retry once without the proxy header
         ok, response = Html_Downloader.Download_Html(url, {}, {})
     if ok:
         # ajson=1 makes the endpoint return JSON, so parse the body directly
         data = json.loads(response.text.encode('utf-8'))
         print(data)
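For reference, here is a minimal standalone sketch of the same two steps (fetch the shop homepage to discover the shop id, then query the ajson endpoint) using the standard requests library in place of the project's Html_Downloader. The regex and the query parameters simply mirror the example above; treat them as assumptions rather than a documented API.

    import json
    import re
    import requests

    def get_shop_id(shop_url):
        # Assumption: the mobile page embeds the id in an attribute shaped
        # like mdv-cfg="{'shopId':'247506881'}", as in the example above.
        page = requests.get(shop_url, timeout=30).text
        match = re.search(r"shopId'?\s*:\s*'?(\d+)", page)
        return match.group(1) if match else None

    def get_hot_items(shop_url, shop_id, page=1, page_size=90):
        # ajson=1 makes the endpoint answer with JSON, so the item list
        # needs no HTML parsing.
        url = ("%s/shop/shop_auction_search.do?sort=d&p=%d&page_size=%d"
               "&from=h5&shop_id=%s&ajson=1&_tm_source=tmallsearch"
               "&orderType=hotsell_desc") % (shop_url, page, page_size, shop_id)
        return json.loads(requests.get(url, timeout=30).text)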
Example #2
 def crawl_shop_all_item(self):
     agentIp = Utils.GetAgentIp()
     shop_id = -1
     driver = PhantomDriver(2, agentIp, 60)
     parms_url = "{shop_url}/i/asynSearch.htm?_ksTS={now}569_240&callback=jsonp241&mid=w-14766145001-0&wid=14766145001&path=/search.htm&search=y&orderType=hotsell_desc&scene=taobao_shop&pageNo={page_num}"
     url = "{shop_url}/search.htm?&search=y&orderType=hotsell_desc&scene=taobao_shop".format(
         shop_url=self.shop_url)
     # url="https://nanshanweng.m.tmall.com/shop/shop_auction_search.do?sort=d&p=1&page_size=12&from=h5&shop_id=247506881&ajson=1&_tm_source=tmallsearch"
     # self.testurl(url,agentIp)
     print(url)
     result = driver.download_no_quit(url)
     source = result['page_source']
     html = None
     if result['ok']:
         html = etree.HTML(source)
     shop_items = []
     if html is not None and 'page-info' in source and html.xpath(
             "//span[contains(@class,'page-info')]/text()"):
         total = int(
             html.xpath("//span[contains(@class,'page-info')]/text()")
             [0].split('/')[1])
         total = 3  # hard-coded cap on pages to crawl (overrides the parsed total)
         if html.xpath("//meta[@name='microscope-data']"):
             for meta in html.xpath("//meta[@name='microscope-data']"
                                    )[0].get('content').split(';'):
                 if 'shopid' in meta.lower():
                     shop_id = meta.split("=")[1]
                     # self.shopall.format_data(shop_id, False)
                     shop_items.extend(
                         self.parse_items(html, shop_id, agentIp))
         for i in range(1, total):
             page_num = i + 1
             print("page%s" % page_num)
             url = parms_url.format(shop_url=self.shop_url,
                                    now=long(time.time()),
                                    page_num=page_num)
             result = driver.download_no_quit(url)
             if result['ok']:
                 source = result['page_source']
                 html = etree.HTML(source)
             if result['ok'] and 'page-info' in source and html.xpath(
                     "//span[contains(@class,'page-info')]/text()"):
                 results = self.parse_items(html, shop_id, agentIp)
                 shop_items.extend(results)
             sleep(15)
         self.shopall.insert_or_update(shop_items)
     elif html is not None and 'ui-page-s-len' in source and html.xpath(
             "//b[contains(@class,'ui-page-s-len')]/text()"):
         total = int(
             html.xpath("//b[contains(@class,'ui-page-s-len')]/text()")
             [0].split('/')[1])
         total = 3  # hard-coded cap on pages to crawl (overrides the parsed total)
         if html.xpath("//meta[@name='microscope-data']"):
             for meta in html.xpath("//meta[@name='microscope-data']"
                                    )[0].get('content').split(';'):
                 if 'shopid' in meta.lower():
                     shop_id = meta.split("=")[1]
                     shop_items.extend(
                         self.parse_items1(html, shop_id, agentIp))
         for i in range(1, total):
             page_num = i + 1
             print("page%s" % page_num)
             url = parms_url.format(shop_url=self.shop_url,
                                    now=long(time.time()),
                                    page_num=page_num)
             result = driver.download_no_quit(url)
             if result['ok']:
                 source = result['page_source']
                 html = etree.HTML(source)
             if result['ok'] and 'ui-page-s-len' in source and html.xpath(
                     "//b[contains(@class,'ui-page-s-len')]/text()"):
                 results = self.parse_items1(html, shop_id, agentIp)
                 shop_items.extend(results)
             sleep(15)
         self.shopall.insert_or_update(shop_items)
     else:
         # On failure, quit and close the webdriver
         driver.return_driver().quit()
         print("Failed to fetch via %s" % agentIp)
         return -1
     # Success: release the webdriver and return the discovered shop id
     driver.return_driver().quit()
     return shop_id
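Both branches above repeat the same scan of the microscope-data meta tag, so a small helper could factor it out. The sketch below assumes the content format "pid=...;shopid=12345;..." seen in the example, not a documented contract.

    from lxml import etree

    def shop_id_from_meta(html):
        # Return the shopid value from
        # <meta name="microscope-data" content="pid=...;shopid=12345;...">,
        # or None when the tag is absent.
        metas = html.xpath("//meta[@name='microscope-data']")
        if not metas:
            return None
        for field in metas[0].get('content', '').split(';'):
            key, _, value = field.partition('=')
            if key.strip().lower() == 'shopid':
                return value.strip()
        return None

    # Usage with an already-parsed page:
    # html = etree.HTML(page_source)
    # shop_id = shop_id_from_meta(html)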
Example #3
# -*- coding: utf-8 -*-

#Used to crawl the item IDs corresponding to each order's tradeID

import json
from utils.driver_utils import ChromeDriver
from db.DataStore import *
from utils.utils import Utils
from lxml import etree
import time
import datetime
from time import sleep
import re
import random

agent_ip = Utils.GetAgentIp()
result = get_item_trade_ids()
cookies = "mt=ci%3D-1_0; thw=cn; _m_user_unitinfo_=unit|unsz; _m_unitapi_v_=1498717160426; _m_h5_tk=5497d68b5bcf376f3f03c2bfe29d5c3e_1499745724771; _m_h5_tk_enc=bd37ed1f8dad5844fa8737aa499399d3; mt=ci%3D-1_0; _tb_token_=e17e846a1e737; x=78550821; uc3=sg2=AVAJ%2F%2FuFgrZrwbvpPwMpeUNJWGnNVTEcpZhNLKPoZwE%3D&nk2=&id2=&lg2=; uss=WvmGFLDaRLuLKHzx3Jt6R6Zh8SbBg8epTAb4OU0jo4jMr30BF8ACG4yF; tracknick=; sn=%E8%8B%B1%E8%AF%AD%E4%BA%8C%E6%B2%B9%E6%9D%A1%3A%E6%8E%A8%E5%B9%BF; skt=753a73a2763c5d75; v=0; cookie2=3c92dea4c50d0cf31281f889a3a999ec; unb=857889334; t=efd1f635969594e9ad33c0ec391d9883; uc1=cookie14=UoW%2BsWPGhqNu%2Fw%3D%3D&lng=zh_CN; cna=0SPrEVg+OkQCAQ4XY4MHK7uX; isg=Avv7jk0BJmcLoRtlqwnHCbYyit-l-AMTeQ3uMe245_oRTBsudSCfohnMENr5; apush5dceacf8bcd04ef16398a2906680ab9b=%7B%22ts%22%3A1499853369995%2C%22parentId%22%3A1499850283869%7D"
cookie_dict = {
    item.split('=')[0]: item.split('=')[1]
    for item in cookies.split(';')
}

driver = ChromeDriver()
# Log in through the browser and capture fresh session cookies
cookies = driver.login_an_get('英语二油条:推广', 'tuiguang654321')
sleep(5)

# Open each trade's item page in the logged-in browser session
for item in result:
    url = "https:%s" % item['item_url']
    mydriver = driver.get_driver()
    mydriver.get(url)
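Raw Cookie headers like the one above routinely contain '=' inside values, so a safer parser splits each pair only on the first '='. Below is a sketch of that, plus installing the pairs on an open Selenium session; the domain value is an assumption, and Selenium requires the browser to already be on a page of that domain before add_cookie() accepts entries.

    def parse_cookie_header(cookie_string):
        # Split on ';' and only on the first '=' so values that themselves
        # contain '=' (e.g. base64-like tokens) stay intact.
        pairs = {}
        for item in cookie_string.split(';'):
            name, _, value = item.strip().partition('=')
            if name:
                pairs[name] = value
        return pairs

    def install_cookies(webdriver_instance, cookie_string, domain='.tmall.com'):
        # Assumption: the caller has already navigated to a page on
        # `domain`, which Selenium requires before add_cookie() works.
        for name, value in parse_cookie_header(cookie_string).items():
            webdriver_instance.add_cookie(
                {'name': name, 'value': value, 'domain': domain})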