Exemplos de Download.xpath em Python

Linguagem de programação: Python

Espaço para nome / nome do pacote: download

Classe / Tipo: Download

Método / Função: xpath

Exemplos em hotexamples.com: 7

Download.xpath em Python - 7 exemplos encontrados. Esses são os exemplos do mundo real mais bem avaliados de download.Download.xpath em Python extraídos de projetos de código aberto. Você pode avaliar os exemplos para nos ajudar a melhorar a qualidade deles.

Métodos Frequentes

Exibir Ocultar

Download(30)

download(10)

xpath(7)

get_html(5)

get(3)

get_packages(2)

__init__(2)

down_html(2)

initialize(2)

output_path(1)

next(1)

pack(1)

scanFolder(1)

percent_complete(1)

pop_user(1)

post(1)

read_content(1)

multiDown(1)

main(1)

reg_callback(1)

save_content(1)

set_hits(1)

set_amount(1)

listening_test(1)

set_progressbar(1)

set_query(1)

set_savepath(1)

set_window_width(1)

start(1)

status(1)

subtitle(1)

thread_download(1)

thumbnail(1)

time(1)

total_size(1)

unzip(1)

video_download(1)

load_user(1)

_get_next(1)

link(1)

download_and_show(1)

as_view(1)

auth(1)

cancel(1)

checkFolders(1)

crawl_comic(1)

create(1)

create_from_dict(1)

create_static(1)

custom_name(1)

Métodos Frequentes

Download (30)

download (10)

xpath (7)

get_html (5)

get (3)

get_packages (2)

__init__ (2)

down_html (2)

initialize (2)

output_path (1)

Métodos Frequentes

next (1)

pack (1)

scanFolder (1)

percent_complete (1)

pop_user (1)

post (1)

read_content (1)

multiDown (1)

main (1)

reg_callback (1)

save_content (1)

set_hits (1)

set_amount (1)

listening_test (1)

set_progressbar (1)

set_query (1)

set_savepath (1)

set_window_width (1)

start (1)

status (1)

Métodos Frequentes

save_content (1)

set_hits (1)

set_amount (1)

listening_test (1)

set_progressbar (1)

set_query (1)

set_savepath (1)

set_window_width (1)

start (1)

status (1)

subtitle (1)

thread_download (1)

thumbnail (1)

time (1)

total_size (1)

unzip (1)

video_download (1)

load_user (1)

_get_next (1)

link (1)

download_and_show (1)

as_view (1)

auth (1)

cancel (1)

checkFolders (1)

crawl_comic (1)

create (1)

create_from_dict (1)

create_static (1)

custom_name (1)

Métodos Frequentes

subtitle (1)

thread_download (1)

thumbnail (1)

time (1)

total_size (1)

unzip (1)

video_download (1)

load_user (1)

_get_next (1)

link (1)

download_and_show (1)

as_view (1)

auth (1)

cancel (1)

checkFolders (1)

crawl_comic (1)

create (1)

create_from_dict (1)

create_static (1)

custom_name (1)

del_photo (1)

del_photo_rows (1)

delete_folder (1)

downloadDataframe (1)

download_cookie (1)

is_different (1)

download_data (1)

download_first_page (1)

download_song (1)

executeCommand (1)

file (1)

file_name (1)

format (1)

getVideo (1)

go (1)

hide_photo (1)

Downloader (1)

add (1)

is_custom_name (1)

inf0 (1)

Exemplo n.º 1

0

Exibir arquivo

Arquivo: spider.py Projeto: lureiny/ncov_spider

def get_post(self, item):
    """Fetch the post behind ``item``'s source URL and fill in its details.

    PDF links are skipped. The page is downloaded, the publication date is
    read from the title block, and the non-empty paragraph texts of the
    editor container are joined into the body. The result is written back
    through ``item.set_info``.

    Returns None in every case; the item is left untouched when the URL
    points to a PDF, the download yields ``False``, or the date cannot be
    extracted.
    """
    source_url = item.get_info("sourceUrl")
    # Text after the last dot (or the whole string when there is no dot).
    if source_url.rpartition(".")[2] == "pdf":
        return

    xml = Download(source_url).request()
    if xml is False:
        return

    try:
        date_node = xml.xpath(
            '//div[@class="xxxq_text_tit"][1]/h6/span[2]')[0]
        date = date_node.text.replace("发布日期：", "")
    except Exception:
        # Missing node or missing text: report and give up on this post.
        print_info("{} 解析失败".format(source_url))
        return

    # Keep only paragraphs that carry direct text.
    body = [p.text for p in xml.xpath('//div[@class="TRS_Editor"]/p') if p.text]

    item.set_info({
        "date": date,
        "_id": generate_hash("{}{}".format(item.get_info("title"), date)),
        "source": "深圳市卫生健康委员会",
        "body": "\n".join(body),
        "effective": True
    })

Exemplo n.º 2

0

Exibir arquivo

Arquivo: spider.py Projeto: lureiny/ncov_spider

def get_post(self, item):
    """Fetch the post behind ``item``'s source URL and fill in its details.

    The page header paragraph is split on single spaces; pieces 0 and 1
    form the date (with the "时间：" label removed) and piece 3 is the
    source (with the "来源：" label removed). Non-empty paragraph texts of
    the content container are joined into the body, and everything is
    written back through ``item.set_info``.

    Returns None in every case. The item is left untouched when the
    download yields ``False`` or when the header is missing or does not
    have the expected pieces; the latter is reported via ``print_info``.
    """
    source_url = item.get_info("sourceUrl")
    xml = Download(source_url).request()
    if xml is False:
        return

    try:
        header = xml.xpath(
            '//p[@class="margin_top15 c999999 text_cencer"]')[0].text
        # Header parsing stays inside the guarded region: a header with no
        # text or with fewer than four pieces is a parse failure, not a crash.
        parts = header.split(" ")
        date = "{} {}".format(parts[0].replace("时间：", ""), parts[1])
        source = parts[3].replace("来源：", "")
    except Exception:
        print_info("{} 解析失败".format(source_url))
        return

    # Keep only paragraphs that carry direct text.
    body = [p.text for p in xml.xpath('//div[@class="content-content"]/p') if p.text]

    update_info = {
        "date": date,
        "_id": generate_hash("{}{}".format(item.get_info("title"), date)),
        "source": source,
        "body": "\n".join(body),
        "effective": True
    }
    item.set_info(update_info)

Exemplo n.º 3

0

Exibir arquivo

Arquivo: spider.py Projeto: lureiny/ncov_spider

def get_post_list(self, url, items):
    """Append one item to ``items`` for each not-yet-seen post listed at ``url``.

    Each ``<li>`` of the first "section list" container is expected to hold
    an ``<a>`` (title and href attributes) and a ``<span>`` (date text).
    Entries for which ``self.url_repeat(href)`` is not ``False`` are skipped.

    Returns None; ``items`` is extended in place. Nothing is appended when
    the download yields ``False``.
    """
    xml = Download(url).request()
    if xml is False:
        return

    for entry in xml.xpath('//div[@class="section list"][1]/ul/li'):
        link = entry.find("a")
        date_tag = entry.find("span")
        href = link.get("href")
        if self.url_repeat(href) is not False:
            continue

        post = GDWJWItem()
        title = link.get("title")
        published = date_tag.text
        post.set_info({
            "title": title,
            "sourceUrl": href,
            "_id": generate_hash("{}{}".format(title, published)),
            "agency": "广东省卫健委",
            "date": published,
            "effective": True
        })
        items.append(post)

Exemplo n.º 4

0

Exibir arquivo

Arquivo: spider.py Projeto: lureiny/ncov_spider

def get_page_num(self):
    """Return the number of listing pages advertised by the start page.

    The count is read from the first argument of the ``createPageHTML(...)``
    call embedded in the pager's ``<script>``.

    Returns:
        The page count as an int, or 1 when the download yields ``False``,
        the pager script is absent, or its first argument is not an integer.
    """
    xml = Download(self._start_url).request()
    if xml is False:
        return 1

    scripts = xml.xpath('//div[@class="zx_ml_list_page"]/script/text()')
    if not scripts:
        # No pager script on the page: treat it as a single-page listing,
        # the same fallback used when the download fails.
        return 1

    # Strip the call wrapper so only the comma-separated arguments remain.
    js_args = scripts[0].replace("createPageHTML(", "").replace(");", "")
    try:
        return int(js_args.split(",")[0])
    except ValueError:
        # First argument is not a plain integer.
        return 1

Exemplo n.º 5

0

Exibir arquivo

Arquivo: spider.py Projeto: lureiny/ncov_spider

def get_page_num(self):
    """Return the page number encoded in the start page's "last" link.

    The href of the first ``<a class="last">`` is searched for
    ``index_<digits>.html`` and the digits are returned as an int.

    Returns:
        The parsed number, or 1 when the download yields ``False``, there
        is no "last" link, it has no href, or the href does not contain
        ``index_<digits>.html``.
    """
    xml = Download(self._start_url).request()
    if xml is False:
        return 1

    last_links = xml.xpath('//a[@class="last"]')
    if not last_links:
        # No "last" link on the page: fall back to a single page.
        return 1

    hrefs = last_links[0].xpath("@href")
    if not hrefs:
        return 1

    # Require at least one digit and a literal dot, so "index_.html" or
    # "index_3xhtml" can no longer reach int() or be mis-parsed.
    match = re.search(r"index_(\d+)\.html", hrefs[0])
    if match is None:
        return 1
    return int(match.group(1))

Exemplo n.º 6

0

Exibir arquivo

Arquivo: spider.py Projeto: lureiny/ncov_spider

def get_post(self, item):
    """Collect the bullet-point text of a post and store it as the item's body.

    With more than one ``<li>`` under the "check_content_points" list, the
    tail text following each entry's ``<span>`` is collected (empty tails
    are skipped). Otherwise the direct text of the first ``<li>`` is used.
    The collected pieces are joined with newlines and written through
    ``item.set_info``.

    Returns None in every case; the item is left untouched when the
    download yields ``False`` or when extraction raises, which is reported
    via ``print_info``.
    """
    xml = Download(item.get_info("sourceUrl")).request()
    if xml is False:
        return

    paragraphs = []
    try:
        entries = xml.xpath('//div[@class="check_content_points"]/ul/li')
        # NOTE(review): the original was collapsed onto one line; the `else`
        # is read as pairing with the length check because it uses the first
        # entry directly - confirm against the upstream file.
        if len(entries) <= 1:
            # An empty result raises here and is reported below.
            paragraphs.append(entries[0].text)
        else:
            for entry in entries:
                trailing = entry.find("span").tail
                if trailing:
                    paragraphs.append(trailing)
    except Exception:
        print_info("解析错误：{}".format(item.get_info("sourceUrl")))
        return

    item.set_info({"body": "\n".join(paragraphs)})

Exemplo n.º 7

0

Exibir arquivo

Arquivo: spider.py Projeto: lureiny/ncov_spider

def get_post_list(self, url, items):
    """Append one item to ``items`` for each not-yet-seen post listed at ``url``.

    Each ``<li>`` inside the first "wendangListC" container is expected to
    hold a ``<strong>`` (date text) and an ``<a>`` (title text and href).
    A leading "." in the href is replaced with the site prefix to form the
    post URL. Entries for which ``self.url_repeat(post_url)`` is not
    ``False`` are skipped.

    Returns None; ``items`` is extended in place. Nothing is appended when
    the download yields ``False``.
    """
    xml = Download(url).request()
    if xml is False:
        return

    lis = xml.xpath('//div[@class="wendangListC"][1]//li')
    for li in lis:
        date = li.find("strong").text
        a = li.find("a")
        # Raw string: "\." in a plain literal is an invalid escape sequence
        # and triggers a warning at compile time; the pattern is unchanged.
        post_url = re.sub(r"^\.", "http://wjw.sz.gov.cn/yqxx", a.get("href"))
        if self.url_repeat(post_url) is False:
            item = SZWJWItem()
            item.set_info({
                "title": a.text,
                "sourceUrl": post_url,
                "_id": generate_hash("{}{}".format(a.text, date)),
                "agency": "深圳卫健委",
                "date": date,
                "effective": True,
                "source": "深圳市卫生健康委员会"
            })
            items.append(item)