Exemplos de Download.down_html em Python

Linguagem de programação: Python

Namespace / nome do pacote: download

Classe / Tipo: Download

Método / Função: down_html

Exemplos em hotexamples.com: 2

Download.down_html em Python - 2 exemplos encontrados. Esses são os exemplos do mundo real mais bem avaliados de download.Download.down_html em Python extraídos de projetos de código aberto. Você pode avaliar os exemplos para nos ajudar a melhorar a qualidade deles.

Métodos Frequentes

Exibir / Ocultar

Download(30)

download(10)

xpath(7)

get_html(5)

get(3)

get_packages(2)

__init__(2)

down_html(2)

initialize(2)

output_path(1)

next(1)

pack(1)

scanFolder(1)

percent_complete(1)

pop_user(1)

post(1)

read_content(1)

multiDown(1)

main(1)

reg_callback(1)

save_content(1)

set_hits(1)

set_amount(1)

listening_test(1)

set_progressbar(1)

set_query(1)

set_savepath(1)

set_window_width(1)

start(1)

status(1)

subtitle(1)

thread_download(1)

thumbnail(1)

time(1)

total_size(1)

unzip(1)

video_download(1)

load_user(1)

_get_next(1)

link(1)

download_and_show(1)

as_view(1)

auth(1)

cancel(1)

checkFolders(1)

crawl_comic(1)

create(1)

create_from_dict(1)

create_static(1)

custom_name(1)

Métodos Frequentes

Download (30)

download (10)

xpath (7)

get_html (5)

get (3)

get_packages (2)

__init__ (2)

down_html (2)

initialize (2)

output_path (1)

Métodos Frequentes

next (1)

pack (1)

scanFolder (1)

percent_complete (1)

pop_user (1)

post (1)

read_content (1)

multiDown (1)

main (1)

reg_callback (1)

save_content (1)

set_hits (1)

set_amount (1)

listening_test (1)

set_progressbar (1)

set_query (1)

set_savepath (1)

set_window_width (1)

start (1)

status (1)

Métodos Frequentes

save_content (1)

set_hits (1)

set_amount (1)

listening_test (1)

set_progressbar (1)

set_query (1)

set_savepath (1)

set_window_width (1)

start (1)

status (1)

subtitle (1)

thread_download (1)

thumbnail (1)

time (1)

total_size (1)

unzip (1)

video_download (1)

load_user (1)

_get_next (1)

link (1)

download_and_show (1)

as_view (1)

auth (1)

cancel (1)

checkFolders (1)

crawl_comic (1)

create (1)

create_from_dict (1)

create_static (1)

custom_name (1)

Métodos Frequentes

subtitle (1)

thread_download (1)

thumbnail (1)

time (1)

total_size (1)

unzip (1)

video_download (1)

load_user (1)

_get_next (1)

link (1)

download_and_show (1)

as_view (1)

auth (1)

cancel (1)

checkFolders (1)

crawl_comic (1)

create (1)

create_from_dict (1)

create_static (1)

custom_name (1)

del_photo (1)

del_photo_rows (1)

delete_folder (1)

downloadDataframe (1)

download_cookie (1)

is_different (1)

download_data (1)

download_first_page (1)

download_song (1)

executeCommand (1)

file (1)

file_name (1)

format (1)

getVideo (1)

go (1)

hide_photo (1)

Downloader (1)

add (1)

is_custom_name (1)

inf0 (1)

Exemplo n.º 1

0

Exibir arquivo

Arquivo: main.py Projeto: YanYii/Spider-CSDN-Article

def main():
    """Fetch the CSDN blog index page and print its HTML.

    The full crawl (building a ``Spider`` over an ``ArticleDB``) is currently
    disabled; this entry point only downloads the index page through
    ``Download.down_html`` with ``save=True`` and echoes the result.
    """
    # Index page of the CSDN blog site.
    url = 'https://blog.csdn.net/'
    download = Download()
    # NOTE(review): ``save=True`` presumably also persists the page locally --
    # confirm against Download.down_html.
    html = download.down_html(url, save=True)
    # Single-argument call form prints identically on Python 2 and Python 3.
    print(html)

Exemplo n.º 2

0

Exibir arquivo

class CSDN(object):
    """Collect category and blog-user links starting from the CSDN blog home.

    ``start`` drives the whole run: it downloads the home page, extracts the
    category links from it, downloads every category page, and gathers the
    blog-user links found there into ``blog_user`` and ``queue``.
    """

    def __init__(self):
        """Set up the downloader and the empty result containers."""
        self.download = Download()
        self.home = 'https://blog.csdn.net'
        # Category URLs built as ``home + href``. The attribute name (with its
        # spelling) is kept unchanged because external code may read it.
        self.catetories = []
        # Blog-user hrefs, in discovery order.
        self.blog_user = []
        # The same hrefs, queued for later processing by ``start``.
        self.queue = Queue.Queue()

    def visit_home(self):
        """Download the home page and return whatever ``down_html`` returns."""
        return self.download.down_html(self.home)

    def parse_category(self, html):
        """Extract category links from the home page HTML.

        Looks for ``<div class="nav_com">`` and appends ``home + href`` to
        ``self.catetories`` for every anchor inside it. Anchors without an
        ``href`` attribute are skipped instead of raising ``KeyError``.
        """
        soup = BeautifulSoup(html, 'lxml')
        div = soup.find('div', class_='nav_com')
        if div:
            a_tags = div.find_all('a')
            print(len(a_tags))
            for a_tag in a_tags:
                href = a_tag.attrs.get('href')
                if not href:
                    # Anchor without a target: nothing to crawl.
                    continue
                self.catetories.append(''.join([self.home, href]))
        print(self.catetories)

    def visit_category(self):
        """Download every collected category page and parse its blog users."""
        for category in self.catetories:
            # NOTE(review): ``save=True`` presumably persists the page locally
            # -- confirm against Download.down_html.
            html = self.download.down_html(category, save=True)
            self.parse_blog_user(html)
        print(self.blog_user)
        print(len(self.blog_user))

    def parse_blog_user(self, html):
        """Extract blog-user links from a category page.

        Looks for ``<ul class="feedlist_mod">`` and, for each
        ``<dd class="name">`` inside it, records the ``href`` of its first
        anchor in both ``self.blog_user`` and ``self.queue``. Entries with no
        anchor or no ``href`` are skipped instead of raising.
        """
        print('parse blog user')
        soup = BeautifulSoup(html, 'lxml')
        ul = soup.find('ul', class_='feedlist_mod')
        if not ul:
            return
        for dd in ul.find_all('dd', class_='name'):
            a_tag = dd.find('a')
            href = a_tag.attrs.get('href') if a_tag is not None else None
            if not href:
                continue
            self.blog_user.append(href)
            self.queue.put(href)

    def start(self):
        """Run the crawl and report how many queued blog links were drained."""
        html = self.visit_home()
        self.parse_category(html)
        self.visit_category()
        i = 0
        while not self.queue.empty():
            # TODO: download the blog article for the dequeued link; for now
            # entries are only drained and counted.
            self.queue.get()
            i += 1
        # Reproduces the output of the Python 2 statement
        # ``print 'run times ', i`` under either interpreter.
        print('%s %s' % ('run times ', i))