Example #1
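"""Book crawler example.

crawling() drives a full run: get_general_info() scrapes the chapter
parameter list from the root page, get_chapter() fetches and parses one
chapter (the response is split on the '--!!tach_noi_dung!!--' marker), and
try_request() wraps requests with retry-on-connection-error logic.
"""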
import re
import time
import logging

import requests
from bs4 import BeautifulSoup

# Project-local dependencies. The import paths below are assumptions; the
# original snippet does not show where Book, Chapter, system_cfg and
# html_tool are defined.
from book import Book, Chapter
import system_cfg
import html_tool

log = logging.getLogger(__name__)


class Crawler:
    book = None
    status = None

    def __init__(self, root_url):
        self.root_url = root_url
        self.book = Book()
        self.status = False

    def set_root_url(self, url):
        self.root_url = url

    def crawling(self):
        """Crawl the whole book: validate general info, then fetch every chapter."""
        self.book = Book()  # Reinitialize Book object for retry
        log.info("crawling() called")
        if self.root_url:
            thumb, title, author, chapter_params = self.get_general_info()

            if Book.general_validate(title, author, chapter_params):
                self.book.set_thumb(thumb)
                self.book.set_title(title)
                self.book.set_author(author)
                for chapter_param in chapter_params:
                    chapter = self.get_chapter(chapter_param)
                    if not Book.chapter_validate(chapter):
                        log.error("Chapter: %s is fail crawled", chapter)
                        return False
                    self.book.add_chapter(chapter)

                self.status = True
                return True
            else:
                log.error("No validate general info")
                return False
        else:
            log.error("not define root url")
            return False

    def get_general_info(self):
        """Collect book metadata and the parameter list for every chapter."""
        log.info("get_general_info() called")
        book_thumb = None
        book_title = None
        book_author = None
        chapter_params = []

        site_rs = self.try_request(self.root_url)
        if not site_rs:
            # try_request returns None once retries are exhausted
            return book_thumb, book_title, book_author, chapter_params

        soup = BeautifulSoup(site_rs.content, 'html.parser')

        for acronym in soup.find_all('acronym'):
            if not acronym.li:
                continue
            chapter_link = acronym.li.get('onclick')

            if chapter_link:
                chapter_re = re.search(r"noidung1\('([\w\W]*)'\)", chapter_link)
                if chapter_re:
                    chapter_params.append(chapter_re.group(1))

        if chapter_params:
            first_chapter = self.get_chapter(chapter_params[0])
            if first_chapter:
                if first_chapter.book_thumb:
                    book_thumb = first_chapter.book_thumb
                if first_chapter.book_title:
                    book_title = first_chapter.book_title
                if first_chapter.book_author:
                    book_author = first_chapter.book_author

        return book_thumb, book_title, book_author, chapter_params

    def get_chapter(self, chapter_param):
        """Fetch a single chapter and extract book/chapter metadata from it."""
        log.info("get_chapter() called with param: %s", chapter_param)

        book_title = None
        book_thumb = None
        book_author = None
        chapter_title = None
        chapter_content = None
        upper_char = None

        url = system_cfg.CHAPTER_URL
        chapter_param = html_tool.decode_param_to_dict(chapter_param)
        site_rs = self.try_request(url, 'POST', data=chapter_param)
        if site_rs:
            # Use .text (decoded str) so we can split on the str marker
            content_list = site_rs.text.split('--!!tach_noi_dung!!--', 3)
            if len(content_list) >= 3:

                #####################
                # get book_thumb from css
                #####################
                css_soup = BeautifulSoup(content_list[0], 'html.parser')
                style_tag = css_soup.find('style')
                if style_tag and style_tag.string:
                    thumb_re = re.search(r'background:url\((http://[\w\W]*)\)', style_tag.string)
                    if thumb_re:
                        book_thumb = thumb_re.group(1)

                #####################
                # get book title
                # get book author
                # get chapter title
                #####################
                desc_soup = BeautifulSoup(content_list[1], 'html.parser')

                book_title_tag = desc_soup.find('span', class_='chuto40')
                if book_title_tag:
                    book_title = book_title_tag.get_text(strip=True)

                tuade_tag = desc_soup.find('div', class_='tuade')
                if tuade_tag:
                    chutieude_tags = desc_soup.find_all('span', class_='chutieude')
                    chutieude_list = []
                    for chutieude_tag in chutieude_tags:
                        if chutieude_tag.string and chutieude_tag.string.strip():
                            chutieude_list.append(chutieude_tag.string.strip())
                    if len(chutieude_list) >= 2:
                        # First entry is the author; the rest form the chapter title
                        book_author = chutieude_list[0]
                        chapter_title = " ".join(chutieude_list[1:])
                    elif len(chutieude_list) == 1:
                        chapter_title = chutieude_list[0]
                else:

                    tac_gia_tag = desc_soup.find('span', class_='tacgiaphai')
                    if tac_gia_tag:
                        book_author = tac_gia_tag.get_text(strip=True)

                    chutieude_tags = desc_soup.find_all('span', class_='chutieude')
                    chutieude_list = []
                    for chutieude_tag in chutieude_tags:
                        if chutieude_tag.text and chutieude_tag.text.strip():
                            chutieude_list.append(chutieude_tag.text.strip())
                    if len(chutieude_list) == 2:
                        chapter_title = chutieude_list[0] + ": " + chutieude_list[1]
                    elif len(chutieude_list) == 1:
                        chapter_title = chutieude_list[0]

                #####################
                # get chapter content( add chapter title to chapter content)
                #####################

                content_soup = BeautifulSoup(content_list[2], 'html.parser')
                if content_soup:
                    chuhoain_tag = content_soup.find(id='chuhoain')
                    if chuhoain_tag:
                        chuinhoa_img = chuhoain_tag.img
                        if chuinhoa_img:
                            chuhoain_src = chuinhoa_img.get('src')
                            # Point the drop-cap image at the local copy;
                            # isinstance(..., str) replaces the old Python 2
                            # str/unicode check.
                            if isinstance(chuhoain_src, str) and system_cfg.UPPER_CHAR_URL in chuhoain_src:
                                chuhoain = chuhoain_src.replace(system_cfg.UPPER_CHAR_URL, '')
                                chuinhoa_img['src'] = system_cfg.UPPER_CHAR_PATH + chuhoain
                                upper_char = chuhoain

                    chapter_content = content_soup.prettify()

                # Add chapter title to chapter_content
                if chapter_title and chapter_content:
                    chapter_content = "<div><h2 align='center'>" + chapter_title + "</h2></div>" + chapter_content

            chapter = Chapter(title=chapter_title, content=chapter_content)
            if book_title:
                chapter.set_book_title(book_title)
            if book_author:
                chapter.set_book_author(book_author)
            if book_thumb:
                chapter.set_book_thumb(book_thumb)
            if upper_char:
                chapter.set_upper_char(upper_char)

            log.info("Crawler chapter: %s", chapter_title)

            return chapter
        else:
            log.error("Can't get content of this chapter")
            return None

    def try_request(self, url, post_type='GET', try_time=0, params=None, data=None):
        """Issue an HTTP request, retrying on connection errors.

        Retries up to system_cfg.MAX_RETRY_TIME times, sleeping
        system_cfg.WAITING_TIME seconds between attempts.
        """
        try:
            headers = {'User-Agent': system_cfg.USER_AGENT}
            if post_type == 'GET':
                site_rs = requests.get(url=url, params=params, headers=headers, verify=False)
            else:
                # A preliminary GET collects the session cookies the site
                # expects before it accepts the POST.
                session = requests.Session()
                session.get(url, headers=headers, verify=False)
                site_rs = requests.post(url=url, params=params, data=data, headers=headers,
                                        cookies=session.cookies, verify=False)

            if not site_rs:
                # A Response is falsy for 4xx/5xx statuses; treat as retryable
                raise requests.exceptions.ConnectionError

            return site_rs
        except requests.exceptions.ConnectionError:
            try_time += 1
            if try_time >= system_cfg.MAX_RETRY_TIME:
                return None
            log.warning("Retrying url %s (attempt %d)", url, try_time)
            time.sleep(system_cfg.WAITING_TIME)
            return self.try_request(url, post_type, try_time, params, data)
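

# A minimal usage sketch, assuming system_cfg provides the settings used
# above (CHAPTER_URL, USER_AGENT, MAX_RETRY_TIME, WAITING_TIME) and that
# Book exposes `title` and `chapters` attributes matching its setters; the
# root URL below is a placeholder, not a real endpoint.
if __name__ == '__main__':
    logging.basicConfig(level=logging.INFO)

    # Hypothetical table-of-contents URL; replace with a real one.
    crawler = Crawler('http://example.com/book-toc')
    if crawler.crawling():
        # Book attribute names here are assumptions based on the setters above.
        print('Crawled "%s" (%d chapters)' % (crawler.book.title, len(crawler.book.chapters)))
    else:
        log.error('Crawl failed for %s', crawler.root_url)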