def handle_info_html(self, html, type_tag):
    """Parse a book detail page and persist the result as a ``Book``.

    The info panel (``id='info'``) is flattened to text with all whitespace
    removed and then cut at a fixed sequence of field labels; the text
    before the first label that is present is taken as the author. Labels
    that do not occur on the page leave their field as an empty string.

    Args:
        html: Markup of the detail page.
        type_tag: Value stored unchanged in ``book.type``.

    Returns:
        ``True`` when parsing or saving raised an exception (the failure is
        logged as a warning), ``False`` when the book was handed to
        ``self.safe_commit`` without an exception. Note the inverted
        sense: ``True`` means failure.
    """
    # Info-panel labels in the order they are expected on the page, paired
    # with the Book attribute that receives the text following the label.
    field_labels = [
        ('出版社:', 'publisher'),
        ('出品方:', 'producer'),
        ('副标题:', 'subtitle'),
        ('原作名:', 'original_title'),
        ('译者:', 'translator'),
        ('出版年:', 'year_of_publisher'),
        ('页数:', 'pages'),
        ('定价:', 'price'),
        ('装帧:', 'binding'),
        ('丛书:', 'series'),
        ('ISBN:', 'isbn'),
    ]
    intro_open_tag = '<div class="intro">'

    soup = BeautifulSoup(html, 'lxml')
    book = Book()
    try:
        # Both the info panel and the cover link live under this node, so
        # walk down to it only once.
        subject = soup.find(class_='article').find(class_='indent').find(
            class_='subjectwrap clearfix').find(class_='subject clearfix')

        # Drop every whitespace character (spaces, newlines and, because
        # str.split() splits on any Unicode whitespace, non-breaking
        # spaces too) so that labels and values form one unbroken string.
        text = ''.join(subject.find(id='info').get_text().split())
        # The author label is removed rather than split on: whatever
        # precedes the first remaining label is treated as the author.
        text = text.replace('作者:', '')

        fields = {attr: '' for _, attr in field_labels}
        current = 'author'
        for label, attr in field_labels:
            head, found, tail = text.partition(label)
            if not found:
                # Label absent (or out of the expected order): keep ''.
                continue
            # Text in front of this label belongs to the previous field.
            fields[current] = head
            current, text = attr, tail
        # Whatever is left belongs to the last label that was present.
        # This also covers pages without an ISBN, which used to abort the
        # whole record with an IndexError.
        fields[current] = text

        sections = soup.find(class_='related_info').find_all(
            class_='indent')[:2]

        book.title = soup.h1.span.get_text()
        for attr, value in fields.items():
            setattr(book, attr, value)
        book.score = soup.find(
            class_='rating_self clearfix').strong.get_text().strip()
        book.score_people = soup.find(class_='rating_people').get_text()
        book.type = type_tag
        # The intro blocks are stored as markup with only the opening
        # wrapper tag stripped.
        book.content_info = str(sections[0].find(class_='intro')).replace(
            intro_open_tag, '')
        book.author_info = str(sections[1].find(class_='intro')).replace(
            intro_open_tag, '')
        book.pic_href = subject.find(id='mainpic').a['href']
        self.safe_commit(book)
    except Exception as e:
        # Scraper boundary: any missing node or short list lands here.
        # The message needs a %s placeholder for the exception; without it
        # a stdlib logger fails to format the record and the cause is lost.
        # NOTE(review): assumes self.logger follows the logging.Logger API.
        self.logger.warning('爬起失败: %s', e)
        return True
    return False