def update_raw_book(bookname, scid, ecid):
    logger.debug("Enter update book func")
    book = Tnovel(bookname, loglevel=book_log_level, dbfile=dbfilename)
    parser = []
    count = 0
    raw_data_list = []
    cid_list = []
    title_list = []
    url_list = []

    # Build one parser instance per source URL registered for this book,
    # selecting the parser class by the URL's domain name.
    for url in book.read_book_url():
        uparse = urlparse.urlparse(url)
        dname = uparse.netloc
        logger.debug('update_raw_book() url:%s dname:%s', url, dname)
        if dname == "www.ranwen.org":
            logger.info("using ranwen parser")
            parser.append(RanWenDotOrgParser(bookname, url, loglevel=parser_log_level))
        elif dname == "tw.hjwzw.com":
            logger.info("using tw hjwzw parser")
            parser.append(TwDotHjwzwDotComParser(bookname, url, loglevel=parser_log_level))
        elif dname == "tw.bsxsw.com":
            logger.info("using tw bsxsw parser")
            parser.append(TwBsxswDotComParser(bookname, url, loglevel=parser_log_level))
        elif dname == "tw.fxnzw.com":
            logger.info("using tw fxnzw parser")
            parser.append(TwFxnzwDotComParser(bookname, url, loglevel=parser_log_level))
        elif dname == "tw.zhsxs.com":
            logger.info("using tw zhsxs parser")
            parser.append(TwZhsxsDotComParser(bookname, url, loglevel=parser_log_level))
        elif dname == "www.shumilou.co":
            logger.info("using shumilou.co parser")
            parser.append(ShumilouCoParser(bookname, url, loglevel=parser_log_level))
        elif dname == "www.feizw.com":
            logger.info("using feizw.com parser")
            parser.append(FeizwDotComParser(bookname, url, loglevel=parser_log_level))
        else:
            logger.info("using default parser")
            parser.append(TnovelParser(bookname, url))

    # Print some debug info about each parser's chapter index.
    for p in parser:
        logger.debug("domain: %s total chapters: %d", p.dname, len(p.chapter_list))

    # Clamp the requested chapter range against the first parser's index.
    scid, count = calc_chap_count(scid, ecid, len(parser[0].chapter_list))
    logger.info('reading raw data url %s count %d', parser[0].index_page, count)

    for i in range(count):
        raw_data = ""
        new_url = ""
        chapid = scid + i
        # Try each parser in turn until one returns usable chapter text.
        for p in parser:
            logger.debug('start read raw data for chapter %d', chapid)
            if new_url == "":
                raw_title, raw_data = p.get_processed_data(chapid)
            else:
                raw_title, raw_data = p.get_processed_data(url=new_url)
            logger.debug('end read raw data for chapter %d len %d', chapid, len(raw_data))
            # Anything shorter than 200 characters is treated as a failed fetch.
            if len(raw_data) > 200:
                if new_url == "":
                    url_list.append(p.chapter_list[chapid])
                else:
                    url_list.append(new_url)
                break
            else:
                # Fall back to the next source, if there is one: ask the user
                # for the equivalent chapter URL on the alternate domain.
                if parser.index(p) + 1 < len(parser):
                    new_dname = parser[parser.index(p) + 1].dname
                    logger.warning('Invalid url %s try using %s', p.chapter_list[chapid], new_dname)
                    new_url = get_url_from_user(p.chapter_list[chapid], new_dname)
                    raw_data = ""
        if len(raw_data) <= 200:
            # Every parser failed for this chapter; either skip it or abort.
            logger.error("Raw data is null")
            logger.warning('Invalid url %s', p.chapter_list[chapid])
            if continue_check():
                continue
            else:
                raise ValueError("Raw data is null")
        raw_data_list.append(raw_data)
        cid_list.append(chapid)
        title_list.append("Chapter " + str(chapid))

    logger.info('Total number of chapters read %d', len(raw_data_list))
    # Persist the fetched chapters: update rows that already exist, add the rest.
    if count > 0:
        for index in range(len(cid_list)):
            if book.is_chapter_exist(cid_list[index]):
                logger.debug('Updating chapter id %d to db', cid_list[index])
                book.update_chapter_raw(cid_list[index], raw_data_list[index])
            else:
                logger.debug('Adding chapter id %d to db', cid_list[index])
                book.add_chapter(cid_list[index], title_list[index], raw_data_list[index], weblink=url_list[index])
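
# Usage sketch (assumption: not the project's real entry point, and the book
# name and chapter range below are hypothetical). It shows how update_raw_book()
# is expected to be driven: pass the registered book name plus start/end chapter
# ids, and calc_chap_count() clamps that range to the chapters the first parser
# actually lists.
def _example_update_run():
    # Fetch or refresh raw text for chapters 1..10 of a book already registered
    # in the database under the name "example_book".
    update_raw_book("example_book", scid=1, ecid=10)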