Example #1
def update_raw_book(bookname, scid, ecid):
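    """Fetch raw chapter data for bookname and store it in the book's database.

    One parser is built per stored source URL, chosen by domain; later
    parsers act as fallback mirrors when a fetch returns too little data.
    Chapters from scid onward (up to ecid) are read and then written to the db.
    """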
    logger.debug("Enter update book func")
    book = Tnovel(bookname, loglevel=book_log_level, dbfile=dbfilename)
    parser = []
    count = 0
    raw_data_list = []
    cid_list = []
    title_list = []
    url_list = []
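    # build one parser per stored source URL, chosen by the URL's domain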
    for url in book.read_book_url():
        uparse = urlparse.urlparse(url)
        dname = uparse.netloc
        logger.debug('update_raw_book() url:%s dname:%s', url, dname)
        # add parser instance
        if dname == "www.ranwen.org":
            logger.info("using ranwen parser")
            parser.append(RanWenDotOrgParser(bookname, url, loglevel=parser_log_level))
        elif dname == "tw.hjwzw.com":
            logger.info("using tv hjwzw parser")
            parser.append(TwDotHjwzwDotComParser(bookname, url, loglevel=parser_log_level))
        elif dname == "tw.bsxsw.com":
            logger.info("using tv bsxsw parser")
            parser.append(TwBsxswDotComParser(bookname, url, loglevel=parser_log_level))
        elif dname == "tw.fxnzw.com":
            logger.info("using tv fxnzw parser")
            parser.append(TwFxnzwDotComParser(bookname, url, loglevel=parser_log_level))
        elif dname == "tw.zhsxs.com":
            logger.info("using tv zhsxs parser")
            parser.append(TwZhsxsDotComParser(bookname, url, loglevel=parser_log_level))
        elif dname == "www.shumilou.co":
            logger.info("using shumilou.co parser")
            parser.append(ShumilouCoParser(bookname, url, loglevel=parser_log_level))
        elif dname == "www.feizw.com":
            logger.info("using feizw.com parser")
            parser.append(FeizwDotComParser(bookname, url, loglevel=parser_log_level))
        else:
            logger.info("using default parser")
            parser.append(TnovelParser(bookname, url))
    # print some debug info
    for p in parser:
        logger.debug("domain: " + p.dname + " total chapters: " + str(len(p.chapter_list)))
    # derive the effective start cid and the number of chapters to fetch
    scid, count = calc_chap_count(scid, ecid, len(parser[0].chapter_list))
    logger.info('reading raw data url %s count %d', parser[0].index_page, count)
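    # fetch loop: one iteration per chapter in [scid, scid + count)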
    for i in range(count):
        raw_data = ""
        new_url = ""
        chapid = scid + i
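        # try each configured parser (mirror) in turn until one yields usable data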
        for p in parser:
            logger.debug('start reading raw data for chapter %d', chapid)
            if new_url == "":
                raw_title, raw_data = p.get_processed_data(chapid)
            else:
                raw_title, raw_data = p.get_processed_data(url=new_url)
            logger.debug('finished reading raw data for chapter %d, len %d', chapid, len(raw_data))
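            # heuristic: treat anything up to 200 chars as a failed fetch (error page or empty body)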
            if len(raw_data) > 200:
                if new_url == "":
                    url_list.append(p.chapter_list[chapid])
                else:
                    url_list.append(new_url)
                break
            else:
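                # fetch failed on this mirror: ask the user for the matching URL on the next one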
                next_index = parser.index(p) + 1
                if next_index < len(parser):
                    new_dname = parser[next_index].dname
                    logger.warning('Invalid url %s, trying %s instead', p.chapter_list[chapid], new_dname)
                    new_url = get_url_from_user(p.chapter_list[chapid], new_dname)
                    raw_data = ""
        if len(raw_data) <= 200:
            logger.error("Raw data is empty or too short")
            logger.warning('Invalid url %s', p.chapter_list[chapid])
            if continue_check():
                continue
            else:
                raise ValueError("no valid raw data for chapter %d" % chapid)

        raw_data_list.append(raw_data)
        cid_list.append(chapid)
        title_list.append("Chapter " + str(chapid))

    logger.info('Total number of chapters read %d', len(raw_data_list))
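    # persist the fetched chapters: update rows that already exist, insert the rest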
    if count > 0:
        for index in range(len(cid_list)):
            if book.is_chapter_exist(cid_list[index]):
                logger.debug('Updating chapter id %d in db', cid_list[index])
                book.update_chapter_raw(cid_list[index], raw_data_list[index])
            else:
                logger.debug('Adding chapter id %d to db', cid_list[index])
                book.add_chapter(cid_list[index], title_list[index], raw_data_list[index], weblink=url_list[index])
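
# Hypothetical usage (book name and chapter range are made up for illustration):
# update_raw_book("mybook", scid=1, ecid=10)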