示例#1
0
def ezpubParseBook(bookname, scid, ecid):
    """Build the ez_epub sections for a range of chapters of a book.

    The book is opened through ``Tnovel`` and the requested range is
    normalised by ``calc_chap_count``.  Every chapter inside the resulting
    window becomes one ``ez_epub.Section`` titled with the chapter title; its
    text is the chapter data with non-ASCII characters dropped, fed line by
    line through ``formatParagraph``.

    Args:
        bookname: Name of the book, passed to ``Tnovel``.
        scid: Requested first chapter id.
        ecid: Requested end chapter id; handed to ``calc_chap_count`` together
            with ``scid`` and the book's chapter count.

    Returns:
        list: The sections, in the order ``book.read_book()`` yields chapters.
    """
    sections = []
    logger.debug("ezpub parse book func start")
    book = Tnovel(bookname, loglevel=book_log_level, dbfile=dbfilename)
    scid, count = calc_chap_count(scid, ecid, book.get_chapter_count())
    bookdata = book.read_book()
    logger.debug("creating epub scid %d count %d", scid, count)
    for chapid, chaptitle, chapdata, rawdata in bookdata:
        # The window is half-open, [scid, scid + count), exactly like the
        # other chapter-range loops in this file; the previous ``>`` test let
        # one chapter past the requested range slip into the epub.
        if chapid < scid or chapid >= scid + count:
            continue
        logger.debug("reading chapid %d for epub", chapid)
        section = ez_epub.Section()
        section.css = """.em { font-style: italic; }"""
        section.title = chaptitle
        sections.append(section)
        # Drop everything that is not ASCII before formatting.
        chapdata = chapdata.encode('ascii', 'ignore')
        for line in chapdata.splitlines():
            fparagraph = formatParagraph(line)
            for f in fparagraph:
                # Diagnostic only: report fragments whose first element still
                # fails to decode as ASCII after the stripping above.
                try:
                    f[0].decode('ascii')
                except UnicodeDecodeError:
                    print(f)
                    print("it was not a ascii-encoded unicode string")
            section.text.append(fparagraph)

    logger.debug("ezpub parse book func end")
    return sections
示例#2
0
def createepub_book(bookname, scid, ecid):
    """Assemble an epub for a chapter range of ``bookname`` and build it.

    The title is the book name, the authors come from
    ``Tnovel.read_book_author()`` and the sections from ``ezpubParseBook``.
    The result is built with ``ez_epub.Book.make`` under ``./epub/<title>``.

    Args:
        bookname: Name of the book; also used as the epub title.
        scid: Requested first chapter id, forwarded to ``ezpubParseBook``.
        ecid: Requested end chapter id, forwarded to ``ezpubParseBook``.
    """
    epub = ez_epub.Book()
    novel = Tnovel(bookname, loglevel=book_log_level, dbfile=dbfilename)

    epub.title = bookname
    author_names = list(novel.read_book_author())
    epub.authors = author_names
    epub.sections = ezpubParseBook(bookname, scid, ecid)

    target = r'./epub/%s' % epub.title
    epub.make(target)
示例#3
0
def etranslate_book(bookname, scid, ecid):
    """Run the e-translator over a range of chapters and store the result.

    The requested range is normalised by ``calc_chap_count``.  Every chapter
    id in ``[scid, scid + chapcount)`` must already exist in the book;
    otherwise nothing is translated.  The stored chapter data of the selected
    chapters is sent to ``TNovelETranslator.translate`` in batches of at most
    200 chapters, and each translated text is written back with
    ``book.update_chapter_data``.

    Args:
        bookname: Name of the book, passed to ``Tnovel`` and the translator.
        scid: Requested first chapter id.
        ecid: Requested end chapter id; handed to ``calc_chap_count`` together
            with ``scid`` and the book's chapter count.

    Raises:
        ValueError: If any chapter in the range is not available in the book.
    """
    logger.debug("Enter etranslate book func")
    book = Tnovel(bookname, loglevel=book_log_level, dbfile=dbfilename)
    etransmap = book.read_book_etransmap()
    translator = TNovelETranslator(name=bookname, etransmap=etransmap)
    bookdata = book.read_book()
    # Upper bound on the number of chapters handed to one translate() call.
    batch_limit = 200
    scid, chapcount = calc_chap_count(scid, ecid, book.get_chapter_count())
    logger.debug("traslate book scid %d count %d book:%d", scid, chapcount, len(bookdata))

    # Refuse to start when part of the range is missing from the book.
    missing = [cid for cid in range(scid, scid + chapcount)
               if not book.is_chapter_exist(cid)]
    if missing:
        logger.debug('Following chapters are not available')
        logger.debug(missing)
        raise ValueError('chapters not available: %s' % (missing,))

    def _translate_and_store(chapids, chapters):
        """Translate one batch, write it back, return the number stored."""
        translated = translator.translate(chapters)
        for pos, chapid in enumerate(chapids):
            book.update_chapter_data(chapid, translated[pos])
        return len(chapids)

    transcount = 0
    chapid_list = []
    raw_data_list = []
    for chapid, chaptitle, chapdata, rawdata in bookdata:
        if chapid < scid or chapid >= (scid + chapcount):
            continue
        logger.debug("chapid id %d is withing the range %d-%d", chapid, scid, scid+chapcount)
        chapid_list.append(chapid)
        raw_data_list.append(chapdata)
        if len(chapid_list) >= batch_limit:
            logger.debug("in loop going to etranslate chapter count %d", len(raw_data_list))
            transcount += _translate_and_store(chapid_list, raw_data_list)
            chapid_list = []
            raw_data_list = []

    # Whatever is left over after the last full batch.
    logger.debug("going to etranslate chapter count %d", len(raw_data_list))
    if chapid_list:
        transcount += _translate_and_store(chapid_list, raw_data_list)

    logger.debug('total number of chapters etranslated %d', transcount)
示例#4
0
def toolkit_book(bookname, scid, ecid):
    """Translate a chapter range through an external, manually driven step.

    The raw data of every selected chapter is run through
    ``TNovelTKTranslator.translate`` and written to
    ``/tmp/<bookname>-temp.txt``, each chapter wrapped in a
    "Start of chapter id:<id>" / "End of chapter id:<id>" pair of marker
    lines.  The function then blocks on ``raw_input`` until the user
    confirms the file has been translated, reads the file back, splits it on
    the markers and stores each chapter with ``book.update_chapter_data``.

    Args:
        bookname: Name of the book; also part of the temp file name.
        scid: Requested first chapter id.
        ecid: Requested end chapter id; handed to ``calc_chap_count`` together
            with ``scid`` and the book's chapter count.

    Raises:
        ValueError: If a chapter in the range is not available in the book,
            or if the start/end markers read back from the temp file do not
            pair up (checked before any chapter is written to the book).
    """
    logger.debug("Enter toolkit book func")
    btempname = '/tmp/' + bookname + '-temp.txt'
    sdelimitter = "\nStart of chapter id:"
    edelimitter = "\nEnd of chapter id:"
    book = Tnovel(bookname, loglevel=book_log_level, dbfile=dbfilename)
    transmap = book.read_book_transmap()
    translator = TNovelTKTranslator(name=bookname, transmap=transmap)
    bookdata = book.read_book()
    scid, chapcount = calc_chap_count(scid, ecid, book.get_chapter_count())
    logger.debug("traslate toolkit book scid %d count %d book:%d", scid, chapcount, len(bookdata))

    # Refuse to start when part of the range is missing from the book.
    missing = [cid for cid in range(scid, scid + chapcount)
               if not book.is_chapter_exist(cid)]
    if missing:
        logger.debug('Following chapters are not available')
        logger.debug(missing)
        raise ValueError('chapters not available: %s' % (missing,))

    chapid_list = []
    raw_data_list = []
    for chapid, chaptitle, chapdata, rawdata in bookdata:
        if chapid < scid or chapid >= (scid + chapcount):
            continue
        logger.debug("chapid id %d is withing the range %d-%d", chapid, scid, scid+chapcount)
        chapid_list.append(chapid)
        raw_data_list.append(rawdata)

    transcount = 0
    logger.debug("going to etranslate chapter count %d", len(raw_data_list))
    if chapid_list:
        trans_data_list = translator.translate(raw_data_list)
        with codecs.open(btempname, 'w', 'utf-8') as outfile:
            for pos, chapid in enumerate(chapid_list):
                outfile.write(sdelimitter + str(chapid) + "\n")
                outfile.write(trans_data_list[pos])
                outfile.write(edelimitter + str(chapid) + "\n")

        # Hand-off point: the user processes the temp file out of band.
        raw_input("Press any key once the data is translated")

        start_marker = sdelimitter.strip()
        end_marker = edelimitter.strip()
        trans_chapid_list = []
        trans_chapdata_list = []
        with codecs.open(btempname, 'r', 'utf-8') as infile:
            new_chapter = []
            end_chapter = False
            for data in infile.readlines():
                if start_marker in data:
                    # The chapter id follows the first ':' on a marker line.
                    trans_chapid_list.append(int(data.split(':')[1].strip()))
                    end_chapter = False
                    new_chapter = []
                    continue
                if end_marker in data:
                    end_chapter = True
                    trans_chapdata_list.append(''.join(new_chapter))
                    new_chapter = []
                    continue
                # Keep non-blank lines only; text after an end marker and
                # before the next start marker is ignored.
                if end_chapter is False and data.strip() != '':
                    new_chapter.append(data)

        # A lost or duplicated marker would otherwise fail half-way through
        # the update or pair text with the wrong chapter id.
        if len(trans_chapid_list) != len(trans_chapdata_list):
            raise ValueError(
                'unbalanced chapter markers in %s: %d start, %d end'
                % (btempname, len(trans_chapid_list), len(trans_chapdata_list)))

        for trans_id, trans_text in zip(trans_chapid_list, trans_chapdata_list):
            book.update_chapter_data(trans_id, trans_text)
            transcount += 1

    logger.debug('total number of chapters etranslated %d', transcount)
示例#5
0
def update_raw_book(bookname, scid, ecid):
    """Fetch raw chapter data from the book's source sites and store it.

    One parser is created per URL returned by ``book.read_book_url()``,
    chosen by the URL's host name (``TnovelParser`` when the host is not
    recognised).  The chapter range is normalised by ``calc_chap_count``
    against the first parser's chapter list.  For each chapter the parsers
    are tried in order; a result counts as usable when it is longer than 200
    characters.  When a parser fails and another one follows, the user is
    asked (``get_url_from_user``) for a replacement URL on the next parser's
    domain.  Usable chapters are written with ``book.update_chapter_raw`` if
    the chapter already exists and ``book.add_chapter`` otherwise.

    Args:
        bookname: Name of the book, passed to ``Tnovel`` and the parsers.
        scid: Requested first chapter id.
        ecid: Requested end chapter id; handed to ``calc_chap_count`` together
            with ``scid`` and the first parser's chapter count.

    Raises:
        ValueError: If the book has no source URL, or if no parser yields
            usable data for a chapter and ``continue_check()`` declines to
            skip it.
    """
    logger.debug("Enter update book func")
    book = Tnovel(bookname, loglevel=book_log_level, dbfile=dbfilename)
    parser = []
    for url in book.read_book_url():
        dname = urlparse.urlparse(url).netloc
        logger.debug('update_raw_book() url:%s dname:%s', url, dname)
        # add parser instance
        if dname == "www.ranwen.org":
            logger.info("using ranwen parser")
            parser.append(RanWenDotOrgParser(bookname, url, loglevel=parser_log_level))
        elif dname == "tw.hjwzw.com":
            logger.info("using tv hjwzw parser")
            parser.append(TwDotHjwzwDotComParser(bookname, url, loglevel=parser_log_level))
        elif dname == "tw.bsxsw.com":
            logger.info("using tv bsxsw parser")
            parser.append(TwBsxswDotComParser(bookname, url, loglevel=parser_log_level))
        elif dname == "tw.fxnzw.com":
            logger.info("using tv fxnzw parser")
            parser.append(TwFxnzwDotComParser(bookname, url, loglevel=parser_log_level))
        elif dname == "tw.zhsxs.com":
            logger.info("using tv zhsxs parser")
            parser.append(TwZhsxsDotComParser(bookname, url, loglevel=parser_log_level))
        elif dname == "www.shumilou.co":
            logger.info("using shumilou.co parser")
            parser.append(ShumilouCoParser(bookname, url, loglevel=parser_log_level))
        elif dname == "www.feizw.com":
            logger.info("using feizw.com parser")
            parser.append(FeizwDotComParser(bookname, url, loglevel=parser_log_level))
        else:
            logger.info("using default parser")
            parser.append(TnovelParser(bookname, url))

    # Everything below indexes parser[0]; fail with a clear message instead
    # of a bare IndexError when the book has no source URL.
    if not parser:
        raise ValueError('no source url available for book %s' % (bookname,))

    # print some debug info
    for p in parser:
        logger.debug("domain: " + p.dname + " total chapters: " + str(len(p.chapter_list)))
    # check the count
    scid, count = calc_chap_count(scid, ecid, len(parser[0].chapter_list))
    logger.info('reading raw data url %s count %d', parser[0].index_page, count)

    # One (chapid, title, raw_data, weblink) record per chapter read.
    chapters = []
    for chapid in range(scid, scid + count):
        raw_data = ""
        new_url = ""
        chap_url = ""
        for pos, p in enumerate(parser):
            logger.debug('start read raw data for chapter %d', chapid)
            # The title returned by the parser is not used; chapters are
            # stored as "Chapter <id>".
            if new_url == "":
                _title, raw_data = p.get_processed_data(chapid)
            else:
                _title, raw_data = p.get_processed_data(url=new_url)
            logger.debug('end read raw data for chapter %d len %d', chapid, len(raw_data))
            if len(raw_data) > 200:
                chap_url = p.chapter_list[chapid] if new_url == "" else new_url
                break
            raw_data = ""
            # Only ask for a replacement URL when a further parser exists;
            # looking past the last parser used to raise IndexError.
            if pos + 1 < len(parser):
                new_dname = parser[pos + 1].dname
                logger.warning('Invalid url %s try using %s ', p.chapter_list[chapid], new_dname)
                new_url = get_url_from_user(p.chapter_list[chapid], new_dname)

        if len(raw_data) <= 200:
            logger.error("Raw data is null")
            # ``p`` is the last parser tried.
            logger.warning('Invalid url %s', p.chapter_list[chapid])
            if continue_check():
                continue
            raise ValueError('no usable raw data for chapter %d' % chapid)

        chapters.append((chapid, "Chapter " + str(chapid), raw_data, chap_url))

    logger.info('Total number of chapters read %d', len(chapters))
    for chapid, title, raw_data, weblink in chapters:
        if book.is_chapter_exist(chapid):
            logger.debug('Updating chapter id %d to db', chapid)
            book.update_chapter_raw(chapid, raw_data)
        else:
            logger.debug('Adding chapter id %d to db', chapid)
            book.add_chapter(chapid, title, raw_data, weblink=weblink)
示例#6
0
def remove_book(bookname):
    """Open ``bookname`` through ``Tnovel`` and call its ``remove_book``.

    Args:
        bookname: Name of the book, passed to ``Tnovel``.
    """
    logger.debug("Enter remove book func")
    # NOTE(review): ``author`` is neither a parameter nor a local, so it has
    # to resolve to a module-level name at call time -- confirm one exists,
    # otherwise this line raises NameError.
    Tnovel(bookname, dbfile=dbfilename).remove_book(author)
示例#7
0
def update_transmap(bookname, mapobj, emapobj):
    """Replace the translation maps stored for ``bookname``.

    Args:
        bookname: Name of the book, passed to ``Tnovel``.
        mapobj: Object with a ``read()`` method; its content is the first
            argument to ``Tnovel.update_transmap``.
        emapobj: Object with a ``read()`` method; its content is the second
            argument to ``Tnovel.update_transmap``.
    """
    logger.debug("Enter udpate tranamap func")
    novel = Tnovel(bookname, loglevel=book_log_level, dbfile=dbfilename)
    logger.debug("updating transmap prop")
    # Read in the same order the arguments are passed: map first, then emap.
    transmap_text = mapobj.read()
    etransmap_text = emapobj.read()
    novel.update_transmap(transmap_text, etransmap_text)
示例#8
0
def add_book(bookname, author, urllist, mapobj, emapobj):
    """Store the properties of ``bookname`` via ``Tnovel.update_book``.

    Args:
        bookname: Name of the book, passed to ``Tnovel``.
        author: Author value forwarded unchanged to ``update_book``.
        urllist: Iterable of URL strings; joined with ``~`` into one string.
        mapobj: Object with a ``read()`` method; its content is the third
            argument to ``update_book``.
        emapobj: Object with a ``read()`` method; its content is the fourth
            argument to ``update_book``.
    """
    logger.debug("Enter add book func")
    novel = Tnovel(bookname, loglevel=book_log_level, dbfile=dbfilename)
    logger.debug("updating book prop")
    # Evaluate in argument order: joined URLs, then map, then emap.
    joined_urls = '~'.join(urllist)
    transmap_text = mapobj.read()
    etransmap_text = emapobj.read()
    novel.update_book(author, joined_urls, transmap_text, etransmap_text)