Exemplo n.º 1
0
def rows2txt():
    """Dump the HTML-stripped text of every chapter to disk and back to the DB.

    Walks the books page by page via ``db.list``.  For each book a directory
    is obtained from ``mkdir(book_id)``; for each of its chapters that has
    stored text, the text is passed through ``htmlstrip``, written to
    ``<directory>/<chapter id>`` with ``write2file``, and the chapter's size
    and text are updated in the database.

    An exception raised while stripping, writing or updating a single chapter
    is reported through ``log`` and processing continues with the next one.
    """
    db = DdwDb()
    count = db.get_book_count()["count"]

    pageSize = 100
    # Floor division keeps ``page`` an integer for range() regardless of the
    # interpreter's division semantics.
    page = count // pageSize + 1

    for i in range(0, page + 1):
        # NOTE(review): the +94 is a hard-coded offset passed to db.list();
        # presumably a resume point from an earlier run -- confirm.
        step = i * pageSize + 94
        rows = db.list(step)
        if rows is None:
            continue
        for item in rows:
            print("book -------------------------------" + str(item["id"]))
            path = mkdir(item["id"])
            chs = db.get_chapters(item["id"])
            if chs is None:
                continue
            for ch in chs:
                chtext = db.get_chapter_text(ch["id"])
                if chtext is None:
                    continue
                try:
                    file_path = os.path.join(path, str(ch["id"]))
                    text = htmlstrip(chtext["text"])
                    # Write to file first, then record size and text.
                    write2file(file_path, text)
                    db.update_chapter_size(ch["id"], len(text))
                    db.update_chapter_text(ch["id"], text)
                    print(ch["id"])
                except Exception as ex:
                    # Log the type of the exception actually raised; the
                    # constant ``Exception`` class carries no information.
                    log("Error: ch " + str(ch["id"]) + " " + str(type(ex)) + ":" + str(ex))
Exemplo n.º 2
0
def rows2txt():
    """Export each chapter's stripped text to a file and update the database.

    Books are fetched in pages through ``db.list``.  Every book gets a
    directory from ``mkdir(book_id)``.  Each chapter with stored text is
    cleaned with ``htmlstrip``, saved as ``<directory>/<chapter id>`` using
    ``write2file``, and its size and text are then updated via
    ``db.update_chapter_size`` / ``db.update_chapter_text``.

    Errors raised while handling one chapter are passed to ``log`` and do not
    interrupt the remaining chapters.
    """
    db = DdwDb()
    count = db.get_book_count()['count']

    pageSize = 100
    # ``//`` guarantees an integer page count for range().
    page = count // pageSize + 1

    for i in range(0, page + 1):
        # NOTE(review): magic offset of 94 added to every page start;
        # looks like a manual resume position -- verify before reuse.
        step = i * pageSize + 94
        rows = db.list(step)
        if rows is None:
            continue

        for item in rows:
            book_id = item["id"]
            print("book -------------------------------" + str(book_id))
            path = mkdir(book_id)

            chs = db.get_chapters(book_id)
            if chs is None:
                continue

            for ch in chs:
                ch_id = ch['id']
                chtext = db.get_chapter_text(ch_id)
                if chtext is None:
                    continue
                try:
                    file_path = os.path.join(path, str(ch_id))
                    text = htmlstrip(chtext["text"])
                    # Persist to disk before touching the database rows.
                    write2file(file_path, text)
                    db.update_chapter_size(ch_id, len(text))
                    db.update_chapter_text(ch_id, text)
                    print(ch_id)
                except Exception as ex:
                    # Report the concrete exception type, not the base class.
                    log("Error: ch " + str(ch_id) + " " + str(type(ex)) + ":" + str(ex))
Exemplo n.º 3
0
def write_urls(first_id=24154, last_id=24221,
               out_dir="D:\\liubaikui\\jiushulou\\script\\urls"):
    """Write, per book, the ids of its chapters that have no stored text.

    For every book id in ``first_id..last_id`` (inclusive) for which
    ``db.get_book`` returns a book and ``db.get_chapters`` returns a
    non-empty result, the file ``<out_dir>/<book id>`` is created or
    truncated and receives one chapter id per line for each chapter whose
    ``db.get_chapter_text`` result is None.

    Args:
        first_id: first book id to process.
        last_id: last book id to process (inclusive).
        out_dir: directory in which the per-book files are written.
    """
    db = DdwDb()
    print(os.path.dirname(__file__))
    for i in range(first_id, last_id + 1):
        book = db.get_book(i)
        if book is None:
            continue
        chs = db.get_chapters(i)
        if chs is None or len(chs) == 0:
            continue

        print("write to " + str(i))
        path = os.path.join(out_dir, str(i))
        # Collect the ids before opening the file so a database error does
        # not leave a truncated file behind.
        urls = [
            str(var['id']) + "\n"
            for var in chs
            if db.get_chapter_text(var['id']) is None
        ]
        # ``with`` closes the handle even if the write raises.
        with open(path, 'w+') as obj_file:
            obj_file.writelines(urls)
Exemplo n.º 4
0
def write_urls(first_id=24154, last_id=24221,
               out_dir="D:\\liubaikui\\jiushulou\\script\\urls"):
    """List the chapters lacking text for each book, one file per book.

    Iterates book ids from ``first_id`` through ``last_id`` inclusive.  Books
    that ``db.get_book`` does not return, or whose ``db.get_chapters`` result
    is None or empty, are skipped.  For the rest, ``<out_dir>/<book id>`` is
    (re)created containing the id of every chapter for which
    ``db.get_chapter_text`` returns None, one id per line.

    Args:
        first_id: lowest book id to process.
        last_id: highest book id to process (inclusive).
        out_dir: destination directory for the generated files.
    """
    db = DdwDb()
    print(os.path.dirname(__file__))
    for book_id in range(first_id, last_id + 1):
        if db.get_book(book_id) is None:
            continue
        chs = db.get_chapters(book_id)
        if chs is None or len(chs) == 0:
            continue

        print("write to " + str(book_id))
        # Gather missing-chapter ids first; the file is only touched once
        # all database lookups have succeeded.
        urls = []
        for var in chs:
            if db.get_chapter_text(var['id']) is None:
                urls.append(str(var['id']) + "\n")

        path = os.path.join(out_dir, str(book_id))
        # Context manager guarantees the handle is released on error.
        with open(path, 'w+') as obj_file:
            obj_file.writelines(urls)
Exemplo n.º 5
0
def save_text(id):
    """Fetch and store the text of every chapter of book ``id`` that lacks it.

    Does nothing when ``get_book(id)`` returns None.  Otherwise each chapter
    returned by ``get_chapters(id)`` is checked with ``get_chapter_text``;
    when no text is stored, ``get_chapter(id, chapter_id)`` is called and a
    non-empty result is saved with ``insert_chapter_text``.  Chapters that
    already have text are only reported.

    An exception raised while fetching or inserting one chapter is passed to
    ``log`` and the loop continues with the next chapter.

    Args:
        id: book id (name kept for caller compatibility although it shadows
            the builtin).
    """
    dw = DdwDb()
    book = dw.get_book(id)
    if book is None:
        return

    print('-------------begin ' + str(id) + '-------------')
    chs = dw.get_chapters(id)
    # Must be ``and``: with ``or`` a None result reached len(None) and raised
    # TypeError, and an empty list never reached the "no chapters" branch.
    if chs is not None and len(chs) > 0:
        for var in chs:
            print(var['id'])
            t = dw.get_chapter_text(var['id'])
            if t is None:
                try:
                    text = get_chapter(id, var['id'])
                    if text is not None and text != '':
                        dw.insert_chapter_text(var['id'], text)
                        print('-------------end of ' + str(id) + '-----------')
                except Exception as ex:
                    # Log the concrete exception type, not the base class.
                    log("Error: get_chapter " + str(id) + " " + str(type(ex)) + ":" + str(ex))
            else:
                # Only report "exists" when text was actually found.
                print("Info: chapter " + str(var['id']) + " exists")
    else:
        print('Info: have no chapters')
Exemplo n.º 6
0
Arquivo: ddw.py Projeto: wangjun/novel
def save_text(id):
    """Download and persist missing chapter texts for the book ``id``.

    Returns immediately if ``get_book(id)`` yields None.  For each chapter
    from ``get_chapters(id)`` with no stored text (``get_chapter_text``
    returns None), the text is requested through ``get_chapter`` and, when
    non-empty, stored with ``insert_chapter_text``.  Chapters whose text is
    already stored are reported as existing.

    Failures on an individual chapter are sent to ``log``; the remaining
    chapters are still processed.

    Args:
        id: book id (the name shadows the builtin but is part of the
            existing interface).
    """
    dw = DdwDb()
    if dw.get_book(id) is None:
        return

    print('-------------begin ' + str(id) + '-------------')
    chs = dw.get_chapters(id)
    # The original test used ``or``, which called len(None) on a None result
    # and treated an empty list as "has chapters".
    if chs is None or len(chs) == 0:
        print('Info: have no chapters')
        return

    for var in chs:
        chapter_id = var['id']
        print(chapter_id)
        if dw.get_chapter_text(chapter_id) is not None:
            # Text already stored: report it and move on.
            print("Info: chapter " + str(chapter_id) + " exists")
            continue
        try:
            text = get_chapter(id, chapter_id)
            if text is not None and text != '':
                dw.insert_chapter_text(chapter_id, text)
                print('-------------end of ' + str(id) + '-----------')
        except Exception as ex:
            # type(ex) identifies the real failure; ``Exception`` itself is
            # the same constant for every error.
            log("Error: get_chapter " + str(id) + " " +
                str(type(ex)) + ":" + str(ex))