def run(project_name, data_dir, book_dir, epub_dir, books_file, failure_books, target_dir, category='', book_format='epub3'):
    """Tag finished products and collect the inputs of failed books.

    First, every entry in ``book_dir`` is renamed in place to
    ``<name>_<project_name>[_<category>]_<book_format><ext>``.  Then, for each
    entry of ``failure_books``, its epub directory (from ``epub_dir``) and its
    data file (from ``data_dir``) are copied into ``target_dir`` unless they
    are already present there, and the matching records found in
    ``books_file`` are written to ``<target_dir>/books.jl``.

    Args:
        project_name: Tag inserted into every renamed product file name.
        data_dir: Directory holding the per-book data files.
        book_dir: Directory holding the generated products to rename.
        epub_dir: Directory holding one sub-directory per book, named after
            the data file without its extension.
        books_file: Path passed to ``Books.create_from_file``.
        failure_books: Iterable of mappings; only the ``'filename'`` key is
            read.
        target_dir: Directory receiving the copies and ``books.jl``; created
            if missing.
        category: Optional tag placed between the project name and the
            format; omitted from the new name when empty.
        book_format: Format tag appended last to the new name.

    Raises:
        Exception: If ``book_dir``, ``data_dir``, ``epub_dir`` or
            ``books_file`` does not exist (the message names the path).
    """
    # Validate everything up front so nothing is renamed when an input is
    # missing.  Order matches the order in which the checks are reported.
    required = (
        (book_dir, 'product book directory not exists'),
        (data_dir, 'data directory not exists'),
        (epub_dir, 'epub directory not exists'),
        (books_file, 'books file not exists'),
    )
    for path, message in required:
        if not os.path.exists(path):
            raise Exception('%s: %s' % (message, path))

    # rename success products
    # NOTE: every entry is renamed unconditionally, so running this twice on
    # the same directory appends the tags twice.
    if category:
        tags = [project_name, category, book_format]
    else:
        tags = [project_name, book_format]
    for fname in os.listdir(book_dir):
        name, ext = os.path.splitext(fname)
        new_fname = '_'.join([name] + tags) + ext
        shutil.move(os.path.join(book_dir, fname), os.path.join(book_dir, new_fname))

    # move failure books
    # Create the target explicitly instead of relying on copytree to do it as
    # a side effect; otherwise the manifest write below depends on whether at
    # least one epub directory happened to be copied.
    os.makedirs(target_dir, exist_ok=True)
    books = Books.create_from_file(books_file)
    new_books = Books()
    for book in failure_books:
        filename = book['filename']
        dirname = os.path.splitext(filename)[0]

        target_epub_dir = os.path.join(target_dir, dirname)
        if not os.path.exists(target_epub_dir):
            shutil.copytree(os.path.join(epub_dir, dirname), target_epub_dir)

        target_data_file = os.path.join(target_dir, filename)
        if not os.path.exists(target_data_file):
            shutil.copy(os.path.join(data_dir, filename), target_data_file)

        # Failed books without a record in books_file are copied but left out
        # of the new manifest.
        bk = books.get_book(filename)
        if bk:
            # NOTE(review): meaning of the second positional argument is
            # defined by Books.add_book -- confirm against that class.
            new_books.add_book(bk, True)
    new_books.dump_to_file(os.path.join(target_dir, 'books.jl'))
def _clean_data_file(source_file, target_file):
    """Copy one ``.jl`` data file to ``target_file`` with cleaned articles.

    Each non-blank line is parsed as JSON, its ``title`` and ``content`` are
    passed through ``clean_article_title`` / ``clean_article_content``, and
    the result is written back as one JSON line.

    Returns:
        ``(article_count, word_count, category, sub_category)`` where
        ``word_count`` is the sum of the counts returned by
        ``clean_article_content`` and the two category values are the first
        non-empty ``category`` / ``sub_category`` seen (``''`` if none).
    """
    article_count = 0
    word_count = 0
    category = ''
    sub_category = ''
    with open(source_file, 'r', encoding='utf8') as rf, \
            open(target_file, 'w', encoding='utf8') as wf:
        for line in rf:
            line = line.strip()
            if not line:
                continue
            article = json.loads(line)
            article['title'] = clean_article_title(article['title'])
            article['content'], count = clean_article_content(article['content'])
            wf.write(json.dumps(article, ensure_ascii=False) + '\n')
            article_count += 1
            word_count += count
            if not category:
                category = article.get('category', '')
            if not sub_category:
                sub_category = article.get('sub_category', '')
    return article_count, word_count, category, sub_category


def run(source_dir, target_dir, books_file, sitename, bookformat):
    """Clean every book data file in ``source_dir`` into ``target_dir``.

    Records are loaded from ``books_file`` and indexed by their English name.
    Each top-level ``*.jl`` file in ``source_dir`` (except ``books.jl``) is
    cleaned line by line into ``target_dir`` and its book record is updated
    with category, sub-category, article count and word count.  Any other
    regular file is copied unchanged.  Sub-directories are skipped.  Finally
    the records of the processed books are written to
    ``<target_dir>/books.jl``.

    Args:
        source_dir: Directory containing the raw data files.
        target_dir: Existing directory receiving the cleaned output; must not
            be the same directory as ``source_dir``.
        books_file: Path passed to ``Books.create_from_file``.
        sitename: Value set on every loaded book record.
        bookformat: Format value set on every loaded book record.

    Returns:
        ``[data_count, other_count]``: the number of data files cleaned and
        the number of other files copied.

    Raises:
        Exception: If a path does not exist, if source and target are the
            same directory, if two records share an English name, or if a
            data file has no matching record in ``books_file``.
    """
    if not os.path.exists(source_dir):
        raise Exception('source directory not exists: %s' % source_dir)
    if not os.path.exists(target_dir):
        raise Exception('target directory not exists: %s' % target_dir)
    if not os.path.exists(books_file):
        raise Exception('books file not exists: %s' % books_file)
    # Opening the target with 'w' truncates it; if it is the very file being
    # read, every data file would be silently emptied.
    if os.path.samefile(source_dir, target_dir):
        raise Exception('source and target directory are the same: %s' % source_dir)

    bookinfo = {}
    books = Books()
    for bk in Books.create_from_file(books_file).get_books():
        en_name = bk.get_en_name()
        if en_name in bookinfo:
            raise Exception('book en_name duplicate: %s' % en_name)
        if not bk.get_filename():
            bk.set_filename(en_name + '.jl')
        bk.set_sitename(sitename)
        bk.set_format(bookformat)
        bookinfo[en_name] = bk

    data_count = 0
    other_count = 0
    # Files that are neither cleaned nor copied; books.jl is regenerated below.
    whitelist = {'books.jl'}
    for fname in os.listdir(source_dir):
        source_file = os.path.join(source_dir, fname)
        if os.path.isdir(source_file):
            continue
        if fname in whitelist:
            continue
        if not fname.endswith('.jl'):
            shutil.copy(source_file, target_dir)
            other_count += 1
            continue

        # The data file name (without extension) is the book's lookup key.
        # splitext never yields an empty root for a non-empty name, so no
        # separate emptiness check is needed.
        bookname = os.path.splitext(fname)[0]
        # Reject unknown books before writing, so no orphan output is left.
        if bookname not in bookinfo:
            raise Exception('books file has no [%s]' % bookname)

        article_count, word_count, category, sub_category = _clean_data_file(
            source_file, os.path.join(target_dir, fname))

        bk = bookinfo[bookname]
        bk.set_category(category)
        bk.set_sub_category(sub_category)
        bk.set_articlecount(article_count)
        bk.set_wordcount(word_count)
        books.add_book(bk)
        data_count += 1

    books.dump_to_file(os.path.join(target_dir, 'books.jl'))
    return [data_count, other_count]