示例#1
0
def run(project_name, data_dir, book_dir, epub_dir, books_file, failure_books, target_dir, category='', book_format='epub3'):
	"""Tag the finished products in book_dir and copy failed books to target_dir.

	Every entry of ``book_dir`` is renamed in place to
	``<stem>_<project_name>[_<category>]_<book_format><ext>``; the category
	part is left out when ``category`` is empty.

	For each record in ``failure_books`` (a mapping with at least the keys
	``'bookname'`` and ``'filename'``) the data file ``data_dir/<filename>``
	and the directory ``epub_dir/<filename stem>`` are copied into
	``target_dir`` unless a path of that name already exists there. The
	matching entries of ``books_file`` are collected and dumped to
	``target_dir/books.jl``.

	Raises:
		Exception: when ``book_dir``, ``data_dir``, ``epub_dir`` or
			``books_file`` does not exist (checked in that order).
	"""
	# Checked in this fixed order so the first missing path is the one reported.
	required = (
		(book_dir, 'product book directory not exists'),
		(data_dir, 'data directory not exists'),
		(epub_dir, 'epub directory not exists'),
		(books_file, 'books file not exists'),
	)
	for location, complaint in required:
		if not os.path.exists(location):
			raise Exception(complaint)

	# The suffix is the same for every product, so build it once.
	if category:
		suffix_parts = [project_name, category, book_format]
	else:
		suffix_parts = [project_name, book_format]
	for entry in os.listdir(book_dir):
		stem, extension = os.path.splitext(entry)
		tagged = '_'.join([stem] + suffix_parts) + extension
		shutil.move(os.sep.join([book_dir, entry]), os.sep.join([book_dir, tagged]))

	# Set the failed books aside together with their catalogue entries.
	catalogue = Books.create_from_file(books_file)
	failed = Books()
	failed_index_path = os.sep.join([target_dir, 'books.jl'])
	for record in failure_books:
		# 'bookname' is not used below, but the lookup is kept so that a
		# record without that key still fails with KeyError.
		_ = record['bookname']
		data_name = record['filename']
		folder_name = os.path.splitext(data_name)[0]
		data_from = os.sep.join([data_dir, data_name])
		data_to = os.sep.join([target_dir, data_name])
		epub_from = os.sep.join([epub_dir, folder_name])
		epub_to = os.sep.join([target_dir, folder_name])
		# Existing copies are left alone, so a repeated run does not overwrite them.
		if not os.path.exists(epub_to):
			shutil.copytree(epub_from, epub_to)
		if not os.path.exists(data_to):
			shutil.copy(data_from, data_to)
		entry = catalogue.get_book(data_name)
		if entry:
			# The meaning of the second argument is defined by Books.add_book,
			# which is not visible here; it is passed through unchanged.
			failed.add_book(entry, True)

	failed.dump_to_file(failed_index_path)
示例#2
0
def run(source_dir, target_dir, books_file, sitename, bookformat):
	"""Clean the ``.jl`` data files of source_dir into target_dir and rebuild books.jl.

	Each book listed in ``books_file`` is stamped with ``sitename`` and
	``bookformat`` (and given the filename ``<en_name>.jl`` when it has none).
	Then every regular file directly inside ``source_dir`` is handled:

	* ``books.jl`` is skipped,
	* a ``*.jl`` file is read line by line as JSON; title and content are
	  passed through ``clean_article_title`` / ``clean_article_content`` and
	  the result is written to the same file name in ``target_dir``. The
	  article count, the summed count returned by the content cleaner, and
	  the first non-empty ``category`` / ``sub_category`` are stored on the
	  book whose en_name equals the file stem,
	* any other file is copied to ``target_dir`` unchanged.

	Finally the processed books are dumped to ``target_dir/books.jl``.

	Returns:
		``[data_count, other_count]``: number of data files cleaned and
		number of other files copied.

	Raises:
		Exception: when a directory or the books file is missing, when
			source and target are the same directory, when two books share
			an en_name, or when a data file has no entry in the books file.
	"""
	if not os.path.exists(source_dir):
		raise Exception('source directory not exists: %s' % source_dir)
	if not os.path.exists(target_dir):
		raise Exception('target directory not exists: %s' % target_dir)
	if not os.path.exists(books_file):
		raise Exception('books file not exists: %s' % books_file)
	# Cleaning in place would open each data file for reading and writing at
	# the same time; mode 'w' truncates it before a single line is read, so
	# the source data would be destroyed.
	if os.path.samefile(source_dir, target_dir):
		raise Exception('source and target directory are the same: %s' % source_dir)

	bookinfo = {}
	books = Books()
	for bk in Books.create_from_file(books_file).get_books():
		en_name = bk.get_en_name()
		if en_name in bookinfo:
			raise Exception('book en_name duplicate: %s' % en_name)
		if not bk.get_filename():
			bk.set_filename(en_name + '.jl')
		bk.set_sitename(sitename)
		bk.set_format(bookformat)
		bookinfo[en_name] = bk

	data_count = 0
	other_count = 0
	whitelist = {'books.jl'}  # regenerated at the end, never copied or cleaned
	for fname in os.listdir(source_dir):
		source_file = os.path.join(source_dir, fname)
		if os.path.isdir(source_file) or fname in whitelist:
			continue
		if not fname.endswith('.jl'):
			shutil.copy(source_file, target_dir)
			other_count += 1
			continue

		# The book is identified by the file stem (never empty for a '*.jl'
		# name). Check it before writing anything so a failure does not
		# leave an orphan cleaned file behind in target_dir.
		bookname = os.path.splitext(fname)[0]
		if bookname not in bookinfo:
			raise Exception('books file has no [%s]' % bookname)
		bk = bookinfo[bookname]

		target_file = os.path.join(target_dir, fname)
		article_count = 0
		word_count = 0
		category = ''
		sub_category = ''
		with open(source_file, 'r', encoding='utf8') as rf, open(target_file, 'w', encoding='utf8') as wf:
			for line in rf:
				line = line.strip()
				if not line:
					continue
				article = json.loads(line)
				article['title'] = clean_article_title(article['title'])
				article['content'], count = clean_article_content(article['content'])
				wf.write(json.dumps(article, ensure_ascii=False) + '\n')
				article_count += 1
				word_count += count
				# The first article carrying a value decides the book's category.
				if not category:
					category = article.get('category', '')
				if not sub_category:
					sub_category = article.get('sub_category', '')

		bk.set_category(category)
		bk.set_sub_category(sub_category)
		bk.set_articlecount(article_count)
		bk.set_wordcount(word_count)
		books.add_book(bk)
		data_count += 1

	books.dump_to_file(os.path.join(target_dir, 'books.jl'))
	return [data_count, other_count]