def clean(txt, startline=1, pretty_quote=True, correct_word_break=None,
          guess_chapter=True, guess_parasep=False):
    """Run text through the three-stage reformatting pipeline.

    The stages are applied in a fixed order: ``ptxt2ftxt``, then
    ``ftxtclean``, then ``ftxt2markdown``.  Each stage receives the output
    of the previous one, and the output of the last stage is returned.

    Args:
        txt: Input text handed to ``ptxt2ftxt``.
        startline: Passed as the second positional argument of
            ``ptxt2ftxt``.
        pretty_quote: Passed as the second positional argument of
            ``ftxtclean``.
        correct_word_break: Passed as the third positional argument of
            ``ftxtclean``.
        guess_chapter: Passed as the second positional argument of
            ``ftxt2markdown``.
        guess_parasep: Passed as the third positional argument of
            ``ftxt2markdown``.

    Returns:
        Whatever ``ftxt2markdown`` returns for the cleaned text.
    """
    # The helpers are imported inside the function, on every call.
    from ptxt2ftxt import ftxtclean, ptxt2ftxt
    from ftxt2markdown import ftxt2markdown

    ftxt = ptxt2ftxt(txt, startline)
    cleaned = ftxtclean(ftxt, pretty_quote, correct_word_break)
    return ftxt2markdown(cleaned, guess_chapter, guess_parasep)
def clean(txt, startline=1, pretty_quote=True, correct_word_break=None,
          guess_chapter=True, guess_parasep=False):
    """Convert *txt* by chaining ``ptxt2ftxt``, ``ftxtclean`` and
    ``ftxt2markdown``.

    Arguments are forwarded positionally:

    * ``startline`` goes to ``ptxt2ftxt``;
    * ``pretty_quote`` and ``correct_word_break`` go to ``ftxtclean``;
    * ``guess_chapter`` and ``guess_parasep`` go to ``ftxt2markdown``.

    Returns the value produced by the final ``ftxt2markdown`` call.
    """
    from ptxt2ftxt import ptxt2ftxt, ftxtclean
    from ftxt2markdown import ftxt2markdown

    stage_one = ptxt2ftxt(txt, startline)
    stage_two = ftxtclean(stage_one, pretty_quote, correct_word_break)
    # Debugging aid: to inspect the text between cleaning and the final
    # conversion, dump ``stage_two`` (UTF-8 encoded) to "dbg.txt" here.
    result = ftxt2markdown(stage_two, guess_chapter, guess_parasep)
    return result
def run(self, path_to_ebook):
    """Reformat a text file and return the path of the converted copy.

    The file is read, its encoding is detected with ``force_encoding`` and
    the content is decoded to unicode (undecodable bytes are replaced).
    When ``prefs['reformat']`` is truthy the text is passed through
    ``ptxt2ftxt``, ``ftxtclean`` and ``ftxt2markdown``, each configured
    from ``prefs``.  The resulting text is always written, UTF-8 encoded,
    to a temporary ``.txt`` file obtained from ``self.temporary_file``.

    Args:
        path_to_ebook: Path of the input file to read.

    Returns:
        The ``name`` attribute of the temporary file that holds the output.
    """
    print("reformatter: "+path_to_ebook)

    # Use a context manager so the input handle is released even when
    # read() raises; the previous code never closed it.
    # NOTE(review): the file is opened in text mode ('r') although the raw
    # bytes are fed to encoding detection -- 'rb' may be more appropriate
    # on platforms that translate line endings; confirm before changing.
    with open(path_to_ebook, 'r') as f:
        raw = f.read()

    encoding = force_encoding(raw, True)
    print("Detected encoding: ", encoding)
    txt = unicode(raw, encoding, errors='replace')

    # reformat
    if prefs['reformat']:
        print("reformatting...")
        from ptxt2ftxt import ptxt2ftxt, ftxtclean
        from ftxt2markdown import ftxt2markdown
        txt = ptxt2ftxt(txt, para_by_mark=prefs['para_by_mark'])
        txt = ftxtclean(txt,
                        pretty_quote=prefs['pretty_quote'],
                        correct_word_break=prefs['correct_word_break'])
        txt = ftxt2markdown(txt,
                            guessChapter=prefs['guess_chapter'],
                            guessParaSep=prefs['insert_empty_paragraph'])

    # save as temporary file
    # Named ``out_file`` (not ``tempfile``) to avoid shadowing the stdlib
    # module name; closed in ``finally`` so a failed write does not leak it.
    out_file = self.temporary_file('.txt')
    try:
        out_file.write(txt.encode('utf-8'))
    finally:
        out_file.close()

    print("save as ", out_file.name)
    return out_file.name