def correct_hyperlinks(book_dir=BOOK_PATH, dest=None, include_tags=None,
                       ext='.nlpiabak', skip_untitled=True):
    """ DEPRECATED (see translate_line_footnotes)

    Find bad footnotes (only urls), visit the page, add the title to the footnote.

    Thin wrapper: runs `translate_book` with a freshly created
    `HyperlinkStyleCorrector().translate` as the only translator and forwards
    every other argument unchanged. Returns whatever `translate_book` returns.

    >>> len(correct_hyperlinks(book_dir=BOOK_PATH, dest='cleaned_hyperlinks'))
    2
    >>> rm_rf(os.path.join(BOOK_PATH, 'cleaned_hyperlinks'))
    """
    corrector = HyperlinkStyleCorrector()
    passthrough = dict(
        book_dir=book_dir,
        dest=dest,
        include_tags=include_tags,
        ext=ext,
        skip_untitled=skip_untitled,
    )
    return translate_book(translators=corrector.translate, **passthrough)
def translate_book(translators=(HyperlinkStyleCorrector().translate, translate_line_footnotes),
                   book_dir=BOOK_PATH, dest=None, include_tags=None,
                   ext='.nlpiabak', skip_untitled=True):
    """ Apply every function in `translators` to the tagged lines of the book and write the result

    Args:
      translators: a callable, or a sized collection of callables, each taking a
        line and returning the (possibly modified) line. A lone callable, or any
        object without `__len__`, is wrapped in a 1-tuple.
      book_dir: forwarded to `get_tagged_sections` to locate the book files.
      dest: where translated files are written:
        - falsy: each file is first copied to `filepath + '.' + ext.lstrip('.')`
          and then overwritten in place;
        - contains `os.path.sep`: used as a directory path, the file keeps its basename;
        - otherwise: used as a subdirectory name next to each source file.
      include_tags: forwarded to `get_tagged_sections`; additionally a line is
        only translated when this is None, or its tag is in `include_tags`, or
        its tag starts with one of the `include_tags`. All other lines are
        written out unchanged.
      ext: extension of the backup copy made when `dest` is falsy.
      skip_untitled: accepted for interface compatibility; not used in this function.

    Returns:
      list of `(fileid, lineno, filepath, destpath, line, new_line)` tuples, one
      for each time a translator changed a line. `fileid` and `lineno` are
      0-based `enumerate` indices.

    >>> len(translate_book(book_dir=BOOK_PATH, dest='cleaned_hyperlinks'))
    3
    >>> rm_rf(os.path.join(BOOK_PATH, 'cleaned_hyperlinks'))
    """
    if callable(translators) or not hasattr(translators, '__len__'):
        translators = (translators,)

    sections = get_tagged_sections(book_dir=book_dir, include_tags=include_tags)
    file_line_maps = []

    for fileid, (filepath, tagged_lines) in enumerate(sections):
        log.info('filepath={}'.format(filepath))

        # Pull all lines into memory BEFORE the destination is opened for writing:
        # when `dest` is falsy the destination is the source file itself, and
        # mode 'w' truncates it, which would lose the content if `tagged_lines`
        # were read lazily from that file.
        tagged_lines = list(tagged_lines)

        destpath = filepath
        if not dest:
            # in-place rewrite: keep a backup copy next to the original
            copyfile(filepath, filepath + '.' + ext.lstrip('.'))
        elif os.path.sep in dest:
            destpath = os.path.join(dest, os.path.basename(filepath))
        else:
            destpath = os.path.join(os.path.dirname(filepath), dest, os.path.basename(filepath))
        ensure_dir_exists(os.path.dirname(destpath))

        with open(destpath, 'w') as fout:
            log.info('destpath={}'.format(destpath))
            for lineno, (tag, line) in enumerate(tagged_lines):
                if (include_tags is None or tag in include_tags or
                        any(tag.startswith(t) for t in include_tags)):
                    # translators are chained: each one sees the previous one's output
                    for translate in translators:
                        new_line = translate(line)
                        if line != new_line:
                            file_line_maps.append(
                                (fileid, lineno, filepath, destpath, line, new_line))
                            line = new_line
                # every line is written, translated or not
                fout.write(line)
    return file_line_maps