def print_spine() -> int:
	"""
	Entry point for `se print-spine`

	Generate the <spine> element for each given ebook source directory, and
	either print it to stdout or splice it into the directory's content.opf.
	"""
	parser = argparse.ArgumentParser(description="Print the <spine> element for the given Standard Ebooks source directory to standard output, for use in that directory’s content.opf.")
	parser.add_argument("-i", "--in-place", action="store_true", help="overwrite the <spine> element in content.opf instead of printing to stdout")
	parser.add_argument("directories", metavar="DIRECTORY", nargs="+", help="a Standard Ebooks source directory")
	args = parser.parse_args()

	# Printing to stdout only makes sense for a single target
	if not args.in_place and len(args.directories) > 1:
		se.print_error("Multiple directories are only allowed with the [bash]--in-place[/] option.")
		return se.InvalidArgumentsException.code

	for target in args.directories:
		try:
			se_epub = SeEpub(target)
		except se.SeException as ex:
			se.print_error(ex)
			return ex.code

		if not args.in_place:
			print(se_epub.generate_spine())
			continue

		# Replace the existing <spine> element in the metadata, re-indenting
		# the generated markup to match the file's layout
		replacement = "\n\t" + "\n\t".join(se_epub.generate_spine().splitlines())
		se_epub.metadata_xml = regex.sub(r"\s*<spine>.+?</spine>", replacement, se_epub.metadata_xml, flags=regex.DOTALL)

		with open(se_epub.metadata_file_path, "r+", encoding="utf-8") as metadata_file:
			metadata_file.write(se_epub.metadata_xml)
			metadata_file.truncate()

	return 0
def recompose_epub() -> int:
	"""
	Entry point for `se recompose-epub`

	Recompose the given ebook source directory into a single (X)HTML5 document
	and write it to a file (with --output) or to standard output.
	"""
	parser = argparse.ArgumentParser(description="Recompose a Standard Ebooks source directory into a single (X?)HTML5 file, and print to standard output.")
	parser.add_argument("-o", "--output", metavar="FILE", type=str, default="", help="a file to write output to instead of printing to standard output")
	parser.add_argument("-x", "--xhtml", action="store_true", help="output XHTML instead of HTML5")
	parser.add_argument("directory", metavar="DIRECTORY", help="a Standard Ebooks source directory")
	args = parser.parse_args()

	try:
		se_epub = SeEpub(args.directory)
		recomposed_epub = se_epub.recompose(args.xhtml)

		if args.output:
			# Mode "w" truncates on open, so the explicit truncate() the
			# original called here was redundant and has been removed
			with open(args.output, "w", encoding="utf-8") as file:
				file.write(recomposed_epub)
		else:
			print(recomposed_epub)
	except se.SeException as ex:
		se.print_error(ex)
		return ex.code
	except Exception:
		# Fix: the bound exception was unused, so don't bind it.
		# NOTE(review): this broad handler also catches non-I/O failures from
		# recompose(); the message assumes the write was the culprit — confirm
		se.print_error("Couldn’t write to output file.")
		return se.InvalidFileException.code

	return 0
def build() -> int:
	"""
	Entry point for `se build`

	Build the distributable ebook files for each given source directory.
	"""
	parser = argparse.ArgumentParser(description="Build compatible .epub and pure .epub3 ebooks from a Standard Ebook source directory. Output is placed in the current directory, or the target directory with --output-dir.")
	parser.add_argument("-b", "--kobo", dest="build_kobo", action="store_true", help="also build a .kepub.epub file for Kobo")
	parser.add_argument("-c", "--check", action="store_true", help="use epubcheck to validate the compatible .epub file; if --kindle is also specified and epubcheck fails, don’t create a Kindle file")
	parser.add_argument("-k", "--kindle", dest="build_kindle", action="store_true", help="also build an .azw3 file for Kindle")
	parser.add_argument("-o", "--output-dir", dest="output_directory", metavar="DIRECTORY", type=str, default="", help="a directory to place output files in; will be created if it doesn’t exist")
	parser.add_argument("-p", "--proof", action="store_true", help="insert additional CSS rules that are helpful for proofreading; output filenames will end in .proof")
	parser.add_argument("-t", "--covers", dest="build_covers", action="store_true", help="output the cover and a cover thumbnail; can only be used when there is a single build target")
	parser.add_argument("-v", "--verbose", action="store_true", help="increase output verbosity")
	parser.add_argument("directories", metavar="DIRECTORY", nargs="+", help="a Standard Ebooks source directory")
	args = parser.parse_args()

	# Covers can only be emitted when there is exactly one build target
	if args.build_covers and len(args.directories) > 1:
		se.print_error("--covers option specified, but more than one build target specified.")
		return se.InvalidInputException.code

	for target_directory in args.directories:
		try:
			se_epub = SeEpub(target_directory)
			se_epub.build(args.check, args.build_kobo, args.build_kindle, Path(args.output_directory), args.proof, args.build_covers, args.verbose)
		except se.SeException as ex:
			se.print_error(ex, args.verbose)
			return ex.code

	return 0
def reorder_endnotes() -> int:
	"""
	Entry point for `se reorder-endnotes`

	Shift the target endnote number, and every endnote after it, up or down by one.
	"""
	parser = argparse.ArgumentParser(description="Increment the specified endnote and all following endnotes by 1.")
	group = parser.add_mutually_exclusive_group(required=True)
	group.add_argument("-d", "--decrement", action="store_true", help="decrement the target endnote number and all following endnotes")
	group.add_argument("-i", "--increment", action="store_true", help="increment the target endnote number and all following endnotes")
	parser.add_argument("target_endnote_number", metavar="ENDNOTE-NUMBER", type=se.is_positive_integer, help="the endnote number to start reordering at")
	parser.add_argument("directory", metavar="DIRECTORY", help="a Standard Ebooks source directory")
	args = parser.parse_args()

	try:
		# The mutually exclusive group guarantees exactly one of the two flags
		step = 1 if args.increment else -1
		se_epub = SeEpub(args.directory)
		se_epub.reorder_endnotes(args.target_endnote_number, step)
	except se.SeException as ex:
		se.print_error(ex)
		return ex.code

	return 0
def renumber_endnotes(plain_output: bool) -> int:
	"""
	Entry point for `se renumber-endnotes`

	Renumber all endnotes and noterefs sequentially from the beginning for each
	given source directory, matching noterefs to endnotes unless --brute-force
	is given.
	"""
	parser = argparse.ArgumentParser(description="Renumber all endnotes and noterefs sequentially from the beginning, taking care to match noterefs and endnotes if possible.")
	parser.add_argument("-b", "--brute-force", action="store_true", help="renumber without checking that noterefs and endnotes match; may result in endnotes with empty backlinks or noterefs without matching endnotes")
	parser.add_argument("-v", "--verbose", action="store_true", help="increase output verbosity")
	parser.add_argument("directories", metavar="DIRECTORY", nargs="+", help="a Standard Ebooks source directory")
	args = parser.parse_args()

	return_code = 0

	for directory in args.directories:
		try:
			se_epub = SeEpub(directory)
		except se.SeException as ex:
			se.print_error(ex, plain_output=plain_output)
			return_code = ex.code
			# NOTE(review): a directory that fails to open aborts the whole run,
			# while the failures below only record the code and continue with the
			# next directory — confirm this asymmetry is intended
			return return_code

		try:
			if args.brute_force:
				# Renumber without matching noterefs to endnotes
				se_epub.recreate_endnotes()
			else:
				found_endnote_count, changed_endnote_count = se_epub.generate_endnotes()
				if args.verbose:
					# prep_output() presumably renders or strips color markup
					# depending on plain_output — see its other call sites
					print(se.prep_output(f"Found {found_endnote_count} endnote{'s' if found_endnote_count != 1 else ''} and changed {changed_endnote_count} endnote{'s' if changed_endnote_count != 1 else ''}.", plain_output))
		except se.SeException as ex:
			se.print_error(ex, plain_output=plain_output)
			return_code = ex.code
		except FileNotFoundError:
			se.print_error("Couldn’t find [path]endnotes.xhtml[/].", plain_output=plain_output)
			return_code = se.InvalidSeEbookException.code

	# The last error code wins when multiple directories fail
	return return_code
def print_toc() -> int:
	"""
	Entry point for `se print-toc`

	The meat of this function is broken out into the generate_toc.py module
	for readability and maintainability.
	"""
	parser = argparse.ArgumentParser(description="Build a table of contents for an SE source directory and print to stdout.")
	parser.add_argument("-i", "--in-place", action="store_true", help="overwrite the existing toc.xhtml file instead of printing to stdout")
	parser.add_argument("directories", metavar="DIRECTORY", nargs="+", help="a Standard Ebooks source directory")
	args = parser.parse_args()

	# Printing to stdout only makes sense for a single target
	if not args.in_place and len(args.directories) > 1:
		se.print_error("Multiple directories are only allowed with the [bash]--in-place[/] option.")
		return se.InvalidArgumentsException.code

	for directory in args.directories:
		try:
			se_epub = SeEpub(directory)
		except se.SeException as ex:
			se.print_error(ex)
			return ex.code

		# Fix: bind toc_path before the try block so the FileNotFoundError
		# handler below can always reference it. Previously it was only bound
		# in the --in-place branch, so a FileNotFoundError raised while
		# printing to stdout caused a NameError inside the handler.
		toc_path = se_epub.path / "src/epub/toc.xhtml"

		try:
			if args.in_place:
				with open(toc_path, "r+", encoding="utf-8") as file:
					file.write(se_epub.generate_toc())
					file.truncate()
			else:
				print(se_epub.generate_toc())
		except se.SeException as ex:
			se.print_error(ex)
			return ex.code
		except FileNotFoundError:
			se.print_error(f"Couldn’t open file: [path][link=file://{toc_path}]{toc_path}[/][/].")
			return se.InvalidSeEbookException.code

	return 0
def build_toc(plain_output: bool) -> int:
	"""
	Entry point for `se build-toc`

	The meat of this function is broken out into the se_epub_generate_toc.py
	module for readability and maintainability.
	"""
	parser = argparse.ArgumentParser(description="Generate the table of contents for the ebook’s source directory and update the ToC file.")
	# Fix: "intead" → "instead" in the help text
	parser.add_argument("-s", "--stdout", action="store_true", help="print to stdout instead of writing to the ToC file")
	parser.add_argument("directories", metavar="DIRECTORY", nargs="+", help="a Standard Ebooks source directory")
	args = parser.parse_args()

	if args.stdout and len(args.directories) > 1:
		se.print_error("Multiple directories are only allowed without the [bash]--stdout[/] option.", plain_output=plain_output)
		return se.InvalidArgumentsException.code

	for directory in args.directories:
		try:
			se_epub = SeEpub(directory)
		except se.SeException as ex:
			# Consistency fix: honor plain_output like every other error path here
			se.print_error(ex, plain_output=plain_output)
			return ex.code

		try:
			if args.stdout:
				print(se_epub.generate_toc())
			else:
				toc = se_epub.generate_toc()
				with open(se_epub.toc_path, "w", encoding="utf-8") as file:
					file.write(toc)
		except se.SeException as ex:
			se.print_error(ex, plain_output=plain_output)
			return ex.code
		except FileNotFoundError:
			se.print_error(f"Couldn’t open file: [path][link=file://{se_epub.toc_path}]{se_epub.toc_path}[/][/].", plain_output=plain_output)
			return se.InvalidSeEbookException.code

	return 0
def build() -> int:
	"""
	Entry point for `se build`

	Build each target directory, printing a progress header per target when
	verbose and collecting exceptions so that all targets are attempted even
	if one fails.
	"""
	parser = argparse.ArgumentParser(description="Build compatible .epub and pure .epub3 ebooks from a Standard Ebook source directory. Output is placed in the current directory, or the target directory with --output-dir.")
	parser.add_argument("-b", "--kobo", dest="build_kobo", action="store_true", help="also build a .kepub.epub file for Kobo")
	parser.add_argument("-c", "--check", action="store_true", help="use epubcheck to validate the compatible .epub file; if --kindle is also specified and epubcheck fails, don’t create a Kindle file")
	parser.add_argument("-k", "--kindle", dest="build_kindle", action="store_true", help="also build an .azw3 file for Kindle")
	parser.add_argument("-o", "--output-dir", metavar="DIRECTORY", type=str, default="", help="a directory to place output files in; will be created if it doesn’t exist")
	parser.add_argument("-p", "--proof", action="store_true", help="insert additional CSS rules that are helpful for proofreading; output filenames will end in .proof")
	parser.add_argument("-t", "--covers", dest="build_covers", action="store_true", help="output the cover and a cover thumbnail; can only be used when there is a single build target")
	parser.add_argument("-v", "--verbose", action="store_true", help="increase output verbosity")
	parser.add_argument("directories", metavar="DIRECTORY", nargs="+", help="a Standard Ebooks source directory")
	args = parser.parse_args()

	last_output_was_exception = False
	return_code = 0
	console = Console(highlight=False, theme=se.RICH_THEME, force_terminal=se.is_called_from_parallel()) # Syntax highlighting will do weird things when printing paths; force_terminal prints colors when called from GNU Parallel

	if args.build_covers and len(args.directories) > 1:
		se.print_error("[bash]--covers[/] option specified, but more than one build target specified.")
		return se.InvalidInputException.code

	for directory in args.directories:
		exception = None
		directory = Path(directory).resolve()

		# Print the header.
		# Fix: the original tested `args.verbose or exception`, but `exception`
		# is always None at this point (it was assigned just above), so the
		# test reduces to args.verbose alone.
		if args.verbose:
			console.print(f"Building [path][link=file://{directory}]{directory}[/][/] ... ", end="")

		try:
			se_epub = SeEpub(directory)
			se_epub.build(args.check, args.build_kobo, args.build_kindle, Path(args.output_dir), args.proof, args.build_covers)
		except se.SeException as ex:
			exception = ex
			# Report a generic build failure regardless of the specific exception code
			return_code = se.BuildFailedException.code

		# Print a newline after we've printed an exception
		if last_output_was_exception and (args.verbose or exception):
			console.print("")
			last_output_was_exception = False

		if exception:
			if args.verbose:
				console.print("")
			se.print_error(exception, args.verbose)
			last_output_was_exception = True
		elif args.verbose:
			console.print("OK")

	return return_code
def shift_endnotes(plain_output: bool) -> int:
	"""
	Entry point for `se shift-endnotes`

	Move the target endnote, and every endnote after it, up or down by the
	requested amount.
	"""
	parser = argparse.ArgumentParser(description="Increment or decrement the specified endnote and all following endnotes by 1 or a specified amount.")
	group = parser.add_mutually_exclusive_group(required=True)
	group.add_argument("-d", "--decrement", action="store_true", help="decrement the target endnote number and all following endnotes")
	group.add_argument("-i", "--increment", action="store_true", help="increment the target endnote number and all following endnotes")
	parser.add_argument("-a", "--amount", metavar="NUMBER", dest="amount", default=1, type=se.is_positive_integer, help="the amount to increment or decrement by; defaults to 1")
	parser.add_argument("target_endnote_number", metavar="ENDNOTE-NUMBER", type=se.is_positive_integer, help="the endnote number to start shifting at")
	parser.add_argument("directory", metavar="DIRECTORY", help="a Standard Ebooks source directory")
	args = parser.parse_args()

	return_code = 0

	try:
		# A negative step shifts the endnotes downward
		step = args.amount if args.increment else -args.amount
		se_epub = SeEpub(args.directory)
		se_epub.shift_endnotes(args.target_endnote_number, step)
	except se.SeException as ex:
		se.print_error(ex, plain_output=plain_output)
		return_code = ex.code

	return return_code
def build_spine(plain_output: bool) -> int:
	"""
	Entry point for `se build-spine`

	Generate the <spine> element and write it into the ebook's metadata file,
	or print it to stdout with --stdout.
	"""
	parser = argparse.ArgumentParser(description="Generate the <spine> element for the given Standard Ebooks source directory and write it to the ebook’s metadata file.")
	parser.add_argument("-s", "--stdout", action="store_true", help="print to stdout instead of writing to the metadata file")
	parser.add_argument("directories", metavar="DIRECTORY", nargs="+", help="a Standard Ebooks source directory")
	args = parser.parse_args()

	if args.stdout and len(args.directories) > 1:
		se.print_error("Multiple directories are only allowed without the [bash]--stdout[/] option.", plain_output=plain_output)
		return se.InvalidArgumentsException.code

	for directory in args.directories:
		try:
			se_epub = SeEpub(directory)

			if args.stdout:
				print(se_epub.generate_spine().to_string())
			else:
				# Replace the existing <spine> element if there is one;
				# otherwise append a fresh one to the <package> root
				nodes = se_epub.metadata_dom.xpath("/package/spine")
				if nodes:
					for node in nodes:
						node.replace_with(se_epub.generate_spine())
				else:
					for node in se_epub.metadata_dom.xpath("/package"):
						node.append(se_epub.generate_spine())

				with open(se_epub.metadata_file_path, "w", encoding="utf-8") as file:
					file.write(se.formatting.format_xml(se_epub.metadata_dom.to_string()))
		except se.SeException as ex:
			# Consistency fix: honor plain_output like the argument error above
			se.print_error(ex, plain_output=plain_output)
			return ex.code

	return 0
def recompose_epub(plain_output: bool) -> int:
	"""
	Entry point for `se recompose-epub`

	Recompose the source directory into a single (X)HTML5 document, optionally
	appending an extra CSS file, and write it to a file or stdout.
	"""
	parser = argparse.ArgumentParser(description="Recompose a Standard Ebooks source directory into a single (X?)HTML5 file, and print to standard output.")
	parser.add_argument("-o", "--output", metavar="FILE", type=str, default="", help="a file to write output to instead of printing to standard output")
	parser.add_argument("-x", "--xhtml", action="store_true", help="output XHTML instead of HTML5")
	parser.add_argument("-e", "--extra-css-file", metavar="FILE", type=str, default=None, help="the path to an additional CSS file to include after any CSS files in the epub")
	parser.add_argument("directory", metavar="DIRECTORY", help="a Standard Ebooks source directory")
	args = parser.parse_args()

	try:
		se_epub = SeEpub(args.directory)
		recomposed_epub = se_epub.recompose(args.xhtml, args.extra_css_file)

		if args.output:
			with open(args.output, "w", encoding="utf-8") as file:
				file.write(recomposed_epub)
		else:
			print(recomposed_epub)
	except se.SeException as ex:
		se.print_error(ex, plain_output=plain_output)
		return ex.code
	except Exception:
		# Fix: the bound exception was unused; also honor plain_output here
		# for consistency with the handler above
		se.print_error("Couldn’t recompose epub.", plain_output=plain_output)
		return se.InvalidFileException.code

	return 0
def recompose_epub() -> int:
	"""
	Entry point for `se recompose-epub`

	Recompose the given source directory into one HTML5 document and print it
	to standard output.
	"""
	parser = argparse.ArgumentParser(description="Recompose a Standard Ebooks source directory into a single HTML5 file, and print to standard output.")
	parser.add_argument("directory", metavar="DIRECTORY", help="a Standard Ebooks source directory")
	args = parser.parse_args()

	try:
		print(SeEpub(args.directory).recompose())
	except se.SeException as ex:
		se.print_error(ex)
		return ex.code

	return 0
def renumber_endnotes() -> int:
	"""
	Entry point for `se renumber-endnotes`

	Renumber all endnotes and noterefs sequentially from the beginning for
	each given source directory.
	"""
	parser = argparse.ArgumentParser(description="Renumber all endnotes and noterefs sequentially from the beginning.")
	parser.add_argument("-v", "--verbose", action="store_true", help="increase output verbosity")
	parser.add_argument("directories", metavar="DIRECTORY", nargs="+", help="a Standard Ebooks source directory")
	args = parser.parse_args()

	return_code = 0

	for directory in args.directories:
		try:
			se_epub = SeEpub(directory)
		except se.SeException as ex:
			se.print_error(ex)
			return_code = ex.code
			# Fix: without this `continue`, a failed SeEpub() fell through to
			# the code below with `se_epub` unbound (NameError on the first
			# directory) or stale (silently re-processing the previous one)
			continue

		try:
			found_endnote_count, changed_endnote_count = se_epub.generate_endnotes()
			if args.verbose:
				print(f"Found {found_endnote_count} endnote{'s' if found_endnote_count != 1 else ''} and changed {changed_endnote_count} endnote{'s' if changed_endnote_count != 1 else ''}.")
		except se.SeException as ex:
			se.print_error(ex)
			return_code = ex.code
		except FileNotFoundError:
			se.print_error("Couldn’t find [path]endnotes.xhtml[/].")
			return_code = se.InvalidSeEbookException.code

	return return_code
def renumber_endnotes() -> int:
	"""
	Entry point for `se renumber-endnotes`

	Renumber every endnote and noteref sequentially in each given source
	directory, printing a report of actions taken when verbose.
	"""
	parser = argparse.ArgumentParser(description="Renumber all endnotes and noterefs sequentially from the beginning.")
	parser.add_argument("-v", "--verbose", action="store_true", help="increase output verbosity")
	parser.add_argument("directories", metavar="DIRECTORY", nargs="+", help="a Standard Ebooks source directory")
	args = parser.parse_args()

	for target in args.directories:
		try:
			se_epub = SeEpub(target)
		except se.SeException as ex:
			se.print_error(ex)
			return ex.code

		try:
			# generate_endnotes() returns a report on actions taken
			report = se_epub.generate_endnotes()
			if args.verbose:
				print(report)
		except se.SeException as ex:
			se.print_error(ex)
			return ex.code
		except FileNotFoundError:
			se.print_error("Couldn’t find `endnotes.xhtml`.")
			return se.InvalidSeEbookException.code

	return 0
def build_images() -> int:
	"""
	Entry point for `se build-images`

	Strip metadata from cover source files, then regenerate cover.svg and
	titlepage.svg for each given source directory.
	"""
	parser = argparse.ArgumentParser(description="Build ebook covers and titlepages for a Standard Ebook source directory, and place the output in DIRECTORY/src/epub/images/.")
	parser.add_argument("-v", "--verbose", action="store_true", help="increase output verbosity")
	parser.add_argument("directories", metavar="DIRECTORY", nargs="+", help="a Standard Ebooks source directory")
	args = parser.parse_args()

	for target in args.directories:
		target = Path(target)
		if args.verbose:
			print(f"Processing {target} ...")

		target = target.resolve()
		se_epub = SeEpub(target)

		try:
			if args.verbose:
				print("\tCleaning metadata ...", end="", flush=True)

			# Remove useless metadata from cover source files
			for parent, _, child_names in os.walk(target):
				for cover_name in fnmatch.filter(child_names, "cover.*"):
					se.images.remove_image_metadata(Path(parent) / cover_name)

			if args.verbose:
				print(" OK")
				print("\tBuilding cover.svg ...", end="", flush=True)

			se_epub.generate_cover_svg()

			if args.verbose:
				print(" OK")
				print("\tBuilding titlepage.svg ...", end="", flush=True)

			se_epub.generate_titlepage_svg()

			if args.verbose:
				print(" OK")
		except se.SeException as ex:
			se.print_error(ex)
			return ex.code

	return 0
def prepare_release() -> int:
	"""
	Entry point for `se prepare-release`

	Update the work's word count and reading-ease score, and set the release
	timestamp, for each given source directory.
	"""
	parser = argparse.ArgumentParser(description="Calculate work word count, insert release date if not yet set, and update modified date and revision number.")
	parser.add_argument("-n", "--no-word-count", dest="word_count", action="store_false", help="don’t calculate word count")
	parser.add_argument("-r", "--no-revision", dest="revision", action="store_false", help="don’t increment the revision number")
	parser.add_argument("-v", "--verbose", action="store_true", help="increase output verbosity")
	parser.add_argument("directories", metavar="DIRECTORY", nargs="+", help="a Standard Ebooks source directory")
	args = parser.parse_args()

	for target in args.directories:
		target = Path(target).resolve()

		if args.verbose:
			print(f"Processing {target} ...")

		try:
			se_epub = SeEpub(target)

			if args.word_count:
				if args.verbose:
					print("\tUpdating word count and reading ease ...", end="", flush=True)

				se_epub.update_word_count()
				se_epub.update_flesch_reading_ease()

				if args.verbose:
					print(" OK")

			if args.revision:
				if args.verbose:
					print("\tUpdating revision number ...", end="", flush=True)

				se_epub.set_release_timestamp()

				if args.verbose:
					print(" OK")
		except se.SeException as ex:
			se.print_error(ex)
			return ex.code

	return 0
def build_images() -> int:
	"""
	Entry point for `se build-images`

	Regenerate cover.svg and titlepage.svg for each given source directory.
	"""
	parser = argparse.ArgumentParser(description="Build ebook covers and titlepages for a Standard Ebook source directory, and place the output in DIRECTORY/src/epub/images/.")
	parser.add_argument("-v", "--verbose", action="store_true", help="increase output verbosity")
	parser.add_argument("directories", metavar="DIRECTORY", nargs="+", help="a Standard Ebooks source directory")
	args = parser.parse_args()

	for target in args.directories:
		target = Path(target)
		if args.verbose:
			print(f"Processing {target} ...")

		target = target.resolve()
		se_epub = SeEpub(target)

		try:
			if args.verbose:
				print("\tBuilding cover.svg ...", end="", flush=True)

			se_epub.generate_cover_svg()

			if args.verbose:
				print(" OK")
				print("\tBuilding titlepage.svg ...", end="", flush=True)

			se_epub.generate_titlepage_svg()

			if args.verbose:
				print(" OK")
		except se.SeException as ex:
			se.print_error(ex)
			return ex.code

	return 0
def lint(plain_output: bool) -> int:
	"""
	Entry point for `se lint`

	Lint each given source directory and print the results either as plain
	text (plain_output=True) or as Rich tables with color and hyperlinks.
	Returns 0 on success, or the appropriate error code if any directory
	produced messages or raised.
	"""
	parser = argparse.ArgumentParser(description="Check for various Standard Ebooks style errors.")
	parser.add_argument("-s", "--skip-lint-ignore", action="store_true", help="ignore rules in se-lint-ignore.xml file")
	parser.add_argument("-v", "--verbose", action="store_true", help="increase output verbosity")
	parser.add_argument("directories", metavar="DIRECTORY", nargs="+", help="a Standard Ebooks source directory")
	args = parser.parse_args()

	called_from_parallel = se.is_called_from_parallel(False)
	force_terminal = True if called_from_parallel else None # True will force colors, None will guess whether colors are enabled, False will disable colors
	first_output = True
	return_code = 0

	# Rich needs to know the terminal width in order to format tables.
	# If we're called from Parallel, there is no width because Parallel is not a terminal. Thus we must export $COLUMNS before
	# invoking Parallel, and then get that value here.
	console = Console(width=int(os.environ["COLUMNS"]) if called_from_parallel and "COLUMNS" in os.environ else None, highlight=False, theme=se.RICH_THEME, force_terminal=force_terminal) # Syntax highlighting will do weird things when printing paths; force_terminal prints colors when called from GNU Parallel

	for directory in args.directories:
		directory = Path(directory).resolve()
		messages = []
		exception = None
		table_data = []
		has_output = False

		try:
			se_epub = SeEpub(directory)
			messages = se_epub.lint(args.skip_lint_ignore)
		except se.SeException as ex:
			exception = ex
			# With multiple targets, collapse individual error codes into a
			# generic lint failure; with one target, surface the specific code
			if len(args.directories) > 1:
				return_code = se.LintFailedException.code
			else:
				return_code = ex.code

		# Print a separator newline if more than one table is printed
		if not first_output and (args.verbose or messages or exception):
			console.print("")
		elif first_output:
			first_output = False

		# Print the table header
		if ((len(args.directories) > 1 or called_from_parallel) and (messages or exception)) or args.verbose:
			has_output = True
			if plain_output:
				console.print(directory)
			else:
				console.print(f"[reverse][path][link=file://{directory}]{directory}[/][/][/reverse]")

		if exception:
			has_output = True
			se.print_error(exception, plain_output=plain_output)

		# Print the tables
		if messages:
			has_output = True
			return_code = se.LintFailedException.code

			if plain_output:
				# Plain text: one "CODE [Severity] filename message" line per message
				for message in messages:
					label = "[Manual Review]"

					if message.message_type == se.MESSAGE_TYPE_ERROR:
						label = "[Error]"

					# Replace color markup with `
					message.text = se.prep_output(message.text, True)

					message_filename = ""
					if message.filename:
						message_filename = message.filename.name

					console.print(f"{message.code} {label} {message_filename} {message.text}")

					if message.submessages:
						for submessage in message.submessages:
							# Indent each line in case we have a multi-line submessage
							console.print(regex.sub(r"^", "\t", submessage, flags=regex.MULTILINE))
			else:
				# Rich table: collect rows first, then render in one table
				for message in messages:
					alert = "[bright_yellow]Manual Review[/bright_yellow]"

					if message.message_type == se.MESSAGE_TYPE_ERROR:
						alert = "[bright_red]Error[/bright_red]"

					# Add hyperlinks around message filenames
					message_filename = ""
					if message.filename:
						message_filename = f"[link=file://{message.filename.resolve()}]{message.filename.name}[/link]"

					table_data.append([message.code, alert, message_filename, message.text])

					if message.submessages:
						for submessage in message.submessages:
							# Brackets don't need to be escaped in submessages if we instantiate them in Text()
							submessage_object = Text(submessage, style="dim")
							table_data.append([" ", " ", Text("→", justify="right"), submessage_object])

				table = Table(show_header=True, header_style="bold", show_lines=True, expand=True)
				table.add_column("Code", width=5, no_wrap=True)
				table.add_column("Severity", no_wrap=True)
				table.add_column("File", no_wrap=True)
				table.add_column("Message", ratio=10)

				for row in table_data:
					table.add_row(row[0], row[1], row[2], row[3])

				console.print(table)

		# In verbose mode, explicitly confirm clean directories
		if args.verbose and not messages and not exception:
			if plain_output:
				console.print("OK")
			else:
				table = Table(show_header=False, box=box.SQUARE)
				table.add_column("", style="white on green4 bold")
				table.add_row("OK")
				console.print(table)

		# Print a newline if we're called from parallel and we just printed something, to
		# better visually separate output blocks
		if called_from_parallel and has_output:
			console.print("")

	return return_code
def lint() -> int:
	"""
	Entry point for `se lint`

	Lint each given source directory and print the results, either as plain
	text (--plain), as colorless tables (--no-colors), or as Rich tables with
	color and hyperlinks (the default).
	"""
	parser = argparse.ArgumentParser(description="Check for various Standard Ebooks style errors.")
	parser.add_argument("-n", "--no-colors", dest="colors", action="store_false", help="don’t use color or hyperlinks in output")
	parser.add_argument("-p", "--plain", action="store_true", help="print plain text output, without tables or colors")
	parser.add_argument("-s", "--skip-lint-ignore", action="store_true", help="ignore rules in se-lint-ignore.xml file")
	parser.add_argument("-v", "--verbose", action="store_true", help="increase output verbosity")
	parser.add_argument("directories", metavar="DIRECTORY", nargs="+", help="a Standard Ebooks source directory")
	args = parser.parse_args()

	called_from_parallel = se.is_called_from_parallel()
	first_output = True
	return_code = 0

	console = Console(highlight=False, theme=se.RICH_THEME, force_terminal=called_from_parallel) # Syntax highlighting will do weird things when printing paths; force_terminal prints colors when called from GNU Parallel

	for directory in args.directories:
		directory = Path(directory).resolve()
		messages = []
		exception = None
		table_data = []
		has_output = False

		try:
			se_epub = SeEpub(directory)
			messages = se_epub.lint(args.skip_lint_ignore)
		except se.SeException as ex:
			exception = ex
			# With multiple targets, collapse individual error codes into a
			# generic lint failure; with one target, surface the specific code
			if len(args.directories) > 1:
				return_code = se.LintFailedException.code
			else:
				return_code = ex.code

		# Print a separator newline if more than one table is printed
		if not first_output and (args.verbose or messages or exception):
			console.print("")
		elif first_output:
			first_output = False

		# Print the table header
		if ((len(args.directories) > 1 or called_from_parallel) and (messages or exception)) or args.verbose:
			has_output = True
			if args.plain:
				console.print(directory)
			else:
				console.print(f"[reverse]{directory}[/reverse]")

		if exception:
			has_output = True
			se.print_error(exception)

		# Print the tables
		if messages:
			has_output = True
			return_code = se.LintFailedException.code

			if args.plain:
				# Plain text: one "CODE Severity: filename message" line per message
				for message in messages:
					label = "Manual Review:"

					if message.message_type == se.MESSAGE_TYPE_ERROR:
						label = "Error:"

					# Replace color markup with `
					message.text = regex.sub(r"\[(?:/|xhtml|xml|val|attr|val|class|path|url|text|bash|link)(?:=[^\]]*?)*\]", "`", message.text)
					message.text = regex.sub(r"`+", "`", message.text)

					message_filename = ""
					if message.filename:
						message_filename = message.filename.name

					console.print(f"{message.code} {label} {message_filename} {message.text}")

					if message.submessages:
						for submessage in message.submessages:
							# Indent each line in case we have a multi-line submessage
							console.print(regex.sub(r"^", "\t", submessage, flags=regex.MULTILINE))
			else:
				# Table output: collect rows first, then render in one table
				for message in messages:
					alert = "Manual Review"

					if message.message_type == se.MESSAGE_TYPE_ERROR:
						alert = "Error"

					message_text = message.text

					if args.colors:
						if message.message_type == se.MESSAGE_TYPE_ERROR:
							alert = f"[bright_red]{alert}[/bright_red]"
						else:
							alert = f"[bright_yellow]{alert}[/bright_yellow]"

						# Add hyperlinks around message filenames
						message_filename = ""
						if message.filename:
							message_filename = f"[link=file://{message.filename.resolve()}]{message.filename.name}[/link]"
					else:
						# Replace color markup with `
						message_text = regex.sub(r"\[(?:/|xhtml|xml|val|attr|val|class|path|url|text|bash|link)(?:=[^\]]*?)*\]", "`", message_text)
						message_text = regex.sub(r"`+", "`", message_text)

						message_filename = ""
						if message.filename:
							message_filename = message.filename.name

					table_data.append([message.code, alert, message_filename, message_text])

					if message.submessages:
						for submessage in message.submessages:
							# Brackets don't need to be escaped in submessages if we instantiate them in Text()
							if args.colors:
								submessage_object = Text(submessage, style="dim")
							else:
								submessage_object = Text(submessage)
							table_data.append([" ", " ", Text("→", justify="right"), submessage_object])

				table = Table(show_header=True, header_style="bold", show_lines=True)
				table.add_column("Code", width=5, no_wrap=True)
				table.add_column("Severity", no_wrap=True)
				table.add_column("File", no_wrap=True)
				table.add_column("Message")

				for row in table_data:
					table.add_row(row[0], row[1], row[2], row[3])

				console.print(table)

		# In verbose mode, explicitly confirm clean directories
		if args.verbose and not messages and not exception:
			if args.plain:
				console.print("OK")
			else:
				table = Table(show_header=False, box=box.SQUARE)
				table.add_column("", style="white on green4 bold" if args.colors else None)
				table.add_row("OK")
				console.print(table)

		# Print a newline if we're called from parallel and we just printed something, to
		# better visually separate output blocks
		if called_from_parallel and has_output:
			console.print("")

	return return_code
def lint() -> int:
	"""
	Entry point for `se lint`

	Lint each given directory and print results either as plain text or as a
	formatted table. Returns 0 on success, se.LintFailedException.code if any
	lint messages were emitted, or the exception code on failure.
	"""

	parser = argparse.ArgumentParser(description="Check for various Standard Ebooks style errors.")
	parser.add_argument("-p", "--plain", action="store_true", help="print plain text output, without tables or colors")
	parser.add_argument("-n", "--no-colors", dest="colors", action="store_false", help="do not use colored output")
	parser.add_argument("-s", "--skip-lint-ignore", action="store_true", help="ignore rules in se-lint-ignore.xml file")
	parser.add_argument("-v", "--verbose", action="store_true", help="increase output verbosity")
	parser.add_argument("directories", metavar="DIRECTORY", nargs="+", help="a Standard Ebooks source directory")
	args = parser.parse_args()

	first_output = True
	return_code = 0

	for directory in args.directories:
		try:
			se_epub = SeEpub(directory)
			messages = se_epub.lint(args.skip_lint_ignore)
		except se.SeException as ex:
			se.print_error(ex)
			return ex.code

		table_data = []

		# Print a separator newline if more than one table is printed
		if not first_output and (args.verbose or messages):
			print("")
		elif first_output:
			first_output = False

		# Print the table header (the ebook path), but only when verbose or
		# when output for several directories needs to be told apart
		if args.verbose or (messages and len(args.directories) > 1):
			if args.plain:
				print(se_epub.path)
			else:
				print(stylize(str(se_epub.path), attr("reverse")))

		# Print the table
		if messages:
			return_code = se.LintFailedException.code

			if args.plain:
				for message in messages:
					label = "Manual Review:"

					if message.message_type == se.MESSAGE_TYPE_ERROR:
						label = "Error:"

					print(f"{message.code} {label} {message.filename} {message.text}")

					# Submessages are indented one tab under their parent message
					if message.submessages:
						for submessage in message.submessages:
							print(f"\t{submessage}")
			else:
				table_data.append([stylize("Code", attr("bold")), stylize("Severity", attr("bold")), stylize("File", attr("bold")), stylize("Message", attr("bold"))])

				for message in messages:
					alert = "Manual Review"

					if message.message_type == se.MESSAGE_TYPE_ERROR:
						alert = "Error"

					message_text = message.text

					if args.colors:
						if message.message_type == se.MESSAGE_TYPE_ERROR:
							alert = stylize(alert, fg("red"))
						else:
							alert = stylize(alert, fg("yellow"))

						# By convention, any text within the message text that is surrounded in backticks
						# is rendered in blue
						message_text = regex.sub(r"`(.+?)`", stylize(r"\1", fg("light_blue")), message_text)

					table_data.append([message.code, alert, message.filename, message_text])

					# Submessages become extra rows marked with an arrow in the file column
					if message.submessages:
						for submessage in message.submessages:
							table_data.append([" ", " ", "→", f"{submessage}"])

				_print_table(table_data, 3)

		# All clean: print an OK marker, but only when verbose
		if args.verbose and not messages:
			if args.plain:
				print("OK")
			else:
				table_data.append([stylize(" OK ", bg("green") + fg("white") + attr("bold"))])

				_print_table(table_data)

	return return_code
def lint() -> int: """ Entry point for `se lint` """ from termcolor import colored parser = argparse.ArgumentParser(description="Check for various Standard Ebooks style errors.") parser.add_argument("-p", "--plain", action="store_true", help="print plain output") parser.add_argument("-v", "--verbose", action="store_true", help="increase output verbosity") parser.add_argument("directories", metavar="DIRECTORY", nargs="+", help="a Standard Ebooks source directory") args = parser.parse_args() first_output = True return_code = 0 for directory in args.directories: try: se_epub = SeEpub(directory) except se.SeException as ex: se.print_error(ex) return ex.code messages = se_epub.lint() table_data = [] # Print a separator newline if more than one table is printed if not first_output and (args.verbose or messages): print("") elif first_output: first_output = False # Print the table header if args.verbose or (messages and len(args.directories) > 1): if args.plain: print(se_epub.path) else: print(colored(se_epub.path, "white", attrs=["reverse"])) # Print the table if messages: return_code = se.LintFailedException.code if args.plain: for message in messages: if message.is_submessage: print("\t" + message.text) else: label = "Manual Review:" if message.message_type == se.MESSAGE_TYPE_ERROR: label = "Error:" print(label, message.filename, message.text) else: for message in messages: if message.is_submessage: table_data.append([" ", "→", "{}".format(message.text)]) else: alert = colored("Manual Review", "yellow") if message.message_type == se.MESSAGE_TYPE_ERROR: alert = colored("Error", "red") table_data.append([alert, message.filename, message.text]) se.print_table(table_data, 2) if args.verbose and not messages: if args.plain: print("OK") else: table_data.append([colored("OK", "green", attrs=["reverse"])]) se.print_table(table_data) return return_code
def prepare_release() -> int: """ Entry point for `se prepare-release` """ parser = argparse.ArgumentParser( description= "Calculate work word count, insert release date if not yet set, and update modified date and revision number." ) parser.add_argument("-w", "--no-word-count", dest="word_count", action="store_false", help="don’t calculate word count") parser.add_argument("-r", "--no-revision", dest="revision", action="store_false", help="don’t increment the revision number") parser.add_argument("-v", "--verbose", action="store_true", help="increase output verbosity") parser.add_argument("directories", metavar="DIRECTORY", nargs="+", help="a Standard Ebooks source directory") args = parser.parse_args() console = Console( highlight=False, theme=se.RICH_THEME, force_terminal=se.is_called_from_parallel() ) # Syntax highlighting will do weird things when printing paths; force_terminal prints colors when called from GNU Parallel for directory in args.directories: directory = Path(directory).resolve() if args.verbose: console.print( f"Processing [path][link=file://{directory}]{directory}[/][/] ..." ) try: se_epub = SeEpub(directory) if args.word_count: if args.verbose: console.print("\tUpdating word count and reading ease ...", end="") se_epub.update_word_count() se_epub.update_flesch_reading_ease() if args.verbose: console.print(" OK") if args.revision: if args.verbose: console.print("\tUpdating revision number ...", end="") se_epub.set_release_timestamp() if args.verbose: console.print(" OK") except se.SeException as ex: se.print_error(ex) return ex.code return 0
def build_images(plain_output: bool) -> int: """ Entry point for `se build-images` """ parser = argparse.ArgumentParser( description= "Build ebook covers and titlepages for a Standard Ebook source directory, and place the output in DIRECTORY/src/epub/images/." ) parser.add_argument("-v", "--verbose", action="store_true", help="increase output verbosity") parser.add_argument("directories", metavar="DIRECTORY", nargs="+", help="a Standard Ebooks source directory") args = parser.parse_args() console = Console( highlight=False, theme=se.RICH_THEME, force_terminal=se.is_called_from_parallel() ) # Syntax highlighting will do weird things when printing paths; force_terminal prints colors when called from GNU Parallel for directory in args.directories: directory = Path(directory).resolve() if args.verbose: console.print( se.prep_output( f"Processing [path][link=file://{directory}]{directory}[/][/] ...", plain_output)) try: se_epub = SeEpub(directory) if args.verbose: console.print("\tCleaning metadata ...", end="") # Remove useless metadata from cover source files for file_path in directory.glob("**/cover.*"): se.images.remove_image_metadata(file_path) if args.verbose: console.print(" OK") console.print(se.prep_output( f"\tBuilding [path][link=file://{directory / 'src/epub/images/cover.svg'}]cover.svg[/][/] ...", plain_output), end="") se_epub.generate_cover_svg() if args.verbose: console.print(" OK") console.print(se.prep_output( f"\tBuilding [path][link=file://{directory / 'src/epub/images/titlepage.svg'}]titlepage.svg[/][/] ...", plain_output), end="") se_epub.generate_titlepage_svg() if args.verbose: console.print(" OK") except se.SeException as ex: se.print_error(ex) return ex.code return 0
def _create_draft(args: Namespace) -> None:
	"""
	Implementation for `se create-draft`

	Create a new ebook skeleton directory from templates, optionally seeding it
	with text and metadata downloaded from a Project Gutenberg ebook page
	(args.pg_url), then initialize a git repo in it.

	Raises various se exceptions on network, parsing, or filesystem failure.
	"""

	# Put together some variables for later use
	authors = []
	translators = []
	illustrators = []
	pg_producers = []
	title = args.title.replace("'", "’")

	# Contributor dicts start with unknown Wikipedia/NACOAF URLs; they are
	# looked up below unless we're offline
	for author in args.author:
		authors.append({"name": author.replace("'", "’"), "wiki_url": None, "nacoaf_url": None})

	if args.translator:
		for translator in args.translator:
			translators.append({"name": translator.replace("'", "’"), "wiki_url": None, "nacoaf_url": None})

	if args.illustrator:
		for illustrator in args.illustrator:
			illustrators.append({"name": illustrator.replace("'", "’"), "wiki_url": None, "nacoaf_url": None})

	title_string = title
	if authors and authors[0]["name"].lower() != "anonymous":
		title_string += ", by " + _generate_contributor_string(authors, False)

	# The SE identifier is built up as author(s)/title[/translator(s)][/illustrator(s)]
	identifier = ""
	for author in authors:
		identifier += se.formatting.make_url_safe(author["name"]) + "_"

	identifier = identifier.rstrip("_") + "/" + se.formatting.make_url_safe(title)

	# Move a leading article to the end for the sortable title, e.g. "The X" -> "X, The"
	sorted_title = regex.sub(r"^(A|An|The) (.+)$", "\\2, \\1", title)

	if translators:
		title_string = title_string + ". Translated by " + _generate_contributor_string(translators, False)

		identifier = identifier + "/"
		for translator in translators:
			identifier += se.formatting.make_url_safe(translator["name"]) + "_"
		identifier = identifier.rstrip("_")

	if illustrators:
		title_string = title_string + ". Illustrated by " + _generate_contributor_string(illustrators, False)

		identifier = identifier + "/"
		for illustrator in illustrators:
			identifier += se.formatting.make_url_safe(illustrator["name"]) + "_"
		identifier = identifier.rstrip("_")

	repo_name = identifier.replace("/", "_")
	repo_path = Path(repo_name).resolve()

	if repo_path.is_dir():
		raise se.InvalidInputException(f"Directory already exists: [path][link=file://{repo_path}]{repo_path}[/][/].")

	# Get data on authors
	for i, author in enumerate(authors):
		if not args.offline and author["name"].lower() != "anonymous":
			author["wiki_url"], author["nacoaf_url"] = _get_wikipedia_url(author["name"], True)

	# Get data on translators
	for i, translator in enumerate(translators):
		if not args.offline and translator["name"].lower() != "anonymous":
			translator["wiki_url"], translator["nacoaf_url"] = _get_wikipedia_url(translator["name"], True)

	# Get data on illustrators
	for i, illustrator in enumerate(illustrators):
		if not args.offline and illustrator["name"].lower() != "anonymous":
			illustrator["wiki_url"], illustrator["nacoaf_url"] = _get_wikipedia_url(illustrator["name"], True)

	# Download PG HTML and do some fixups
	if args.pg_url:
		if args.offline:
			raise se.RemoteCommandErrorException("Cannot download Project Gutenberg ebook when offline option is enabled.")

		args.pg_url = args.pg_url.replace("http://", "https://")

		# Get the ebook metadata
		try:
			response = requests.get(args.pg_url)
			pg_metadata_html = response.text
		except Exception as ex:
			raise se.RemoteCommandErrorException(f"Couldn’t download Project Gutenberg ebook metadata page. Exception: {ex}")

		parser = etree.HTMLParser()
		dom = etree.parse(StringIO(pg_metadata_html), parser)

		# Get the ebook HTML URL from the metadata; normalize protocol-relative
		# and root-relative hrefs to absolute gutenberg.org URLs
		pg_ebook_url = None
		for node in dom.xpath("/html/body//a[contains(@type, 'text/html')]"):
			pg_ebook_url = regex.sub(r"^//", "https://", node.get("href"))
			pg_ebook_url = regex.sub(r"^/", "https://www.gutenberg.org/", pg_ebook_url)

		if not pg_ebook_url:
			raise se.RemoteCommandErrorException("Could download ebook metadata, but couldn’t find URL for the ebook HTML.")

		# Get the ebook LCSH categories
		pg_subjects = []
		for node in dom.xpath("/html/body//td[contains(@property, 'dcterms:subject')]"):
			if node.get("datatype") == "dcterms:LCSH":
				for subject_link in node.xpath("./a"):
					pg_subjects.append(subject_link.text.strip())

		# Get the PG publication date (just the 4-digit year)
		pg_publication_year = None
		for node in dom.xpath("//td[@itemprop='datePublished']"):
			pg_publication_year = regex.sub(r".+?([0-9]{4})", "\\1", node.text)

		# Get the actual ebook URL
		try:
			response = requests.get(pg_ebook_url)
			pg_ebook_html = response.text
		except Exception as ex:
			raise se.RemoteCommandErrorException(f"Couldn’t download Project Gutenberg ebook HTML. Exception: {ex}")

		# Normalize mojibake and strip any BOM before parsing
		try:
			fixed_pg_ebook_html = fix_text(pg_ebook_html, uncurl_quotes=False)
			pg_ebook_html = se.strip_bom(fixed_pg_ebook_html)
		except Exception as ex:
			raise se.InvalidEncodingException(f"Couldn’t determine text encoding of Project Gutenberg HTML file. Exception: {ex}")

		# Try to guess the ebook language from British spellings
		pg_language = "en-US"
		if "colour" in pg_ebook_html or "favour" in pg_ebook_html or "honour" in pg_ebook_html:
			pg_language = "en-GB"

	# Create necessary directories
	(repo_path / "images").mkdir(parents=True)
	(repo_path / "src" / "epub" / "css").mkdir(parents=True)
	(repo_path / "src" / "epub" / "images").mkdir(parents=True)
	(repo_path / "src" / "epub" / "text").mkdir(parents=True)
	(repo_path / "src" / "META-INF").mkdir(parents=True)

	is_pg_html_parsed = True

	# Write PG data if we have it
	if args.pg_url and pg_ebook_html:
		try:
			# Strip the encoding declaration so lxml doesn't reject a str input
			dom = etree.parse(StringIO(regex.sub(r"encoding=\".+?\"", "", pg_ebook_html)), parser)
			namespaces = {"re": "http://exslt.org/regular-expressions"}

			# Pull the producer credits out of the "*** Produced by ..." boilerplate
			for node in dom.xpath("//*[re:test(text(), '\\*\\*\\*\\s*Produced by.+')]", namespaces=namespaces):
				producers_text = regex.sub(r"^<[^>]+?>", "", etree.tostring(node, encoding=str, with_tail=False))
				producers_text = regex.sub(r"<[^>]+?>$", "", producers_text)

				producers_text = regex.sub(r".+?Produced by (.+?)\s*$", "\\1", producers_text, flags=regex.DOTALL)
				producers_text = regex.sub(r"\(.+?\)", "", producers_text, flags=regex.DOTALL)
				producers_text = regex.sub(r"(at )?https?://www\.pgdp\.net", "", producers_text, flags=regex.DOTALL)
				producers_text = regex.sub(r"[\r\n]+", " ", producers_text, flags=regex.DOTALL)
				producers_text = regex.sub(r",? and ", ", and ", producers_text)
				producers_text = producers_text.replace(" and the Online", " and The Online")
				producers_text = producers_text.replace(", and ", ", ").strip()

				pg_producers = [producer.strip() for producer in regex.split(',|;', producers_text)]

			# Try to strip out the PG header: remove everything up to and
			# including the "*** START OF THIS ..." marker element
			for node in dom.xpath("//*[re:test(text(), '\\*\\*\\*\\s*START OF THIS')]", namespaces=namespaces):
				for sibling_node in node.xpath("./preceding-sibling::*"):
					easy_node = se.easy_xml.EasyXmlElement(sibling_node)
					easy_node.remove()

				easy_node = se.easy_xml.EasyXmlElement(node)
				easy_node.remove()

			# Try to strip out the PG license footer: remove the marker element
			# and everything after it
			for node in dom.xpath("//*[re:test(text(), 'End of (the )?Project Gutenberg')]", namespaces=namespaces):
				for sibling_node in node.xpath("./following-sibling::*"):
					easy_node = se.easy_xml.EasyXmlElement(sibling_node)
					easy_node.remove()

				easy_node = se.easy_xml.EasyXmlElement(node)
				easy_node.remove()

			# lxml will put the xml declaration in a weird place, remove it first
			output = regex.sub(r"<\?xml.+?\?>", "", etree.tostring(dom, encoding="unicode"))

			# Now re-add it
			output = """<?xml version="1.0" encoding="utf-8"?>\n""" + output

			# lxml can also output duplicate default namespace declarations so remove the first one only
			output = regex.sub(r"(xmlns=\".+?\")(\sxmlns=\".+?\")+", r"\1", output)

			with open(repo_path / "src" / "epub" / "text" / "body.xhtml", "w", encoding="utf-8") as file:
				file.write(output)
		except OSError as ex:
			raise se.InvalidFileException(f"Couldn’t write to ebook directory. Exception: {ex}")
		except Exception as ex:
			# Save this error for later, because it's still useful to complete the create-draft process
			# even if we've failed to parse PG's HTML source.
			is_pg_html_parsed = False
			se.quiet_remove(repo_path / "src" / "epub" / "text" / "body.xhtml")

	# Copy over templates
	_copy_template_file("gitignore", repo_path / ".gitignore")
	_copy_template_file("LICENSE.md", repo_path)
	_copy_template_file("container.xml", repo_path / "src" / "META-INF")
	_copy_template_file("mimetype", repo_path / "src")
	_copy_template_file("content.opf", repo_path / "src" / "epub")
	_copy_template_file("onix.xml", repo_path / "src" / "epub")
	_copy_template_file("toc.xhtml", repo_path / "src" / "epub")
	_copy_template_file("core.css", repo_path / "src" / "epub" / "css")
	_copy_template_file("local.css", repo_path / "src" / "epub" / "css")
	_copy_template_file("se.css", repo_path / "src" / "epub" / "css")
	_copy_template_file("logo.svg", repo_path / "src" / "epub" / "images")
	_copy_template_file("colophon.xhtml", repo_path / "src" / "epub" / "text")
	_copy_template_file("imprint.xhtml", repo_path / "src" / "epub" / "text")
	_copy_template_file("titlepage.xhtml", repo_path / "src" / "epub" / "text")
	_copy_template_file("uncopyright.xhtml", repo_path / "src" / "epub" / "text")
	_copy_template_file("titlepage.svg", repo_path / "images")
	_copy_template_file("cover.jpg", repo_path / "images" / "cover.jpg")
	_copy_template_file("cover.svg", repo_path / "images" / "cover.svg")

	# Try to find Wikipedia links if possible
	ebook_wiki_url = None

	if not args.offline and title != "Short Fiction":
		# There's a "Short Fiction" Wikipedia article, so make an exception for that case
		ebook_wiki_url, _ = _get_wikipedia_url(title, False)

	# Pre-fill a few templates
	_replace_in_file(repo_path / "src" / "epub" / "text" / "titlepage.xhtml", "TITLE_STRING", title_string)
	_replace_in_file(repo_path / "images" / "titlepage.svg", "TITLE_STRING", title_string)
	_replace_in_file(repo_path / "images" / "cover.svg", "TITLE_STRING", title_string)

	# Create the titlepage SVG
	contributors = {}
	if args.translator:
		contributors["translated by"] = _generate_contributor_string(translators, False)
	if args.illustrator:
		contributors["illustrated by"] = _generate_contributor_string(illustrators, False)

	with open(repo_path / "images" / "titlepage.svg", "w", encoding="utf-8") as file:
		file.write(_generate_titlepage_svg(title, [author["name"] for author in authors], contributors, title_string))

	# Create the cover SVG
	with open(repo_path / "images" / "cover.svg", "w", encoding="utf-8") as file:
		file.write(_generate_cover_svg(title, [author["name"] for author in authors], title_string))

	# Build the cover/titlepage for distribution
	epub = SeEpub(repo_path)
	epub.generate_cover_svg()
	epub.generate_titlepage_svg()

	if args.pg_url:
		_replace_in_file(repo_path / "src" / "epub" / "text" / "imprint.xhtml", "PG_URL", args.pg_url)

	# Fill out the colophon
	with open(repo_path / "src" / "epub" / "text" / "colophon.xhtml", "r+", encoding="utf-8") as file:
		colophon_xhtml = file.read()

		colophon_xhtml = colophon_xhtml.replace("SE_IDENTIFIER", identifier)
		colophon_xhtml = colophon_xhtml.replace("TITLE", title)

		contributor_string = _generate_contributor_string(authors, True)

		# An empty contributor string (anonymous author) removes the whole
		# "by ... AUTHOR" fragment instead of just the placeholder link
		if contributor_string == "":
			colophon_xhtml = colophon_xhtml.replace(" by<br/>\n\t\t\t<a href=\"AUTHOR_WIKI_URL\">AUTHOR</a>", contributor_string)
		else:
			colophon_xhtml = colophon_xhtml.replace("<a href=\"AUTHOR_WIKI_URL\">AUTHOR</a>", contributor_string)

		if translators:
			translator_block = f"It was translated from ORIGINAL_LANGUAGE in TRANSLATION_YEAR by<br/>\n\t\t\t{_generate_contributor_string(translators, True)}.</p>"
			colophon_xhtml = colophon_xhtml.replace("</p>\n\t\t\t<p>This ebook was produced for the<br/>", f"<br/>\n\t\t\t{translator_block}\n\t\t\t<p>This ebook was produced for the<br/>")

		if args.pg_url:
			colophon_xhtml = colophon_xhtml.replace("PG_URL", args.pg_url)

			if pg_publication_year:
				colophon_xhtml = colophon_xhtml.replace("PG_YEAR", pg_publication_year)

			if pg_producers:
				# Build an Oxford-comma list of producer credits for the colophon
				producers_xhtml = ""
				for i, producer in enumerate(pg_producers):
					if "Distributed Proofread" in producer:
						producers_xhtml = producers_xhtml + "<a href=\"https://www.pgdp.net\">The Online Distributed Proofreading Team</a>"
					elif "anonymous" in producer.lower():
						producers_xhtml = producers_xhtml + "<b class=\"name\">An Anonymous Volunteer</b>"
					else:
						producers_xhtml = producers_xhtml + f"<b class=\"name\">{_add_name_abbr(producer).strip('.')}</b>"

					if i < len(pg_producers) - 1:
						producers_xhtml = producers_xhtml + ", "

					if i == len(pg_producers) - 2:
						producers_xhtml = producers_xhtml + "and "

				producers_xhtml = producers_xhtml + "<br/>"

				colophon_xhtml = colophon_xhtml.replace("<b class=\"name\">TRANSCRIBER_1</b>, <b class=\"name\">TRANSCRIBER_2</b>, and <a href=\"https://www.pgdp.net\">The Online Distributed Proofreading Team</a><br/>", producers_xhtml)

		file.seek(0)
		file.write(colophon_xhtml)
		file.truncate()

	# Fill out the metadata file
	with open(repo_path / "src" / "epub" / "content.opf", "r+", encoding="utf-8") as file:
		metadata_xml = file.read()

		metadata_xml = metadata_xml.replace("SE_IDENTIFIER", identifier)
		metadata_xml = metadata_xml.replace(">TITLE_SORT<", f">{sorted_title}<")
		metadata_xml = metadata_xml.replace(">TITLE<", f">{title}<")
		metadata_xml = metadata_xml.replace("VCS_IDENTIFIER", str(repo_name))

		if pg_producers:
			# Replace the transcriber placeholder block with one
			# <dc:contributor> group per producer
			producers_xhtml = ""
			i = 1
			for producer in pg_producers:
				if "Distributed Proofread" in producer:
					producers_xhtml = producers_xhtml + f"\t\t<dc:contributor id=\"transcriber-{i}\">The Online Distributed Proofreading Team</dc:contributor>\n\t\t<meta property=\"file-as\" refines=\"#transcriber-{i}\">Online Distributed Proofreading Team, The</meta>\n\t\t<meta property=\"se:url.homepage\" refines=\"#transcriber-{i}\">https://pgdp.net</meta>\n"
				elif "anonymous" in producer.lower():
					producers_xhtml = producers_xhtml + f"\t\t<dc:contributor id=\"transcriber-{i}\">An Anonymous Volunteer</dc:contributor>\n\t\t<meta property=\"file-as\" refines=\"#transcriber-{i}\">Anonymous Volunteer, An</meta>\n"
				else:
					producers_xhtml = producers_xhtml + f"\t\t<dc:contributor id=\"transcriber-{i}\">{producer.strip('.')}</dc:contributor>\n\t\t<meta property=\"file-as\" refines=\"#transcriber-{i}\">TRANSCRIBER_SORT</meta>\n"

				producers_xhtml = producers_xhtml + f"\t\t<meta property=\"role\" refines=\"#transcriber-{i}\" scheme=\"marc:relators\">trc</meta>\n"

				i = i + 1

			metadata_xml = regex.sub(r"\t\t<dc:contributor id=\"transcriber-1\">TRANSCRIBER</dc:contributor>\s*<meta property=\"file-as\" refines=\"#transcriber-1\">TRANSCRIBER_SORT</meta>\s*<meta property=\"se:url.homepage\" refines=\"#transcriber-1\">TRANSCRIBER_URL</meta>\s*<meta property=\"role\" refines=\"#transcriber-1\" scheme=\"marc:relators\">trc</meta>", "\t\t" + producers_xhtml.strip(), metadata_xml, flags=regex.DOTALL)

		if ebook_wiki_url:
			metadata_xml = metadata_xml.replace(">EBOOK_WIKI_URL<", f">{ebook_wiki_url}<")

		# Authors use the same contributor XML shape, but as dc:creator
		authors_xml = _generate_metadata_contributor_xml(authors, "author")
		authors_xml = authors_xml.replace("dc:contributor", "dc:creator")
		metadata_xml = regex.sub(r"<dc:creator id=\"author\">AUTHOR</dc:creator>.+?scheme=\"marc:relators\">aut</meta>", authors_xml, metadata_xml, flags=regex.DOTALL)

		if translators:
			translators_xml = _generate_metadata_contributor_xml(translators, "translator")
			metadata_xml = regex.sub(r"<dc:contributor id=\"translator\">.+?scheme=\"marc:relators\">trl</meta>", translators_xml, metadata_xml, flags=regex.DOTALL)
		else:
			# No translators: drop the placeholder block entirely
			metadata_xml = regex.sub(r"<dc:contributor id=\"translator\">.+?scheme=\"marc:relators\">trl</meta>\n\t\t", "", metadata_xml, flags=regex.DOTALL)

		if illustrators:
			illustrators_xml = _generate_metadata_contributor_xml(illustrators, "illustrator")
			metadata_xml = regex.sub(r"<dc:contributor id=\"illustrator\">.+?scheme=\"marc:relators\">ill</meta>", illustrators_xml, metadata_xml, flags=regex.DOTALL)
		else:
			# No illustrators: drop the placeholder block entirely
			metadata_xml = regex.sub(r"<dc:contributor id=\"illustrator\">.+?scheme=\"marc:relators\">ill</meta>\n\t\t", "", metadata_xml, flags=regex.DOTALL)

		if args.pg_url:
			if pg_subjects:
				subject_xhtml = ""

				i = 1
				for subject in pg_subjects:
					subject_xhtml = subject_xhtml + f"\t\t<dc:subject id=\"subject-{i}\">{subject}</dc:subject>\n"
					i = i + 1

				i = 1
				for subject in pg_subjects:
					subject_xhtml = subject_xhtml + f"\t\t<meta property=\"authority\" refines=\"#subject-{i}\">LCSH</meta>\n"

					# Now, get the LCSH ID by querying LCSH directly.
					try:
						response = requests.get(f"https://id.loc.gov/search/?q=%22{urllib.parse.quote(subject)}%22")
						result = regex.search(fr"<a title=\"Click to view record\" href=\"/authorities/subjects/([^\"]+?)\">{regex.escape(subject.replace(' -- ', '--'))}</a>", response.text)

						# Fall back to "Unknown" when the search page has no matching record
						loc_id = "Unknown"
						try:
							loc_id = result.group(1)
						except Exception as ex:
							pass

						subject_xhtml = subject_xhtml + f"\t\t<meta property=\"term\" refines=\"#subject-{i}\">{loc_id}</meta>\n"
					except Exception as ex:
						raise se.RemoteCommandErrorException(f"Couldn’t connect to [url][link=https://id.loc.gov]https://id.loc.gov[/][/]. Exception: {ex}")

					i = i + 1

				metadata_xml = regex.sub(r"\t\t<dc:subject id=\"subject-1\">SUBJECT_1</dc:subject>\s*<dc:subject id=\"subject-2\">SUBJECT_2</dc:subject>\s*<meta property=\"authority\" refines=\"#subject-1\">LCSH</meta>\s*<meta property=\"term\" refines=\"#subject-1\">LCSH_ID_1</meta>\s*<meta property=\"authority\" refines=\"#subject-2\">LCSH</meta>\s*<meta property=\"term\" refines=\"#subject-2\">LCSH_ID_2</meta>", "\t\t" + subject_xhtml.strip(), metadata_xml)

			metadata_xml = metadata_xml.replace("<dc:language>LANG</dc:language>", f"<dc:language>{pg_language}</dc:language>")
			metadata_xml = metadata_xml.replace("<dc:source>PG_URL</dc:source>", f"<dc:source>{args.pg_url}</dc:source>")

		file.seek(0)
		file.write(metadata_xml)
		file.truncate()

	# Set up local git repo
	repo = git.Repo.init(repo_path)

	if args.email:
		with repo.config_writer() as config:
			config.set_value("user", "email", args.email)

	# Raise the deferred parse failure only now, after the draft skeleton is complete
	if args.pg_url and pg_ebook_html and not is_pg_html_parsed:
		raise se.InvalidXhtmlException("Couldn’t parse Project Gutenberg ebook source. This is usually due to invalid HTML in the ebook.")
def build(plain_output: bool) -> int:
	"""
	Entry point for `se build`

	Build epub output (and optionally Kobo/Kindle files) for each given ebook
	directory, printing build messages as plain text or Rich tables.

	plain_output: if True, print without Rich markup or tables.
	Returns 0 on success, se.BuildFailedException.code if any build produced
	messages, or se.InvalidArgumentsException.code for bad flag combinations.
	"""

	parser = argparse.ArgumentParser(description="Build compatible .epub and advanced .epub ebooks from a Standard Ebook source directory. Output is placed in the current directory, or the target directory with --output-dir.")
	parser.add_argument("-b", "--kobo", dest="build_kobo", action="store_true", help="also build a .kepub.epub file for Kobo")
	parser.add_argument("-c", "--check", action="store_true", help="use epubcheck to validate the compatible .epub file, and the Nu Validator (v.Nu) to validate XHTML5; if Ace is installed, also validate using Ace; if --kindle is also specified and epubcheck, v.Nu, or Ace fail, don’t create a Kindle file")
	parser.add_argument("-k", "--kindle", dest="build_kindle", action="store_true", help="also build an .azw3 file for Kindle")
	parser.add_argument("-o", "--output-dir", metavar="DIRECTORY", type=str, default="", help="a directory to place output files in; will be created if it doesn’t exist")
	parser.add_argument("-p", "--proof", dest="proof", action="store_true", help="insert additional CSS rules that are helpful for proofreading; output filenames will end in .proof")
	parser.add_argument("-v", "--verbose", action="store_true", help="increase output verbosity")
	parser.add_argument("-y", "--check-only", action="store_true", help="run tests used by --check but don’t output any ebook files and exit after checking")
	parser.add_argument("directories", metavar="DIRECTORY", nargs="+", help="a Standard Ebooks source directory")
	args = parser.parse_args()

	called_from_parallel = se.is_called_from_parallel(False)
	force_terminal = True if called_from_parallel else None # True will force colors, None will guess whether colors are enabled, False will disable colors
	first_output = True
	return_code = 0

	# Rich needs to know the terminal width in order to format tables.
	# If we're called from Parallel, there is no width because Parallel is not a terminal. Thus we must export $COLUMNS before
	# invoking Parallel, and then get that value here.
	console = Console(width=int(os.environ["COLUMNS"]) if called_from_parallel and "COLUMNS" in os.environ else None, highlight=False, theme=se.RICH_THEME, force_terminal=force_terminal) # Syntax highlighting will do weird things when printing paths; force_terminal prints colors when called from GNU Parallel

	# --check-only is mutually exclusive with everything except --verbose
	if args.check_only and (args.check or args.build_kindle or args.build_kobo or args.proof or args.output_dir):
		se.print_error("The [bash]--check-only[/] option can’t be combined with any other flags except for [bash]--verbose[/].", plain_output=plain_output)
		return se.InvalidArgumentsException.code

	for directory in args.directories:
		directory = Path(directory).resolve()
		messages = []
		exception = None
		table_data = []
		has_output = False

		try:
			se_epub = SeEpub(directory)
			se_epub.build(args.check, args.check_only, args.build_kobo, args.build_kindle, Path(args.output_dir), args.proof)
		except se.BuildFailedException as ex:
			# Keep the messages and keep going, so the failures are rendered below
			exception = ex
			messages = ex.messages
		except se.SeException as ex:
			se.print_error(ex, plain_output=plain_output)

		# Print a separator newline if more than one table is printed
		if not first_output and (args.verbose or messages or exception):
			console.print("")
		elif first_output:
			first_output = False

		# Print the table header (the ebook path) when verbose, or when output
		# for several directories needs to be told apart
		if ((len(args.directories) > 1 or called_from_parallel) and (messages or exception)) or args.verbose:
			has_output = True
			if plain_output:
				console.print(directory)
			else:
				console.print(f"[reverse][path][link=file://{directory}]{directory}[/][/][/reverse]")

		if exception:
			has_output = True
			se.print_error(exception, plain_output=plain_output)

		# Print the tables
		if messages:
			has_output = True
			return_code = se.BuildFailedException.code

			if plain_output:
				for message in messages:
					# Replace color markup with `
					message.text = se.prep_output(message.text, True)

					message_filename = ""
					if message.filename:
						message_filename = message.filename.name

					console.print(f"{message.source}: {message.code} {message_filename}{message.location if message.location else ''} {message.text}")
			else:
				for message in messages:
					# Add hyperlinks around message filenames
					message_filename = ""
					if message.filename:
						message_filename = f"[link=file://{message.filename}]{message.filename.name}[/link]{message.location if message.location else ''}"

					table_data.append([message.source, message.code, message_filename, message.text])

					if message.submessages:
						for submessage in message.submessages:
							# Brackets don't need to be escaped in submessages if we instantiate them in Text()
							submessage_object = Text(submessage, style="dim")

							table_data.append([" ", " ", Text("→", justify="right"), submessage_object])

				table = Table(show_header=True, header_style="bold", show_lines=True, expand=True)
				table.add_column("Source", width=9, no_wrap=True)
				table.add_column("Code", no_wrap=True)
				table.add_column("File", no_wrap=True)
				table.add_column("Message", ratio=10)

				for row in table_data:
					table.add_row(row[0], row[1], row[2], row[3])

				console.print(table)

		# All clean: print an OK marker, but only when verbose
		if args.verbose and not messages and not exception:
			if plain_output:
				console.print("OK")
			else:
				table = Table(show_header=False, box=box.SQUARE)
				table.add_column("", style="white on green4 bold")
				table.add_row("OK")

				console.print(table)

		# Print a newline if we're called from parallel and we just printed something, to
		# better visually separate output blocks
		if called_from_parallel and has_output:
			console.print("")

	return return_code
def _create_draft(args: Namespace):
	"""
	Implementation for `se create-draft`.

	Creates a skeleton Standard Ebooks source directory from templates, optionally
	downloading and pre-processing a Project Gutenberg transcription, pre-filling
	the colophon and metadata files, and initializing a git repository.

	INPUTS
	args: An argparse Namespace with at least: author, title, translator,
	illustrator, pg_url, offline, email.

	RAISES
	se.InvalidInputException if the target directory already exists.
	se.RemoteCommandErrorException on network failures (or if pg_url is given with --offline).
	se.InvalidEncodingException if the PG HTML encoding can't be determined.
	se.InvalidFileException if the ebook body can't be written.
	se.InvalidXhtmlException if the PG HTML couldn't be parsed.
	"""

	# Put together some variables for later use
	identifier = se.formatting.make_url_safe(args.author) + "/" + se.formatting.make_url_safe(args.title)
	title_string = args.title.replace("'", "’") + ", by " + args.author.replace("'", "’")
	sorted_title = regex.sub(r"^(A|An|The) (.+)$", "\\2, \\1", args.title)
	pg_producers = []

	# Initialize everything derived from the PG download up front, so later reads
	# are safe even when no PG URL was given (previously these were only bound
	# inside the `if args.pg_url:` branch and relied on short-circuit evaluation).
	pg_ebook_html = None
	pg_subjects = []
	pg_publication_year = None
	pg_language = "en-US"

	if args.translator:
		identifier = identifier + "/" + se.formatting.make_url_safe(args.translator)
		title_string = title_string + ". Translated by " + args.translator

	if args.illustrator:
		identifier = identifier + "/" + se.formatting.make_url_safe(args.illustrator)
		title_string = title_string + ". Illustrated by " + args.illustrator

	repo_name = identifier.replace("/", "_")
	repo_path = Path(repo_name).resolve()

	if repo_path.is_dir():
		raise se.InvalidInputException(f"Directory already exists: [path][link=file://{repo_path}]{repo_path}[/][/].")

	# Download PG HTML and do some fixups
	if args.pg_url:
		if args.offline:
			raise se.RemoteCommandErrorException("Cannot download Project Gutenberg ebook when offline option is enabled.")

		args.pg_url = args.pg_url.replace("http://", "https://")

		# Get the ebook metadata
		try:
			response = requests.get(args.pg_url)
			pg_metadata_html = response.text
		except Exception as ex:
			raise se.RemoteCommandErrorException(f"Couldn’t download Project Gutenberg ebook metadata page. Exception: {ex}")

		soup = BeautifulSoup(pg_metadata_html, "lxml")

		# Get the ebook HTML URL from the metadata; relative and protocol-relative
		# hrefs are normalized to absolute gutenberg.org URLs
		pg_ebook_url = None
		for element in soup.select("a[type^=\"text/html\"]"):
			pg_ebook_url = regex.sub(r"^//", "https://", element["href"])
			pg_ebook_url = regex.sub(r"^/", "https://www.gutenberg.org/", pg_ebook_url)

		if not pg_ebook_url:
			raise se.RemoteCommandErrorException("Could download ebook metadata, but couldn’t find URL for the ebook HTML.")

		# Get the ebook LCSH categories.
		# Use .get() so a <td> without a datatype attribute is skipped instead of raising KeyError.
		for element in soup.select("td[property=\"dcterms:subject\"]"):
			if element.get("datatype") == "dcterms:LCSH":
				# NOTE(review): iterating find("a") walks the children of the first
				# <a> (its text nodes); presumably each subject link holds a single
				# string — confirm against PG's metadata page markup
				for subject_link in element.find("a"):
					pg_subjects.append(subject_link.strip())

		# Get the PG publication date (the last matching cell wins)
		for element in soup.select("td[itemprop=\"datePublished\"]"):
			pg_publication_year = regex.sub(r".+?([0-9]{4})", "\\1", element.text)

		# Get the actual ebook URL
		try:
			response = requests.get(pg_ebook_url)
			pg_ebook_html = response.text
		except Exception as ex:
			raise se.RemoteCommandErrorException(f"Couldn’t download Project Gutenberg ebook HTML. Exception: {ex}")

		try:
			fixed_pg_ebook_html = fix_text(pg_ebook_html, uncurl_quotes=False)
			pg_ebook_html = se.strip_bom(fixed_pg_ebook_html)
		except Exception as ex:
			raise se.InvalidEncodingException(f"Couldn’t determine text encoding of Project Gutenberg HTML file. Exception: {ex}")

		# Try to guess the ebook language from British spellings in the text
		if "colour" in pg_ebook_html or "favour" in pg_ebook_html or "honour" in pg_ebook_html:
			pg_language = "en-GB"

	# Create necessary directories
	(repo_path / "images").mkdir(parents=True)
	(repo_path / "src" / "epub" / "css").mkdir(parents=True)
	(repo_path / "src" / "epub" / "images").mkdir(parents=True)
	(repo_path / "src" / "epub" / "text").mkdir(parents=True)
	(repo_path / "src" / "META-INF").mkdir(parents=True)

	is_pg_html_parsed = True

	# Write PG data if we have it
	if args.pg_url and pg_ebook_html:
		try:
			soup = BeautifulSoup(pg_ebook_html, "html.parser")

			# Try to get the PG producers. We only try this if there's a <pre> block
			# with the header info (which is not always the case)
			for element in soup(text=regex.compile(r"\*\*\*\s*Produced by.+$", flags=regex.DOTALL)):
				if element.parent.name == "pre":
					producers_text = regex.sub(r".+?Produced by (.+?)\s*$", "\\1", element, flags=regex.DOTALL)
					producers_text = regex.sub(r"\(.+?\)", "", producers_text, flags=regex.DOTALL)
					producers_text = regex.sub(r"(at )?https?://www\.pgdp\.net", "", producers_text, flags=regex.DOTALL)
					producers_text = regex.sub(r"[\r\n]+", " ", producers_text, flags=regex.DOTALL)
					producers_text = regex.sub(r",? and ", ", and ", producers_text)
					producers_text = producers_text.replace(" and the Online", " and The Online")
					producers_text = producers_text.replace(", and ", ", ").strip()

					pg_producers = producers_text.split(", ")

			# Try to strip out the PG header
			for element in soup(text=regex.compile(r"\*\*\*\s*START OF THIS")):
				for sibling in element.parent.find_previous_siblings():
					sibling.decompose()

				element.parent.decompose()

			# Try to strip out the PG license footer
			for element in soup(text=regex.compile(r"End of (the )?Project Gutenberg")):
				for sibling in element.parent.find_next_siblings():
					sibling.decompose()

				element.parent.decompose()

			with open(repo_path / "src" / "epub" / "text" / "body.xhtml", "w", encoding="utf-8") as file:
				file.write(str(soup))
		except OSError as ex:
			raise se.InvalidFileException(f"Couldn’t write to ebook directory. Exception: {ex}")
		except Exception:
			# Save this error for later, because it's still useful to complete the
			# create-draft process even if we've failed to parse PG's HTML source.
			is_pg_html_parsed = False
			se.quiet_remove(repo_path / "src" / "epub" / "text" / "body.xhtml")

	# Copy over templates
	_copy_template_file("gitignore", repo_path / ".gitignore")
	_copy_template_file("LICENSE.md", repo_path)
	_copy_template_file("container.xml", repo_path / "src" / "META-INF")
	_copy_template_file("mimetype", repo_path / "src")
	_copy_template_file("content.opf", repo_path / "src" / "epub")
	_copy_template_file("onix.xml", repo_path / "src" / "epub")
	_copy_template_file("toc.xhtml", repo_path / "src" / "epub")
	_copy_template_file("core.css", repo_path / "src" / "epub" / "css")
	_copy_template_file("local.css", repo_path / "src" / "epub" / "css")
	_copy_template_file("logo.svg", repo_path / "src" / "epub" / "images")
	_copy_template_file("colophon.xhtml", repo_path / "src" / "epub" / "text")
	_copy_template_file("imprint.xhtml", repo_path / "src" / "epub" / "text")
	_copy_template_file("titlepage.xhtml", repo_path / "src" / "epub" / "text")
	_copy_template_file("uncopyright.xhtml", repo_path / "src" / "epub" / "text")
	_copy_template_file("titlepage.svg", repo_path / "images")
	_copy_template_file("cover.jpg", repo_path / "images" / "cover.jpg")
	_copy_template_file("cover.svg", repo_path / "images" / "cover.svg")

	# Try to find Wikipedia links if possible
	if args.offline:
		author_wiki_url = None
		author_nacoaf_url = None
		ebook_wiki_url = None
		translator_wiki_url = None
		translator_nacoaf_url = None
	else:
		author_wiki_url, author_nacoaf_url = _get_wikipedia_url(args.author, True)

		ebook_wiki_url = None
		if args.title != "Short Fiction":
			# There's a "Short Fiction" Wikipedia article, so make an exception for that case
			ebook_wiki_url, _ = _get_wikipedia_url(args.title, False)

		# Initialize both translator URLs here; previously translator_nacoaf_url was
		# left unbound on this path when there was no translator
		translator_wiki_url = None
		translator_nacoaf_url = None
		if args.translator:
			translator_wiki_url, translator_nacoaf_url = _get_wikipedia_url(args.translator, True)

	# Pre-fill a few templates
	_replace_in_file(repo_path / "src" / "epub" / "text" / "titlepage.xhtml", "TITLE_STRING", title_string)
	_replace_in_file(repo_path / "images" / "titlepage.svg", "TITLE_STRING", title_string)
	_replace_in_file(repo_path / "images" / "cover.svg", "TITLE_STRING", title_string)

	# Create the titlepage SVG
	contributors = {}
	if args.translator:
		contributors["translated by"] = args.translator

	if args.illustrator:
		contributors["illustrated by"] = args.illustrator

	with open(repo_path / "images" / "titlepage.svg", "w", encoding="utf-8") as file:
		file.write(_generate_titlepage_svg(args.title, args.author, contributors, title_string))

	# Create the cover SVG
	with open(repo_path / "images" / "cover.svg", "w", encoding="utf-8") as file:
		file.write(_generate_cover_svg(args.title, args.author, title_string))

	# Build the cover/titlepage for distribution
	epub = SeEpub(repo_path)
	epub.generate_cover_svg()
	epub.generate_titlepage_svg()

	if args.pg_url:
		_replace_in_file(repo_path / "src" / "epub" / "text" / "imprint.xhtml", "PG_URL", args.pg_url)

	# Pre-fill the colophon
	with open(repo_path / "src" / "epub" / "text" / "colophon.xhtml", "r+", encoding="utf-8") as file:
		colophon_xhtml = file.read()

		colophon_xhtml = colophon_xhtml.replace("SE_IDENTIFIER", identifier)
		colophon_xhtml = colophon_xhtml.replace(">AUTHOR<", f">{args.author}<")
		colophon_xhtml = colophon_xhtml.replace("TITLE", args.title)

		if author_wiki_url:
			colophon_xhtml = colophon_xhtml.replace("AUTHOR_WIKI_URL", author_wiki_url)

		if args.pg_url:
			colophon_xhtml = colophon_xhtml.replace("PG_URL", args.pg_url)

			if pg_publication_year:
				colophon_xhtml = colophon_xhtml.replace("PG_YEAR", pg_publication_year)

			if pg_producers:
				# Build an English list like "A, B, and C", with DP and anonymous
				# producers given their canonical markup
				producers_xhtml = ""
				for i, producer in enumerate(pg_producers):
					if "Distributed Proofread" in producer:
						producers_xhtml = producers_xhtml + "<a href=\"https://www.pgdp.net\">The Online Distributed Proofreading Team</a>"
					elif "anonymous" in producer.lower():
						producers_xhtml = producers_xhtml + "<b class=\"name\">An Anonymous Volunteer</b>"
					else:
						producers_xhtml = producers_xhtml + f"<b class=\"name\">{producer.strip('.')}</b>"

					if i < len(pg_producers) - 1:
						producers_xhtml = producers_xhtml + ", "

					if i == len(pg_producers) - 2:
						producers_xhtml = producers_xhtml + "and "

				producers_xhtml = producers_xhtml + "<br/>"

				colophon_xhtml = colophon_xhtml.replace("<b class=\"name\">TRANSCRIBER_1</b>, <b class=\"name\">TRANSCRIBER_2</b>, and <a href=\"https://www.pgdp.net\">The Online Distributed Proofreading Team</a><br/>", producers_xhtml)

		file.seek(0)
		file.write(colophon_xhtml)
		file.truncate()

	# Pre-fill the metadata file
	with open(repo_path / "src" / "epub" / "content.opf", "r+", encoding="utf-8") as file:
		metadata_xml = file.read()

		metadata_xml = metadata_xml.replace("SE_IDENTIFIER", identifier)
		metadata_xml = metadata_xml.replace(">AUTHOR<", f">{args.author}<")
		metadata_xml = metadata_xml.replace(">TITLE_SORT<", f">{sorted_title}<")
		metadata_xml = metadata_xml.replace(">TITLE<", f">{args.title}<")
		metadata_xml = metadata_xml.replace("VCS_IDENTIFIER", str(repo_name))

		if pg_producers:
			# Replace the transcriber placeholders with one <dc:contributor> group per producer
			producers_xhtml = ""
			i = 1
			for producer in pg_producers:
				if "Distributed Proofread" in producer:
					producers_xhtml = producers_xhtml + f"\t\t<dc:contributor id=\"transcriber-{i}\">The Online Distributed Proofreading Team</dc:contributor>\n\t\t<meta property=\"file-as\" refines=\"#transcriber-{i}\">Online Distributed Proofreading Team, The</meta>\n\t\t<meta property=\"se:url.homepage\" refines=\"#transcriber-{i}\">https://pgdp.net</meta>\n"
				elif "anonymous" in producer.lower():
					producers_xhtml = producers_xhtml + f"\t\t<dc:contributor id=\"transcriber-{i}\">An Anonymous Volunteer</dc:contributor>\n\t\t<meta property=\"file-as\" refines=\"#transcriber-{i}\">Anonymous Volunteer, An</meta>\n"
				else:
					producers_xhtml = producers_xhtml + f"\t\t<dc:contributor id=\"transcriber-{i}\">{producer.strip('.')}</dc:contributor>\n\t\t<meta property=\"file-as\" refines=\"#transcriber-{i}\">TRANSCRIBER_SORT</meta>\n"

				producers_xhtml = producers_xhtml + f"\t\t<meta property=\"role\" refines=\"#transcriber-{i}\" scheme=\"marc:relators\">trc</meta>\n"

				i = i + 1

			metadata_xml = regex.sub(r"\t\t<dc:contributor id=\"transcriber-1\">TRANSCRIBER</dc:contributor>\s*<meta property=\"file-as\" refines=\"#transcriber-1\">TRANSCRIBER_SORT</meta>\s*<meta property=\"se:url.homepage\" refines=\"#transcriber-1\">TRANSCRIBER_URL</meta>\s*<meta property=\"role\" refines=\"#transcriber-1\" scheme=\"marc:relators\">trc</meta>", "\t\t" + producers_xhtml.strip(), metadata_xml, flags=regex.DOTALL)

		if author_wiki_url:
			metadata_xml = metadata_xml.replace(">AUTHOR_WIKI_URL<", f">{author_wiki_url}<")

		if author_nacoaf_url:
			metadata_xml = metadata_xml.replace(">AUTHOR_NACOAF_URL<", f">{author_nacoaf_url}<")

		if ebook_wiki_url:
			metadata_xml = metadata_xml.replace(">EBOOK_WIKI_URL<", f">{ebook_wiki_url}<")

		if args.translator:
			metadata_xml = metadata_xml.replace(">TRANSLATOR<", f">{args.translator}<")

			if translator_wiki_url:
				metadata_xml = metadata_xml.replace(">TRANSLATOR_WIKI_URL<", f">{translator_wiki_url}<")

			if translator_nacoaf_url:
				metadata_xml = metadata_xml.replace(">TRANSLATOR_NACOAF_URL<", f">{translator_nacoaf_url}<")
		else:
			# No translator: drop the translator contributor block from the template
			metadata_xml = regex.sub(r"<dc:contributor id=\"translator\">.+?<dc:contributor id=\"artist\">", "<dc:contributor id=\"artist\">", metadata_xml, flags=regex.DOTALL)

		if args.pg_url:
			if pg_subjects:
				# First emit all <dc:subject> elements, then their authority/term refinements
				subject_xhtml = ""

				i = 1
				for subject in pg_subjects:
					subject_xhtml = subject_xhtml + f"\t\t<dc:subject id=\"subject-{i}\">{subject}</dc:subject>\n"
					i = i + 1

				i = 1
				for subject in pg_subjects:
					subject_xhtml = subject_xhtml + f"\t\t<meta property=\"authority\" refines=\"#subject-{i}\">LCSH</meta>\n"

					# Now, get the LCSH ID by querying LCSH directly.
					try:
						response = requests.get(f"https://id.loc.gov/search/?q=%22{urllib.parse.quote(subject)}%22")
						result = regex.search(fr"<a title=\"Click to view record\" href=\"/authorities/subjects/([^\"]+?)\">{regex.escape(subject.replace(' -- ', '--'))}</a>", response.text)

						# If the search page has no matching record, fall back to "Unknown"
						loc_id = "Unknown"
						if result:
							loc_id = result.group(1)

						subject_xhtml = subject_xhtml + f"\t\t<meta property=\"term\" refines=\"#subject-{i}\">{loc_id}</meta>\n"
					except Exception as ex:
						raise se.RemoteCommandErrorException(f"Couldn’t connect to [url][link=https://id.loc.gov]https://id.loc.gov[/][/]. Exception: {ex}")

					i = i + 1

				metadata_xml = regex.sub(r"\t\t<dc:subject id=\"subject-1\">SUBJECT_1</dc:subject>\s*<dc:subject id=\"subject-2\">SUBJECT_2</dc:subject>\s*<meta property=\"authority\" refines=\"#subject-1\">LCSH</meta>\s*<meta property=\"term\" refines=\"#subject-1\">LCSH_ID_1</meta>\s*<meta property=\"authority\" refines=\"#subject-2\">LCSH</meta>\s*<meta property=\"term\" refines=\"#subject-2\">LCSH_ID_2</meta>", "\t\t" + subject_xhtml.strip(), metadata_xml)

			metadata_xml = metadata_xml.replace("<dc:language>LANG</dc:language>", f"<dc:language>{pg_language}</dc:language>")
			metadata_xml = metadata_xml.replace("<dc:source>PG_URL</dc:source>", f"<dc:source>{args.pg_url}</dc:source>")

		file.seek(0)
		file.write(metadata_xml)
		file.truncate()

	# Set up local git repo
	repo = git.Repo.init(repo_path)

	if args.email:
		with repo.config_writer() as config:
			config.set_value("user", "email", args.email)

	# Deferred parse failure from the PG HTML handling above
	if args.pg_url and pg_ebook_html and not is_pg_html_parsed:
		raise se.InvalidXhtmlException("Couldn’t parse Project Gutenberg ebook source. This is usually due to invalid HTML in the ebook.")
def build_manifest(plain_output: bool) -> int:
	"""
	Entry point for `se build-manifest`.

	Regenerate the <manifest> element of each given ebook's metadata file, and
	add or remove image-related accessibility metadata depending on whether the
	manifest actually contains content images.

	INPUTS
	plain_output: True to print errors without color/markup.

	OUTPUTS
	0 on success, otherwise an se exception code.
	"""

	parser = argparse.ArgumentParser(description="Generate the <manifest> element for the given Standard Ebooks source directory and write it to the ebook’s metadata file.")
	parser.add_argument("-s", "--stdout", action="store_true", help="print to stdout instead of writing to the metadata file")
	parser.add_argument("directories", metavar="DIRECTORY", nargs="+", help="a Standard Ebooks source directory")
	args = parser.parse_args()

	if args.stdout and len(args.directories) > 1:
		se.print_error("Multiple directories are only allowed without the [bash]--stdout[/] option.", plain_output=plain_output)
		return se.InvalidArgumentsException.code

	for directory in args.directories:
		try:
			se_epub = SeEpub(directory)

			if args.stdout:
				print(se_epub.generate_manifest().to_string())
			else:
				# Replace the existing <manifest>, or append a fresh one if missing
				nodes = se_epub.metadata_dom.xpath("/package/manifest")
				if nodes:
					for node in nodes:
						node.replace_with(se_epub.generate_manifest())
				else:
					for node in se_epub.metadata_dom.xpath("/package"):
						node.append(se_epub.generate_manifest())

				# If we have images in the manifest, add or remove some accessibility metadata while we're here
				access_mode_nodes = se_epub.metadata_dom.xpath("/package/metadata/meta[@property='schema:accessMode' and text() = 'visual']")
				access_mode_sufficient_nodes = se_epub.metadata_dom.xpath("/package/metadata/meta[@property='schema:accessibilityFeature' and text() = 'alternativeText']")

				# Content images are any manifest images other than the standard
				# cover/logo/titlepage SVGs. (The original pattern had a doubled `|`,
				# creating an unintended empty alternation branch.)
				if se_epub.metadata_dom.xpath("/package/manifest/item[starts-with(@media-type, 'image/') and not(re:test(@href, 'images/(cover\\.svg|logo\\.svg|titlepage\\.svg)$'))]"):
					# Add access modes if we have images
					if not access_mode_nodes:
						se_epub.metadata_dom.xpath("/package/metadata/meta[@property='schema:accessMode' and text() = 'textual']")[0].lxml_element.addnext(etree.XML("<meta property=\"schema:accessMode\">visual</meta>"))

					if not access_mode_sufficient_nodes:
						se_epub.metadata_dom.xpath("/package/metadata/meta[@property='schema:accessModeSufficient']")[0].lxml_element.addnext(etree.XML("<meta property=\"schema:accessibilityFeature\">alternativeText</meta>"))
				else:
					# If we don't have images, then remove any access modes that might be there erroneously
					for node in access_mode_nodes:
						node.remove()

					for node in access_mode_sufficient_nodes:
						node.remove()

				with open(se_epub.metadata_file_path, "w", encoding="utf-8") as file:
					file.write(se.formatting.format_xml(se_epub.metadata_dom.to_string()))
		except se.SeException as ex:
			se.print_error(ex)
			return ex.code

	return 0