def main():
    """Command-line entry point for the footnotes checker.

    Parses a single FILENAME argument, loads it as text through
    ``sourcefile.SourceFile.load_text`` and runs
    ``FootNotes.check_footnotes`` on it. The checker's ``anchor_ranges``,
    ``fn_found`` and ``anchor_not_found`` attributes are then printed,
    one per line.

    If the file could not be loaded (``myfile.text`` is ``None``) an
    error message naming the file is printed and the function returns.
    """
    import argparse
    import sys

    # The helpers directory is not a package; make `sourcefile` importable.
    sys.path.append("../../helpers")
    import sourcefile

    parser = argparse.ArgumentParser(description='Footnotes checker PGDP PP.')
    parser.add_argument('filename', metavar='FILENAME', type=str,
                        help='input text file')
    args = parser.parse_args()

    myfile = sourcefile.SourceFile()
    myfile.load_text(args.filename)
    if myfile.text is None:
        # Report the name given on the command line (the previous code
        # referenced an undefined variable here and crashed with NameError).
        print("Cannot read file", args.filename)
        return

    x = FootNotes()
    x.check_footnotes(myfile)

    print(x.anchor_ranges)
    print(x.fn_found)
    print(x.anchor_not_found)
def index_source(self, source_directory: str = "",
                 specific_pages: "list[str] | None" = None) -> None:
    """Index the regular files of a source directory.

    Every regular file directly inside
    ``os.path.join(self.directory, source_directory)`` is wrapped in a
    ``sourcefile.SourceFile`` and appended to ``self.source_files``.
    Sub-directories are ignored.

    Args:
        source_directory: Directory to index, relative to
            ``self.directory``. The empty default indexes
            ``self.directory`` itself.
        specific_pages: Optional collection of file names. When given and
            non-empty, only files whose name is in it are indexed.
            ``None`` or an empty collection indexes every file.
    """
    source_path = os.path.join(self.directory, source_directory)
    source_filenames = [
        entry for entry in os.listdir(source_path)
        if os.path.isfile(os.path.join(source_path, entry))
    ]

    for filename in source_filenames:
        # A None/empty filter means "no filtering".
        if specific_pages and filename not in specific_pages:
            continue
        self.source_files.append(
            sourcefile.SourceFile(
                self.directory,
                os.path.join(source_directory, filename)))
def main():
    """Command-line entry point for the quotes checker.

    Arguments:
        --encoding  Accepted on the command line (see the note in the body).
        FILENAME    Text file to check.
        type        Optional integer selecting which entry of the
                    module-level ``quotes`` sequence to check (1-based).
                    ``0`` (the default when omitted) checks every entry.

    The file is loaded with ``sourcefile.SourceFile.load_text``; each
    selected quote pair is passed to ``check_quotes``. Afterwards every
    key of ``x.unbalanced`` is printed, followed by the first three
    fields of each of its errors.
    """
    import argparse
    import sys

    # The helpers directory is not a package; make `sourcefile` importable.
    sys.path.append("../helpers")
    import sourcefile

    parser = argparse.ArgumentParser(description='Quotes checker PGDP PP.')
    parser.add_argument('--encoding', dest='encoding',
                        help='force document encoding (latin1, utf-8, ...)',
                        default=None)
    parser.add_argument('filename', metavar='FILENAME', type=str,
                        help='input text file')

    help_string = 'Type of quotes to check:' + "".join(
        " " + str(i) + " = " + q['open'] + " ... " + q['close']
        for i, q in enumerate(quotes, start=1))
    # nargs='?' is what makes default=0 reachable: without it argparse
    # treats the positional as mandatory and never applies the default.
    parser.add_argument('type', type=int, nargs='?', help=help_string,
                        default=0)

    args = parser.parse_args()

    # Reject values that would index outside `quotes` (IndexError) or, for
    # negatives, silently select an unintended pair from the end.
    if not 0 <= args.type <= len(quotes):
        parser.error("type must be between 0 and " + str(len(quotes)))

    # NOTE(review): args.encoding is parsed but not forwarded to load_text;
    # the signature of load_text is not visible here -- confirm whether it
    # accepts an encoding before wiring it through.
    myfile = sourcefile.SourceFile()
    myfile.load_text(args.filename)
    if myfile.text is None:
        # Report the name given on the command line (the previous code
        # referenced an undefined variable here and crashed with NameError).
        print("Cannot read file", args.filename)
        return

    x = check()
    if args.type == 0:
        for q in quotes:
            x.check_quotes(myfile, q)
    else:
        x.check_quotes(myfile, quotes[args.type - 1])

    for qkey, errors in x.unbalanced.items():
        print(qkey)
        for err in errors:
            print(err[0], err[1], err[2])
def test_kpoints():
    """Check KPoints against the bundled kpoints.html fixture.

    Loads ``data/testfiles/kpoints.html`` as XHTML, runs
    ``KPoints.check_points`` on it and verifies that exactly the two
    expected strings end up in ``point_matches``.
    """
    import sourcefile

    source = sourcefile.SourceFile()
    source.load_xhtml("data/testfiles/kpoints.html")
    assert source.tree

    checker = KPoints()
    checker.check_points(source)
    found = checker.point_matches

    # There should be 2 strings to check
    for expected in ("centimes. traduct", "dot. or"):
        assert expected in found
    assert len(found) == 2
# Tail of a method whose beginning lies outside this excerpt:
# drop duplicate matches and store them in sorted order.
self.point_matches = sorted(set(self.point_matches))

# Script entry point: load the XHTML file named on the command line, run
# KPoints.check_points on it and print every collected match, one per line.
if __name__ == '__main__':
    import argparse
    import os
    import sourcefile

    # NOTE(review): the description text mentions a diff tool, while this
    # script runs the KPoints check -- confirm whether the wording is intended.
    parser = argparse.ArgumentParser(
        description='Diff text document for PGDP PP.')
    parser.add_argument('filename', metavar='FILENAME', type=str,
                        help='input text file')
    args = parser.parse_args()

    kp = KPoints()
    myfile = sourcefile.SourceFile()
    myfile.load_xhtml(args.filename)
    kp.check_points(myfile)

    # Print the matches in sorted order.
    for x in sorted(kp.point_matches):
        print(x)