def main():
    """CLI entry point: parse docopt arguments and run the NLM post-processing
    pipeline (reference linking, caption/bibliography classification, ID
    generation, XSLT chaining, compliance cleanup).

    NOTE(review): nesting of the post-`process` steps was inferred from
    statement order; the original file's indentation was lost — confirm
    against upstream.
    """
    args = docopt(__doc__, version='meTypeset 0.1')
    bare_gv = GV(args)

    if args['--debug']:
        bare_gv.debug.enable_debug(args['--nogit'])

    # BUG FIX: the original assigned this instance to an unused variable and
    # then constructed a *second* TeiToNlm just to call run(); reuse it.
    nlm_instance = TeiToNlm(bare_gv)

    if args['process']:
        # run non-transform portions of teitonlm
        nlm_instance.run(True, False)

        # run reference linker
        rl = ReferenceLinker(bare_gv)
        rl.run(args['--interactive'])
        rl.cleanup()

        bibliography_classifier = BibliographyClassifier(bare_gv)

        # run table/graphic caption classifiers, gated on aggression level
        cc = CaptionClassifier(bare_gv)
        if int(args['--aggression']) > int(
                bare_gv.settings.get_setting(
                    'tablecaptions', None, domain='aggression')):
            cc.run_tables()
        if int(args['--aggression']) > int(
                bare_gv.settings.get_setting(
                    'graphiccaptions', None, domain='aggression')):
            cc.run_graphics()

        if args['--interactive']:
            bibliography_classifier.run_prompt(True)

        # process any bibliography entries that are possible
        BibliographyDatabase(bare_gv).run()

        # remove stranded titles
        manipulate = NlmManipulate(bare_gv)
        manipulate.final_clean()

        if args['--identifiers']:
            IdGenerator(bare_gv).run()

        if args['--chain']:
            # construct and run an XSLT chainer
            XslChain(bare_gv).run()

        if args['--clean']:
            ComplianceEnforcer(bare_gv).run()
def main():
    """CLI entry point: parse docopt arguments and run the NLM post-processing
    pipeline (reference linking, caption/bibliography classification, ID
    generation, XSLT chaining, compliance cleanup).

    NOTE(review): nesting of the post-`process` steps was inferred from
    statement order; the original file's indentation was lost — confirm
    against upstream.
    """
    args = docopt(__doc__, version='meTypeset 0.1')
    bare_gv = GV(args)

    if args['--debug']:
        bare_gv.debug.enable_debug(args['--nogit'])

    # BUG FIX: the original assigned this instance to an unused variable and
    # then constructed a *second* TeiToNlm just to call run(); reuse it.
    nlm_instance = TeiToNlm(bare_gv)

    if args['process']:
        # run non-transform portions of teitonlm
        nlm_instance.run(True, False)

        # run reference linker
        rl = ReferenceLinker(bare_gv)
        rl.run(args['--interactive'])
        rl.cleanup()

        bibliography_classifier = BibliographyClassifier(bare_gv)

        # run table/graphic caption classifiers, gated on aggression level
        cc = CaptionClassifier(bare_gv)
        if int(args['--aggression']) > int(
                bare_gv.settings.get_setting(
                    'tablecaptions', None, domain='aggression')):
            cc.run_tables()
        if int(args['--aggression']) > int(
                bare_gv.settings.get_setting(
                    'graphiccaptions', None, domain='aggression')):
            cc.run_graphics()

        if args['--interactive']:
            bibliography_classifier.run_prompt(True)

        # process any bibliography entries that are possible
        BibliographyDatabase(bare_gv).run()

        # remove stranded titles
        manipulate = NlmManipulate(bare_gv)
        manipulate.final_clean()

        if args['--identifiers']:
            IdGenerator(bare_gv).run()

        if args['--chain']:
            # construct and run an XSLT chainer
            XslChain(bare_gv).run()

        if args['--clean']:
            ComplianceEnforcer(bare_gv).run()
def run_modules(self):
    """Drive the full document-conversion pipeline.

    Dispatches on the positional command (bibscan / doc / odt / other /
    docx / docxextracted / tei) to produce TEI, then runs the TEI->NLM
    transform and all post-processing stages, honouring the --puretei,
    --purenlm and assorted option flags.

    NOTE(review): block nesting was inferred from statement order; the
    original file's indentation was lost — confirm against upstream.
    """
    ag = int(self.gv.settings.args['--aggression'])
    self.debug.print_debug(
        self,
        u'Running at aggression level {0} {1}'.format(
            ag, "[grrr!]" if ag == 10 else ""))
    if ag > 10:
        self.debug.print_debug(
            self,
            "WARNING: safety bail-out features are disabled at aggression level 11")

    if self.args['bibscan']:
        BibliographyDatabase(self.gv).scan()
    else:
        # check for stylesheets
        self.gv.check_file_exists(self.gv.docx_style_sheet_dir)

        # metadata file
        # BUG FIX: original read `gv.metadata_file = ...` (bare `gv`), which
        # raises NameError unless a module-global `gv` exists; every other
        # access in this method uses `self.gv`.
        self.gv.metadata_file = self.set_metadata_file()

        self.gv.mk_dir(self.gv.output_folder_path)

        # input-format dispatch: everything funnels into DocxToTei
        if self.args['doc']:
            # run doc to docx conversion, then docx to tei
            UnoconvToDocx(self.gv).run('doc')
            DocxToTei(self.gv).run(True, self.args['--proprietary'])
        elif self.args['odt']:
            # run odt to docx conversion, then docx to tei
            UnoconvToDocx(self.gv).run('odt')
            DocxToTei(self.gv).run(True, self.args['--proprietary'])
        elif self.args['other']:
            # run other unoconv-supported format to docx, then docx to tei
            UnoconvToDocx(self.gv).run('unoconv')
            DocxToTei(self.gv).run(True, self.args['--proprietary'])
        elif self.args['docx']:
            # run docx to tei conversion
            # includes hooks for proprietary transforms if enabled
            DocxToTei(self.gv).run(True, self.args['--proprietary'])
        elif self.args['docxextracted']:
            self.debug.print_debug(self, u'Skipping docx extraction')
            DocxToTei(self.gv).run(False, self.args['--proprietary'])
        elif self.args['tei']:
            self.debug.print_debug(
                self, u'Skipping docx extraction; processing TEI file')
            DocxToTei(self.gv).run(False, self.args['--proprietary'], tei=True)

        if self.args['--puretei']:
            self.debug.print_debug(self, u'Exiting as TEI transform complete')
            return

        metadata = Metadata(self.gv)
        metadata.pre_clean()

        # run size classifier (aggression 5)
        SizeClassifier(self.gv).run()

        # run bibliographic addins handler (aggression 4)
        found_bibliography = BibliographyAddins(self.gv).run()

        # run list classifier (aggression 4)
        ListClassifier(self.gv).run()

        bibliography_classifier = BibliographyClassifier(self.gv)
        if not found_bibliography:
            # run bibliographic classifier (aggression 4)
            bibliography_classifier.run()

        # tei manipulation (aggression 3)
        TeiManipulate(self.gv).run()

        # run tei to nlm conversion
        TeiToNlm(self.gv).run(not found_bibliography)

        if self.gv.settings.args['--purenlm']:
            self.debug.print_debug(self, u'Exiting as NLM transform complete')
            return

        manipulate = NlmManipulate(self.gv)
        if not self.gv.used_list_method:
            manipulate.fuse_references()

        # run reference linker
        if not (self.args['--nolink']):
            rl = ReferenceLinker(self.gv)
            rl.run(self.args['--interactive'])
            rl.cleanup()

        # run table/graphic caption classifiers, gated on aggression level
        cc = CaptionClassifier(self.gv)
        if int(self.args['--aggression']) > int(
                self.gv.settings.get_setting(
                    'tablecaptions', self, domain='aggression')):
            cc.run_tables()
        if int(self.args['--aggression']) > int(
                self.gv.settings.get_setting(
                    'graphiccaptions', self, domain='aggression')):
            cc.run_graphics()

        # run metadata merge
        metadata.run()

        if self.args['--interactive']:
            bibliography_classifier.run_prompt(True)

        # process any bibliography entries that are possible
        BibliographyDatabase(self.gv).run()

        # remove stranded titles and cleanup
        manipulate.final_clean()

        if self.args['--identifiers']:
            IdGenerator(self.gv).run()

        if self.args['--chain']:
            # construct and run an XSLT chainer
            XslChain(self.gv).run()

        if self.args['--clean']:
            ComplianceEnforcer(self.gv).run()
def run_modules(self):
    """Drive the full document-conversion pipeline.

    Dispatches on the positional command (bibscan / doc / odt / other /
    docx / docxextracted / tei) to produce TEI, then runs the TEI->NLM
    transform and all post-processing stages, honouring the --puretei,
    --purenlm and assorted option flags.

    NOTE(review): block nesting was inferred from statement order; the
    original file's indentation was lost — confirm against upstream.
    """
    ag = int(self.gv.settings.args['--aggression'])
    self.debug.print_debug(
        self,
        u'Running at aggression level {0} {1}'.format(
            ag, "[grrr!]" if ag == 10 else ""))
    if ag > 10:
        self.debug.print_debug(
            self,
            "WARNING: safety bail-out features are disabled at aggression level 11")

    if self.args['bibscan']:
        BibliographyDatabase(self.gv).scan()
    else:
        # check for stylesheets
        self.gv.check_file_exists(self.gv.docx_style_sheet_dir)

        # metadata file
        # BUG FIX: original read `gv.metadata_file = ...` (bare `gv`), which
        # raises NameError unless a module-global `gv` exists; every other
        # access in this method uses `self.gv`.
        self.gv.metadata_file = self.set_metadata_file()

        self.gv.mk_dir(self.gv.output_folder_path)

        # input-format dispatch: everything funnels into DocxToTei
        if self.args['doc']:
            # run doc to docx conversion, then docx to tei
            UnoconvToDocx(self.gv).run('doc')
            DocxToTei(self.gv).run(True, self.args['--proprietary'])
        elif self.args['odt']:
            # run odt to docx conversion, then docx to tei
            UnoconvToDocx(self.gv).run('odt')
            DocxToTei(self.gv).run(True, self.args['--proprietary'])
        elif self.args['other']:
            # run other unoconv-supported format to docx, then docx to tei
            UnoconvToDocx(self.gv).run('unoconv')
            DocxToTei(self.gv).run(True, self.args['--proprietary'])
        elif self.args['docx']:
            # run docx to tei conversion
            # includes hooks for proprietary transforms if enabled
            DocxToTei(self.gv).run(True, self.args['--proprietary'])
        elif self.args['docxextracted']:
            self.debug.print_debug(self, u'Skipping docx extraction')
            DocxToTei(self.gv).run(False, self.args['--proprietary'])
        elif self.args['tei']:
            self.debug.print_debug(
                self, u'Skipping docx extraction; processing TEI file')
            DocxToTei(self.gv).run(False, self.args['--proprietary'], tei=True)

        if self.args['--puretei']:
            self.debug.print_debug(self, u'Exiting as TEI transform complete')
            return

        metadata = Metadata(self.gv)
        metadata.pre_clean()

        # run size classifier (aggression 5)
        SizeClassifier(self.gv).run()

        # run bibliographic addins handler (aggression 4)
        found_bibliography = BibliographyAddins(self.gv).run()

        # run list classifier (aggression 4)
        ListClassifier(self.gv).run()

        bibliography_classifier = BibliographyClassifier(self.gv)
        if not found_bibliography:
            # run bibliographic classifier (aggression 4)
            bibliography_classifier.run()

        # tei manipulation (aggression 3)
        TeiManipulate(self.gv).run()

        # run tei to nlm conversion
        TeiToNlm(self.gv).run(not found_bibliography)

        if self.gv.settings.args['--purenlm']:
            self.debug.print_debug(self, u'Exiting as NLM transform complete')
            return

        manipulate = NlmManipulate(self.gv)
        if not self.gv.used_list_method:
            manipulate.fuse_references()

        # run reference linker
        if not (self.args['--nolink']):
            rl = ReferenceLinker(self.gv)
            rl.run(self.args['--interactive'])
            rl.cleanup()

        # run table/graphic caption classifiers, gated on aggression level
        cc = CaptionClassifier(self.gv)
        if int(self.args['--aggression']) > int(
                self.gv.settings.get_setting(
                    'tablecaptions', self, domain='aggression')):
            cc.run_tables()
        if int(self.args['--aggression']) > int(
                self.gv.settings.get_setting(
                    'graphiccaptions', self, domain='aggression')):
            cc.run_graphics()

        # run metadata merge
        metadata.run()

        if self.args['--interactive']:
            bibliography_classifier.run_prompt(True)

        # process any bibliography entries that are possible
        BibliographyDatabase(self.gv).run()

        # remove stranded titles and cleanup
        manipulate.final_clean()

        if self.args['--identifiers']:
            IdGenerator(self.gv).run()

        if self.args['--chain']:
            # construct and run an XSLT chainer
            XslChain(self.gv).run()

        if self.args['--clean']:
            ComplianceEnforcer(self.gv).run()