# ---------------------------------------------------------------------------
# English-side pattern tasks
# ---------------------------------------------------------------------------

# Disabled task definition, kept for reference.
# en_patterns_pretty = gentask.patterns_pretty(
#     'en_patterns_pretty', en_patterns(), target_dir / 'en.patterns.json')

# Disabled task definition, kept for reference.
# patterns_allline_task = gentask_pattern.pipeline_allline_task(
#     'moviesub_en_patterns_allline', en_truecase())

# Task named 'moviesub_en_filtered_patterns', built from the output of en().
filtered_patterns = gentask_pattern.filtered_patterns_from_sentences(
    'moviesub_en_filtered_patterns',
    en(),
)

# ---------------------------------------------------------------------------
# `ch` chain: untok -> zhtoktag -> remove_slashtag
# (presumably the Chinese side of the corpus, judging by the `ch`/`zh`
# names -- confirm)
# ---------------------------------------------------------------------------

# NOTE(review): `ch` is called below, but the only definition visible in this
# part of the file is the commented-out one here. Confirm that `ch` is
# defined elsewhere before relying on this chain.
# ch = gentask.slice_lines_grouped_by_n('ch', ench(), target_dir / 'ch.txt',
#                                       n=3,
#                                       s=1)

# Each step below consumes the previous step's task and writes to its own
# file under target_dir.
ch_untok = gentask.untok(
    'ch_untok',
    ch(),
    target_dir / 'ch.untok.txt',
)

ch_toktag = gentask.zhtoktag(
    'ch_toktag',
    ch_untok(),
    target_dir / 'ch.toktag.txt',
    tm=sbc4_tok_tag_tm(),
    lm=sbc4_tag_lm(),
)

ch_tok = gentask.remove_slashtag(
    'ch_tok',
    ch_toktag(),
    target_dir / 'ch.tok.txt',
)

# ---------------------------------------------------------------------------
# Parallel data
# ---------------------------------------------------------------------------

# Hands en() and ch_tok() to parallel_lines_merge, targeting en_chtok.txt.
en_chtok = gentask.parallel_lines_merge(
    'en_chtok',
    en(),
    ch_tok(),
    target_dir / 'en_chtok.txt',
)

# Disabled task definition; it would read the same en_chtok.txt path that
# en_chtok targets above.
# giza_task = gentask_giza.giza(inputf=str(target_dir / 'en_chtok.txt'),
#                               outputd=str(target_dir / 'giza/'))

# ---------------------------------------------------------------------------
# Tasks wrapping fixed paths under target_dir
# (presumably files produced outside this pipeline -- confirm)
# ---------------------------------------------------------------------------

unpack_singleline_patterns = gentask.localtarget_task(
    target_dir / 'en.gt.hiih.patterns.pretty.unpack_singleline.json',
)

phrasetable = gentask.localtarget_task(
    target_dir / 'phrase-table.10000.gz',
)