# Enrich the filtered file of every language whose enriched output does not
# exist yet, either by submitting a job through run_cmd (when USE_CONDOR is
# set) or by calling enrich() directly in this process.
enrichment_done = False
for lang in ef.langs:
    filtered_f = ef.get_filtered_file(lang)
    enriched_f = ef.get_enriched_file(lang)

    # Nothing to do when the enriched file is already on disk.
    if os.path.exists(enriched_f):
        continue

    enrichment_done = True

    if not USE_CONDOR:
        enrich(**{ARG_INFILE: filtered_f,
                  ARG_OUTFILE: enriched_f,
                  ALN_VAR: ARG_ALN_METHODS,
                  POS_VAR: ARG_POS_CLASS,
                  PARSE_VAR: ARG_PARSE_TRANS})
        continue

    model_prefix, name = ef.get_condor_enrich(lang)
    run_cmd(
        [
            p3path, intent_script, 'enrich',
            '--align', 'heur,heurpos,giza,gizaheur',
            '--pos class',
            '--parse trans',
            filtered_f, enriched_f,
        ],
        model_prefix,
        name,
        False,
    )

# Only wait/notify when at least one enrichment job was actually launched.
if USE_CONDOR and enrichment_done:
    condor_wait_notify("Data has been enriched.", email_address, "CONDOR: Enrichment Complete.")

# -------------------------------------------
# 3) Re-project the data...
# -------------------------------------------
projection_done = False
for lang in ef.langs:
    for aln_method in aln_methods:
        enriched_f = ef.get_enriched_file(lang)
        projected_f = ef.get_projected_file(aln_method, lang)
        # Flag that projection work is needed for a missing projected file.
        if not os.path.exists(projected_f):
            projection_done = True
def test_inst_1(self):
    """Call ``enrich`` on ``dep_path``, writing to ``/dev/null``, and assert it returns ``None``."""
    enrich_options = {
        ARG_INFILE: dep_path,
        ARG_OUTFILE: '/dev/null',
        ALN_VAR: [ARG_ALN_HEUR],
        PARSE_VAR: [ARG_PARSE_PROJ, ARG_PARSE_TRANS],
    }
    outcome = enrich(**enrich_options)
    self.assertIsNone(outcome)
def test_ger(self):
    """Call ``enrich`` with ``ger_file`` as ``IN_FILE`` plus the ``no_enrich_args`` options."""
    # Passes as long as enrich() does not raise; no return value is checked.
    enrich(
        IN_FILE=ger_file,
        **no_enrich_args
    )
except PathArgException as pae: # If we get some kind of invalid file in the arguments, print it and exit. MAIN_LOG.critical(str(pae)) # sys.stderr.write(str(pae)+'\n') sys.exit(2) # Decide on action based on subcommand and args. ------------------------------- # =============================================================================== # Set verbosity level # =============================================================================== logging.getLogger().setLevel(logging.WARNING - 10 * (min(args.verbose, 2))) # ENRICH if args.subcommand == CMD_ENRICH: enrich(**vars(args)) # STATS elif args.subcommand == CMD_STATS: igt_stats(flatten_list(args.FILE), type='xigt', show_filename=True) # SPLIT elif args.subcommand == CMD_SPLIT: split_corpus(flatten_list(args.FILE), args.train, args.dev, args.test, prefix=args.prefix, overwrite=args.overwrite, nfold=args.nfold) # FILTER elif args.subcommand == CMD_FILTER: filter_corpus(flatten_list(getattr(args, ARG_INFILE)), getattr(args, ARG_OUTFILE), **vars(args)) # EXTRACT
def test_ctn(self):
    """Call ``enrich`` with ``ctn_file`` as ``IN_FILE`` plus the ``no_enrich_args`` options."""
    # Passes as long as enrich() does not raise; no return value is checked.
    enrich(
        IN_FILE=ctn_file,
        **no_enrich_args
    )
def test_kor(self):
    """Call ``enrich`` with ``kor_file`` as ``IN_FILE`` plus the ``no_enrich_args`` options."""
    # Passes as long as enrich() does not raise; no return value is checked.
    enrich(
        IN_FILE=kor_file,
        **no_enrich_args
    )
def test_814(self):
    """Run ``enrich`` on ``xigt/814.xml`` using a copy of ``all_enrich_args``; expect ``None``."""
    enrich_kwargs = all_enrich_args.copy()
    input_path = os.path.join(testfile_dir, 'xigt/814.xml')
    enrich_kwargs['IN_FILE'] = input_path

    result = enrich(**enrich_kwargs)
    self.assertIsNone(result)
def test_911(self):
    """Run ``enrich`` on ``xigt/multiple_line_tests.xml`` using a copy of ``all_enrich_args``; expect ``None``."""
    enrich_kwargs = all_enrich_args.copy()
    input_path = os.path.join(testfile_dir, 'xigt/multiple_line_tests.xml')
    enrich_kwargs['IN_FILE'] = input_path

    result = enrich(**enrich_kwargs)
    self.assertIsNone(result)
def harness(self, name):
    """Run ``enrich`` on the test file ``name`` and assert that it returns ``None``.

    ``name`` is joined onto ``testfile_dir`` and passed as ``IN_FILE``; all
    remaining options come from a copy of ``all_enrich_args``.
    """
    enrich_kwargs = all_enrich_args.copy()
    input_path = os.path.join(testfile_dir, name)
    enrich_kwargs['IN_FILE'] = input_path

    result = enrich(**enrich_kwargs)
    self.assertIsNone(result)
def test_hanging(self):
    """Run ``enrich`` on the ``hang_test.xml`` test file with the default arguments.

    The shared ``def_enrich_args`` mapping is copied before the input path is
    set, so this test does not leave its ``ARG_INFILE`` value behind for
    whichever test happens to run next.
    """
    xp = xigt_testfile('hang_test.xml')
    # Work on a private copy; writing into def_enrich_args directly would make
    # other tests that read it depend on execution order.
    enrich_args = dict(def_enrich_args)
    enrich_args[ARG_INFILE] = xp
    enrich(**enrich_args)
def test_encoding(self):
    """Run ``enrich`` on the ``encoding-error-test.xml`` test file with the default arguments.

    The shared ``def_enrich_args`` mapping is copied before the input path is
    set, so this test does not leave its ``ARG_INFILE`` value behind for
    whichever test happens to run next.
    """
    xp = xigt_testfile('encoding-error-test.xml')
    # Work on a private copy; writing into def_enrich_args directly would make
    # other tests that read it depend on execution order.
    enrich_args = dict(def_enrich_args)
    enrich_args[ARG_INFILE] = xp
    enrich(**enrich_args)