Пример #1
0
# Enrich the filtered file of every language that does not yet have an
# enriched file.  `enrichment_done` records whether any language needed work,
# so the Condor notification at the end is only sent when something was run.
enrichment_done = False
for lang in ef.langs:
    filtered_f = ef.get_filtered_file(lang)
    enriched_f = ef.get_enriched_file(lang)

    # An enriched file that already exists is left untouched.
    if os.path.exists(enriched_f):
        continue

    enrichment_done = True

    if not USE_CONDOR:
        # In-process enrichment.  The option names are constants, so the
        # keyword arguments are assembled in a dict and unpacked.
        enrich_kwargs = {
            ARG_INFILE: filtered_f,
            ARG_OUTFILE: enriched_f,
            ALN_VAR: ARG_ALN_METHODS,
            POS_VAR: ARG_POS_CLASS,
            PARSE_VAR: ARG_PARSE_TRANS,
        }
        enrich(**enrich_kwargs)
        continue

    # Condor path: run the `enrich` subcommand of the intent script through
    # run_cmd, using the per-language prefix and name supplied by `ef`.
    model_prefix, name = ef.get_condor_enrich(lang)
    enrich_cmd = [
        p3path, intent_script, 'enrich',
        '--align', 'heur,heurpos,giza,gizaheur',
        # NOTE(review): each of the next two entries carries the flag and its
        # value in a single list element -- confirm run_cmd joins/splits the
        # list so they reach the script as separate arguments.
        '--pos class', '--parse trans',
        filtered_f, enriched_f,
    ]
    run_cmd(enrich_cmd, model_prefix, name, False)

if USE_CONDOR and enrichment_done:
    condor_wait_notify("Data has been enriched.", email_address, "CONDOR: Enrichment Complete.")

# -------------------------------------------
# 3) Re-project the data...
# -------------------------------------------
# For every (language, alignment method) pair, look up the enriched input and
# the projected output path; `projection_done` becomes True as soon as any
# projected file is found to be missing.
projection_done = False
for lang in ef.langs:
    for aln_method in aln_methods:
        # NOTE(review): enriched_f is not used in the lines visible here --
        # presumably it is consumed by the projection step that follows.
        enriched_f  = ef.get_enriched_file(lang)
        projected_f = ef.get_projected_file(aln_method, lang)

        # Only pairs without an existing projected file need work.
        if not os.path.exists(projected_f):
            projection_done = True
Пример #2
0
 def test_inst_1(self):
     """Enrich ``dep_path`` and check that ``enrich`` returns None.

     Uses ARG_ALN_HEUR as the only alignment method, requests both the
     ARG_PARSE_PROJ and ARG_PARSE_TRANS parse options, and directs the
     output to '/dev/null'.
     """
     enrich_opts = {
         ARG_INFILE: dep_path,
         ARG_OUTFILE: '/dev/null',
         ALN_VAR: [ARG_ALN_HEUR],
         PARSE_VAR: [ARG_PARSE_PROJ, ARG_PARSE_TRANS],
     }
     outcome = enrich(**enrich_opts)
     self.assertIsNone(outcome)
Пример #3
0
 def test_ger(self):
     """Run ``enrich`` on ``ger_file`` with the options in ``no_enrich_args``.

     There are no assertions; the test passes when the call does not raise.
     """
     enrich(
         IN_FILE=ger_file,
         **no_enrich_args
     )
Пример #4
0
except PathArgException as pae:  # If we get some kind of invalid file in the arguments, print it and exit.
    MAIN_LOG.critical(str(pae))
    # sys.stderr.write(str(pae)+'\n')
    sys.exit(2)

# Decide on action based on subcommand and args. -------------------------------

# ===============================================================================
# Set verbosity level
# ===============================================================================

# Start at WARNING and lower the root logger's threshold by one level (10) per
# verbosity count, capped at two steps: 0 -> WARNING, 1 -> INFO, 2+ -> DEBUG.
logging.getLogger().setLevel(logging.WARNING - 10 * (min(args.verbose, 2)))

# ENRICH
# Every parsed argument is forwarded to enrich() as a keyword argument.
if args.subcommand == CMD_ENRICH:
    enrich(**vars(args))

# STATS
# args.FILE is flattened into a single list before being passed on.
elif args.subcommand == CMD_STATS:
    igt_stats(flatten_list(args.FILE), type='xigt', show_filename=True)

# SPLIT
elif args.subcommand == CMD_SPLIT:
    split_corpus(flatten_list(args.FILE), args.train, args.dev, args.test, prefix=args.prefix, overwrite=args.overwrite,
                 nfold=args.nfold)

# FILTER
# NOTE(review): the input and output arguments are passed positionally and are
# also present in vars(args); this relies on filter_corpus accepting the
# duplicates via its keyword arguments -- confirm against its signature.
elif args.subcommand == CMD_FILTER:
    filter_corpus(flatten_list(getattr(args, ARG_INFILE)), getattr(args, ARG_OUTFILE), **vars(args))

# EXTRACT
Пример #5
0
 def test_ctn(self):
     """Run ``enrich`` on ``ctn_file`` with the options in ``no_enrich_args``.

     There are no assertions; the test passes when the call does not raise.
     """
     enrich(
         IN_FILE=ctn_file,
         **no_enrich_args
     )
Пример #6
0
 def test_kor(self):
     """Run ``enrich`` on ``kor_file`` with the options in ``no_enrich_args``.

     There are no assertions; the test passes when the call does not raise.
     """
     enrich(
         IN_FILE=kor_file,
         **no_enrich_args
     )
Пример #7
0
 def test_814(self):
     """Enrich 'xigt/814.xml' with ``all_enrich_args`` and expect None back."""
     # Copy the shared argument dict so the input path stays local to this test.
     enrich_kwargs = all_enrich_args.copy()
     xigt_path = os.path.join(testfile_dir, 'xigt/814.xml')
     enrich_kwargs['IN_FILE'] = xigt_path
     outcome = enrich(**enrich_kwargs)
     self.assertIsNone(outcome)
Пример #8
0
 def test_911(self):
     """Enrich 'xigt/multiple_line_tests.xml' with ``all_enrich_args`` and expect None back."""
     # Copy the shared argument dict so the input path stays local to this test.
     enrich_kwargs = all_enrich_args.copy()
     xigt_path = os.path.join(testfile_dir, 'xigt/multiple_line_tests.xml')
     enrich_kwargs['IN_FILE'] = xigt_path
     outcome = enrich(**enrich_kwargs)
     self.assertIsNone(outcome)
Пример #9
0
 def harness(self, name):
     """Enrich the test file ``name`` and assert that ``enrich`` returns None.

     ``name`` is joined onto ``testfile_dir``; all other options come from a
     copy of ``all_enrich_args``.
     """
     enrich_kwargs = all_enrich_args.copy()
     target_path = os.path.join(testfile_dir, name)
     enrich_kwargs['IN_FILE'] = target_path
     outcome = enrich(**enrich_kwargs)
     self.assertIsNone(outcome)
Пример #10
0
 def test_hanging(self):
     """Run ``enrich`` on the 'hang_test.xml' test file using ``def_enrich_args``.

     There are no assertions; the test passes when ``enrich`` returns
     without raising.
     """
     # Work on a copy: writing the input path into def_enrich_args itself
     # would leak it into every other test that reads the same dict and make
     # the outcome depend on test execution order.
     enrich_args = def_enrich_args.copy()
     enrich_args[ARG_INFILE] = xigt_testfile('hang_test.xml')
     enrich(**enrich_args)
Пример #11
0
 def test_encoding(self):
     """Run ``enrich`` on the 'encoding-error-test.xml' test file using ``def_enrich_args``.

     There are no assertions; the test passes when ``enrich`` returns
     without raising.
     """
     # Work on a copy: writing the input path into def_enrich_args itself
     # would leak it into every other test that reads the same dict and make
     # the outcome depend on test execution order.
     enrich_args = def_enrich_args.copy()
     enrich_args[ARG_INFILE] = xigt_testfile('encoding-error-test.xml')
     enrich(**enrich_args)