Пример #1
0
def cross_validate_inner(i):
    """Run fold number ``i`` of the cross-validation set up in ``_NAIVE_CV_GLOBALS``.

    The configuration tuple ``_NAIVE_CV_GLOBALS`` supplies the directories,
    the ``N_func`` callable, the number of folds, the fold splits and the
    algorithm name.  The function:

    1. empties ``algo_dir``;
    2. trains the selected algorithm on every split except split ``i``;
    3. runs ambiguity removal on the files of split ``i`` that have a
       counterpart (same base name) inside ``morph_analysis_dir``;
    4. measures the output with ``calculate_dir_precision``.

    :param i: index of the fold that is held out for testing.
    :returns: tuple ``(total_correct_known, total_correct_unknown,
        total_known, total_unknown, upper_bound_known, upper_bound_unknown)``
        as produced by ``calculate_dir_precision``.
    :raises Exception: if the configured algorithm name is not one of
        ``ALGONAMES.BASELINE``, ``ALGONAMES.HMM`` or ``ALGONAMES.MEMM``.
    """
    (corpus_dir, algo_dir, morph_analysis_dir, N_func, error_dir,
     num_iters, _corpus_files, splits, algo_name) = _NAIVE_CV_GLOBALS

    remove_directory_content(algo_dir)
    print("Starting {0} fold".format(i))

    fold_ids = range(num_iters)
    train_files = flatten(splits[j] for j in fold_ids if j != i)
    held_out_files = flatten(splits[j] for j in fold_ids if j == i)

    # Keep only the held-out files that actually have a morphological
    # analysis file with the same base name.
    morph_analysis_files = []
    for held_out in held_out_files:
        candidate = os.path.join(morph_analysis_dir, os.path.basename(held_out))
        if os.path.exists(candidate):
            morph_analysis_files.append(candidate)

    if algo_name == ALGONAMES.BASELINE:
        algo = NaiveAlgorithm(N_func=N_func)
        algo.train_from_filelist(train_files)
    elif algo_name == ALGONAMES.HMM:
        algo = HMMAlgorithm(N_filter_func=N_func)
        algo.train_model_from_filelist(corpus_files=train_files)
    elif algo_name == ALGONAMES.MEMM:
        algo = MMEMAlgorithm(N_filter_func=N_func)
        algo.train_model_file_list(corpus_filelist=train_files,
                                   ambiguity_dir=morph_analysis_dir)
    else:
        raise Exception("Not supported algorithm {0}".format(algo_name))

    print("Finished training. Starting testing phase!")
    remove_ambiguity_file_list(ambig_filelist=morph_analysis_files,
                               output_dir=algo_dir,
                               algo=algo)
    print("Finished working of algo. Starting measuring phase")

    errors_basename = "{1}_errors_context_{0}_{2}.txt".format(
        i, algo_name, get_tag_set_by_func(N_func))
    (correct_known, correct_unknown,
     known_total, unknown_total,
     known_bound, unknown_bound) = calculate_dir_precision(
        algo_dir=algo_dir,
        ambi_dir=morph_analysis_dir,
        gold_dir=corpus_dir,
        M=M_strict_mathcher,
        N=N_func,
        P=P_no_garbage,
        errors_context_filename=os.path.join(error_dir, errors_basename))

    return (correct_known, correct_unknown,
            known_total, unknown_total,
            known_bound, unknown_bound)
Пример #2
0
def _read_fold_file_list(list_filename, source_dir):
    """Return absolute paths for the file names listed, one per line, in ``list_filename``.

    Each line is stripped and joined onto ``source_dir``.  The list file is
    closed before returning.
    """
    with open(list_filename, 'r') as list_file:
        return [os.path.abspath(os.path.join(source_dir, line.strip()))
                for line in list_file]


def bull(action, experiment_name, experiment_path, fold, gold = None, morph_analysis = None):
    """Run one step (train / test / eval) of an experiment for a single fold.

    The experiment's ``run.py`` module (``<experiment_name>.<RUNPY_FILENAME>``)
    is imported from ``experiment_path`` and its ``runner(**params)`` callable
    is invoked with parameters that depend on ``action``.

    :param action: ``'train'``, ``'test'`` or ``'eval'``.  Any other value
        only imports the module and returns ``None``.
    :param experiment_name: name of the experiment package located inside
        ``experiment_path``.
    :param experiment_path: directory containing the experiment package; it
        is appended to ``sys.path`` so the package can be imported.
    :param fold: fold name, used as a path component under the experiment's
        folds directory.
    :param gold: passed as ``gold_dir`` to ``calculate_dir_precision``
        (``'eval'`` only).
    :param morph_analysis: passed as ``ambiguity_dir`` to the runner when
        training (if truthy) and as ``ambi_dir`` to
        ``calculate_dir_precision`` when evaluating.
    :returns: the result of ``calculate_dir_precision`` for ``'eval'``;
        ``None`` for every other action.
    :raises ImportError: if the experiment's run module cannot be imported.
        The error is logged to stderr and re-raised instead of continuing
        with an undefined module.
    """
    experiment_dir = os.path.join(experiment_path, experiment_name)

    # Make the experiment package importable, without piling up duplicate
    # sys.path entries when bull() is called repeatedly.
    search_path = os.path.abspath(experiment_path)
    if search_path not in sys.path:
        sys.path.append(search_path)
    mod_run_py = '.'.join([experiment_name, RUNPY_FILENAME])

    try:
        # Import the module
        mod = __import__(mod_run_py, globals(), locals(), ['*'])
    except ImportError:
        # Log the error, then propagate it: nothing below can work without
        # the module, and continuing would only fail later with a NameError.
        sys.stderr.write(
            "Failed to run.py module from experiment {0}. {1}".format(
                experiment_name, traceback.format_exc()) + "\n")
        raise
    print("Loaded run.py for experiment {0}".format(experiment_name))

    params = {
        'action': action,
        'experiment_name': experiment_name,
        'experiment_dir': experiment_dir,
        'fold': fold,
    }

    if action == 'train':
        fold_dir = os.path.join(experiment_dir, FOLDS_DIRNAME, fold)

        train_files_source_dir = os.path.join(experiment_dir, TRAIN_DIR)
        fold_train_file = os.path.join(fold_dir, TRAIN_FILENAME)

        params['train_file_list'] = _read_fold_file_list(
            fold_train_file, train_files_source_dir)
        if morph_analysis:
            params['ambiguity_dir'] = morph_analysis

        params['action'] = 'train'
        mod.runner(**params)

    elif action == 'test':
        fold_dir = os.path.join(experiment_dir, FOLDS_DIRNAME, fold)

        test_files_source_dir = os.path.join(experiment_dir, TEST_DIR)
        fold_test_file = os.path.join(fold_dir, TEST_FILENAME)
        test_files = _read_fold_file_list(fold_test_file, test_files_source_dir)

        result_dir = os.path.join(fold_dir, 'test_result')
        create_dir(result_dir)

        # For testing, the runner is asked to load the model and return
        # the algorithm object used for ambiguity removal.
        params['action'] = 'load_model'
        algo = mod.runner(**params)
        remove_directory_content(result_dir)
        remove_ambiguity_file_list(ambig_filelist=test_files,
                                   output_dir=result_dir,
                                   algo=algo)
        print("Finished removing ambiguity for experiment {0} for fold {1}".format(
            experiment_name, fold))

    elif action == 'eval':
        # Evaluate the algorithm's output for this particular fold; the
        # runner supplies the 'M', 'N' and 'P' evaluation functions.
        eval_funcs = mod.runner(**params)

        # Use FOLDS_DIRNAME (not a hard-coded 'folds') so evaluation reads
        # the same directory the 'test' action wrote its results to.
        fold_dir = os.path.join(experiment_dir, FOLDS_DIRNAME, fold)

        errors_filename = os.path.join(fold_dir, 'errors.txt')
        algo_dir = os.path.join(fold_dir, 'test_result')

        return calculate_dir_precision(algo_dir=algo_dir,
                                       gold_dir=gold,
                                       ambi_dir=morph_analysis,
                                       M=eval_funcs['M'],
                                       N=eval_funcs['N'],
                                       P=eval_funcs['P'],
                                       errors_context_filename=errors_filename)