import getopt
import sys

# DataSet, Attribute, Model, log_info, inflect and the evaluate_* helpers
# are assumed to be importable from the surrounding project.


def test_models(file_in, file_out, model_files, source_attr, target_attr,
                oov_test_file, oov_part, pos_attr, test_indiv):
    """\
    Test all the given models on the selected file and save the results.

    If oov_test_file is set, also performs OOV evaluation. If test_indiv
    is True, prints the accuracy of each model individually. If pos_attr
    is set, prints detailed results for the various POSes.
    """
    # load testing data
    log_info('Loading data: ' + file_in)
    data = DataSet()
    data.load_from_arff(file_in)
    forms = data[source_attr]
    # apply all models in a chain: each model classifies inflection rules,
    # which are applied to the forms produced by the previous model
    for model_num, model_file in enumerate(model_files, start=1):
        model = Model.load_from_file(model_file)
        log_info('Applying model: ' + model_file)
        rules = model.classify(data)
        output_attr = 'OUTPUT_M' + str(model_num)
        data.add_attrib(Attribute(output_attr, 'string'), rules)
        if test_indiv:
            good = count_correct(data, model.class_attr, output_attr)
            print_score(good, len(data), 'Model accuracy')
        forms = [inflect(form, rule) for form, rule in zip(forms, rules)]
        forms_attr = 'FORMS_M' + str(model_num)
        data.add_attrib(Attribute(forms_attr, 'string'), forms)
    # test the final performance
    log_info('Evaluating...')
    good = count_correct(data, target_attr, forms_attr)
    print_score(good, len(data), 'ALL')
    # evaluate without punctuation
    evaluate_nopunct(data, source_attr, target_attr, forms_attr)
    # evaluate forms different from lemma
    evaluate_nolemma(data, source_attr, target_attr, forms_attr)
    # load training data for OOV tests, evaluate on OOV
    if oov_test_file:
        evaluate_oov(data, source_attr, target_attr, forms_attr,
                     oov_test_file, oov_part)
    # test on different POSes
    if pos_attr:
        evaluate_poses(data, target_attr, forms_attr, pos_attr)
    # save the classification results
    log_info('Saving data: ' + file_out)
    data.save_to_arff(file_out)
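# count_correct() and print_score() are used above but not shown. Minimal
# sketches inferred from the call sites; the actual project implementations
# may differ:


def count_correct(data, gold_attr, pred_attr):
    """Count the instances whose predicted value matches the gold value."""
    return sum(1 for inst in data if inst[gold_attr] == inst[pred_attr])


def print_score(good, total, label):
    """Print a labeled accuracy score as 'correct / total (percentage)'."""
    print('%s: %d / %d (%.2f%%)' % (label, good, total, 100.0 * good / total))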
def main():
    """\
    Main application entry: parse the command line, then select or
    annotate prediction errors in the given data set.
    """
    opts, filenames = getopt.getopt(sys.argv[1:], 'g:p:aih')
    show_help = False
    annot_errors = False
    gold = None
    predicted = 'PREDICTED'
    ignore_case = False
    for opt, arg in opts:
        if opt == '-g':
            gold = arg
        elif opt == '-p':
            predicted = arg
        elif opt == '-a':
            annot_errors = True
        elif opt == '-i':
            ignore_case = True
        elif opt == '-h':
            show_help = True
    # display help and exit
    if len(filenames) != 2 or not gold or show_help:
        display_usage()
        sys.exit(1)
    # run the error selection / annotation
    filename_in, filename_out = filenames
    data = DataSet()
    log_info('Loading data: ' + filename_in)
    data.load_from_arff(filename_in)
    if ignore_case:
        cmp_func = lambda a, b: a.lower() != b.lower()
    else:
        cmp_func = lambda a, b: a != b
    if annot_errors:
        # mark erroneous instances with a new string attribute
        log_info('Annotating errors...')
        err_ind = ['ERR' if cmp_func(i[gold], i[predicted]) else ''
                   for i in data]
        data.add_attrib(Attribute('ERROR_IND', 'string'), err_ind)
    else:
        # keep only the erroneous instances
        log_info('Selecting errors...')
        data = data[lambda _, i: cmp_func(i[gold], i[predicted])]
    log_info('Saving data: ' + filename_out)
    data.save_to_arff(filename_out)
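# display_usage() is called above but not shown. A minimal sketch that
# matches the options parsed in main(); the script name and wording are
# assumptions:


def display_usage():
    """Print a short usage message for this script."""
    print('Usage: select_errors.py [-h] -g GOLD_ATTR [-p PRED_ATTR] '
          '[-a] [-i] input.arff output.arff')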
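# Conventional entry-point guard (assumed; the original excerpt does not
# show how main() is invoked):

if __name__ == '__main__':
    main()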