def main(training_data, test_data, output_file):
    """Classify with the tweaked classifier and write the result.

    Features are extracted from ``training_data`` and written to
    ``train.tab``.  What happens next depends on ``test_data``:

    * truthy: features of ``test_data`` are written to ``test.tab`` and
      ``tweaked_on_testdata`` is run on both files;
    * falsy: ``tweaked`` is run on ``train.tab`` alone (cross-validation)
      and the accuracy of the written output is printed afterwards.

    In both cases the results are classified with a fixed threshold of
    0.5 and written to ``output_file``.

    Args:
        training_data: input handed to ``get_features`` (and to
            ``evaluate`` in the cross-validation case).
        test_data: input handed to ``get_features``; its truthiness
            selects the mode.
        output_file: destination handed to ``write`` (and to
            ``evaluate`` in the cross-validation case).
    """
    # Single-argument print(...) behaves the same as a Python 2 print
    # statement and as the Python 3 print function.
    training_features = get_features(training_data)
    if test_data:
        test_features = get_features(test_data)
        write_f("train.tab", training_features)
        write_f("test.tab", test_features)
        results = tweaked_on_testdata("train.tab", "test.tab")
    else:
        write_f("train.tab", training_features)
        # cross-validation
        results = tweaked("train.tab")
        print("classifying")

    classification = classify_results(results, 0.5)
    print("writing output")
    write(classification, output_file)

    if not test_data:
        # NOTE(review): accuracy is reported for the cross-validation run
        # only, since evaluate() is given training_data and the output
        # written for test_data would not correspond to it -- confirm.
        print("Accuracy = %.4f" % evaluate(training_data, output_file))
def main(tree, output, method, threshold, find_best, n=4, idf_enabled=False):
    """Run one method on the input and write its classification.

    The ``method`` name decides how the input is prepared:

    * ``"features"``: features are extracted from ``tree`` and written to
      ``output``; nothing else is done.
    * ``"word"``, ``"lemma"``, ``"bleu"``: ``load_xml.get_pairs(tree)`` is
      used as the data.
    * ``"print_ted"``, ``"ted"``: ``create_tree.generate_syntax_tree(tree)``
      is used as the data.
    * ``"knn"``, ``"knn-xv"``: ``tree`` itself is used; features are written
      to ``features.tab`` before the method is called.

    With ``find_best`` set, ``find_best_threshold`` is called instead of a
    single run.  Otherwise the method's results are classified with
    ``threshold``, written to ``output`` and the accuracy is printed
    (``"print_ted"`` stops right after the method call).

    Args:
        tree: input handed to the loader selected by ``method``; also the
            reference given to ``evaluate``.
        output: destination handed to ``write`` / ``write_features``.
        method: key into ``METHODS`` (or ``"features"``).
        threshold: value handed to ``classify_results``.
        find_best: when truthy, run ``find_best_threshold`` only.
        n: forwarded as ``n`` to the method / ``find_best_threshold``.
        idf_enabled: when truthy, idf scores are generated and the flag is
            forwarded to the helpers.
    """
    # Feature dump is a self-contained mode: write and stop.
    if method == "features":
        write_features(output, get_features(tree, idf_enabled))
        return

    uses_knn = method in ("knn", "knn-xv")

    # Load xml and idf.  `pair` holds (prepared data, original input).
    if method in ("word", "lemma", "bleu"):
        print("Loading xmlfile")
        pair = (load_xml.get_pairs(tree), tree)
        print("done.")
        if idf_enabled:
            generate_idf_score(pair[0])
    elif method in ("print_ted", "ted"):
        print("Loading xmlfile")
        pair = (create_tree.generate_syntax_tree(tree), tree)
        print("done.")
        if idf_enabled:
            # idf is computed from the sentence pairs, not the syntax trees
            generate_idf_score(load_xml.get_pairs(tree))
    elif uses_knn:
        pair = (tree, tree)
    else:
        # Any other method name: the input is left as-is and indexed below
        # just like the pairs built above.
        pair = tree

    # Run methods.
    if find_best:
        find_best_threshold(pair[0], METHODS[method], pair[1], output,
                            n=n, idf_enabled=idf_enabled)
        return

    if uses_knn:
        knn_features = get_features(pair[0], idf_enabled=idf_enabled)
        write_features("features.tab", knn_features)
        results = METHODS[method](None, outfile="features.tab")
    else:
        results = METHODS[method](pair[0], n=n, idf_enabled=idf_enabled,
                                  output=output)
        if method == "print_ted":
            return

    classification = classify_results(results, threshold)
    print("writing output")
    write(classification, output)
    print("Accuracy = %.4f" % evaluate(pair[1], output))