def eval_on_multi_file(
    test_corpus_dir, feature_map_character, feature_map_numeric, feature_show, tfidf_score_map, args
):
    """Evaluate a directory of test files with the classifier selected in ``args``.

    Exactly one backend is used, chosen by the first truthy flag among
    ``args.liblinear``, ``args.arow``, ``args.logistic`` and ``args.mulan``
    (checked in that order).  When none of them is set the function does
    nothing.

    For the mulan backend every file yielded by ``load_files(test_corpus_dir)``
    is loaded with the Persian or Dutch loader (``args.persian_test`` /
    ``args.dutch_test``), unified with ``unify_stack`` and written out through
    ``call_mulan.out_mulan_file`` before ``call_mulan.mulan_command`` is run.

    Args:
        test_corpus_dir: Location handed to ``load_files`` or to the
            ``multipule_eval_for_*`` helpers.
        feature_map_character: Feature map forwarded to the backend.
        feature_map_numeric: Feature map forwarded to the backend.
        feature_show: Forwarded to the liblinear/arow/logistic helpers only.
        tfidf_score_map: Forwarded to ``call_mulan.out_mulan_file`` only.
        args: Parsed options; reads ``experiment_no``, ``stop``, the backend
            flags, the language flags and ``mulan_reduce_method``.

    Returns:
        None.

    Raises:
        ValueError: If the mulan backend is selected but neither
            ``args.persian_test`` nor ``args.dutch_test`` is set.
    """
    exno = args.experiment_no
    stop = args.stop

    if args.liblinear:
        multipule_eval_for_liblinear(
            test_corpus_dir, feature_map_character, feature_map_numeric, feature_show, args
        )
    elif args.arow:
        multipule_eval_for_arow(
            test_corpus_dir, feature_map_character, feature_map_numeric, feature_show, args
        )
    elif args.logistic:
        multipule_eval_for_logistic(
            test_corpus_dir, feature_map_character, feature_map_numeric, feature_show, args
        )
    elif args.mulan:
        # The language flag does not change between files, so pick the loader
        # once and fail early with a clear message instead of hitting an
        # unbound local inside the loop.
        if args.persian_test:
            load_test_file = file_loader
        elif args.dutch_test:
            load_test_file = file_loader_dutch
        else:
            raise ValueError(
                "mulan evaluation needs args.persian_test or args.dutch_test to be set"
            )

        test_data_list_multi = []
        for test_file in load_files(test_corpus_dir):
            tokens_stack, motif_stack = load_test_file(test_file, stop)
            test_data_list_multi.append(unify_stack(tokens_stack, motif_stack))

        call_mulan.out_mulan_file(
            test_data_list_multi, feature_map_character, feature_map_numeric, tfidf_score_map, args
        )

        model_type = args.mulan_reduce_method
        arff_train = "../get_thompson_motif/classifier/mulan/exno{}.arff".format(exno)
        modelsavepath = "../get_thompson_motif/classifier/mulan/exno{}.model".format(exno)
        arff_test = "./arff_and_xml/test_{}.arff".format(exno)
        xml_file = "./arff_and_xml/test_{}.xml".format(exno)
        # NOTE(review): eval_on_single_file passes a seventh argument
        # (args.reduce_method) to mulan_command -- confirm which arity is current.
        call_mulan.mulan_command(model_type, arff_train, xml_file, arff_test, modelsavepath, exno)
def _precision_recall_f(result_map, gold_map):
    """Score predicted labels in ``result_map`` against the keys of ``gold_map``.

    A label counts as predicted when its value in ``result_map`` equals 1.

    Returns:
        A tuple ``(precision, recall, f_measure, correct_labels)`` where
        ``correct_labels`` lists the predicted labels that are also in
        ``gold_map``.  Precision is 0 when nothing was predicted and recall is
        0 when ``gold_map`` is empty, so no division by zero can occur.
    """
    correct_labels = [
        label for label in result_map
        if label in gold_map and result_map[label] == 1
    ]
    n_correct = len(correct_labels)
    n_predicted = sum(1 for value in result_map.values() if value == 1)

    precision = float(n_correct) / n_predicted if n_predicted else 0
    recall = float(n_correct) / len(gold_map) if gold_map else 0
    if precision != 0 and recall != 0:
        f_measure = float(2 * precision * recall) / (precision + recall)
    else:
        f_measure = 0
    return precision, recall, f_measure, correct_labels


def eval_on_single_file(
    test_corpus_dir, feature_map_numeric, feature_map_character, tfidf_score_map, classifier_path_list, args
):
    """Evaluate one test file and print precision, recall and F.

    The file at ``test_corpus_dir`` is loaded with the Persian or Dutch loader
    (``args.persian_test`` / ``args.dutch_test``).  Predictions come from the
    backend chosen by ``args.arow``, ``args.liblinear`` or ``args.mulan``
    (checked in that order), falling back to ``predict_labels`` with
    ``classifier_path_list``.  The mulan branch terminates the process via
    ``sys.exit`` after running the mulan command, so no scores are printed
    for it.

    Gold labels are the first element of each entry in the loaded motif stack.

    Args:
        test_corpus_dir: Path of the single test file (despite the name).
        feature_map_numeric: Feature map forwarded to the backend.
        feature_map_character: Feature map forwarded to the backend.
        tfidf_score_map: Forwarded to ``call_mulan.out_mulan_file`` only.
        classifier_path_list: Forwarded to ``predict_labels`` in the fallback branch.
        args: Parsed options; reads ``stop``, ``feature_show``, ``tfidf``,
            ``experiment_no``, the language flags, the backend flags and
            ``reduce_method``.

    Returns:
        None.  Results are written to standard output.

    Raises:
        ValueError: If neither ``args.persian_test`` nor ``args.dutch_test`` is set.
        SystemExit: Always, when the mulan backend is selected.
    """
    stop = args.stop
    feature_show = args.feature_show
    tfidf_flag = args.tfidf
    exno = args.experiment_no

    test_filepath = test_corpus_dir

    if args.persian_test:
        tokens_stack, motif_stack = file_loader(test_filepath, stop)
    elif args.dutch_test:
        tokens_stack, motif_stack = file_loader_dutch(test_filepath, stop)
    else:
        # Previously this fell through and failed later on an unbound local.
        raise ValueError("evaluation needs args.persian_test or args.dutch_test to be set")
    test_data_list = unify_stack(tokens_stack, motif_stack)

    if args.arow:
        # This code path has not been updated yet.
        out_libsvm_format(tokens_stack, feature_map_character, feature_map_numeric)
        result_map = eval_with_arow()
    elif args.liblinear:
        out_libsvm_format(tokens_stack, feature_map_character, feature_map_numeric, feature_show, tfidf_flag)
        result_map = call_liblinear.eval_with_liblinear(exno)
    elif args.mulan:
        call_mulan.out_mulan_file(test_data_list, feature_map_character, feature_map_numeric, tfidf_score_map, args)
        model_type = "RAkEL"
        arff_train = "../get_thompson_motif/classifier/mulan/exno{}.arff".format(exno)
        modelsavepath = "../get_thompson_motif/classifier/mulan/exno{}.model".format(exno)
        arff_test = "./arff_and_xml/test_{}.arff".format(exno)
        xml_file = "./arff_and_xml/test_{}.xml".format(exno)
        # NOTE(review): eval_on_multi_file calls mulan_command with six
        # arguments (no reduce method) -- confirm which arity is current.
        call_mulan.mulan_command(
            model_type, arff_train, xml_file, arff_test, modelsavepath, exno, args.reduce_method
        )
        # Scoring of mulan output is not available; stop here.
        sys.exit("Stil not implemented")
    else:
        # This code path has not been updated yet.
        test_matrix = construct_input_matrix(tokens_stack, feature_map_character, feature_map_numeric)
        result_map = predict_labels(test_matrix, classifier_path_list)

    # Gold labels: first element of every motif entry.
    gold_map = {gold_motif[0]: 1 for gold_motif in motif_stack}
    precision, recall, F, list_gold_cap_result = _precision_recall_f(result_map, gold_map)

    # Single-argument print(...) emits the same text under Python 2's print
    # statement and Python 3's print function.
    print("-" * 30)
    print("RESULT\nresult of classifiers:{}\ngold:{}\ncorrect estimation:{}\n".format(
        result_map, gold_map, list_gold_cap_result
    ))
    print("Precision:{}\nRecall:{}\nF:{}".format(precision, recall, F))