def get_features(model_file, output_file): """\ """ m = Model.load_from_file(model_file) labels = m.data_headers.get_attrib(m.class_attr).labels feats = m.vectorizer.get_feature_names() fh = codecs.open(output_file, 'w', 'UTF-8') for i, label in enumerate(labels): log_info('Enumerating features for label %d (\'%s\')' % (i, label)) coefs = m.classifier.coef_[i] nonzero = [(f, c) for (f, c) in zip(feats, coefs) if c != 0] print >> fh, 'LABEL == %s' % label for f, c in sorted(nonzero, key=itemgetter(1), reverse=True): print >> fh, '%s - %f' % (f, c) print >> fh, "\n\n" fh.close()
def test_models(file_in, file_out, model_files, source_attr, target_attr, oov_test_file, oov_part, pos_attr, test_indiv): """\ Test all the given models on the selected file and save the target. If oov_test_file is set, performs also OOV evaluation. If test_pos is True, prints detailed results for various POSs. """ # load testing data log_info('Loading data: ' + file_in) data = DataSet() data.load_from_arff(file_in) forms = data[source_attr] # apply all models for model_num, model_file in enumerate(model_files, start=1): model = Model.load_from_file(model_file) log_info('Applying model: ' + model_file) rules = model.classify(data) output_attr = 'OUTPUT_M' + str(model_num) data.add_attrib(Attribute(output_attr, 'string'), rules) if test_indiv: good = count_correct(data, model.class_attr, output_attr) print_score(good, len(data), 'Model accuracy') forms = [inflect(form, rule) for form, rule in zip(forms, rules)] forms_attr = 'FORMS_M' + str(model_num) data.add_attrib(Attribute(forms_attr, 'string'), forms) # test the final performance log_info('Evaluating...') good = count_correct(data, target_attr, forms_attr) print_score(good, len(data), 'ALL') # evaluate without punctuation evaluate_nopunct(data, source_attr, target_attr, forms_attr) # evaluate forms different from lemma evaluate_nolemma(data, source_attr, target_attr, forms_attr) # load training data for OOV tests, evaluate on OOV if oov_test_file: evaluate_oov(data, source_attr, target_attr, forms_attr, oov_test_file, oov_part) # test on different POSes if pos_attr: evaluate_poses(data, target_attr, forms_attr, pos_attr) # save the classification results log_info('Saving data: ' + file_out) data.save_to_arff(file_out)