def folds_static(in_corpus, out_corpus, params, condition_name, n_folds):
    """Run evidence-graph cross-validation over pre-defined static folds.

    For each of the first ``n_folds`` folds from ``get_static_folds()``, an
    ``EvidenceGraphClassifier`` ensemble is loaded from disk if a matching
    pre-trained model exists (keyed by condition name, feature-set hash and
    fold index), otherwise trained and saved.  Per-level macro/micro F1 and
    decoded MST scores are printed; per-fold predictions and decision values
    are collected and returned.

    Args:
        in_corpus: mapping text-id -> input (text) graph.
        out_corpus: mapping text-id -> gold argumentation graph.
        params: classifier keyword arguments; must contain 'feature_set'
            and 'optimize_weighting'.
        condition_name: experiment condition label; the part before the
            first '|' is used in the saved-model filename.
        n_folds: number of folds to run.

    Returns:
        (predictions, decisions): two dicts keyed by fold index, each
        mapping text-id to the decoded triples / decision values.
    """
    maF1s = defaultdict(list)
    miF1s = defaultdict(list)
    folds = list(get_static_folds())
    predictions = defaultdict(dict)
    decisions = defaultdict(dict)
    for train_tids, test_tids, i in folds[:n_folds]:
        print(f"Iteration: {i}")
        ensemble_basename = condition_name.split('|')[0]
        ensemble_name = "{}__{}__{}".format(
            ensemble_basename, hash_of_featureset(params['feature_set']), i)
        clf = EvidenceGraphClassifier(feature_function_segments,
                                      feature_function_segmentpairs,
                                      **params)
        # Hoist tid membership into sets: O(1) lookups instead of scanning
        # the tid list once per corpus entry.
        train_set = set(train_tids)
        # NOTE(review): pairing train_txt[i] with train_arg[i] assumes
        # in_corpus and out_corpus share key insertion order — verify.
        train_txt = [g for t, g in in_corpus.items() if t in train_set]
        train_arg = [g for t, g in out_corpus.items() if t in train_set]
        try:
            # Load ensemble of pretrained base classifiers.
            clf.ensemble = load(modelpath + ensemble_name)
            if params['optimize_weighting']:
                # And train metaclassifier (if desired).
                clf.train_metaclassifier(train_txt, train_arg)
        except RuntimeError:
            # No saved model for this fold: train the ensemble and cache it.
            clf.train(train_txt, train_arg)
            save(clf.ensemble, modelpath + ensemble_name)
        # Test: per-level base-classifier F1 scores.
        test_set = set(test_tids)
        test_txt = [g for t, g in in_corpus.items() if t in test_set]
        test_arg = [g for t, g in out_corpus.items() if t in test_set]
        score_msg = ''
        for level, base_classifier in clf.ensemble.items():
            maF1, miF1 = base_classifier.test(test_txt, test_arg)
            maF1s[level].append(maF1)
            miF1s[level].append(miF1)
            score_msg += "{}: {:.3f}\t".format(level, maF1)
        # Decode full structures for every test text and score them.
        decoded_scores = []
        for t in test_tids:
            mst = clf.predict(in_corpus[t])
            decoded_scores.append(clf.score(mst, out_corpus[t]))
            predictions[i][t] = mst.get_triples()
            decisions[i][t] = clf.predict_decisions(in_corpus[t])
        score_msg += "decoded: {:.3f}\t".format(mean(decoded_scores))
        print(score_msg)
    print("Average macro and micro F1:")
    for level in maF1s:
        avg_maF1 = mean(maF1s[level])
        avg_miF1 = mean(miF1s[level])
        print(level, avg_maF1, avg_miF1)
    return predictions, decisions
def init_lang_classifier(model_path):
    """Build a German evidence-graph classifier from a saved ensemble.

    Initializes the language resources for 'de', constructs an
    ``EvidenceGraphClassifier`` with the full ADU relation set and a fixed
    feature configuration, then attaches the pre-trained ensemble loaded
    from ``model_path``.

    Args:
        model_path: path to the serialized ensemble readable by
            ``eg_utils.load``.

    Returns:
        The ready-to-use ``EvidenceGraphClassifier`` instance.
    """
    init_language('de')
    feature_names = [
        'default',
        'bow',
        'bow_2gram',
        'first_three',
        'tags',
        'deps_lemma',
        'deps_tag',
        'punct',
        'verb_main',
        'verb_all',
        'discourse_marker',
        'context',
        'clusters',
        'clusters_2gram',
        'discourse_relation',
        'vector_left_right',
        'vector_source_target',
        'verb_segment',
        'same_sentence',
        'matrix_clause',
    ]
    config = {
        'feature_set': feature_names,
        'relation_set': FULL_RELATION_SET_ADU,
        'optimize': False,
        'optimize_weighting': False,
    }
    classifier = EvidenceGraphClassifier(feature_function_segments,
                                         feature_function_segmentpairs,
                                         **config)
    classifier.ensemble = eg_utils.load(model_path)
    return classifier
'same_sentence', 'matrix_clause' ] params = { 'feature_set': features, 'relation_set': FULL_RELATION_SET_ADU, 'optimize': False, 'optimize_weighting': False } logger.info("Loading pre-trained model") clf = EvidenceGraphClassifier(feature_function_segments, feature_function_segmentpairs, **params) clf.ensemble = eg_utils.load(model_path) logger.info("Predicting on test case") mst = clf.predict(segments) roles = mst.get_ro_vector() triples = mst.get_triples() role_map = {1: 'opp', 0: 'pro'} seg_map = { "support": "sup", "rebut": "reb", "undercut": "und",