Пример #1
0
def folds_static(in_corpus, out_corpus, params, condition_name, n_folds):
    """Evaluate an evidence-graph classifier on the first ``n_folds`` static folds.

    For every fold an ``EvidenceGraphClassifier`` is created from ``params``.
    Its ensemble is loaded from ``modelpath`` under a name built from the part
    of ``condition_name`` before the first ``'|'``, the feature-set hash and
    the fold index. If that raises ``RuntimeError`` the ensemble is trained on
    the fold's training items and saved under the same name. Every ensemble
    member is then tested on the fold's test items, and each test text is
    decoded, scored and recorded. Per-fold scores and the per-level averages
    are printed.

    Args:
        in_corpus: Mapping from text id to the classifier input for that text.
        out_corpus: Mapping from text id to the target passed to training
            and scoring.
        params: Keyword arguments for ``EvidenceGraphClassifier``; the keys
            ``'feature_set'`` and ``'optimize_weighting'`` are read here.
        condition_name: String naming the condition.
        n_folds: Upper slice bound applied to the list of static folds.

    Returns:
        A ``(predictions, decisions)`` tuple. Both are dicts keyed by fold
        index and then by text id, holding ``mst.get_triples()`` and
        ``clf.predict_decisions(...)`` respectively.
    """

    def pick(corpus, wanted_ids):
        # Keeps the iteration order of `corpus`, not of `wanted_ids`.
        return [item for key, item in corpus.items() if key in wanted_ids]

    macro_by_level = defaultdict(list)
    micro_by_level = defaultdict(list)
    all_folds = list(get_static_folds())
    predictions = defaultdict(dict)
    decisions = defaultdict(dict)

    for train_tids, test_tids, i in all_folds[:n_folds]:
        print(f"Iteration: {i}")
        ensemble_name = "{}__{}__{}".format(
            condition_name.split('|')[0],
            hash_of_featureset(params['feature_set']),
            i)
        clf = EvidenceGraphClassifier(
            feature_function_segments, feature_function_segmentpairs,
            **params)
        train_txt = pick(in_corpus, train_tids)
        train_arg = pick(out_corpus, train_tids)
        model_file = modelpath + ensemble_name
        try:
            # Reuse a previously saved ensemble of base classifiers.
            clf.ensemble = load(model_file)
            if params['optimize_weighting']:
                # Only the metaclassifier is fitted on top of the loaded
                # ensemble. NOTE(review): a RuntimeError raised here also
                # lands in the handler below and retrains the ensemble;
                # confirm that is intended.
                clf.train_metaclassifier(train_txt, train_arg)
        except RuntimeError:
            # Fall back to training from scratch and store the result.
            clf.train(train_txt, train_arg)
            save(clf.ensemble, model_file)

        # Evaluate every base classifier on the held-out items.
        test_txt = pick(in_corpus, test_tids)
        test_arg = pick(out_corpus, test_tids)
        report = []
        for level, base_classifier in clf.ensemble.items():
            macro_f1, micro_f1 = base_classifier.test(test_txt, test_arg)
            macro_by_level[level].append(macro_f1)
            micro_by_level[level].append(micro_f1)
            report.append("{}: {:.3f}\t".format(level, macro_f1))

        # Decode each test text and keep its triples and decisions.
        decoded_scores = []
        for tid in test_tids:
            mst = clf.predict(in_corpus[tid])
            decoded_scores.append(clf.score(mst, out_corpus[tid]))
            predictions[i][tid] = mst.get_triples()
            decisions[i][tid] = clf.predict_decisions(in_corpus[tid])
        report.append("decoded: {:.3f}\t".format(mean(decoded_scores)))
        print("".join(report))

    print("Average macro and micro F1:")
    for level, macro_scores in macro_by_level.items():
        print(level, mean(macro_scores), mean(micro_by_level[level]))

    return predictions, decisions
Пример #2
0
def init_lang_classifier(model_path):
    """Build an ``EvidenceGraphClassifier`` with an ensemble loaded from disk.

    ``init_language('de')`` is called first. The classifier is then created
    with a fixed feature set, ``FULL_RELATION_SET_ADU`` as the relation set
    and both ``optimize`` flags switched off.

    Args:
        model_path: Passed unchanged to ``eg_utils.load``.

    Returns:
        The classifier, with its ``ensemble`` attribute set to the object
        returned by ``eg_utils.load(model_path)``.
    """
    init_language('de')

    # The order of the feature names is kept exactly as configured.
    feature_names = [
        'default', 'bow', 'bow_2gram', 'first_three',
        'tags', 'deps_lemma', 'deps_tag',
        'punct', 'verb_main', 'verb_all', 'discourse_marker',
        'context', 'clusters', 'clusters_2gram', 'discourse_relation',
        'vector_left_right', 'vector_source_target',
        'verb_segment', 'same_sentence', 'matrix_clause',
    ]
    classifier_kwargs = {
        'feature_set': feature_names,
        'relation_set': FULL_RELATION_SET_ADU,
        'optimize': False,
        'optimize_weighting': False,
    }

    classifier = EvidenceGraphClassifier(
        feature_function_segments,
        feature_function_segmentpairs,
        **classifier_kwargs
    )
    classifier.ensemble = eg_utils.load(model_path)
    return classifier
Пример #3
0
        'same_sentence', 'matrix_clause'
    ]

    params = {
        'feature_set': features,
        'relation_set': FULL_RELATION_SET_ADU,
        'optimize': False,
        'optimize_weighting': False
    }

    logger.info("Loading pre-trained model")

    clf = EvidenceGraphClassifier(feature_function_segments,
                                  feature_function_segmentpairs, **params)

    clf.ensemble = eg_utils.load(model_path)

    logger.info("Predicting on test case")

    mst = clf.predict(segments)

    roles = mst.get_ro_vector()

    triples = mst.get_triples()

    role_map = {1: 'opp', 0: 'pro'}

    seg_map = {
        "support": "sup",
        "rebut": "reb",
        "undercut": "und",