def get_entity_scores(train, dev=None, test=None, weights=None,
                      embeddings=None, avg_embeddings=None):
    # First do the normal thing to make sure our cache is reasonable
    train_X, train_Y, _ = build_feature_array(train, avg_embeddings)

    if test is not None:
        test_X, test_Y, test_words_Y, sents = build_feature_array(
            test, embeddings, get_sents=True)
        logistic_regression(train_X, train_Y, test_X, test_Y, weights,
                            do_print=True, return_preds=True, is_token=True,
                            test_words_Y=test_words_Y)

    # We set is_token to False so that we skip the aggregation -- we don't need it
    preds = logistic_regression(train_X, train_Y, embeddings.m, embeddings.iw,
                                weights, do_print=False, return_preds=True,
                                is_token=False)

    # Our default settings add +1 to scores since we use them as idxs, so we
    # subtract 1 here (though it doesn't really matter, since we're always
    # comparing scores, not caring about absolute values)
    return [p - 1 for p in preds]
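
# Hedged usage sketch (not part of the original pipeline): rank the full
# vocabulary by the entity scores returned above. Assumes embeddings.iw maps
# matrix row index -> word, as the embeddings.m / embeddings.iw call in
# get_entity_scores suggests; the helper name is hypothetical.
def _example_rank_vocab_by_score(train, weights, embeddings, avg_embeddings):
    scores = get_entity_scores(train, weights=weights, embeddings=embeddings,
                               avg_embeddings=avg_embeddings)
    # Pair each vocabulary item with its score and sort, highest-scoring first
    return sorted(zip(embeddings.iw, scores), key=lambda p: p[1], reverse=True)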

def type_to_token_eval(headers, embeddings, avg_embeddings):
    print(
        "What's the accuracy of using the learned type-level predictions on token-level test data?"
    )
    for h, h2 in headers:
        train, test, dev = load_hannah_split(config.CONNO_DIR, h,
                                             binarize=True,
                                             remove_neutral=False,
                                             plus_one=True)
        sent_to_score = load_raw_annotations(config.RAW_CONNOTATIONS, h2,
                                             binarize=True, plus_one=True)
        print(h)
        weights = raw_header_to_weights[h]

        train_X, train_Y, _ = build_feature_array(train, avg_embeddings)
        test_X, test_Y, test_words = build_feature_array(test, avg_embeddings)
        preds = logistic_regression(train_X, train_Y, test_X, test_Y,
                                    weights=weights, do_print=False,
                                    is_token=False, test_words_Y=None,
                                    return_preds=True)

        # Map each test word to its type-level prediction
        word_to_pred = {w: s for w, s in zip(test_words, preds)}
        print(len(word_to_pred), len(test))

        # Broadcast the type-level prediction to every annotated sentence
        # containing that word, then score against the token-level labels
        type_level = []
        sent_level = []
        for key, score in sent_to_score.items():
            if key[0] in word_to_pred:
                type_level.append(word_to_pred[key[0]])
                sent_level.append(score)

        print("Macro F1", f1_score(sent_level, type_level, average='macro'))
        print("Accuracy", accuracy_score(sent_level, type_level))
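
# Toy, self-contained illustration of the type->token broadcast above. The
# data and words here are hypothetical; the real sent_to_score comes from
# load_raw_annotations and is keyed by (word, sentence-id)-style tuples with
# the word in position 0, as the key[0] lookups above indicate.
def _example_type_to_token_broadcast():
    from sklearn.metrics import accuracy_score, f1_score
    word_to_pred = {"seize": 2, "assist": 1}              # type-level predictions
    sent_to_score = {("seize", "s1"): 2, ("seize", "s2"): 1,
                     ("assist", "s3"): 1}                 # token-level gold labels
    type_level = []
    sent_level = []
    for key, score in sent_to_score.items():
        if key[0] in word_to_pred:
            type_level.append(word_to_pred[key[0]])       # same pred for every token
            sent_level.append(score)
    print("Macro F1", f1_score(sent_level, type_level, average='macro'))
    print("Accuracy", accuracy_score(sent_level, type_level))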

def do_normal_regression(headers, avg_embeddings, weights=None):
    for h, h2 in headers:
        print(h)
        train, test, dev = load_hannah_split(config.CONNO_DIR, h,
                                             binarize=True,
                                             remove_neutral=False)
        if weights is not None:
            print("Running normal type-level regression")
            train_X, train_Y, _ = build_feature_array(train, avg_embeddings)
            test_X, test_Y, _ = build_feature_array(test, avg_embeddings)
            logistic_regression(train_X, train_Y, test_X, test_Y,
                                weights=weights[h], do_print=True,
                                is_token=False, test_words_Y=None,
                                return_preds=True)
        else:
            do_logistic_regression(train, dev, test, avg_embeddings)
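
# Example invocation (hedged; the header pairs below are hypothetical -- the
# real ones are whatever load_hannah_split / load_raw_annotations expect):
#
#   headers = [("Power", "power"), ("Agency", "agency")]
#   do_normal_regression(headers, avg_embeddings)                    # dev-tuned
#   do_normal_regression(headers, avg_embeddings, weights=my_weights)  # fixed weights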

def avg_token_eval(headers, embeddings, avg_embeddings, sent_to_key,
                   weights=None):
    print(
        "What's the accuracy of using type-level training and token-level test? i.e. what we did in the paper"
    )
    for h, h2 in headers:
        train, test, dev = load_hannah_split(config.CONNO_DIR, h,
                                             binarize=True,
                                             remove_neutral=False,
                                             plus_one=True)
        sent_to_score = load_raw_annotations(config.RAW_CONNOTATIONS, h2,
                                             binarize=True, plus_one=True)
        print(h)

        train_X, train_Y, _ = build_feature_array(train, avg_embeddings)
        test_X, test_Y = build_sent_array(test, embeddings, sent_to_score,
                                          sent_to_key)

        if weights is not None:
            print(weights)
            logistic_regression(train_X, train_Y, test_X, test_Y, weights[h],
                                do_print=True)
        else:
            # No weights given: tune them on the dev split first
            dev_X, dev_Y = build_sent_array(dev, embeddings, sent_to_score,
                                            sent_to_key)
            score, new_weights = find_logistic_regression_weights(
                train_X, train_Y, dev_X, dev_Y)
            print("Running logistic regression with weights", new_weights,
                  "Dev F1:", score)
            logistic_regression(train_X, train_Y, test_X, test_Y, new_weights,
                                do_print=True)
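
# Example invocation (hedged): a first pass with weights=None tunes on the dev
# split and prints the chosen weights; a later pass can pin them by passing a
# {header: weights} dict, e.g. the raw_header_to_weights mapping used in
# type_to_token_eval above.
#
#   avg_token_eval(headers, embeddings, avg_embeddings, sent_to_key)
#   avg_token_eval(headers, embeddings, avg_embeddings, sent_to_key,
#                  weights=raw_header_to_weights)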

def score_keyed_embeddings(key_to_embeds, key_to_signs, m, avg_embeddings):
    train = load_power_all(cfg.POWER_AGENCY)
    train_X, train_Y, _ = build_feature_array(train, avg_embeddings)
    preds = logistic_regression(train_X, train_Y, m, {},
                                weights.power_token_regression,
                                do_print=False, return_preds=True,
                                is_token=False)

    # Average the sign-adjusted predictions over each key's embedding rows
    ent_to_score = {}
    for key, idxs in key_to_embeds.items():
        signs = key_to_signs[key]
        scores = [preds[i] for i in idxs]
        sum_score = sum(p * s for p, s in zip(scores, signs))
        ent_to_score[key] = sum_score / len(scores)
    return ent_to_score
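
# Hedged usage sketch: sort entities by their signed average scores. The sign
# convention in key_to_signs (e.g. +1 vs -1 for different mention roles) is an
# assumption here, and the helper name is hypothetical.
def _example_top_entities(key_to_embeds, key_to_signs, m, avg_embeddings, n=10):
    ent_to_score = score_keyed_embeddings(key_to_embeds, key_to_signs, m,
                                          avg_embeddings)
    # Highest-scoring entities first; only relative order matters, since the
    # scores are used for comparison rather than as absolute values
    return sorted(ent_to_score.items(), key=lambda kv: kv[1], reverse=True)[:n]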