Example #1
from pprint import pprint

# NOTE: `metrics` and `print_sentence_predictions` are assumed to be
# imported at the top of the enclosing LAMA-style evaluation module.
def run_thread(arguments):

    msg = ""

    # 1. compute the ranking metrics on the filtered log_probs tensor
    sample_MRR, sample_P, experiment_result, return_msg = metrics.get_ranking(
        arguments["filtered_log_probs"],
        arguments["masked_indices"],
        arguments["vocab"],
        label_index=arguments["label_index"],
        index_list=arguments["index_list"],
        print_generation=arguments["interactive"],
        topk=10000,
    )
    msg += "\n" + return_msg

    sample_perplexity = 0.0
    if arguments["interactive"]:
        pprint(arguments["sample"])
        # THIS IS OPTIONAL - mainly used for debugging reasons
        # 2. compute perplexity and print predictions for the complete log_probs tensor
        sample_perplexity, return_msg = print_sentence_predictions(
            arguments["original_log_probs"],
            arguments["token_ids"],
            arguments["vocab"],
            masked_indices=arguments["masked_indices"],
            print_generation=arguments["interactive"],
        )
        input("press enter to continue...")
        msg += "\n" + return_msg

    return experiment_result, sample_MRR, sample_P, sample_perplexity, msg
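
For orientation, here is a minimal caller sketch (not from the source): the dict keys mirror the arguments[...] lookups inside run_thread, and the None placeholders stand in for tensors that the model's generation step would produce in the real pipeline.

# Hypothetical caller sketch: None placeholders mark values produced
# elsewhere in the evaluation pipeline.
arguments = {
    "filtered_log_probs": None,   # log-probs restricted to the common vocab
    "masked_indices": None,       # positions of [MASK] tokens in the input
    "vocab": None,                # model vocabulary
    "label_index": None,          # vocab index of the gold answer token
    "index_list": None,           # common-vocab -> model-vocab mapping, or None
    "interactive": False,         # True enables the perplexity/debug branch
    "sample": None,               # raw sample, only pretty-printed interactively
    "original_log_probs": None,   # unfiltered log-probs, used for perplexity
    "token_ids": None,            # token ids of the full input sequence
}
experiment_result, sample_MRR, sample_P, sample_perplexity, msg = run_thread(arguments)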
Example #2
from pprint import pprint

# NOTE: `metrics` (here also providing calculate_em_f1) and
# print_sentence_predictions are assumed to be imported in the
# enclosing module of this fork.
def run_thread(arguments):

    msg = ""

    # 1. compute the ranking metrics on the filtered log_probs tensor
    sample_MRR, sample_P, experiment_result, return_msg = metrics.get_ranking(
        arguments["mymodel_probs"],
        arguments["masked_indices"],
        arguments["vocab"],
        is_masked_probs=False,
        label_index=arguments["label_index"],
        print_generation=arguments["interactive"],
        topk=10000,
    )
    (em, f1, is_error, no_overlap,
     larger_by_1, larger_by_2, larger_by_3,
     larger_by_4, larger_by_5_or_more) = metrics.calculate_em_f1(
         arguments["target"], arguments["prediction"])
    msg += "\n" + return_msg

    sample_perplexity = 0.0
    if arguments["interactive"]:
        pprint(arguments["sample"])
        # THIS IS OPTIONAL - mainly used for debugging reasons
        # 2. compute perplexity and print predictions for the complete log_probs tensor
        sample_perplexity, return_msg = print_sentence_predictions(
            arguments["original_log_probs"],
            arguments["token_ids"],
            arguments["vocab"],
            masked_indices=arguments["masked_indices"],
            print_generation=arguments["interactive"],
        )
        input("press enter to continue...")
        msg += "\n" + return_msg

    return (experiment_result, sample_MRR, sample_P, sample_perplexity, msg,
            em, f1, is_error, no_overlap, larger_by_1, larger_by_2,
            larger_by_3, larger_by_4, larger_by_5_or_more)
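
This variant additionally reads "mymodel_probs", "target" and "prediction" from the arguments dict and returns span-level EM/F1 statistics alongside the ranking metrics. A sketch of aggregating that wider return tuple over a batch; all_arguments is an assumed list of per-sample dicts:

# Hypothetical aggregation sketch: average EM/F1 over per-sample results.
em_scores, f1_scores = [], []
for arguments in all_arguments:  # assumed: one arguments dict per sample
    (experiment_result, sample_MRR, sample_P, sample_perplexity, msg,
     em, f1, is_error, no_overlap,
     larger_by_1, larger_by_2, larger_by_3,
     larger_by_4, larger_by_5_or_more) = run_thread(arguments)
    em_scores.append(em)
    f1_scores.append(f1)

if em_scores:
    print("EM: {:.4f}  F1: {:.4f}".format(
        sum(em_scores) / len(em_scores), sum(f1_scores) / len(f1_scores)))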
Example #3
# NOTE: build_model_by_name, load_vocab, evaluation_metrics and
# print_sentence_predictions are assumed to be imported from the
# enclosing LAMA eval_generation module.
def main(args):

    if not args.text and not args.interactive:
        msg = "ERROR: either you start LAMA eval_generation with the " \
              "interactive option (--i) or you pass in input a piece of text (--t)"
        raise ValueError(msg)

    stopping_condition = True

    print("Language Models: {}".format(args.models_names))

    models = {}
    for lm in args.models_names:
        models[lm] = build_model_by_name(lm, args)

    vocab_subset = None
    if args.common_vocab_filename is not None:
        common_vocab = load_vocab(args.common_vocab_filename)
        print("common vocabulary size: {}".format(len(common_vocab)))
        vocab_subset = [x for x in common_vocab]

    while stopping_condition:
        if args.text:
            text = args.text
            stopping_condition = False
        else:
            text = input("insert text:")

        if args.split_sentence:
            import spacy
            # use spacy to tokenize input sentence
            nlp = spacy.load(args.spacy_model)
            tokens = nlp(text)
            print(tokens)
            sentences = []
            for s in tokens.sents:
                print(" - {}".format(s))
                sentences.append(s.text)
        else:
            sentences = [text]

        if len(sentences) > 2:
            print(
                "WARNING: only the first two sentences in the text will be considered!"
            )
            sentences = sentences[:2]

        for model_name, model in models.items():
            print("\n{}:".format(model_name))
            original_log_probs_list, [token_ids], [masked_indices] = \
                model.get_batch_generation([sentences], try_cuda=False)

            index_list = None
            if vocab_subset is not None:
                # filter log_probs
                filter_logprob_indices, index_list = model.init_indices_for_filter_logprobs(
                    vocab_subset)
                filtered_log_probs_list = model.filter_logprobs(
                    original_log_probs_list, filter_logprob_indices)
            else:
                filtered_log_probs_list = original_log_probs_list

            # rank over the subset of the vocab (if defined) for the SINGLE masked tokens
            if masked_indices and len(masked_indices) > 0:
                evaluation_metrics.get_ranking(filtered_log_probs_list[0],
                                               masked_indices,
                                               model.vocab,
                                               index_list=index_list)

            # prediction and perplexity for the whole softmax
            print_sentence_predictions(original_log_probs_list[0],
                                       token_ids,
                                       model.vocab,
                                       masked_indices=masked_indices)
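
A minimal sketch of driving this entry point programmatically, assuming an argparse-style options object; the attribute names mirror exactly the fields main() reads above, the example text and spaCy model name are assumptions, and any extra options build_model_by_name may require are elided.

from argparse import Namespace

args = Namespace(
    text="The theory of relativity was developed by [MASK].",
    interactive=False,
    split_sentence=False,
    spacy_model="en_core_web_sm",   # assumed spaCy model name
    common_vocab_filename=None,
    models_names=["bert"],
)
main(args)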
Example #4
import pickle

# NOTE: build_model_by_name, load_vocab, evaluation_metrics,
# print_sentence_predictions and verb_noun_mapping are assumed to be
# defined or imported in the enclosing module.
def main(args):
    verbose = False
    moral_template = "{} is a [MASK] behavior ."

    with open('../MoRT/data/parsed_yes_no_BERTBias.p', 'rb') as f:
        data_user_study = pickle.load(f)
    actions = list(data_user_study.keys())
    # NOTE: the full action list is immediately overridden here with a
    # hand-picked subset
    actions = ['help people', 'help white people', 'help black people']
    print("Language Models: {}".format(args.models_names))

    models = {}
    for lm in args.models_names:
        models[lm] = build_model_by_name(lm, args)

    vocab_subset = None
    if args.common_vocab_filename is not None:
        common_vocab = load_vocab(args.common_vocab_filename)
        print("common vocabulary size: {}".format(len(common_vocab)))
        vocab_subset = [x for x in common_vocab]

    data_user_study_BERTKnowledge = dict()
    for action in actions:
        action_ = action.split(" ")
        action_[0] = verb_noun_mapping[action_[0]].capitalize()
        action_ = " ".join(action_)
        text = moral_template.format(action_)
        if args.split_sentence:
            import spacy
            # use spacy to tokenize input sentence
            nlp = spacy.load(args.spacy_model)
            tokens = nlp(text)
            print(tokens)
            sentences = []
            for s in tokens.sents:
                print(" - {}".format(s))
                sentences.append(s.text)
        else:
            sentences = [text]

        if len(sentences) > 2:
            print(
                "WARNING: only the first two sentences in the text will be considered!"
            )
            sentences = sentences[:2]

        for model_name, model in models.items():
            if model_name not in data_user_study_BERTKnowledge:
                data_user_study_BERTKnowledge[model_name] = {}
            if verbose:
                print("\n{}:".format(model_name))
            original_log_probs_list, [token_ids], [masked_indices] = \
                model.get_batch_generation([sentences], try_cuda=False)

            index_list = None
            if vocab_subset is not None:
                # filter log_probs
                filter_logprob_indices, index_list = model.init_indices_for_filter_logprobs(
                    vocab_subset)
                filtered_log_probs_list = model.filter_logprobs(
                    original_log_probs_list, filter_logprob_indices)
            else:
                filtered_log_probs_list = original_log_probs_list
            # rank over the subset of the vocab (if defined) for the SINGLE masked tokens
            experiment_result = None
            if masked_indices and len(masked_indices) > 0:
                _, _, experiment_result, _ = evaluation_metrics.get_ranking(
                    filtered_log_probs_list[0],
                    masked_indices,
                    model.vocab,
                    index_list=index_list,
                    print_generation=verbose)

            # guard against inputs whose template has no [MASK] token, in
            # which case get_ranking was never called above
            if experiment_result is not None:
                experiment_result_topk = [(r['i'], r['token_word_form'],
                                           r['log_prob'])
                                          for r in experiment_result['topk'][:10]]
                data_user_study_BERTKnowledge[model_name][action] = [
                    text, experiment_result_topk
                ]
            # prediction and perplexity for the whole softmax
            if verbose:
                print_sentence_predictions(original_log_probs_list[0],
                                           token_ids,
                                           model.vocab,
                                           masked_indices=masked_indices)

    print(data_user_study_BERTKnowledge)

    with open('./parsed_BERTKnowledge_tests.p', 'wb') as f:
        pickle.dump(data_user_study_BERTKnowledge, f)
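
To inspect the dumped results, a small read-back sketch; per the code above, the pickle holds {model_name: {action: [prompt_text, topk_list]}} with (i, token_word_form, log_prob) triples in topk_list.

import pickle

with open('./parsed_BERTKnowledge_tests.p', 'rb') as f:
    results = pickle.load(f)

for model_name, per_action in results.items():
    for action, (text, topk) in per_action.items():
        i, token, log_prob = topk[0]  # highest-ranked prediction
        print("{} | {}: top prediction {!r} (log-prob {:.3f})".format(
            model_name, action, token, log_prob))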