def run(experiment_name, model_name, load_previous, learning_rate, batch_size,
        samples_per_epoch, epochs, print_after_batches, save_after_steps,
        learning_rate_decay, L1_reg, L2_reg, n_factors_emb, n_hidden,
        using_dropout, dropout_rate, loss_weights):

    print('Meta parameters: ')
    print('experiment_name: ', experiment_name)
    print('learning_rate: ', learning_rate)
    print('batch_size: ', batch_size)
    print('samples_per_epoch: ', samples_per_epoch)
    print('epochs: ', epochs)
    print('n_factors_emb: ', n_factors_emb)
    print('n_hidden: ', n_hidden)
    print('using_dropout: ', using_dropout)
    print('dropout_rate: ', dropout_rate)
    print('loss_weights: ', loss_weights)
    print('')

    start_time = time.process_time()

    experiment_name_prefix = "%s_" % experiment_name
    tmp_exp_name = experiment_name_prefix + "temp"
    final_exp_name = experiment_name_prefix + "final"
    e27_exp_name = experiment_name_prefix + "e27"

    with open(DATA_PATH + "description", 'rb') as data_file:
        description = cPickle.load(data_file)
        print(description.keys())
        word_vocabulary = description['word_vocabulary']
        role_vocabulary = description['role_vocabulary']
        unk_word_id = description['NN_unk_word_id']
        unk_role_id = description['unk_role_id']
        missing_word_id = description['NN_missing_word_id']

        print(unk_word_id, unk_role_id, missing_word_id)

    print('... building the model')

    rng = np.random

    word_vocabulary['<NULL>'] = missing_word_id
    word_vocabulary['<UNKNOWN>'] = unk_word_id
    role_vocabulary['<UNKNOWN>'] = unk_role_id
    n_word_vocab = len(word_vocabulary)
    n_role_vocab = len(role_vocabulary)

    adagrad = Adagrad(lr=learning_rate, epsilon=1e-08, decay=0.0)

    if re.search('NNRF', model_name):
        model = NNRF(n_word_vocab,
                     n_role_vocab,
                     n_factors_emb,
                     512,
                     n_hidden,
                     word_vocabulary,
                     role_vocabulary,
                     unk_word_id,
                     unk_role_id,
                     missing_word_id,
                     using_dropout,
                     dropout_rate,
                     optimizer=adagrad,
                     loss='sparse_categorical_crossentropy',
                     metrics=['accuracy'])
    else:
        model = eval(model_name)(n_word_vocab,
                                 n_role_vocab,
                                 n_factors_emb,
                                 n_hidden,
                                 word_vocabulary,
                                 role_vocabulary,
                                 unk_word_id,
                                 unk_role_id,
                                 missing_word_id,
                                 using_dropout,
                                 dropout_rate,
                                 optimizer=adagrad,
                                 loss='sparse_categorical_crossentropy',
                                 metrics=['accuracy'],
                                 loss_weights=loss_weights)
    # else:
    #     sys.exit('No such model!!!')

    model.summary()

    print(model.model.metrics_names)

    epoch = 0
    max_output_length = 0
    validation_cost_history = []
    best_validation_cost = np.inf
    best_epoch = 0

    valid_sample_size = config.OCT_VALID_SIZE
    test_sample_size = config.OCT_TEST_SIZE
    train_steps = samples_per_epoch // batch_size
    valid_steps = valid_sample_size // batch_size
    test_steps = test_sample_size // batch_size
    # # DEBUG
    # valid_steps = 10
    # test_steps = 10

    save_after_steps = save_after_steps + 1

    training_verbose = 2
    max_q_size = 10
    workers = 2
    pickle_safe = True

    def thematic_fit_evaluation(model_name, experiment_name, model,
                                print_result):
        result = dict()
        # Pado, Mcrae A0/A1/A2
        tempdict = dict()
        tempdict['pado'], _, _ = eval_pado_mcrae(model_name, experiment_name,
                                                 'pado', model, print_result)
        tempdict['mcrae'], _, _ = eval_pado_mcrae(model_name, experiment_name,
                                                  'mcrae', model, print_result)
        # tempdict['pado_fixed'] = eval_pado_mcrae(model_name, experiment_name, 'pado_fixed', model=model, print_result=False)
        # tempdict['mcrad_fixed'] = eval_pado_mcrae(model_name, experiment_name, 'mcrad_fixed', model=model, print_result=False)
        for k, v in tempdict.items():
            for sk, sv in v.items():
                result[k + '-' + sk] = sv

        r, _, _, _, _ = eval_MNR_LOC(model_name,
                                     experiment_name,
                                     'AM-MNR',
                                     model,
                                     print_result,
                                     skip_header=True)
        result['mcrae-MNR'] = round(r, 4)
        r2, _, _, _, _ = eval_MNR_LOC(model_name, experiment_name, 'AM-LOC',
                                      model, print_result)
        result['mcrae-LOC'] = round(r2, 4)

        rho_obj, _, _, rho_fil, _, _, rho_gre, _, _ = eval_greenberg_all(
            model_name, experiment_name, model, print_result)
        result['GObject'] = round(rho_obj, 4)
        result['GFiller'] = round(rho_fil, 4)
        result['greenberg'] = round(rho_gre, 4)

        correct, _, acc = eval_bicknell_switch(model_name,
                                               experiment_name,
                                               'bicknell',
                                               model,
                                               print_result,
                                               switch_test=False)
        result['bicknell'] = (acc, correct)

        correlation = eval_GS(model_name, experiment_name, 'GS2013data.txt',
                              model, print_result)
        result['GS'] = round(correlation, 4)

        return result

    class CallbackContainer(Callback):
        """Callback that records events into a `History` object.
        """
        def on_train_begin(self, logs=None):
            self.epoch = []
            self.history = {}
            self.best_validation_cost = -1
            self.best_epoch = -1

        def on_epoch_begin(self, epoch, logs):
            self.epoch_start = time.process_time()

        def on_batch_end(self, batch, logs):
            batch_n = batch + 1
            epoch_n = len(self.epoch)
            if batch_n % print_after_batches == 0:
                elapsed_time = time.process_time() - self.epoch_start
                output = "batch %d; %d samples; %.1f sps; " % (
                    batch_n, batch_n * batch_size,
                    batch_n * batch_size / (elapsed_time + 1e-32))
                print(output)
            if batch_n % save_after_steps == 0:
                model.save(MODEL_PATH, tmp_exp_name, model_name, learning_rate,
                           self.history, self.best_validation_cost,
                           self.best_epoch, epoch_n)
                print("Temp model saved! ")

        def on_epoch_end(self, epoch, logs=None):
            epoch_n = epoch + 1
            logs = logs or {}
            self.epoch.append(epoch_n)

            print('Validating...')
            valid_result = model.model.evaluate_generator(
                generator=generator(DATA_PATH + "NN_dev",
                                    model_name,
                                    unk_word_id,
                                    unk_role_id,
                                    missing_word_id,
                                    role_vocabulary,
                                    random=False,
                                    batch_size=batch_size),
                steps=valid_steps,  # dev set uses the dev-sized step count
                max_q_size=1,
                workers=1,
                pickle_safe=False)
            print('validate_result', valid_result)

            for i, m in enumerate(model.model.metrics_names):
                logs['valid_' + m] = valid_result[i]

            # print model.model.get_layer("softmax_word_output").get_weights()[1]

            result = thematic_fit_evaluation(model_name, experiment_name,
                                             model, False)
            for k, v in result.items():
                logs[k] = v

            # print model.model.get_layer("softmax_word_output").get_weights()[1]

            for k, v in logs.items():
                self.history.setdefault(k, []).append(v)

            # save only when the new validation loss beats the best seen so far
            if epoch_n > 1 and self.history['valid_loss'][-1] < np.min(
                    np.array(self.history['valid_loss'][:-1])):
                print("Best model saved! ")
                self.best_validation_cost = np.min(
                    np.array(self.history['valid_loss']))
                self.best_epoch = np.argmin(
                    np.array(self.history['valid_loss'])) + 1
                model.save(MODEL_PATH, final_exp_name, model_name,
                           learning_rate, self.history,
                           self.best_validation_cost, self.best_epoch, epoch_n)
                print('best_validation_cost, best_epoch, epoch_n',
                      self.best_validation_cost, self.best_epoch, epoch_n)
                for k, v in self.history.items():
                    print(k, v)

            if epoch_n == 27:
                model.save(MODEL_PATH, e27_exp_name, model_name,
                           learning_rate, self.history,
                           self.best_validation_cost, self.best_epoch, epoch_n)

            print("Current model saved! ")
            model.save(MODEL_PATH, experiment_name, model_name, learning_rate,
                       self.history, self.best_validation_cost,
                       self.best_epoch, epoch_n)

    callback_container = CallbackContainer()

    # saves the backup model weights after each epoch if the validation loss decreased
    # backup_checkpointer = ModelCheckpoint(filepath='backup_' + experiment_name + '.hdf5', verbose=1, save_best_only=True)

    stopper = EarlyStopping(monitor='valid_loss',
                            min_delta=1e-3,
                            patience=5,
                            verbose=1)
    naNChecker = TerminateOnNaN()
    reduce_lr = ReduceLROnPlateau(monitor='valid_loss',
                                  factor=0.1,
                                  patience=3,
                                  min_lr=0.001)

    print('Training...')
    train_start = time.process_time()

    model.model.fit_generator(
        generator=generator(DATA_PATH + "NN_train",
                            model_name,
                            unk_word_id,
                            unk_role_id,
                            missing_word_id,
                            role_vocabulary,
                            random=True,
                            rng=rng,
                            batch_size=batch_size),
        steps_per_epoch=train_steps,
        epochs=epochs,
        verbose=training_verbose,
        workers=workers,
        max_q_size=max_q_size,
        pickle_safe=pickle_safe,
        callbacks=[callback_container, stopper, naNChecker, reduce_lr])
    print(callback_container.epoch)
    for k, v in callback_container.history.items():
        print(k, v)

    train_end = time.process_time()
    print('train and validate time: %f, sps: %f' %
          (train_end - train_start,
           train_steps * batch_size / (train_end - train_start)))

    print('Testing...')
    test_start = time.process_time()

    description_best = model_builder.load_description(MODEL_PATH,
                                                      experiment_name)
    model.load(MODEL_PATH, experiment_name, description_best)

    test_result = model.model.evaluate_generator(generator=generator(
        DATA_PATH + "NN_test",
        model_name,
        unk_word_id,
        unk_role_id,
        missing_word_id,
        role_vocabulary,
        random=False,
        batch_size=batch_size),
                                                 steps=test_steps,
                                                 max_q_size=1,
                                                 workers=1,
                                                 pickle_safe=False)
    print('test_result', test_result)

    test_end = time.process_time()
    print('test time: %f, sps: %f' %
          (test_end - test_start,
           test_steps * batch_size / (test_end - test_start)))

    end_time = time.process_time()
    print("Total running time %.2fh" % ((end_time - start_time) / 3600.))

    print('Optimization complete. Best validation cost of %f obtained at epoch %i' %
          (callback_container.best_validation_cost, callback_container.best_epoch))
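
# A minimal sketch of invoking run(); every value below is an illustrative
# hyperparameter choice, not a setting taken from the original experiments.
if __name__ == '__main__':
    run(experiment_name='MTRF_demo',   # assumed experiment tag
        model_name='MTRF',             # must name a model class resolvable by eval() above
        load_previous=False,
        learning_rate=0.01,
        batch_size=128,
        samples_per_epoch=100000,
        epochs=30,
        print_after_batches=100,
        save_after_steps=1000,
        learning_rate_decay=0.0,
        L1_reg=0.0,
        L2_reg=0.0,
        n_factors_emb=256,
        n_hidden=512,
        using_dropout=True,
        dropout_rate=0.1,
        loss_weights=[1.0, 1.0])
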
def evaluate(model_name, experiment_name, batch_size):
    MODEL_NAME = experiment_name
    repr_file = os.path.join(MODEL_PATH, 'confusionM_' + MODEL_NAME)

    description = model_builder.load_description(MODEL_PATH, MODEL_NAME)
    net = model_builder.build_model(model_name, description)
    net.load(MODEL_PATH, MODEL_NAME, description)

    n_roles = len(net.role_vocabulary)
    print(net.role_vocabulary)
    print("unk_word_id", net.unk_word_id)
    print("missing_word_id", net.missing_word_id)

    net.model.summary()
    print(net.model.metrics_names)
    reverse_role_vocabulary = utils.get_reverse_map(net.role_vocabulary)

    test_sample_size = config.OCT_TEST_SIZE
    test_steps = test_sample_size // batch_size
    # # DEBUG
    # test_steps = 10

    print('Testing...')
    test_start = time.process_time()

    # Always use generator in Keras
    if re.search('NAME_WHICH_YOU_NEED_OLD_BATCHER', experiment_name):
        test_gen = get_minibatch(DATA_PATH + "NN_test",
                                 net.unk_word_id,
                                 net.unk_role_id,
                                 net.missing_word_id,
                                 n_roles,
                                 random=False,
                                 batch_size=batch_size)
    else:
        test_gen = generator(DATA_PATH + "NN_test",
                             model_name,
                             net.unk_word_id,
                             net.unk_role_id,
                             net.missing_word_id,
                             n_roles,
                             random=False,
                             batch_size=batch_size)

    # Test the model
    test_result = net.model.evaluate_generator(generator=test_gen,
                                               steps=test_steps,
                                               max_q_size=1,
                                               workers=1,
                                               pickle_safe=False)
    print('test_result', test_result)

    # Compute confusion matrix
    metrics_names = net.model.metrics_names
    result_dict = {x: 0 for x in metrics_names}
    batch_n = 0
    confusionM = np.zeros((n_roles, n_roles), dtype='int32')
    ppl_role_list = dict()
    ppl_role = dict()

    result_list = []
    for ([i_w, i_r, t_w, t_r], _) in generator(DATA_PATH + "NN_test",
                                               model_name,
                                               net.unk_word_id,
                                               net.unk_role_id,
                                               net.missing_word_id,
                                               n_roles,
                                               random=False,
                                               batch_size=batch_size):
        result_role = net.predict_role(i_w, i_r, t_w, t_r, batch_size)
        result_word_likelihood = net.predict(i_w, i_r, t_w, t_r, batch_size)[0]
        neg_log_likelihoods = -np.log(result_word_likelihood)

        for i, row in enumerate(neg_log_likelihoods, start=0):
            target_word = t_w[i][0]
            target_role = t_r[i][0]
            neg_log_likelihood = row[target_word]
            ppl_role_list.setdefault(target_role,
                                     []).append(neg_log_likelihood)

        for i, true_r in enumerate(t_r, start=0):
            confusionM[true_r, result_role[i]] += 1
            if true_r == result_role[i]:
                result_list.append(1)
        batch_n += 1
        print(batch_n)
        if batch_n >= test_steps:
            break

    for k, v in ppl_role_list.items():
        neg_log_likelihood_role = np.mean(np.array(v))
        ppl_role[k] = np.exp(neg_log_likelihood_role)

    print("Confusion Matrix: ")
    print("    A0,  A1, LOC, TMP, MNR,   V, <UNKNOWN>")
    print(confusionM)
    np.savetxt('confusionM_' + experiment_name + '.csv',
               confusionM,
               delimiter=',')
    np.savetxt('result_list_' + experiment_name + '.csv',
               result_list,
               delimiter=',')

    stats(net, confusionM)

    print("Loss(neg_log_likelihood) by role: ")
    for r in ppl_role.keys():
        print(reverse_role_vocabulary[r], np.log(ppl_role[r]))

    print("PPL by role: ")
    for r in ppl_role.keys():
        print(reverse_role_vocabulary[r], ppl_role[r])

    with open(repr_file, 'w') as f_out:
        f_out.write('[')
        for i in range(n_roles):
            f_out.write('[')
            for j in range(n_roles):
                f_out.write(str(confusionM[i][j]) + ", ")
            f_out.write('] \n')
        f_out.write(']')

    test_end = time.process_time()
    print('test time: %f, sps: %f' %
          (test_end - test_start,
           test_steps * batch_size / (test_end - test_start)))
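
# Worked example of the per-role perplexity computed above: ppl_role is
# exp(mean negative log-likelihood) of the gold fillers for that role.
# The probabilities below are fabricated for illustration.
import numpy as np

token_probs = np.array([0.25, 0.10, 0.50])  # model probabilities of three gold fillers
neg_log_likelihoods = -np.log(token_probs)  # per-token NLL, as gathered in ppl_role_list
ppl = np.exp(neg_log_likelihoods.mean())    # perplexity = exp(mean NLL)
print(ppl)                                  # ~4.31; a perfect model would give 1.0
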
def eval_GS(model_name,
            experiment_name,
            eval_file_name,
            model=None,
            print_result=True,
            verb_baseline=False):
    MODEL_NAME = experiment_name
    eval_file = os.path.join(EVAL_PATH, eval_file_name)
    result_file = os.path.join(MODEL_PATH, MODEL_NAME + '_' + eval_file_name)

    if model:
        net = model
    else:
        description = model_builder.load_description(MODEL_PATH, MODEL_NAME)
        net = model_builder.build_model(model_name, description)
        net.load(MODEL_PATH, MODEL_NAME, description)

    sent_layer = 'context_embedding'

    sent_model = Model(inputs=net.model.input,
                       outputs=net.model.get_layer(sent_layer).output)

    # if print_result:
    #     sent_model.summary()

    n_input_length = len(net.role_vocabulary) - 1

    print(net.role_vocabulary)

    scores = []
    similarities = []
    original_sim_f = []
    similarities_f = []
    lo_similarities = []
    hi_similarities = []
    records = []

    print("Embedding: " + experiment_name)
    print("=" * 60)
    print("\n")
    print("sentence1\tsentence2\taverage_score\tembedding_cosine")
    print("-" * 60)

    with open(eval_file, 'r') as f, \
        open(result_file, 'w') as f_out:

        first = True
        for line in f:
            # skip header
            if first:
                first = False
                continue

            s = line.split()
            sentence = " ".join(s[1:5])
            score = float(s[5])
            hilo = s[6].upper()

            # verb subject object landmark
            # A1 - object; A0 - subject
            V1, A0, A1, V2 = sentence.split()

            V1 = wnl.lemmatize(V1, wn.VERB)
            A0 = wnl.lemmatize(A0, wn.NOUN)
            A1 = wnl.lemmatize(A1, wn.NOUN)
            V2 = wnl.lemmatize(V2, wn.VERB)

            V1_i = net.word_vocabulary.get(V1, net.unk_word_id)
            A0_i = net.word_vocabulary.get(A0, net.unk_word_id)
            A1_i = net.word_vocabulary.get(A1, net.unk_word_id)
            V2_i = net.word_vocabulary.get(V2, net.unk_word_id)

            # if np.array([V1_i, A0_i, A1_i, V2_i]).any() == net.unk_word_id:
            #     print 'OOV: ', A0, A1, V1, V2

            V_ri = net.role_vocabulary['V']
            A0_ri = net.role_vocabulary['A0']
            A1_ri = net.role_vocabulary['A1']

            sent1_x = dict((r, net.missing_word_id)
                           for r in (net.role_vocabulary.values()))
            sent2_x = dict((r, net.missing_word_id)
                           for r in (net.role_vocabulary.values()))

            sent1_x.pop(n_input_length)
            sent2_x.pop(n_input_length)

            sent1_x[V_ri] = V1_i
            sent2_x[V_ri] = V2_i

            if not verb_baseline:
                sent1_x[A0_ri] = A0_i
                sent1_x[A1_ri] = A1_i
                sent2_x[A0_ri] = A0_i
                sent2_x[A1_ri] = A1_i

            zeroA = np.array([0])

            s1_w = np.array(list(sent1_x.values())).reshape((1, n_input_length))
            s1_r = np.array(list(sent1_x.keys())).reshape((1, n_input_length))
            s2_w = np.array(list(sent2_x.values())).reshape((1, n_input_length))
            s2_r = np.array(list(sent2_x.keys())).reshape((1, n_input_length))

            if re.search('NNRF', model_name):
                sent1_emb = sent_model.predict([s1_w, s1_r, zeroA])
                sent2_emb = sent_model.predict([s2_w, s2_r, zeroA])
            else:
                sent1_emb = sent_model.predict([s1_w, s1_r, zeroA, zeroA])
                sent2_emb = sent_model.predict([s2_w, s2_r, zeroA, zeroA])

            # Baseline
            #sent1_emb = V1_i
            #sent2_emb = V2_i
            # Compositional
            # sent1_emb = V1_i + A0_i + A1_i
            # sent2_emb = V2_i + A0_i + A1_i
            #sent1_emb = V1_i * A0_i * A1_i
            #sent2_emb = V2_i * A0_i * A1_i

            similarity = -(cosine(sent1_emb, sent2_emb) - 1.0)  # distance -> similarity

            if hilo == "HIGH":
                hi_similarities.append(similarity)
            elif hilo == "LOW":
                lo_similarities.append(similarity)
            else:
                raise Exception("Unknown hilo value %s" % hilo)

            if (V1, A0, A1, V2) not in records:
                records.append((V1, A0, A1, V2))
                # print "\"%s %s %s\"\t\"%s %s %s\"\t%.2f\t%.2f \n" % (A0, V1, A1, A0, V2, A1, score, similarity)

            scores.append(score)
            similarities.append(similarity)

            f_out.write("\"%s %s %s\"\t\"%s %s %s\"\t %.2f \t %.2f \n" %
                        (A0, V1, A1, A0, V2, A1, score, similarity))

    print("-" * 60)

    correlation, pvalue = spearmanr(scores, similarities)

    if print_result:
        print("Total number of samples: %d" % len(scores))
        print("Spearman correlation: %.4f; 2-tailed p-value: %.10f" %
              (correlation, pvalue))
        print("High: %.2f; Low: %.2f" %
              (np.mean(hi_similarities), np.mean(lo_similarities)))

        # import pylab
        # pylab.scatter(scores, similarities)
        # pylab.show()

    return correlation
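
# The expression -(cosine(a, b) - 1.0) used in eval_GS above is ordinary
# cosine similarity: scipy's cosine() returns the distance 1 - cos(a, b).
# Toy demonstration with made-up vectors:
import numpy as np
from scipy.spatial.distance import cosine

a = np.array([1.0, 2.0, 3.0])
b = np.array([2.0, 4.0, 6.0])
print(-(cosine(a, b) - 1.0))                                   # 1.0 for parallel vectors
print(np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b)))  # the same value
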
def eval_bicknell_switch(model_name,
                         experiment_name,
                         evaluation,
                         model=None,
                         print_result=True,
                         switch_test=False):
    MODEL_NAME = experiment_name

    if model:
        net = model
    else:
        description = model_builder.load_description(MODEL_PATH, MODEL_NAME)
        net = model_builder.build_model(model_name, description)
        net.load(MODEL_PATH, MODEL_NAME, description)

    bias = net.set_0_bias()

    if print_result:
        print(net.role_vocabulary)

    eval_data_file = os.path.join(RF_EVAL_PATH, evaluation + '.txt')

    result_file = os.path.join(MODEL_PATH,
                               MODEL_NAME + '_' + evaluation + '.txt')

    probs = []
    baseline = []
    oov_count = 0

    if print_result:
        print(eval_data_file)
        print("=" * 60)

    dataset = numpy.genfromtxt(eval_data_file,
                               dtype=str,
                               delimiter='\t',
                               usecols=[0, 1, 2, 3, 4])

    samples = []
    i = 0

    while True:
        d = dataset[i]
        d2 = dataset[i + 1]

        A0 = d[0][:-2]
        V = d[1][:-2]
        assert d2[0][:-2] == A0
        assert d2[1][:-2] == V

        if d[3] == 'yes':
            assert d2[3] == 'no'
            A1_correct = d[2][:-2]
            A1_incorrect = d2[2][:-2]
            b_correct = d[4]
            b_incorrect = d2[4]
        else:
            assert d[3] == 'no'
            A1_correct = d2[2][:-2]
            A1_incorrect = d[2][:-2]
            b_correct = d2[4]
            b_incorrect = d[4]

        if A1_correct not in net.word_vocabulary or A1_incorrect not in net.word_vocabulary:
            if A1_correct not in net.word_vocabulary and print_result:
                print("%s MISSING FROM VOCABULARY. SKIPPING..." % A1_correct)
            if A1_incorrect not in net.word_vocabulary and print_result:
                print("%s MISSING FROM VOCABULARY. SKIPPING..." % A1_incorrect)
        else:
            roles = list(net.role_vocabulary.values())
            del roles[net.unk_role_id]

            input_roles_words = dict((r, net.missing_word_id) for r in (roles))

            input_roles_words[
                net.role_vocabulary["A0"]] = utils.input_word_index(
                    net.word_vocabulary, A0, net.unk_word_id, warn_unk=True)
            input_roles_words[
                net.role_vocabulary["V"]] = utils.input_word_index(
                    net.word_vocabulary, V, net.unk_word_id, warn_unk=True)

            sample = (
                numpy.asarray(
                    [list(input_roles_words.values()),
                     list(input_roles_words.values())],
                    dtype=numpy.int64),  # x_w_i
                numpy.asarray(
                    [list(input_roles_words.keys()),
                     list(input_roles_words.keys())],
                    dtype=numpy.int64),  # x_r_i
                numpy.asarray([
                    net.word_vocabulary[A1_correct],
                    net.word_vocabulary[A1_incorrect]
                ],
                              dtype=numpy.int64),  # y_i (1st correct, 2nd incorrect)
                numpy.asarray(
                    [net.role_vocabulary["A1"], net.role_vocabulary["A1"]],
                    dtype=numpy.int64),  # y_r_i
                [b_correct, b_incorrect],  # bicknell scores
                "\"" + A0 + " " + V + "\"",  # context
                [A1_correct, A1_incorrect])

            samples.append(sample)

        i += 2
        if i > len(dataset) - 2:
            break

    num_samples = len(samples)
    num_correct = 0
    num_total = 0

    if print_result:
        print("context", "correct", "incorrect", "P(correct)", "P(incorrect)",
              "bicknell_correct", "bicknell_incorrect")

    result_list = []

    for x_w_i, x_r_i, y_w_i, y_r_i, bicknell, context, a1 in samples:

        p = net.p_words(x_w_i, x_r_i, y_w_i, y_r_i)

        p_correct = p[0]
        p_incorrect = p[1]

        if print_result:
            print(context, a1[0], a1[1], p_correct, p_incorrect,
                  bicknell[0], bicknell[1])

        if p_correct > p_incorrect:
            result_list.append(1)
        else:
            result_list.append(0)

        num_correct += p_correct > p_incorrect
        num_total += 1

    assert num_total == num_samples

    accuracy = float(num_correct) / float(num_samples)

    if print_result:
        print("Number of lines %d" % num_samples)
        print("Baseline Lenci11 is 43/64=0.671875")
        print("Final score of the model is %d/%d=%.6f" %
              (num_correct, num_samples, accuracy))

    print(result_list)

    if switch_test and print_result:
        print("\nSwitch A0/A1 TEST")

        input_words = []
        input_roles = []
        for i in range(1):
            roles = list(net.role_vocabulary.values())
            print(net.unk_role_id)
            roles.remove(net.unk_role_id)

            input_role_word_pairs = dict(
                (r, net.missing_word_id) for r in roles)
            input_role_word_pairs[
                net.role_vocabulary["V"]] = utils.input_word_index(
                    net.word_vocabulary, "buy", net.unk_word_id, warn_unk=True)

            input_words.append(list(input_role_word_pairs.values()))
            input_roles.append(list(input_role_word_pairs.keys()))

        man = utils.input_word_index(net.word_vocabulary,
                                     "man",
                                     net.unk_word_id,
                                     warn_unk=True)
        car = utils.input_word_index(net.word_vocabulary,
                                     "car",
                                     net.unk_word_id,
                                     warn_unk=True)
        a1 = net.role_vocabulary["A1"]
        a0 = net.role_vocabulary["A0"]

        a0_test = (
            numpy.asarray(input_words, dtype=numpy.int64),
            numpy.asarray(input_roles, dtype=numpy.int64),
            numpy.asarray([man, car], dtype=numpy.int64),
            numpy.asarray([a0], dtype=numpy.int64),
        )
        x_w_i, x_r_i, y_w_i, y_r_i = a0_test
        p0 = net.p_words(x_w_i, x_r_i, y_w_i, y_r_i)
        print(p0)

        a1_test = (
            numpy.asarray(input_words, dtype=numpy.int64),
            numpy.asarray(input_roles, dtype=numpy.int64),
            numpy.asarray([man, car], dtype=numpy.int64),
            numpy.asarray([a1], dtype=numpy.int64),
        )
        x_w_i, x_r_i, y_w_i, y_r_i = a1_test
        p1 = net.p_words(x_w_i, x_r_i, y_w_i, y_r_i)
        print(p1)

        print("man buy", p0[0])
        print("buy man", p1[0])
        print("car buy", p0[1])
        print("buy car", p1[1])

    net.set_bias(bias)

    return num_correct, num_samples, accuracy
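
# The Bicknell accuracy above is paired-choice accuracy: one point whenever
# the model gives the plausible filler a higher probability than the
# implausible one. Tiny demo with fabricated probability pairs:
pairs = [(0.031, 0.004), (0.012, 0.019), (0.008, 0.002)]  # (P(correct), P(incorrect))
accuracy = sum(pc > pi for pc, pi in pairs) / float(len(pairs))
print(accuracy)                                           # 2 of 3 pairs correct: ~0.667
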
def evaluate(model_name,
             experiment_name,
             test_name,
             batch_size,
             VR_SP_SRL=True,
             bootstrapping=False,
             majority_baseline=False):
    MODEL_NAME = experiment_name
    # repr_file = os.path.join(MODEL_PATH, 'confusionM_' + MODEL_NAME)

    description = model_builder.load_description(MODEL_PATH, MODEL_NAME)
    net = model_builder.build_model(model_name, description)
    net.load(MODEL_PATH, MODEL_NAME, description)

    n_roles = len(net.role_vocabulary)
    reverse_word_vocabulary = utils.get_reverse_map(net.word_vocabulary)
    reverse_role_vocabulary = utils.get_reverse_map(net.role_vocabulary)
    # net.set_0_bias()

    print(net.role_vocabulary)
    print("unk_word_id", net.unk_word_id)
    print("missing_word_id", net.missing_word_id)

    net.model.summary()

    # print net.model.metrics_names

    test_sample_size = 0
    with open(EVAL_PATH + test_name, 'r') as lines:
        for l in lines:
            test_sample_size += 1
    print(test_sample_size)

    test_steps = test_sample_size / float(batch_size)
    # test_steps = test_sample_size
    # # DEBUG
    # test_steps = 10

    print('Testing ' + test_name + ' ...')
    print('VR_SP_SRL: ' + str(VR_SP_SRL))
    test_start = time.process_time()

    # if re.search('NNRF_1e8', experiment_name) or re.search('MTRF_dev', experiment_name):
    #     test_gen = get_minibatch(DATA_PATH + "NN_test", net.unk_word_id, net.unk_role_id, net.missing_word_id,
    #             n_roles, random=False, batch_size=batch_size)
    # else:
    #     test_gen = generator(DATA_PATH + "NN_test", model_name, net.unk_word_id, net.unk_role_id, net.missing_word_id,
    #             n_roles, random=False, batch_size=batch_size)

    # # Test the model
    # test_result = net.model.evaluate_generator(
    #         generator = test_gen,
    #         steps = test_steps,
    #         max_q_size = 1,
    #         workers = 1,
    #         pickle_safe = False
    #     )
    # print ('test_result', test_result)

    # Compute confusion matrix
    metrics_names = net.model.metrics_names
    result_dict = {x: 0 for x in metrics_names}
    batch_n = 0
    confusionM = np.zeros((n_roles, n_roles), dtype='int32')
    ppl_role_list = dict()

    result_list = []
    output_list = []
    for ([i_w, i_r, t_w, t_r], _) in data_gen(EVAL_PATH + test_name,
                                              model_name,
                                              net,
                                              batch_size,
                                              VR_SP_SRL=VR_SP_SRL):
        # zeros = np.zeros(t_r.shape)
        result_role = net.predict_role(i_w, i_r, t_w, t_r, batch_size)

        # word_emb, avg_emb, event_emb = net.avg_emb.predict([i_w, i_r, t_w, t_r], batch_size)
        # print word_emb.shape, avg_emb.shape, event_emb.shape
        # assert np.multiply(word_emb[0][0], avg_emb[0])[0] == event_emb[0][0][0]
        # assert np.multiply(word_emb[0][0], avg_emb[0])[1] == event_emb[0][0][1]

        # test role prediction of MTRF_dev, result: role prediction is useless
        # print i_r
        # print t_r.reshape(-1)
        # print result_role

        # result_word_likelihood = net.predict(i_w, i_r, t_w, t_r, batch_size)[0]
        # neg_log_likelihoods = -np.log(result_word_likelihood)

        # for i, row in enumerate(neg_log_likelihoods, start=0):
        #     target_word = t_w[i][0]
        #     target_role = t_r[i][0]
        #     neg_log_likelihood = row[target_word]
        #     ppl_role_list.setdefault(target_role, []).append(neg_log_likelihood)

        # print i_w, i_r, t_w, t_r

        for i, true_r in enumerate(t_r, start=0):
            # if reverse_role_vocabulary.get(t_r[0][0], '<unknown>') == 'AM-LOC':
            #     print ("input words", [reverse_word_vocabulary.get(w, '<unknown>') for w in i_w[0]])
            #     print ("input roles", [reverse_role_vocabulary.get(r, '<unknown>') for r in i_r[0]])
            #     print ("target word", [reverse_word_vocabulary.get(w, '<unknown>') for w in t_w[0]])
            #     print ("target role", [reverse_role_vocabulary.get(r, '<unknown>') for r in t_r[0]])
            #     print ("predicted role", [reverse_role_vocabulary.get(result_role[i], '<unknown>') for r in t_r[0]])
            #     print ''

            confusionM[true_r, result_role[i]] += 1
            if true_r == result_role[i]:
                result_list.append(1)
            output_list.append((true_r, result_role[i]))
        batch_n += 1
        if batch_n % 100 == 0:
            print(batch_n)
        if batch_n >= test_steps:
            break

    # ppl_role = dict()
    # for k, v in ppl_role_list.items():
    #     neg_log_likelihood_role = np.mean(np.array(v))
    #     ppl_role[k] = np.exp(neg_log_likelihood_role)

    # obtain ZeroR baseline
    print(confusionM)
    majority = 1
    if majority_baseline:
        # ZeroR: assign every instance to the majority role column
        for i in range(n_roles):
            confusionM[i][majority] = confusionM[i][:].sum()
            confusionM[i][majority - 1] = 0
            confusionM[i][majority + 1:] = 0
    print(confusionM)

    dir_P, dir_R, dir_F1, precision, recall, F1 = stats(net, confusionM)
    print("Dir: %.2f \t %.2f \t %.2f" % (dir_P, dir_R, dir_F1))

    # np.savetxt('confusionM_' + experiment_name + '.' + test_name.strip('.dat') + '.csv', confusionM, delimiter = ',')
    # np.savetxt('output_' + experiment_name + '.' + test_name.strip('.dat') + '.csv', output_list, delimiter = ',')

    # with open(repr_file, 'w') as f_out:
    #     f_out.write('[')
    #     for i in range(n_roles):
    #         f_out.write('[')
    #         for j in range(n_roles):
    #             f_out.write(str(confusionM[i][j]) + ", ")
    #         f_out.write('] \n')
    #     f_out.write(']')

    # print "Loss(neg_log_likelihood) by role: "
    # for r in ppl_role.keys():
    #     print (reverse_role_vocabulary[r], np.log(ppl_role[r]))

    print("Result by role: ")
    for r in range(len(precision)):
        print('%s: \t %.2f \t %.2f \t %.2f' %
              (reverse_role_vocabulary[r], precision[r], recall[r], F1[r]))

    test_end = time.process_time()
    print('test time: %f, sps: %f' %
          (test_end - test_start,
           test_steps * batch_size / (test_end - test_start)))

    if bootstrapping:
        P_mean, P_std, R_mean, R_std, F1_mean, F1_std = bootstrap(
            experiment_name, test_name, net, n_roles, output_list=output_list)

        return P_mean, P_std, R_mean, R_std, F1_mean, F1_std
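
# stats() is defined elsewhere; this sketch shows how per-role
# precision/recall/F1 follow from a confusion matrix whose rows are gold
# roles and whose columns are predictions, the orientation used by
# confusionM[true_r, result_role[i]] += 1 above.
import numpy as np

def prf_from_confusion(cm):
    cm = cm.astype(float)
    tp = np.diag(cm)                                    # correct predictions per role
    precision = tp / np.maximum(cm.sum(axis=0), 1e-12)  # column sums = predicted counts
    recall = tp / np.maximum(cm.sum(axis=1), 1e-12)     # row sums = gold counts
    f1 = 2 * precision * recall / np.maximum(precision + recall, 1e-12)
    return precision, recall, f1

toy_cm = np.array([[8, 2],
                   [1, 9]])
print(prf_from_confusion(toy_cm))  # precision [8/9, 9/11], recall [0.8, 0.9]
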
def query(model_name, experiment_name, inputs, target):
    MODEL_NAME = experiment_name
    description = model_builder.load_description(MODEL_PATH, MODEL_NAME)

    net = model_builder.build_model(model_name, description)
    net.load(MODEL_PATH, MODEL_NAME, description)

    # net.model.summary()
    # print net.model.get_layer(name="embedding_2").get_weights()[0]

    print(net.role_vocabulary)
    print("unk_word_id", net.unk_word_id)
    print("missing_word_id", net.missing_word_id)
    # net.set_0_bias()

    net.model.summary()

    propbank_map = {
        "subj"  :   "A0",
        "obj"   :   "A1",
        "ARG0"  :   "A0",
        "ARG1"  :   "A1",
        "ARG2"  :   "A2",
    }

    # tr_map = {
    #     "A0": numpy.asarray([[net.role_vocabulary["A0"]]], dtype=numpy.int64),
    #     "A1": numpy.asarray([[net.role_vocabulary["A1"]]], dtype=numpy.int64),
    #     "A2": numpy.asarray([[net.role_vocabulary["<UNKNOWN>"]]], dtype=numpy.int64)
    # }

    # net.word_vocabulary["<NOTHING>"] = net.missing_word_id
    # net.role_vocabulary["<UNKNOWN>"] = net.unk_role_id    

    reverse_vocabulary = utils.get_reverse_map(net.word_vocabulary)
    reverse_role_vocabulary = utils.get_reverse_map(net.role_vocabulary)    

    print(reverse_role_vocabulary)

    raw_words = dict((reverse_role_vocabulary[r],
                      reverse_vocabulary[net.missing_word_id])
                     for r in net.role_vocabulary.values())

    # print raw_words

    raw_words.update(inputs)
    
    # print raw_words
    # print len(raw_words)
    assert len(raw_words) == len(net.role_vocabulary)
    # print repr(raw_words)

    # n = int(sys.argv[3])    
    t_r = [net.role_vocabulary.get(r, net.unk_role_id) for r in target.keys()]
    t_w = [net.word_vocabulary.get(w, net.unk_word_id) for w in target.values()]

    input_roles_words = {}
    for r, w in raw_words.items():
        input_roles_words[net.role_vocabulary[r]] = utils.input_word_index(net.word_vocabulary, w, net.unk_word_id, warn_unk=True)

    print(input_roles_words, t_r)
    input_roles_words.pop(t_r[0])

    # default_roles_words = dict((r, net.missing_word_id) for r in (net.role_vocabulary.values()))
    # default_roles_words.update(input_roles_words)
    # input_roles_words = default_roles_words
        
    x_w_i = numpy.asarray([list(input_roles_words.values())], dtype=numpy.int64)
    x_r_i = numpy.asarray([list(input_roles_words.keys())], dtype=numpy.int64)
    y_w_i = numpy.asarray(t_w, dtype=numpy.int64)
    y_r_i = numpy.asarray(t_r, dtype=numpy.int64)

    topN=20
    predicted_word_indices = net.top_words(x_w_i, x_r_i, y_w_i, y_r_i, topN)
    # print predicted_word_indices
    # print len(predicted_word_indices)

    print(x_w_i, x_r_i, y_w_i, y_r_i)

    p_w = net.p_words(x_w_i, x_r_i, y_w_i, y_r_i, batch_size=1, verbose=0)[0]
    print('p_t_w: ', p_w)

    resultlist = predicted_word_indices
    # print resultlist

    for i, t_w_i in enumerate(resultlist):
        y_w_i = numpy.asarray([t_w_i], dtype=numpy.int64)
        p = net.p_words(x_w_i, x_r_i, y_w_i, y_r_i, batch_size=1, verbose=0)[0]
        n = numpy.round(p / 0.005)  # probability in half-block units of 0.005
        fb = int(n // 2)            # full blocks (0.01 each)
        hb = int(n % 2)             # one trailing half block when the count is odd
        print(u"{:<5} {:7.6f} {:<20} ".format(i + 1, float(p),
                                              reverse_vocabulary[int(t_w_i)]) +
              u"\u2588" * fb + u"\u258C" * hb)
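
# A minimal sketch of calling query(); the role labels come from the role
# vocabulary used above, while the model/experiment names and the filler
# words are illustrative assumptions.
query(model_name='NNRF',
      experiment_name='NNRF_demo',
      inputs={'A0': 'man', 'V': 'buy'},  # known role fillers
      target={'A1': 'car'})              # role whose filler is scored and ranked
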
def eval_pado_mcrae(model_name, experiment_name, evaluation, model=None, print_result=True):
    MODEL_NAME = experiment_name

    if model:
        net = model
    else:
        description = model_builder.load_description(MODEL_PATH, MODEL_NAME)
        net = model_builder.build_model(model_name, description)
        net.load(MODEL_PATH, MODEL_NAME, description)

    bias = net.set_0_bias()

    # net.model.summary()
    # print net.model.get_layer(name="embedding_2").get_weights()[0]

    # If no <UNKNOWN> role in the role vocabulary, add it.
    if net.role_vocabulary.get("<UNKNOWN>", -1) == -1:
        net.role_vocabulary["<UNKNOWN>"] = len(net.role_vocabulary) - 1

    if print_result:
        print(net.role_vocabulary)
        print("unk_word_id", net.unk_word_id)
        print("missing_word_id", net.missing_word_id)

    propbank_map = {
        "subj"  :   "A0",
        "obj"   :   "A1",
        "ARG0"  :   "A0",
        "ARG1"  :   "A1",
    }

    tr_map = {
        "A0": numpy.asarray([net.role_vocabulary["A0"]], dtype=numpy.int64),
        "A1": numpy.asarray([net.role_vocabulary["A1"]], dtype=numpy.int64),
        "<UNKNOWN>": numpy.asarray([net.role_vocabulary["<UNKNOWN>"]], dtype=numpy.int64)
    }

    if "A2" not in net.role_vocabulary.keys():
        propbank_map["ARG2"] = "<UNKNOWN>"
        tr_map["A2"] = numpy.asarray([net.role_vocabulary["<UNKNOWN>"]], dtype=numpy.int64)
    else:
        propbank_map["ARG2"] = "A2"
        tr_map["A2"] = numpy.asarray([net.role_vocabulary["A2"]], dtype=numpy.int64)

    fixed = False
    if evaluation == "pado":    
        eval_data_file = os.path.join(EVAL_PATH, 'pado_plausibility_pb.txt')
    elif evaluation == 'mcrae':
        eval_data_file = os.path.join(EVAL_PATH, 'mcrae_agent_patient_more.txt')
    else:
        fixed = True
        if evaluation == 'pado_fixed':
            eval_data_file = os.path.join(RV_EVAL_PATH, 'Pado-AsadFixes.txt')
        elif evaluation == 'mcrae_fixed':
            eval_data_file = os.path.join(RV_EVAL_PATH, 'McRaeNN-fixed.txt')
        else:
            eval_data_file = os.path.join(COMP_EVAL_PATH, 'compare-pado.txt')

    result_file = os.path.join(MODEL_PATH, MODEL_NAME + '_' + evaluation + '.txt')

    r_i = net.role_vocabulary["V"]

    probs = {}
    baseline = {}
    oov_count = {}
    blist = []
    plist = []

    if print_result:
        print(eval_data_file)
        print("=" * 60)

    with open(eval_data_file, 'r') as f, \
            open(result_file, 'w') as f_out:
        for i, line in enumerate(f):

            line = line.strip()
            if line == "":
                continue

            w, tw, tr = line.split()[:3]  # input word, target word, role
            w = w[:-2] if fixed else w
            tw = tw[:-2] if fixed else tw

            w = wnl.lemmatize(w, wn.VERB)
            tw = wnl.lemmatize(tw, wn.NOUN)

            w_i = net.word_vocabulary.get(w, net.unk_word_id)
            tw_i = net.word_vocabulary.get(tw, net.unk_word_id)
            tr_i = net.role_vocabulary.get(propbank_map[tr], net.unk_role_id)

            if tw_i == net.unk_word_id:
                print(w, tr, tw)
                oov_count[tr] = oov_count.get(tr, 0) + 1
                f_out.write(line + "\tnan\n")
                continue

            b = float(line.split()[3])
            baseline.setdefault(tr, []).append(b)
            blist.append(b)

            sample = dict((r, net.missing_word_id)
                          for r in (list(net.role_vocabulary.values()) +
                                    [net.unk_role_id]))
            sample[r_i] = w_i

            sample.pop(net.role_vocabulary[propbank_map[tr]], None)

            x_w_i = numpy.asarray([list(sample.values())], dtype=numpy.int64)
            x_r_i = numpy.asarray([list(sample.keys())], dtype=numpy.int64)
            y_w_i = numpy.asarray([tw_i])
            y_r_i = tr_map[propbank_map[tr]]

            s = net.p_words(x_w_i, x_r_i, y_w_i, y_r_i)
            # pr = net.p_roles(x_w_i, x_r_i, y_w_i, y_r_i)

            plist.append(s)

            probs.setdefault(tr, []).append(s)

            f_out.write(line + "\t%s\n" % s)

    result = dict()
    for r, b in baseline.items():
        p = probs[r]
        rho, p_value = spearmanr(b, p)
        rating = len(p)
        oov = oov_count.get(r, 0)

        result[r] = round(rho, 4)
        if print_result:
            print("=" * 60)
            print("ROLE: %s" % r)
            print("-" * 60)
            print("Spearman correlation: %f; 2-tailed p-value: %f" %
                  (rho, p_value))
            print("Num ratings: %d (%d out of vocabulary)" % (rating, oov))

    rho, p_value = spearmanr(blist, plist)
    
    result['all'] = round(rho, 4)
    if print_result:
        print("Spearman correlation of %s: %f; 2-tailed p-value: %f" %
              (evaluation, rho, p_value))


    net.set_bias(bias)
    
    return result, plist, blist
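
# A minimal usage sketch; the experiment name is an assumption. The returned
# dict maps each role (plus 'all') to its Spearman rho against human ratings.
result, plist, blist = eval_pado_mcrae('NNRF', 'NNRF_demo', 'pado')
print(result)
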
def eval_MNR_LOC(model_name,
                 experiment_name,
                 evaluation,
                 model=None,
                 print_result=True,
                 skip_header=False):
    MODEL_NAME = experiment_name

    if model:
        net = model
    else:
        description = model_builder.load_description(MODEL_PATH, MODEL_NAME)
        net = model_builder.build_model(model_name, description)
        net.load(MODEL_PATH, MODEL_NAME, description)

    bias = net.set_0_bias()

    if print_result:
        print(net.role_vocabulary)

    tr_map = {
        "ARG2": "A2",
        "ARG3": "A3",
        "ARGM-MNR": "AM-MNR",
        "ARGM-LOC": "AM-LOC",
    }

    if evaluation == "AM-MNR":
        eval_data_file = os.path.join(RV_EVAL_PATH,
                                      'McRaeInstr-fixed' + '.txt')
        remove_suffix = False
    elif evaluation == 'AM-LOC':
        eval_data_file = os.path.join(RV_EVAL_PATH, 'McRaeLoc-fixed' + '.txt')
        remove_suffix = True
    else:
        sys.exit('No such evaluation!!!')

    result_file = os.path.join(MODEL_PATH,
                               MODEL_NAME + '_' + evaluation + '.txt')

    probs = []
    baseline = []
    oov_count = 0

    r_i = net.role_vocabulary["V"]

    if print_result:
        print(eval_data_file, evaluation)
        print("=" * 60)

    with open(eval_data_file, 'r') as f, \
         open(result_file, 'w') as f_out:
        for i, line in enumerate(f):
            if i == 0 and skip_header:
                # print(line.strip() + "\tP(instrument|verb)")
                continue  # skip header
            line = line.strip()
            # input word, target word, two role-label/rating columns
            w, tw, temp1, temp2 = line.split()[:4]
            w = w[:-2] if remove_suffix else w
            tw = tw[:-2] if remove_suffix else tw

            w = wnl.lemmatize(w.lower(), wn.VERB)
            tw = wnl.lemmatize(tw.lower(), wn.NOUN)

            w_i = net.word_vocabulary.get(w, net.unk_word_id)
            tw_i = net.word_vocabulary.get(tw, net.unk_word_id)

            if evaluation == "AM-MNR":
                r = temp2
            else:
                r = temp1

            # tr_i = net.role_vocabulary.get(evaluation, net.unk_role_id)
            tr_i = net.role_vocabulary.get(tr_map[r], net.unk_role_id)
            y_r_i = numpy.asarray([tr_i], dtype=numpy.int64)

            if tw_i == net.unk_word_id:
                oov_count += 1
                print(w, tw)
                f_out.write(line + "\tnan\n")
                continue

            b = float(line.split()[-1 if remove_suffix else -2])
            baseline.append(b)

            input_roles_words = dict(
                (r, net.missing_word_id)
                for r in (list(net.role_vocabulary.values()) + [net.unk_role_id]))
            input_roles_words[r_i] = w_i
            input_roles_words.pop(tr_i, None)

            x_w_i = numpy.asarray([list(input_roles_words.values())],
                                  dtype=numpy.int64)
            x_r_i = numpy.asarray([list(input_roles_words.keys())],
                                  dtype=numpy.int64)

            y_w_i = numpy.asarray([tw_i], dtype=numpy.int64)

            p = net.p_words(x_w_i, x_r_i, y_w_i, y_r_i)
            # pr = net.p_roles(x_w_i, x_r_i, y_w_i, y_r_i)

            probs.append(p)

            f_out.write(line + "\t%s\n" % p)

    rho, p_value = spearmanr(baseline, probs)
    rating = len(probs)

    if print_result:
        print("Spearman correlation: %f; 2-tailed p-value: %f" % (rho, p_value))
        print("Num ratings: %d (%d out of vocabulary)" % (rating, oov_count))

    net.set_bias(bias)

    return rho, p_value, oov_count, probs, baseline
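
# Every correlation evaluation here reduces to Spearman's rank correlation
# between human plausibility ratings and model probabilities. Toy
# demonstration with fabricated ratings:
from scipy.stats import spearmanr

human_ratings = [6.1, 5.2, 3.0, 1.5]
model_probs = [0.030, 0.021, 0.011, 0.012]
rho, p_value = spearmanr(human_ratings, model_probs)
print(rho)  # 0.8: the ranks agree except for the last two items
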
def pd_themfit(model_name,
               experiment_name,
               df,
               predict_role='V',
               input_roles='all_available_args',
               function='filler_prob',
               n=5,
               debug=False):
    """ Adds a column to a pandas df with a role filler probability.

    For each row in the df, calculates the probability that a particular role
    filler will fill a particular role, given the input roles and fillers
    taken from that row.

    Keyword arguments:
    model_name -- the name of the model
    experiment_name -- the model name plus the experiment name, separated by '_'
    df -- the pandas dataframe; must include columns for all propbank labels
        in predict_role and input_roles
    predict_role -- the target role (propbank label) whose filler is predicted
        (default: 'V')
    input_roles -- the set of roles (propbank labels) to use as inputs
        (default: 'all_available_args')
    """
    possible_roles = set(
        ['A0', 'A1', 'AM-LOC', 'AM-TMP', 'AM-MNR', '<UNKNOWN>', 'V'])
    try:
        assert predict_role in df.columns
        assert predict_role in possible_roles
        if input_roles != 'all_available_args':
            for r in input_roles:
                assert r in df.columns
                assert r in possible_roles
    except AssertionError:
        print("NOT ALL ROLES ARE AVAILABLE AS DF COLUMNS")

    MODEL_NAME = experiment_name

    description = model_builder.load_description(MODEL_PATH, MODEL_NAME)
    net = model_builder.build_model(model_name, description)
    net.load(MODEL_PATH, MODEL_NAME, description)

    bias = net.set_0_bias()

    # net.model.summary()
    # print net.model.get_layer(name="embedding_2").get_weights()[0]

    # If no <UNKNOWN> role in the role vocabulary, add it.
    if net.role_vocabulary.get("<UNKNOWN>", -1) == -1:
        net.role_vocabulary["<UNKNOWN>"] = len(net.role_vocabulary) - 1

    print("Role vocabulary", net.role_vocabulary)
    print("unk_word_id", net.unk_word_id)
    print("missing_word_id", net.missing_word_id)

    reverse_vocabulary = utils.get_reverse_map(net.word_vocabulary)
    reverse_role_vocabulary = utils.get_reverse_map(net.role_vocabulary)

    print("Reverse role vocabulary", reverse_role_vocabulary)

    raw_words = dict(
        (reverse_role_vocabulary[r], reverse_vocabulary[net.missing_word_id])
        for r in net.role_vocabulary.values())

    if input_roles == 'all_available_args':
        possible_roles.remove(predict_role)
        input_roles = possible_roles.intersection(set(df.columns))

    all_roles = set(input_roles)
    all_roles.add(predict_role)

    df = df.apply(
        lambda x: process_row(predict_role=predict_role,
                              role_fillers={i: x[i]
                                            for i in all_roles},
                              model=net,
                              raw_word_list=raw_words,
                              function=function,
                              n=n,
                              debug=debug),
        axis=1)

    return df
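
# A minimal sketch of using pd_themfit(); the dataframe and the
# model/experiment names are fabricated for illustration.
import pandas as pd

df_demo = pd.DataFrame({'A0': ['man', 'chef'],
                        'A1': ['car', 'meal'],
                        'V': ['buy', 'cook']})
scored = pd_themfit('MTRF', 'MTRF_demo', df_demo,
                    predict_role='V',  # score each verb given its arguments
                    function='filler_prob')
print(scored)
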
def eval_greenberg(model_name,
                   experiment_name,
                   evaluation,
                   model=None,
                   print_result=True):
    MODEL_NAME = experiment_name

    if model:
        net = model
    else:
        description = model_builder.load_description(MODEL_PATH, MODEL_NAME)
        net = model_builder.build_model(model_name, description)
        net.load(MODEL_PATH, MODEL_NAME, description)

    bias = net.set_0_bias()

    if print_result:
        print(net.role_vocabulary)

    eval_data_file = os.path.join(RV_EVAL_PATH, evaluation + '.txt')

    result_file = os.path.join(MODEL_PATH,
                               MODEL_NAME + '_' + evaluation + '.txt')

    probs = []
    baseline = []
    oov_count = 0

    tr = "A1"
    r_i = net.role_vocabulary["V"]
    tr_i = net.role_vocabulary["A1"]
    y_r_i = numpy.asarray([tr_i], dtype=numpy.int64)

    if print_result:
        print(eval_data_file)
        print("=" * 60)

    with open(eval_data_file, 'r') as f, \
         open(result_file, 'w') as f_out:
        for line in f:
            line = line.strip().lower()
            w, tw = line.split()[:2]  # input word, target word
            w = w[:-2].strip()
            tw = tw[:-2].strip()

            w = wnl.lemmatize(w, wn.VERB)
            tw = wnl.lemmatize(tw, wn.NOUN)

            # a hack to fix some words
            # tw = word_fix.get(tw, tw)

            w_i = net.word_vocabulary.get(w, net.unk_word_id)
            tw_i = net.word_vocabulary.get(tw, net.unk_word_id)

            if tw_i == net.unk_word_id:
                oov_count += 1
                print(w, tr, tw)
                f_out.write(line + "\tnan\n")
                continue

            b = float(line.split()[-1])

            sample = dict(
                (r, net.missing_word_id)
                for r in (list(net.role_vocabulary.values()) + [net.unk_role_id]))
            sample[r_i] = w_i
            sample.pop(tr_i, None)

            x_w_i = numpy.asarray([list(sample.values())], dtype=numpy.int64)
            x_r_i = numpy.asarray([list(sample.keys())], dtype=numpy.int64)
            y_w_i = numpy.asarray([tw_i], dtype=numpy.int64)

            p = net.p_words(x_w_i, x_r_i, y_w_i, y_r_i)
            # pr = net.p_roles(x_w_i, x_r_i, y_w_i, y_r_i)

            baseline.append(b)
            probs.append(p)

            f_out.write(line + "\t%s\n" % p)

    rho, p_value = spearmanr(baseline, probs)
    if print_result:
        print(f"Spearman correlation of {evaluation}: {rho}; "
              f"2-tailed p-value: {p_value}")
        print(f"Num ratings: {len(probs)} ({oov_count} out of vocabulary)")

    net.set_bias(bias)

    return rho, p_value, oov_count, probs, baseline
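
# A minimal usage sketch; the experiment name and the evaluation file stem
# (a .txt file expected under RV_EVAL_PATH) are illustrative assumptions.
rho, p_value, oov, probs, baseline = eval_greenberg(
    'MTRF', 'MTRF_demo', 'greenberg_objects', print_result=True)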