def evaluate_baselines(resource_manager, evaluations, experiment):
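    """Fit the baseline systems on the training corpus and add one
    evaluation per baseline, measured on the test corpus."""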
    def extract_data(lexicon, annotations):
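        """Split annotations into parallel lists of lemma-pos keys and frame ids."""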
        lemma_poses = []
        frame_ids = []

        for annotation in annotations:
            lemma_poses.append(annotation.lemma_pos)
            frame_ids.append(lexicon.get_id(annotation.frame_name))

        return lemma_poses, frame_ids

    corpus_train = experiment.corpus_train
    corpus_val = experiment.corpus_validation
    corpus_test = experiment.corpus_test
    lexicon_name = experiment.lexicon_name

    train_annotations_file = resource_manager.get_frame_annotations_file(
        corpus_train)
    val_annotations_file = resource_manager.get_frame_annotations_file(
        corpus_val)
    test_annotations_file = resource_manager.get_frame_annotations_file(
        corpus_test)

    frame_to_id_mapping = build_frame_to_id_mapping(train_annotations_file,
                                                    val_annotations_file,
                                                    test_annotations_file)
    lexicon = load_lexicon(resource_manager, lexicon_name, frame_to_id_mapping)

    train_annotations = load_annotations(train_annotations_file)
    test_annotations = load_annotations(test_annotations_file)

    lemma_poses_train, frame_ids_train = extract_data(lexicon,
                                                      train_annotations)
    lemma_poses_test, frame_ids_test = extract_data(lexicon, test_annotations)

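    # Baselines to compare the trained models against.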
    baselines = [
        DataMajorityBaseline(lexicon),
        DataMajorityLexiconBaseline(lexicon),
        LexiconBaseline(lexicon)
    ]

    for baseline in baselines:
        start = timer()
        baseline.fit(lemma_poses_train, frame_ids_train)
        end = timer()
        predictions = baseline.predict(lemma_poses_test)
        evaluations.add_evaluation(
            type(baseline).__name__, '', corpus_train, corpus_test, lexicon,
            '', 'baseline', predictions, frame_ids_test, lemma_poses_test,
            baseline, end - start)
def evaluate_model(resource_manager, evaluation_manager, model_name,
                   model_type, corpus_name, lexicon_name, embeddings):
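    """Evaluate a trained model on corpus_name, once with raw predictions and
    once with lexicon-filtered predictions; returns both evaluations."""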
    model, frame_to_id_mapping, features, metadata = load_trained_model(
        model_name, model_type)
    assert metadata.embedding_name == embeddings.vsm_name, \
        'Cannot use two different embeddings for train and eval'

    lexicon = load_lexicon(resource_manager, lexicon_name, frame_to_id_mapping)

    dataset, X, Y = load_dataset(resource_manager, model_type, corpus_name,
                                 embeddings, lexicon, features)

    predictions = model.predict(X)
    predictions_with_lexicon = predict_with_lexicon(model, X, Y,
                                                    dataset.lemma_pos, lexicon)

    # As in evaluate_mlp, the raw predictions are recorded with flag True and
    # the lexicon-filtered predictions with flag False.
    evaluation = evaluation_manager.add_evaluation(
        model_name, features, metadata.corpus_train, corpus_name, lexicon,
        embeddings.vsm_name, True, Y, predictions, dataset.lemma_pos, None, 0)
    evaluation_with_lexicon = evaluation_manager.add_evaluation(
        model_name, features, metadata.corpus_train, corpus_name, lexicon,
        embeddings.vsm_name, False, Y, predictions_with_lexicon,
        dataset.lemma_pos, None, 0)

    return evaluation, evaluation_with_lexicon
def evaluate_mlp(resource_manager,
                 evaluations,
                 experiment,
                 mlp_config,
                 features,
                 runs=1,
                 name=None):
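    """Train and evaluate an MLP for the given experiment, optionally over
    several runs; each run is evaluated with and without lexicon filtering."""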
    logging.info('Running experiment: [%s]', experiment)
    corpus_train = experiment.corpus_train
    corpus_val = experiment.corpus_validation
    corpus_test = experiment.corpus_test
    lexicon_name = experiment.lexicon_name

    if isinstance(experiment.embeddings, dict):
        # Embeddings are given as a dict of modality -> embedding instance;
        # the joint VSM name is the concatenation of the per-modality names.
        dict_embeddings = experiment.embeddings
        embeddings_vsm_name = ''
        if 'embeddings' in dict_embeddings:
            logging.info(
                "Token embeddings obtained via dict_embeddings['embeddings']")
            embeddings_vsm_name += dict_embeddings['embeddings'].vsm_name
        if 'synset_embeddings' in dict_embeddings:
            logging.info("Synset embeddings obtained via "
                         "dict_embeddings['synset_embeddings']")
            embeddings_vsm_name += dict_embeddings['synset_embeddings'].vsm_name
        if 'imagined_embeddings' in dict_embeddings:
            logging.info("Imagined embeddings obtained via "
                         "dict_embeddings['imagined_embeddings']")
            embeddings_vsm_name += dict_embeddings[
                'imagined_embeddings'].vsm_name
    else:
        # Embeddings are given as a single textual embedding instance;
        # wrap it in a dict for uniform handling below.
        embeddings_vsm_name = experiment.embeddings.vsm_name
        dict_embeddings = {'embeddings': experiment.embeddings}

    train_annotations_file = resource_manager.get_frame_annotations_file(
        corpus_train)
    val_annotations_file = resource_manager.get_frame_annotations_file(
        corpus_val)
    test_annotations_file = resource_manager.get_frame_annotations_file(
        corpus_test)

    frame_to_id_mapping = build_frame_to_id_mapping(train_annotations_file,
                                                    val_annotations_file,
                                                    test_annotations_file)
    lexicon = load_lexicon(resource_manager, lexicon_name, frame_to_id_mapping)

    dataset_train, x_train, y_train = load_dataset_mlp(resource_manager,
                                                       corpus_train,
                                                       dict_embeddings,
                                                       lexicon, features)
    dataset_val, x_val, y_val = load_dataset_mlp(resource_manager, corpus_val,
                                                 dict_embeddings, lexicon,
                                                 features)
    dataset_test, x_test, y_test = load_dataset_mlp(resource_manager,
                                                    corpus_test,
                                                    dict_embeddings, lexicon,
                                                    features)

    mlp_name = mlp_layers_to_name(mlp_config)
    logging.info('Evaluating [%s] with features [%s] and experiment [%s]',
                 mlp_name, features, experiment)
    for run in range(runs):
        logging.info('Run %d/%d', run + 1, runs)

        # Build model: collect the input dimension per modality; the model
        # expects a plain int in the unimodal case and a dict otherwise.
        dict_dims = {}
        if 'embeddings' in dict_embeddings:
            dict_dims['embeddings_dim'] = dataset_train.dim
        if 'synset_embeddings' in dict_embeddings:
            dict_dims['synset_embeddings_dim'] = dataset_train.synset_dim
        if 'imagined_embeddings' in dict_embeddings:
            dict_dims['imagined_embeddings_dim'] = dataset_train.imagined_dim
        if ('synset_embeddings' in dict_embeddings
                or 'imagined_embeddings' in dict_embeddings):  # multimodal
            embeddings_dims = dict_dims
        else:  # unimodal
            embeddings_dims = dict_dims['embeddings_dim']
        model = build_mlp_from_config(mlp_config, embeddings_dims,
                                      len(frame_to_id_mapping), features)

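        # Train the model and predict on the test split; only this call is
        # timed. It returns predictions with and without lexicon filtering.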
        start = timer()
        predictions, predictions_with_lexicon, baseline = evaluate_on_train_test_split(
            model, lexicon, dataset_train, x_train, y_train, x_val, y_val,
            dataset_test, x_test, y_test)
        end = timer()

        evaluations.add_evaluation(mlp_name, features, corpus_train,
                                   corpus_test, lexicon, embeddings_vsm_name,
                                   True, y_test, predictions,
                                   dataset_test.lemma_pos, baseline,
                                   end - start)
        evaluations.add_evaluation(mlp_name, features, corpus_train,
                                   corpus_test, lexicon, embeddings_vsm_name,
                                   False, y_test, predictions_with_lexicon,
                                   dataset_test.lemma_pos, baseline,
                                   end - start)

        if name:
            save_model(resource_manager, model, name, run, experiment, lexicon,
                       features)
def build_confusion_matrix(resource_manager, model_name, model_type,
                           corpus_test, lexicon_name, embeddings, file_name):
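    """Build confusion matrices for a trained model on corpus_test and write
    them (with and without lexicon filtering) as tab-separated files."""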
    def save_confusion_matrix(cnf_matrix, lexicon, file_name, list_labels):
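        """Write cnf_matrix as a tab-separated file with the frame labels of
        list_labels as header row and first column."""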
        rows, columns = cnf_matrix.shape
        assert rows == columns
        n = len(list_labels)

        # (n+1) x (n+1) grid; row 0 and column 0 hold the frame labels.
        data = [['' for i in range(n + 1)] for j in range(n + 1)]

        for idx in range(n):
            label = lexicon.get_frame(list_labels[idx])
            data[0][idx + 1] = label
            data[idx + 1][0] = label

        # Only write non-zero cells so the file stays sparse.
        for row in range(rows):
            for col in range(columns):
                e = cnf_matrix[row][col]
                if e != 0:
                    data[row + 1][col + 1] = str(e)

        # Write the grid as a tab-separated file.
        with open(resource_manager.get_statistics(file_name), 'w') as f:
            for idx in range(n + 1):
                f.write('\t'.join(data[idx]))
                f.write('\n')

    model, frame_to_id_mapping, features, metadata = load_trained_model(model_name, model_type)
    
    if isinstance(embeddings, dict):
        # Embeddings are given as a dict of modality -> embedding instance;
        # the joint VSM name is the concatenation of the per-modality names.
        dict_embeddings = embeddings
        embeddings_vsm_name = ''
        if 'embeddings' in dict_embeddings:
            embeddings_vsm_name += dict_embeddings['embeddings'].vsm_name
        if 'synset_embeddings' in dict_embeddings:
            embeddings_vsm_name += dict_embeddings['synset_embeddings'].vsm_name
        if 'imagined_embeddings' in dict_embeddings:
            embeddings_vsm_name += dict_embeddings['imagined_embeddings'].vsm_name
    else:
        # A single textual embedding instance; wrap it in a dict for
        # uniform handling.
        dict_embeddings = {'embeddings': embeddings}
        embeddings_vsm_name = embeddings.vsm_name
    
    assert metadata.embedding_name == embeddings_vsm_name, \
        'Cannot use two different embeddings for train and eval'

    lexicon = load_lexicon(resource_manager, lexicon_name, frame_to_id_mapping)
    
    dataset_test, x_test, y_test = load_dataset(resource_manager, model_type, corpus_test, embeddings, lexicon, features)

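    # Predict with and without lexicon filtering.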
    y_pred = model.predict(x_test)
    y_pred_lex = predict_with_lexicon(model, x_test, y_test,
                                      dataset_test.lemma_pos, lexicon)

    y_true = remove_onehot(y_test)
    y_pred = remove_onehot(y_pred)
    y_pred_lex = remove_onehot(y_pred_lex)

    # Restrict each confusion matrix to the label ids that actually occur as
    # gold or predicted values; sorting keeps the output deterministic.
    labels_pred = []
    labels_pred_lex = []
    for y_true_i, y_pred_i, y_pred_lex_i in zip(y_true, y_pred, y_pred_lex):
        labels_pred.append(y_true_i)
        labels_pred.append(y_pred_i)
        labels_pred_lex.append(y_true_i)
        labels_pred_lex.append(y_pred_lex_i)
    list_labels_pred = sorted(set(labels_pred))
    labels_pred = np.asarray(list_labels_pred)
    list_labels_pred_lex = sorted(set(labels_pred_lex))
    labels_pred_lex = np.asarray(list_labels_pred_lex)
    
    confusion_matrix_nolex = confusion_matrix(y_true, y_pred, labels=labels_pred)
    confusion_matrix_lex = confusion_matrix(y_true, y_pred_lex, labels=labels_pred_lex)

    save_confusion_matrix(confusion_matrix_nolex, lexicon,
                          '{0}-confusion-nolex.csv'.format(file_name),
                          list_labels_pred)
    save_confusion_matrix(confusion_matrix_lex, lexicon,
                          '{0}-confusion-lex.csv'.format(file_name),
                          list_labels_pred_lex)
def analyse_errors(resource_manager, model_name, model_type, corpus_test,
                   lexicon_name, embeddings, corpus_traindev=None):
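    """Categorise the errors of a trained model on corpus_test: correct,
    unseen at training time, seen only with a different lemma-pos, and
    remaining ('normal') errors; reported both with and without lexicon
    filtering, all as percentages."""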
    Entry = namedtuple('Entry', [
        'name', 'correct', 'unseen_at_training',
        'seen_only_with_different_label', 'normal_error', 'correct_lex',
        'unseen_at_training_lex', 'seen_only_with_different_label_lex',
        'normal_error_lex', 'wrong_by_lexicon'
    ])

    model, frame_to_id_mapping, features, metadata = load_trained_model(
        model_name, model_type)

    lexicon = load_lexicon(resource_manager, lexicon_name, frame_to_id_mapping)

    if isinstance(embeddings, dict):
        # Embeddings are given as a dict of modality -> embedding instance;
        # the joint VSM name is the concatenation of the per-modality names.
        dict_embeddings = embeddings
        embeddings_vsm_name = ''
        if 'embeddings' in dict_embeddings:
            embeddings_vsm_name += dict_embeddings['embeddings'].vsm_name
        if 'synset_embeddings' in dict_embeddings:
            embeddings_vsm_name += dict_embeddings['synset_embeddings'].vsm_name
        if 'imagined_embeddings' in dict_embeddings:
            embeddings_vsm_name += dict_embeddings['imagined_embeddings'].vsm_name

        # In this branch the train/dev annotations are read from the
        # corpus_traindev dataset instead of annotation files.
        assert corpus_traindev is not None, \
            'corpus_traindev is required when embeddings are given as a dict'
        dataset_traindev, x_traindev, y_traindev = load_dataset(
            resource_manager, model_type, corpus_traindev, embeddings,
            lexicon, features)
        dataset_test, x_test, y_test = load_dataset(
            resource_manager, model_type, corpus_test, embeddings, lexicon,
            features)

        train_annotations = defaultdict(set)
        test_annotations = defaultdict(set)
        test_labels = []

        # remove_onehot is applied to local copies only, so y_test keeps its
        # one-hot encoding for predict_with_lexicon below.
        for y_traindev_i, lemma_pos in zip(remove_onehot(y_traindev),
                                           dataset_traindev.lemma_pos):
            frame_name = lexicon.get_frame(y_traindev_i)
            train_annotations[frame_name].add(lemma_pos)

        for y_test_i, lemma_pos in zip(remove_onehot(y_test),
                                       dataset_test.lemma_pos):
            frame_name = lexicon.get_frame(y_test_i)
            test_annotations[frame_name].add(lemma_pos)
            test_labels.append(lemma_pos)
        
    else:
        # A single textual embedding instance; wrap it in a dict for
        # uniform handling.
        dict_embeddings = {'embeddings': embeddings}
        embeddings_vsm_name = embeddings.vsm_name

        dataset_test, x_test, y_test = load_dataset(resource_manager,
                                                    model_type, corpus_test,
                                                    embeddings, lexicon,
                                                    features)

        # Train/dev annotations come from the annotation files of the corpus
        # the model was trained on.
        corpus_train = metadata.corpus_train
        train_annotations_file = resource_manager.get_frame_annotations_file(
            corpus_train)
        test_annotations_file = resource_manager.get_frame_annotations_file(
            corpus_test)

        train_annotations = defaultdict(set)
        test_annotations = defaultdict(set)
        test_labels = []

        for annotation in load_annotations(train_annotations_file):
            train_annotations[annotation.frame_name].add(annotation.lemma_pos)

        for annotation in load_annotations(test_annotations_file):
            test_annotations[annotation.frame_name].add(annotation.lemma_pos)
            test_labels.append(annotation.lemma_pos)
        
    assert metadata.embedding_name == embeddings_vsm_name, \
        'Cannot use two different embeddings for train and eval'

    y_pred = model.predict(x_test)
    y_pred_lex = predict_with_lexicon(model, x_test, y_test,
                                      dataset_test.lemma_pos, lexicon)

    y_pred = remove_onehot(y_pred)
    y_pred_lex = remove_onehot(y_pred_lex)
    y_true = remove_onehot(y_test)

    n = len(test_labels)
    unseen_at_training = 0
    seen_only_with_different_label = 0
    normal_error = 0
    correct = 0

    unseen_at_training_lex = 0
    seen_only_with_different_label_lex = 0
    normal_error_lex = 0
    correct_lex = 0
    wrong_by_lexicon = 0

    for i in range(n):
        prediction = y_pred[i]
        prediction_lex = y_pred_lex[i]
        goldlabel = y_true[i]
        lemma_pos = dataset_test.lemma_pos[i]

        gold_frame = lexicon.get_frame(goldlabel)

        # Without lexicon
        if prediction == goldlabel:
            correct += 1
        # Gold frame has no data in train but occurs in test
        elif gold_frame not in train_annotations:
            assert gold_frame in test_annotations
            unseen_at_training += 1
        # Gold frame occurs in train, but never with this lemma-pos
        elif lemma_pos not in train_annotations[gold_frame]:
            assert gold_frame in test_annotations
            seen_only_with_different_label += 1
        else:
            normal_error += 1

        # With lexicon
        if prediction_lex == goldlabel:
            correct_lex += 1
        # The lexicon lists the lemma-pos, but not with the gold frame
        elif goldlabel not in lexicon.get_available_frame_ids(lemma_pos):
            wrong_by_lexicon += 1
        # Gold frame has no data in train but occurs in test
        elif gold_frame not in train_annotations:
            assert gold_frame in test_annotations
            unseen_at_training_lex += 1
        # Gold frame occurs in train, but never with this lemma-pos
        elif lemma_pos not in train_annotations[gold_frame]:
            assert gold_frame in test_annotations
            seen_only_with_different_label_lex += 1
        else:
            normal_error_lex += 1
            
    logging.info(
        'Correct: %.2f, Unseen: %.2f, Seen with different label: %.2f, '
        'Normal error: %.2f', correct * 100 / n, unseen_at_training * 100 / n,
        seen_only_with_different_label * 100 / n, normal_error * 100 / n)
    logging.info(
        'Correct: %.2f, Wrong by lexicon: %.2f, Unseen: %.2f, Seen with '
        'different label: %.2f, Normal error: %.2f', correct_lex * 100 / n,
        wrong_by_lexicon * 100 / n, unseen_at_training_lex * 100 / n,
        seen_only_with_different_label_lex * 100 / n,
        normal_error_lex * 100 / n)

    return Entry(model_name, correct * 100 / n, unseen_at_training * 100 / n,
                 seen_only_with_different_label * 100 / n,
                 normal_error * 100 / n, correct_lex * 100 / n,
                 unseen_at_training_lex * 100 / n,
                 seen_only_with_different_label_lex * 100 / n,
                 normal_error_lex * 100 / n, wrong_by_lexicon * 100 / n)
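

# Usage sketch (hypothetical names; `resource_manager` and `embeddings` must
# be set up as elsewhere in this module): collect the error analysis of
# several saved models, e.g. to tabulate the returned Entry tuples.
#
#     models = [('mlp-unimodal', 'mlp'), ('mlp-multimodal', 'mlp')]
#     entries = [analyse_errors(resource_manager, name, model_type,
#                               'corpus-test', 'lexicon-name', embeddings)
#                for name, model_type in models]
#     for entry in entries:
#         print(entry.name, entry.correct, entry.correct_lex)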