def train_ensemble(model, acoustic_iterator, linguistic_iterator, optimizer, criterion, reg_ratio):
    model.train()

    epoch_loss = 0
    conf_mat = ConfusionMatrix(np.zeros((NUM_CLASSES, NUM_CLASSES)))

    assert len(acoustic_iterator) == len(linguistic_iterator)

    for acoustic_tuple, linguistic_tuple in zip(acoustic_iterator(), linguistic_iterator()):
        acoustic_batch = acoustic_tuple[0]
        labels = acoustic_tuple[1]
        linguistic_batch = linguistic_tuple[0]
        optimizer.zero_grad()

        predictions = model(acoustic_batch, linguistic_batch).squeeze(1)

        loss = criterion(predictions, labels)

        reg_loss = 0
        for param in model.parameters():
            reg_loss += param.norm(2)

        total_loss = loss + reg_ratio*reg_loss
        total_loss.backward()

        optimizer.step()

        epoch_loss += loss.item()

        conf_mat += ConfusionMatrix.from_predictions(predictions, labels)

    average_loss = epoch_loss / len(acoustic_iterator)

    return average_loss, conf_mat
def train(model, iterator, optimizer, criterion, reg_ratio):
    model.train()

    epoch_loss = 0
    conf_mat = ConfusionMatrix(np.zeros((NUM_CLASSES, NUM_CLASSES)))

    for batch, labels in iterator():
        optimizer.zero_grad()

        predictions = model(batch).squeeze(1)

        loss = criterion(predictions, labels)

        reg_loss = 0
        for param in model.parameters():
            reg_loss += param.norm(2)

        total_loss = loss + reg_ratio*reg_loss
        total_loss.backward()

        optimizer.step()

        epoch_loss += loss.item()

        conf_mat += ConfusionMatrix.from_predictions(predictions, labels)

    average_loss = epoch_loss / len(iterator)

    return average_loss, conf_mat
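
Both training loops above add an explicit L2 penalty: the 2-norm of every parameter is summed and scaled by reg_ratio before backpropagation. A minimal, self-contained sketch of that pattern, assuming PyTorch (the model, batch and hyperparameters below are placeholders, not taken from the examples):

import torch
import torch.nn as nn

model = nn.Linear(10, 4)                       # placeholder model
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
reg_ratio = 1e-4

inputs = torch.randn(8, 10)                    # dummy batch
labels = torch.randint(0, 4, (8,))

optimizer.zero_grad()
loss = criterion(model(inputs), labels)
reg_loss = sum(p.norm(2) for p in model.parameters())   # L2 penalty over all parameters
(loss + reg_ratio * reg_loss).backward()
optimizer.step()
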
Example #3
def evaluate_sense(gold_list, predicted_list):
	"""Evaluate sense classifier

	The label 'no' is for the relations that are missed by the system
	because the arguments don't match any of the gold relations.
	"""
	sense_alphabet = Alphabet()
	for relation in gold_list:
		sense_alphabet.add(relation['Sense'][0])
	sense_alphabet.add('no')
	sense_cm = ConfusionMatrix(sense_alphabet)
	gold_to_predicted_map, predicted_to_gold_map = \
			_link_gold_predicted(gold_list, predicted_list, spans_exact_matching)

	for i, gold_relation in enumerate(gold_list):
		if i in gold_to_predicted_map:
			predicted_sense = gold_to_predicted_map[i]['Sense'][0]
			if predicted_sense in gold_relation['Sense']:
				sense_cm.add(predicted_sense, predicted_sense)
			else:
				if not sense_cm.alphabet.has_label(predicted_sense):
					predicted_sense = 'no'
				sense_cm.add(predicted_sense, gold_relation['Sense'][0])
		else:
			sense_cm.add('no', gold_relation['Sense'][0])

	for i, predicted_relation in enumerate(predicted_list):
		if i not in predicted_to_gold_map:
			predicted_sense = predicted_relation['Sense'][0]
			if not sense_cm.alphabet.has_label(predicted_sense):
				predicted_sense = 'no'
			sense_cm.add(predicted_sense, 'no')
	return sense_cm
Example #4
def bootstrap_diff(df, ccp_estimator, rounds, sample_size):
    bootstrap_results = []
    for i in range(rounds):
        # Get first model parameters
        s1 = df.sample(sample_size, replace=True)
        bug_g = s1.groupby([classifier, concept],
                           as_index=False).agg({count: 'count'})
        bug_cm = ConfusionMatrix(g_df=bug_g,
                                 classifier=classifier,
                                 concept=concept,
                                 count=count)

        positive_rate = bug_cm.positive_rate()
        hit_rate = bug_cm.hit_rate()
        ccp = ccp_estimator.estimate_positives(hit_rate)
        ccp_diff = ccp - positive_rate

        # Find difference in given points
        bootstrap_results.append([positive_rate, hit_rate, ccp, ccp_diff])

        if (i % 100 == 0):
            print("finished " + str(i), datetime.datetime.now())

    results_df = pd.DataFrame(
        bootstrap_results,
        columns=['positive_rate', 'hit_rate', 'ccp', 'ccp_diff'])
    return results_df
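
bootstrap_diff above draws a fixed number of resamples of the frame with replacement and recomputes the metrics on each one. A minimal sketch of the same bootstrap idea with plain pandas/numpy, using a made-up column and metric (hypothetical names, not the classifier/concept columns used above):

import numpy as np
import pandas as pd

df = pd.DataFrame({"hit": np.random.randint(0, 2, size=1000)})   # toy data

estimates = []
for _ in range(1000):
    sample = df.sample(len(df), replace=True)    # resample with replacement
    estimates.append(sample["hit"].mean())       # recompute the metric on the resample

low, high = np.percentile(estimates, [2.5, 97.5])
print("bootstrap 95% CI for the hit rate: [%.3f, %.3f]" % (low, high))

Example #5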
    def fit_predict(self,
                    Gtr,
                    Ytr,
                    Gt,
                    Yt,
                    grid_search,
                    acc_param="F1Mean",
                    RS=0,
                    VERBOSE=False):
        """Learn the model on training dataset and predict on the testing dataset.
        
        Input:
            - Gtr (array): training subset
            - Ytr (array): true labels of training subset
            - Gt (array): testing subset
            - Yt (array): true labels of testing subset
            - grid_search (dict): grid search for the CV
            - acc_param (str): accuracy metric for the cross-validation (default "F1Mean"; any other value falls back to overall accuracy)
            - RS (int) : random seed used for the stratified k-fold CV
            - VERBOSE (bool): verbose (default: False)
        
        Return:
            - confMatrix (ConfusionMatrix object): confusion matrix resulting from the classification
            - yp (array): vector of predicted labels of the testing subset
            - grid.best_params_ (dict): combination of parameters that gave the best results during the CV
        
        TODO: implement own scoring parameter
        """

        if acc_param == "F1Mean":
            score = 'f1_macro'
        else:
            score = 'accuracy'

        ## Initialization of the stratified k-fold CV
        cv = StratifiedKFold(Ytr, n_folds=self.n_folds, random_state=RS)

        # Fit and predict with the parameters explored by the grid search
        grid = GridSearchCV(self.pipe,
                            param_grid=grid_search,
                            scoring=score,
                            verbose=VERBOSE,
                            cv=cv,
                            n_jobs=3)
        grid.fit(Gtr, Ytr)

        model = grid.best_estimator_
        if VERBOSE:
            print(grid.best_score_)

        #Learn model
        model.fit(Gtr, Ytr)  #could use refit in version 0.19 of sklearn
        #Predict
        yp = model.predict(Gt)

        #Compute confusion matrix
        confMatrix = ConfusionMatrix()
        confMatrix.compute_confusion_matrix(yp, Yt)

        return confMatrix, yp, grid.best_params_
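
fit_predict above is written against an old scikit-learn API (StratifiedKFold(Ytr, n_folds=...)) and Python 2 print. A rough sketch of the same fit/predict flow with a current scikit-learn on toy data (the pipeline, grid and scorer below are placeholders, not the class's own self.pipe):

import numpy as np
from sklearn.datasets import make_classification
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import GridSearchCV, StratifiedKFold, train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

X, y = make_classification(n_samples=300, n_classes=3, n_informative=5, random_state=0)
Gtr, Gt, Ytr, Yt = train_test_split(X, y, stratify=y, random_state=0)

pipe = Pipeline([("scale", StandardScaler()), ("clf", SVC())])
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=0)
grid = GridSearchCV(pipe, param_grid={"clf__C": [0.1, 1, 10]},
                    scoring="f1_macro", cv=cv, n_jobs=3)
grid.fit(Gtr, Ytr)            # refit=True retrains the best model on all of Gtr

yp = grid.predict(Gt)
print(confusion_matrix(Yt, yp))
print(grid.best_params_)
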
Example #6
def test_storing_loading():
    """Test store_preds and load_preds"""

    # Create confusion matrices for random classifiers
    yactual = [2, 0, 2, 2, 0, 1, 1, 2, 2, 0, 1, 2]
    ypred1 = np.random.randint(3, size=12)

    cm1 = ConfusionMatrix(yactual, ypred1, "cls_1")

    yactual = [2, 0, 2, 2, 0, 1, 1, 2, 2, 0, 1, 2]
    ypred2 = np.random.randint(3, size=12)

    cm2 = ConfusionMatrix(yactual, ypred2, "cls_2")

    yactual = [2, 0, 2, 2, 0, 1, 1, 2, 2, 0, 1, 2]
    ypred3 = np.random.randint(3, size=12)

    cm3 = ConfusionMatrix(yactual, ypred3, "cls_3")

    yactual = [2, 0, 2, 2, 0, 1, 1, 2, 2, 0, 1, 2]
    ypred4 = np.random.randint(3, size=12)

    cm4 = ConfusionMatrix(yactual, ypred4, "cls_4")

    preds = [ypred1, ypred2, ypred3, ypred4]

    print("Preds before saving", preds)

    store_preds(preds, yactual, 1)
    new_preds, actual = load_preds(1)

    print("Preds after saving", new_preds, "Actual after saving", actual)
def evaluate_performance(df, classification_column, concept_column):
    g = df.groupby([classification_column, concept_column],
                   as_index=False).agg({'commit': 'count'})
    cm = ConfusionMatrix(g_df=g,
                         classifier=classification_column,
                         concept=concept_column,
                         count='commit')

    return cm.summarize()
Example #8
def applyClassifiers(scst_pkl_path: str,
                     keras_pkl_path: str,
                     data_path: Optional[str] = None,
                     truth_path: Optional[str] = None,
                     start_second: Optional[float] = None,
                     end_second: Optional[float] = None) -> None:
    '''
    Apply both a ``SCSTClassifier`` and a ``TransientKerasClassifier`` to the data sequence
    specified by the inputs, and plot the results.

    :param scst_pkl_path: Full path to a pickle file containing a saved ``SCSTClassifier`` object.
    :param keras_pkl_path: Full path to a pickle file containing a saved
        ``TransientKerasClassifier`` object.
    :param data_path, truth_path, start_second, end_second: See ``data_utils.getPlotDataTuple``.
    '''

    # Define SCST classification parameters
    SIG_THRESH = 100
    NOISE_THRESH = 100

    # Load data and classifiers
    test_matrix, truth_array, start_second, title = getPlotDataTuple(
        truth_path, data_path, start_second, end_second)
    scst_classifier = SCSTClassifier.load(scst_pkl_path)
    keras_classifier = TransientKerasClassifier.load(keras_pkl_path)

    # Apply classifiers
    num_obs = test_matrix.shape[1]
    for test_idx in range(num_obs):
        scst_classifier.classify(test_matrix[:, test_idx], SIG_THRESH,
                                 NOISE_THRESH)
        keras_classifier.classify(test_matrix[:, test_idx])
        _printProgress(test_idx / float(num_obs), title)

    _printProgress(1.0, title)
    sys.stdout.write("\n")

    # Display results
    id_array_list = [
        scst_classifier.class_labels, keras_classifier.class_labels
    ]
    id_tag_list = ["SCST IDs", "Keras IDs"]

    if truth_array is not None:
        for classifier, name in zip([scst_classifier, keras_classifier],
                                    ["SCST", "Keras"]):
            conf_matrix = ConfusionMatrix(classifier.class_labels, truth_array,
                                          True, name)
            conf_matrix.display()

        id_array_list.append(truth_array)
        id_tag_list.append("Truth IDs")

    plotSequence(test_matrix, start_second, title, id_array_list, id_tag_list)
    plt.show()
Example #10
def two_years_analysis(two_years_df
                       , first_metric
                       , second_metric
                       , key):
    print()
    print("Co-change"
          , first_metric
          , second_metric)
    g = two_years_df.groupby([first_metric, second_metric]
                             , as_index=False).agg({key : 'count'})

    print(g)

    cm = ConfusionMatrix(g_df=g
                             , classifier=first_metric
                             , concept=second_metric, count=key)

    print(cm.summarize())
    print()
    print("Samples", cm.samples())
    print("Both metrics increment match", cm.accuracy())
    print(second_metric
            , " improvement given "
            , first_metric
            , " improvement", cm.precision(), "lift", cm.precision_lift())
    print(first_metric
            , " improvement given "
            , second_metric
            , "improvement",  cm.recall(), "lift", ifnull(safe_divide(ifnull(cm.recall()),cm.hit_rate())) - 1)
    print()
Example #11
 def run(self):
     """
     This is for assignment 6; the data is the 20 Newsgroups corpus
     """
     model = BuildModel("../data/features")
     count_vectors = model.count_vectors()
     cm = ConfusionMatrix(labels=model.labels)
     mm = MultinomialMixture(20, count_vectors, n_iterations=4, verbose=True, smoothing=True, confusion_matrix=cm,
                             document_types=model.document_types)
     mm.learn_parameters()
     cm.print_matrix()
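Example #12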
def perfective_performance(df):
    perfective_g = df.groupby(
        ['perfective_pred', 'Is_Perfective'], as_index=False).agg({'commit' : 'count'})
    perfective_cm = ConfusionMatrix(g_df=perfective_g
                                  , classifier='perfective_pred'
                                  , concept='Is_Perfective'
                                  , count='commit')
    print("perfective commit performance")
    print(perfective_cm.summarize())

    return perfective_cm
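Example #13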
def test_summrize(classifier
                         , concept
                         , count
                         , g_df
                          , expected):
    cm = ConfusionMatrix(classifier
                         , concept
                         , count
                         , g_df)
    actual = cm.summarize()
    assert expected == actual
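Example #14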
def test_independent_prob(classifier
                  , concept
                  , count
                  , g_df
                  , expected):
    cm = ConfusionMatrix(classifier
                         , concept
                         , count
                         , g_df)
    actual = cm.independent_prob()
    assert expected == actual
Example #15
def corrective_performance(df):
    bug_g = df.groupby(['corrective_pred', 'Is_Corrective'],
                       as_index=False).agg({'commit': 'count'})
    bug_cm = ConfusionMatrix(g_df=bug_g,
                             classifier='corrective_pred',
                             concept='Is_Corrective',
                             count='commit')
    print("corrective commit performance")
    print(bug_cm.summarize())

    return bug_cm
Example #16
def refactor_performance(df):
    refactor_g = df.groupby(['is_refactor_pred', 'Is_Refactor'],
                            as_index=False).agg({'commit': 'count'})
    refactor_cm = ConfusionMatrix(g_df=refactor_g,
                                  classifier='is_refactor_pred',
                                  concept='Is_Refactor',
                                  count='commit')
    print("refactor commit performance")
    print(refactor_cm.summarize())

    return refactor_cm
Example #17
def adaptive_performance(df):
    adaptive_g = df.groupby(['adaptive_pred', 'Is_Adaptive'],
                            as_index=False).agg({'commit': 'count'})
    adaptive_cm = ConfusionMatrix(g_df=adaptive_g,
                                  classifier='adaptive_pred',
                                  concept='Is_Adaptive',
                                  count='commit')
    print("adaptive commit performance")
    print(adaptive_cm.summarize())

    return adaptive_cm
Example #18
def features_confusion_matrix_analysis(two_years_df
                       , first_metric
                       , second_metric
                       , keys):
    g = two_years_df.groupby([first_metric, second_metric]
                             , as_index=False).agg({keys[0] : 'count'})

    cm = ConfusionMatrix(g_df=g
                             , classifier=first_metric
                             , concept=second_metric, count=keys[0])

    return cm.summarize()
Example #19
def leave_one_out(examples, k):
    conf_matr = ConfusionMatrix()
    for ex in examples:
        # disable only this example
        ex.active = False
        # run the k-Nearest-Neighbor algorithm
        rank_list = knn.knn(k, examples, ex)
        # check the voting for correctness
        outcome = knn.voting(rank_list)
        conf_matr.inc_according_to(outcome, ex.outcome)
        ex.active = True
    # return the computed confusion matrix
    return conf_matr
Example #20
def leave_one_out(examples,k):
    conf_matr = ConfusionMatrix()
    for ex in examples:
        # disable only this example
        ex.active = False
        # run the k-Nearest-Neighbor algorithm
        rank_list = knn.knn(k,examples,ex)
        # check the voting for correctness
        outcome = knn.voting(rank_list)
        conf_matr.inc_according_to(outcome,ex.outcome)
        ex.active = True
    # return the computed confusion matrix
    return conf_matr
Example #21
def compute_binary_eval_metric(gold_list, predicted_list, matching_fn):
    """Compute binary evaluation metric

    """
    binary_alphabet = Alphabet()
    binary_alphabet.add('yes')
    binary_alphabet.add('no')
    cm = ConfusionMatrix(binary_alphabet)
    matched_predicted = [False for x in predicted_list]
    for gold_span in gold_list:
        found_match = False
        for i, predicted_span in enumerate(predicted_list):
            if matching_fn(gold_span,
                           predicted_span) and not matched_predicted[i]:
                cm.add('yes', 'yes')
                matched_predicted[i] = True
                found_match = True
                break
        if not found_match:
            cm.add('no', 'yes')
    # Predicted span that does not match with any
    for matched in matched_predicted:
        if not matched:
            cm.add('yes', 'no')
    return cm
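
In compute_binary_eval_metric, every matched gold span adds a ('yes', 'yes') cell, every missed gold span a ('no', 'yes') cell, and every spurious prediction a ('yes', 'no') cell, so precision and recall reduce to three counts. A small standalone sketch of that arithmetic (not the project's ConfusionMatrix):

def span_prf(matched, missed_gold, spurious_predicted):
    """Precision/recall/F1 from matched, missed and spurious span counts."""
    precision = matched / float(matched + spurious_predicted) if matched + spurious_predicted else 0.0
    recall = matched / float(matched + missed_gold) if matched + missed_gold else 0.0
    f1 = 2 * precision * recall / (precision + recall) if precision + recall else 0.0
    return precision, recall, f1

print(span_prf(matched=80, missed_gold=20, spurious_predicted=10))   # ~(0.889, 0.800, 0.842)

Example #22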
def eval_ensemble(ensemble_model, acoustic_model_iterator, linguistic_model_iterator, criterion):
    epoch_losses = []
    conf_mat = ConfusionMatrix(np.zeros((NUM_CLASSES, NUM_CLASSES)))

    with torch.no_grad():
        for ((acoustic_batch, labels), (linguistic_batch, _)) in zip(acoustic_model_iterator(), linguistic_model_iterator()):
            predictions = ensemble_model(acoustic_batch, linguistic_batch)
            predictions = torch.Tensor(predictions)
            loss = criterion(predictions.float(), labels)
            epoch_losses.append(loss.item())
            conf_mat += ConfusionMatrix.from_predictions(predictions, labels)

    average_loss = sum(epoch_losses) / len(acoustic_model_iterator)
    return average_loss, conf_mat
Example #23
def evaluate_sense(relation_pairs, valid_senses):
    sense_alphabet = Alphabet()
    #for g_relation, _ in relation_pairs:
    #if g_relation is not None:
    #sense = g_relation['Sense'][0]
    #if sense in valid_senses:
    #sense_alphabet.add(sense)
    for sense in valid_senses:
        sense_alphabet.add(sense)

    sense_alphabet.add(ConfusionMatrix.NEGATIVE_CLASS)
    sense_alphabet.growing = False

    sense_cm = ConfusionMatrix(sense_alphabet)
    for g_relation, p_relation in relation_pairs:
        assert g_relation is not None or p_relation is not None
        if g_relation is None:
            predicted_sense = p_relation['Sense'][0]
            sense_cm.add(predicted_sense, ConfusionMatrix.NEGATIVE_CLASS)
        elif p_relation is None:
            gold_sense = g_relation['Sense'][0]
            if gold_sense in valid_senses:
                sense_cm.add(ConfusionMatrix.NEGATIVE_CLASS, gold_sense)
        else:
            predicted_sense = p_relation['Sense'][0]
            gold_sense = g_relation['Sense'][0]
            if gold_sense in valid_senses:
                sense_cm.add(predicted_sense, gold_sense)
    return sense_cm
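Example #24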
def adaptive_by_negation_performance(df):

    concept = 'Is_Adaptive'
    classifier_name = 'adaptive_by_negation_pred'

    adaptive_g = df.groupby(
        [classifier_name, concept], as_index=False).agg({'commit' : 'count'})
    adaptive_cm = ConfusionMatrix(g_df=adaptive_g
                                  , classifier=classifier_name
                                  , concept=concept
                                  , count='commit')
    print("adaptive_by_negation commit performance")
    print(adaptive_cm.summarize())

    return adaptive_cm
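Example #25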
    def predict(
        self, dataset: Dataset
    ) -> dict[str, Union[str, list[str], float, ConfusionMatrix]]:
        """predicts the class labels of the given test set, based on a previously fitted model.

        :param dataset: dataset consisting of
        :return: list of predicted values
        """

        prediction_params: dict[str, Union[str, list[str], float,
                                           ConfusionMatrix]] = {
                                               "branches": self.__branches
                                           }
        predicted_values: list[str] = []
        for example, label in dataset:
            example: dict[str, str]
            label: str

            # predict the example
            predicted_values.append(self.__label_example(example, self.__root))

        prediction_params["predictions"]: list[str] = predicted_values
        prediction_params["accuracy"] = accuracy(dataset.label_sample,
                                                 predicted_values)
        prediction_params[
            "confusion_matrix"]: ConfusionMatrix = ConfusionMatrix(
                dataset.label_space, dataset.label_sample, predicted_values)

        return prediction_params
Example #26
 def setUp(self):
     label_names = ['banana', 'apple', 'orange']
     labels = np.array(
         [
             [0, 1, 0],
             [1, 0, 0],
             [1, 0, 0],
             [0, 1, 0],
             [0, 0, 1],
             [0, 0, 1],
             [1, 0, 0],
             [0, 1, 0],
             [1, 0, 0],
             [0, 0, 1],
             [0, 0, 1]
         ]
     )
     predictions = np.array(
         [
             [0.9, 0.1, 0.0],
             [0.7, 0.1, 0.2],
             [0.4, 0.3, 0.3],
             [0.2, 0.6, 0.2],
             [0.5, 0.2, 0.3],
             [0.0, 0.0, 1.0],
             [0.2, 0.1, 0.7],
             [0.1, 0.8, 0.1],
             [0.3, 0.5, 0.2],
             [0.1, 0.0, 0.9],
             [0.0, 1.0, 0.0]
         ]
     )
     self.confusion_matrix = ConfusionMatrix(predictions, labels, class_names=label_names)
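
The setUp fixture above hands per-class probability rows and one-hot label rows directly to that project's ConfusionMatrix. As an illustration only, such inputs reduce to an ordinary confusion matrix by taking the argmax of each row, e.g. with scikit-learn:

import numpy as np
from sklearn.metrics import confusion_matrix

labels = np.array([[0, 1, 0], [1, 0, 0], [0, 0, 1]])                          # one-hot ground truth
predictions = np.array([[0.9, 0.1, 0.0], [0.2, 0.6, 0.2], [0.1, 0.0, 0.9]])   # per-class probabilities

y_true = labels.argmax(axis=1)         # one-hot -> class index
y_pred = predictions.argmax(axis=1)    # probabilities -> predicted class index
print(confusion_matrix(y_true, y_pred, labels=[0, 1, 2]))

Example #27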
def evaluate(model, iterator, criterion):
    model.eval()

    epoch_loss = 0
    conf_mat = ConfusionMatrix(np.zeros((NUM_CLASSES, NUM_CLASSES)))

    with torch.no_grad():
        for batch, labels in iterator():
            predictions = model(batch)

            loss = criterion(predictions.float(), labels)
            epoch_loss += loss.item()
            conf_mat += ConfusionMatrix.from_predictions(predictions, labels)

    average_loss = epoch_loss / len(iterator)

    return average_loss, conf_mat
Example #28
File: model.py Project: 2php/lipnet-1
 def evaluate(self, test_set):
     x = test_set.x
     x = self.preprocess_x(x)
     y = test_set.y
     y = np_utils.to_categorical(y, test_set.num_classes)
     y_pred = self.model.predict_proba(x, verbose=0)
     cf = ConfusionMatrix(y_pred, y)
     return cf
Example #29
def make_confusion_matrix(results):
    true_positives = [r for r in results if r.tp]
    true_negatives = [r for r in results if r.tn]
    false_positives = [r for r in results if r.fp]
    false_negatives = [r for r in results if r.fn]

    return ConfusionMatrix(len(true_positives), len(false_positives),
                           len(true_negatives), len(false_negatives))
Example #30
def make_pr_graph(entropies, correct, graph_name, title, mpl_figure=None):
    """
    Plot entropy as a PR curve predicting whether examples are correct
    or incorrect.
    """
    if mpl_figure is None:
        mpl_figure = mpl.figure()

    axes = mpl_figure.gca()
    assert len(entropies) == len(correct), (len(entropies), len(correct))
    pairs = zip(entropies, correct)
    pairs.sort()

    max_entropy = float(pairs[-1][0])
    min_entropy = float(pairs[0][0])
    num_segments = 20.0
    segment_size = (max_entropy - min_entropy)/num_segments
    if segment_size == 0:
        segment_size = 0.01
    thresholds = na.arange(min_entropy, max_entropy + 1, segment_size)
    X_recall = []
    Y_precision = []

    for threshold in thresholds:
        predicted_correct = [(entropy, correct)
                             for (entropy, correct) in pairs
                             if entropy < threshold]
        predicted_incorrect = [(entropy, correct)
                             for (entropy, correct) in pairs
                               if entropy >= threshold]
        tp = len([(entropy, correct) for entropy, correct in predicted_incorrect
                 if not correct])
        fp = len([(entropy, correct) for entropy, correct in predicted_incorrect
                 if correct])

        tn = len([(entropy, correct) for entropy, correct in predicted_correct
                 if correct])
        fn = len([(entropy, correct) for entropy, correct in predicted_correct
                 if not correct])
        if tp == 0:
            continue
        cm = ConfusionMatrix(tp, fp, tn, fn)
        X_recall.append(cm.recall)
        Y_precision.append(cm.precision)

    global marker_i
    graph_name_paper = ENTROPY_METRIC_PAPER_NAMES[graph_name]
    axes.plot(X_recall, Y_precision, label=graph_name_paper,
              marker=markers[marker_i])
    marker_i = (marker_i + 1) % len(markers)
    axes.legend(loc='upper right')
    axes.set_xlabel("Recall")
    axes.set_ylabel("Precision")
    axes.set_ylim(0, 1.1)
    axes.set_xlim(0, 1.1)
    axes.set_title("Precision vs Recall")
    mpl_figure.savefig(title.replace(" ", "_") + ".eps")
    mpl.show()
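
make_pr_graph sweeps entropy thresholds by hand and rebuilds a ConfusionMatrix at every step. For reference, a current scikit-learn can produce the same precision/recall sweep directly; a rough sketch, treating high entropy as the score for the "incorrect" class as the loop above does (toy data):

import numpy as np
from sklearn.metrics import precision_recall_curve

entropies = np.array([0.1, 0.4, 0.9, 1.3, 2.0, 2.2])
correct = np.array([True, True, True, False, False, True])

# Positive class = "incorrect"; higher entropy should mean more likely incorrect.
precision, recall, thresholds = precision_recall_curve(~correct, entropies)
print(list(zip(recall, precision)))
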
Example #31
def compute_binary_eval_metric(gold_list, predicted_list, matching_fn):
    """Compute binary evaluation metric

    """
    binary_alphabet = Alphabet()
    binary_alphabet.add('yes')
    binary_alphabet.add('no')
    cm = ConfusionMatrix(binary_alphabet)
    matched_predicted = [False for x in predicted_list]
    for gold_span in gold_list:
        found_match = False
        for i, predicted_span in enumerate(predicted_list):
            if matching_fn(gold_span, predicted_span) and \
               not matched_predicted[i]:
                cm.add('yes', 'yes')
                matched_predicted[i] = True
                found_match = True
                break
        if not found_match:
            cm.add('no', 'yes')
    # Predicted span that does not match with any
    for matched in matched_predicted:
        if not matched:
            cm.add('yes', 'no')
    return cm
Example #32
def evaluate_sense(relation_pairs, valid_senses):
    sense_alphabet = Alphabet()
    for g_relation, _ in relation_pairs:
        if g_relation is not None:
            sense = g_relation["Sense"][0]
            if sense in valid_senses:
                sense_alphabet.add(sense)
    sense_alphabet.add(ConfusionMatrix.NEGATIVE_CLASS)
    sense_alphabet.growing = False

    sense_cm = ConfusionMatrix(sense_alphabet)
    for g_relation, p_relation in relation_pairs:
        assert g_relation is not None or p_relation is not None
        if g_relation is None:
            predicted_sense = p_relation["Sense"][0]
            sense_cm.add(predicted_sense, ConfusionMatrix.NEGATIVE_CLASS)
        elif p_relation is None:
            gold_sense = g_relation["Sense"][0]
            if gold_sense in valid_senses:
                sense_cm.add(ConfusionMatrix.NEGATIVE_CLASS, gold_sense)
        else:
            predicted_sense = p_relation["Sense"][0]
            gold_sense = g_relation["Sense"][0]
            if gold_sense in valid_senses:
                sense_cm.add(predicted_sense, gold_sense)
    return sense_cm
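Example #33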
    def train_and_test(self, train_set, test_set):
        self.classifier = self.classifier_class.train(train_set)

        predicted_polarities = [
            self.classify(document) for (document, polarity) in test_set
        ]
        actual_polarities = [polarity for (document, polarity) in test_set]

        return ConfusionMatrix(predicted_polarities, actual_polarities)
Example #34
def evaluate_objects(model, corpus_fname, state_type):
    corpus = annotationIo.load(corpus_fname)
    state_cls = state_type_from_name(state_type)
    from g3.inference import nodeSearch
    taskPlanner = nodeSearch.BeamSearch(model)
    predictions = []
    done = False
    phrases = set()
    for i, annotation in enumerate(corpus):
        start_state = state_cls.from_context(annotation.context)

        for esdc in annotation.esdcs:
            #if esdc.text != "the pallet of boxes":
            #    continue
            #if esdc.text in phrases:
            #    continue
            isCorrect = annotation.isGroundingCorrect(esdc)
            if isCorrect != None:
                ggg = ggg_from_esdc(esdc)
                groundings = annotation.getGroundings(esdc)
                assert len(groundings) == 1
                grounding = groundings[0]
                #if "generator" not in grounding.tags:
                #    continue
                prob = evaluate_ggg(ggg, grounding, start_state, taskPlanner)
                if prob > 0.7:
                    predicted_class = True
                else:
                    predicted_class = False
                predictions.append((predicted_class, isCorrect))
                #print "Query: Is object", " ".join(grounding.tags),
                #print "'" + esdc.text + "'?"
                #print "System: ",
                #if predicted_class:
                #    print "Yes."
                #else:
                #    print "No."
                #done = True
                phrases.add(esdc.text)
            if done:
                break
        if done:
            break

    tp = len([(p, l) for p, l in predictions if p and p == l])
    fp = len([(p, l) for p, l in predictions if p and p != l])
    tn = len([(p, l) for p, l in predictions if not p and p == l])
    fn = len([(p, l) for p, l in predictions if not p and p != l])
    cm = ConfusionMatrix(tp, fp, tn, fn)
    #cm.print_all()

    #if len(phrases) > 20:
    #    phrases = random.sample(phrases, 20)
    #for phrase in sorted(phrases):
    #    print phrase
    return cm
Example #35
 def __init__(self, raw_auc):
     if raw_auc is None:
         raise ValueError("Missing data for `raw_auc`.")
     self.AUC = raw_auc["AUC"]
     self.Gini = raw_auc["Gini"]
     self.confusion_matrices = ConfusionMatrix.read_cms(
         raw_auc["confusion_matrices"])
     # Two Dim Table
     self.thresholdsAndMetricScores = raw_auc["thresholdsAndMetricScores"]
     self.maxCriteriaAndMetricScores = raw_auc["maxCriteriaAndMetricScores"]
Example #36
    def tell_a_posteriori_feasibility(self, apos_feasibility):
        self._confusion_matrix = ConfusionMatrix(apos_feasibility)
        self._sp = self._confusion_matrix.success_probability()
        self._ppv = self._confusion_matrix.ppv()
        self._npv = self._confusion_matrix.npv()
        self._pending_apos_solutions = []

        # log all bindings
        self.logger.log()
        self._count_constraint_infeasibles = 0                
        self._count_repaired = 0
Example #37
def evaluate_sense(gold_list, predicted_list):
    """Evaluate sense classifier

    The label ConfusionMatrix.NEGATIVE_CLASS is for the relations
    that are missed by the system because the arguments don't match
    any of the gold relations.
    """
    print("In function: evaluate_sense")
    sense_alphabet = Alphabet()
    valid_senses = validator.identify_valid_senses(gold_list)
    for relation in gold_list:
        sense = relation['Sense'][0]
        if sense in valid_senses:
            sense_alphabet.add(sense)

    sense_alphabet.add(ConfusionMatrix.NEGATIVE_CLASS)

    sense_cm = ConfusionMatrix(sense_alphabet)
    gold_to_predicted_map, predicted_to_gold_map = \
            _link_gold_predicted(gold_list, predicted_list, spans_exact_matching)

    for i, gold_relation in enumerate(gold_list):
        gold_sense = gold_relation['Sense'][0]
        if gold_sense in valid_senses:
            if i in gold_to_predicted_map:
                predicted_sense = gold_to_predicted_map[i]['Sense'][0]
                if predicted_sense in gold_relation['Sense']:
                    sense_cm.add(predicted_sense, predicted_sense)
                else:
                    if not sense_cm.alphabet.has_label(predicted_sense):
                        predicted_sense = ConfusionMatrix.NEGATIVE_CLASS
                    sense_cm.add(predicted_sense, gold_sense)
            else:
                sense_cm.add(ConfusionMatrix.NEGATIVE_CLASS, gold_sense)

    for i, predicted_relation in enumerate(predicted_list):
        if i not in predicted_to_gold_map:
            predicted_sense = predicted_relation['Sense'][0]
            if not sense_cm.alphabet.has_label(predicted_sense):
                predicted_sense = ConfusionMatrix.NEGATIVE_CLASS
            sense_cm.add(predicted_sense, ConfusionMatrix.NEGATIVE_CLASS)
    return sense_cm
Example #38
def compute_span_exact_match_metric(gold_list, predicted_list, verbose=False):
    """Compute binary evaluation metric

    """
    binary_alphabet = Alphabet()
    binary_alphabet.add('yes')
    binary_alphabet.add('no')
    cm = ConfusionMatrix(binary_alphabet)
    matched_predicted = [False for x in predicted_list]
    predicted = defaultdict(list)
    for i, pspan in enumerate(predicted_list):
        predicted[pspan].append(i)
    empty_list = []
    key = indices = None
    for gold in gold_list:
        found_match = False
        indices = predicted.get(gold, empty_list)
        for i in indices:
            if not matched_predicted[i]:
                cm.add('yes', 'yes')
                matched_predicted[i] = True
                found_match = True
                break
        if not found_match:
            if verbose:
                print('Span:')
                print('<<<\t{:s}'.format(gold).encode(ENCODING))
                print()
            cm.add('no', 'yes')
    # Predicted span that does not match with any
    for matched, pred in zip(matched_predicted, predicted_list):
        if not matched:
            if verbose:
                print('Span:')
                print('>>>\t{:s}'.format(pred).encode(ENCODING))
                print()
            cm.add('yes', 'no')
    return cm
Example #39
def evaluate(gold_file, pred_file):

    with codecs.open(gold_file, encoding="utf-8") as fin_gold, codecs.open(pred_file, encoding="utf-8") as fin_pred:

        dict_P_to_url_label = {}
        for line in fin_gold:
            P, url, label, _ = line.strip().split("\t")
            if P not in dict_P_to_url_label:
                dict_P_to_url_label[P] = set()
            dict_P_to_url_label[P].add((url.strip(), label))

        #
        predict_set = set()
        for line in fin_pred:
            url, s, p, o, confidence = line.strip().split("\t")
            predict_set.add((url.strip(), p))

        alphabet = Alphabet()
        alphabet.add("0")
        alphabet.add("1")

        # Evaluation

        marco_p, marco_r, marco_f = 0, 0, 0
        N = 0

        for P in sorted(dict_P_to_url_label.keys()):

            confusionMatrix = ConfusionMatrix(alphabet)

            recall_error_cases = []
            precision_error_cases= []

            for url, label in dict_P_to_url_label[P]:

                pred = "0"
                if (url, P) in predict_set:
                    pred = "1"

                if label != pred:

                    if label == "1" and pred == "0":
                        recall_error_cases.append("%s\t%s->%s" % (url, label, pred))

                    if label == "0" and pred == "1":
                        precision_error_cases.append("%s\t%s->%s" % (url, label, pred))

                confusionMatrix.add(pred, label)

            print "==" * 40
            print P
            print
            confusionMatrix.print_out()
            p, r, f = confusionMatrix.get_prf("1")
            marco_p += p
            marco_r += r
            marco_f += f
            N += 1

            print "\n==>recall error cases:"
            print "\n".join(recall_error_cases)
            print "\n==>precision error cases:"
            print "\n".join(precision_error_cases)

    print "**" * 40
    print "marco, P: %f; R: %f; F1: %f" % (marco_p / N, marco_r / N, marco_f / N)
Example #40
class ORIDSESSVC(EvolutionStrategy):

    description =\
        "Ori. Death Penalty Step Control Evolution Strategy (DSES) with SVC"

    description_short = "Ori. DSES with SVC"

    def __init__(self, mu, lambd, theta, pi, initial_sigma,\
        delta, tau0, tau1, initial_pos, beta, meta_model):

        super(ORIDSESSVC, self).__init__(mu, lambd)

        self._theta = theta
        self._pi = pi
        self._delta = delta
        self._infeasibles = 0
        self._init_pos = initial_pos
        self._init_sigma = initial_sigma
        self._tau0 = tau0
        self._tau1 = tau1

        # SVC Metamodel
        self.meta_model = meta_model
        self.meta_model_trained = False
        self._beta = beta

        self._current_population = [] 
        self._valid_solutions = [] 
        self._pending_apos_solutions = []

        self.logger.add_const_binding('_theta', 'theta')
        self.logger.add_const_binding('_pi', 'pi')
        self.logger.add_const_binding('_tau0', 'tau0')
        self.logger.add_const_binding('_tau1', 'tau1')
        self.logger.add_binding('_delta', 'delta')
        self.logger.add_binding('_sp', 'successprob')
        self.logger.add_binding('_ppv', 'ppv')
        self.logger.add_binding('_npv', 'npv')

        # log constants
        self.logger.const_log()
       
        # initialize population 
        self._initialize_population()

    # cPickle cannot serialize lambda functions 
    def _mat_mutate_sig(self, sig):
        mutate_sig = lambda sigma : sigma * exp(self._tau1 * normal(0, 1)) 
        _lmutatesig = vectorize(mutate_sig)
        return _lmutatesig(sig)

    # cPickle cannot serialize lambda functions 
    def _mat_mutate_pos(self, coord, sigma):
        mutate_pos = lambda coord, sigma : coord + normal(0, sigma) 
        _lmutatepos = vectorize(mutate_pos)
        return _lmutatepos(coord, sigma)

    # cPickle cannot serialize lambda functions 
    def _mat_reducer(self, x):
        reducer = lambda sigma : self._delta if sigma < self._delta else sigma
        _lmatreducer = vectorize(reducer)
        return _lmatreducer(x)

    def _initialize_population(self):
        init_pos, init_sigma = self._init_pos, self._init_sigma
        d = init_pos.size

        genpos = lambda pos, sigma : random.normal(pos, sigma)
        gensig = lambda sigma : sigma 
         
        # initial (mu, lambda) population; each parent is selected for pairing
        # with probability 1/mu, i.e. the selection intervals have equal size.
        s, i = 0.0, (1 / float(self._mu))
        while(len(self._current_population) < self._mu):
            sigma = self._mat_mutate_sig(init_sigma)
            pos = self._mat_mutate_pos(init_pos, sigma)
            individual = matrix([pos.getA1(), sigma.getA1()])  
            self._current_population.append((individual, s, s+i))
            s = s+i        

    def _generate_individual(self):
        # selection of pairing, anti-proportional selection using 
        # the intervals between [0, 1]
        parents = []
        while(len(parents) < 2): 
            x = random.random()
            for individual, start, end in self._current_population:
                if(start <= x < end):
                    parents.append(individual) 

        child = 0.5 * (parents[0] + parents[1])

        # mutation of sigma
        self._global_sigma_mutation = exp(self._tau0 * normal(0, 1))
        child[SIGMA] = self._mat_mutate_sig(child[SIGMA])
        child[SIGMA] = self._global_sigma_mutation * child[SIGMA]

        if(self._infeasibles % self._pi == 0):
            self._delta *= self._theta
 
        # minimum step size
        child[SIGMA] = self._mat_reducer(child[SIGMA])

        # mutation of position with new step size
        child[POS] = self._mat_mutate_pos(child[POS], child[SIGMA])
       
        return child

    def ask_pending_solutions(self):
        """ ask pending solutions; solutions which need a checking for true 
            feasibility """
        
        individuals = []
        while(len(individuals) < 1):
            if((random.random() < self._beta) and self.meta_model_trained):
                individual = self._generate_individual() 
                if(self.meta_model.check_feasibility(individual[POS])):
                    individuals.append(individual)
                    # appending meta-feasible solution to a_posteriori pending
                    self._pending_apos_solutions.append((individual, True))
                else:
                    # appending meta-infeasible solution to a_posteriori pending 
                    self._pending_apos_solutions.append((individual, False))

                #individual[POS] = self.meta_model.repair(individual[POS])
                #self._count_repaired += 1
                #pending_meta_feasible.append(individual)

                # appending meta-feasible solution to a_posteriori pending
                #self._pending_apos_solutions.append((individual, True))
            else: 
                individual = self._generate_individual()
                individuals.append(individual)

        return individuals           
   
    def tell_feasibility(self, feasibility_information):
        """ tell feasibilty; return True if there are no pending solutions, 
            otherwise False """

        for (child, feasibility) in feasibility_information:
            if(feasibility):
                self._valid_solutions.append(child)
                self._infeasibles = 0
            else:
                self._count_constraint_infeasibles += 1
                self._infeasibles += self._infeasibles
                self.meta_model.add_infeasible(child[POS])

        if(len(self._valid_solutions) < self._lambd):
            return False
        else:            
           return True

    def ask_valid_solutions(self):
        return self._valid_solutions

    def ask_a_posteriori_solutions(self):
        return self._pending_apos_solutions        

    def tell_fitness(self, fitnesses):
        fitness = lambda (child, fitness) : fitness
        child = lambda (child, fitness) : child
        position = lambda (child, fitness) : child[POS]

        sorted_fitnesses = sorted(fitnesses, key = fitness)
        sorted_children = map(child, sorted_fitnesses)      
        selected_sorted_fitnesses = sorted_fitnesses[:self._mu]

        # update meta model sort self._valid_solutions by fitness and 
        # unsorted self._sliding_infeasibles
        sorted_feasibles = map(position, sorted_fitnesses)        
        self.meta_model.add_sorted_feasibles(sorted_feasibles)       
        self.meta_model_trained = self.meta_model.train()

        """ update the selection probabilites according to 
            anti-proportional fitness. """      
        probabilities = []
        s, a_prop_sum, sum_of_fitnesses = 0.0, 0.0, 0.0

        for individual, fitness in selected_sorted_fitnesses:
            sum_of_fitnesses += fitness
        for individual, fitness in selected_sorted_fitnesses:
            a_prop_sum += 1.0 / (fitness / float(sum_of_fitnesses))
        for individual, fitness in selected_sorted_fitnesses: 
            p = (1.0 / (fitness / float(sum_of_fitnesses))) / a_prop_sum
            probabilities.append((individual, p))
        probabilities.reverse()
    
        """ update the current population """            
        self._current_population = []
        start = 0
        for individual, prob in probabilities:
            self._current_population.append((individual, start, start + prob))
            start = s + prob
        self._current_population.reverse() 

        ### UPDATE FOR NEXT ITERATION
        self._valid_solutions = []
        
        ### STATISTICS
        self._selected_children = self._current_population  
        self._best_child, self._best_fitness = selected_sorted_fitnesses[0]
        self._worst_child, self._worst_fitness = selected_sorted_fitnesses[-1]
        self._mean_fitness = array(map(lambda (c,f) : f, selected_sorted_fitnesses)).mean()

        return self._best_child, self._best_fitness

    def tell_a_posteriori_feasibility(self, apos_feasibility):
        self._confusion_matrix = ConfusionMatrix(apos_feasibility)
        self._sp = self._confusion_matrix.success_probability()
        self._ppv = self._confusion_matrix.ppv()
        self._npv = self._confusion_matrix.npv()
        self._pending_apos_solutions = []

        # log all bindings
        self.logger.log()
        self._count_constraint_infeasibles = 0                
        self._count_repaired = 0
Example #41
def learn(n_vow, N_reservoir=100, leaky=True, classification=True, **kwargs):
    """ function to perform supervised learning on an ESN
         data: data to be learned (ndarray including AN activations and teacher signals) OLD VERSION
         n_vow: total number of vowels used
         N_reservoir: size of ESN
         leaky: boolean defining if leaky ESN is to be used
         plots: boolean defining if results are to be plotted
         output: boolean defining if progress messages are to be displayed
         testdata: provide test data for manual testing (no cross validation) OLD VERSION
         separate: boolean defining if infant data is used as test set or test set is drawn randomly from adult+infant (n_vow=3)
         n_channels: number of channels used
         classification: boolean defining if sensory classification is performed instead of motor prediction"""

    output_folder = kwargs['output_folder']
    regularization = kwargs['regularization']
    logistic = kwargs['logistic']
    leak_rate = kwargs['leak_rate']
    spectral_radius = kwargs['spectral_radius']
    n_channels = kwargs['n_channels']
    n_vow = kwargs['n_vowel']
    n_samples = kwargs['n_samples']
    n_training = kwargs['n_training']
    output = kwargs['verbose']
    flow = kwargs['flow']
    rank = kwargs['rank']

    training_set, test_set = get_training_and_test_sets(n_samples, n_training, n_vow)

    if output:
        print('samples_test = '+str(test_set))
        print('len(samples_train) = '+str(len(training_set)))

    N_classes = n_vow+1                  # number of classes is total number of vowels + null class
    input_dim = n_channels              # input dimension is number of used channels

    if output:
        print('constructing reservoir')

    # construct individual nodes
    if leaky:                           # construct leaky reservoir
        reservoir = Oger.nodes.LeakyReservoirNode(input_dim=input_dim, output_dim=N_reservoir, input_scaling=1., 
            spectral_radius=spectral_radius, leak_rate=leak_rate)
                                        # call LeakyReservoirNode with appropriate number of input units and 
                                        #  given number of reservoir units
    else:                               # construct non-leaky reservoir
        reservoir = Oger.nodes.ReservoirNode(input_dim=input_dim, output_dim=N_reservoir, input_scaling=1.)
                                        # call ReservoirNode with appropriate number of input units and given number of reservoir units

    if logistic:
        readout = Oger.nodes.LogisticRegressionNode()
    else:
        readout = Oger.nodes.RidgeRegressionNode(regularization)
                                        # construct output units with Ridge Regression training method

    flow = mdp.Flow([reservoir, readout])
                                        # connect reservoir and output nodes


    if output:
        print("Training...")

    import pdb
    pdb.set_trace()
    flow.train([[], training_set])
                                        # train flow with input files provided by file iterator


    ytest = []                          # initialize list of test output

    if output:
        print("Applying to testset...")

    losses = []                         # initiate list for discrete recognition variable for each test item
    ymean = []                          # initiate list for true class of each test item
    ytestmean = []                      # initiate list for class vote of trained flow for each test item

    for i_sample in xrange(len(test_set)):       # loop over all test samples
        if output:
            print('testing with sample '+str(i_sample))

        xtest = test_set[i_sample][0]
                                        # load xtest and ytarget as separate numpy arrays
        ytarget = test_set[i_sample][1]
        ytest = flow(xtest)             # evaluate trained output units' responses for current test item

        mean_sample_vote = mdp.numx.mean(ytest, axis=0)
                                        # average each output neurons' response over time
        if output:
            print('mean_sample_vote = '+str(mean_sample_vote))
        target = mdp.numx.mean(ytarget, axis=0)
                                        # average teacher signals over time
        if output:
            print('target = '+str(target))

        argmax_vote = sp.argmax(mean_sample_vote)
                                        # winner-take-all vote for final classification
        ytestmean.append(argmax_vote)   # append current vote to votes list of all items
        argmax_target = sp.argmax(target)
                                        # evaluate true class of current test item
        ymean.append(argmax_target)     # append current true class to list of all items

        loss = Oger.utils.loss_01(mdp.numx.atleast_2d(argmax_vote), mdp.numx.atleast_2d(argmax_target))
                                        # call loss_01 to compare vote and true class, 0 if match, 1 else
        if output:
            print('loss = '+str(loss))
        losses.append(loss)             # append current loss to losses of all items

        xtest = None                    # destroy xtest, ytest, ytarget, current_data to free up memory
        ytest = None
        ytarget = None

    error = mdp.numx.mean(losses)       # error rate is average number of mismatches
    if output:
        print('error = '+str(error))

    if output:
        print("error: "+str(error))
        print('ymean: '+str(ymean))
        print('ytestmean: '+str(ytestmean))

    ytestmean = np.array(ytestmean)     # convert ytestmean and ymean lists to numpy array for confusion matrix
    ymean = np.array(ymean)

    confusion_matrix = ConfusionMatrix.from_data(N_classes, ytestmean, ymean) # 10 classes
                                        # create confusion matrix from class votes and true classes
    c_matrix = confusion_matrix.balance()
                                        # normalize confusion matrix
    c_matrix = np.array(c_matrix)

    if output:
      print('confusion_matrix = '+str(c_matrix))


    save_flow(flow, N_reservoir, leaky, rank, output_folder)

    return error, c_matrix              # return current error rate and confusion matrix
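
learn above finishes by calling confusion_matrix.balance(); presumably this row-normalizes the counts before they are returned (an assumption, since that ConfusionMatrix class is not shown here). A small numpy sketch of such a normalization:

import numpy as np

counts = np.array([[8., 1., 1.],
                   [2., 6., 2.],
                   [0., 3., 7.]])

normalized = counts / counts.sum(axis=1, keepdims=True)   # each row now sums to 1
print(normalized.round(2))

Example #42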
def compute_binary_eval_metric(predicted_list, gold_list, binary_alphabet):
    cm = ConfusionMatrix(binary_alphabet)
    for (predicted_span, gold_span) in zip( predicted_list, gold_list):
        cm.add(predicted_span, gold_span)
    return cm
Example #43
def evaluate_sense(gold_list, predicted_list, verbose=False):
    """Evaluate sense classifier

    The label ConfusionMatrix.NEGATIVE_CLASS is for the relations

    that are missed by the system
    because the arguments don't match any of the gold relations.

    """
    sense_alphabet = Alphabet()
    valid_senses = validator.identify_valid_senses(gold_list)

    isense = None
    for relation in gold_list:
        isense = relation['Sense'][0]
        if isense in valid_senses:
            sense_alphabet.add(isense)

    sense_alphabet.add(ConfusionMatrix.NEGATIVE_CLASS)

    sense_cm = ConfusionMatrix(sense_alphabet)
    gold_to_predicted_map, predicted_to_gold_map = \
        _link_gold_predicted(gold_list, predicted_list,
                             spans_exact_matching)

    for i, gold_relation in enumerate(gold_list):
        gold_sense = gold_relation['Sense'][0]
        if gold_sense in valid_senses:
            if i in gold_to_predicted_map:
                predicted_sense = gold_to_predicted_map[i]['Sense'][0]
                if predicted_sense in gold_relation['Sense']:
                    sense_cm.add(predicted_sense, predicted_sense)
                else:
                    if not sense_cm.alphabet.has_label(predicted_sense):
                        predicted_sense = ConfusionMatrix.NEGATIVE_CLASS
                    if verbose:
                        print('Sense:')
                        print('<<<\t{:s}'.format(gold_sense).encode(ENCODING))
                        print('>>>\t{:s}'.format(predicted_sense).encode(
                            ENCODING))
                        print('Arg1:\t{:s}'.format(
                            gold_relation['Arg1']['RawText']).encode(ENCODING))
                        print('Arg2:\t{:s}'.format(
                            gold_relation['Arg2']['RawText']).encode(ENCODING))
                        print()
                    sense_cm.add(predicted_sense, gold_sense)
            else:
                if verbose:
                    print('Sense:')
                    print('<<<\t{:s}'.format(gold_sense).encode(ENCODING))
                    print('>>>\t{:s}'.format(
                        ConfusionMatrix.NEGATIVE_CLASS).encode(
                        ENCODING))
                    print('Arg1:\t{:s}'.format(
                        gold_relation['Arg1']['RawText']).encode(ENCODING))
                    print('Arg2:\t{:s}'.format(
                        gold_relation['Arg2']['RawText']).encode(ENCODING))
                    print()
                sense_cm.add(ConfusionMatrix.NEGATIVE_CLASS, gold_sense)

    for i, predicted_relation in enumerate(predicted_list):
        if i not in predicted_to_gold_map:
            predicted_sense = predicted_relation['Sense'][0]
            if not sense_cm.alphabet.has_label(predicted_sense):
                predicted_sense = ConfusionMatrix.NEGATIVE_CLASS
            if verbose:
                print('Sense:')
                print('<<<\t{:s}'.format(gold_sense).encode(ENCODING))
                print('>>>\t{:s}'.format(
                    ConfusionMatrix.NEGATIVE_CLASS).encode(
                    ENCODING))
                print('Arg1:\t{:s}'.format(
                    gold_relation['Arg1']['RawText']).encode(ENCODING))
                print('Arg2:\t{:s}'.format(
                    gold_relation['Arg2']['RawText']).encode(ENCODING))
                print()
            sense_cm.add(predicted_sense, ConfusionMatrix.NEGATIVE_CLASS)
    return sense_cm