def test(self, model, data, filePath=None, writeOutput=False):
     # Start the running sums at a small epsilon so the final
     # precision/recall/F1 divisions cannot hit zero.
     Pr_micro = 0.000001
     Re_micro = 0.000001
     Pr_macro = 0.000001
     Re_macro = 0.000001
     if writeOutput:
         f = open(filePath, 'w')
     for x, char_seq, y, seq_length, sentences in data:
         #print('Predictions are:')
         y_pred = model(x, char_seq, seq_length)
         # collapse the label dimension: (batch, seq_len, num_labels) -> (batch, seq_len)
         y_pred = torch.argmax(y_pred, 2)
         if writeOutput:
             #self.writeOutput(f, sentences, y, y_pred)
             for index in range(y.shape[0]):
                 sentence = sentences[index].split('\n')
                 pred_labels = [
                     self.labels_inverse[i.item()] for i in y_pred[index]
                 ]
                 for i in range(len(sentence)):
                     line = sentence[i]
                     f.write(line + ' ' + pred_labels[i] + '\n')
                 f.write('\n')
         # Flatten the batch and sequence dimensions for token-level scoring.
         y_flat = y.view(y.shape[0] * y.shape[1])
         y_pred_flat = y_pred.view(y.shape[0] * y.shape[1])
         # Keep only entity tokens (label id > 1): gold positions drive recall,
         # predicted positions drive precision below.
         index = np.where(y_flat > 1)
         index_pred = np.where(y_pred_flat > 1)
         pr = scorer(y_flat[index_pred],
                     y_pred_flat[index_pred],
                     average='macro')[0]
         re = scorer(y_flat[index], y_pred_flat[index], average='macro')[1]
         Pr_macro += pr
         Re_macro += re
         # pr, re, f1, _ = scorer(y_flat, y_pred_flat, average='micro')
         re = scorer(y_flat[index], y_pred_flat[index], average='micro')[1]
         pr = scorer(y_flat[index_pred],
                     y_pred_flat[index_pred],
                     average='micro')[0]
         Pr_micro += pr
         Re_micro += re
         #pdb.set_trace()
     print("Micro PR, Re, F1")
     Pr_micro /= len(data)
     Re_micro /= len(data)
     F1_micro = (2 * Pr_micro * Re_micro) / (Pr_micro + Re_micro)
     print(Pr_micro, Re_micro, F1_micro)
     print("Macro PR, Re, F1")
     Pr_macro /= len(data)
     Re_macro /= len(data)
     F1_macro = (2 * Pr_macro * Re_macro) / (Pr_macro + Re_macro)
     print(Pr_macro, Re_macro, F1_macro)
     if writeOutput:
         f.close()
     print(len(data))
     return F1_micro, F1_macro
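
scorer is never defined inside these snippets; from the way it is indexed ([0] for precision, [1] for recall) and the average='micro'/'macro' keyword, it behaves like sklearn's precision_recall_fscore_support. A minimal sketch under that assumption:

from sklearn.metrics import precision_recall_fscore_support as scorer  # assumed binding

y_true = [2, 2, 3, 0, 1, 3]  # toy label ids; ids > 1 are treated as entity tags above
y_pred = [2, 3, 3, 0, 2, 1]

# Returns (precision, recall, f1, support); the snippet above indexes [0] and [1].
pr_macro, re_macro, f1_macro, _ = scorer(y_true, y_pred, average='macro')
pr_micro, re_micro, f1_micro, _ = scorer(y_true, y_pred, average='micro')
print(pr_macro, re_macro, f1_macro)
print(pr_micro, re_micro, f1_micro)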
Example #2
def score_B(labels, idx, val_set):
    """
    Strategy B: Loop over the predictions.

    Pros:
        Focus on the correctness of the clusters themselves.
        Reward homogeneous clusters.
    Cons:
        Ignore images that should have been clustered together but were not.
        Reward smaller clusters with few errors in them.
    """
    y_true, y_pred = [], []
    for label in set(labels):
        if label == -1: continue
        label_idx = [
            idx[i] for i in range(labels.shape[0]) if labels[i] == label
        ]
        # Compare every distinct pair of images inside this cluster against
        # the annotated pairs in val_set.
        for i in range(len(label_idx) - 1):
            for j in range(i + 1, len(label_idx)):
                pair = [
                    r for r in val_set
                    if (r[0] == int(label_idx[i]) and r[1] == int(label_idx[j]))
                    or (r[0] == int(label_idx[j]) and r[1] == int(label_idx[i]))
                ]
                if len(pair) == 0:
                    continue
                together = pair.pop()[2]
                y_true.append(1)
                y_pred.append(together)

    sB = scorer(y_true, y_pred)
    return sB
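
The function assumes labels and idx are aligned arrays from a clustering run and val_set holds annotated pairs as (id1, id2, together) triples. A toy invocation with made-up data (ids and values are illustrative only):

import numpy as np

labels = np.array([0, 0, -1])          # cluster label per image (-1 = unclustered)
idx = np.array([10, 11, 12])           # image ids aligned with labels
val_set = [(10, 11, 1), (10, 12, 0)]   # annotated pairs: (id1, id2, together)

print(score_B(labels, idx, val_set))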
Example #3
def score_A(labels, idx, val_set):
    """
    Strategy A: Loop over the annotations.

    Pros:
        Correspond to a traditional classification performance score.
        Reward clustering that exactly matches human annotations.
    Cons:
        Penalize even if clusters themselves are homogeneous.
        Highly dependent on the size and quality of annotations.
    """
    label_lookup = {int(idx[i]): label for i, label in enumerate(labels)}
    y_true, y_pred = [], []
    for id1, id2, together in val_set:
        # Check if id1 and id2 have been predicted
        label_of_id1 = label_lookup.get(id1, None)
        label_of_id2 = label_lookup.get(id2, None)
        if label_of_id1 is None or label_of_id2 is None:
            continue

        # Check if they are in the same cluster, except if they are in -1
        y_true.append(together)
        if label_of_id1 == label_of_id2 and label_of_id1 != -1:
            y_pred.append(1)
        else:
            y_pred.append(0)

    sA = scorer(y_true, y_pred)
    return sA
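
For contrast, the same toy data scored from the annotation side with strategy A (again, the data is illustrative only):

import numpy as np

labels = np.array([0, 0, -1])
idx = np.array([10, 11, 12])
val_set = [(10, 11, 1), (10, 12, 0)]

print(score_A(labels, idx, val_set))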
Example #4
def evaluate_model(self, data):
    le = LabelEncoder()
    # drop non-clustered points from the dataframe
    data['cluster'] = data['cluster'].where(data['cluster'].str.len() > 0,
                                            np.nan)
    data.dropna(subset=['cluster'], inplace=True)
    # separate the datapoints from their cluster labels
    points = data[data.columns[1:-1]].to_numpy()
    labels = le.fit_transform(data[data.columns[-1]].to_numpy())
    # returns a numpy float64 score value
    return scorer(points, labels)
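
Here scorer is called with raw datapoints and cluster labels, which matches an unsupervised clustering metric such as sklearn's silhouette_score. A small sketch under that assumption (the actual binding is not shown in the original code):

import numpy as np
from sklearn.metrics import silhouette_score as scorer  # assumed binding

points = np.array([[0.0, 0.1], [0.2, 0.0], [5.0, 5.1], [5.2, 4.9]])
labels = np.array([0, 0, 1, 1])
print(scorer(points, labels))  # in [-1, 1], higher means tighter clusters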
Example #5
import numpy
import sklearn.base
import sklearn.metrics
import sklearn.model_selection
import sklearn.utils.validation


def cross_val_score(estimator, X, y=None, scoring=None, cv=None):
    """Run cross-validation like normal but return (scores, predictions)."""
    ### TODO: Test this code. It's 100% untested!
    X, y = sklearn.utils.validation.indexable(X, y)

    cv = sklearn.model_selection.check_cv(
        cv, y, classifier=sklearn.base.is_classifier(estimator))
    scorer = sklearn.metrics.check_scoring(estimator, scoring=scoring)

    y_pred = numpy.zeros_like(y)

    scores = []
    for train, test in cv.split(X, y):
        current_est = sklearn.base.clone(estimator).fit(X[train], y[train])
        predictions = current_est.predict(X[test])
        # a scorer returned by check_scoring is called as scorer(estimator, X, y_true)
        scores.append(scorer(current_est, X[test], y[test]))
        y_pred[test] = predictions

    return numpy.asarray(scores), y_pred
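
A hypothetical usage on a small classification task, showing the (scores, predictions) return value:

from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression

X, y = load_iris(return_X_y=True)
scores, predictions = cross_val_score(
    LogisticRegression(max_iter=1000), X, y, scoring='accuracy', cv=5)
print(scores.mean(), predictions.shape)  # mean fold accuracy, (150,)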
Example #6
import argparse
import json

from sklearn.metrics import classification_report

parser = argparse.ArgumentParser()
parser.add_argument("test_data", help="formatted test data filename")
parser.add_argument("predictions",
                    help="predicted classification data filename")
args = parser.parse_args()

with open(args.test_data, "r") as f:
    true_data = json.load(f)

with open(args.predictions, "r") as f:
    predicted_data = json.load(f)

true_labels = []

for obs in true_data:
    true_labels.append(obs[1]["NEtag"])

predicted_me = []
predicted_nb = []
predicted_dt = []

for obs in predicted_data:
    predicted_me.append(obs[1]["me_pred"])
    predicted_nb.append(obs[1]["nb_pred"])
    predicted_dt.append(obs[1]["dt_pred"])

# score each model's predictions against the gold labels
for labels in [predicted_me, predicted_nb, predicted_dt]:
    print(scorer(true_labels, labels, average="micro"))

    print(classification_report(true_labels, labels))
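
The script only assumes that each observation can be indexed as obs[1]["NEtag"] (gold data) or obs[1]["me_pred"] / obs[1]["nb_pred"] / obs[1]["dt_pred"] (predictions). A minimal hypothetical fixture consistent with that indexing:

import json

test_data = [["token_1", {"NEtag": "PER"}], ["token_2", {"NEtag": "O"}]]
predicted_data = [["token_1", {"me_pred": "PER", "nb_pred": "O", "dt_pred": "PER"}],
                  ["token_2", {"me_pred": "O", "nb_pred": "O", "dt_pred": "O"}]]

with open("test_data.json", "w") as f:
    json.dump(test_data, f)
with open("predictions.json", "w") as f:
    json.dump(predicted_data, f)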
Example #7
        +---------+-----------+----------------+\n\
        | MAPE    | {:.4f}    | {:.4f}         |\n\
        | R²      | {:.4f}    | {:.4f}         |\n\
        +---------+-----------+----------------+\n\
    '.format(mape(y_true=train_Y, y_pred=predicted_train_Y),
             mape(y_true=validation_Y, y_pred=predicted_validation_Y),
             r2_score(y_true=train_Y, y_pred=predicted_train_Y),
             r2_score(y_true=validation_Y, y_pred=predicted_validation_Y)))

    logging.info('Generating learning curves')

    train_sizes, train_scores, valid_scores = learning_curve(
        TransformedLinearRegression(1500),
        train.iloc[:, :-1],
        train.iloc[:, -1],
        scoring=scorer(mape),
        cv=5)

    # plot configuration
    fig = plt.gcf()
    fig.canvas.manager.set_window_title(
        'Learning curves of Linear Regression model with Quantile Transformation'
    )
    plt.plot(train_sizes,
             train_scores.mean(axis=1),
             color='r',
             label='Training Score')
    plt.plot(train_sizes,
             valid_scores.mean(axis=1),
             color='g',
             label='Validation Score')
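
The scoring=scorer(mape) argument reads like sklearn's make_scorer wrapping a custom MAPE function; a minimal sketch under that assumption (the mape definition and the binding of scorer are not shown in the original excerpt):

import numpy as np
from sklearn.metrics import make_scorer as scorer  # assumed binding

def mape(y_true, y_pred):
    """Mean absolute percentage error, in percent."""
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    return np.mean(np.abs((y_true - y_pred) / y_true)) * 100

# MAPE is an error measure, so greater_is_better=False is usually wanted when
# handing it to learning_curve; the excerpt passes scorer(mape) directly.
mape_scorer = scorer(mape, greater_is_better=False)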