Example #1
    def _validate_model(self, x: np.ndarray, y: np.ndarray, validation_file_name: str = "validation.json") -> dict:
        logging.info("Creating predictions ...")
        y_predicted_categories = self._model.predict(x, batch_size=self._batch_size)
        gc.collect()

        from sklearn.metrics import accuracy_score, precision_recall_fscore_support
        y_expected_1dim = self._label_enc.max_category(y)
        y_predicted_1dim = self._label_enc.max_category(y_predicted_categories)
        logging.info("Results:")
        logging.info("{}".format(precision_recall_fscore_support(y_true=y_expected_1dim, y_pred=y_predicted_1dim)))
        accuracy = accuracy_score(y_true=y_expected_1dim, y_pred=y_predicted_1dim)
        logging.info("{}".format(accuracy))

        from sklearn.metrics import classification_report
        logging.info("\n{}".format(classification_report(y_true=y_expected_1dim,
                                                         y_pred=y_predicted_1dim,
                                                         target_names=["neg", "pos"],
                                                         )))

        results = classification_report(y_true=y_expected_1dim,
                                        y_pred=y_predicted_1dim,
                                        target_names=["neg", "pos"],
                                        output_dict=True)
        results["accuracy"] = accuracy
        write_text_file(
            file_path=self._experiment_folder / validation_file_name,
            text=json.dumps(results))

        return results
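The same predict-then-report flow as a minimal standalone sketch outside the class; the toy labels and the validation.json path are assumptions for illustration, not part of the original code.

import json
from sklearn.metrics import accuracy_score, classification_report

y_true = [0, 1, 1, 0, 1]  # expected class indices
y_pred = [0, 1, 0, 0, 1]  # e.g. argmax over predicted class probabilities

results = classification_report(y_true=y_true, y_pred=y_pred,
                                target_names=["neg", "pos"], output_dict=True)
results["accuracy"] = accuracy_score(y_true=y_true, y_pred=y_pred)

with open("validation.json", "w") as fh:
    json.dump(results, fh, indent=2)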
Example #2
 def get_cv_metrics(self, cv):
     fold_avg_p = []
     fold_avg_r = []
     fold_avg_f1 = []
     fold_accuracy = []
     fold_test_support = []
     fold_train_support = []
     for i, (train, test) in enumerate(cv):
         train_df, train_y = self.X.iloc[train], self.y.iloc[train]
         test_df, test_y = self.X.iloc[test], self.y.iloc[test]
         estimator = clone(self.pipeline)
         estimator.fit(train_df, train_y)
         y_pred = estimator.predict(test_df)
         p, r, f1, s = precision_recall_fscore_support(test_y, y_pred)
         accuracy = accuracy_score(test_y, y_pred)
         # support-weighted average of precision, recall and f1 across classes
         avg_p, avg_r, avg_f1 = (np.average(p, weights=s),
                                 np.average(r, weights=s),
                                 np.average(f1, weights=s))
         test_support = test_y.shape[0]
         train_support = train_y.shape[0]
         fold_avg_p.append(avg_p)
         fold_avg_r.append(avg_r)
         fold_avg_f1.append(avg_f1)
         fold_accuracy.append(accuracy)
         fold_test_support.append(test_support)
         fold_train_support.append(train_support)
     return (np.average(fold_avg_p), np.average(fold_avg_r),
             np.average(fold_avg_f1), np.average(fold_accuracy),
             np.average(fold_test_support), np.average(fold_train_support))
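Worth noting: the hand-rolled support-weighted averaging above matches sklearn's average='weighted'. A small sketch demonstrating the equivalence; the toy labels are assumptions for illustration.

import numpy as np
from sklearn.metrics import precision_recall_fscore_support

y_true = [0, 0, 1, 1, 1, 2]
y_pred = [0, 1, 1, 1, 2, 2]

p, r, f1, s = precision_recall_fscore_support(y_true, y_pred)
by_hand = (np.average(p, weights=s), np.average(r, weights=s), np.average(f1, weights=s))
builtin = precision_recall_fscore_support(y_true, y_pred, average='weighted')[:3]
assert np.allclose(by_hand, builtin)  # support-weighted per-class averages == 'weighted' averages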
Example #3
 def compute(indices, all_truth, all_scores, pdb_ids, all_thresholds, precision, recall):
     print(all_thresholds[indices[0]])
     for t in indices:
         for i in range(len(pdb_ids)):
             # threshold raw scores into {-1, +1} predictions, then score
             p, r, _, _ = precision_recall_fscore_support(
                 all_truth[i],
                 [copysign(1, x - all_thresholds[t]) for x in all_scores[i]],
                 average='binary')
             precision[t] += p
             recall[t] += r
         precision[t] /= len(pdb_ids)
         recall[t] /= len(pdb_ids)
Example #4
def pandas_classification_report(y_true, y_pred):
    metrics_summary = precision_recall_fscore_support(y_true=y_true,
                                                      y_pred=y_pred)

    avg = list(
        precision_recall_fscore_support(y_true=y_true,
                                        y_pred=y_pred,
                                        average='weighted'))
    metrics_sum_index = ['precision', 'recall', 'f1-score', 'support']
    class_report_df = pd.DataFrame(list(metrics_summary),
                                   index=metrics_sum_index)

    support = class_report_df.loc['support']
    total = support.sum()
    avg[-1] = total

    class_report_df['avg / total'] = avg

    return class_report_df.T
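A possible way to call pandas_classification_report; the toy labels and the CSV path are assumptions, and pd / precision_recall_fscore_support must already be imported at module level as in the original file.

import pandas as pd
from sklearn.metrics import precision_recall_fscore_support

y_true = [0, 1, 1, 0, 1, 1]
y_pred = [0, 1, 0, 0, 1, 1]
report_df = pandas_classification_report(y_true, y_pred)
print(report_df)  # one row per class plus 'avg / total'; columns: precision, recall, f1-score, support
report_df.to_csv("classification_report.csv")  # hypothetical output path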
Example #5
    def calculate(self) -> None:
        """
        Calculates all of the metrics
        (precision, recall, F score and support)
        and stores them
        in the results dictionary.
        Note: This function may eat up a lot of memory
        if it's used on a large file.

        :return:
        """
        print('\nCalculating metrics...')
        ftr_all = []
        fpr_all = []

        gen = generate_tuples_from_file(self.fpath,
                                        encodings=self.encodings,
                                        first_layer=self.first_layer,
                                        batch_size=self.batch_size)

        if tqdm:
            for _ in tqdm(range(self.steps)):
                x, y = next(gen)

                y_pred = self.model.predict_classes(x, verbose=0)
                y_true = y.argmax(2)

                ftr, fpr = self._score(y_true, y_pred)
                ftr_all.extend(ftr)
                fpr_all.extend(fpr)
        else:
            print('[!] For progress logging during metrics calculation '
                  'install tqdm.')
            for _ in range(self.steps):
                x, y = next(gen)

                y_pred = self.model.predict_classes(x, verbose=0)
                y_true = y.argmax(2)

                ftr, fpr = self._score(y_true, y_pred)
                ftr_all.extend(ftr)
                fpr_all.extend(fpr)

        confusion = confusion_matrix(ftr_all, fpr_all)
        p, r, f, s = precision_recall_fscore_support(ftr_all, fpr_all)

        self.results = {
            'confusion_matrix': confusion,
            'precision': p,
            'recall': r,
            'fscore': f,
            'f1mean': np.mean(f),
            'support': s
        }
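The core of calculate() is an accumulate-then-score pattern. A self-contained sketch under assumptions: random integer arrays stand in for the generator batches and for predict_classes, and _score is assumed to flatten the (batch, sequence) label arrays.

import numpy as np
from sklearn.metrics import confusion_matrix, precision_recall_fscore_support

rng = np.random.default_rng(0)
y_true_all, y_pred_all = [], []
for _ in range(3):                             # three stand-in batches
    y_true = rng.integers(0, 2, size=(4, 10))  # (batch, sequence) gold labels
    y_pred = rng.integers(0, 2, size=(4, 10))  # stand-in for predicted labels
    y_true_all.extend(y_true.ravel())
    y_pred_all.extend(y_pred.ravel())

confusion = confusion_matrix(y_true_all, y_pred_all)
p, r, f, s = precision_recall_fscore_support(y_true_all, y_pred_all)
results = {'confusion_matrix': confusion, 'precision': p, 'recall': r,
           'fscore': f, 'f1mean': np.mean(f), 'support': s}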
Example #6
def evaluate_to_stats(net, testloader):
    net.eval()
    
    predictions = []
    
    with torch.no_grad():
        for batch_idx, (inputs, label, filename) in enumerate(testloader):
    
            if inputs.shape[0] > 1:
                raise NotImplementedError('Please choose a batch size of 1. Saving the results is not compatible with larger batch sizes in this version.')        
            inputs = inputs.to(device)
            inputs = Variable(inputs)
            output_1, output_2, output_3, output_concat= net(inputs)
            outputs_com = output_1 + output_2 + output_3 + output_concat
    
            _, predicted_com = torch.max(outputs_com.data, 1)
            
            y_pred = predicted_com[0].flatten().cpu().numpy()
            
            y_fn = filename[0]
            
            if args.calc_perf:
                predictions.append({'filename':y_fn,'prediction':y_pred, 'ground truth':label.cpu().numpy()})
            else:
                predictions.append({'filename':y_fn,'prediction':y_pred})
                
               
            if batch_idx % 50 == 0:
                print('Testing image {} from {}'.format(batch_idx,len(testloader)))
                
        if args.calc_perf:
            y_gt = []
            y_pred = []
            for prediction in predictions:
                y_gt.append(prediction['ground truth'])
                y_pred.append(prediction['prediction'])
            
            y_gt = np.array(y_gt,dtype=np.uint8)
            y_pred = np.array(y_pred,dtype=np.uint8)
            
            precision, recall, f1, support = precision_recall_fscore_support(
                y_gt, y_pred, average='macro')
                    
            confusion_matrix = sklearn.metrics.confusion_matrix(y_gt, y_pred, labels=range(len(testloader.dataset.classes)))
            
            cm_fig = construct_confusion_matrix_image(testloader.dataset.classes, confusion_matrix)
        
            cm_fig.savefig('result_confusion_matrix.png',dpi=300)
            
            print('F1 {}, precision {}, recall {}'.format(f1, precision, recall))

    return predictions
Example #7
def run_grid_search(grid_search, show_evaluation=True):
    """ Run the GridSearch algorithm and compute evaluation metrics """
    X_train, X_test, y_train, y_test = split_dataset()

    grid_search.fit(X_train, y_train)
    # for key, value in grid_search.cv_results_.items():
    #     print key, value

    predictions = grid_search.predict(X_test)

    if show_evaluation:
        logger.debug("macro_recall: %s", recall_score(y_test, predictions, average="macro"))
        logger.debug(precision_recall_fscore_support(y_test, predictions))
        logger.debug(confusion_matrix(y_test, predictions))
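A possible call site for run_grid_search, sketched under assumptions: the estimator and parameter grid are illustrative, and split_dataset / logger come from the surrounding module as used above.

from sklearn.model_selection import GridSearchCV
from sklearn.svm import LinearSVC

param_grid = {"C": [0.1, 1.0, 10.0]}  # illustrative grid
grid_search = GridSearchCV(LinearSVC(), param_grid, cv=5, scoring="recall_macro")
run_grid_search(grid_search, show_evaluation=True)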
Example #8
def compute_scores(o, n_iterations, pdb_ids, ab_truth, ab_coord, ab_X, ab_X_weights, precision, recall):
    print(outlier_fractions[o])
    forest = IsolationForest(contamination=outlier_fractions[o], n_jobs=4)
    for i in range(len(pdb_ids)):
        print(pdb_ids[i])
        current_precision = 0
        current_recall = 0
        for _ in range(n_iterations):
            forest.fit(ab_X[i], sample_weight=ab_X_weights[i])
            patch_pred_no_outliers = forest.predict(ab_coord[i])
            p, r, _, _ = precision_recall_fscore_support(
                ab_truth[i], patch_pred_no_outliers, average='binary')
            current_precision += p
            current_recall += r
        current_precision /= n_iterations
        current_recall /= n_iterations
        precision[o] += current_precision
        recall[o] += current_recall
    precision[o] /= len(pdb_ids)
    recall[o] /= len(pdb_ids)
Example #9
 def recall_0(self,
              y_true,
              y_pred,
              labels=None,
              average='binary',
              sample_weight=None):
     '''
     :param y_true:
     :param y_pred:
     :param labels:
     :param average:
     :param sample_weight:
     :return: calculate recall for neg class
     '''
     _, r, _, _ = precision_recall_fscore_support(
         y_true,
         y_pred,
         beta=1,
         labels=labels,
         pos_label=0,
         average=average,
         warn_for=('f-score', ),
         sample_weight=sample_weight)
     return r
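What recall_0 boils down to, as a minimal sketch with toy labels (the labels are assumptions): with pos_label=0 and average='binary', the returned value is the recall of the negative class.

from sklearn.metrics import precision_recall_fscore_support

y_true = [0, 0, 0, 1, 1]
y_pred = [0, 1, 0, 1, 1]
_, r, _, _ = precision_recall_fscore_support(y_true, y_pred, pos_label=0, average='binary')
print(r)  # 2 of the 3 true class-0 samples recovered -> 0.666...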
Example #10
def classification_report_imbalanced(y_true,
                                     y_pred,
                                     labels=None,
                                     target_names=None,
                                     sample_weight=None,
                                     digits=2,
                                     alpha=0.1):
    """Build a classification report based on metrics used with imbalanced
    datasets.

    Specific metrics have been proposed to evaluate classification
    performance on imbalanced datasets. This report compiles the
    state-of-the-art metrics: precision/recall/specificity, geometric
    mean, and index balanced accuracy of the geometric mean.

    Parameters
    ----------
    y_true : ndarray, shape (n_samples, )
        Ground truth (correct) target values.

    y_pred : ndarray, shape (n_samples, )
        Estimated targets as returned by a classifier.

    labels : list, optional
        The set of labels to include when ``average != 'binary'``, and their
        order if ``average is None``. Labels present in the data can be
        excluded, for example to calculate a multiclass average ignoring a
        majority negative class, while labels not present in the data will
        result in 0 components in a macro average.

    target_names : list of strings, optional
        Optional display names matching the labels (same order).

    sample_weight : ndarray, shape (n_samples, )
        Sample weights.

    digits : int, optional (default=2)
        Number of digits for formatting output floating point values

    alpha : float, optional (default=0.1)
        Weighting factor.

    Returns
    -------
    report : string
        Text summary of the precision, recall, specificity, geometric mean,
        and index balanced accuracy.

    Examples
    --------
    >>> import numpy as np
    >>> from imblearn.metrics import classification_report_imbalanced
    >>> y_true = [0, 1, 2, 2, 2]
    >>> y_pred = [0, 0, 2, 2, 1] # doctest : +NORMALIZE_WHITESPACE
    >>> target_names = ['class 0', 'class 1', \
    'class 2'] # doctest : +NORMALIZE_WHITESPACE
    >>> print(classification_report_imbalanced(y_true, y_pred, \
    target_names=target_names))
                       pre       rec       spe        f1       geo       iba\
       sup
    <BLANKLINE>
        class 0       0.50      1.00      0.75      0.67      0.71      0.48\
         1
        class 1       0.00      0.00      0.75      0.00      0.00      0.00\
         1
        class 2       1.00      0.67      1.00      0.80      0.82      0.69\
         3
    <BLANKLINE>
    avg / total       0.70      0.60      0.90      0.61      0.63      0.51\
         5
    <BLANKLINE>

    """

    if labels is None:
        labels = unique_labels(y_true, y_pred)
    else:
        labels = np.asarray(labels)

    last_line_heading = 'avg / total'

    if target_names is None:
        target_names = ['%s' % l for l in labels]
    name_width = max(len(cn) for cn in target_names)
    width = max(name_width, len(last_line_heading), digits)

    headers = ["pre", "rec", "spe", "f1", "geo", "iba", "sup"]
    fmt = '%% %ds' % width  # first column: class name
    fmt += '  '
    fmt += ' '.join(['% 9s' for _ in headers])
    fmt += '\n'

    headers = [""] + headers
    report = fmt % tuple(headers)
    report += '\n'

    # Compute the different metrics
    # Precision/recall/f1
    precision, recall, f1, support = precision_recall_fscore_support(
        y_true,
        y_pred,
        labels=labels,
        average=None,
        sample_weight=sample_weight)
    # Specificity
    specificity = specificity_score(
        y_true,
        y_pred,
        labels=labels,
        average=None,
        sample_weight=sample_weight)
    # Geometric mean
    geo_mean = geometric_mean_score(
        y_pred,
        y_true,
        labels=labels,
        average=None,
        sample_weight=sample_weight)
    # Index balanced accuracy
    iba_gmean = make_index_balanced_accuracy(
        alpha=alpha, squared=True)(geometric_mean_score)
    iba = iba_gmean(
        y_pred,
        y_true,
        labels=labels,
        average=None,
        sample_weight=sample_weight)

    for i, label in enumerate(labels):
        values = [target_names[i]]
        for v in (precision[i], recall[i], specificity[i], f1[i], geo_mean[i],
                  iba[i]):
            values += ["{0:0.{1}f}".format(v, digits)]
        values += ["{0}".format(support[i])]
        report += fmt % tuple(values)

    report += '\n'

    # compute averages
    values = [last_line_heading]
    for v in (np.average(
            precision, weights=support), np.average(
                recall, weights=support), np.average(
                    specificity, weights=support), np.average(
                        f1, weights=support), np.average(
                            geo_mean, weights=support), np.average(
                                iba, weights=support)):
        values += ["{0:0.{1}f}".format(v, digits)]
    values += ['{0}'.format(np.sum(support))]
    report += fmt % tuple(values)
    return report
Example #11
from collections import Counter
import numpy
import Features_manager
import Database_manager
from sklearn.metrics import precision_recall_fscore_support, accuracy_score
from sklearn.model_selection import KFold

database_manager = Database_manager.make_database_manager()
feature_manager = Features_manager.make_feature_manager()

tweets = numpy.array(database_manager.return_tweets())
stance = numpy.array(feature_manager.get_stance(tweets))

count = Counter(stance)
print(count.most_common())
majority_class = count.most_common()[0][0]
test_predict = [majority_class] * len(stance)

prec, recall, f, support = precision_recall_fscore_support(stance,
                                                           test_predict,
                                                           beta=1)

accuracy = accuracy_score(stance, test_predict)

print("f:", (f))
print("p:", (prec))
print("r:", (recall))

print('"MClass"' + '\t' + str(((f[1] + f[2]) / 2)) + '\t' +
      str(((f[0] + f[1] + f[2]) / 3)) + '\n')
Example #12
def main(args):
    """
    Process the tweets, returning an output file
    """
    # read the config file
    cfg = fh.read_config_file(args.cfg)
    file_type = cfg['file_type']

    # check if should produce a report
    report = cfg.get('report_file', None)
    report_f = None
    if report:
        # open the file
        report_f = open(report, 'w')

    # open the TweetsDB connections
    db_train = TweetsDB('sa_train', drop_db=True)
    db_test = TweetsDB('sa_test')

    # read the train file
    print("Processing train file", end=" ")
    train_file_name = cfg['train_file_name']
    train_feat, labels = create_features(train_file_name, file_type, cfg,
                                         db_train)

    # save the dictionary info to db
    tr_stats = r.get_lexs_stats()
    r.reset_lexs_stats()
    print("done")

    if report:
        print(r.pd.DataFrame(tr_stats), file=report_f)

    # read the test file
    print("Processing test file", end=" ")
    test_file_name = cfg['test_file_name']
    test_feat, gold = create_features(test_file_name, file_type, cfg, db_test)

    # get the test stats
    ts_stats = r.get_lexs_stats()
    print("done")

    # create the feature vector
    print("Training model", end=" ")
    vec = DictVectorizer()
    X = vec.fit_transform(train_feat)
    y = np.array(labels)
    X_test = vec.transform(test_feat)

    # train the model
    clf = SGDClassifier(penalty='elasticnet',
                        alpha=0.0001,
                        l1_ratio=0.85,
                        max_iter=1000,
                        n_jobs=-1)
    clf.fit(X, y)
    print("done")

    print("Predicting", end=" ")
    pred = clf.predict(X_test)
    print("done")

    # calculate score
    print("Saving information to db", end=" ")
    score = f1_score(gold, pred, labels=[-1, 1], average='macro')
    prfs = classification.precision_recall_fscore_support(gold, pred)

    # save run time
    run = RunWrapper(pred, gold, score, tr_stats, ts_stats, prfs, clf)
    run_info = run.save()
    print("done")

    # save model
    print("Saving model to file", end=" ")
    date = run_info['date'].strftime('%Y%m%d.%H%M%S')
    model = run_info['model'].split('.')[-1]
    file_name = '{}.{}.pkl'.format(date, model)
    joblib.dump(clf, path.join(cfg['model_out_dir'], file_name))
    print("done")

    # save the predicted values
    tr = fh.TweetReader(test_file_name, file_type)
    output_filename = cfg['output']
    print("Saving output to file,", output_filename, end=" ")
    with codecs.open(output_filename, 'w', 'utf8') as out:
        for tweet, label in zip(tr, pred):
            line = '\t'.join([
                tweet["sid"], tweet["uid"], p.decode_label[label],
                tweet["text"]
            ]) + '\n'
            out.write(line)

            db_test.save_tweet_pred_sent(tweet["sid"], p.decode_label[label])

    print("done")
    print("\n\nRun the following command in the shell")
    print("python scorer.py b %s ../1-Input/twitter-test-GOLD-B.tsv" % output_filename)
Example #13
def train_and_test(alpha,
                   predictors,
                   predictor_params,
                   x_filename,
                   y_filename,
                   n_users,
                   percTest,
                   featureset_to_use,
                   diff_weighting,
                   phi,
                   force_balanced_classes,
                   do_scaling,
                   optimise_predictors,
                   report,
                   conf_report=None):
    # all_X = numpy.loadtxt(x_filename, delimiter=",")
    all_X = numpy.load(x_filename + ".npy")
    all_y = numpy.loadtxt(y_filename, delimiter=",")

    print("loaded X and y files", x_filename, y_filename)

    if numpy.isnan(all_X).any():
        print("nan in", x_filename)
        exit()

    if numpy.isnan(all_y).any():
        print("nan in", y_filename)
        exit()

    #print("selecting balanced subsample")
    print("t t split")
    X_train, X_test, y_train, y_test = train_test_split(all_X,
                                                        all_y,
                                                        test_size=percTest,
                                                        random_state=666)

    # feature extraction
    # test = SelectKBest(score_func=chi2, k=100)
    # kb = test.fit(X_train, y_train)
    # # summarize scores
    # numpy.set_printoptions(precision=3)
    # print(kb.scores_)
    # features = kb.transform(X_train)
    # mask = kb.get_support()
    # # summarize selected features
    # print(features.shape)
    # X_train = X_train[:,mask]
    # X_test = X_test[:,mask]

    scaler = StandardScaler()
    rdim = FeatureAgglomeration(n_clusters=100)
    if do_scaling:
        # input(X_train.shape)
        X_train = rdim.fit_transform(X_train)
        X_test = rdim.transform(X_test)
        X_train = scaler.fit_transform(X_train)
        X_test = scaler.transform(X_test)
        with open('../../../isaac_data_files/qutor_scaler.pkl',
                  'wb') as output:
            pickle.dump(scaler, output, pickle.HIGHEST_PROTOCOL)
        with open('../../../isaac_data_files/qutor_rdim.pkl', 'wb') as output:
            pickle.dump(rdim, output, pickle.HIGHEST_PROTOCOL)

    # print("feature reduction...")
    # pc = PCA(n_components=100)
    # X_train = pc.fit_transform(X_train)
    # X_test = pc.transform(X_test)

    classes = numpy.unique(y_train)
    sample_weights = None
    if (force_balanced_classes):
        X_train, y_train = balanced_subsample(X_train, y_train, 1.0)  #0.118)

    print("X_train shape:", X_train.shape)
    print("X_test shape:", X_test.shape)

    print("tuning classifier ...")
    for ix, p in enumerate(predictors):
        print(type(p))
        print(p.get_params().keys())

        if optimise_predictors == True and len(predictor_params[ix]) > 1:
            pbest = run_random_search(p, X_train, y_train,
                                      predictor_params[ix])
        else:
            pbest = p.fit(X_train, y_train)
        predictors[ix] = pbest

    print("pickling classifier ...")
    for ix, p in enumerate(predictors):
        p_name = predictor_params[ix]['name']
        with open(
                '../../../isaac_data_files/p_{}_{}_{}.pkl'.format(
                    p_name, alpha, phi), 'wb') as output:
            pickle.dump(p, output, pickle.HIGHEST_PROTOCOL)
    print("done!")

    # report.write("* ** *** |\| \` | |  |) /; `|` / |_| *** ** *\n")
    # report.write("* ** *** | | /_ |^|  |) ||  |  \ | | *** ** *\n")
    #report.write("RUNS,P,FB,WGT,ALPHA,PHI,SCL,0p,0r,0F,0supp,1p,1r,1F,1supp,avg_p,avg_r,avg_F,#samples\n")
    for ix, p in enumerate(predictors):

        report.write(",".join(
            map(str, (all_X.shape[0], str(p).replace(",", ";").replace(
                "\n", ""), force_balanced_classes, diff_weighting, alpha, phi,
                      do_scaling))))

        y_pred_tr = p.predict(X_train)
        y_pred = p.predict(X_test)

        # for x,y,yp in zip(X_train, y_test, y_pred):

        if conf_report:
            conf_report.write(
                str(p).replace(",", ";").replace("\n", "") + "\n")
            conf_report.write(str(alpha) + "," + str(phi) + "\n")
            conf_report.write(str(confusion_matrix(y_test, y_pred)) + "\n")
            conf_report.write("\n")
        # p = precision_score(y_test, y_pred, average=None, labels=classes)
        # r = recall_score(y_test, y_pred, average=None, labels=classes)
        # F = f1_score(y_test, y_pred, average=None, labels=classes)
        p, r, F, s = precision_recall_fscore_support(y_test,
                                                     y_pred,
                                                     labels=classes,
                                                     average=None,
                                                     warn_for=('precision',
                                                               'recall',
                                                               'f-score'))
        avp, avr, avF, _ = precision_recall_fscore_support(
            y_test,
            y_pred,
            labels=classes,
            average='weighted',
            warn_for=('precision', 'recall', 'f-score'))
        for ix, c in enumerate(classes):
            report.write(",{},{},{},{},{},".format(c, p[ix], r[ix], F[ix],
                                                   s[ix]))
        report.write("{},{},{},{}\n".format(avp, avr, avF, numpy.sum(s)))

        # report.write(classification_report(y_test, y_pred)+"\n")
        # report.write("------END OF CLASSIFIER------\n")
        report.flush()
    return X_train, X_test, y_pred_tr, y_pred, y_test, scaler
Example #14


for i in range(len(training)):

    for key, clf in clfs.items():
        print(key, label[i])

        tweets_training = training[i]
        tweets_test = test[i]
        stance_training = numpy.array(feature_manager.get_stance(tweets_training))
        stance_test = numpy.array(feature_manager.get_stance(tweets_test))

        # majority-class baseline: always predict the most frequent training stance
        majority_prediction = [Counter(stance_training).most_common()[0][0]] * len(stance_test)

        prec, recall, f, support = precision_recall_fscore_support(
            stance_test, majority_prediction, beta=1)

        accuracy = accuracy_score(stance_test, majority_prediction)
        fmacro = (f[0] + f[1]) / 2

        feature_names = numpy.array(singlefeature[label[i]])
Example #15
 real_values_binary = [
     1 if value >= 0.5 else 0 for value in real_values
 ]
 pred_values_binary = [
     1 if value >= 0.5 else 0 for value in predicted_values
 ]
 number_of_zeros = len(
     [value for value in real_values_binary if value == 0])
 number_of_ones = len(
     [value for value in real_values_binary if value == 1])
 majority_label = 0 if number_of_zeros > number_of_ones else 1
 majority_baseline = [
     majority_label for i in range(len(real_values))
 ]
 acc = accuracy_score(real_values_binary, pred_values_binary)
 metrics = precision_recall_fscore_support(real_values_binary,
                                           pred_values_binary)
 f1 = f1_score(real_values_binary,
               pred_values_binary,
               average="weighted")
 roc = roc_auc_score(real_values_binary, predicted_values)
 acc_maj = accuracy_score(real_values_binary, majority_baseline)
 metrics_maj = precision_recall_fscore_support(
     real_values_binary, majority_baseline)
 f1_maj = f1_score(real_values_binary,
                   majority_baseline,
                   average="weighted")
 roc_maj = roc_auc_score(real_values_binary, majority_baseline)
 b, c = compute_correct_predictions(majority_baseline,
                                    pred_values_binary,
                                    real_values_binary)
 p_value = mcnemar_midp(b, c)
Example #16
    def load_results_from_raw_labels(self,
                                     y_true: np.array,
                                     y_pred: np.array,
                                     conf=None):
        """
        Load results from true labels and predicted labels

        Args:
            y_true: numpy array, true labels
            y_pred: numpy array, predicted labels
            conf: numpy array, confidence scores of predicted labels

        """
        accuracy = accuracy_score(y_pred=y_pred, y_true=y_true)
        auc = None
        self.conf = conf

        if len(self.labels) == 2:
            precision, recall, f_score, true_sum = precision_recall_fscore_support(
                y_true=y_true, y_pred=y_pred, labels=[1])
            if self.conf is not None:
                auc = roc_auc_score(y_true=y_true, y_score=conf)
        else:
            _precision, _recall, _f_score, _ = precision_recall_fscore_support(
                y_true=y_true,
                y_pred=y_pred,
                labels=list(range(len(self.labels))))
            class_precision = {
                key: value
                for key, value in list(zip(self.labels, _precision.tolist()))
            }
            precision = {
                'class': class_precision,
                'average': np.mean(_precision)
            }

            class_recall = {
                key: value
                for key, value in list(zip(self.labels, _recall.tolist()))
            }
            recall = {'class': class_recall, 'average': np.mean(_recall)}

            class_f_score = {
                key: value
                for key, value in list(zip(self.labels, _f_score.tolist()))
            }
            f_score = {'class': class_f_score, 'average': np.mean(_f_score)}

            class_accuracy = {key: '-' for key in self.labels}
            accuracy = {'class': class_accuracy, 'average': accuracy}

            class_auc = {key: '-' for key in self.labels}
            auc = {'class': class_auc, 'average': 'N.A.'}

        if METRIC_PRECISION in self.metric_list:
            self.metric_scores_[METRIC_PRECISION] = precision
        if METRIC_RECALL in self.metric_list:
            self.metric_scores_[METRIC_RECALL] = recall
        if METRIC_F1 in self.metric_list:
            self.metric_scores_[METRIC_F1] = f_score
        if METRIC_ACCURACY in self.metric_list:
            self.metric_scores_[METRIC_ACCURACY] = accuracy
        if METRIC_CM in self.metric_list:
            self.metric_scores_[METRIC_CM] = {
                'labels': self.labels,
                'values': confusion_matrix(y_true=y_true, y_pred=y_pred)
            }
        if METRIC_AUC in self.metric_list:
            self.metric_scores_[METRIC_AUC] = auc
Example #17
                [list(feature_names).index(f) for f in feature_filtered])
            feature_index_filtered = numpy.concatenate(
                feature_index_global[list(feature_index_filtered)])
            #print(feature_name_global[feature_index_filtered])
            X_filter = X[:, feature_index_filtered]
            #print(feature_filtered,X.shape,X_filter.shape)
            predict = []
            golden = []
            for index_train, index_test in kf:

                X_train = X_filter[index_train]
                X_test = X_filter[index_test]

                clf = SVC(kernel='linear')

                clf.fit(X_train, stance[index_train])
                test_predict = clf.predict(X_test)
                predict = numpy.concatenate((predict, test_predict))
                golden = numpy.concatenate((golden, stance[index_test]))

            prec, recall, f, support = precision_recall_fscore_support(golden,
                                                                       predict,
                                                                       beta=1)

            accuracy = accuracy_score(golden, predict)

            print('"' + (' '.join(feature_filtered)) + '"' + '\t' +
                  str(((f[0] + f[1] + f[2]) / 3)) + '\t' +
                  str(((f[1] + f[2]) / 2)) + '\t' + str(prec) + '\t' +
                  str(recall) + '\t' + str(f) + '\n')
Example #18
def classification_report_imbalanced(y_true,
                                     y_pred,
                                     labels=None,
                                     target_names=None,
                                     sample_weight=None,
                                     digits=2,
                                     alpha=0.1):
    """Build a classification report based on metrics used with imbalanced
    datasets.

    Specific metrics have been proposed to evaluate classification
    performance on imbalanced datasets. This report compiles the
    state-of-the-art metrics: precision/recall/specificity, geometric
    mean, and index balanced accuracy of the geometric mean.

    Parameters
    ----------
    y_true : ndarray, shape (n_samples, )
        Ground truth (correct) target values.

    y_pred : ndarray, shape (n_samples, )
        Estimated targets as returned by a classifier.

    labels : list, optional
        The set of labels to include when ``average != 'binary'``, and their
        order if ``average is None``. Labels present in the data can be
        excluded, for example to calculate a multiclass average ignoring a
        majority negative class, while labels not present in the data will
        result in 0 components in a macro average.

    target_names : list of strings, optional
        Optional display names matching the labels (same order).

    sample_weight : ndarray, shape (n_samples, )
        Sample weights.

    digits : int, optional (default=2)
        Number of digits for formatting output floating point values

    alpha : float, optional (default=0.1)
        Weighting factor.

    Returns
    -------
    report : string
        Text summary of the precision, recall, specificity, geometric mean,
        and index balanced accuracy.

    Examples
    --------
    >>> import numpy as np
    >>> from imblearn.metrics import classification_report_imbalanced
    >>> y_true = [0, 1, 2, 2, 2]
    >>> y_pred = [0, 0, 2, 2, 1] # doctest : +NORMALIZE_WHITESPACE
    >>> target_names = ['class 0', 'class 1', \
    'class 2'] # doctest : +NORMALIZE_WHITESPACE
    >>> print(classification_report_imbalanced(y_true, y_pred, \
    target_names=target_names))
                       pre       rec       spe        f1       geo       iba\
       sup
    <BLANKLINE>
        class 0       0.50      1.00      0.75      0.67      0.87      0.77\
         1
        class 1       0.00      0.00      0.75      0.00      0.00      0.00\
         1
        class 2       1.00      0.67      1.00      0.80      0.82      0.64\
         3
    <BLANKLINE>
    avg / total       0.70      0.60      0.90      0.61      0.66      0.54\
         5
    <BLANKLINE>

    """

    if labels is None:
        labels = unique_labels(y_true, y_pred)
    else:
        labels = np.asarray(labels)

    last_line_heading = 'avg / total'

    if target_names is None:
        target_names = ['%s' % l for l in labels]
    name_width = max(len(cn) for cn in target_names)
    width = max(name_width, len(last_line_heading), digits)

    headers = ["pre", "rec", "spe", "f1", "geo", "iba", "sup"]
    fmt = '%% %ds' % width  # first column: class name
    fmt += '  '
    fmt += ' '.join(['% 9s' for _ in headers])
    fmt += '\n'

    headers = [""] + headers
    report = fmt % tuple(headers)
    report += '\n'

    # Compute the different metrics
    # Precision/recall/f1
    precision, recall, f1, support = precision_recall_fscore_support(
        y_true,
        y_pred,
        labels=labels,
        average=None,
        sample_weight=sample_weight)
    # Specificity
    specificity = specificity_score(
        y_true,
        y_pred,
        labels=labels,
        average=None,
        sample_weight=sample_weight)
    # Geometric mean
    geo_mean = geometric_mean_score(
        y_true,
        y_pred,
        labels=labels,
        average=None,
        sample_weight=sample_weight)
    # Index balanced accuracy
    iba_gmean = make_index_balanced_accuracy(
        alpha=alpha, squared=True)(geometric_mean_score)
    iba = iba_gmean(
        y_true,
        y_pred,
        labels=labels,
        average=None,
        sample_weight=sample_weight)

    for i, label in enumerate(labels):
        values = [target_names[i]]
        for v in (precision[i], recall[i], specificity[i], f1[i], geo_mean[i],
                  iba[i]):
            values += ["{0:0.{1}f}".format(v, digits)]
        values += ["{0}".format(support[i])]
        report += fmt % tuple(values)

    report += '\n'

    # compute averages
    values = [last_line_heading]
    for v in (np.average(precision, weights=support), np.average(
            recall, weights=support), np.average(specificity, weights=support),
              np.average(f1, weights=support), np.average(
                  geo_mean, weights=support), np.average(iba,
                                                         weights=support)):
        values += ["{0:0.{1}f}".format(v, digits)]
    values += ['{0}'.format(np.sum(support))]
    report += fmt % tuple(values)
    return report
Example #19
labels_test = numpy.array(feature_manager.get_label(tweets_test))

#feature_type=feature_manager.get_availablefeaturetypes()

feature_type = [
    "numhashtag",
    "puntuactionmarks",
    "length",
]

X, X_test, feature_name, feature_index = feature_manager.create_feature_space(
    tweets_training, feature_type, tweets_test)

print(feature_name)
print("feature space dimension X:", X.shape)
print("feature space dimension X_test:", X_test.shape)

clf = SVC(kernel="linear")

clf.fit(X, labels_training)
test_predict = clf.predict(X_test)

prec, recall, f, support = precision_recall_fscore_support(labels_test,
                                                           test_predict,
                                                           beta=1)

accuracy = accuracy_score(labels_test, test_predict)

print(prec, recall, f, support)
print(accuracy)
Example #20
def classification_report_imbalanced_values(y_true,
                                            y_pred,
                                            labels,
                                            target_names=None,
                                            sample_weight=None,
                                            digits=2,
                                            alpha=0.1):
    """Copy of imblearn.metrics.classification_report_imbalanced to have
    access to the raw values. The code is mostly the same, except that the
    formatting and report-generation code has been removed. Copied from
    version 0.4.3. The original code lives here:
    https://github.com/scikit-learn-contrib/imbalanced-learn/blob/master/imblearn/metrics/_classification.py#L750
    """
    labels = np.asarray(labels)

    if target_names is None:
        target_names = ["%s" % l for l in labels]

    # Compute the different metrics
    # Precision/recall/f1
    precision, recall, f1, support = precision_recall_fscore_support(
        y_true,
        y_pred,
        labels=labels,
        average=None,
        sample_weight=sample_weight)
    # Specificity
    specificity = specificity_score(y_true,
                                    y_pred,
                                    labels=labels,
                                    average=None,
                                    sample_weight=sample_weight)
    # Geometric mean
    geo_mean = geometric_mean_score(y_true,
                                    y_pred,
                                    labels=labels,
                                    average=None,
                                    sample_weight=sample_weight)
    # Index balanced accuracy
    iba_gmean = make_index_balanced_accuracy(
        alpha=alpha, squared=True)(geometric_mean_score)
    iba = iba_gmean(y_true,
                    y_pred,
                    labels=labels,
                    average=None,
                    sample_weight=sample_weight)

    result = {"targets": {}}

    for i, label in enumerate(labels):
        result["targets"][target_names[i]] = {
            "precision": precision[i],
            "recall": recall[i],
            "specificity": specificity[i],
            "f1": f1[i],
            "geo_mean": geo_mean[i],
            "iba": iba[i],
            "support": support[i],
        }

    result["average"] = {
        "precision": np.average(precision, weights=support),
        "recall": np.average(recall, weights=support),
        "specificity": np.average(specificity, weights=support),
        "f1": np.average(f1, weights=support),
        "geo_mean": np.average(geo_mean, weights=support),
        "iba": np.average(iba, weights=support),
        "support": np.sum(support),
    }

    return result
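A possible invocation of classification_report_imbalanced_values; the toy labels are assumptions, and the helper expects precision_recall_fscore_support (sklearn.metrics) plus specificity_score, geometric_mean_score and make_index_balanced_accuracy (imblearn.metrics) to be imported at module level, as in the original file.

y_true = [0, 1, 2, 2, 2]
y_pred = [0, 0, 2, 2, 1]
result = classification_report_imbalanced_values(y_true, y_pred, labels=[0, 1, 2])
print(result["targets"]["0"]["precision"])  # per-class metrics keyed by target name
print(result["average"]["geo_mean"])        # support-weighted averages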
Example #21
    for name_c, c in classifiers.items():
        c.init_run_variables()

    for (train, test), color in zip(cv.split(X_tt, y_tt), colors):
        #predicted_ = clf.fit(X_tt[train], y_tt[train]).predict(X_tt[test])
        #print(metrics.classification_report(y[test], predicted_) )

        for name_c, c in classifiers.items():
            #model = c.clf.fit(X_tt[train], y_tt[train]).best_estimator_
            model = c.clf.fit(X_tt[train], y_tt[train])
            #print "\n",c.clf.get_params(),"\n"
            probas_ = model.predict_proba(X_tt[test])
            y_test_split = model.predict(X_tt[test])

            # precision, recall, F-measure and support
            precision, recall, f1, support = precision_recall_fscore_support(
                y_tt[test], y_test_split)

            c.negclass_f1_sum += f1[0]
            c.negclass_precision_sum += precision[0]
            c.negclass_recall_sum += recall[0]

            c.posclass_f1_sum += f1[1]
            c.posclass_precision_sum += precision[1]
            c.posclass_recall_sum += recall[1]

            # Evaluating over the test set
            y_test_split = model.predict(X_ts)

            # precision, recall, F-measure and support
            precision, recall, f1, support = precision_recall_fscore_support(
                y_ts, y_test_split)