Example #1
import numpy as np
from scipy.stats import wilcoxon


def evaluation(RMSEP, RMSEP_other, y, y_predict, y_other_predict):
    # Percentage improvement of this model's RMSEP over the other model's.
    h = (1 - RMSEP / RMSEP_other) * 100
    y_new = np.subtract(y_predict, y).ravel()
    y_old = np.subtract(y_other_predict, y).ravel()
    # Paired Wilcoxon signed-rank test on the two models' residuals.
    p = wilcoxon(y_new, y_old)
    return p[1], h
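A minimal usage sketch for the helper above, on synthetic data (the arrays and noise levels here are made up for illustration, not taken from the original project):

import numpy as np

rng = np.random.default_rng(0)
y = rng.normal(size=50)                              # synthetic ground truth
y_predict = y + rng.normal(0.0, 0.1, size=50)        # this model's predictions
y_other_predict = y + rng.normal(0.0, 0.3, size=50)  # competing predictions

RMSEP = np.sqrt(np.mean((y_predict - y) ** 2))
RMSEP_other = np.sqrt(np.mean((y_other_predict - y) ** 2))

p_value, h = evaluation(RMSEP, RMSEP_other, y, y_predict, y_other_predict)
print(p_value, h)  # p-value of the paired test and % RMSEP improvement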
Example #2
from os.path import join

import matplotlib.pyplot as plt
from pandas import concat, read_pickle
from scipy.stats import wilcoxon


def codon_bias(outdir):
    # Data source: https://elifesciences.org/articles/41043
    df = read_pickle('./resource/Proc_strains_codons.df')
    thr = df.loc[['ACT', 'ACA', 'ACC', 'ACG']]
    thr = thr.truediv(thr.sum()).T
    ile = df.loc[['ATT', 'ATA', 'ATC']]
    ile = ile.truediv(ile.sum()).T
    print(wilcoxon(thr['ACT'], thr['ACA']))
    print(wilcoxon(thr['ACC'], thr['ACG']))
    print(wilcoxon(ile['ATT'], ile['ATA']))
    _, ax = plt.subplots(1, figsize=(8.2, 5.2), dpi=144)
    bp = ax.violinplot(concat([thr, ile], axis=1).T,
                       positions=[1, 2, 3, 4, 6, 7, 8])
    for partname in ('cbars', 'cmins', 'cmaxes'):
        vp = bp[partname]
        vp.set_edgecolor('k')
        vp.set_linewidth(1)

    [m.set_color('#0d4c7c') for m in bp['bodies'][:4]]
    [m.set_color('#891919') for m in bp['bodies'][-3:]]
    ax.set_ylim(0, 1.)
    ax.set_xticks(range(1, 9))
    plt.savefig(join(outdir, 'Codon_usage.png'), dpi=144)
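A toy illustration of the per-family normalization used above, with made-up counts; each column is divided by its column sum and the result is transposed, so each row (one per strain) sums to 1:

import pandas as pd

df = pd.DataFrame({'strainA': [30, 10, 40, 20], 'strainB': [25, 25, 25, 25]},
                  index=['ACT', 'ACA', 'ACC', 'ACG'])
thr = df.loc[['ACT', 'ACA', 'ACC', 'ACG']]
thr = thr.truediv(thr.sum()).T  # per-strain codon fractions within the Thr family
print(thr)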
Example #3
import numpy as np
import pandas as pd
from scipy.stats import wilcoxon

# perm_mean_diff_test, to_numeric_where_possible and round_numeric_columns are
# helpers from the same project (see the sketch after this example for one
# possible shape of perm_mean_diff_test).
def pairwise_compare_frame(df, with_p_vals=False):
    table_vals = []
    table_indices = []
    param_keys = set(df.keys()) - set(['test', 'time', 'train',
        'test_sample', 'train_sample'])
    for key in param_keys:
        if key == 'dataset_filename' or key == 'test_filename' or key == 'subject_id':
            continue
        possible_vals = df[key].unique()
        for i_value_a in range(0, len(possible_vals) - 1):
            for i_value_b in range(i_value_a + 1, len(possible_vals)):
                val_a = possible_vals[i_value_a]
                val_b = possible_vals[i_value_b]
                frame_1 = df[df[key] == val_a]
                frame_2 = df[df[key] == val_b]
                other_param_keys = list(param_keys - set([key]))
                joined_frame = frame_1.merge(frame_2, on=other_param_keys)
                if joined_frame.size == 0:
                    continue
                accuracies_a = np.array(joined_frame.test_x,
                    dtype=np.float64)
                accuracies_b = np.array(joined_frame.test_y,
                    dtype=np.float64)
                mean_a = np.mean(accuracies_a)
                mean_b = np.mean(accuracies_b)
                # Always put better value first in table
                if mean_a >= mean_b:
                    accuracies_1 = accuracies_a
                    accuracies_2 = accuracies_b
                    mean_1 = mean_a
                    mean_2 = mean_b
                    val_1 = val_a
                    val_2 = val_b
                else:
                    accuracies_1 = accuracies_b
                    accuracies_2 = accuracies_a
                    mean_1 = mean_b
                    mean_2 = mean_a
                    val_1 = val_b
                    val_2 = val_a
                if with_p_vals:
                    if len(accuracies_1) <= 18:
                        diff_perm = perm_mean_diff_test(accuracies_1,
                            accuracies_2) * 100
                    elif len(accuracies_1) <= 62:
                        diff_perm = perm_mean_diff_test(accuracies_1,
                            accuracies_2, n_diffs=2**17) * 100
                    else:
                        _, diff_perm = wilcoxon(accuracies_1,
                            accuracies_2)
                        diff_perm *= 100

                diffs = accuracies_2 - accuracies_1
                diff_std = np.std(diffs)
                diff_mean = np.mean(diffs)
                this_vals = [len(accuracies_1), str(val_1), str(val_2),
                    mean_1, mean_2, diff_mean, diff_std]
                if with_p_vals:
                    this_vals.append(diff_perm)
                table_vals.append(this_vals)
                table_indices.append(key)

    if len(table_vals) == 0:
        return None
    table_vals = np.array(table_vals)
    compare_headers = ['n_exp', 'val_1', 'val_2', 'acc_1', 'acc_2',
                       'diff', 'std']
    if with_p_vals:
        compare_headers.append('p_val')
    compare_frame = pd.DataFrame(table_vals, columns=compare_headers,
                                 index=table_indices)
    compare_frame = to_numeric_where_possible(compare_frame)
    compare_frame = round_numeric_columns(compare_frame, 1)
    return compare_frame
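`perm_mean_diff_test` comes from the surrounding project and is not shown here. As a rough sketch of the interface the branching above assumes (exact enumeration of sign flips for small n, random sign flips when `n_diffs` is given, a p-value that the caller scales by 100), one possible implementation is the following; the original may differ:

from itertools import product

import numpy as np

def perm_mean_diff_test(a, b, n_diffs=None):
    # Sign-flip permutation test on the mean of paired differences.
    # n_diffs=None enumerates all 2**n sign patterns (the n <= 18 branch);
    # otherwise n_diffs random sign patterns are sampled. Two-sided p-value.
    diffs = np.asarray(a, dtype=np.float64) - np.asarray(b, dtype=np.float64)
    observed = abs(diffs.mean())
    n = len(diffs)
    if n_diffs is None:
        signs = np.array(list(product([1, -1], repeat=n)))
    else:
        signs = np.random.default_rng(0).choice([1, -1], size=(n_diffs, n))
    perm_means = np.abs((signs * diffs).mean(axis=1))
    return float(np.mean(perm_means >= observed))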
Example #4
                # Fragment: the enclosing loop and the definitions of b_rec,
                # appro_rec and elementwise_me are not shown above.
                elementwise_sum = b_rec + appro_rec
                nz_ids = elementwise_sum.nonzero()
                elementwise_rpd = np.zeros(b_rec.shape)
                elementwise_rpd[nz_ids] = np.divide(elementwise_me[nz_ids],
                                                    elementwise_sum[nz_ids])

                approachi_rpd.append(elementwise_rpd)
                approachi_shapiro.append(shapiro(appro_rec.T.toarray()[0])[1])

                me_improvement.append(
                    len(np.nonzero(elementwise_rpd <= 0)[0]) /
                    elementwise_rpd.shape[0])
                if approachi_label != baseline_label:
                    h_test = b_rec - appro_rec
                    approachi_wilcoxon.append(
                        wilcoxon(h_test.T.toarray()[0])[1])
                    approachi_wilcoxon_pratt.append(
                        wilcoxon(h_test.T.toarray()[0],
                                 zero_method="pratt")[1])
                    del h_test
                else:
                    approachi_wilcoxon.append(1)
                    approachi_wilcoxon_pratt.append(1)

                del b_rec, appro_rec

            approachi_rpd = np.hstack(approachi_rpd)
            results_dataframe.loc[:, "%sMe_std" %
                                  (approachi_label)] = (approachi_rpd /
                                                        2).std(axis=0)
            results_dataframe.loc[:, "%sRPD_mean" %
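The fragment above (cut off by the page) runs the signed-rank test twice, with the default zero handling and with zero_method="pratt". The difference only shows up when many paired differences are exactly zero, as with sparse recommendation vectors: the default ("wilcox") drops zero differences before ranking, while "pratt" keeps them in the ranking and then discards their rank sum. A toy comparison on made-up differences:

from scipy.stats import wilcoxon

h = [0.0, 0.0, 0.0, 0.5, -0.3, 0.8, 0.1, -0.2]  # hypothetical paired differences
print(wilcoxon(h))                       # zeros dropped before ranking
print(wilcoxon(h, zero_method="pratt"))  # zeros ranked, then their sum dropped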
Example #5
import datetime
import os
import pickle
import time

import numpy as np
from scipy.stats import kendalltau, pearsonr, spearmanr, wilcoxon
from sklearn.metrics import accuracy_score, f1_score, log_loss, roc_auc_score

# load_train_test_data, load_results_data, load_ling_features, get_fold_data,
# collate_AL_results, data_root_dir and resultsfile_template come from the
# surrounding project and are not shown here.
def compute_metrics(expt_settings,
                    methods,
                    datasets,
                    feature_types,
                    embeddings_types,
                    accuracy=1.0,
                    di=0,
                    npairs=0,
                    tag='',
                    remove_seen_from_mean=False,
                    max_no_folds=32,
                    min_folds_desired=0,
                    compute_tr_performance=False,
                    flip_labels=[]):

    expt_settings['acc'] = accuracy
    expt_settings['di'] = di

    row_index = np.zeros(len(methods) * len(datasets), dtype=object)
    columns = np.zeros(len(feature_types) * len(embeddings_types),
                       dtype=object)

    row = 0

    if expt_settings['di'] == 0 or np.ceil(
            float(npairs) / float(expt_settings['di'])) == 0:
        AL_rounds = np.array([0]).astype(int)
    else:
        AL_rounds = np.arange(expt_settings['di'],
                              npairs + expt_settings['di'],
                              expt_settings['di'],
                              dtype=int)

    if tag == '':
        ts = time.time()
        tag = datetime.datetime.fromtimestamp(ts).strftime('%Y-%m-%d-%H-%M-%S')

    for d, dataset_next in enumerate(datasets):

        docids = None

        if expt_settings['dataset'] != dataset_next or expt_settings[
                'folds'] is None:
            expt_settings['dataset'] = dataset_next
            expt_settings['folds'], expt_settings[
                'folds_regression'], _, _, _ = load_train_test_data(
                    expt_settings['dataset'])

        for m, expt_settings['method'] in enumerate(methods):

            if d == 0 and m == 0:

                if expt_settings['di'] == 0:
                    results_shape = (len(methods) * len(datasets),
                                     len(feature_types) *
                                     len(embeddings_types),
                                     len(expt_settings['folds']) + 1, 1)
                else:
                    results_shape = (len(methods) * len(datasets),
                                     len(feature_types) *
                                     len(embeddings_types),
                                     len(expt_settings['folds']) + 1,
                                     int(npairs / expt_settings['di']))

                results_f1 = np.zeros(results_shape)
                results_acc = np.zeros(results_shape)
                results_logloss = np.zeros(results_shape)
                results_auc = np.zeros(results_shape)

                results_pearson = np.zeros(results_shape)
                results_spearman = np.zeros(results_shape)
                results_kendall = np.zeros(results_shape)

                tr_results_f1 = np.zeros(results_shape)
                tr_results_acc = np.zeros(results_shape)
                tr_results_logloss = np.zeros(results_shape)
                tr_results_auc = np.zeros(results_shape)

            row_index[row] = expt_settings['method'] + ', ' + expt_settings[
                'dataset']
            col = 0

            for expt_settings['feature_type'] in feature_types:
                if expt_settings['feature_type'] == 'ling':
                    embeddings_to_use = ['']
                else:
                    embeddings_to_use = embeddings_types
                for expt_settings['embeddings_type'] in embeddings_to_use:
                    data, nFolds, resultsdir, resultsfile = load_results_data(
                        data_root_dir, resultsfile_template, expt_settings,
                        max_no_folds)

                    min_folds = min_folds_desired

                    for f in range(nFolds):
                        print("Processing fold %i" % f)
                        if expt_settings['fold_order'] is None:
                            # fall back to the fold order on the current machine
                            fold = list(expt_settings['folds'].keys())[f]
                        else:
                            fold = expt_settings['fold_order'][f]
                            if fold[-2] == "'" and fold[0] == "'":
                                fold = fold[1:-2]
                            elif fold[-1] == "'" and fold[0] == "'":
                                fold = fold[1:-1]
                            expt_settings['fold_order'][f] = fold

                        # look for new-style data in separate files for each fold. Prefer new-style if both are found.
                        foldfile = resultsdir + '/fold%i.pkl' % f
                        if os.path.isfile(foldfile):
                            with open(foldfile, 'rb') as fh:
                                data_f = pickle.load(fh, encoding='latin1')
                        else:  # convert the old stuff to new stuff
                            if data is None:
                                min_folds = f + 1
                                print('Skipping fold with no data %i' % f)
                                print("Skipping results for %s, %s, %s, %s" %
                                      (expt_settings['method'],
                                       expt_settings['dataset'],
                                       expt_settings['feature_type'],
                                       expt_settings['embeddings_type']))
                                print(
                                    "Skipped filename was: %s, old-style results file would be %s"
                                    % (foldfile, resultsfile))
                                continue

                            if not os.path.isdir(resultsdir):
                                os.mkdir(resultsdir)
                            data_f = []
                            for thing in data:
                                if f in thing:
                                    data_f.append(thing[f])
                                else:
                                    data_f.append(thing)
                            with open(foldfile, 'wb') as fh:
                                pickle.dump(data_f, fh)

                        gold_disc, pred_disc, gold_prob, pred_prob, gold_rank, pred_rank, \
                            pred_tr_disc, pred_tr_prob, postprocced = get_fold_data(
                                data_f, f, expt_settings, flip_labels=m in flip_labels)
                        if postprocced:  # data was postprocessed and needs saving
                            with open(foldfile, 'wb') as fh:
                                pickle.dump(data_f, fh)
                        if pred_tr_disc is not None:
                            print(
                                str(pred_tr_disc.shape) + ', ' +
                                str(pred_prob.shape) + ', ' +
                                str(pred_tr_disc.shape[0] +
                                    pred_prob.shape[0]))

                        for AL_round, _ in enumerate(AL_rounds):
                            if AL_round >= pred_disc.shape[1]:
                                continue
                            results_f1[row, col, f, AL_round] = f1_score(
                                gold_disc[gold_disc != 1],
                                pred_disc[gold_disc != 1, AL_round],
                                average='macro')
                            #skip the don't knows
                            results_acc[row, col, f,
                                        AL_round] = accuracy_score(
                                            gold_disc[gold_disc != 1],
                                            pred_disc[gold_disc != 1,
                                                      AL_round])

                            results_logloss[row, col, f, AL_round] = log_loss(
                                gold_prob[gold_disc != 1],
                                pred_prob[gold_disc != 1, AL_round])

                            results_auc[row, col, f, AL_round] = roc_auc_score(
                                gold_prob[gold_disc != 1],
                                pred_prob[gold_disc != 1, AL_round])  # macro

                            if gold_rank is None and expt_settings[
                                    'folds_regression'] is not None:
                                if docids is None:
                                    _, docids = load_ling_features(
                                        expt_settings['dataset'])
                                # ranking data was not saved in original file. Get it from the expt_settings['folds_regression'] here
                                _, rankscores_test, _, _ = expt_settings[
                                    'folds_regression'].get(fold)["test"]
                                gold_rank = np.array(rankscores_test)

                            if gold_rank is not None and pred_rank is not None:
                                results_pearson[row, col, f,
                                                AL_round] = pearsonr(
                                                    gold_rank,
                                                    pred_rank[:, AL_round])[0]
                                results_spearman[row, col, f,
                                                 AL_round] = spearmanr(
                                                     gold_rank,
                                                     pred_rank[:, AL_round])[0]
                                results_kendall[row, col, f,
                                                AL_round] = kendalltau(
                                                    gold_rank,
                                                    pred_rank[:, AL_round])[0]

                            def mean_unseen(result, remove_seen_from_mean):
                                # Rescale a mean over all N training pairs to
                                # cover only the unseen ones, assuming each of
                                # the Nseen actively-queried pairs scores 1.
                                if not remove_seen_from_mean:
                                    return result

                                N = len(gold_tr)
                                Nseen = (AL_round + 1) * expt_settings['di']
                                Nunseen = N - Nseen
                                return (result * N - Nseen) / Nunseen

                            if pred_tr_prob is not None and AL_round < pred_tr_disc.shape[
                                    1] and compute_tr_performance:
                                _, _, gold_tr, _, _, _, _ = expt_settings[
                                    'folds'].get(fold)["training"]
                                gold_tr = np.array(gold_tr)

                                if (gold_tr !=
                                        1).shape[0] != pred_tr_disc.shape[0]:
                                    print("Mismatch in fold %s! %i, %i" %
                                          (fold, (gold_tr != 1).shape[0],
                                           pred_tr_disc.shape[0]))

                                gold_tr_prob = gold_tr / 2.0

                                tr_results_f1[
                                    row, col, f, AL_round] = mean_unseen(
                                        f1_score(gold_tr[gold_tr != 1],
                                                 pred_tr_disc[gold_tr != 1,
                                                              AL_round],
                                                 average='macro'),
                                        remove_seen_from_mean)
                                #skip the don't knows
                                tr_results_acc[
                                    row, col, f, AL_round] = mean_unseen(
                                        accuracy_score(
                                            gold_tr[gold_tr != 1],
                                            pred_tr_disc[gold_tr != 1,
                                                         AL_round]),
                                        remove_seen_from_mean)

                                tr_results_logloss[
                                    row, col, f, AL_round] = mean_unseen(
                                        log_loss(
                                            gold_tr_prob[gold_tr != 1],
                                            pred_tr_prob[gold_tr != 1,
                                                         AL_round]),
                                        remove_seen_from_mean)

                                tr_results_auc[
                                    row, col, f, AL_round] = mean_unseen(
                                        roc_auc_score(
                                            gold_tr_prob[gold_tr != 1],
                                            pred_tr_prob[gold_tr != 1,
                                                         AL_round]),
                                        remove_seen_from_mean)
                            elif pred_tr_prob is not None and AL_round >= pred_tr_disc.shape[
                                    1]:
                                tr_results_f1[row, col, f, AL_round] = 1
                                tr_results_acc[row, col, f, AL_round] = 1
                                tr_results_auc[row, col, f, AL_round] = 1
                                tr_results_logloss[row, col, f, AL_round] = 0

                        for AL_round in range(results_f1.shape[3]):
                            foldrange = np.arange(
                                min_folds, max_no_folds
                            )  # skip any rounds that did not complete when taking the mean
                            foldrange = foldrange[results_f1[row, col,
                                                             foldrange,
                                                             AL_round] != 0]

                            results_f1[row, col, -1, AL_round] = np.mean(
                                results_f1[row, col, foldrange, AL_round],
                                axis=0)
                            results_acc[row, col, -1, AL_round] = np.mean(
                                results_acc[row, col, foldrange, AL_round],
                                axis=0)
                            results_logloss[row, col, -1, AL_round] = np.mean(
                                results_logloss[row, col, foldrange, AL_round],
                                axis=0)
                            results_auc[row, col, -1, AL_round] = np.mean(
                                results_auc[row, col, foldrange, AL_round],
                                axis=0)

                            results_pearson[row, col, -1, AL_round] = np.mean(
                                results_pearson[row, col, foldrange, AL_round],
                                axis=0)
                            results_spearman[row, col, -1, AL_round] = np.mean(
                                results_spearman[row, col, foldrange,
                                                 AL_round],
                                axis=0)
                            results_kendall[row, col, -1, AL_round] = np.mean(
                                results_kendall[row, col, foldrange, AL_round],
                                axis=0)

                            tr_results_f1[row, col, -1, AL_round] = np.mean(
                                tr_results_f1[row, col, foldrange, AL_round],
                                axis=0)
                            tr_results_acc[row, col, -1, AL_round] = np.mean(
                                tr_results_acc[row, col, foldrange, AL_round],
                                axis=0)
                            tr_results_logloss[
                                row, col, -1, AL_round] = np.mean(
                                    tr_results_logloss[row, col, foldrange,
                                                       AL_round],
                                    axis=0)
                            tr_results_auc[row, col, -1, AL_round] = np.mean(
                                tr_results_auc[row, col, foldrange, AL_round],
                                axis=0)

                    print('p-values for %s, %s, %s, %s:' %
                          (expt_settings['dataset'], expt_settings['method'],
                           expt_settings['feature_type'],
                           expt_settings['embeddings_type']))

                    print(
                        wilcoxon(results_f1[0, 0, foldrange, AL_round],
                                 results_f1[row, col, foldrange, AL_round])[1])
                    print(
                        wilcoxon(results_acc[0, 0, foldrange, AL_round],
                                 results_acc[row, col, foldrange,
                                             AL_round])[1])
                    print(
                        wilcoxon(
                            results_logloss[0, 0, foldrange, AL_round],
                            results_logloss[row, col, foldrange, AL_round])[1])
                    print(
                        wilcoxon(results_auc[0, 0, foldrange, AL_round],
                                 results_auc[row, col, foldrange,
                                             AL_round])[1])
                    print(
                        wilcoxon(
                            results_pearson[0, 0, foldrange, AL_round],
                            results_pearson[row, col, foldrange, AL_round])[1])
                    print(
                        wilcoxon(
                            results_spearman[0, 0, foldrange, AL_round],
                            results_spearman[row, col, foldrange,
                                             AL_round])[1])
                    print(
                        wilcoxon(
                            results_kendall[0, 0, foldrange, AL_round],
                            results_kendall[row, col, foldrange, AL_round])[1])

                    if row == 0:  # set the column headers
                        columns[col] = expt_settings[
                            'feature_type'] + ', ' + expt_settings[
                                'embeddings_type']

                    col += 1

            row += 1

    combined_labels = []
    for row in row_index:
        for col in columns:
            combined_labels.append(str(row) + '_' + str(col))

    mean_results = []
    mean_results.append(
        collate_AL_results(AL_rounds, results_f1, combined_labels,
                           "Macro-F1 scores for round %i: "))
    mean_results.append(
        collate_AL_results(AL_rounds, results_acc, combined_labels,
                           "Accuracy (excl. don't knows), round %i:")
    )  # for UKPConvArgStrict don't knows are already ommitted)
    mean_results.append(
        collate_AL_results(AL_rounds, results_auc, combined_labels,
                           "AUC ROC, round %i:"))
    #if AUC is higher than accuracy and F1 score, it suggests that decision boundary is not calibrated or that
    #accuracy may improve if we exclude data points close to the decision boundary
    mean_results.append(
        collate_AL_results(AL_rounds, results_logloss, combined_labels,
                           "Cross Entropy classification error, round %i: "))
    #(quality of the probability labels is taken into account)
    mean_results.append(
        collate_AL_results(AL_rounds, results_pearson, combined_labels,
                           "Pearson's r for round %i: "))
    mean_results.append(
        collate_AL_results(AL_rounds, results_spearman, combined_labels,
                           "Spearman's rho for round %i: "))
    mean_results.append(
        collate_AL_results(AL_rounds, results_kendall, combined_labels,
                           "Kendall's tau for round %i: "))

    if np.any(tr_results_acc):
        mean_results.append(
            collate_AL_results(AL_rounds, tr_results_f1, combined_labels,
                               "(TR) Macro-F1 scores for round %i: "))
        mean_results.append(
            collate_AL_results(AL_rounds, tr_results_acc, combined_labels,
                               "(TR) Accuracy for round %i: "))
        mean_results.append(
            collate_AL_results(AL_rounds, tr_results_auc, combined_labels,
                               "(TR) AUC ROC for round %i: "))
        mean_results.append(
            collate_AL_results(AL_rounds, tr_results_logloss, combined_labels,
                               "(TR) Cross Entropy Error for round %i: "))


#     metricsfile = data_root_dir + 'outputdata/expt_root_dir' + \
#                     'metrics_%s.pkl' % (tag)
#     with open(metricsfile, 'w') as fh:
#         pickle.dump((results_f1, results_acc, results_auc, results_logloss, results_pearson, results_spearman,
#                      results_kendall), fh)

# TODO: Correlations between reasons and features?

# TODO: Correlations between reasons and latent argument features found using preference components?

    return results_f1, results_acc, results_auc, results_logloss, results_pearson, results_spearman, results_kendall,\
            tr_results_f1, tr_results_acc, tr_results_auc, tr_results_logloss, mean_results, combined_labels
Example #6
import os

import matplotlib.pyplot as plt
import pandas as pd
from scipy.stats import wilcoxon

os.chdir('C:/Users/ikdem/PycharmProjects/Thesis_Analysis/Social_Media_Files')

amt_lags = 12

tweet_polarity = 'polarity'

companies = [
    'aNike Stock lagged correlations',
    'bSteven Madden Stock lagged correlations',
    'cSketchers Stock lagged correlations',
    'dWolverine World Wide Stock lagged correlations'
]

for c in companies:
    for lag in range(0, amt_lags + 1):
        list_correlations = []
        for f in range(0, 30):
            try:
                df = pd.read_excel('file' + str(f) + '_laggedcorrelations' +
                                   tweet_polarity + '.xlsx')
                list_correlations.append(df[c][lag])
            except Exception:  # skip files that are missing or unreadable
                continue
        print(str(f), c, tweet_polarity, lag * 5, wilcoxon(list_correlations))

# For every lag, look at all the files and append each file's correlation at that lag to a list.
# Equivalently, build a list of zeros of the same length and run wilcoxon(list_correlations, list_of_zeros) for each lag; a sketch follows.
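The comment above describes a one-sample test against zero. With scipy the explicit two-sample form and the one-sample call used in the loop give the same result, since wilcoxon(x, y) operates on the differences x - y; a small sketch on hypothetical correlations:

import numpy as np
from scipy.stats import wilcoxon

corrs = [0.12, -0.05, 0.20, 0.07, -0.01, 0.15]  # hypothetical lag correlations
print(wilcoxon(corrs))                          # one-sample form, as in the loop
print(wilcoxon(corrs, np.zeros(len(corrs))))    # explicit test against zeros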
Example #7
import os
import pickle
import re
from collections import defaultdict

import numpy
from scipy.stats import wilcoxon

# `tests` is defined earlier in the original script.
for test in tests:
    for individual_pickle in os.listdir('pickles'):
        if test in individual_pickle:
            print('\n{}\n'.format(re.sub('.pickle', '', individual_pickle)))
            final_results = defaultdict(list)
            with open('pickles/{}'.format(individual_pickle), 'rb') as pickle_file:
                evaluations = pickle.load(pickle_file)[0]
                for test_type, test_results in evaluations.items():
                    for single_novel in test_results:
                        median_within_novel = numpy.median(single_novel)
                        #var_within_novel = numpy.var(single_novel)
                        final_results[test_type].append(median_within_novel)
                        #final_results[test_type].append(var_within_novel)

            print('Median for common nouns: {}'.format(
                numpy.median(final_results['common_nouns'])))
            print('Median for proper names: {}'.format(
                numpy.median(final_results['proper_names'])))
            z_value, p_value = wilcoxon(final_results['common_nouns'][:59],
                                        final_results['proper_names'][:59])
            print('\nP-value: {}\nEffect size: {}'.format(
                p_value,
                abs(z_value /
                    numpy.sqrt(len(final_results['common_nouns'][:59])))))

            ### STD
            #print('Variance for common nouns: {}'.format(numpy.var(final_results['common_nouns'])))
            #print('Variance for proper names: {}'.format(numpy.var(final_results['proper_names'])))
            #z_value, p_value = wilcoxon(final_results['common_nouns'][:59], final_results['proper_names'][:59])
            #print('\nP-value: {}\nEffect size: {}'.format(p_value, abs(z_value/numpy.sqrt(len(final_results['common_nouns'][:59])))))
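One caveat about the effect size above: scipy's wilcoxon returns the rank-sum statistic W, not a z-score, so dividing the first return value by sqrt(N) is not the usual r = Z / sqrt(N). A sketch of recovering Z via the normal approximation (a rough approximation that ignores tie corrections):

import numpy as np
from scipy.stats import wilcoxon

def wilcoxon_effect_size(x, y):
    # r = |Z| / sqrt(N), with Z recovered from W using the null
    # distribution of the signed-rank statistic.
    d = np.asarray(x, dtype=float) - np.asarray(y, dtype=float)
    n = np.count_nonzero(d)  # default zero handling drops zero differences
    w, p = wilcoxon(x, y)
    mu = n * (n + 1) / 4.0
    sigma = np.sqrt(n * (n + 1) * (2 * n + 1) / 24.0)
    z = (w - mu) / sigma
    return abs(z) / np.sqrt(n), p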
Example #8
from numpy import average, median
from scipy.stats import wilcoxon

#tree = [96.19047619047619, 96.28571428571429, 95.61904761904762, 96.0, 96.57142857142857, 96.57142857142857, 95.14285714285714, 96.0, 96.19047619047619, 95.9047619047619, 96.28571428571429, 95.61904761904762, 97.33333333333333, 94.85714285714286, 95.14285714285714, 94.28571428571429, 94.19047619047619, 96.38095238095238, 95.04761904761905, 94.85714285714286, 96.19047619047619, 96.38095238095238, 96.38095238095238, 96.47619047619048, 96.19047619047619, 94.57142857142857, 96.38095238095238, 96.47619047619048, 96.47619047619048, 94.95238095238095, 96.19047619047619, 95.14285714285714, 95.71428571428571, 94.85714285714286, 95.9047619047619, 96.66666666666667, 94.95238095238095, 94.57142857142857, 94.19047619047619, 94.19047619047619, 95.9047619047619, 95.9047619047619, 94.66666666666667, 96.0952380952381, 96.0, 95.9047619047619, 97.14285714285714, 96.19047619047619, 96.28571428571429, 96.19047619047619]
#C457NN = [96.0, 94.95238095238095, 94.28571428571429, 95.61904761904762, 95.23809523809524, 96.38095238095238, 95.23809523809524, 96.0, 95.04761904761905, 96.0952380952381, 96.19047619047619, 94.57142857142857, 96.47619047619048, 96.38095238095238, 95.42857142857143, 94.85714285714286, 94.57142857142857, 96.19047619047619, 94.76190476190476, 94.47619047619048, 94.95238095238095, 95.61904761904762, 96.19047619047619, 96.0, 95.04761904761905, 95.33333333333333, 96.28571428571429, 95.52380952380952, 96.47619047619048, 95.61904761904762, 95.80952380952381, 95.14285714285714, 96.0952380952381, 95.04761904761905, 95.33333333333333, 96.85714285714286, 95.23809523809524, 96.0952380952381, 93.52380952380952, 95.52380952380952, 95.71428571428571, 96.0, 94.57142857142857, 96.66666666666667, 96.0, 95.61904761904762, 96.38095238095238, 95.61904761904762, 96.0, 95.61904761904762]


tree = [96.18604651162791, 96.18604651162791, 96.37209302325581, 96.46511627906976, 96.74418604651163, 96.09302325581395, 95.90697674418605, 96.18604651162791, 96.27906976744185, 96.27906976744185, 96.09302325581395, 96.18604651162791, 96.65116279069767, 96.18604651162791, 95.81395348837209, 96.0, 96.55813953488372, 96.18604651162791, 95.72093023255815, 95.72093023255815, 96.0, 95.90697674418605, 96.09302325581395, 96.0, 96.46511627906976, 96.18604651162791, 96.09302325581395, 95.90697674418605, 95.81395348837209, 96.55813953488372, 96.0, 96.46511627906976, 96.0, 96.09302325581395, 96.18604651162791, 95.72093023255815, 96.27906976744185, 95.62790697674419, 94.79069767441861, 95.81395348837209, 96.09302325581395, 96.18604651162791, 96.37209302325581, 96.37209302325581, 96.27906976744185, 96.09302325581395, 96.46511627906976, 96.74418604651163, 96.0, 96.18604651162791]

C457NN = [94.13953488372093, 95.53488372093024, 96.46511627906976, 95.44186046511628, 95.72093023255815, 95.53488372093024, 95.06976744186046, 96.0, 94.88372093023256, 95.81395348837209, 94.97674418604652, 94.69767441860465, 96.37209302325581, 95.25581395348837, 94.79069767441861, 95.53488372093024, 96.46511627906976, 96.0, 95.90697674418605, 95.62790697674419, 95.81395348837209, 94.32558139534883, 95.16279069767442, 94.4186046511628, 94.97674418604652, 96.0, 95.81395348837209, 95.34883720930233, 95.72093023255815, 95.90697674418605, 95.53488372093024, 95.72093023255815, 95.25581395348837, 95.62790697674419, 96.55813953488372, 96.37209302325581, 96.09302325581395, 94.51162790697674, 95.16279069767442, 94.79069767441861, 95.25581395348837, 94.69767441860465, 96.46511627906976, 95.44186046511628, 95.81395348837209, 96.55813953488372, 95.25581395348837, 96.46511627906976, 94.97674418604652, 94.97674418604652]


print('average tree =', average(tree))
print('average C4.5(7NN) =', average(C457NN))
print('median tree =', median(tree))
print('median C4.5(7NN) =', median(C457NN))


print('wilcoxon test for J48 or C4.5(7NN):', wilcoxon(tree, C457NN))


Example #9
import pandas as pd
from scipy.stats import wilcoxon

amt_lags = 48
file = 'Own_Classifier'
tweet_stock = 'polarity'

for lag in range(0, amt_lags + 1):
    list_correlations = []
    for f in range(2, 55):
        try:
            df = pd.read_excel('../thesis_files/Correlations_4_hour_lag/' +
                               file + str(f) + '_laggedcorrelations_' +
                               tweet_stock + '.xlsx')
            list_correlations.append(df['SP500 lagged correlations'][lag])
        except Exception:  # skip files that are missing or unreadable
            continue
    print(file, tweet_stock, lag * 5, wilcoxon(list_correlations))


# For every lag, look at all the files and append each file's correlation at that lag to a list.
# Equivalently, build a list of zeros of the same length and run wilcoxon(list_correlations, list_of_zeros) for each lag.
Example #10
def pairwise_compare_frame(df,
                           with_p_vals=False,
                           result_cols=('test', 'time', 'train', 'test_sample',
                                        'train_sample'),
                           compare_col='test'):
    table_vals = []
    table_indices = []
    param_keys = set(df.keys()) - set(list(result_cols))
    for key in param_keys:
        if key == 'dataset_filename' or key == 'test_filename' or key == 'subject_id':
            continue
        possible_vals = df[key].unique()
        for i_value_a in range(0, len(possible_vals) - 1):
            for i_value_b in range(i_value_a + 1, len(possible_vals)):
                val_a = possible_vals[i_value_a]
                val_b = possible_vals[i_value_b]
                frame_1 = df[df[key] == val_a]
                frame_2 = df[df[key] == val_b]
                other_param_keys = list(param_keys - set([key]))
                joined_frame = frame_1.merge(frame_2, on=other_param_keys)
                if joined_frame.size == 0:
                    continue
                accuracies_a = np.array(joined_frame[compare_col + '_x'],
                                        dtype=np.float64)
                accuracies_b = np.array(joined_frame[compare_col + '_y'],
                                        dtype=np.float64)
                mean_a = np.mean(accuracies_a)
                mean_b = np.mean(accuracies_b)
                # Always put better value first in table
                if mean_a >= mean_b:
                    accuracies_1 = accuracies_a
                    accuracies_2 = accuracies_b
                    mean_1 = mean_a
                    mean_2 = mean_b
                    val_1 = val_a
                    val_2 = val_b
                else:
                    accuracies_1 = accuracies_b
                    accuracies_2 = accuracies_a
                    mean_1 = mean_b
                    mean_2 = mean_a
                    val_1 = val_b
                    val_2 = val_a
                if with_p_vals:
                    if len(accuracies_1) <= 18:
                        diff_perm = perm_mean_diff_test(
                            accuracies_1, accuracies_2) * 100
                    elif len(accuracies_1) <= 62:
                        diff_perm = perm_mean_diff_test(
                            accuracies_1, accuracies_2, n_diffs=2**17) * 100
                    else:
                        _, diff_perm = wilcoxon(accuracies_1, accuracies_2)
                        diff_perm *= 100

                diffs = accuracies_2 - accuracies_1
                diff_std = np.std(diffs)
                diff_mean = np.mean(diffs)
                this_vals = [
                    len(accuracies_1),
                    str(val_1),
                    str(val_2), mean_1, mean_2, diff_mean, diff_std
                ]
                if with_p_vals:
                    this_vals.append(diff_perm)
                table_vals.append(this_vals)
                table_indices.append(key)

    if len(table_vals) == 0:
        return None
    table_vals = np.array(table_vals)
    compare_headers = [
        'n_exp', 'val_1', 'val_2', 'acc_1', 'acc_2', 'diff', 'std'
    ]
    if with_p_vals:
        compare_headers.append('p_val')
    compare_frame = pd.DataFrame(table_vals,
                                 columns=compare_headers,
                                 index=table_indices)
    compare_frame = to_numeric_where_possible(compare_frame)
    compare_frame = round_numeric_columns(compare_frame, 1)
    return compare_frame
Example #11
    def signif(self, files, bucktype='none'):
        """Compute signification of 3 input sets: test, system_output1, system_output2
        """
        if len(files) != 3:
            raise ValueError(
                "You must supply 3 input files for `signif` command")

        if bucktype not in ['none', 'dialog']:
            raise ValueError("Unknown `bucktype`: %r" % bucktype)

        self.logger.debug("Importing scipy")
        # scipy.stats no longer provides mean/median/sqrt; use numpy for those.
        from numpy import mean, median, sqrt
        from scipy.stats import t, tstd, tvar, wilcoxon

        forest1, forest2, forest3 = self.loadForestFiles(files)

        self.logger.info("Processing forests 1 and 2")

        diff1 = {}
        for fn, tree1, tree2, dist, script in self.forestProcessor(
                forest1, forest2):
            H, D, I, S = script.HDIS
            n_errors = D + I + S
            fn = self.filenameKey(fn, bucktype)
            diff1.setdefault(fn, 0.)
            diff1[fn] += n_errors

        self.logger.info("Processing forests 1 and 3")

        diff2 = {}
        for fn, tree1, tree2, dist, script in self.forestProcessor(
                forest1, forest3):
            H, D, I, S = script.HDIS
            n_errors = D + I + S
            fn = self.filenameKey(fn, bucktype)
            diff2.setdefault(fn, 0.)
            diff2[fn] += n_errors

        def mapsswe(x, y):
            # Matched-pairs (MAPSSWE-style) test: a paired t-test on the
            # per-utterance error-count differences.
            xm = mean(x)
            ym = mean(y)
            s = 0.
            n = 0.
            for xi, yi in zip(x, y):
                s += ((xi - yi) - (xm - ym))**2
                n += 1

            t_stat = sqrt(n) * abs(xm - ym) / sqrt(s / (n - 1.))
            p_value = t.sf(t_stat, n - 1) * 2
            return t_stat, p_value

        Z_values = []
        w1 = []
        w2 = []
        for key in sorted(diff1.keys()):
            if key not in diff2:
                self.logger.error("Unmatched utterance: %r", key)
                continue
            Na = diff1.pop(key)
            Nb = diff2.pop(key)
            w1.append(Na)
            w2.append(Nb)
            Z_values.append(Na - Nb)

        Z_mean = mean(Z_values)
        Z_median = median(Z_values)
        Z_tvar = tvar(Z_values)
        Z_tstd = tstd(Z_values)

        wilcoxon_t_stat, wilcoxon_p_value = wilcoxon(w1, w2)

        mapsswe_w_stat, mapsswe_p_value = mapsswe(w1, w2)

        fw = sys.stdout
        fw.write("Z stats:\n")
        fw.write("========\n")
        fw.write("  - mean:     %9.3f\n" % Z_mean)
        fw.write("  - median:   %9.3f\n" % Z_median)
        fw.write("  - tvar:     %9.3f\n" % Z_tvar)
        fw.write("  - tstd:     %9.3f\n\n" % Z_tstd)
        fw.write("Wilcoxon test:\n")
        fw.write("==============\n")
        fw.write(
            "  - p-value:  %9.3f (two-tailed) [significant if <= 0.05]\n" %
            wilcoxon_p_value)
        fw.write("  - t-stat:   %9.3f\n\n" % wilcoxon_t_stat)
        fw.write("MAPSSWE test:\n")
        fw.write("=============\n")
        fw.write(
            "  - p-value:  %9.3f (two-tailed) [significant if <= 0.05]\n" %
            mapsswe_p_value)
        fw.write("  - t-stat:   %9.3f\n\n" % mapsswe_w_stat)
Example #12
# Fragment: the ignoreAttributes*/useAllAttributes* result lists and the
# plotting helpers (createXYSpreadGraph, createHistogram) are defined earlier
# in the original script.
useAllAverages.append(average(useAllAttributes1NN))
useAllAverages.append(average(useAllAttributes3NN))
useAllAverages.append(average(useAllAttributes5NN))
useAllAverages.append(average(useAllAttributes7NN))
useAllAverages.append(average(useAllAttributes9NN))
useAllAverages.append(average(useAllAttributes11NN))
useAllAverages.append(average(useAllAttributes13NN))
useAllAverages.append(average(useAllAttributes15NN))
useAllAverages.append(average(useAllAttributes17NN))
useAllAverages.append(average(useAllAttributes19NN))
useAllAverages.append(average(useAllAttributes21NN))
useAllAverages.append(average(useAllAttributes23NN))
useAllAverages.append(average(useAllAttributes25NN))


print('wilcoxon test for use all attributes or ignore all:', wilcoxon(ignoreAttributes3NN, useAllAttributes3NN))
print('wilcoxon test for use all attributes or ignore all:', wilcoxon(ignoreAttributes5NN, useAllAttributes5NN))
print('wilcoxon test for use all attributes or ignore all:', wilcoxon(ignoreAttributes7NN, useAllAttributes7NN))
print('wilcoxon test for use all attributes or ignore all:', wilcoxon(ignoreAttributes9NN, useAllAttributes9NN))
print('wilcoxon test for use all attributes or ignore all:', wilcoxon(ignoreAttributes11NN, useAllAttributes11NN))
print('wilcoxon test for use all attributes or ignore all:', wilcoxon(ignoreAttributes13NN, useAllAttributes13NN))
print('wilcoxon test for use all attributes or ignore all:', wilcoxon(ignoreAttributes15NN, useAllAttributes15NN))
print('wilcoxon test for use all attributes or ignore all:', wilcoxon(ignoreAttributes17NN, useAllAttributes17NN))

print('shapiro normality test', shapiro(ignoreAttributes7NN))

createXYSpreadGraph(ignoreAverages, useAllAverages, 'ignore used attributes in KNN', 'use all attributes in KNN')


# the histogram is not very educational... results are not distributed normally.
createHistogram(ignoreAttributes7NN, useAllAttributes7NN, 'histogram: % correctness for required data set', '7NN ignore attributes used by tree', '7NN use all attributes')
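The closing comment notes that the accuracies are not normally distributed, which is the usual reason for preferring the signed-rank test over a paired t-test here. A minimal sketch of that decision rule, assuming scipy is available (compare_paired is a hypothetical helper, not part of the original script):

from scipy.stats import shapiro, ttest_rel, wilcoxon

def compare_paired(a, b, alpha=0.05):
    # Use a paired t-test when the differences look normal,
    # otherwise fall back to the distribution-free signed-rank test.
    diffs = [x - y for x, y in zip(a, b)]
    if shapiro(diffs)[1] < alpha:
        return 'wilcoxon', wilcoxon(a, b)
    return 'ttest_rel', ttest_rel(a, b)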
Example #13
from numpy import average
from scipy.stats import wilcoxon

# new range:
new_range_ig_naive_bayse = [0.7169642857142857, 0.6879464285714286, 0.7044642857142858, 0.6910714285714286, 0.6941964285714286, 0.6955357142857143, 0.6959821428571429, 0.6915178571428572, 0.6901785714285714, 0.6745535714285714, 0.6821428571428572, 0.6790178571428571, 0.6683035714285714, 0.6669642857142857, 0.6486607142857143, 0.646875, 0.6464285714285715, 0.6455357142857143, 0.6450892857142857, 0.6495535714285714, 0.6508928571428572, 0.6508928571428572, 0.6513392857142857, 0.6504464285714285, 0.646875, 0.646875, 0.6477678571428571, 0.6464285714285715, 0.6464285714285715, ]
new_range_ig_linear_svm = [0.7098214285714286, 0.6861607142857142, 0.6991071428571428, 0.6959821428571429, 0.6825892857142857, 0.6915178571428572, 0.6785714285714286, 0.6959821428571429, 0.6799107142857143, 0.6745535714285714, 0.6763392857142857, 0.6785714285714286, 0.6772321428571428, 0.6638392857142857, 0.6651785714285714, 0.6638392857142857, 0.6642857142857143, 0.6575892857142858, 0.6580357142857143, 0.6598214285714286, 0.6540178571428571, 0.6544642857142857, 0.6571428571428571, 0.6665178571428572, 0.6647321428571429, 0.6616071428571428, 0.6638392857142857, 0.6651785714285714, 0.6584821428571429, ]
new_range_ig_hyperbolic_svm = [0.6964285714285714, 0.6870535714285714, 0.6977678571428572, 0.6941964285714286, 0.6866071428571429, 0.6986607142857143, 0.6848214285714286, 0.6834821428571428, 0.6901785714285714, 0.671875, 0.6700892857142857, 0.6790178571428571, 0.6566964285714286, 0.6611607142857143, 0.6709821428571429, 0.6598214285714286, 0.6678571428571428, 0.6696428571428571, 0.6709821428571429, 0.6602678571428572, 0.6629464285714286, 0.6669642857142857, 0.6714285714285714, 0.6665178571428572, 0.6700892857142857, 0.6584821428571429, 0.6633928571428571, 0.6709821428571429, 0.665625, ]

stochastic_naive_bayse = [0.5205357142857143, 0.565625, 0.6017857142857143, 0.6200892857142857, 0.6352678571428572, 0.6142857142857143, 0.6348214285714285, 0.6433035714285714, 0.6303571428571428, 0.6334821428571429, 0.640625, 0.6410714285714286, 0.6508928571428572, 0.6495535714285714, 0.6486607142857143, 0.659375, 0.6575892857142858, 0.6607142857142857, 0.6714285714285714, 0.6741071428571429, 0.6683035714285714, 0.6776785714285715, 0.6651785714285714, 0.6830357142857143, 0.6852678571428571, 0.6700892857142857, 0.6732142857142858, 0.6803571428571429, 0.6741071428571429, ]
stochastic_linear_svm = [0.5294642857142857, 0.5428571428571428, 0.6013392857142857, 0.6334821428571429, 0.6272321428571429, 0.603125, 0.6129464285714286, 0.640625, 0.6419642857142858, 0.6508928571428572, 0.6544642857142857, 0.6580357142857143, 0.6633928571428571, 0.66875, 0.6714285714285714, 0.6464285714285715, 0.6723214285714286, 0.6785714285714286, 0.6772321428571428, 0.6803571428571429, 0.6803571428571429, 0.6879464285714286, 0.6830357142857143, 0.6839285714285714, 0.7008928571428571, 0.6982142857142857, 0.69375, 0.6745535714285714, 0.6910714285714286, ]
stochastic_hyperbolic_svm = [0.5482142857142858, 0.5732142857142857, 0.6022321428571429, 0.628125, 0.6196428571428572, 0.6366071428571428, 0.6142857142857143, 0.634375, 0.65, 0.6540178571428571, 0.6522321428571428, 0.6763392857142857, 0.6736607142857143, 0.66875, 0.6736607142857143, 0.6660714285714285, 0.6736607142857143, 0.6696428571428571, 0.6607142857142857, 0.6741071428571429, 0.7008928571428571, 0.6857142857142857, 0.6799107142857143, 0.6852678571428571, 0.6830357142857143, 0.6794642857142857, 0.690625, 0.6799107142857143, 0.6897321428571429, ]

pca_hyperbolic_svm = [0.6571428571428571, 0.6352678571428572, 0.6160714285714286, 0.5513392857142857, 0.6169642857142857, 0.6223214285714286, 0.5508928571428572, 0.6352678571428572, 0.6321428571428571, 0.634375, 0.6450892857142857, 0.6267857142857143, 0.6410714285714286, 0.646875, 0.6361607142857143, 0.6459821428571428, 0.6348214285714285, 0.6375, 0.6433035714285714, 0.6424107142857143, 0.6388392857142857, 0.6303571428571428, 0.6419642857142858, 0.6334821428571429, 0.6285714285714286, 0.6375, 0.6330357142857143, 0.6379464285714286, 0.6375, ]
pca_linear_svm = [0.6316964285714286, 0.6196428571428572, 0.5370535714285715, 0.35401785714285716, 0.603125, 0.6017857142857143, 0.60625, 0.6160714285714286, 0.6013392857142857, 0.6053571428571428, 0.621875, 0.6330357142857143, 0.6410714285714286, 0.63125, 0.6241071428571429, 0.6366071428571428, 0.6339285714285714, 0.6370535714285714, 0.6223214285714286, 0.6339285714285714, 0.6348214285714285, 0.6392857142857142, 0.6263392857142858, 0.6223214285714286, 0.6415178571428571, 0.6339285714285714, 0.6357142857142857, 0.6392857142857142, 0.6223214285714286, ]
pca_naive_bayse = [0.3665178571428571, 0.3879464285714286, 0.590625, 0.5785714285714286, 0.378125, 0.5794642857142858, 0.6040178571428572, 0.35625, 0.5785714285714286, 0.5785714285714286, 0.5803571428571429, 0.6236607142857142, 0.5790178571428571, 0.5763392857142857, 0.5816964285714286, 0.5763392857142857, 0.5741071428571428, 0.625, 0.5665178571428572, 0.5696428571428571, 0.5919642857142857, 0.63125, 0.5852678571428571, 0.5816964285714286, 0.5803571428571429, 0.5910714285714286, 0.5848214285714286, 0.5803571428571429, 0.5803571428571429, ]

list1 = new_range_ig_naive_bayse +  new_range_ig_linear_svm + new_range_ig_hyperbolic_svm
list2 = stochastic_naive_bayse + stochastic_linear_svm + stochastic_hyperbolic_svm
list3 = pca_hyperbolic_svm + pca_linear_svm + pca_naive_bayse

print('ig VS stochastic')
print(wilcoxon(list1, list2))
print('avg ig =', average(list1), 'avg stochastic =', average(list2))

print('ig VS PCA')
print(wilcoxon(list1, list3))
print('avg ig =', average(list1), 'avg PCA =', average(list3))

print('stochastic VS PCA')
print(wilcoxon(list2, list3))
print('avg stochastic =', average(list2), 'avg PCA =', average(list3))
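Running three pairwise tests on the same lists, as above, inflates the family-wise error rate. A Holm step-down correction is one standard remedy; a sketch reusing the lists and the wilcoxon import from this example:

pairs = [('ig VS stochastic', list1, list2),
         ('ig VS PCA', list1, list3),
         ('stochastic VS PCA', list2, list3)]
p_values = [wilcoxon(a, b)[1] for _, a, b in pairs]

# Holm: multiply the i-th smallest p-value by (m - i), enforcing monotonicity.
m = len(p_values)
order = sorted(range(m), key=lambda i: p_values[i])
running_max = 0.0
for rank, i in enumerate(order):
    running_max = max(running_max, min(1.0, p_values[i] * (m - rank)))
    print(pairs[i][0], 'Holm-adjusted p =', running_max)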
