Example #1
import numpy as np
import pandas as pd
import researchpy as rp

# Assumed to be provided by the surrounding project (not shown in the snippet):
# metrics, get_prop_resp, generate_stat_sentence and the SELECTION constant.
def get_rel_responder_abs_df(treatment, con, dfs=None, use_percentage=None, use_labels=None):
    if SELECTION != "resp":
        return
    df_prop, df_resp = get_prop_resp(treatment)
    df_prop[df_resp.columns] = df_resp

    df_prop_full, df_resp_ref = get_prop_resp("t12a")
    resp_values = metrics.get_data(metrics.get_rel_responder_abs_df(df_prop))
    resp_ref_values = metrics.get_data(metrics.get_rel_responder_abs_df(df_prop_full))

    table, res = rp.ttest(pd.Series(resp_values), pd.Series(resp_ref_values), paired=False)
    # researchpy ttest results by row index: 0 mean difference, 1 degrees of freedom,
    # 2 t statistic, 3 two-sided p value, 9 Pearson's r (effect size)
    diff = res.results[0]
    dof = res.results[1]
    s = res.results[2]
    p = res.results[3]
    r = res.results[9]

    
    print("Conclusion: ",
          generate_stat_sentence(np.mean(resp_ref_values), np.std(resp_ref_values),
                                 np.mean(resp_values), np.std(resp_values),
                                 s, p, dof, diff=diff,
                                 label1="t12.dss", label2=treatment + ".dss"))


    print("Table:", table)        
    print("Res:", res)

    res = {
        "rel. min_offer T12": metrics.get_mean(resp_ref_values),
        "rel. min_offer T13": metrics.get_mean(resp_values),
        # "rejection_ratio": rejection_ratio(df_prop)
    }
    test_label = "(ttest independent) H0: equal"
    res = {k: (f"{v:.3f}" if pd.notnull(v) and v != int(v) else v) for k, v in res.items()}
    res["min_offer" + test_label] = f"{s:.3f} (p: {p:.3f}, r: {r:.3f})"


    print()

    return res
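A hedged usage sketch for the helper above (the treatment code "t13a" is a placeholder, and treating `con` as the project's database connection is an assumption; neither appears in the snippet itself):

# Compares responder values of the given treatment against the "t12a" reference
# and returns a dict of formatted summary statistics (requires SELECTION == "resp").
row = get_rel_responder_abs_df("t13a", con)
print(row)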
Example #2
import os
import os.path
import numpy as np
import xgboost as xgb
from metrics import F1, get_data, apply_PCA, feature_selection

if __name__ == "__main__":
    (training_samples, validation_samples, test_samples,
     training_labels, validation_labels) = get_data()
    # training_samples = feature_selection(training_samples, training_labels)
    # validation_samples = feature_selection(validation_samples, validation_labels)
    # test_samples = feature_selection(test_samples)
    # training_samples = apply_PCA(training_samples)
    # validation_samples = apply_PCA(validation_samples)
    # test_samples = apply_PCA(test_samples)
    dtrain = xgb.DMatrix(training_samples, training_labels)
    # dtest = xgb.DMatrix(validation_samples)
    dtest = xgb.DMatrix(test_samples)
    # max_depths = [32, 64, 128, 256, 512]
    # num_rounds = [32, 64, 128, 256, 512]
    # learning_rates = [0.8, 1]
    max_depths = [64]
    num_rounds = [64]
    learning_rates = [0.8]
    for max_depth in max_depths:
        for num_round in num_rounds:
            for learning_rate in learning_rates:
                param = {
                    'max_depth': max_depth,
                    'eta': learning_rate,
                    'silent': 1,
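                    # --- The original snippet is truncated at this point. A hedged sketch of a
                    # --- plausible continuation (objective, booster call and output path are
                    # --- assumptions, not taken from the source):
                    'objective': 'binary:logistic',  # assumption: binary classification task
                }
                bst = xgb.train(param, dtrain, num_boost_round=num_round)
                predictions = bst.predict(dtest)  # scores for the held-out test set
                np.save("xgb_test_predictions.npy", predictions)  # hypothetical output location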
Example #3
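# Assumes the same module-level imports and project helpers as Example #1
# (numpy as np, pandas as pd, researchpy as rp, metrics, get_prop_resp,
#  generate_stat_sentence / generate_cat_stat_sentence).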
def _get_prop_vs_prop_dss_score(treatment,
                                con,
                                dfs=None,
                                use_percentage=None,
                                use_labels=None,
                                metric=None,
                                as_percentage=None,
                                is_categorical=None,
                                alternative=None):
    df_prop, df_resp = get_prop_resp(treatment)
    df_prop_ref, df_resp_ref = get_prop_resp("t11a")

    print(metric.__name__)

    metric_values = metric(df_prop)
    metric_value = metrics.get_mean(metric_values)

    metric_ref_values = metric(df_prop_ref)
    metric_value_ref = metrics.get_mean(metric_ref_values)

    metric_values = metrics.get_data(metric_values)
    metric_ref_values = metrics.get_data(metric_ref_values)

    dof = 0
    diff = None
    if is_categorical:
        table, res = rp.crosstab(pd.Series(metric_ref_values),
                                 pd.Series(metric_values),
                                 test='chi-square')
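        # researchpy chi-square results, in order: test statistic, p-value, Cramér's phi/V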
        s, p, r = res.results.values

        print(
            "Conclusion: ",
            generate_cat_stat_sentence(np.mean(metric_ref_values),
                                       np.std(metric_ref_values),
                                       np.mean(metric_values),
                                       np.std(metric_values),
                                       s,
                                       p,
                                       dof,
                                       diff=diff,
                                       label1="t11a.dss",
                                       label2=treatment + ".dss"))
        test_label = "(Pearson chi2)"
    else:

        #print("Ranksums", stats.ranksums(metric_ref_values, metric_values))

        table, res = rp.ttest(pd.Series(metric_ref_values),
                              pd.Series(metric_values),
                              paired=False)
        # researchpy ttest results by row index: 0 mean difference, 1 degrees of freedom,
        # 2 t statistic, 3 two-sided p, 4 p(diff < 0), 5 p(diff > 0), 9 Pearson's r
        diff = res.results[0]
        dof = res.results[1]
        s = res.results[2]
        r = res.results[9]
        if alternative == "greater":
            p = res.results[4]
        elif alternative == "less":
            p = res.results[5]
        else:  # None or 'two-sided'
            p = res.results[3]

        print(
            "Conclusion: ",
            generate_stat_sentence(np.mean(metric_ref_values),
                                   np.std(metric_ref_values),
                                   np.mean(metric_values),
                                   np.std(metric_values),
                                   s,
                                   p,
                                   dof,
                                   diff=diff,
                                   label1="t11a.dss",
                                   label2=treatment + ".dss"))

        test_label = f"(ttest independent) H0: {'equal' if alternative in {None, 'two-sided'} else alternative}"
    print("RESUME: ", res)
    print("TABLE: ", table)

    if as_percentage:
        res = {
            # "Proposer": f'{100 * prop_value:.2f} %',
            "Proposer + DSS": f'{100 * metric_value:.2f} %',
            "T11A": f'{100 * metric_value_ref:.2f} %',
            "prop:dss - auto prop": f'{100 * (metric_value - metric_value_ref):.2f} %',
        }
    else:
        res = {
            # "Proposer": f'{prop_value:.2f}',
            "Proposer + DSS": f'{metric_value:.2f}',
            "T11A": f'{metric_value_ref:.2f}',
            "prop:dss - auto prop": f'{(metric_value - metric_value_ref):.2f}',
        }
    if is_categorical:
        res[test_label] = f"{s:.3f} (p: {p:.3f}, phi: {r:.3f})"
    else:
        res[test_label] = f"{s:.3f} (p: {p:.3f}, r: {r:.3f})"
    return res
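A hedged usage sketch for the helper above (the treatment code "t11b" is a placeholder, and using metrics.get_rel_responder_abs_df from Example #1 as the metric is an assumption about a metric that accepts a proposer dataframe):

row = _get_prop_vs_prop_dss_score("t11b", con,
                                  metric=metrics.get_rel_responder_abs_df,
                                  as_percentage=False,
                                  alternative="two-sided")
print(row)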
Example #4
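# Assumes the same module-level imports and project helpers as Example #1.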
def _get_prop_vs_prop_dss_score(treatment,
                                con,
                                dfs=None,
                                use_percentage=None,
                                use_labels=None,
                                metric=None,
                                as_percentage=None,
                                is_categorical=None,
                                alternative=None):
    df_prop, df_resp = get_prop_resp(treatment)
    df_prop_t10, df_resp_t10 = get_prop_resp("t10a")

    metric_values = metric(df_prop)
    metric_value = metrics.get_mean(metric_values)

    metric_t10_values = metric(df_prop_t10)
    metric_value_t10 = metrics.get_mean(metric_t10_values)

    metric_values = metrics.get_data(metric_values)
    metric_t10_values = metrics.get_data(metric_t10_values)
    #print(stats.chisquare(metric_values[:103], metric_t10_values[:103]))

    dof = 0
    diff = None

    print(metric.__name__)
    if is_categorical:
        #table, res = rp.crosstab(pd.Series(metric_values), pd.Series(metric_t10_values), test='g-test')
        table, res = rp.crosstab(pd.Series(metric_values),
                                 pd.Series(metric_t10_values),
                                 test='fisher')
        #print(table, res)
        #s, p, r = res.results
        s = res.results[0]
        p = res.results[1]
        r = res.results[4]

        test_label = "(Fisher's exact test)"

        print(
            "Conclusion: ",
            generate_cat_stat_sentence(np.mean(metric_t10_values),
                                       np.std(metric_t10_values),
                                       np.mean(metric_values),
                                       np.std(metric_values),
                                       s,
                                       p,
                                       dof,
                                       diff=diff,
                                       label1="t10a.dss",
                                       label2=treatment + ".dss"))
        print(
            pd.crosstab(pd.Series(metric_t10_values),
                        pd.Series(metric_values)))
    else:

        table, res = rp.ttest(pd.Series(metric_t10_values),
                              pd.Series(metric_values),
                              paired=False)
        # researchpy ttest results by row index: 0 mean difference, 1 degrees of freedom,
        # 2 t statistic, 3 two-sided p, 4 p(diff < 0), 5 p(diff > 0), 9 Pearson's r
        diff = res.results[0]
        dof = res.results[1]
        s = res.results[2]
        r = res.results[9]
        if alternative == "greater":
            p = res.results[4]
        elif alternative == "less":
            p = res.results[5]
        else:  # None or 'two-sided'
            p = res.results[3]

        print(
            "Conclusion: ",
            generate_stat_sentence(np.mean(metric_t10_values),
                                   np.std(metric_t10_values),
                                   np.mean(metric_values),
                                   np.std(metric_values),
                                   s,
                                   p,
                                   dof,
                                   diff=diff,
                                   label1="t10a.dss",
                                   label2=treatment + ".dss"))
        test_label = f"(ttest independent) H0: {'equal' if alternative in {None, 'two-sided'} else alternative}"
    print("TABLE: ", table)
    print("TEST: ", res)

    if as_percentage:
        res = {
            "Proposer + DSS": f'{100 * metric_value:.2f} %',
            "T10": f'{100 * metric_value_t10:.2f} %',
        }
    else:
        res = {
            "Proposer + DSS": f'{metric_value:.2f}',
            "T10": f'{metric_value_t10:.2f}',
        }
    if is_categorical:
        res[test_label] = f"{s:.3f} (p: {p:.3f}, phi: {r:.3f})"
    else:
        res[test_label] = f"{s:.3f} (p: {p:.3f}, r: {r:.3f})"
    return res
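Since each of these helpers returns a flat dict of labelled, pre-formatted values, per-treatment results can be collected into one overview table. A minimal sketch, assuming the helpers and the project's metrics module are importable, and that `con` and the treatment codes are placeholders:

import pandas as pd

rows = {
    t: _get_prop_vs_prop_dss_score(t, con, metric=metrics.get_rel_responder_abs_df)
    for t in ("t12a", "t13a")  # hypothetical treatment codes
}
# one row per treatment, one column per labelled statistic
print(pd.DataFrame(rows).T.to_string())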