Пример #1
0
def has_normal_distribution(dataset, var_data, alpha):
    """Run the normality test on a single variable and compare it to alpha.

    Args:
        dataset: Data source handed to ``is_continuous_var`` and ``get_data``.
        var_data: Sequence holding exactly one ``VarData`` instance.
        alpha: Threshold that element 1 of the test result must exceed.

    Returns:
        A 2-tuple ``(passed, results)``: ``results`` is whatever
        ``compute_normal_distribution`` returned, and ``passed`` is
        ``results[1] > alpha`` (index 1 is presumably the p-value --
        confirm against ``compute_normal_distribution``).

    Raises:
        AssertionError: If ``var_data`` does not hold exactly one
            ``VarData``, or the variable is not continuous. These checks
            are plain ``assert`` statements, so they are skipped when
            Python runs with ``-O``.
    """
    assert len(var_data) == 1
    variable = var_data[0]
    assert isinstance(variable, VarData)

    # Only a continuous variable can be normally distributed.
    assert is_continuous_var(dataset, var_data, alpha)

    # Fetch the variable's values from the dataset via its own query.
    values = get_data(dataset, variable)
    outcome = compute_normal_distribution(values)

    passed = outcome[1] > alpha
    return passed, outcome
Пример #2
0
 def test_compute_normal_distribution(self):
     # Regression check: compute_normal_distribution must reproduce the
     # recorded p_value and W for a fixed 25-value sample, to 10 places.
     #
     # The sample is hard-coded, so the seed call does not affect it; it is
     # kept so the global NumPy RNG state is the same as before.
     np.random.seed(0)
     sample = [
         3.03319599, -1.9392511, 5.19637956,
         4.93035686, 6.67777232, 3.33221652,
         4.48231727, 5.7451167, 1.74927197,
         -1.13835557, 4.71136013, 0.94368067,
         3.45836918, 4.80279979, 6.41550484,
         5.19735433, 3.68366857, 5.19261866,
         6.08280386, 1.21590325, 4.67839939,
         2.15511797, 8.16360168, 4.04091618,
         6.77094548,
     ]

     result = compute_normal_distribution(sample)

     self.assertAlmostEqual(0.2147399485, result.p_value, places=10)
     self.assertAlmostEqual(0.9470322132, result.W, places=10)
Пример #3
0
def has_groups_normal_distribution(dataset, var_data, alpha):
    """Check that each category group of each continuous outcome looks normal.

    For every (continuous explained variable, categorical explanatory
    variable) pair, the outcome's values are selected once per category and
    each selection is passed to ``compute_normal_distribution``.

    Args:
        dataset: Object exposing ``select(column, where=[...])``.
        var_data: Either a ``CombinedData`` instance, or an iterable of
            variables whose ``role`` marks them as explanatory
            (``iv_identifier`` / ``contributor_identifier``) or explained
            (``dv_identifier`` / ``outcome_identifier``).
        alpha: Threshold; a group fails when element 1 of its test result
            is ``<= alpha`` (index 1 is presumably the p-value -- confirm
            against ``compute_normal_distribution``).

    Returns:
        ``(False, result)`` for the first failing group, otherwise
        ``(True, result)`` where ``result`` is the last test result, or
        ``None`` if no group was tested (no categorical explanatory or no
        continuous explained variable).
    """
    if isinstance(var_data, CombinedData):
        xs = var_data.get_explanatory_variables()
        ys = var_data.get_explained_variables()
    else:
        xs = []
        ys = []
        for var in var_data:
            if var.role == iv_identifier or var.role == contributor_identifier:
                xs.append(var)
            if var.role == dv_identifier or var.role == outcome_identifier:
                ys.append(var)

    cat_xs = [x for x in xs if x.is_categorical()]
    cont_ys = [y for y in ys if y.is_continuous()]

    result = None

    # Iterate over the *filtered* variables: only categorical explanatory
    # variables define category groups, and only continuous outcomes can be
    # tested for normality. If either list is empty the loops do not run and
    # (True, None) is returned.
    for y in cont_ys:
        for x in cat_xs:
            # Snapshot the category keys; the mapped values are not needed.
            category_keys = list(x.metadata[categories])

            # Build the groups for this (y, x) pair only, so groups from
            # earlier pairs are not re-tested.
            groups = [
                dataset.select(
                    y.metadata[name],
                    where=[f"{x.metadata[name]} == '{category}'"])
                for category in category_keys
            ]

            for group in groups:
                result = compute_normal_distribution(group)
                if result[1] <= alpha:
                    return False, result

    return True, result