示例#1
0
def test_fast_cvm(n_samples=1000):
    """Check that `_cvm_2samp_fast` agrees with `cvm_2samp`.

    The first sample is uniformly random with uniformly random weights;
    the second sample is a random subset of the first (each event kept
    when an independent uniform draw exceeds 0.5). The fast variant is
    fed the first sample after it went through `prepare_distribution`.

    :param n_samples: number of events drawn for the first sample.
    """
    rng = RandomState()
    full_sample = rng.uniform(size=n_samples)
    full_weights = rng.uniform(size=n_samples)
    keep = rng.uniform(size=n_samples) > 0.5
    sub_sample, sub_weights = full_sample[keep], full_weights[keep]

    # Reference value from the plain two-sample implementation.
    expected = cvm_2samp(full_sample, sub_sample, full_weights, sub_weights)

    # Same comparison through the precomputed-distribution code path.
    prep_sample, prep_weights, prep_cdf = prepare_distribution(full_sample, full_weights)
    actual = _cvm_2samp_fast(prep_sample, sub_sample, prep_weights, sub_weights, cdf1=prep_cdf)

    assert numpy.allclose(expected, actual)
示例#2
0
def test_ks2samp_fast(size=1000):
    """Cross-check the Kolmogorov-Smirnov implementations on random data.

    A uniform sample and a random subset of it are compared using
    `ks_2samp` (first element of its result), `_ks_2samp_fast` (called
    twice with identical arguments) and `ks_2samp_weighted` with constant
    weights; the results are required to be close to each other.

    :param size: number of events in the first sample.
    """
    sample = RandomState().uniform(size=size)
    subset = sample[RandomState().uniform(size=size) > 0.5]

    reference = ks_2samp(sample, subset)[0]

    prepared = prepare_distribution(sample, numpy.ones(len(sample)))
    prep_sample, prep_w, prep_cdf = prepared

    # Two identical invocations; a fresh unit-weight array is built for each.
    fast_first, fast_second = [
        _ks_2samp_fast(prep_sample, subset, prep_w, numpy.ones(len(subset)), cdf1=prep_cdf)
        for _ in range(2)
    ]

    # Constant (but different) weights for the two samples.
    weighted = ks_2samp_weighted(
        sample, subset, numpy.ones(len(sample)) / 3, numpy.ones(len(subset)) / 4)

    # Looser tolerance only for the comparison with `ks_2samp`.
    assert numpy.allclose(reference, fast_first, rtol=1e-2, atol=1e-3)
    assert numpy.allclose(fast_first, fast_second)
    assert numpy.allclose(fast_first, weighted)
    print('ks2samp is ok')
def group_based_cvm(y_pred, mask, sample_weight, groups_indices):
    """Weighted sum over groups of `_cvm_2samp_fast` against the masked sample.

    :param y_pred: predictions, passed through `column_or_1d`.
    :param mask: index/mask selecting the events of the reference distribution.
    :param sample_weight: per-event weights, passed through `check_sample_weight`.
    :param groups_indices: iterable of index collections, one per group.
    :return: sum of (group weight) * (value of `_cvm_2samp_fast` for the group).
    """
    y_pred = column_or_1d(y_pred)
    sample_weight = check_sample_weight(y_pred, sample_weight=sample_weight)
    weights_of_groups = compute_group_weights_by_indices(groups_indices, sample_weight=sample_weight)

    # The reference distribution is prepared once and reused for every group.
    reference = prepare_distribution(y_pred[mask], weights=sample_weight[mask])
    ref_data, ref_weight, ref_cdf = reference

    total = 0.
    for indices, weight in zip(groups_indices, weights_of_groups):
        distance = _cvm_2samp_fast(
            ref_data, y_pred[indices], ref_weight, sample_weight[indices], ref_cdf)
        total += weight * distance
    return total
def groups_based_ks(y_pred, mask, sample_weight, groups_indices):
    """Kolmogorov-Smirnov flatness on groups.

    :param y_pred: predictions; indexed by `mask` and by each group's indices.
    :param mask: index/mask (same length as `y_pred`) selecting the reference events.
    :param sample_weight: per-event weights, same length as `y_pred`.
    :param groups_indices: iterable of index collections, one per group.
    :return: sum of (group weight) * (value of `_ks_2samp_fast` for the group).
    """
    assert len(y_pred) == len(sample_weight) == len(mask)
    weights_of_groups = compute_group_weights_by_indices(groups_indices, sample_weight=sample_weight)

    # Reference distribution (masked events), prepared once for all groups.
    reference = prepare_distribution(y_pred[mask], weights=sample_weight[mask])
    ref_data, ref_weight, ref_cdf = reference

    total = 0.
    for weight, indices in zip(weights_of_groups, groups_indices):
        distance = _ks_2samp_fast(
            ref_data, y_pred[indices], ref_weight, sample_weight[indices], ref_cdf)
        total += weight * distance
    return total
示例#5
0
def groups_based_ks(y_pred, mask, sample_weight, groups_indices):
    """Kolmogorov-Smirnov flatness on groups.

    Each group's distribution of predictions is compared (via
    `_ks_2samp_fast`) with the distribution of the events selected by
    `mask`; the per-group values are summed with the weights returned by
    `compute_group_weights_by_indices`.

    `y_pred`, `sample_weight` and `mask` must have equal lengths.
    """
    assert len(y_pred) == len(sample_weight) == len(mask)
    per_group_weight = compute_group_weights_by_indices(
        groups_indices, sample_weight=sample_weight)
    masked_data, masked_weight, masked_cdf = prepare_distribution(
        y_pred[mask], weights=sample_weight[mask])

    flatness = 0.
    for weight, members in zip(per_group_weight, groups_indices):
        group_pred = y_pred[members]
        group_w = sample_weight[members]
        ks_value = _ks_2samp_fast(masked_data, group_pred, masked_weight, group_w, masked_cdf)
        flatness += weight * ks_value
    return flatness
示例#6
0
def group_based_cvm(y_pred, mask, sample_weight, groups_indices):
    """Group-based Cramer-von Mises measure.

    Predictions and weights are first normalized with `column_or_1d` and
    `check_sample_weight`. Each group's predictions are then compared (via
    `_cvm_2samp_fast`) with the predictions selected by `mask`, and the
    per-group values are summed with the weights returned by
    `compute_group_weights_by_indices`.
    """
    y_pred = column_or_1d(y_pred)
    sample_weight = check_sample_weight(y_pred, sample_weight=sample_weight)
    per_group_weight = compute_group_weights_by_indices(
        groups_indices, sample_weight=sample_weight)

    masked_data, masked_weight, masked_cdf = prepare_distribution(
        y_pred[mask], weights=sample_weight[mask])

    accumulated = 0.
    for members, weight in zip(groups_indices, per_group_weight):
        group_pred = y_pred[members]
        group_w = sample_weight[members]
        cvm_value = _cvm_2samp_fast(masked_data, group_pred, masked_weight, group_w, masked_cdf)
        accumulated += weight * cvm_value
    return accumulated
def bin_based_cvm(y_pred, sample_weight, bin_indices):
    """Cramer-von Mises similarity, quite slow meanwhile.

    :param y_pred: predictions, one per event.
    :param sample_weight: per-event weights, same length as `y_pred`.
    :param bin_indices: bin number of every event, same length as `y_pred`.
    :return: sum over bins of (bin weight) * (value of `_cvm_2samp_fast`
        between the full sample and the events of the bin).
    """
    assert len(y_pred) == len(sample_weight) == len(bin_indices)
    weights_of_bins = compute_bin_weights(bin_indices=bin_indices, sample_weight=sample_weight)

    # The full sample serves as the reference for every bin.
    reference = prepare_distribution(y_pred, weights=sample_weight)
    ref_data, ref_weight, ref_cdf = reference

    total = 0.
    for bin_id, weight in enumerate(weights_of_bins):
        # Bins with non-positive weight contribute nothing.
        if weight <= 0:
            continue
        in_bin = bin_indices == bin_id
        distance = _cvm_2samp_fast(
            ref_data, y_pred[in_bin], ref_weight, sample_weight[in_bin], ref_cdf)
        total += weight * distance

    return total
def bin_based_ks(y_pred, mask, sample_weight, bin_indices):
    """Kolmogorov-Smirnov flatness on bins.

    All inputs are first restricted to the events selected by `mask`, so
    both the reference distribution and the per-bin distributions contain
    only those events.

    :param y_pred: predictions, one per event (must support indexing by
        `mask` and by a boolean array — presumably a numpy array).
    :param mask: index/mask of the events to use, same length as `y_pred`.
    :param sample_weight: per-event weights, same length as `y_pred`.
    :param bin_indices: bin number of every event, same length as `y_pred`.
    :return: sum over bins of (bin weight) * (value of `_ks_2samp_fast`
        between the masked sample and the masked events of the bin).
    """
    assert len(y_pred) == len(sample_weight) == len(bin_indices) == len(mask)
    y_pred = y_pred[mask]
    sample_weight = sample_weight[mask]
    bin_indices = bin_indices[mask]

    bin_weights = compute_bin_weights(bin_indices=bin_indices, sample_weight=sample_weight)
    prepared_data, prepared_weight, prep_F = prepare_distribution(y_pred, weights=sample_weight)

    result = 0.
    # `bin_id` rather than `bin`, so the builtin is not shadowed.
    for bin_id, bin_weight in enumerate(bin_weights):
        # Bins with non-positive weight contribute nothing.
        if bin_weight <= 0:
            continue
        # Build the membership mask once and reuse it for both arrays.
        bin_mask = bin_indices == bin_id
        local_distribution = y_pred[bin_mask]
        local_weights = sample_weight[bin_mask]
        result += bin_weight * _ks_2samp_fast(
            prepared_data, local_distribution, prepared_weight, local_weights, prep_F)
    return result
示例#9
0
def bin_based_cvm(y_pred, sample_weight, bin_indices):
    """Cramer-von Mises similarity, quite slow meanwhile.

    For every bin with positive weight, the predictions of the events in
    that bin are compared (via `_cvm_2samp_fast`) with the predictions of
    the whole sample; the per-bin values are summed with the weights
    returned by `compute_bin_weights`.

    `y_pred`, `sample_weight` and `bin_indices` must have equal lengths.
    """
    assert len(y_pred) == len(sample_weight) == len(bin_indices)
    per_bin_weight = compute_bin_weights(bin_indices=bin_indices,
                                         sample_weight=sample_weight)

    whole_data, whole_weight, whole_cdf = prepare_distribution(
        y_pred, weights=sample_weight)

    accumulated = 0.
    for bin_number, weight in enumerate(per_bin_weight):
        if weight <= 0:
            # Nothing to add for an empty / zero-weight bin.
            continue
        members = bin_indices == bin_number
        bin_pred = y_pred[members]
        bin_w = sample_weight[members]
        cvm_value = _cvm_2samp_fast(whole_data, bin_pred, whole_weight, bin_w, whole_cdf)
        accumulated += weight * cvm_value

    return accumulated
示例#10
0
def bin_based_ks(y_pred, mask, sample_weight, bin_indices):
    """Kolmogorov-Smirnov flatness on bins.

    Only the events selected by `mask` are used: predictions, weights and
    bin numbers are all restricted to them before anything is computed.
    For each bin with positive weight, the bin's predictions are compared
    (via `_ks_2samp_fast`) with all masked predictions, and the per-bin
    values are summed with the weights returned by `compute_bin_weights`.

    :param y_pred: predictions, one per event (must support indexing by
        `mask` and by a boolean array — presumably a numpy array).
    :param mask: index/mask of the events to use, same length as `y_pred`.
    :param sample_weight: per-event weights, same length as `y_pred`.
    :param bin_indices: bin number of every event, same length as `y_pred`.
    :return: the weighted sum described above.
    """
    assert len(y_pred) == len(sample_weight) == len(bin_indices) == len(mask)
    y_pred = y_pred[mask]
    sample_weight = sample_weight[mask]
    bin_indices = bin_indices[mask]

    bin_weights = compute_bin_weights(bin_indices=bin_indices,
                                      sample_weight=sample_weight)
    prepared_data, prepared_weight, prep_F = prepare_distribution(
        y_pred, weights=sample_weight)

    result = 0.
    # Loop variable named `bin_id` to avoid shadowing the builtin `bin`.
    for bin_id, bin_weight in enumerate(bin_weights):
        if bin_weight <= 0:
            # Zero-weight bins are skipped entirely.
            continue
        # One comparison per bin instead of one per indexed array.
        bin_mask = bin_indices == bin_id
        local_distribution = y_pred[bin_mask]
        local_weights = sample_weight[bin_mask]
        result += bin_weight * _ks_2samp_fast(
            prepared_data, local_distribution, prepared_weight, local_weights, prep_F)
    return result