Пример #1
0
def test_valid_percentages():
    """Check which values ``assert_valid_percent`` accepts and which it rejects."""
    # interior values are expected to pass with the default options
    for pct in (0.5, 0.9, 0.1):
        assert_valid_percent(pct)

    # the bounds themselves are expected to raise ValueError by default
    for bound in (0.0, 1.0):
        assert_raises(ValueError, assert_valid_percent, x=bound)

    # each bound is expected to pass once its matching eq_* flag is set
    assert_valid_percent(x=1.0, eq_upper=True)
    assert_valid_percent(x=0.0, eq_lower=True)
Пример #2
0
def insert_missing_values(df, percent_rows, random_state=None):
    """
    Insert missing values into a copy of a data frame.

    A random sample of rows is drawn and, in each sampled row, one randomly
    chosen column (never the last one) is set to NaN. The frame passed in
    is not modified; all writes happen on a copy.

    :param df: data frame we're operating on
    :param percent_rows: the fraction of rows that should have a missing
        value. Any falsy value (0, False, None) returns an unmodified copy.
    :param random_state: the numpy RandomState (resolved through
        get_random_state before use)
    :return: a copy of df with missing values
    """
    # get the initialized random_state (if not already initialized)
    random_state = get_random_state(random_state)
    df = df.copy()

    # Truthiness check: zero, False or None all mean "nothing to null",
    # so the untouched copy is handed back.
    if not percent_rows:
        return df

    def _insert_random_null(row):
        """
        Choose a random column in a single row and set it to NaN.

        This writes into the row it is given, which is fine because the
        row comes from the copy made above.

        :param row: one row of the data frame, as passed by apply(axis=1)
        :return: the same row with one entry replaced by NaN
        """
        # -1 because last col will always be y; randint's upper bound is
        # exclusive, so the last position is never drawn.
        # NOTE(review): assumes the frame has at least two columns,
        # otherwise the draw range is empty - confirm against callers.
        position = random_state.randint(0, len(row) - 1)
        # .iloc keeps the write positional whatever the column labels are;
        # a bare row[position] would be label-based on integer labels and
        # relies on a deprecated fallback on non-integer labels.
        row.iloc[position] = np.nan
        return row

    # otherwise validate that it's a float
    # (eq_lower is not necessary because percent_rows != 0 at this point)
    percent_rows = assert_valid_percent(percent_rows, eq_upper=True)

    # random sample of rows to null
    sample_index = df.sample(frac=percent_rows, random_state=random_state).index
    df.loc[sample_index] = df.loc[sample_index].apply(_insert_random_null, axis=1)
    return df
Пример #3
0
def test_score_dataset_multiclass():
    """Score the test CSV files and validate the first result as a percent."""
    scores = score_dataset(y_hat_file="y_hat_test.csv", y_file="y_test.csv")
    first_score = scores[0]
    assert_valid_percent(first_score, "Not a valid percent for Accuracy")
Пример #4
0
def test_score_datasetclassification():
    """Score the test CSV files and validate the first result as a percent."""
    scores = score_dataset(y_hat_file="y_hat_test.csv", y_file="y_test.csv")
    first_score = scores[0]
    assert_valid_percent(first_score, "Not a valid percent for AUC")
Пример #5
0
def test_invalid_percetages(pct):
    """Expect ``assert_valid_percent`` to reject ``pct`` with a ValueError."""
    expected_error = ValueError
    with pytest.raises(expected_error):
        assert_valid_percent(x=pct)
Пример #6
0
def test_valid_percentages(pct, kwargs):
    """Expect ``assert_valid_percent`` to accept ``pct`` with the given options."""
    validator_options = kwargs
    assert_valid_percent(pct, **validator_options)