예제 #1
0
def test_filter_subsets(min, max):
    """Check that size-bounded ``UpSet`` data matches manual filtering.

    Builds one unfiltered ``UpSet`` and one constructed with
    ``min_subset_size``/``max_subset_size``, then verifies that the bounded
    object's ``intersections`` and ``_df`` equal what is obtained by
    filtering the unfiltered results by hand.

    Parameters
    ----------
    min, max
        Lower and upper bounds on subset size; the manual filter below
        treats both as inclusive. The names shadow builtins but are kept
        because they are this test's externally bound argument names
        (presumably supplied by a pytest parametrization not shown here).
    """
    data = generate_samples(0, 5000, 3)
    upset_data = UpSet(data, subset_size='auto')
    subset_upset_data = UpSet(data,
                              subset_size='auto',
                              min_subset_size=min,
                              max_subset_size=max)
    intersections = upset_data.intersections
    df = upset_data._df

    # Reference result: keep only the intersections whose size is in bounds.
    in_bounds = (intersections >= min) & (intersections <= max)
    subset_intersections = intersections[in_bounds]
    # Take an explicit copy: a '_bin' column is assigned further down, and
    # writing into the result of a boolean-mask selection would otherwise
    # raise pandas' SettingWithCopyWarning.
    subset_df = df[df.index.isin(subset_intersections.index)].copy()
    assert_series_equal(subset_upset_data.intersections, subset_intersections)

    def _pack_binary(X):
        """Fold the columns of ``X``, left to right, into one base-2 code."""
        X = pd.DataFrame(X)
        out = 0
        for _, col in X.items():
            out *= 2
            out += col
        return out

    # Encode each index row as a single number so that rows of the filtered
    # frame can be matched to the position of their intersection.
    subset_df_packed = _pack_binary(subset_df.index.to_frame())
    subset_data_packed = _pack_binary(subset_intersections.index.to_frame())
    # Expected '_bin': position of the row's intersection within the
    # filtered intersections (0 .. n-1), looked up through the packed code.
    subset_df['_bin'] = pd.Series(subset_df_packed).map(
        pd.Series(np.arange(len(subset_data_packed)),
                  index=subset_data_packed))
    assert_frame_equal(subset_upset_data._df, subset_df)
예제 #2
0
def test_not_unique(sort_by, sort_categories_by):
    """Compare processing of ``generate_counts()`` and unit-valued samples.

    Runs ``_process_data`` with identical options on the output of
    ``generate_counts()`` and on the ``'value'`` column of
    ``generate_samples()`` after every entry has been overwritten with 1.
    The resulting intersections and totals must agree (dtype ignored), both
    frames must expose exactly the ``_value`` and ``_bin`` columns, and the
    sample-based frame must keep one row per input entry with as many
    distinct bins as there are intersections.

    Parameters
    ----------
    sort_by, sort_categories_by
        Forwarded unchanged to ``_process_data``.
    """
    options = dict(
        sort_by=sort_by,
        sort_categories_by=sort_categories_by,
        subset_size='sum',
        sum_over=None,
    )

    counts_input = generate_counts()
    counts_df, counts_intersections, counts_totals = _process_data(
        counts_input, **options)

    samples_input = generate_samples()['value']
    # Give every sample a weight of one so that summing counts the samples.
    samples_input.loc[:] = 1
    samples_df, samples_intersections, samples_totals = _process_data(
        samples_input, **options)

    assert_series_equal(counts_intersections, samples_intersections,
                        check_dtype=False)
    assert_series_equal(counts_totals, samples_totals, check_dtype=False)

    expected_columns = {'_value', '_bin'}
    for frame in (counts_df, samples_df):
        assert set(frame.columns) == expected_columns

    assert len(samples_df) == len(samples_input)
    assert samples_df['_bin'].nunique() == len(samples_intersections)
예제 #3
0
    if sort_categories_by:
        assert is_ascending(totals.values[::-1])

    assert np.all(totals.index.values == intersections.index.names)

    assert np.all(df.index.names == intersections.index.names)
    assert set(df.columns) == {'_value', '_bin'}
    assert_index_equal(df['_value'].reorder_levels(x.index.names).index,
                       x.index)
    assert_array_equal(df['_value'], x)
    assert_index_equal(intersections.iloc[df['_bin']].index, df.index)
    assert len(df) == len(x)


@pytest.mark.parametrize('x', [
    generate_samples()['value'],
    generate_counts(),
])
def test_subset_size_series(x):
    kw = {
        'sort_by': 'cardinality',
        'sort_categories_by': 'cardinality',
        'sum_over': None
    }
    df_sum, intersections_sum, totals_sum = _process_data(x,
                                                          subset_size='sum',
                                                          **kw)

    if x.index.is_unique:
        expected_warning = None
    else: