def test_filter_subsets(min, max):
    """Check ``min_subset_size``/``max_subset_size`` against manual filtering.

    Builds one ``UpSet`` without size limits and one with them, filters the
    unrestricted ``intersections`` and ``_df`` by hand, and asserts that the
    size-limited ``UpSet`` exposes the same ``intersections`` and ``_df``.

    Parameters
    ----------
    min, max
        Inclusive bounds on the subset size (compared with ``>=``/``<=``).
        The names shadow builtins but are part of the test's signature
        (presumably bound by a parametrize decorator -- confirm).
    """
    data = generate_samples(0, 5000, 3)
    upset_data = UpSet(data, subset_size='auto')
    subset_upset_data = UpSet(data, subset_size='auto',
                              min_subset_size=min, max_subset_size=max)
    intersections = upset_data.intersections
    df = upset_data._df

    # Expected intersections: those whose size lies within [min, max].
    subset_intersections = intersections[np.logical_and(
        intersections >= min, intersections <= max)]
    # Explicit copy: a '_bin' column is assigned below, and writing to a
    # boolean-mask selection is chained assignment (SettingWithCopyWarning).
    subset_df = df[df.index.isin(subset_intersections.index)].copy()
    assert_series_equal(subset_upset_data.intersections,
                        subset_intersections)

    def _pack_binary(X):
        """Fold the columns of ``X`` row-wise into one number per row.

        Each step doubles the accumulator and adds the next column, so the
        first column ends up as the most significant digit.
        """
        X = pd.DataFrame(X)
        out = 0
        for _, col in X.items():
            out *= 2
            out += col
        return out

    subset_df_packed = _pack_binary(subset_df.index.to_frame())
    subset_data_packed = _pack_binary(subset_intersections.index.to_frame())
    # Recompute '_bin': map each row's packed code to the position of that
    # code among the filtered intersections.
    subset_df['_bin'] = pd.Series(subset_df_packed).map(
        pd.Series(np.arange(len(subset_data_packed)),
                  index=subset_data_packed))
    assert_frame_equal(subset_upset_data._df, subset_df)
def test_not_unique(sort_by, sort_categories_by):
    """Compare ``_process_data`` on counts and on unit-valued samples.

    Runs ``_process_data`` with ``subset_size='sum'`` on the output of
    ``generate_counts()`` and on the ``'value'`` column of
    ``generate_samples()`` with every entry overwritten by 1, then asserts
    that both runs yield equal intersections and totals (ignoring dtype) and
    that the returned frames have the expected columns and sizes.

    Parameters
    ----------
    sort_by, sort_categories_by
        Forwarded unchanged to ``_process_data``.
    """
    options = {
        'sort_by': sort_by,
        'sort_categories_by': sort_categories_by,
        'subset_size': 'sum',
        'sum_over': None,
    }
    expected_columns = {'_value', '_bin'}

    counts = generate_counts()
    counts_df, counts_intersections, counts_totals = _process_data(
        counts, **options)

    # Take the 'value' column of the samples and set every entry to 1.
    unit_samples = generate_samples()['value']
    unit_samples.loc[:] = 1
    samples_df, samples_intersections, samples_totals = _process_data(
        unit_samples, **options)

    assert_series_equal(counts_intersections, samples_intersections,
                        check_dtype=False)
    assert_series_equal(counts_totals, samples_totals, check_dtype=False)

    for frame in (counts_df, samples_df):
        assert set(frame.columns) == expected_columns

    # One output row per input sample, and every intersection is used.
    assert len(samples_df) == len(unit_samples)
    assert samples_df['_bin'].nunique() == len(samples_intersections)
if sort_categories_by: assert is_ascending(totals.values[::-1]) assert np.all(totals.index.values == intersections.index.names) assert np.all(df.index.names == intersections.index.names) assert set(df.columns) == {'_value', '_bin'} assert_index_equal(df['_value'].reorder_levels(x.index.names).index, x.index) assert_array_equal(df['_value'], x) assert_index_equal(intersections.iloc[df['_bin']].index, df.index) assert len(df) == len(x) @pytest.mark.parametrize('x', [ generate_samples()['value'], generate_counts(), ]) def test_subset_size_series(x): kw = { 'sort_by': 'cardinality', 'sort_categories_by': 'cardinality', 'sum_over': None } df_sum, intersections_sum, totals_sum = _process_data(x, subset_size='sum', **kw) if x.index.is_unique: expected_warning = None else: