def test_parse_v2_no_blacklist_after_rename(self):
    # The v2 params carry both 'renames' and 'blacklist'; the expected spec
    # is built from the rename mapping alone.
    params = {'renames': {'a': 'b'}, 'blacklist': ['c']}
    expected_spec = RefineSpec({'a': 'b'})
    self._test_parse_v2('A', params, expected_spec)
def test_refine_rename_swap(self):
    # One spec maps 'a' -> 'b' and 'b' -> 'a'; the expected column has the
    # two values exchanged.
    in_table = pd.DataFrame({'A': ['a', 'b']}, dtype='category')
    spec = RefineSpec({'a': 'b', 'b': 'a'})
    expected = pd.DataFrame({'A': ['b', 'a']}, dtype='category')
    self._test_refine_spec_apply(in_table, 'A', spec, expected)
def test_refine_cast_int_to_str(self):
    # Input column holds ints; the rename keys are strings. The expected
    # output is a categorical column of strings.
    in_table = pd.DataFrame({'A': [1, 2]})
    spec = RefineSpec({'1': '2'})
    expected = pd.DataFrame({'A': ['2', '2']}, dtype='category')
    self._test_refine_spec_apply(in_table, 'A', spec, expected)
def test_refine_blacklist(self):
    # No renames, second RefineSpec argument is ['a']; the expected table
    # contains only the 'b' row.
    in_table = pd.DataFrame({'A': ['a', 'b']}, dtype='category')
    spec = RefineSpec({}, ['a'])
    expected = pd.DataFrame({'A': ['b']}, dtype='category')
    self._test_refine_spec_apply(in_table, 'A', spec, expected)
def test_refine_rename_category_to_existing(self):
    # 'b' is renamed to 'a', a value already present in the column; every
    # expected row is 'a'.
    in_table = pd.DataFrame({"A": ["a", "b"]}, dtype="category")
    spec = RefineSpec({"b": "a"})
    expected = pd.DataFrame({"A": ["a", "a"]}, dtype="category")
    self._test_refine_spec_apply(in_table, "A", spec, expected)
def test_refine_ignore_nan(self):
    # The NaN row is expected to stay NaN while 'b' is renamed to 'a'.
    in_table = pd.DataFrame({"A": ["a", "b", np.nan]}, dtype="category")
    spec = RefineSpec({"b": "a"})
    expected = pd.DataFrame({"A": ["a", "a", np.nan]}, dtype="category")
    self._test_refine_spec_apply(in_table, "A", spec, expected)
def test_refine_spurious_rename(self):
    # The rename source 'b' does not occur in the column; the expected
    # table equals the input.
    in_table = pd.DataFrame({"A": ["a"]}, dtype="category")
    spec = RefineSpec({"b": "c"})
    expected = pd.DataFrame({"A": ["a"]}, dtype="category")
    self._test_refine_spec_apply(in_table, "A", spec, expected)
def test_refine_rename_empty_category(self):
    # Zero-row categorical column in, zero-row categorical column expected.
    in_table = pd.DataFrame({"A": []}, dtype="category")
    spec = RefineSpec({"b": "c"})
    expected = pd.DataFrame({"A": []}, dtype="category")
    self._test_refine_spec_apply(in_table, "A", spec, expected)
def test_refine_rename_to_new(self):
    # Input is built without dtype='category'; the expected output is
    # categorical, with 'b' replaced by the previously unseen value 'c'.
    in_table = pd.DataFrame({"A": ["a", "b"]})
    spec = RefineSpec({"b": "c"})
    expected = pd.DataFrame({"A": ["a", "c"]}, dtype="category")
    self._test_refine_spec_apply(in_table, "A", spec, expected)
def test_refine_cast_date_to_str(self):
    # Input cell is a datetime64; the rename key is the string
    # '2018-08-03 17:12:00'. The expected output is the categorical 'x'.
    timestamp = np.datetime64('2018-08-03T17:12')
    in_table = pd.DataFrame({'A': [timestamp]})
    spec = RefineSpec({'2018-08-03 17:12:00': 'x'})
    expected = pd.DataFrame({'A': ['x']}, dtype='category')
    self._test_refine_spec_apply(in_table, 'A', spec, expected)
def test_parse_v2_no_blacklist_after_rename(self):
    # The v2 params carry both "renames" and "blacklist"; the expected spec
    # is built from the rename mapping alone.
    params = {"renames": {"a": "b"}, "blacklist": ["c"]}
    expected_spec = RefineSpec({"a": "b"})
    self._test_parse_v2("A", params, expected_spec)
def test_refine_rename_category_to_new(self):
    # 'b' is renamed to 'c', a value not present in the categorical input.
    in_table = pd.DataFrame({'A': ['a', 'b']}, dtype='category')
    spec = RefineSpec({'b': 'c'})
    expected = pd.DataFrame({'A': ['a', 'c']}, dtype='category')
    self._test_refine_spec_apply(in_table, 'A', spec, expected)
def test_parse_v0_cascade_rename(self):
    # Two successive v0 'change' edits, x -> y then y -> z. The expected
    # spec sends both 'x' and 'y' to 'z'.
    rename_x_to_y = {
        "type": "change",
        "column": "A",
        "content": {"fromVal": "x", "toVal": "y"},
    }
    rename_y_to_z = {
        "type": "change",
        "column": "A",
        "content": {"fromVal": "y", "toVal": "z"},
    }
    expected_spec = RefineSpec({"x": "z", "y": "z"})
    self._test_parse_v0("A", [rename_x_to_y, rename_y_to_z], expected_spec)
def test_parse_v0_filter(self):
    # A single v0 'select' edit on column A; the expected spec is the
    # default-constructed RefineSpec.
    select_edit = {
        'type': 'select',
        'column': 'A',
        'content': {'value': 'foo'},
    }
    expected_spec = RefineSpec()
    self._test_parse_v0('A', [select_edit], expected_spec)
def test_parse_v0_ignore_wrong_column(self):
    # The edit targets column 'B' while parsing is requested for 'A'; the
    # expected spec is the default-constructed RefineSpec.
    edit_on_other_column = {
        'type': 'select',
        'column': 'B',
        'content': {'value': 'foo'},
    }
    expected_spec = RefineSpec()
    self._test_parse_v0('A', [edit_on_other_column], expected_spec)
def test_parse_v0_rename(self):
    # A single v0 'change' edit x -> y should yield the rename {'x': 'y'}.
    change_edit = {
        'type': 'change',
        'column': 'A',
        'content': {'fromVal': 'x', 'toVal': 'y'},
    }
    expected_spec = RefineSpec({'x': 'y'})
    self._test_parse_v0('A', [change_edit], expected_spec)
def test_parse_v0_filter_multiple(self):
    # Two identical 'select' edits for the same value; the expected spec
    # has an empty blacklist.
    first_select = {
        'type': 'select',
        'column': 'A',
        'content': {'value': 'foo'},
    }
    # Built as a separate literal so the two list entries stay distinct
    # objects, as in a freshly parsed edit list.
    second_select = {
        'type': 'select',
        'column': 'A',
        'content': {'value': 'foo'},
    }
    expected_spec = RefineSpec(blacklist=[])
    self._test_parse_v0('A', [first_select, second_select], expected_spec)
def test_parse_v0_ignore_wrong_column(self):
    # The edit targets column "B" while parsing is requested for "A"; the
    # expected spec is the default-constructed RefineSpec.
    edit_on_other_column = {
        "type": "select",
        "column": "B",
        "content": {"value": "foo"},
    }
    expected_spec = RefineSpec()
    self._test_parse_v0("A", [edit_on_other_column], expected_spec)
def test_parse_v0_filter(self):
    # A single v0 "select" edit on column A; the expected spec is the
    # default-constructed RefineSpec.
    select_edit = {
        "type": "select",
        "column": "A",
        "content": {"value": "foo"},
    }
    expected_spec = RefineSpec()
    self._test_parse_v0("A", [select_edit], expected_spec)
def test_refine_rename_nan_category(self):
    # Get explicit, because pandas botches the comparison:
    #
    # >>> self._test_refine_spec_apply(
    #     pd.DataFrame({'A': [np.nan]}, dtype='category'),
    #     'A', RefineSpec({'b': 'c'}),
    #     pd.DataFrame({'A': [np.nan]}, dtype='category')
    # )
    # Attribute "dtype" are different
    # [left]:  CategoricalDtype(categories=[], ordered=False)
    # [right]: CategoricalDtype(categories=[], ordered=False)
    #
    # So instead of comparing whole frames, check the row count and the
    # category count of the result directly.
    #
    # NOTE(review): the snippet above uses a [np.nan] column, but the input
    # built below is an empty column -- confirm which one was intended.
    spec = RefineSpec({'b': 'c'})
    result = spec.apply(pd.DataFrame({'A': []}, dtype='category'), 'A')
    self.assertEqual(0, len(result))
    self.assertEqual(0, len(result['A'].cat.categories))
def test_parse_v0_no_blacklist_after_rename(self):
    # The old logic would run one edit at a time, modifying the dataframe
    # each time and adding a separate "selected" column. When the user
    # added a 'change', the old logic would check the 'selected' of the
    # destination value.
    #
    # ... this was a stateful and confusing way of accomplishing something
    # terribly simple: rename first, then filter.
    #
    # Unfortunately, the behavior would depend on the values in the table.
    # Now we don't: the user edits a set of instructions, not direct table
    # values. Before, in this example, 'y' might be selected or it might
    # be deselected. Now, after the upgrade, it's deselected. This isn't
    # strictly compatible, but how hard are we meant to work on supporting
    # this old format?
    #
    # UPDATE 1/29/2019
    # New Refine module does not filter and therefore does not make use of
    # the blacklist. blacklist now omitted from RefineSpec, so only rename
    # should be included.
    select_x = {
        'type': 'select',
        'column': 'A',
        'content': {'value': 'x'},
    }
    rename_x_to_y = {
        'type': 'change',
        'column': 'A',
        'content': {'fromVal': 'x', 'toVal': 'y'},
    }
    expected_spec = RefineSpec({'x': 'y'})  # opinionated
    self._test_parse_v0('A', [select_x, rename_x_to_y], expected_spec)
def test_parse_v3_only_rename(self):
    # Params contain only "renames"; the expected spec carries that mapping.
    # NOTE(review): the test is named v3 but goes through _test_parse_v2 --
    # confirm that sharing the v2 helper is intentional.
    params = {"renames": {"a": "b"}}
    expected_spec = RefineSpec({"a": "b"})
    self._test_parse_v2("A", params, expected_spec)
def test_parse_v3_only_rename(self):
    # Params contain only 'renames'; the expected spec carries that mapping.
    # NOTE(review): the test is named v3 but goes through _test_parse_v2 --
    # confirm that sharing the v2 helper is intentional.
    params = {'renames': {'a': 'b'}}
    expected_spec = RefineSpec({'a': 'b'})
    self._test_parse_v2('A', params, expected_spec)
def test_refine_ignore_nan(self):
    # The NaN row is expected to stay NaN while 'b' is renamed to 'a'.
    in_table = pd.DataFrame({'A': ['a', 'b', np.nan]}, dtype='category')
    spec = RefineSpec({'b': 'a'})
    expected = pd.DataFrame({'A': ['a', 'a', np.nan]}, dtype='category')
    self._test_refine_spec_apply(in_table, 'A', spec, expected)
def test_refine_rename_empty_category(self):
    # Zero-row categorical column in, zero-row categorical column expected.
    in_table = pd.DataFrame({'A': []}, dtype='category')
    spec = RefineSpec({'b': 'c'})
    expected = pd.DataFrame({'A': []}, dtype='category')
    self._test_refine_spec_apply(in_table, 'A', spec, expected)
def test_refine_spurious_rename(self):
    # The rename source 'b' does not occur in the column; the expected
    # table equals the input.
    in_table = pd.DataFrame({'A': ['a']}, dtype='category')
    spec = RefineSpec({'b': 'c'})
    expected = pd.DataFrame({'A': ['a']}, dtype='category')
    self._test_refine_spec_apply(in_table, 'A', spec, expected)