Пример #1
0
 def test_parse_v2_no_blacklist_after_rename(self):
     # The payload carries both 'renames' and 'blacklist'; the RefineSpec
     # handed to the helper is built from the renames alone.
     # (Third helper argument is presumably the expected parse result.)
     payload = {'renames': {'a': 'b'}, 'blacklist': ['c']}
     renames_only = RefineSpec({'a': 'b'})
     self._test_parse_v2('A', payload, renames_only)
Пример #2
0
 def test_refine_rename_swap(self):
     # The spec maps 'a' -> 'b' and 'b' -> 'a' in one go; the second frame
     # holds the two values of column 'A' exchanged.
     # (Helper arguments are presumably: input, column, spec, expected.)
     before = pd.DataFrame({'A': ['a', 'b']}, dtype='category')
     swap = RefineSpec({'a': 'b', 'b': 'a'})
     after = pd.DataFrame({'A': ['b', 'a']}, dtype='category')
     self._test_refine_spec_apply(before, 'A', swap, after)
Пример #3
0
 def test_refine_cast_int_to_str(self):
     # Column 'A' starts out as integers while the spec is keyed by the
     # string '1'; the second frame is a categorical of strings.
     # (Helper arguments are presumably: input, column, spec, expected.)
     int_frame = pd.DataFrame({'A': [1, 2]})
     spec = RefineSpec({'1': '2'})
     str_frame = pd.DataFrame({'A': ['2', '2']}, dtype='category')
     self._test_refine_spec_apply(int_frame, 'A', spec, str_frame)
Пример #4
0
 def test_refine_blacklist(self):
     # No renames; the second positional RefineSpec argument (presumably
     # the blacklist) names 'a'. The second frame contains only 'b'.
     # (Helper arguments are presumably: input, column, spec, expected.)
     before = pd.DataFrame({'A': ['a', 'b']}, dtype='category')
     spec = RefineSpec({}, ['a'])
     after = pd.DataFrame({'A': ['b']}, dtype='category')
     self._test_refine_spec_apply(before, 'A', spec, after)
Пример #5
0
 def test_refine_rename_category_to_existing(self):
     # 'b' is renamed to 'a', a value that is already present in the
     # column; the second frame has 'a' in both rows.
     # (Helper arguments are presumably: input, column, spec, expected.)
     before = pd.DataFrame({"A": ["a", "b"]}, dtype="category")
     spec = RefineSpec({"b": "a"})
     after = pd.DataFrame({"A": ["a", "a"]}, dtype="category")
     self._test_refine_spec_apply(before, "A", spec, after)
Пример #6
0
 def test_refine_ignore_nan(self):
     # The third row is NaN in both frames; only 'b' differs ('b' -> 'a').
     # (Helper arguments are presumably: input, column, spec, expected.)
     before = pd.DataFrame({"A": ["a", "b", np.nan]}, dtype="category")
     spec = RefineSpec({"b": "a"})
     after = pd.DataFrame({"A": ["a", "a", np.nan]}, dtype="category")
     self._test_refine_spec_apply(before, "A", spec, after)
Пример #7
0
 def test_refine_spurious_rename(self):
     # The spec renames 'b', which does not occur in the column; both
     # frames are identical.
     # (Helper arguments are presumably: input, column, spec, expected.)
     before = pd.DataFrame({"A": ["a"]}, dtype="category")
     spec = RefineSpec({"b": "c"})
     after = pd.DataFrame({"A": ["a"]}, dtype="category")
     self._test_refine_spec_apply(before, "A", spec, after)
Пример #8
0
 def test_refine_rename_empty_category(self):
     # Both frames are an empty categorical column 'A'; the spec still
     # carries a rename ('b' -> 'c').
     # (Helper arguments are presumably: input, column, spec, expected.)
     before = pd.DataFrame({"A": []}, dtype="category")
     spec = RefineSpec({"b": "c"})
     after = pd.DataFrame({"A": []}, dtype="category")
     self._test_refine_spec_apply(before, "A", spec, after)
Пример #9
0
 def test_refine_rename_to_new(self):
     # The first frame is built without dtype='category'; the second one
     # is categorical and has 'c' where the first had 'b'.
     # (Helper arguments are presumably: input, column, spec, expected.)
     plain_frame = pd.DataFrame({"A": ["a", "b"]})
     spec = RefineSpec({"b": "c"})
     categorical_frame = pd.DataFrame({"A": ["a", "c"]}, dtype="category")
     self._test_refine_spec_apply(plain_frame, "A", spec, categorical_frame)
Пример #10
0
 def test_refine_cast_date_to_str(self):
     # Column 'A' holds a numpy datetime64; the spec is keyed by the string
     # '2018-08-03 17:12:00' and the second frame is a categorical of 'x'.
     # (Helper arguments are presumably: input, column, spec, expected.)
     stamp = np.datetime64('2018-08-03T17:12')
     date_frame = pd.DataFrame({'A': [stamp]})
     spec = RefineSpec({'2018-08-03 17:12:00': 'x'})
     str_frame = pd.DataFrame({'A': ['x']}, dtype='category')
     self._test_refine_spec_apply(date_frame, 'A', spec, str_frame)
Пример #11
0
 def test_parse_v2_no_blacklist_after_rename(self):
     # The payload carries both "renames" and "blacklist"; the RefineSpec
     # handed to the helper is built from the renames alone.
     # (Third helper argument is presumably the expected parse result.)
     payload = {"renames": {"a": "b"}, "blacklist": ["c"]}
     renames_only = RefineSpec({"a": "b"})
     self._test_parse_v2("A", payload, renames_only)
Пример #12
0
 def test_refine_rename_category_to_new(self):
     # 'b' is renamed to 'c', a value not present in the first frame; both
     # frames are categorical.
     # (Helper arguments are presumably: input, column, spec, expected.)
     before = pd.DataFrame({'A': ['a', 'b']}, dtype='category')
     spec = RefineSpec({'b': 'c'})
     after = pd.DataFrame({'A': ['a', 'c']}, dtype='category')
     self._test_refine_spec_apply(before, 'A', spec, after)
Пример #13
0
 def test_parse_v0_cascade_rename(self):
     # Two chained "change" edits on column "A": x -> y, then y -> z.
     # The RefineSpec handed to the helper maps both "x" and "y" to "z".
     # (Third helper argument is presumably the expected parse result.)
     chain = (("x", "y"), ("y", "z"))
     edits = [
         {
             "type": "change",
             "column": "A",
             "content": {"fromVal": old, "toVal": new},
         }
         for old, new in chain
     ]
     collapsed = RefineSpec({"x": "z", "y": "z"})
     self._test_parse_v0("A", edits, collapsed)
Пример #14
0
 def test_parse_v0_filter(self):
     # A lone 'select' edit on column 'A' is paired with a RefineSpec
     # constructed with no arguments.
     # (Third helper argument is presumably the expected parse result.)
     select_foo = {
         'type': 'select',
         'column': 'A',
         'content': {'value': 'foo'},
     }
     self._test_parse_v0('A', [select_foo], RefineSpec())
Пример #15
0
 def test_parse_v0_ignore_wrong_column(self):
     # The edit targets column 'B' while the helper is called for column
     # 'A'; it is paired with a RefineSpec constructed with no arguments.
     # (Third helper argument is presumably the expected parse result.)
     select_on_b = {
         'type': 'select',
         'column': 'B',
         'content': {'value': 'foo'},
     }
     self._test_parse_v0('A', [select_on_b], RefineSpec())
Пример #16
0
 def test_parse_v0_rename(self):
     # A single 'change' edit (x -> y) on column 'A' is paired with a
     # RefineSpec holding exactly that rename.
     # (Third helper argument is presumably the expected parse result.)
     change_x_to_y = {
         'type': 'change',
         'column': 'A',
         'content': {'fromVal': 'x', 'toVal': 'y'},
     }
     one_rename = RefineSpec({'x': 'y'})
     self._test_parse_v0('A', [change_x_to_y], one_rename)
Пример #17
0
 def test_parse_v0_filter_multiple(self):
     # The same 'select' edit on 'foo' appears twice in a row; it is paired
     # with a RefineSpec whose blacklist is explicitly empty.
     # (Third helper argument is presumably the expected parse result.)
     edits = [
         {'type': 'select', 'column': 'A', 'content': {'value': 'foo'}}
         for _ in range(2)
     ]
     empty_blacklist = RefineSpec(blacklist=[])
     self._test_parse_v0('A', edits, empty_blacklist)
Пример #18
0
 def test_parse_v0_ignore_wrong_column(self):
     # The edit targets column "B" while the helper is called for column
     # "A"; it is paired with a RefineSpec constructed with no arguments.
     # (Third helper argument is presumably the expected parse result.)
     select_on_b = {
         "type": "select",
         "column": "B",
         "content": {"value": "foo"},
     }
     self._test_parse_v0("A", [select_on_b], RefineSpec())
Пример #19
0
 def test_parse_v0_filter(self):
     # A lone "select" edit on column "A" is paired with a RefineSpec
     # constructed with no arguments.
     # (Third helper argument is presumably the expected parse result.)
     select_foo = {
         "type": "select",
         "column": "A",
         "content": {"value": "foo"},
     }
     self._test_parse_v0("A", [select_foo], RefineSpec())
Пример #20
0
 def test_refine_rename_nan_category(self):
     # Assert on the result directly instead of comparing whole frames,
     # because pandas botches the comparison:
     #
     # >>> self._test_refine_spec_apply(
     #     pd.DataFrame({'A': [np.nan]}, dtype='category'),
     #     'A', RefineSpec({'b': 'c'}),
     #     pd.DataFrame({'A': [np.nan]}, dtype='category')
     # )
     # Attribute "dtype" are different
     # [left]:  CategoricalDtype(categories=[], ordered=False)
     # [right]: CategoricalDtype(categories=[], ordered=False)
     #
     # NOTE(review): the snippet above feeds [np.nan], but this test feeds
     # an empty column -- confirm which input is intended.
     spec = RefineSpec({'b': 'c'})
     result = spec.apply(pd.DataFrame({'A': []}, dtype='category'), 'A')
     # The result must have no rows ...
     self.assertEqual(0, len(result))
     # ... and column 'A' must expose zero categories. The `.cat` accessor
     # is only available on categorical data, so this line also fails if
     # the column is no longer categorical.
     self.assertEqual(0, len(result['A'].cat.categories))
Пример #21
0
    def test_parse_v0_no_blacklist_after_rename(self):
        # Background: the old logic applied edits one by one, mutating the
        # dataframe each time and maintaining a separate "selected" column.
        # On a 'change' edit it would look up the 'selected' flag of the
        # destination value.
        #
        # That was a stateful, confusing route to something very simple:
        # rename first, then filter.
        #
        # The old behavior also depended on the values in the table. The
        # new one does not: the user edits a set of instructions rather
        # than table values. In this example 'y' used to be either selected
        # or deselected; after the upgrade it is deselected. That is not
        # strictly compatible, but there is a limit to how much effort the
        # old format deserves.

        # UPDATE 1/29/2019
        # The new Refine module does not filter, so it makes no use of the
        # blacklist. The blacklist is omitted from RefineSpec; only the
        # rename should be included.
        select_x = {
            'type': 'select',
            'column': 'A',
            'content': {'value': 'x'},
        }
        change_x_to_y = {
            'type': 'change',
            'column': 'A',
            'content': {'fromVal': 'x', 'toVal': 'y'},
        }
        rename_only = RefineSpec({'x': 'y'})  # opinionated
        self._test_parse_v0('A', [select_x, change_x_to_y], rename_only)
Пример #22
0
 def test_parse_v3_only_rename(self):
     # The payload has a "renames" key and nothing else.
     # NOTE(review): the test name says v3 but the helper invoked is
     # _test_parse_v2 -- confirm this is intentional.
     payload = {"renames": {"a": "b"}}
     one_rename = RefineSpec({"a": "b"})
     self._test_parse_v2("A", payload, one_rename)
Пример #23
0
 def test_parse_v3_only_rename(self):
     # The payload has a 'renames' key and nothing else.
     # NOTE(review): the test name says v3 but the helper invoked is
     # _test_parse_v2 -- confirm this is intentional.
     payload = {'renames': {'a': 'b'}}
     one_rename = RefineSpec({'a': 'b'})
     self._test_parse_v2('A', payload, one_rename)
Пример #24
0
 def test_refine_ignore_nan(self):
     # The third row is NaN in both frames; only 'b' differs ('b' -> 'a').
     # (Helper arguments are presumably: input, column, spec, expected.)
     before = pd.DataFrame({'A': ['a', 'b', np.nan]}, dtype='category')
     spec = RefineSpec({'b': 'a'})
     after = pd.DataFrame({'A': ['a', 'a', np.nan]}, dtype='category')
     self._test_refine_spec_apply(before, 'A', spec, after)
Пример #25
0
 def test_refine_rename_empty_category(self):
     # Both frames are an empty categorical column 'A'; the spec still
     # carries a rename ('b' -> 'c').
     # (Helper arguments are presumably: input, column, spec, expected.)
     before = pd.DataFrame({'A': []}, dtype='category')
     spec = RefineSpec({'b': 'c'})
     after = pd.DataFrame({'A': []}, dtype='category')
     self._test_refine_spec_apply(before, 'A', spec, after)
Пример #26
0
 def test_refine_spurious_rename(self):
     # The spec renames 'b', which does not occur in the column; both
     # frames are identical.
     # (Helper arguments are presumably: input, column, spec, expected.)
     before = pd.DataFrame({'A': ['a']}, dtype='category')
     spec = RefineSpec({'b': 'c'})
     after = pd.DataFrame({'A': ['a']}, dtype='category')
     self._test_refine_spec_apply(before, 'A', spec, after)