Пример #1
0
    def _combine_filters(self, ext_filters):
        """
        Combines existing filter in ``self.filter`` with additional filters.

        Parameters
        ----------
        ext_filters: dict
            dict with variable name as key, str or tupel of str, and logic
            as value. For example:
            ext_filters = {'q1': {'gender': 1}}

        Returns
        -------
        new_filter: dict {'new_filter_name': intersection([old_logic, new_logic])}
        """
        old_filter = self.filter
        no_global_filter = old_filter == 'no_filter'
        if no_global_filter:
            combined_name = '(no_filter)+({})'.format(ext_filters.keys()[0])
            new_filter = {combined_name: ext_filters.values()[0]}
        else:
            old_filter_name = old_filter.keys()[0]
            old_filter_logic = old_filter.values()[0]
            new_filter_name = ext_filters.keys()[0]
            new_filter_logic = ext_filters.values()[0]
            combined_name = '({})+({})'.format(old_filter_name, new_filter_name)
            combined_logic = intersection([old_filter_logic, new_filter_logic])
            new_filter = {combined_name: combined_logic}
        return new_filter
Пример #2
0
 def test_nested_logic_list(self):
     """
     Check ``get_logic_index`` on an intersection that wraps a union.

     The index returned for the nested logic must hold the same items as
     the component indexes combined by hand, and the returned key must
     equal the expected string.
     """
     series = self.example_data_A_data['q2']
     inner_union = union([has_all([1, 2]), has_any([3, 4])])
     nested_logic = intersection([inner_union, has_count([3])])
     nested_idx, nested_key = get_logic_index(series, nested_logic)

     # Evaluate each component on its own; only the index part is needed.
     all_idx = get_logic_index(series, has_all([1, 2]))[0]
     any_idx = get_logic_index(series, has_any([3, 4]))[0]
     count_idx = get_logic_index(series, has_count([3]))[0]

     expected_idx = all_idx.union(any_idx).intersection(count_idx)
     self.assertItemsEqual(nested_idx, expected_idx)
     self.assertEqual(nested_key, 'x[(({1&2},{3,4})&{3})]:y')
Пример #3
0
    def test_logic_list(self):
        """
        Check ``get_logic_index`` for a flat list of logics under each operator.

        For ``union``, ``intersection``, ``difference`` and
        ``symmetric_difference`` the index returned for the combined logic
        must hold the same items as the three component indexes chained with
        the equally named index method, and the returned key must equal the
        expected string.
        """
        # (logic combinator, name of the index method to chain, expected key)
        cases = [
            (union, 'union', 'x[({1&2},{3,4},{3})]:y'),
            (intersection, 'intersection', 'x[({1&2}&{3,4}&{3})]:y'),
            (difference, 'difference', 'x[({1&2}~{3,4}~{3})]:y'),
            (symmetric_difference, 'symmetric_difference',
             'x[({1&2}^{3,4}^{3})]:y'),
        ]
        for combine, index_method, expected_key in cases:
            q2 = self.example_data_A_data['q2']
            test_logic = combine(
                [has_all([1, 2]), has_any([3, 4]), has_count([3])])
            idx, vkey = get_logic_index(q2, test_logic)

            # Index of each component logic, evaluated on its own.
            idx1 = get_logic_index(q2, test_logic[1][0])[0]
            idx2 = get_logic_index(q2, test_logic[1][1])[0]
            idx3 = get_logic_index(q2, test_logic[1][2])[0]

            # The former pandas-version check had two identical branches,
            # so a single code path covers every version.
            expected_idx = getattr(idx1, index_method)(idx2)
            expected_idx = getattr(expected_idx, index_method)(idx3)

            self.assertCountEqual(idx, expected_idx, msg=index_method)
            self.assertEqual(vkey, expected_key)
Пример #4
0
 def test_filter(self):
     """
     Check that an in-place ``dataset.filter`` stores the alias and slices
     the data down to the expected rows and codes.
     """
     dataset = self._get_dataset()
     f = intersection([{'gender': [2]},
                       {'age': frange('35-45')}])
     alias = 'men: 35 to 45 years old'
     dataset.filter(alias, f, inplace=True)
     # alias copied correctly?
     self.assertEqual(dataset.filtered, alias)
     # correctly sliced?
     expected_index_len = 1509
     self.assertEqual(len(dataset._data.index), expected_index_len)
     self.assertEqual(dataset['age'].value_counts().sum(), expected_index_len)
     expected_gender_codes = [2]
     expected_age_codes = [35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45]
     # assertEqual (rather than assertTrue(a == b)) reports the differing
     # values when the check fails.
     self.assertEqual(dataset['gender'].value_counts().index.tolist(),
                      expected_gender_codes)
     self.assertEqual(sorted(dataset['age'].value_counts().index.tolist()),
                      expected_age_codes)
Пример #5
0
    def test_logic_list(self):
        """
        Run ``get_logic_index`` over a flat logic list for each set operator.

        Each scenario combines the same three logics with one operator and
        checks both the resulting index (against the component indexes
        chained with the equally named index method) and the returned key.
        """
        scenarios = (
            (union, 'union', 'x[((1&2),(3,4),{3})]:y'),
            (intersection, 'intersection', 'x[((1&2)&(3,4)&{3})]:y'),
            (difference, 'difference', 'x[((1&2)~(3,4)~{3})]:y'),
            (sym_diff, 'sym_diff', 'x[((1&2)^(3,4)^{3})]:y'),
        )
        for combine, op_name, expected_key in scenarios:
            series = self.example_data_A_data['q2']
            logic = combine([has_all([1, 2]), has_any([3, 4]), has_count([3])])
            combined_idx, key = get_logic_index(series, logic)

            # Component logics are read back from the combined logic itself.
            first_idx, _ = get_logic_index(series, logic[1][0])
            second_idx, _ = get_logic_index(series, logic[1][1])
            third_idx, _ = get_logic_index(series, logic[1][2])

            chained = getattr(first_idx, op_name)(second_idx)
            chained = getattr(chained, op_name)(third_idx)
            self.assertItemsEqual(combined_idx, chained)
            self.assertEqual(key, expected_key)
Пример #6
0
    def test_get_logic_key(self):
        """Compare ``get_logic_key`` output with the expected key for five logics."""
        all_with_flag = has_all([1, 2, 3], True)
        self.assertEqual(get_logic_key(all_with_flag), 'x[e{1&2&3}]:y')

        count_logic = has_count([is_ge(1), [5, 6, 7, 8, 9]])
        self.assertEqual(get_logic_key(count_logic), 'x[(5,6,7,8,9){>=1}]:y')

        negated_count = not_count([is_ge(1), [5, 6, 7, 8, 9]])
        self.assertEqual(get_logic_key(negated_count),
                         'x[(5,6,7,8,9)~{>=1}]:y')

        flat_union = union([has_any([1, 2]), has_all([3, 4]), not_any([5, 6])])
        self.assertEqual(get_logic_key(flat_union),
                         'x[({1,2},{3&4},~{5,6})]:y')

        # A nested intersection plus a dict-wrapped logic; this call also
        # receives the example data as second argument.
        inner = intersection([has_any([1, 2]), not_any([3])])
        mixed_union = union([inner, {'Wave': has_any([1, 2])}])
        self.assertEqual(
            get_logic_key(mixed_union, self.example_data_A_data),
            'x[(({1,2}&~{3}),Wave={1,2})]:y')
Пример #7
0
 def test_get_logic_key(self):
     """Compare ``get_logic_key`` output with the expected key for five logics."""
     key = get_logic_key(has_all([1, 2, 3], True))
     self.assertEqual(key, 'x[e(1&2&3)]:y')

     key = get_logic_key(has_count([is_ge(1), [5, 6, 7, 8, 9]]))
     self.assertEqual(key, 'x[(5,6,7,8,9){>=1}]:y')

     key = get_logic_key(not_count([is_ge(1), [5, 6, 7, 8, 9]]))
     self.assertEqual(key, 'x[(5,6,7,8,9)~{>=1}]:y')

     key = get_logic_key(
         union([has_any([1, 2]), has_all([3, 4]), not_any([5, 6])]))
     self.assertEqual(key, 'x[((1,2),(3&4),~(5,6))]:y')

     # Nested intersection plus a dict-wrapped logic; this call also
     # receives the example data as second argument.
     any_not = intersection([has_any([1, 2]), not_any([3])])
     nested = union([any_not, {'Wave': has_any([1, 2])}])
     key = get_logic_key(nested, self.example_data_A_data)
     self.assertEqual(key, 'x[(((1,2)&~(3)),Wave=(1,2))]:y')
Пример #8
0
    def test_get_logic_key(self):
        """Check the key string ``get_logic_key`` returns for several logics."""

        def check(logic, expected_key, *extra_args):
            # Forward any extra positional arguments straight to get_logic_key.
            self.assertEqual(get_logic_key(logic, *extra_args), expected_key)

        check(has_all([1, 2, 3], True), 'x[e(1&2&3)]:y')

        check(has_count([is_ge(1), [5, 6, 7, 8, 9]]), 'x[(5,6,7,8,9){>=1}]:y')

        check(not_count([is_ge(1), [5, 6, 7, 8, 9]]), 'x[(5,6,7,8,9)~{>=1}]:y')

        check(
            union([has_any([1, 2]), has_all([3, 4]), not_any([5, 6])]),
            'x[((1,2),(3&4),~(5,6))]:y',
        )

        nested = union([
            intersection([has_any([1, 2]), not_any([3])]),
            {'Wave': has_any([1, 2])},
        ])
        check(nested, 'x[(((1,2)&~(3)),Wave=(1,2))]:y',
              self.example_data_A_data)
Пример #9
0
 def test_nested_logic_list(self):
     """
     Check ``get_logic_index`` on a union nested inside an intersection.

     Both the resulting index (against the hand-combined component
     indexes) and the returned key are verified.
     """
     data = self.example_data_A_data['q2']
     nested = intersection(
         [union([has_all([1, 2]), has_any([3, 4])]), has_count([3])])
     actual_idx, actual_key = get_logic_index(data, nested)

     # Each component is evaluated separately; the keys are not needed.
     idx_all, _ = get_logic_index(data, has_all([1, 2]))
     idx_any, _ = get_logic_index(data, has_any([3, 4]))
     idx_count, _ = get_logic_index(data, has_count([3]))

     combined_by_hand = idx_all.union(idx_any).intersection(idx_count)
     self.assertItemsEqual(actual_idx, combined_by_hand)
     self.assertEqual(actual_key, 'x[(((1&2),(3,4))&{3})]:y')
Пример #10
0
 def test_extend_filter(self):
     """
     Check the batch meta after ``extend_filter`` with two extra filters.

     Verifies the ``filter_names`` list and the ``x_filter_map`` ordered
     mapping stored in the batch meta.
     """
     def gender_and_age(first_age, last_age):
         # Expected combined logic: gender == 1 intersected with the
         # inclusive list of age codes.
         ages = list(range(first_age, last_age + 1))
         return intersection([{'gender': 1}, {'age': ages}])

     batch, _unused_ds = _get_batch('test', full=True)
     # The meta is fetched before extend_filter is called, as in the
     # original test.
     b_meta = _get_meta(batch)
     batch.extend_filter({
         'q1': {'age': frange('20-25')},
         ('q2', 'q6'): {'age': frange('30-35')},
     })

     expected_names = [
         'men only',
         '(men only)+(q1)',
         '(men only)+(q2)',
         '(men only)+(q6)',
         '(men only)+(q6_1)',
         '(men only)+(q6_2)',
         '(men only)+(q6_3)',
     ]
     self.assertEqual(b_meta['filter_names'], expected_names)

     expected_map = OrderedDict()
     expected_map['q1'] = {'(men only)+(q1)': gender_and_age(20, 25)}
     expected_map['q2'] = {'(men only)+(q2)': gender_and_age(30, 35)}
     expected_map['q6'] = {'(men only)+(q6)': gender_and_age(30, 35)}
     expected_map[u'q6_1'] = {'(men only)+(q6_1)': gender_and_age(30, 35)}
     expected_map[u'q6_2'] = {'(men only)+(q6_2)': gender_and_age(30, 35)}
     expected_map[u'q6_3'] = {'(men only)+(q6_3)': gender_and_age(30, 35)}
     expected_map['age'] = {'men only': {'gender': 1}}
     self.assertEqual(b_meta['x_filter_map'], expected_map)
Пример #11
0
    def test_logic_list(self):
        """
        Check ``get_logic_index`` on a flat logic list for four set operators.

        For every operator the combined index is compared with the three
        component indexes chained by hand, and the key with its expected
        string.
        """
        def verify(combine, chain, expected_key):
            # Build the combined logic, evaluate it and each of its three
            # components, then compare index and key.
            series = self.example_data_A_data['q2']
            logic = combine([has_all([1, 2]), has_any([3, 4]), has_count([3])])
            whole_idx, key = get_logic_index(series, logic)
            part_a = get_logic_index(series, logic[1][0])[0]
            part_b = get_logic_index(series, logic[1][1])[0]
            part_c = get_logic_index(series, logic[1][2])[0]
            self.assertItemsEqual(whole_idx, chain(part_a, part_b, part_c))
            self.assertEqual(key, expected_key)

        verify(union,
               lambda a, b, c: a.union(b).union(c),
               'x[((1&2),(3,4),{3})]:y')
        verify(intersection,
               lambda a, b, c: a.intersection(b).intersection(c),
               'x[((1&2)&(3,4)&{3})]:y')
        verify(difference,
               lambda a, b, c: a.difference(b).difference(c),
               'x[((1&2)~(3,4)~{3})]:y')
        verify(sym_diff,
               lambda a, b, c: a.sym_diff(b).sym_diff(c),
               'x[((1&2)^(3,4)^{3})]:y')