def test_discovery_analytics_class(self):
     """Wrap a category analysis in DataAnalytics and compare its attributes with its sample map."""
     categories = list('ABCDE') + [np.nan]
     sample = SyntheticBuilder.scratch_pad().get_category(categories, relative_freq=[1, 3, 2, 7, 4], size=694)
     wrapped = DataAnalytics(analysis=Discover.analyse_category(sample))
     # NOTE(review): another test in this file lists the intent keys as
     # including 'categories'; confirm 'selection' is still exposed.
     map_index = wrapped.sample_map.index.to_list()
     self.assertEqual(wrapped.intent.selection, map_index)
     # The values of the sample map must equal the sample distribution.
     map_values = wrapped.sample_map.to_list()
     self.assertEqual(wrapped.patterns.sample_distribution, map_values)
 def get_weights(self, df, columns: list, index: int, weighting: dict):
     """Recursively fill ``weighting`` with a category analysis per column.

     The analysis of ``columns[index]`` is stored under that column name.
     Unless this is the last column, each selected category gets an empty
     dict under ``'sub_category'`` which is then filled by recursing on the
     rows of ``df`` matching that category, using the next column.

     :param df: the frame to analyse (indexed by column name and by boolean mask)
     :param columns: the ordered column names to walk through
     :param index: the position in ``columns`` of the column to analyse
     :param weighting: the dictionary updated in place with the results
     """
     column = columns[index]
     analysis = Discover.analyse_category(df[column])
     weighting[column] = analysis
     reached_last_column = index == len(columns) - 1
     if not reached_last_column:
         # NOTE(review): 'selection' is read from the top level of the
         # analysis result; confirm this matches the structure returned by
         # Discover.analyse_category.
         for category in analysis.get('selection'):
             if analysis.get('sub_category') is None:
                 analysis['sub_category'] = {}
             branch = {}
             analysis['sub_category'][category] = branch
             matching_rows = df[df[column] == category]
             self.get_weights(matching_rows, columns, index + 1, branch)
 def test_analyse_category_limits(self):
     """Check analyse_category when limited by ``top`` and by ``lower``/``upper``.

     The dataset holds 20 samples: 8 x 'A', 6 x 'B', 4 x 'C' and 2 x 'D'.
     """
     dataset = ['A']*8 + ['B']*6 + ['C']*4 + ['D']*2

     # -- limit by the number of top categories --
     top = 2
     result = Discover.analyse_category(dataset, top=top, freq_precision=0)
     control = ['dtype', 'categories', 'top', 'highest_unique', 'lowest_unique', 'category_count']
     # The control list names the intent keys, so compare against the keys
     # of the 'intent' section (as the lower/upper check below does), not
     # against a top-level 'top' entry.
     self.assertCountEqual(control, list(result.get('intent').keys()))
     self.assertEqual(top, len(result.get('intent').get('categories')))
     self.assertCountEqual(['A', 'B'], result.get('intent').get('categories'))
     self.assertCountEqual([40, 30], result.get('patterns').get('relative_freq'))
     # 'A' and 'B' account for 14 of the 20 samples, leaving 30% excluded.
     self.assertEqual(30, result.get('stats').get('excluded_percent'))
     self.assertEqual(14, result.get('stats').get('sample_size'))

     # -- limit by lower and upper boundaries --
     lower = 0.2
     upper = 7
     result = Discover.analyse_category(dataset, lower=lower, upper=upper, freq_precision=0)
     # NOTE(review): the top-limited call above expects 'category_count'
     # where this list has 'granularity'; confirm which key applies here.
     control = ['dtype', 'categories', 'highest_unique', 'lowest_unique', 'granularity']
     self.assertCountEqual(control, list(result.get('intent').keys()))
     self.assertEqual(lower, result.get('intent').get('lowest_unique'))
     self.assertEqual(upper, result.get('intent').get('highest_unique'))
     self.assertCountEqual(['C', 'B'], result.get('intent').get('categories'))
     self.assertCountEqual([33, 50], result.get('patterns').get('relative_freq'))
     # 'B' and 'C' account for 10 of the 20 samples, leaving 50% excluded.
     self.assertEqual(50, result.get('stats').get('excluded_percent'))
     self.assertEqual(10, result.get('stats').get('sample_size'))
 def test_analyse_category(self):
     """Check the sections of a default category analysis and the keys within each."""
     tools = SyntheticBuilder.from_memory().tools
     categories = list('ABCDE') + [np.nan]
     sample = tools.get_category(categories, relative_freq=[1, 3, 2, 7, 4], size=694)
     analysis = Discover.analyse_category(sample)
     # Expected keys per section, in the order the sections are checked.
     expected_sections = {
         'intent': ['dtype', 'categories', 'highest_unique', 'lowest_unique', 'category_count'],
         'patterns': ['relative_freq', 'sample_distribution'],
         'stats': ['nulls_percent', 'sample_size', 'excluded_percent'],
         'params': ['freq_precision'],
     }
     # The top-level sections are verified first, then each section's keys.
     self.assertCountEqual(list(expected_sections), list(analysis.keys()))
     for section, expected_keys in expected_sections.items():
         self.assertCountEqual(expected_keys, list(analysis.get(section).keys()))