def test_discovery_analytics_class(self):
    """Wrap a category analysis in ``DataAnalytics`` and compare it with its ``sample_map``."""
    scratch = SyntheticBuilder.scratch_pad()
    categories = [*'ABCDE', np.nan]
    sample = scratch.get_category(categories, relative_freq=[1, 3, 2, 7, 4], size=694)
    analysis = Discover.analyse_category(sample)
    analytics = DataAnalytics(analysis=analysis)

    # the index of sample_map is expected to equal the intent selection
    selection = analytics.intent.selection
    mapped_selection = analytics.sample_map.index.to_list()
    self.assertEqual(selection, mapped_selection)

    # the values of sample_map are expected to equal the sample distribution
    distribution = analytics.patterns.sample_distribution
    mapped_distribution = analytics.sample_map.to_list()
    self.assertEqual(distribution, mapped_distribution)
def get_weights(self, df, columns: list, index: int, weighting: dict):
    """Recursively fill ``weighting`` with nested category analyses.

    The column ``columns[index]`` of ``df`` is analysed with
    ``Discover.analyse_category`` and the result is stored in ``weighting``
    under the column name. Unless this is the last column, every entry of the
    analysis' ``'selection'`` gets its own dictionary under ``'sub_category'``,
    which is filled by recursing into the next column using only the rows of
    ``df`` where the current column equals that entry.

    :param df: the data to analyse, indexable by column name and boolean mask
    :param columns: the ordered column names to walk through
    :param index: position in ``columns`` of the column handled by this call
    :param weighting: the dictionary to populate; it is modified in place
    :return: None
    """
    column = columns[index]
    weighting.update({column: Discover.analyse_category(df[column])})
    analysis = weighting.get(column)

    # the last column has nothing further to drill into
    if index == len(columns) - 1:
        return

    for category in analysis.get('selection'):
        # create the container on demand so it only exists when there is a selection
        if analysis.get('sub_category') is None:
            analysis.update({'sub_category': {}})
        branch = {}
        analysis.get('sub_category').update({category: branch})
        subset = df[df[column] == category]
        self.get_weights(subset, columns, index + 1, branch)
def test_analyse_category_limits(self):
    """Check ``Discover.analyse_category`` when the categories are limited.

    Two limits are exercised on the same dataset: first the ``top`` parameter,
    then the ``lower`` and ``upper`` parameters. Each case checks the keys of
    the ``'intent'`` section, the retained categories, their relative
    frequencies and the ``'stats'`` values.
    """
    # 20 observations: 'A' x 8, 'B' x 6, 'C' x 4, 'D' x 2
    dataset = ['A']*8 + ['B']*6 + ['C']*4 + ['D']*2

    # limit by the number of top categories
    top = 2
    result = Discover.analyse_category(dataset, top=top, freq_precision=0)
    control = ['dtype', 'categories', 'top', 'highest_unique', 'lowest_unique', 'category_count']
    # the control names are keys of the 'intent' section, so compare against
    # those keys in the same way as the lower/upper case further down
    self.assertCountEqual(control, list(result.get('intent').keys()))
    self.assertEqual(top, len(result.get('intent').get('categories')))
    self.assertCountEqual(['A', 'B'], result.get('intent').get('categories'))
    self.assertCountEqual([40, 30], result.get('patterns').get('relative_freq'))
    self.assertEqual(30, result.get('stats').get('excluded_percent'))
    self.assertEqual(14, result.get('stats').get('sample_size'))

    # limit by lower and upper bounds
    lower = 0.2
    upper = 7
    result = Discover.analyse_category(dataset, lower=lower, upper=upper, freq_precision=0)
    # NOTE(review): the 'top' case above expects 'category_count' where this list has
    # 'granularity' - confirm which key analyse_category returns for lower/upper
    control = ['dtype', 'categories', 'highest_unique', 'lowest_unique', 'granularity']
    self.assertCountEqual(control, list(result.get('intent').keys()))
    self.assertEqual(lower, result.get('intent').get('lowest_unique'))
    self.assertEqual(upper, result.get('intent').get('highest_unique'))
    self.assertCountEqual(['C', 'B'], result.get('intent').get('categories'))
    self.assertCountEqual([33, 50], result.get('patterns').get('relative_freq'))
    self.assertEqual(50, result.get('stats').get('excluded_percent'))
    self.assertEqual(10, result.get('stats').get('sample_size'))
def test_analyse_category(self):
    """Check the section names, and the keys within each section, of a category analysis."""
    builder = SyntheticBuilder.from_memory()
    tools = builder.tools
    categories = [*'ABCDE', np.nan]
    dataset = tools.get_category(categories, relative_freq=[1, 3, 2, 7, 4], size=694)
    result = Discover.analyse_category(dataset)

    # top-level sections of the analysis
    self.assertCountEqual(['intent', 'patterns', 'stats', 'params'], list(result.keys()))

    # keys expected inside each section, checked in this order
    expected_keys = (
        ('intent', ['dtype', 'categories', 'highest_unique', 'lowest_unique', 'category_count']),
        ('patterns', ['relative_freq', 'sample_distribution']),
        ('stats', ['nulls_percent', 'sample_size', 'excluded_percent']),
        ('params', ['freq_precision']),
    )
    for section, control in expected_keys:
        self.assertCountEqual(control, list(result.get(section).keys()))