class TestAbstractDatasets(TestBase):
    """Base class with shared helpers for dataset controller tests.

    Provides fixture paths, default formulae, and ``_test_*`` /
    ``_put_row_updates`` helpers that subclasses use to exercise the
    ``Datasets`` controller and validate its JSON responses.
    """

    NUM_COLS = 15  # column count of the good_eats.csv fixture
    NUM_ROWS = 19  # row count of the good_eats.csv fixture

    def setUp(self):
        TestBase.setUp(self)
        self.controller = Datasets()
        self._file_name = 'good_eats.csv'
        self._update_file_name = 'good_eats_update.json'
        self._update_check_file_path = '%sgood_eats_update_values.json' % (
            self.FIXTURE_PATH)
        self.default_formulae = [
            'amount',
            'amount + 1',
            'amount - 5',
        ]

    def _put_row_updates(self, dataset_id=None, file_name=None, validate=True):
        """Send a row update read from a fixture file to the controller.

        :param dataset_id: dataset to update, defaults to ``self.dataset_id``.
        :param file_name: update fixture name, defaults to
            ``self._update_file_name``.
        :param validate: when True, assert the response is a dict containing
            a ``Dataset.ID`` key.

        Side effect: loads the expected post-update values into
        ``self._update_values`` for later comparison.
        """
        if not dataset_id:
            dataset_id = self.dataset_id

        if not file_name:
            file_name = self._update_file_name

        # BUG FIX: the handle returned by open() was never closed; use a
        # context manager so it is released deterministically.
        with open('%s%s' % (self.FIXTURE_PATH, file_name), 'r') as fp:
            update = fp.read()

        result = json.loads(self.controller.update(dataset_id=dataset_id,
                                                   update=update))

        if validate:
            self.assertTrue(isinstance(result, dict))
            self.assertTrue(Dataset.ID in result)

        # set up the (default) values to test against
        with open(self._update_check_file_path, 'r') as fp:
            self._update_values = json.loads(fp.read())

    def _load_schema(self):
        """Return the schema dict from the dataset's info endpoint."""
        return json.loads(
            self.controller.info(self.dataset_id))[Dataset.SCHEMA]

    def _check_dframes_are_equal(self, dframe1, dframe2):
        """Assert two dataframes hold the same rows, ignoring order."""
        rows1 = comparable(dframe1)
        rows2 = comparable(dframe2)

        self.__check_dframe_is_subset(rows1, rows2)
        self.__check_dframe_is_subset(rows2, rows1)

    def __check_dframe_is_subset(self, rows1, rows2):
        """Assert every row of ``rows1`` also appears in ``rows2``."""
        for row in rows1:
            self.assertTrue(row in rows2,
                            '\nrow:\n%s\n\nnot in rows2:\n%s' % (row, rows2))

    def _post_calculations(self, formulae=None, group=None):
        """Create one calculation per formula on the current dataset.

        BUG FIX: ``formulae`` previously used a mutable default argument
        (``[]``); a ``None`` sentinel is used instead.
        """
        if formulae is None:
            formulae = []

        schema = self._load_schema()
        controller = Calculations()

        for idx, formula in enumerate(formulae):
            # use a synthetic name when the formula would collide with an
            # existing schema column (or no schema exists yet)
            name = 'calc_%d' % idx if not schema or\
                formula in schema.keys() else formula
            controller.create(self.dataset_id, formula=formula, name=name,
                              group=group)

    def _test_summary_built(self, result):
        # check that summary is created
        self.assertTrue(isinstance(result, dict))
        self.assertTrue(Dataset.ID in result)

        self.dataset_id = result[Dataset.ID]

        results = self.controller.summary(
            self.dataset_id,
            select=self.controller.SELECT_ALL_FOR_SUMMARY)

        return self._test_summary_results(results)

    def _test_summary_results(self, results):
        """Decode a summary response and assert it is a dict."""
        results = json.loads(results)
        self.assertTrue(isinstance(results, dict))
        return results

    def _test_aggregations(self, groups=None):
        """Check the aggregations map and return the linked dataset (decoded).

        BUG FIX: ``groups`` previously used a mutable default argument
        (``['']``); a ``None`` sentinel is used instead.
        """
        if groups is None:
            groups = ['']

        results = json.loads(self.controller.aggregations(self.dataset_id))
        self.assertTrue(isinstance(results, dict))
        self.assertEqual(len(results.keys()), len(groups))
        self.assertEqual(results.keys(), groups)
        linked_dataset_id = results[groups[0]]
        self.assertTrue(isinstance(linked_dataset_id, basestring))

        # inspect linked dataset
        return json.loads(self.controller.show(linked_dataset_id))

    def _test_summary_no_group(self, results, dataset_id=None, group=None):
        """Assert an ungrouped summary covers every non-grouped column."""
        if not dataset_id:
            dataset_id = self.dataset_id

        group = [group] if group else []
        result_keys = results.keys()

        # minus the column that we are grouping on
        self.assertEqual(len(result_keys), self.NUM_COLS - len(group))

        columns = [col for col in
                   self.get_data(self._file_name).columns.tolist()
                   if not col in [MONGO_ID] + group]

        dataset = Dataset.find_one(dataset_id)
        labels_to_slugs = dataset.schema.labels_to_slugs

        for col in columns:
            slug = labels_to_slugs[col]
            self.assertTrue(slug in result_keys,
                            'col (slug): %s in: %s' % (slug, result_keys))
            self.assertTrue(SUMMARY in results[slug].keys())
class TestProfile(TestBase):
    """Profile dataset creation and summary endpoints at several sizes."""

    # size label -> (width exponent, length multiplier)
    TEST_CASE_SIZES = {
        'tiny': (1, 1),
        'small': (2, 2),
        'large': (4, 40),
    }

    def setUp(self):
        TestBase.setUp(self)
        self.datasets = Datasets()
        self.tmp_file = NamedTemporaryFile(delete=False)

    def tearDown(self):
        # remove the temp CSV created in setUp
        os.unlink(self.tmp_file.name)

    def _expand_width(self, df, exponent):
        """Double the column count ``exponent`` times, then relabel every
        column with its positional index as a string."""
        for _ in range(exponent):
            suffixed = df.rename(columns={
                col: '%s-%s' % (col, pos)
                for (pos, col) in enumerate(df.columns)
            })
            df = df.join(suffixed)

        df.rename(columns={col: str(pos)
                           for (pos, col) in enumerate(df.columns)},
                  inplace=True)

        return df

    def _grow_test_data(self, dataset_name, width_exp, length_factor):
        """Load a fixture, widen it, and tile it ``length_factor`` times."""
        widened = self._expand_width(self.get_data(dataset_name), width_exp)
        return concat([widened] * length_factor)

    def test_tiny_profile(self):
        self._test_profile('tiny')

    def test_small_profile(self):
        self._test_profile('small')

    def test_large_profile(self):
        self._test_profile('large')

    @run_profiler
    def _test_profile(self, size):
        """Run the full create/save/info/summary cycle for one size."""
        print('bamboo/bamboo: %s' % size)
        self._test_create_data(*self.TEST_CASE_SIZES[size])
        print('saving dataset')
        self._test_save_dataset()
        self._test_get_info()
        self._test_get_summary()
        self._test_get_summary_with_group('province')
        self._test_get_summary_with_group('school_zone')

    def _test_create_data(self, width_exp, length_factor):
        self.data = self._grow_test_data('kenya_secondary_schools_2007.csv',
                                         width_exp, length_factor)
        print('bamboo/bamboo rows: %s, columns: %s' % (len(
            self.data), len(self.data.columns)))

    def _test_save_dataset(self):
        self.data.to_csv(self.tmp_file)
        self.tmp_file.close()
        upload = MockUploadedFile(open(self.tmp_file.name, 'r'))
        response = json.loads(self.datasets.create(csv_file=upload))
        self.assertTrue(isinstance(response, dict))
        self.assertTrue(Dataset.ID in response)
        self.dataset_id = response[Dataset.ID]

    def _test_get_info(self):
        info = json.loads(self.datasets.info(self.dataset_id))
        self.assertTrue(isinstance(info, dict))

    def _test_get_summary(self):
        summary = json.loads(self.datasets.summary(
            self.dataset_id,
            select=self.datasets.SELECT_ALL_FOR_SUMMARY))
        self.assertTrue(isinstance(summary, dict))

    def _test_get_summary_with_group(self, group):
        summary = json.loads(self.datasets.summary(
            self.dataset_id, group=group,
            select=self.datasets.SELECT_ALL_FOR_SUMMARY))
        self.assertTrue(isinstance(summary, dict))
class TestProfile(TestBase):
    """Profiling tests for dataset endpoints.

    NOTE(review): this class duplicates the ``TestProfile`` defined
    earlier in this module; because both share a name, this later
    definition shadows the earlier one at import time — confirm whether
    one of the two copies should be removed.
    """

    # maps a profile size label to (width exponent, length multiplier)
    TEST_CASE_SIZES = {
        'tiny': (1, 1),
        'small': (2, 2),
        'large': (4, 40),
    }

    def setUp(self):
        TestBase.setUp(self)
        self.datasets = Datasets()
        self.tmp_file = NamedTemporaryFile(delete=False)

    def tearDown(self):
        # clean up the temp file allocated in setUp
        os.unlink(self.tmp_file.name)

    def _expand_width(self, df, exponent):
        """Repeatedly self-join the frame to double its width, then give
        every column a purely numeric string label."""
        for _ in range(exponent):
            mapping = dict(
                (name, '%s-%s' % (name, idx))
                for (idx, name) in enumerate(df.columns))
            df = df.join(df.rename(columns=mapping))

        final_names = dict(
            (name, str(idx)) for (idx, name) in enumerate(df.columns))
        df.rename(columns=final_names, inplace=True)

        return df

    def _grow_test_data(self, dataset_name, width_exp, length_factor):
        """Build a test frame: widen a fixture, then stack it repeatedly."""
        base = self.get_data(dataset_name)
        base = self._expand_width(base, width_exp)
        return concat([base] * length_factor)

    def test_tiny_profile(self):
        self._test_profile('tiny')

    def test_small_profile(self):
        self._test_profile('small')

    def test_large_profile(self):
        self._test_profile('large')

    @run_profiler
    def _test_profile(self, size):
        """Drive the create/save/info/summary sequence for one size."""
        print('bamboo/bamboo: %s' % size)
        self._test_create_data(*self.TEST_CASE_SIZES[size])
        print('saving dataset')
        self._test_save_dataset()
        self._test_get_info()
        self._test_get_summary()
        self._test_get_summary_with_group('province')
        self._test_get_summary_with_group('school_zone')

    def _test_create_data(self, width_exp, length_factor):
        self.data = self._grow_test_data(
            'kenya_secondary_schools_2007.csv', width_exp, length_factor)
        print('bamboo/bamboo rows: %s, columns: %s' % (
            len(self.data), len(self.data.columns)))

    def _test_save_dataset(self):
        self.data.to_csv(self.tmp_file)
        self.tmp_file.close()
        csv_upload = MockUploadedFile(open(self.tmp_file.name, 'r'))
        created = json.loads(self.datasets.create(csv_file=csv_upload))
        self.assertTrue(isinstance(created, dict))
        self.assertTrue(Dataset.ID in created)
        self.dataset_id = created[Dataset.ID]

    def _test_get_info(self):
        decoded = json.loads(self.datasets.info(self.dataset_id))
        self.assertTrue(isinstance(decoded, dict))

    def _test_get_summary(self):
        decoded = json.loads(self.datasets.summary(
            self.dataset_id, select=self.datasets.SELECT_ALL_FOR_SUMMARY))
        self.assertTrue(isinstance(decoded, dict))

    def _test_get_summary_with_group(self, group):
        decoded = json.loads(self.datasets.summary(
            self.dataset_id, group=group,
            select=self.datasets.SELECT_ALL_FOR_SUMMARY))
        self.assertTrue(isinstance(decoded, dict))