def setUp(self):
    """
    Save the good_eats.csv dataset, build its schema, and set the
    formula and name used by the tests.
    """
    TestBase.setUp(self)

    fixture = 'good_eats.csv'
    dataset_id = self.test_dataset_ids[fixture]
    self.dataset = Dataset.save(dataset_id)

    # the schema is built from the dtypes of the fixture data
    column_dtypes = self.test_data[fixture].dtypes
    Dataset.build_schema(self.dataset, column_dtypes)

    self.formula, self.name = 'rating', 'test'
def test_build_schema(self):
    """
    Check the schema stored by Dataset.build_schema for every test dataset.

    For each name in self.TEST_DATASETS this creates the dataset, builds
    its schema from the test data's dtypes, re-fetches the dataset and
    asserts that:

    - the CREATED_AT, SCHEMA and UPDATED_AT keys are present;
    - every schema column name matches neither a non-word character nor
      an uppercase ASCII letter;
    - every schema column carries SIMPLETYPE, OLAP_TYPE and LABEL;
    - every LABEL is an original column, and every original column is
      claimed by exactly one schema entry.
    """
    # a name is illegal if it has a non-word char or an uppercase letter
    illegal_col_regex = re.compile(r'\W|[A-Z]')
    required_attributes = (SIMPLETYPE, OLAP_TYPE, LABEL)

    for dataset_name in self.TEST_DATASETS:
        dataset_id = self.test_dataset_ids[dataset_name]
        dataset = Dataset.create(dataset_id)
        dframe = self.test_data[dataset_name]
        Dataset.build_schema(dataset, dframe.dtypes)

        # get dataset with new schema
        dataset = Dataset.find_one(dataset_id)

        for key in [CREATED_AT, SCHEMA, UPDATED_AT]:
            self.assertIn(key, dataset.keys())

        # labels are removed from this list as they are matched, so
        # anything left at the end is a column missing from the schema
        df_columns = dframe.columns.tolist()
        seen_columns = set()

        for column_name, column_attributes in dataset[SCHEMA].items():
            # check column_name is unique
            self.assertNotIn(column_name, seen_columns)
            seen_columns.add(column_name)

            # check column name is only legal chars
            self.assertIsNone(illegal_col_regex.search(column_name))

            # check has required attributes
            for attribute in required_attributes:
                self.assertIn(attribute, column_attributes)

            # check label is an original column; removing it also makes
            # a second schema entry with the same label fail here
            label = column_attributes[LABEL]
            self.assertIn(label, df_columns)
            df_columns.remove(label)

        # ensure all columns in df_columns have store columns; comparing
        # to [] makes a failure list the unmatched columns
        self.assertEqual(df_columns, [])
def import_dataset(_file, dataset):
    """
    Read `_file` with read_csv, build the schema for `dataset` from the
    resulting dtypes, then save the rows as observations of `dataset`.
    """
    frame = read_csv(_file)
    column_dtypes = frame.dtypes

    # schema first, observations second
    Dataset.build_schema(dataset, column_dtypes)
    Observation.save(frame, dataset)
def setUp(self):
    """
    Save the good_eats.csv dataset with its schema and observations, and
    define the list of formulas (self.calculations) used by the tests.
    """
    TestBase.setUp(self)

    fixture = 'good_eats.csv'
    self.dataset = Dataset.save(self.test_dataset_ids[fixture])
    frame = self.test_data[fixture]
    Dataset.build_schema(self.dataset, frame.dtypes)
    Observation.save(frame, self.dataset)

    # The formulas are grouped by the syntax they use; the groups are
    # concatenated in this order to form self.calculations.
    bare_columns = [
        'rating',
        'gps',
    ]
    arithmetic = [
        'amount + gps_alt',
        'amount - gps_alt',
        'amount + 5',
        'amount - gps_alt + 2.5',
        'amount * gps_alt',
        'amount / gps_alt',
        'amount * gps_alt / 2.5',
        'amount + gps_alt * gps_precision',
        '(amount + gps_alt) * gps_precision',
    ]
    comparisons = [
        'amount = 2',
        '10 < amount',
        '10 < amount + gps_alt',
    ]
    not_and_or = [
        'not amount = 2',
        'not(amount = 2)',
        'amount = 2 and 10 < amount',
        'amount = 2 or 10 < amount',
        'not not amount = 2 or 10 < amount',
        'not amount = 2 or 10 < amount',
        '(not amount = 2) or 10 < amount',
        'not(amount = 2 or 10 < amount)',
    ]
    caret = [
        'amount ^ 3',
        '(amount + gps_alt) ^ 2 + 100',
    ]
    unary_minus = [
        '-amount',
        '-amount < gps_alt - 100',
    ]
    membership = [
        'rating in ["delectible"]',
        'risk_factor in ["low_risk"]',
        'amount in ["9.0", "2.0", "20.0"]',
        '(risk_factor in ["low_risk"]) and (amount in ["9.0", "20.0"])',
    ]

    self.calculations = (
        bare_columns
        + arithmetic
        + comparisons
        + not_and_or
        + caret
        + unary_minus
        + membership
    )

    # NOTE(review): presumably the number of decimal places used for
    # approximate comparisons in the tests -- confirm against callers
    self.places = 5
def setUp(self):
    """
    Save the good_eats.csv dataset and build its schema from the dtypes
    of the corresponding test data.
    """
    TestBase.setUp(self)

    fixture = 'good_eats.csv'
    dataset_id = self.test_dataset_ids[fixture]
    self.dataset = Dataset.save(dataset_id)

    column_dtypes = self.test_data[fixture].dtypes
    Dataset.build_schema(self.dataset, column_dtypes)