def setUp(self):
    """Build a cleansed fixture and split it into train and test parts.

    Reads the first 50 rows of ``tests/test_data_old.txt``, passes them
    through ``cleanse`` and stores the result as ``self.clean_data``.
    The first five entries of the cleansed data become ``self.train``
    and everything after them becomes ``self.test``.
    """
    frame = pd.read_csv('tests/test_data_old.txt', delimiter=';').head(50)
    cleaned = cleanse(frame)
    self.clean_data = cleaned
    # Positional split: entries 0-4 for training, the rest for testing.
    self.train = cleaned[:5]
    self.test = cleaned[5:]
    # Column names shared by the tests of this fixture.
    self.cats = [
        'articleID',
        'customerID',
        'voucherID',
        'productGroup',
    ]
def setUp(self):
    """Load the raw fixture and bundle its cleansed form with split ids.

    ``self.raw_data`` keeps the frame exactly as read from
    ``tests/test_data.txt``. ``self.data`` is a dict holding the cleansed
    frame under ``'data'`` and the hard-coded id lists under
    ``'train_ids'`` and ``'test_ids'``.
    """
    self.raw_data = pd.read_csv('tests/test_data.txt', delimiter=';')
    cleaned = cleanse(self.raw_data)
    ids_for_training = ['a1000001', 'a1000002', 'a1000003']
    ids_for_testing = ['a1000007', 'a1000008']
    bundle = {}
    bundle['data'] = cleaned
    bundle['train_ids'] = ids_for_training
    bundle['test_ids'] = ids_for_testing
    self.data = bundle
def setUp(self):
    """Prepare dense feature and target data from a 50-row fixture.

    The first 50 rows of ``tests/test_data_old.txt`` are cleansed,
    extended by ``add_independent_features`` and handed to ``transform``,
    whose two results are stored as ``self.X`` and ``self.Y``.
    ``self.X`` is then replaced by the result of its ``toarray()`` call.
    """
    sample = pd.read_csv('tests/test_data_old.txt', delimiter=';').head(50)
    with_features = add_independent_features(cleanse(sample))
    self.X, self.Y = transform(with_features)
    # Replace the transformed features with their toarray() form.
    self.X = self.X.toarray()
def setUp(self):
    """Cleanse the fixture file and split it by hard-coded ids.

    ``self.data`` holds the output of ``preprocessing.cleanse`` for
    ``tests/test_data.txt``; ``self.train`` and ``self.test`` hold the two
    values returned by ``preprocessing.split_train_test`` for the id
    lists defined below.
    """
    ids_for_training = ['a1000001', 'a1000002', 'a1000003']
    ids_for_testing = ['a1000007', 'a1000008']
    frame = pd.read_csv('tests/test_data.txt', delimiter=';')
    cleaned = preprocessing.cleanse(frame)
    self.data = cleaned
    split = preprocessing.split_train_test(
        cleaned, ids_for_training, ids_for_testing)
    self.train, self.test = split
def setUp(self):
    """Create cleansed data together with its train and test partitions.

    Reads ``tests/test_data.txt``, stores the cleansed frame on
    ``self.data`` and unpacks the pair returned by
    ``preprocessing.split_train_test`` into ``self.train`` and
    ``self.test``.
    """
    source = pd.read_csv('tests/test_data.txt', delimiter=';')
    self.data = preprocessing.cleanse(source)
    # Ids passed to split_train_test as the train and test selections.
    wanted_train = ['a1000001', 'a1000002', 'a1000003']
    wanted_test = ['a1000007', 'a1000008']
    self.train, self.test = preprocessing.split_train_test(
        self.data,
        wanted_train,
        wanted_test,
    )
def setUp(self):
    """Prepare scaled train/test features and targets from 50 fixture rows.

    The rows are cleansed, extended by ``add_independent_features`` and
    transformed with ``scaler=normalize_features``. The first six entries
    of each result go to ``self.X_tr`` / ``self.Y_tr``, the remainder to
    ``self.X_te`` / ``self.Y_te``.
    """
    sample = pd.read_csv('tests/test_data_old.txt', delimiter=';').head(50)
    prepared = add_independent_features(cleanse(sample))
    features, targets = transform(prepared, scaler=normalize_features)
    cut = 6  # number of leading entries used for training
    self.X_tr = features[:cut]
    self.Y_tr = targets[:cut]
    self.X_te = features[cut:]
    self.Y_te = targets[cut:]
def test_cleanse(self):
    """Check value invariants and column dtypes of the cleansed frame.

    Runs ``cleanse`` on ``self.raw_data`` and verifies that no row has a
    zero quantity, that no row returns more than was ordered, and that
    the date and id columns carry the expected dtypes.
    """
    df = cleanse(self.raw_data)

    # Column values
    self.assertTrue((df.quantity != 0).all())
    self.assertTrue((df.quantity >= df.returnQuantity).all())

    # Column types
    # The removed aliases ``np.int`` / ``np.float`` (gone since NumPy 1.24)
    # were the builtins ``int`` / ``float``, so comparing the dtypes
    # against the builtins is the same check and works on every NumPy
    # version.
    self.assertEqual(df.orderDate.dtype, np.dtype('<M8[ns]'))
    self.assertEqual(df.orderID.dtype, int)
    self.assertEqual(df.articleID.dtype, int)
    self.assertEqual(df.customerID.dtype, int)
    self.assertEqual(df.voucherID.dtype, float)
def setUp(self):
    """Read the fixture file and assemble the data bundle for the tests.

    The unmodified frame from ``tests/test_data.txt`` is kept on
    ``self.raw_data``. ``self.data`` maps ``'data'`` to the cleansed frame
    and ``'train_ids'`` / ``'test_ids'`` to the fixed id lists.
    """
    raw = pd.read_csv('tests/test_data.txt', delimiter=';')
    self.raw_data = raw
    self.data = dict(
        data=cleanse(raw),
        train_ids=['a1000001', 'a1000002', 'a1000003'],
        test_ids=['a1000007', 'a1000008'],
    )
def setUp(self):
    """Store the cleansed first 50 rows of the fixture as ``self.data``."""
    first_rows = pd.read_csv(
        'tests/test_data_old.txt', delimiter=';'
    ).head(50)
    self.data = cleanse(first_rows)
def test_cleanse(self):
    """Smoke test: ``cleanse`` runs on ``self.data`` without raising."""
    # The return value is intentionally ignored; only a clean run matters.
    cleanse(self.data)