示例#1
0
 def setUp(self):
     """Load 50 rows of 'tests/test_data_old.txt', cleanse them and split.

     Stores the cleansed data, a train/test split of it, and the list of
     categorical column names on the test case.
     """
     frame = pd.read_csv('tests/test_data_old.txt', delimiter=';').head(50)
     cleaned = cleanse(frame)
     self.clean_data = cleaned
     # The first five entries are the training slice, the rest is the test slice.
     self.train, self.test = cleaned[:5], cleaned[5:]
     self.cats = ['articleID', 'customerID', 'voucherID', 'productGroup']
示例#2
0
 def setUp(self):
     """Prepare cleansed data, a 5-row train slice and the remaining test slice."""
     source = pd.read_csv('tests/test_data_old.txt', delimiter=';')
     self.clean_data = cleanse(source.head(50))

     split_at = 5  # entries before this position train, the rest test
     self.train = self.clean_data[:split_at]
     self.test = self.clean_data[split_at:]

     # Column names kept in self.cats for the tests to use.
     self.cats = ['articleID', 'customerID', 'voucherID', 'productGroup']
示例#3
0
    def setUp(self):
        """Read 'tests/test_data.txt' and bundle cleansed data with id lists.

        ``self.raw_data`` keeps the unprocessed frame; ``self.data`` is a dict
        with the cleansed data plus the train and test id lists.
        """
        self.raw_data = pd.read_csv('tests/test_data.txt', delimiter=';')
        self.data = {
            'data': cleanse(self.raw_data),
            'train_ids': ['a1000001', 'a1000002', 'a1000003'],
            'test_ids': ['a1000007', 'a1000008'],
        }
示例#4
0
 def setUp(self):
     """Build feature matrix ``self.X`` and targets ``self.Y`` from 50 fixture rows."""
     frame = pd.read_csv('tests/test_data_old.txt', delimiter=';').head(50)
     # Pipeline: cleanse -> add independent features -> transform.
     features, targets = transform(add_independent_features(cleanse(frame)))
     self.Y = targets
     # NOTE(review): toarray() suggests transform() returns a sparse matrix
     # for X; confirm against transform().
     self.X = features.toarray()
示例#5
0
    def setUp(self):
        """Cleanse 'tests/test_data.txt' and split it by fixed train/test ids."""
        frame = pd.read_csv('tests/test_data.txt', delimiter=';')
        self.data = preprocessing.cleanse(frame)

        ids_train = ['a1000001', 'a1000002', 'a1000003']
        ids_test = ['a1000007', 'a1000008']
        self.train, self.test = preprocessing.split_train_test(
            self.data,
            ids_train,
            ids_test,
        )
示例#6
0
 def setUp(self):
     """Run the first 50 fixture rows through the pipeline into X and Y."""
     rows = pd.read_csv('tests/test_data_old.txt', delimiter=';')
     cleaned = cleanse(rows.head(50))
     with_features = add_independent_features(cleaned)

     matrix, self.Y = transform(with_features)
     # X is stored as the result of toarray(), not as transform() returned it.
     self.X = matrix.toarray()
示例#7
0
    def setUp(self):
        """Load the fixture, cleanse it and store the train/test partitions."""
        self.data = preprocessing.cleanse(
            pd.read_csv('tests/test_data.txt', delimiter=';'))

        # Fixed ids that decide which entries go to which partition.
        wanted_train = ['a1000001', 'a1000002', 'a1000003']
        wanted_test = ['a1000007', 'a1000008']

        parts = preprocessing.split_train_test(
            self.data, wanted_train, wanted_test)
        self.train, self.test = parts
示例#8
0
 def setUp(self):
     """Create scaled train/test features and targets from 50 fixture rows."""
     frame = pd.read_csv('tests/test_data_old.txt', delimiter=';').head(50)
     enriched = add_independent_features(cleanse(frame))
     X, Y = transform(enriched, scaler=normalize_features)
     # The first six entries are the training part, everything after is the test part.
     self.X_tr, self.X_te = X[:6], X[6:]
     self.Y_tr, self.Y_te = Y[:6], Y[6:]
示例#9
0
 def setUp(self):
     """Prepare X/Y train and test slices using ``normalize_features`` as scaler."""
     source = pd.read_csv('tests/test_data_old.txt', delimiter=';')
     prepared = add_independent_features(cleanse(source.head(50)))
     features, targets = transform(prepared, scaler=normalize_features)

     cut = 6  # position separating the training entries from the test entries
     self.X_tr = features[:cut]
     self.Y_tr = targets[:cut]
     self.X_te = features[cut:]
     self.Y_te = targets[cut:]
示例#10
0
 def test_cleanse(self):
     """Check value invariants and column dtypes of the cleansed frame.

     Asserts that no row has a zero quantity, that no row returns more
     than was ordered, and that the date and id columns have the expected
     dtypes.
     """
     df = cleanse(self.raw_data)
     # Column values
     self.assertTrue((df.quantity != 0).all())
     self.assertTrue((df.quantity >= df.returnQuantity).all())
     # Column types
     # np.int / np.float were aliases of the builtins and were removed in
     # NumPy 1.24; comparing against int / float is the equivalent check.
     # assertEqual reports the actual dtype when the check fails.
     self.assertEqual(df.orderDate.dtype, np.dtype('<M8[ns]'))
     self.assertEqual(df.orderID.dtype, int)
     self.assertEqual(df.articleID.dtype, int)
     self.assertEqual(df.customerID.dtype, int)
     self.assertEqual(df.voucherID.dtype, float)
示例#11
0
    def setUp(self):
        """Store the raw fixture and a dict of cleansed data plus id lists."""
        self.raw_data = pd.read_csv('tests/test_data.txt', delimiter=';')
        cleansed = cleanse(self.raw_data)

        ids_for_training = ['a1000001', 'a1000002', 'a1000003']
        ids_for_testing = ['a1000007', 'a1000008']

        bundle = {'data': cleansed}
        bundle['train_ids'] = ids_for_training
        bundle['test_ids'] = ids_for_testing
        self.data = bundle
示例#12
0
 def test_cleanse(self):
     """Verify that ``cleanse`` yields sane quantities and expected dtypes."""
     df = cleanse(self.raw_data)
     # Column values: no zero-quantity rows, and returns never exceed orders.
     self.assertTrue((df.quantity != 0).all())
     self.assertTrue((df.quantity >= df.returnQuantity).all())
     # Column types: the removed NumPy aliases np.int / np.float (gone since
     # NumPy 1.24) are replaced by the builtins they aliased, so the
     # comparison is unchanged but no longer raises AttributeError.
     self.assertEqual(df.orderDate.dtype, np.dtype('<M8[ns]'))
     self.assertEqual(df.orderID.dtype, int)
     self.assertEqual(df.articleID.dtype, int)
     self.assertEqual(df.customerID.dtype, int)
     self.assertEqual(df.voucherID.dtype, float)
示例#13
0
 def setUp(self):
     """Expose the cleansed first 50 rows of the fixture as ``self.data``."""
     frame = pd.read_csv('tests/test_data_old.txt', delimiter=';').head(50)
     self.data = cleanse(frame)
示例#14
0
 def setUp(self):
     """Read the fixture, keep its first 50 rows and store them cleansed."""
     source = pd.read_csv('tests/test_data_old.txt', delimiter=';')
     first_rows = source.head(50)
     self.data = cleanse(first_rows)
示例#15
0
 def test_cleanse(self):
     """Smoke test: ``cleanse`` must run on ``self.data`` without raising."""
     # The result is deliberately discarded; only successful completion matters.
     _ = cleanse(self.data)