Пример #1
0
 def test_PartitionedTableAppender_range_short(self):
     """Round-trip 50,000 rows through a RANGE-partitioned table keyed on a SHORT column.

     The setup script drops ``dfs://PTA_test`` if it exists, recreates it
     with RANGE boundaries ``short([1,10001,20001,30001])`` and creates the
     partitioned table ``pt`` on column ``id``.  The rows are appended via
     ``ddb.PartitionedTableAppender`` using ``self.pool``; the test checks the
     returned row count and then compares the sorted server-side content
     with the equally sorted local frame.
     """
     setup_script = '''
     dbPath = "dfs://PTA_test"
     if(existsDatabase(dbPath))
         dropDatabase(dbPath)
     t = table(100:100,`sym`id`qty`price,[SYMBOL,SHORT,INT,DOUBLE])
     db=database(dbPath,RANGE,short([1,10001,20001,30001]))
     pt = db.createPartitionedTable(t, `pt, `id)
     '''
     self.s.run(setup_script)

     row_count = 50000
     writer = ddb.PartitionedTableAppender("dfs://PTA_test", "pt", "id",
                                           self.pool)

     # ids are drawn from [1, 30001) so every row falls inside one of the
     # three range partitions declared in the setup script.
     sym_col = [str(value) for value in np.arange(10001, 60001)]
     id_col = np.random.randint(1, 30001, row_count)
     qty_col = np.random.randint(0, 101, row_count)
     price_col = np.random.randint(0, 60001, row_count) * 0.1
     frame = pd.DataFrame({
         'sym': sym_col,
         'id': id_col,
         'qty': qty_col,
         'price': price_col,
     })

     self.assertEqual(writer.append(frame), row_count)

     actual = self.s.run(
         "select * from loadTable('dfs://PTA_test', 'pt') order by id, sym, qty, price"
     )

     # Sort locally on the same keys as the query, then renumber the index
     # 0..row_count-1 so it lines up with the freshly loaded frame.
     sort_keys = ['id', 'sym', 'qty', 'price']
     expected = frame.sort_values(by=sort_keys,
                                  ascending=[True] * len(sort_keys))
     expected = expected.set_index(np.arange(0, row_count))

     # dtypes are deliberately not compared (check_dtype=False); presumably
     # the SHORT/INT columns come back narrower than pandas' defaults.
     assert_frame_equal(actual, expected, check_dtype=False)
Пример #2
0
 def test_PartitionedTableAppender_compo_value_list_range(self):
     """Round-trip 100,000 rows through a COMPO (VALUE x LIST x RANGE) partitioned table.

     The setup script drops ``dfs://db_compoDB_sym`` if it exists and
     recreates it as a three-level COMPO database (VALUE on ``sym``, LIST on
     ``ticker``, RANGE on ``id``) with the partitioned table ``pt``.  The
     rows are appended via ``ddb.PartitionedTableAppender`` using
     ``self.pool``; the test checks the returned row count and that the
     table content equals the input frame.
     """
     self.s.run('''
                dbPath="dfs://db_compoDB_sym"
                if (existsDatabase(dbPath))
                    dropDatabase(dbPath)
                t = table(100:100,`sym`ticker`id,[SYMBOL,STRING,INT])
                dbSym = database(,VALUE,`aaa`bbb`ccc`ddd)
                dbTic = database(, LIST, [`IBM`ORCL`MSFT, `GOOG`FB] )
                dbId = database(,RANGE,0 40000 80000 120000)
                db = database("dfs://db_compoDB_sym", COMPO, [dbSym, dbTic,dbId])
                pt = db.createPartitionedTable(t, `pt, `sym`ticker`id)
                ''')
     appender = ddb.PartitionedTableAppender("dfs://db_compoDB_sym", "pt",
                                             "sym", self.pool)
     n = 100000
     sym_values = np.array(['aaa', 'bbb', 'ccc', 'ddd'])
     ticker_values = np.array(['IBM', 'ORCL', 'MSFT', 'GOOG', 'FB'])
     data = pd.DataFrame({
         "sym": np.repeat(sym_values, 25000),
         "ticker": np.repeat(ticker_values, 20000),
         'id': range(0, n)
     })
     # Cast to int32 so the dtype matches the INT column declared in the
     # setup script (assert_frame_equal checks dtypes by default).
     data['id'] = data["id"].astype("int32")
     appended = appender.append(data)
     self.assertEqual(appended, n)
     # Sort explicitly instead of relying on the order in which partitions
     # happen to be scanned: ``id`` is unique and ascending in ``data``, so
     # ``order by id`` reproduces the input row order deterministically.
     loaded = self.s.run(
         '''select * from loadTable("dfs://db_compoDB_sym",`pt) order by id''')
     assert_frame_equal(data, loaded)
Пример #3
0
 def test_PartitionedTableAppender_compo_hash_range(self):
     """Round-trip 50,000 rows through a COMPO (HASH x RANGE) partitioned table.

     The setup script drops ``dfs://db_compoDB_int`` if it exists and
     recreates it with a two-bucket INT hash on ``id`` and a RANGE on
     ``ticker`` whose boundaries come from ``cutPoints`` over the strings
     "10001".."60000".  The rows are appended via
     ``ddb.PartitionedTableAppender`` using ``self.pool``; the test checks
     the returned row count and that the sorted table content equals the
     input frame.
     """
     setup_script = '''
                     dbPath="dfs://db_compoDB_int"
                     if (existsDatabase(dbPath))
                         dropDatabase(dbPath)
                     t = table(100:100,`id`ticker,[INT,STRING])
                     dbId = database(,HASH,[INT,2])
                     sym_range=cutPoints(string(10001..60000), 10)
                     dbTic = database(, RANGE, sym_range )
                     db = database("dfs://db_compoDB_int", COMPO, [dbId, dbTic])
                     pt = db.createPartitionedTable(t, `pt, `id`ticker)
                     '''
     self.s.run(setup_script)

     writer = ddb.PartitionedTableAppender("dfs://db_compoDB_int", "pt",
                                           "id", self.pool)

     expected_rows = 50000
     # Each id 1..5000 appears ten times in a row while ticker keeps
     # increasing, so the frame is already sorted by (id, ticker).
     id_col = np.repeat(np.arange(1, 5001), 10, axis=0)
     ticker_col = [str(value) for value in np.arange(10001, 60001)]
     frame = pd.DataFrame({"id": id_col, "ticker": ticker_col})
     # Match the INT column declared in the setup script.
     frame["id"] = frame["id"].astype("int32")

     self.assertEqual(writer.append(frame), expected_rows)

     loaded = self.s.run(
         '''select * from loadTable("dfs://db_compoDB_int",`pt) order by id,ticker'''
     )
     assert_frame_equal(frame, loaded)
Пример #4
0
 def test_PartitionedTableAppender_value_string(self):
     """Round-trip 50,000 rows through a VALUE-partitioned table keyed on a STRING column.

     The setup script drops ``dfs://PTA_test`` if it exists, recreates it
     with one VALUE partition per symbol ('AAPL', 'MSFT', 'IBM', 'GOOG',
     'YHOO') and creates the partitioned table ``pt`` on column ``sym``.
     The rows are appended via ``ddb.PartitionedTableAppender`` using
     ``self.pool``; the test checks the returned row count and then compares
     the sorted server-side content with the equally sorted local frame.
     """
     setup_script = '''
     dbPath = "dfs://PTA_test"
     if(existsDatabase(dbPath))
         dropDatabase(dbPath)
     t = table(100:100,`sym`id`qty`price,[STRING,INT,INT,DOUBLE])
     db=database(dbPath,VALUE,['AAPL', 'MSFT', 'IBM', 'GOOG', 'YHOO'])
     pt = db.createPartitionedTable(t, `pt, `sym)
     '''
     self.s.run(setup_script)

     row_count = 50000
     writer = ddb.PartitionedTableAppender("dfs://PTA_test", "pt", "sym",
                                           self.pool)

     # 10,000 consecutive rows per symbol; the numeric columns are random.
     sym_col = np.repeat(['AAPL', 'MSFT', 'IBM', 'GOOG', 'YHOO'], 10000, axis=0)
     id_col = np.random.randint(0, 60001, row_count)
     qty_col = np.random.randint(0, 101, row_count)
     price_col = np.random.randint(0, 60001, row_count) * 0.1
     frame = pd.DataFrame({
         'sym': sym_col,
         'id': id_col,
         'qty': qty_col,
         'price': price_col,
     })

     self.assertEqual(writer.append(frame), row_count)

     actual = self.s.run(
         "select * from loadTable('dfs://PTA_test', 'pt') order by id, sym, qty, price"
     )

     # Sort locally on the same keys as the query, then renumber the index
     # 0..row_count-1 so it lines up with the freshly loaded frame.
     sort_keys = ['id', 'sym', 'qty', 'price']
     expected = frame.sort_values(by=sort_keys,
                                  ascending=[True] * len(sort_keys))
     expected = expected.set_index(np.arange(0, row_count))

     # dtypes are deliberately not compared (check_dtype=False).
     assert_frame_equal(actual, expected, check_dtype=False)
Пример #5
0
 def test_PartitionedTableAppender_compo_range_list(self):
     """Round-trip 100,000 rows through a COMPO (RANGE x LIST) partitioned table.

     The setup script drops ``dfs://db_compoDB_int`` if it exists and
     recreates it with a RANGE on ``id`` (0 / 40000 / 80000 / 120000) and a
     LIST on ``ticker``, plus the partitioned table ``pt``.  The rows are
     appended via ``ddb.PartitionedTableAppender`` using ``self.pool``; the
     test checks the returned row count and that the table content equals
     the input frame.
     """
     self.s.run('''
             dbPath="dfs://db_compoDB_int"
             if (existsDatabase(dbPath))
                 dropDatabase(dbPath)
             t = table(100:100,`id`ticker,[INT,STRING])
             dbId = database(,RANGE,0 40000 80000 120000)
             dbTic = database(, LIST, [`IBM`ORCL`MSFT, `GOOG`FB] )
             db = database("dfs://db_compoDB_int", COMPO, [dbId, dbTic])
             pt = db.createPartitionedTable(t, `pt, `id`ticker)
             ''')
     appender = ddb.PartitionedTableAppender("dfs://db_compoDB_int", "pt",
                                             "id", self.pool)
     n = 100000
     ticker_values = np.array(['IBM', 'ORCL', 'MSFT', 'GOOG', 'FB'])
     data = pd.DataFrame({
         "id": range(0, n),
         "ticker": np.repeat(ticker_values, 20000),
     })
     # Cast to int32 so the dtype matches the INT column declared in the
     # setup script (assert_frame_equal checks dtypes by default).
     data['id'] = data["id"].astype("int32")
     appended = appender.append(data)
     self.assertEqual(appended, n)
     # Sort explicitly instead of relying on the order in which partitions
     # happen to be scanned: ``id`` is unique and ascending in ``data``, so
     # ``order by id`` reproduces the input row order deterministically.
     loaded = self.s.run(
         '''select * from loadTable("dfs://db_compoDB_int",`pt) order by id''')
     assert_frame_equal(data, loaded)
Пример #6
0
 def test_PartitionedTableAppender_compo_hash_value(self):
     """Round-trip 50,000 rows through a COMPO (HASH x VALUE) partitioned table.

     The setup script drops ``dfs://db_compoDB_str`` if it exists and
     recreates it with a ten-bucket STRING hash on ``str`` and a VALUE
     partition per symbol on ``ticker``, plus the partitioned table ``pt``.
     The rows are appended via ``ddb.PartitionedTableAppender`` using
     ``self.pool``; the test checks the returned row count and that the
     sorted table content equals the input frame.
     """
     setup_script = '''
                             dbPath="dfs://db_compoDB_str"
                             if (existsDatabase(dbPath))
                                 dropDatabase(dbPath)
                             t = table(100:100,`str`ticker,[STRING,SYMBOL])
                             dbStr = database(,HASH,[STRING,10])
                             dbTic = database(, VALUE,  symbol(['AAPL', 'MSFT', 'IBM', 'GOOG', 'YHOO']) )
                             db = database("dfs://db_compoDB_str", COMPO, [dbStr, dbTic])
                             pt = db.createPartitionedTable(t, `pt, `str`ticker)
                             '''
     self.s.run(setup_script)

     writer = ddb.PartitionedTableAppender("dfs://db_compoDB_str", "pt",
                                           "str", self.pool)

     expected_rows = 50000
     # The "str" values are unique and ascending, so the frame is already
     # in the order produced by the query's ``order by str,ticker``.
     str_col = [str(value) for value in np.arange(10001, 60001)]
     ticker_col = np.repeat(['AAPL', 'MSFT', 'IBM', 'GOOG', 'YHOO'], 10000)
     frame = pd.DataFrame({"str": str_col, "ticker": ticker_col})

     self.assertEqual(writer.append(frame), expected_rows)

     loaded = self.s.run(
         '''select * from loadTable("dfs://db_compoDB_str",`pt) order by str,ticker'''
     )
     assert_frame_equal(frame, loaded)
Пример #7
0
 def test_PartitionedTableAppender_compo_hash_list(self):
     """Round-trip 50,000 rows through a COMPO (HASH x LIST) partitioned table.

     The setup script drops ``dfs://db_compoDB_sym`` if it exists and
     recreates it with a two-bucket SYMBOL hash on ``sym`` and a LIST on
     ``ticker``, plus the partitioned table ``pt``.  The rows are appended
     via ``ddb.PartitionedTableAppender`` using ``self.pool``; the test
     checks the returned row count and that the sorted table content equals
     the input frame.
     """
     self.s.run('''
                             dbPath="dfs://db_compoDB_sym"
                             if (existsDatabase(dbPath))
                                 dropDatabase(dbPath)
                             t = table(100:100,`sym`ticker,[SYMBOL,STRING])
                             dbSym = database(,HASH,[SYMBOL,2])
                             dbTic = database(, LIST,  [`IBM`ORCL`MSFT, `GOOG`FB] )
                             db = database("dfs://db_compoDB_sym", COMPO, [dbSym, dbTic])
                             pt = db.createPartitionedTable(t, `pt, `sym`ticker)
                             ''')
     appender = ddb.PartitionedTableAppender("dfs://db_compoDB_sym", "pt",
                                             "sym", self.pool)
     # The sym values are unique and ascending, so the frame is already in
     # the order produced by the query's ``order by sym,ticker``.
     sym = list(map(str, np.arange(10001, 60001)))
     ticker_values = np.array(['IBM', 'ORCL', 'MSFT', 'GOOG', 'FB'])
     # The ticker column is built directly from np.repeat; the original
     # also built a 50,000-element string list that was never used.
     data = pd.DataFrame({"sym": sym, "ticker": np.repeat(ticker_values, 10000)})
     appended = appender.append(data)
     self.assertEqual(appended, 50000)
     loaded = self.s.run(
         '''select * from loadTable("dfs://db_compoDB_sym",`pt) order by sym,ticker'''
     )
     assert_frame_equal(data, loaded)