def test_million_row():
    """Ingest one million rows through the bulk API and verify the row count.

    Writes (i, rows - i) pairs, commits, then checks COUNT(*) through the
    MariaDB connection.
    """
    conn = create_conn()
    tablename = 'pymcsapi_millionrow'
    drop_table(conn, tablename)
    exec_stmt(conn, 'CREATE TABLE %s (a int, b int) engine=columnstore' % tablename)
    d = pymcsapi.ColumnStoreDriver()
    b = d.createBulkInsert(DB_NAME, tablename, 0, 0)
    rows = 1000000
    try:
        for i in range(0, rows):
            b.setColumn(0, i)
            b.setColumn(1, rows - i)
            b.writeRow()
        b.commit()
    except RuntimeError as err:
        # release the table lock so later tests are not blocked
        b.rollback()
        pytest.fail("Error executing bulk insert: %s" % (err, ))
    # BUG FIX: initialize cursor so the finally block cannot raise NameError
    # if conn.cursor() itself fails.
    cursor = None
    query = 'select count(*) cnt from %s' % tablename
    try:
        cursor = conn.cursor()
        cursor.execute(query)
        row = cursor.fetchone()
        assert row is not None
        assert row[0] == rows
    except mariadb.Error as err:
        # BUG FIX: the failure message previously referenced the undefined
        # name 'query_all_types', which raised NameError instead of failing
        # with a useful message.
        pytest.fail("Error executing query: %s, error: %s" % (query, err))
    finally:
        if cursor:
            cursor.close()
        drop_table(conn, tablename)
        conn.close()
def test_list_table_locks():
    """listTableLocks() must grow and shrink as bulk inserts open and roll back."""
    conn = create_conn()
    lock_tab_a = 'pymcsapi_lock1'
    lock_tab_b = 'pymcsapi_lock2'
    drop_table(conn, lock_tab_a)
    drop_table(conn, lock_tab_b)
    exec_stmt(conn, 'CREATE TABLE %s (a int, b int) engine=columnstore' % lock_tab_a)
    exec_stmt(conn, 'CREATE TABLE %s (a int, b int) engine=columnstore' % lock_tab_b)
    driver = pymcsapi.ColumnStoreDriver()
    try:
        # other tests may hold locks concurrently, so compare against a baseline
        baseline = len(driver.listTableLocks())
        ingest_a = driver.createBulkInsert(DB_NAME, lock_tab_a, 0, 0)
        # one additional lock after the first bulk insert opens
        assert len(driver.listTableLocks()) - baseline == 1
        ingest_b = driver.createBulkInsert(DB_NAME, lock_tab_b, 0, 0)
        # two additional locks while both bulk inserts are open
        assert len(driver.listTableLocks()) - baseline == 2
        ingest_b.rollback()
        # back down to one once the second insert rolls back
        assert len(driver.listTableLocks()) - baseline == 1
        ingest_a.rollback()
        # and back to the baseline once both are rolled back
        assert len(driver.listTableLocks()) - baseline == 0
    except RuntimeError as err:
        pytest.fail("Error executing driver operation: %s" % (err,))
    try:
        drop_table(conn, lock_tab_a)
        drop_table(conn, lock_tab_b)
    except mariadb.Error as err:
        pytest.fail("Error during cleanup: %s" %(err,))
    finally:
        conn.close()
def test_dates():
    """Bulk-insert a two-year range of DATE/DATETIME values and read them back.

    Starts at 2016-01-01 12:34:56 and advances one day per row, checking that
    both the date and datetime columns round-trip exactly.
    """
    # setup / create test table
    conn = create_conn()
    tablename = 'pymcsapi_dates'
    drop_table(conn, tablename)
    exec_stmt(
        conn,
        "create table %s (id int, dt date, dtm datetime) engine=columnstore" %
        (tablename, ))
    # generate and load a range of dates
    d = pymcsapi.ColumnStoreDriver()
    # CONSISTENCY FIX: use DB_NAME and tablename like every other test instead
    # of hard-coding 'mcsapi' / 'pymcsapi_dates'.
    b = d.createBulkInsert(DB_NAME, tablename, 0, 0)
    maxDays = 366 + 365 + 1  # 2016 is a leap year; covers 2016 + 2017 inclusive
    dt = datetime.datetime(2016, 1, 1, 12, 34, 56)
    try:
        for i in range(1, maxDays):
            b.setColumn(0, i)
            b.setColumn(
                1, pymcsapi.ColumnStoreDateTime(dt.year, dt.month, dt.day))
            b.setColumn(
                2,
                pymcsapi.ColumnStoreDateTime(dt.year, dt.month, dt.day,
                                             dt.hour, dt.minute, dt.second))
            b.writeRow()
            dt += datetime.timedelta(days=1)
        b.commit()
    except RuntimeError as err:
        b.rollback()
        pytest.fail("Error executing bulk insert: %s" % (err, ))
    # query and validate values written
    # BUG FIX: initialize cursor so the finally block cannot raise NameError
    # if conn.cursor() itself fails.
    cursor = None
    try:
        cursor = conn.cursor()
        cursor.execute("select id, dt, dtm from %s order by id" % (tablename, ))
        exp_i = 1
        exp_dt = datetime.date(2016, 1, 1)
        exp_dtm = datetime.datetime(2016, 1, 1, 12, 34, 56)
        for (id, dt, dtm) in cursor:
            assert id == exp_i
            assert dt == exp_dt
            assert dtm == exp_dtm
            exp_i += 1
            exp_dt += datetime.timedelta(days=1)
            exp_dtm += datetime.timedelta(days=1)
    except mariadb.Error as err:
        pytest.fail("Error executing test_dates validation query: %s" % (err, ))
    finally:
        if cursor:
            cursor.close()
        drop_table(conn, tablename)
        conn.close()
def i1_common(datatype, ch_len):
    """Shared helper: ingest three rows into a char/varchar(ch_len) table
    and verify them.

    :param datatype: 'char' selects a CHAR column; anything else VARCHAR
    :param ch_len:   declared length of the string column
    """
    conn = create_conn()
    tablename = 'pymcsapi_i1'
    drop_table(conn, tablename)
    # CONSISTENCY FIX: build the DDL/queries from tablename instead of
    # hard-coding 'pymcsapi_i1' in the SQL strings.
    if (datatype == 'char'):
        exec_stmt(
            conn,
            'create table %s(i int, ch char(%s)) engine=columnstore' %
            (tablename, ch_len))
    else:
        exec_stmt(
            conn,
            'create table %s(i int, ch varchar(%s)) engine=columnstore' %
            (tablename, ch_len))
    d = pymcsapi.ColumnStoreDriver()
    b = d.createBulkInsert(DB_NAME, tablename, 0, 0)
    try:
        # setColumn returns (bulk, status); [0] recovers the bulk for chaining
        b.setColumn(0, 1)[0].setColumn(1, 'ABC')[0].writeRow()
        b.setColumn(0, 2)[0].setColumn(1, 'A')[0].writeRow()
        b.setColumn(0, 3)[0].setColumn(1, 'XYZ')[0].writeRow()
        b.commit()
    except RuntimeError as err:
        b.rollback()
        pytest.fail("Error executing bulk insert: %s" % (err, ))
    # BUG FIX: initialize cursor so the finally block cannot raise NameError
    # if conn.cursor() itself fails.
    cursor = None
    try:
        cursor = conn.cursor()
        cursor.execute('select i, ch from %s order by i' % (tablename, ))
        for (i, ch) in cursor:
            if (i == 1):
                assert ch == 'ABC'
            elif (i == 2):
                assert ch == 'A'
            elif (i == 3):
                assert ch == 'XYZ'
            else:
                pytest.fail("invalid i value: %s" % (i, ))
    except mariadb.Error as err:
        pytest.fail("Error executing i1_common query: %s" % (err, ))
    finally:
        if cursor:
            cursor.close()
        drop_table(conn, tablename)
        conn.close()
def test_inserted():
    """Verify getSummary().getRowsInsertedCount() matches the rows written.

    Each setColumn call is also checked for CONVERT_STATUS_NONE (no value
    conversion should occur).
    """
    conn = create_conn()
    tablename = 'py_summary_ins'
    rows = 1000
    drop_table(conn, tablename)
    exec_stmt(
        conn,
        'create table if not exists %s(a int, b varchar(10)) engine=columnstore'
        % (tablename, ))
    d = pymcsapi.ColumnStoreDriver()
    b = d.createBulkInsert(DB_NAME, tablename, 0, 0)
    try:
        for i in range(0, rows):
            bulk, status = b.setColumn(0, i)
            assert status == pymcsapi.CONVERT_STATUS_NONE
            bulk, status = b.setColumn(1, 'ABC')
            assert status == pymcsapi.CONVERT_STATUS_NONE
            bulk.writeRow()
        b.commit()
        s = b.getSummary()
        assert int(s.getRowsInsertedCount()) == rows
    except RuntimeError as err:
        b.rollback()
        pytest.fail("Error executing status row insertion test: %s" % (err, ))
    # BUG FIX: initialize cursor so the finally block cannot raise NameError
    # if conn.cursor() itself fails.
    cursor = None
    try:
        cursor = conn.cursor()
        cursor.execute('select count(*) cnt from %s' % (tablename, ))
        row = cursor.fetchone()
        assert row is not None
        assert row[0] == rows
    except mariadb.Error as err:
        pytest.fail("Error executing status row insertion query: %s" % (err, ))
    finally:
        if cursor:
            cursor.close()
        drop_table(conn, tablename)
        conn.close()
def test_truncated():
    """Verify that an over-long varchar value is reported as truncated.

    A 26-character string into varchar(10) must yield
    CONVERT_STATUS_TRUNCATED and a truncation count of 1 in the summary.
    """
    conn = create_conn()
    tablename = 'py_summary_tru'
    drop_table(conn, tablename)
    exec_stmt(
        conn,
        'create table if not exists %s(a int, b varchar(10)) engine=columnstore'
        % (tablename, ))
    d = pymcsapi.ColumnStoreDriver()
    b = d.createBulkInsert(DB_NAME, tablename, 0, 0)
    try:
        bulk, status = b.setColumn(0, 23)
        assert status == pymcsapi.CONVERT_STATUS_NONE
        bulk, status = b.setColumn(1, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ')
        assert status == pymcsapi.CONVERT_STATUS_TRUNCATED
        bulk.writeRow()
        b.commit()
        s = b.getSummary()
        assert int(s.getTruncationCount()) == 1
    except RuntimeError as err:
        # BUG FIX: roll back on failure like the sibling tests do, so a
        # failed ingest does not leave the table lock held for later tests.
        b.rollback()
        pytest.fail("Error executing status row truncation test: %s" % (err, ))
    # BUG FIX: initialize cursor so the finally block cannot raise NameError
    # if conn.cursor() itself fails.
    cursor = None
    try:
        cursor = conn.cursor()
        cursor.execute('select count(*) cnt from %s' % (tablename, ))
        row = cursor.fetchone()
        assert row is not None
        assert row[0] == 1
    except mariadb.Error as err:
        pytest.fail("Error executing status row truncation query: %s" % (err, ))
    finally:
        if cursor:
            cursor.close()
        drop_table(conn, tablename)
        conn.close()
def test_is_table_locked():
    """isTableLocked() reports True while a bulk insert is open, False after rollback."""
    conn = create_conn()
    locked_table = 'pymcsapi_lock3'
    drop_table(conn, locked_table)
    exec_stmt(conn, 'CREATE TABLE %s (a int, b int) engine=columnstore' % locked_table)
    driver = pymcsapi.ColumnStoreDriver()
    try:
        # opening a bulk insert must take a lock on the table
        ingest = driver.createBulkInsert(DB_NAME, locked_table, 0, 0)
        assert driver.isTableLocked(DB_NAME, locked_table) == True
        # the lock must be released once the insert is rolled back
        ingest.rollback()
        assert driver.isTableLocked(DB_NAME, locked_table) == False
    except RuntimeError as err:
        pytest.fail("Error executing driver operation: %s" % (err,))
    try:
        drop_table(conn, locked_table)
    except mariadb.Error as err:
        pytest.fail("Error during cleanup: %s" %(err,))
    finally:
        conn.close()
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
# MA 02110-1301 USA

# Import a pipe-delimited CSV file into a ColumnStore table via the bulk API.
# Usage: script.py db-name table-name import-file
import sys, pymcsapi, csv

if len(sys.argv) != 4:
    print("required arguments: db-name table-name import-file")
    sys.exit(2)

driver = pymcsapi.ColumnStoreDriver()
catalog = driver.getSystemCatalog()
table = catalog.getTable(sys.argv[1], sys.argv[2])
columnCount = table.getColumnCount()
bulk = driver.createBulkInsert(sys.argv[1], sys.argv[2], 0, 0)
# BUG FIX: Python 3 compatibility — csv.reader requires text mode (binary
# 'rb' raises TypeError; csv docs recommend newline=''), and xrange() no
# longer exists, so use range().
with open(sys.argv[3], 'r', newline='') as csvfile:
    csvreader = csv.reader(csvfile, delimiter='|')
    for row in csvreader:
        # ignore extra CSV fields beyond the table's column count
        for col in range(0, len(row)):
            if col < columnCount:
                bulk.setColumn(col, row[col])
        bulk.writeRow()
    bulk.commit()
summary = bulk.getSummary()
print("Execution time: %s" % (summary.getExecutionTime(),))
def export(database, table, df, configuration=None):
    """Export a Spark-style DataFrame into a ColumnStore table via the bulk API.

    Rows are collected to the driver and ingested one by one; on any exception
    the whole bulk insert is rolled back.  A short ingest summary is printed
    either way.

    :param database:      target database name
    :param table:         target table name
    :param df:            object with a .collect() returning rows (presumably a
                          Spark DataFrame — TODO confirm against callers)
    :param configuration: optional ColumnStore XML config path passed to the
                          driver; default driver config when None
    """
    # On Python 3 rebind the module-level name 'long' to int so the integer
    # fallback below works on both major versions.
    global long
    python2 = True
    if sys.version_info[0] == 3:
        long = int
        python2 = False
    rows = df.collect()
    if configuration == None:
        driver = pymcsapi.ColumnStoreDriver()
    else:
        driver = pymcsapi.ColumnStoreDriver(configuration)
    bulkInsert = driver.createBulkInsert(database, table, 0, 0)
    # get the column count of table
    dbCatalog = driver.getSystemCatalog()
    dbTable = dbCatalog.getTable(database, table)
    dbTableColumnCount = dbTable.getColumnCount()
    # insert row by row into table
    try:
        for row in rows:
            # extra DataFrame columns beyond the table's column count are ignored
            for columnId in range(0, len(row)):
                if columnId < dbTableColumnCount:
                    if row[columnId] is None:
                        # NULL handling: honour nullability, otherwise fall back
                        # to the column's default value
                        if dbTable.getColumn(columnId).isNullable():
                            bulkInsert.setNull(columnId)
                        else:
                            print("warning: column %d is not nullable. Using default value instead." % (columnId,))
                            bulkInsert.setColumn(columnId, dbTable.getColumn(columnId).getDefaultValue())
                    else:
                        # bool before int: bool is an int subclass, so test it first
                        if isinstance(row[columnId], bool):
                            if row[columnId]:
                                bulkInsert.setColumn(columnId, 1)
                            else:
                                bulkInsert.setColumn(columnId, 0)
                        # matches datetime.datetime too (subclass of date)
                        elif isinstance(row[columnId], datetime.date):
                            bulkInsert.setColumn(columnId, row[columnId].strftime('%Y-%m-%d %H:%M:%S'))
                        elif isinstance(row[columnId], decimal.Decimal):
                            dbColumn = dbTable.getColumn(columnId)
                            #DATA_TYPE_DECIMAL, DATA_TYPE_UDECIMAL, DATA_TYPE_FLOAT, DATA_TYPE_UFLOAT, DATA_TYPE_DOUBLE, DATA_TYPE_UDOUBLE
                            # NOTE(review): the numeric codes presumably mirror the
                            # mcsapi data-type enum — verify against the SDK headers.
                            if dbColumn.getType() == 4 or dbColumn.getType() == 18 or dbColumn.getType() == 7 or dbColumn.getType() == 21 or dbColumn.getType() == 10 or dbColumn.getType() == 23:
                                # fixed-point formatting avoids scientific notation
                                s = '{0:f}'.format(row[columnId])
                                bulkInsert.setColumn(columnId, pymcsapi.ColumnStoreDecimal(s))
                            #ANY OTHER DATA TYPE
                            else:
                                bulkInsert.setColumn(columnId, long(row[columnId]))
                        #handle python2 unicode strings
                        elif python2 and isinstance(row[columnId], unicode):
                            bulkInsert.setColumn(columnId, row[columnId].encode('utf-8'))
                        #any other datatype is inserted without parsing
                        else:
                            bulkInsert.setColumn(columnId, row[columnId])
            bulkInsert.writeRow()
        bulkInsert.commit()
    except Exception as e:
        # roll back the whole ingest; row/columnId identify the failing value
        bulkInsert.rollback()
        print("An exception occured. The bulk insert was rolled back.")
        print("row: %s, ingest type: %s, ingest value: %s" % (row[columnId], type(row[columnId]), row[columnId]))
        print(type(e), str(e))
    #print a short summary of the insertion process
    summary = bulkInsert.getSummary()
    print("Execution time: %s" % (summary.getExecutionTime(),))
    print("Rows inserted: %s" % (summary.getRowsInsertedCount(),))
    print("Truncation count: %s" %(summary.getTruncationCount(),))
    print("Saturated count: %s" %(summary.getSaturatedCount(),))
    print("Invalid count: %s" %(summary.getInvalidCount(),))
def test_all_types():
    """Ingest simple, minimum and maximum values into a table covering every
    supported column type, then validate each row via all_types_validate().

    NOTE(review): relies on the name 'long' — a Python 2 builtin; presumably
    aliased to int at module level for Python 3. TODO confirm.
    """
    # create connection for table creation / query
    conn = create_conn()
    # create all_types table
    tablename = 'pymcsapi_all_types'
    drop_table(conn, tablename)
    create_all_types = """create table %s (
    uint64 bigint unsigned,
    int64 bigint,
    uint32 int unsigned,
    int32 int,
    uint16 smallint unsigned,
    int16 smallint,
    uint8 tinyint unsigned,
    `int8` tinyint,
    f float,
    d double,
    ch4 char(5),
    vch30 varchar(30),
    dt date,
    dtm datetime,
    dc decimal(18),
    tx text
    ) engine=columnstore""" % (tablename, )
    exec_stmt(conn, create_all_types)
    # load rows into all_types table
    d = pymcsapi.ColumnStoreDriver()
    # simple values
    b = d.createBulkInsert(DB_NAME, tablename, 0, 0)
    try:
        b.setColumn(0, long(1))
        b.setColumn(1, long(2))
        b.setColumn(2, int(3))
        b.setColumn(3, int(4))
        b.setColumn(4, int(5))
        b.setColumn(5, int(6))
        b.setColumn(6, int(7))
        b.setColumn(7, int(8))
        b.setColumn(8, float(1.234))
        b.setColumn(9, float(2.34567))
        b.setColumn(10, 'ABCD')
        b.setColumn(11, 'Hello World')
        b.setColumn(12, pymcsapi.ColumnStoreDateTime(2017, 9, 8))
        b.setColumn(13, pymcsapi.ColumnStoreDateTime(2017, 9, 8, 13, 58, 23))
        b.setColumn(14, pymcsapi.ColumnStoreDecimal(123))
        b.setColumn(15, 'Hello World Longer')
        b.writeRow()
        # min values
        b.setColumn(0, long(0))
        b.setColumn(1, long(-9223372036854775806))
        b.setColumn(2, int(0))
        b.setColumn(3, int(-2147483646))
        b.setColumn(4, int(0))
        b.setColumn(5, int(-32766))
        b.setColumn(6, int(0))
        b.setColumn(7, int(-126))
        b.setColumn(8, float(1.234))
        b.setColumn(9, float(2.34567))
        b.setColumn(10, 'A')
        b.setColumn(11, 'B')
        b.setColumn(12, pymcsapi.ColumnStoreDateTime(1000, 1, 1))
        b.setColumn(13, pymcsapi.ColumnStoreDateTime(1000, 1, 1, 0, 0, 0))
        b.setColumn(14, pymcsapi.ColumnStoreDecimal(-123))
        b.setColumn(15, 'C')
        b.writeRow()
        # max values
        b.setColumn(0, long(9223372036854775807))
        b.setColumn(1, long(9223372036854775807))  # python long is signed
        b.setColumn(2, int(4294967293))
        b.setColumn(3, int(2147483647))
        b.setColumn(4, int(65533))
        b.setColumn(5, int(32767))
        b.setColumn(6, int(253))
        b.setColumn(7, int(127))
        b.setColumn(8, float(1.234))
        b.setColumn(9, float(2.34567))
        b.setColumn(10, 'ZYXW')
        b.setColumn(11, '012345678901234567890123456789')
        b.setColumn(12, pymcsapi.ColumnStoreDateTime(9999, 12, 31))
        b.setColumn(13, pymcsapi.ColumnStoreDateTime(9999, 12, 31, 23, 59, 59))
        b.setColumn(14, pymcsapi.ColumnStoreDecimal(123))
        b.setColumn(15, '012345678901234567890123456789')
        b.writeRow()
        b.commit()
    except RuntimeError as err:
        # release the table lock so later tests are not blocked
        b.rollback()
        pytest.fail("Error executing bulk insert: %s" % (err, ))
    # verify data: each call checks one row, keyed by its uint64 value
    all_types_validate(
        conn, 1,
        "1, 2, 3, 4, 5, 6, 7, 8, 1.234, 2.34567, ABCD, Hello World, 2017-09-08, 2017-09-08 13:58:23, 123, Hello World Longer"
    )
    all_types_validate(
        conn, 0,
        "0, -9223372036854775806, 0, -2147483646, 0, -32766, 0, -126, 1.234, 2.34567, A, B, 1000-01-01, 1000-01-01 00:00:00, -123, C"
    )
    all_types_validate(
        conn, 9223372036854775807,
        "9223372036854775807, 9223372036854775807, 4294967293, 2147483647, 65533, 32767, 253, 127, 1.234, 2.34567, ZYXW, 012345678901234567890123456789, 9999-12-31, 9999-12-31 23:59:59, 123, 012345678901234567890123456789"
    )
    drop_table(conn, tablename)
    conn.close()
def test_clear_table_lock():
    """clearTableLock() must release a held lock, roll back the pending
    ingest, and raise for an unknown lock id.

    Flow: open a bulk insert (taking a lock), clear the lock, ingest one row
    with a fresh bulk insert, then verify only that one row survived and that
    clearing the already-released lock id raises RuntimeError.
    """
    conn = create_conn()
    tablename = 'pymcsapi_lock4'
    drop_table(conn, tablename)
    exec_stmt(conn, 'CREATE TABLE %s (a int, b int) engine=columnstore' % tablename)
    d = pymcsapi.ColumnStoreDriver()
    try:
        lockedTablesAtBeginning = len(d.listTableLocks())
        # initiate ingestion and verify that tablename is locked
        b1 = d.createBulkInsert(DB_NAME, tablename, 0, 0)
        for i in range(0, 100001):
            b1.setColumn(0, 0)
            b1.setColumn(1, 1)
            b1.writeRow()
        assert True == d.isTableLocked(DB_NAME, tablename)
        d.clearTableLock(DB_NAME, tablename)
        # verify that tablename is not locked after clearTableLock
        assert False == d.isTableLocked(DB_NAME, tablename)
        # use bulk 2 to ingest one row
        b2 = d.createBulkInsert(DB_NAME, tablename, 0, 0)
        b2.setColumn(0, 23)
        b2.setColumn(1, 42)
        b2.writeRow()
        # snapshot the lock list while b2's lock is still held
        lks = d.listTableLocks()
        b2.commit()
        # verify that tablename is not locked after commit of bulk2
        assert False == d.isTableLocked(DB_NAME, tablename)
        # verify that an error is thrown if lock does not exist
        oid = d.getSystemCatalog().getTable(DB_NAME, tablename).getOID()
        lockId = -1
        for l in lks:
            if l.tableOID == oid:
                lockId = l.id
        assert lockId > -1
        with pytest.raises(RuntimeError) as exceptionInfo:
            d.clearTableLock(lockId)
        assert exceptionInfo.value.args[0] == "Error, no lock found for lockId: %d" % (lockId,)
    except RuntimeError as err:
        pytest.fail("Error caught: %s" % (err,))
    # verify that only one row was written from bulk2 and bulk1's write was rolled back
    # BUG FIX: initialize cursor so the finally block cannot raise NameError
    # if conn.cursor() itself fails.
    cursor = None
    try:
        cursor = conn.cursor()
        cursor.execute('select count(*) cnt from %s' % tablename)
        row = cursor.fetchone()
        assert row is not None
        assert row[0] == 1
        cursor.execute('select b from %s WHERE a=23' % tablename)
        row = cursor.fetchone()
        assert row is not None
        assert row[0] == 42
        drop_table(conn, tablename)
    except mariadb.Error as err:
        pytest.fail("Error executing query: %s" %(err,))
    finally:
        if cursor:
            cursor.close()
        conn.close()
def test_exception_thrown():
    """Looking up a nonexistent table must raise RuntimeError with a clear message."""
    with pytest.raises(RuntimeError) as excinfo:
        drv = pymcsapi.ColumnStoreDriver()
        drv.getSystemCatalog().getTable("test", "nonexistent")
    assert excinfo.value.args[0] == 'test.nonexistent not found'