def test_insert_select(self, engine, connection):
    one_row = Table('one_row', MetaData(bind=engine), autoload=True)
    table = Table('insert_test', MetaData(bind=engine),
                  Column('a', sqlalchemy.types.Integer))
    table.drop(checkfirst=True)
    table.create()
    connection.execute(table.insert().from_select(['a'], one_row.select()))
    result = table.select().execute().fetchall()
    expected = [(1,)]
    self.assertEqual(result, expected)

def test_insert_select(self, engine, connection):
    one_row = Table('one_row', MetaData(bind=engine), autoload=True)
    table = Table('insert_test', MetaData(bind=engine),
                  Column('a', sqlalchemy.types.Integer),
                  schema='pyhive_test_database')
    table.drop(checkfirst=True)
    table.create()
    connection.execute('SET mapred.job.tracker=local')
    # NOTE(jing) I'm stuck on a version of Hive without INSERT ... VALUES
    connection.execute(table.insert().from_select(['a'], one_row.select()))
    result = table.select().execute().fetchall()
    expected = [(1,)]
    self.assertEqual(result, expected)

def test_engine_with_dataset(engine_using_test_dataset):
    rows = engine_using_test_dataset.execute('SELECT * FROM sample_one_row').fetchall()
    assert list(rows[0]) == ONE_ROW_CONTENTS

    table_one_row = Table('sample_one_row',
                          MetaData(bind=engine_using_test_dataset), autoload=True)
    rows = table_one_row.select().execute().fetchall()
    assert list(rows[0]) == ONE_ROW_CONTENTS_EXPANDED

    table_one_row = Table('test_pybigquery.sample_one_row',
                          MetaData(bind=engine_using_test_dataset), autoload=True)
    rows = table_one_row.select().execute().fetchall()
    # verify that we are pulling from the specifically-named dataset,
    # instead of pulling from the default dataset of the engine (which
    # does not have this table at all)
    assert list(rows[0]) == ONE_ROW_CONTENTS_EXPANDED

def test_reflect_select(self, engine, connection):
    """reflecttable should be able to fill in a table from the name"""
    one_row_complex = Table('one_row_complex', MetaData(bind=engine), autoload=True)
    self.assertEqual(len(one_row_complex.c), 14)
    self.assertIsInstance(one_row_complex.c.string, Column)
    row = one_row_complex.select().execute().fetchone()
    self.assertEqual(list(row), _ONE_ROW_COMPLEX_CONTENTS)

    # TODO some of these types could be filled in better
    self.assertIsInstance(one_row_complex.c.boolean.type, types.Boolean)
    self.assertIsInstance(one_row_complex.c.tinyint.type, types.Integer)
    self.assertIsInstance(one_row_complex.c.smallint.type, types.Integer)
    self.assertIsInstance(one_row_complex.c.int.type, types.Integer)
    self.assertIsInstance(one_row_complex.c.bigint.type, types.BigInteger)
    self.assertIsInstance(one_row_complex.c.float.type, types.Float)
    self.assertIsInstance(one_row_complex.c.double.type, types.Float)
    self.assertIsInstance(one_row_complex.c.string.type, types.String)
    self.assertIsInstance(one_row_complex.c.timestamp.type, types.TIMESTAMP)
    self.assertIsInstance(one_row_complex.c.binary.type, types.String)
    self.assertIsInstance(one_row_complex.c.array.type, types.String)
    self.assertIsInstance(one_row_complex.c.map.type, types.String)
    self.assertIsInstance(one_row_complex.c.struct.type, types.String)
    self.assertIsInstance(one_row_complex.c.decimal.type, types.DECIMAL)

def test_one_row_complex_null(self, engine, connection):
    one_row_complex_null = Table('one_row_complex_null',
                                 MetaData(bind=engine), autoload=True)
    rows = one_row_complex_null.select().execute().fetchall()
    self.assertEqual(len(rows), 1)
    self.assertEqual(list(rows[0]), [None] * len(rows[0]))

def test_reflect_select(self, engine, connection):
    """reflecttable should be able to fill in a table from the name"""
    one_row_complex = Table('one_row_complex', MetaData(bind=engine), autoload=True)
    self.assertEqual(len(one_row_complex.c), 15)
    self.assertIsInstance(one_row_complex.c.string, Column)
    rows = one_row_complex.select().execute().fetchall()
    self.assertEqual(len(rows), 1)
    self.assertEqual(list(rows[0]), _ONE_ROW_COMPLEX_CONTENTS)

    try:
        from sqlalchemy.types import BigInteger
    except ImportError:
        from sqlalchemy.databases.mysql import MSBigInteger as BigInteger

    # TODO some of these types could be filled in better
    self.assertIsInstance(one_row_complex.c.boolean.type, types.Boolean)
    self.assertIsInstance(one_row_complex.c.tinyint.type, types.Integer)
    self.assertIsInstance(one_row_complex.c.smallint.type, types.Integer)
    self.assertIsInstance(one_row_complex.c.int.type, types.Integer)
    self.assertIsInstance(one_row_complex.c.bigint.type, BigInteger)
    self.assertIsInstance(one_row_complex.c.float.type, types.Float)
    self.assertIsInstance(one_row_complex.c.double.type, types.Float)
    self.assertIsInstance(one_row_complex.c.string.type, types.String)
    self.assertIsInstance(one_row_complex.c.timestamp.type, HiveTimestamp)
    self.assertIsInstance(one_row_complex.c.binary.type, types.String)
    self.assertIsInstance(one_row_complex.c.array.type, types.String)
    self.assertIsInstance(one_row_complex.c.map.type, types.String)
    self.assertIsInstance(one_row_complex.c.struct.type, types.String)
    self.assertIsInstance(one_row_complex.c.union.type, types.String)
    self.assertIsInstance(one_row_complex.c.decimal.type, HiveDecimal)

def test_reflect_select(self, engine, connection):
    """reflecttable should be able to fill in a table from the name"""
    one_row_complex = Table('one_row_complex', MetaData(bind=engine), autoload=True)
    # Presto ignores the union and decimal columns
    self.assertEqual(len(one_row_complex.c), 15 - 2)
    self.assertIsInstance(one_row_complex.c.string, Column)
    rows = one_row_complex.select().execute().fetchall()
    self.assertEqual(len(rows), 1)
    self.assertEqual(list(rows[0]), [
        True,
        127,
        32767,
        2147483647,
        9223372036854775807,
        0.5,
        0.25,
        'a string',
        0,
        '123',
        '[1,2]',
        '{"1":2,"3":4}',  # Presto converts all keys to strings so that they're valid JSON
        '{"a":1,"b":2}',
        # '{0:1}',
        # 0.1,
    ])

def upgrade(migrate_engine):
    meta.bind = migrate_engine

    records_table = Table('records', meta, autoload=True)

    # Add the hash column, start with allowing NULLs
    hash_column = Column('hash', String(32), nullable=True, default=None,
                         unique=True)
    hash_column.create(records_table, unique_name='unique_record')

    sync_domains = []

    # Fill out the hash values. We need to do this in a way that lets us track
    # which domains need to be re-synced, so having the DB do this directly
    # won't work.
    for record in records_table.select().execute():
        try:
            records_table.update()\
                .where(records_table.c.id == record.id)\
                .values(hash=_build_hash(record))\
                .execute()
        except IntegrityError:
            if record.domain_id not in sync_domains:
                sync_domains.append(record.domain_id)
                LOG.warn("Domain '%s' needs to be synchronised" %
                         record.domain_id)

            records_table.delete()\
                .where(records_table.c.id == record.id)\
                .execute()

    # Finally, the column should not be nullable.
    records_table.c.hash.alter(nullable=False)

def downgrade(migrate_engine):
    meta = MetaData()
    meta.bind = migrate_engine
    tasks_table = Table('tasks', meta, autoload=True)
    task_info_table = Table('task_info', meta, autoload=True)

    for col_name in TASKS_MIGRATE_COLUMNS:
        column = Column(col_name, Text())
        column.create(tasks_table)

    task_info_records = task_info_table.select().execute().fetchall()

    for task_info in task_info_records:
        values = {
            'input': task_info.input,
            'result': task_info.result,
            'message': task_info.message
        }
        tasks_table\
            .update(values=values)\
            .where(tasks_table.c.id == task_info.task_id)\
            .execute()

    drop_tables([task_info_table])

def test_reflect_select(self, engine, connection):
    """reflecttable should be able to fill in a table from the name"""
    one_row_complex = Table('one_row_complex', MetaData(bind=engine), autoload=True)
    # Presto ignores the union and decimal columns
    self.assertEqual(len(one_row_complex.c), 15 - 2)
    self.assertIsInstance(one_row_complex.c.string, Column)
    rows = one_row_complex.select().execute().fetchall()
    self.assertEqual(len(rows), 1)
    self.assertEqual(list(rows[0]), [
        True,
        127,
        32767,
        2147483647,
        9223372036854775807,
        0.5,
        0.25,
        'a string',
        '1970-01-01 00:00:00.000',
        '123',
        [1, 2],
        {"1": 2, "3": 4},  # Presto converts all keys to strings so that they're valid JSON
        [1, 2],  # struct is returned as a list of elements
        # '{0:1}',
        # 0.1,
    ])

def upgrade(migrate_engine):
    meta.bind = migrate_engine

    records_table = Table('records', meta, autoload=True)

    # Add the hash column, start with allowing NULLs
    hash_column = Column('hash', String(32), nullable=True, default=None,
                         unique=True)
    hash_column.create(records_table, unique_name='unique_record')

    sync_domains = []

    # Fill out the hash values. We need to do this in a way that lets us track
    # which domains need to be re-synced, so having the DB do this directly
    # won't work.
    for record in records_table.select().execute():
        try:
            records_table.update()\
                .where(records_table.c.id == record.id)\
                .values(hash=_build_hash(record))\
                .execute()
        except IntegrityError:
            if record.domain_id not in sync_domains:
                sync_domains.append(record.domain_id)
                LOG.warn(_LW("Domain '%s' needs to be synchronised") %
                         record.domain_id)

            records_table.delete()\
                .where(records_table.c.id == record.id)\
                .execute()

    # Finally, the column should not be nullable.
    records_table.c.hash.alter(nullable=False)

def check_ckan_login(request):
    """ Connect to a specified CKAN database via SQLAlchemy and
    try to find the user that is authenticating. """
    db_uri = app.config.get('CKAN_DB_URI')
    if db_uri is None:
        log.warn("No CKAN_DB_URI given, cannot authenticate!")
        return False
    if 'Authorization' in request.headers:
        authorization = request.headers.get('Authorization')
        authorization = authorization.split(' ', 1)[-1]
        user, password = authorization.decode('base64').split(':', 1)
        engine = create_engine(db_uri, poolclass=NullPool)
        meta = MetaData()
        meta.bind = engine
        table = Table('user', meta, autoload=True)
        results = engine.execute(table.select(table.c.name == user))
        # TODO: check for multiple matches, never trust ckan.
        record = results.first()
        if record is not None and check_hashed_password(password, record['password']):
            return user
        raise WebstoreException('Invalid username or password!', None,
                                state='error', code=401)
    return None

def copy_table(self, table_to_copy, database_in, database_out,
               new_table_name=None, use_chunking=True, memory_in_gigabytes=2):
    in_metadata = database_in.metadata
    out_metadata = database_out.metadata
    in_table = Table(table_to_copy, in_metadata, autoload=True)
    out_table = in_table.tometadata(out_metadata)
    if new_table_name is not None:
        out_table.rename(new_table_name)
    out_metadata.create_all()
    if use_chunking:
        # 1 GiB is 1024**3 bytes; the original multiplied by 1024**2
        # (megabytes), contradicting the parameter name
        memory_in_bytes = memory_in_gigabytes * 1024**3
        row_size = self._row_byte_size(in_table)
        chunk_size = int(.1 * memory_in_bytes / row_size)
        num_rows = in_table.count().execute().fetchone()[0]
        num_inserted_rows = 0
        while num_inserted_rows < num_rows:
            qry = in_table.select().offset(num_inserted_rows).limit(chunk_size)
            result = qry.execute()
            data = [dict((col.key, x[col.name]) for col in in_table.c)
                    for x in result]
            out_table.insert().execute(*data)
            num_inserted_rows += len(data)
    else:
        qry = in_table.select()
        data = [dict((col.key, x[col.name]) for col in in_table.c)
                for x in qry.execute()]
        out_table.insert().execute(*data)

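# A minimal, standalone sketch of the chunked OFFSET/LIMIT copy pattern that
# copy_table() above implements, assuming a legacy (pre-1.4) SQLAlchemy with
# bound metadata; the engine, tables and column names here are illustrative,
# not part of the original code.
from sqlalchemy import Column, Integer, MetaData, String, Table, create_engine

engine = create_engine('sqlite://')
meta = MetaData(bind=engine)
src = Table('src', meta, Column('id', Integer, primary_key=True), Column('val', String))
dst = Table('dst', meta, Column('id', Integer, primary_key=True), Column('val', String))
meta.create_all()
src.insert().execute([{'id': i, 'val': str(i)} for i in range(10)])

chunk_size = 4
num_rows = src.count().execute().fetchone()[0]
copied = 0
while copied < num_rows:
    # unlike copy_table() above, this orders the select; OFFSET/LIMIT paging
    # is only stable when the underlying query has a deterministic order
    chunk = src.select().order_by(src.c.id).offset(copied).limit(chunk_size).execute()
    data = [dict((col.key, row[col.name]) for col in src.c) for row in chunk]
    dst.insert().execute(*data)
    copied += len(data)

assert dst.count().execute().fetchone()[0] == 10
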
def test_create_view(self):
    """ Test the creation of a view

    construct a view that:
      1) joins two tables together
      2) sums one column
      3) assigns a new label to a column
      4) groups by user
    """
    url = connection_string()
    db = db_connect_impl(DatabaseTablesV0, url, False)

    t1 = db.tables.TestTable1
    t2 = db.tables.TestTable2

    query = select([
        t1.c.user_id,
        func.sum(t1.c.size).label('usage'),
        t2.c.quota,
    ]).select_from(t2) \
        .where(t1.c.user_id == t2.c.user_id) \
        .group_by(t1.c.user_id)

    db.engine.execute(CreateView('v_usage_v0', query))

    db.metadata.bind = db.engine
    db.metadata.create_all()

    view = Table('v_usage_v0', db.metadata,
                 Column('user_id', String),
                 Column('usage', Integer),
                 Column('quota', Integer))

    # insert 3 users with different data
    for i, user_id in enumerate(["user0", "user1", "user2"]):
        db.tables.TestTable1.insert().values(
            {'user_id': user_id, 'size': 5 * (i + 1) + 100}).execute()
        db.tables.TestTable1.insert().values(
            {'user_id': user_id, 'size': 7 * (i + 1) + 100}).execute()
        db.tables.TestTable1.insert().values(
            {'user_id': user_id, 'size': 9 * (i + 1) + 100}).execute()
        db.tables.TestTable2.insert().values(
            {'user_id': user_id, 'quota': 400 * (i + 1)}).execute()

    for row in view.select().where(view.c.user_id == 'user0').execute():
        self.assertEqual(row.usage, 321)
        self.assertEqual(row.quota, 400)

def test_reflect_select(self, engine, connection):
    """reflecttable should be able to fill in a table from the name"""
    one_row_complex = Table('one_row_complex', MetaData(bind=engine), autoload=True)
    self.assertEqual(len(one_row_complex.c), 15)
    self.assertIsInstance(one_row_complex.c.string, Column)
    rows = one_row_complex.select().execute().fetchall()
    self.assertEqual(len(rows), 1)
    self.assertEqual(list(rows[0]), _ONE_ROW_COMPLEX_CONTENTS)

def test_reserved_words(self, engine, connection):
    """Hive uses backticks"""
    # Use keywords for the table/column name
    fake_table = Table('select', MetaData(bind=engine),
                       Column('map', sqlalchemy.types.String))
    query = str(fake_table.select(fake_table.c.map == 'a'))
    self.assertIn('`select`', query)
    self.assertIn('`map`', query)
    self.assertNotIn('"select"', query)
    self.assertNotIn('"map"', query)

def test_reserved_words(self, engine, connection):
    """Presto uses double quotes, not backticks"""
    # Use keywords for the table/column name
    fake_table = Table('select', MetaData(bind=engine),
                       Column('current_timestamp', String))
    query = str(fake_table.select(fake_table.c.current_timestamp == 'a'))
    self.assertIn('"select"', query)
    self.assertIn('"current_timestamp"', query)
    self.assertNotIn('`select`', query)
    self.assertNotIn('`current_timestamp`', query)

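# A small self-contained illustration of the dialect-driven identifier quoting
# that the two test_reserved_words variants above assert; this uses stock
# SQLAlchemy 1.x dialects (MySQL quotes with backticks much like Hive, while
# PostgreSQL emits the ANSI double quotes Presto uses) rather than the pyhive
# dialects the tests run against.
from sqlalchemy import Column, MetaData, String, Table, select
from sqlalchemy.dialects import mysql, postgresql

fake_table = Table('select', MetaData(), Column('map', String))
query = select([fake_table]).where(fake_table.c.map == 'a')
assert '`select`' in str(query.compile(dialect=mysql.dialect()))
assert '"select"' in str(query.compile(dialect=postgresql.dialect()))
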
def test_insert_values(self, engine, connection):
    table = Table('insert_test', MetaData(bind=engine),
                  Column('a', sqlalchemy.types.Integer))
    table.drop(checkfirst=True)
    table.create()
    connection.execute(table.insert([{'a': 1}, {'a': 2}]))
    result = table.select().execute().fetchall()
    expected = [(1,), (2,)]
    self.assertEqual(result, expected)

def query(self):
    query = self.config.get('query')
    if not query:
        table_name = self.config.get('table')
        table = Table(table_name, self.meta, autoload=True)
        query = table.select()
    else:
        query = sql_text(query)
    log.info("Query: %s", query)
    return query

def test_insert_values(self, engine, connection):
    table = Table('insert_test', MetaData(bind=engine),
                  Column('a', sqlalchemy.types.Integer),
                  schema='pyhive_test_database')
    table.drop(checkfirst=True)
    table.create()
    connection.execute(table.insert([{'a': 1}, {'a': 2}]))
    result = table.select().execute().fetchall()
    expected = [(1,), (2,)]
    self.assertEqual(result, expected)

def test_reflect_select(self, engine, connection):
    """reflecttable should be able to fill in a table from the name"""
    one_row_complex = Table('one_row_complex', MetaData(bind=engine), autoload=True)
    # Presto ignores the union column
    self.assertEqual(len(one_row_complex.c), 15 - 1)
    self.assertIsInstance(one_row_complex.c.string, Column)
    rows = one_row_complex.select().execute().fetchall()
    self.assertEqual(len(rows), 1)
    self.assertEqual(list(rows[0]), [
        True,
        127,
        32767,
        2147483647,
        9223372036854775807,
        0.5,
        0.25,
        'a string',
        '1970-01-01 00:00:00.000',
        b'123',
        [1, 2],
        {"1": 2, "3": 4},  # Presto converts all keys to strings so that they're valid JSON
        [1, 2],  # struct is returned as a list of elements
        # '{0:1}',
        '0.1',
    ])

    try:
        from sqlalchemy.types import BigInteger
    except ImportError:
        from sqlalchemy.databases.mysql import MSBigInteger as BigInteger

    # TODO some of these types could be filled in better
    self.assertIsInstance(one_row_complex.c.boolean.type, types.Boolean)
    self.assertIsInstance(one_row_complex.c.tinyint.type, types.Integer)
    self.assertIsInstance(one_row_complex.c.smallint.type, types.Integer)
    self.assertIsInstance(one_row_complex.c.int.type, types.Integer)
    self.assertIsInstance(one_row_complex.c.bigint.type, BigInteger)
    self.assertIsInstance(one_row_complex.c.float.type, types.Float)
    self.assertIsInstance(one_row_complex.c.double.type, types.Float)
    self.assertIsInstance(one_row_complex.c.string.type, String)
    self.assertIsInstance(one_row_complex.c.timestamp.type, types.TIMESTAMP)
    self.assertIsInstance(one_row_complex.c.binary.type, types.NullType)
    self.assertIsInstance(one_row_complex.c.array.type, types.NullType)
    self.assertIsInstance(one_row_complex.c.map.type, types.NullType)
    self.assertIsInstance(one_row_complex.c.struct.type, types.NullType)
    self.assertIsInstance(one_row_complex.c.decimal.type, types.NullType)

def test_migrate_data(self):
    meta = MetaData(bind=self.engine)

    # create TableA
    table_a = Table('TableA', meta,
                    Column('id', Integer, primary_key=True),
                    Column('first', String(8), nullable=False),
                    Column('second', Integer))
    table_a.create()

    # update it with sample data
    values = [
        {'id': 1, 'first': 'a'},
        {'id': 2, 'first': 'b'},
        {'id': 3, 'first': 'c'}
    ]
    for value in values:
        self.engine.execute(table_a.insert(values=value))

    # create TableB similar to TableA, except column 'second'
    table_b = Table('TableB', meta,
                    Column('id', Integer, primary_key=True),
                    Column('first', String(8), nullable=False))
    table_b.create()

    # migrate data
    migrate_utils.migrate_data(self.engine, table_a, table_b, ['second'])

    # validate table_a is dropped
    self.assertTrue(self.engine.dialect.has_table(self.engine.connect(), 'TableA'),
                    'Data migration failed to drop source table')

    # validate table_b is updated with data from table_a
    table_b_rows = list(table_b.select().execute())
    self.assertEqual(3, len(table_b_rows), "Data migration is failed")
    table_b_values = []
    for row in table_b_rows:
        table_b_values.append({'id': row.id, 'first': row.first})
    self.assertEqual(values, table_b_values,
                     "Data migration failed with invalid data copy")

def beforOutLog(self, logLevel, farmNo, componentNo, componentName,
                instanceNo, instanceName, code, additions):
    farmT = Table("FARM", METADATA, autoload=True)
    farm = self.selectOne(farmT.select(farmT.c.FARM_NO == farmNo))
    farmName = None
    if farm is not None:
        farmName = farm["FARM_NAME"]

    userT = Table("USER", METADATA, autoload=True)
    user = self.selectOne(userT.select(userT.c.USER_NO == self.userNo))
    userName = None
    if user is not None:
        userName = user["USERNAME"]

    instanceType = None
    platformNo = None
    if instanceNo is not None:
        instanceT = Table("INSTANCE", METADATA, autoload=True)
        instance = self.selectOne(
            instanceT.select(instanceT.c.INSTANCE_NO == instanceNo))
        if instance is not None:
            platformNo = instance["PLATFORM_NO"]

        isSelect = True
        if isSelect:
            awsinstanceT = Table("AWS_INSTANCE", METADATA, autoload=True)
            awsinstance = self.selectOne(
                awsinstanceT.select(awsinstanceT.c.INSTANCE_NO == instanceNo))
            if awsinstance is not None:
                instanceType = awsinstance["INSTANCE_TYPE"]
                isSelect = False
        if isSelect:
            csinstanceT = Table("CLOUDSTACK_INSTANCE", METADATA, autoload=True)
            csinstance = self.selectOne(
                csinstanceT.select(csinstanceT.c.INSTANCE_NO == instanceNo))
            if csinstance is not None:
                instanceType = csinstance["INSTANCE_TYPE"]
                isSelect = False
        if isSelect:
            vminstanceT = Table("VMWARE_INSTANCE", METADATA, autoload=True)
            vminstance = self.selectOne(
                vminstanceT.select(vminstanceT.c.INSTANCE_NO == instanceNo))
            if vminstance is not None:
                instanceType = vminstance["INSTANCE_TYPE"]
                isSelect = False
        if isSelect:
            nifinstanceT = Table("NIFTY_INSTANCE", METADATA, autoload=True)
            nifinstance = self.selectOne(
                nifinstanceT.select(nifinstanceT.c.INSTANCE_NO == instanceNo))
            if nifinstance is not None:
                instanceType = nifinstance["INSTANCE_TYPE"]
                isSelect = False

    self.outLog(logLevel, self.userNo, userName, farmNo, farmName,
                componentNo, componentName, instanceNo, instanceName,
                code, instanceType, platformNo, additions)

class TableHandler(object):
    """ Used by automatically generated objects such as datasets
    and dimensions to generate, write and clear the table under
    its management. """

    def _init_table(self, meta, namespace, name, id_type=Integer):
        """ Create the given table if it does not exist, otherwise
        reflect the current table schema from the database. """
        name = namespace + '__' + name
        self.table = Table(name, meta)
        if id_type is not None:
            col = Column('id', id_type, primary_key=True)
            self.table.append_column(col)

    def _generate_table(self):
        """ Create the given table if it does not exist. """
        # TODO: make this support some kind of migration?
        if not db.engine.has_table(self.table.name):
            self.table.create(db.engine)

    def _upsert(self, bind, data, unique_columns):
        """ Upsert a set of values into the table. This will
        query for the set of unique columns and either update an
        existing row or create a new one. In both cases, the ID
        of the changed row will be returned. """
        key = and_(*[self.table.c[c] == data.get(c) for c in unique_columns])
        q = self.table.update(key, data)
        if bind.execute(q).rowcount == 0:
            q = self.table.insert(data)
            rs = bind.execute(q)
            return rs.inserted_primary_key[0]
        else:
            q = self.table.select(key)
            row = bind.execute(q).fetchone()
            return row['id']

    def _flush(self, bind):
        """ Delete all rows in the table. """
        q = self.table.delete()
        bind.execute(q)

    def _drop(self, bind):
        """ Drop the table and the local reference to it. """
        if db.engine.has_table(self.table.name):
            self.table.drop()
        del self.table

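# A minimal, standalone sketch of the update-else-insert flow behind
# TableHandler._upsert() above, assuming a legacy (pre-1.4) SQLAlchemy with
# bound metadata; the 'people' table and its columns are illustrative only.
from sqlalchemy import Column, Integer, MetaData, String, Table, and_, create_engine

engine = create_engine('sqlite://')
meta = MetaData(bind=engine)
people = Table('people', meta,
               Column('id', Integer, primary_key=True),
               Column('email', String, unique=True),
               Column('name', String))
meta.create_all()

def upsert(data, unique_columns):
    # try the UPDATE first; fall back to INSERT when no row matched
    # (note: not concurrency-safe without a unique constraint plus retry)
    key = and_(*[people.c[c] == data.get(c) for c in unique_columns])
    if people.update(key, data).execute().rowcount == 0:
        return people.insert(data).execute().inserted_primary_key[0]
    return people.select(key).execute().fetchone()['id']

first = upsert({'email': 'ada@example.org', 'name': 'Ada'}, ['email'])
again = upsert({'email': 'ada@example.org', 'name': 'Ada L.'}, ['email'])
assert first == again  # the second call updated the existing row
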
def test_engine_with_dataset(engine_using_test_dataset):
    rows = engine_using_test_dataset.execute('SELECT * FROM sample_one_row').fetchall()
    assert list(rows[0]) == ONE_ROW_CONTENTS

    table_one_row = Table('sample_one_row',
                          MetaData(bind=engine_using_test_dataset), autoload=True)
    rows = table_one_row.select().execute().fetchall()
    assert list(rows[0]) == ONE_ROW_CONTENTS_EXPANDED

    # Table name shouldn't include dataset
    with pytest.raises(Exception):
        table_one_row = Table('test_pybigquery.sample_one_row',
                              MetaData(bind=engine_using_test_dataset), autoload=True)

def unload(self,
           schema: str,
           table: str,
           unload_plan: RecordsUnloadPlan,
           directory: RecordsDirectory) -> None:
    if not isinstance(unload_plan.records_format, DelimitedRecordsFormat):
        raise NotImplementedError("This only supports delimited mode for now")

    unhandled_hints = set(unload_plan.records_format.hints.keys())
    processing_instructions = unload_plan.processing_instructions
    date_output_style, date_order_style, postgres_options = \
        postgres_copy_to_options(unhandled_hints,
                                 unload_plan.records_format,
                                 processing_instructions.fail_if_cant_handle_hint)
    if date_order_style is None:
        # U-S-A! U-S-A!
        date_order_style = 'MDY'
    complain_on_unhandled_hints(processing_instructions.fail_if_dont_understand,
                                unhandled_hints,
                                unload_plan.records_format.hints)

    table_obj = Table(table, self.meta, schema=schema,
                      autoload=True, autoload_with=self.db)

    with self.db.engine.begin() as conn:
        # https://www.postgresql.org/docs/8.3/sql-set.html
        #
        # The effects of SET LOCAL last only till the end of the
        # current transaction, whether committed or not. A special
        # case is SET followed by SET LOCAL within a single
        # transaction: the SET LOCAL value will be seen until the end
        # of the transaction, but afterwards (if the transaction is
        # committed) the SET value will take effect.
        date_style = f"{date_output_style}, {date_order_style}"
        sql = f"SET LOCAL DateStyle = {quote_value(conn, date_style)}"
        logger.info(sql)
        conn.execute(sql)

        filename = unload_plan.records_format.generate_filename('data')
        loc = directory.loc.file_in_this_directory(filename)
        with loc.open(mode='wb') as fileobj:
            copy_to(table_obj.select(), fileobj, conn, **postgres_options)

    logger.info('Copy complete')
    directory.save_preliminary_manifest()

def get_data(table="europe_load_dt", schema="agg_avg_1hour_pdata_caps_eu2020",
             metadata=None):
    if metadata is None:
        engine, inspector, metadata = connect2database(localhost=True)
    data = Table(table, metadata, schema=schema, autoload=True)
    s = data.select()
    rs = s.execute()
    data = list(empty(rs.rowcount))
    for i in arange(rs.rowcount):
        data[i] = list(rs.fetchone())
    return_data = transposed(data)
    # data = rs.fetchall()
    return return_data

def check_ckan_login(user, password):
    """ Connect to a specified CKAN database via SQLAlchemy and
    try to find the user that is authenticating. """
    db_uri = app.config.get('CKAN_DB_URI')
    if db_uri is None:
        log.warn("No CKAN_DB_URI given, cannot authenticate!")
        return False
    engine = create_engine(db_uri, poolclass=NullPool)
    meta = MetaData()
    meta.bind = engine
    table = Table('user', meta, autoload=True)
    results = engine.execute(table.select(table.c.name == user))
    # TODO: check for multiple matches, never trust ckan.
    record = results.first()
    if record is not None:
        return check_hashed_password(password, record['password'])
    return False

def test_lots_of_types(self, engine, connection):
    # Presto doesn't have raw CREATE TABLE support, so we only test Hive
    # take type list from sqlalchemy.types
    types = [
        'INT', 'CHAR', 'VARCHAR', 'NCHAR', 'TEXT', 'Text', 'FLOAT',
        'NUMERIC', 'DECIMAL', 'TIMESTAMP', 'DATETIME', 'CLOB', 'BLOB',
        'BOOLEAN', 'SMALLINT', 'DATE', 'TIME',
        'String', 'Integer', 'SmallInteger',
        'Numeric', 'Float', 'DateTime', 'Date', 'Time', 'Binary',
        'Boolean', 'Unicode', 'UnicodeText',
    ]
    cols = []
    for i, t in enumerate(types):
        cols.append(Column(str(i), getattr(sqlalchemy.types, t)))
    cols.append(Column('hive_date', HiveDate))
    cols.append(Column('hive_decimal', HiveDecimal))
    cols.append(Column('hive_timestamp', HiveTimestamp))
    table = Table('test_table', MetaData(bind=engine), *cols,
                  schema='pyhive_test_database')
    table.drop(checkfirst=True)
    table.create()
    connection.execute('SET mapred.job.tracker=local')
    connection.execute('USE pyhive_test_database')
    big_number = 10 ** 10 - 1
    connection.execute("""
    INSERT OVERWRITE TABLE test_table
    SELECT
        1, "a", "a", "a", "a", "a", 0.1,
        0.1, 0.1, 0, 0, "a", "a",
        false, 1, 0, 0,
        "a", 1, 1,
        0.1, 0.1, 0, 0, 0, "a",
        false, "a", "a",
        0, %d, 123 + 2000
    FROM default.one_row
    """, big_number)
    row = connection.execute(table.select()).fetchone()
    self.assertEqual(row.hive_date, datetime.date(1970, 1, 1))
    self.assertEqual(row.hive_decimal, decimal.Decimal(big_number))
    self.assertEqual(row.hive_timestamp, datetime.datetime(1970, 1, 1, 0, 0, 2, 123))
    table.drop()

def upgrade(migrate_engine):
    meta = MetaData()
    meta.bind = migrate_engine

    tables = [define_task_info_table(meta)]
    create_tables(tables)

    tasks_table = Table('tasks', meta, autoload=True)
    task_info_table = Table('task_info', meta, autoload=True)

    tasks = tasks_table.select().execute().fetchall()
    for task in tasks:
        values = {
            'task_id': task.id,
            'input': task.input,
            'result': task.result,
            'message': task.message,
        }
        task_info_table.insert(values=values).execute()

    for col_name in TASKS_MIGRATE_COLUMNS:
        tasks_table.columns[col_name].drop()

def check_ckan_login(request):
    """ Connect to a specified CKAN database via SQLAlchemy and
    try to find the user that is authenticating. """
    db_uri = app.config.get('CKAN_DB_URI')
    if db_uri is None:
        log.warn("No CKAN_DB_URI given, cannot authenticate!")
        return False
    if 'Authorization' in request.headers:
        apikey = request.headers.get('Authorization')
        engine = create_engine(db_uri, poolclass=NullPool)
        meta = MetaData()
        meta.bind = engine
        table = Table('user', meta, autoload=True)
        results = engine.execute(table.select(table.c.apikey == apikey))
        # TODO: check for multiple matches, never trust ckan.
        record = results.first()
        if record is not None:
            return record['name']
        raise WebstoreException('Invalid apikey!', None,
                                state='error', code=401)
    return None

class BaseTestCase(unittest.TestCase):

    def setUp(self):
        engine = create_engine('postgresql://postgres@localhost/pypet')
        self.metadata = MetaData(bind=engine)
        self.store_table = Table('store', self.metadata,
                                 Column('store_id', types.Integer, primary_key=True),
                                 Column('store_name', types.String),
                                 Column('country_id', types.Integer,
                                        ForeignKey('country.country_id')))
        self.country_table = Table('country', self.metadata,
                                   Column('country_id', types.Integer, primary_key=True),
                                   Column('country_name', types.String),
                                   Column('region_id', types.Integer,
                                          ForeignKey('region.region_id')))
        self.region_table = Table('region', self.metadata,
                                  Column('region_id', types.Integer, primary_key=True),
                                  Column('region_name', types.String))
        self.product_table = Table('product', self.metadata,
                                   Column('product_id', types.Integer, primary_key=True),
                                   Column('product_name', types.String),
                                   Column('product_category_id', types.Integer,
                                          ForeignKey('product_category.product_category_id')))
        self.product_category_table = Table('product_category', self.metadata,
                                            Column('product_category_id', types.Integer,
                                                   primary_key=True),
                                            Column('product_category_name', types.String))
        self.facts_table = Table('facts_table', self.metadata,
                                 Column('store_id', types.Integer,
                                        ForeignKey('store.store_id')),
                                 Column('date', types.Date),
                                 Column('product_id', types.Integer,
                                        ForeignKey('product.product_id')),
                                 Column('price', types.Float),
                                 Column('qty', types.Integer))
        agg_name = ('agg_time_month_product_product_store_store'
                    '_Unit Price_Quantity')
        self.agg_by_month_table = Table(agg_name, self.metadata,
                                        Column('store_store', types.Integer,
                                               ForeignKey('store.store_id')),
                                        Column('time_month', types.Date),
                                        Column('product_product', types.Integer,
                                               ForeignKey('product.product_id')),
                                        Column('Unit Price', types.Float),
                                        Column('Quantity', types.Integer),
                                        Column('fact_count', types.Integer))
        agg_name = ('agg_time_year_store_country_product_product'
                    '_Unit Price_Quantity')
        self.agg_by_year_country_table = Table(agg_name, self.metadata,
                                               Column('store_country', types.Integer,
                                                      ForeignKey('country.country_id')),
                                               Column('time_year', types.Date),
                                               Column('product_product', types.Integer,
                                                      ForeignKey('product.product_id')),
                                               Column('Unit Price', types.Float),
                                               Column('Quantity', types.Integer),
                                               Column('fact_count', types.Integer))
        self.metadata.create_all()
        self.store_dim = Dimension('store', [
            Hierarchy('default', [
                Level('region', self.region_table.c.region_id,
                      self.region_table.c.region_name),
                Level('country', self.country_table.c.country_id,
                      self.country_table.c.country_name),
                Level('store', self.store_table.c.store_id,
                      self.store_table.c.store_name)])])
        self.product_dim = Dimension('product', [
            Hierarchy('default', [
                Level('category',
                      self.product_category_table.c.product_category_id,
                      self.product_category_table.c.product_category_name),
                Level('product', self.product_table.c.product_id,
                      self.product_table.c.product_name)])])
        self.time_dim = TimeDimension('time', self.facts_table.c.date,
                                      ['year', 'month', 'day'])
        unit_price = Measure('Unit Price', self.facts_table.c.price, aggregates.avg)
        quantity = Measure('Quantity', self.facts_table.c.qty, aggregates.sum)
        price = ((unit_price.aggregate_with(None) *
                  quantity.aggregate_with(None))
                 .aggregate_with(aggregates.sum).label('Price'))
        self.cube = Cube(self.metadata, self.facts_table,
                         [self.store_dim, self.product_dim, self.time_dim],
                         [unit_price, quantity, price],
                         fact_count_column=self.facts_table.c.qty)

        self.region_table.insert({'region_id': 1, 'region_name': 'Europe'}).execute()
        self.country_table.insert({'region_id': 1, 'country_name': 'France',
                                   'country_id': 1}).execute()
        self.country_table.insert({'region_id': 1, 'country_name': 'Germany',
                                   'country_id': 2}).execute()
        self.region_table.insert({'region_id': 2, 'region_name': 'America'}).execute()
        self.country_table.insert({'region_id': 2, 'country_name': 'USA',
                                   'country_id': 3}).execute()
        self.country_table.insert({'region_id': 2, 'country_name': 'Canada',
                                   'country_id': 4}).execute()
        self.store_table.insert({'store_id': 1, 'store_name': 'ACME.fr',
                                 'country_id': 1}).execute()
        self.store_table.insert({'store_id': 2, 'store_name': 'ACME.de',
                                 'country_id': 2}).execute()
        self.store_table.insert({'store_id': 3, 'store_name': 'Food Mart.fr',
                                 'country_id': 1}).execute()
        self.store_table.insert({'store_id': 4, 'store_name': 'Food Mart.de',
                                 'country_id': 2}).execute()
        self.store_table.insert({'store_id': 5, 'store_name': 'ACME.us',
                                 'country_id': 3}).execute()
        self.store_table.insert({'store_id': 6, 'store_name': 'Food Mart.us',
                                 'country_id': 3}).execute()
        self.store_table.insert({'store_id': 7, 'store_name': 'ACME.ca',
                                 'country_id': 4}).execute()
        self.store_table.insert({'store_id': 8, 'store_name': 'Food Mart.ca',
                                 'country_id': 4}).execute()
        self.product_category_table.insert({'product_category_id': 1,
                                            'product_category_name': 'Vegetables'}).execute()
        self.product_category_table.insert({'product_category_id': 2,
                                            'product_category_name': 'Shoes'}).execute()
        self.product_table.insert({'product_id': 1, 'product_category_id': 1,
                                   'product_name': 'Carrots'}).execute()
        self.product_table.insert({'product_id': 2, 'product_category_id': 1,
                                   'product_name': 'Bananas'}).execute()
        self.product_table.insert({'product_id': 3, 'product_category_id': 2,
                                   'product_name': 'Red shoes'}).execute()
        self.product_table.insert({'product_id': 4, 'product_category_id': 2,
                                   'product_name': 'Green shoes'}).execute()
        self.product_table.insert({'product_id': 5, 'product_category_id': 2,
                                   'product_name': 'Blue shoes'}).execute()

        years = cycle([2009, 2010, 2011])
        months = cycle([1, 5, 8, 9, 11])
        days = cycle([3, 12, 21, 29])
        prices = iter(cycle([100, 500, 1000]))
        quantities = iter(cycle([1, 5, 1, 2, 3, 20, 8]))
        values = iter((date(*value) for value in izip(years, months, days)))
        for value in self.product_table.select().with_only_columns([
                self.product_table.c.product_id,
                self.store_table.c.store_id]).execute():
            self.facts_table.insert({
                'product_id': value.product_id,
                'store_id': value.store_id,
                'date': next(values),
                'qty': next(quantities),
                'price': next(prices)}).execute()

        results = (self.facts_table.select().with_only_columns([
            (func.sum(self.facts_table.c.price * self.facts_table.c.qty) /
             func.sum(self.facts_table.c.qty)).label('Unit Price'),
            func.sum(self.facts_table.c.qty).label('Quantity'),
            func.sum(self.facts_table.c.qty).label('fact_count'),
            self.facts_table.c.product_id.label('product_product'),
            self.facts_table.c.store_id.label('store_store'),
            func.date_trunc('month', self.facts_table.c.date).label('time_month')])
            .group_by(func.date_trunc('month', self.facts_table.c.date),
                      self.facts_table.c.product_id,
                      self.facts_table.c.store_id)
            .execute())
        for res in results:
            self.agg_by_month_table.insert().execute(dict(res))

        second_agg = (self.facts_table.select().with_only_columns([
            (func.sum(self.facts_table.c.price * self.facts_table.c.qty) /
             func.sum(self.facts_table.c.qty)).label('Unit Price'),
            func.sum(self.facts_table.c.qty).label('Quantity'),
            func.sum(self.facts_table.c.qty).label('fact_count'),
            self.facts_table.c.product_id.label('product_product'),
            self.store_table.c.country_id.label('store_country'),
            func.date_trunc('year', self.facts_table.c.date).label('time_year')])
            .where(self.facts_table.c.store_id == self.store_table.c.store_id)
            .group_by(self.facts_table.c.product_id.label('product_product'),
                      self.store_table.c.country_id.label('store_country'),
                      func.date_trunc('year', self.facts_table.c.date).label('time_year'))
            .execute())
        for res in second_agg:
            self.agg_by_year_country_table.insert().execute(dict(res))

    def tearDown(self):
        self.metadata.drop_all()

class AlchemySqlStore(Model):
    """
    datastore using SQLAlchemy meta-SQL Python package

    create table vesper_stmts (
      subject UNIQUE
      predicate UNIQUE
      object UNIQUE
      objecttype UNIQUE
      context UNIQUE
    )
    """

    def __init__(self, source=None, defaultStatements=None, autocommit=False, **kw):
        if source is None:
            # this seems like a reasonable default thing to do
            source = "sqlite://"
            log.debug("SQLite in-memory database being opened")

        # We take source to be a SQLAlchemy-style dbapi spec:
        # dialect+driver://username:password@host:port/database
        # connection is made JIT on first connect()
        log.debug("sqla engine being created with:", source)

        self.engine = create_engine(source)
        self.md = sqlalchemy.schema.MetaData()
        # utterly insufficient datatypes. just for first pass
        # technically the keep_existing bool is redundant as create_all() default is "check first"
        self.vesper_stmts = Table(
            "vesper_stmts", self.md,
            Column("subject", String(255)),    # primary_key = True),
            Column("predicate", String(255)),  # primary_key = True),
            Column("object", String(255)),     # primary_key = True),
            Column("objecttype", String(8)),
            Column("context", String(8)),
            UniqueConstraint("subject", "predicate", "object", "objecttype", "context"),
            keep_existing=True,
        )
        Index("idx_vs",
              self.vesper_stmts.c.subject,
              self.vesper_stmts.c.predicate,
              self.vesper_stmts.c.object)
        self.md.create_all(self.engine)

        # Set up our state machine and grab a connection from the sqlalchemy pool
        self.conn = self.engine.connect()
        self.trans = None
        self.autocommit = autocommit

    def _checkConnection(self):
        if self.conn is None:
            self.conn = self.engine.connect()
        if self.autocommit is False:
            if not self.conn.in_transaction():
                self.trans = self.conn.begin()
        self.conn.execution_options(autocommit=self.autocommit)

    def getStatements(self, subject=None, predicate=None, object=None,
                      objecttype=None, context=None, asQuad=True, hints=None):
        """
        Return all the statements in the model that match the given arguments.
        Any combination of subject and predicate can be None, and any None slot
        is treated as a wildcard that matches any value in the model.
        """
        fs = subject is not None
        fp = predicate is not None
        fo = object is not None
        fot = objecttype is not None
        fc = context is not None
        hints = hints or {}
        limit = hints.get("limit")
        offset = hints.get("offset")

        log.debug("s p o ot c quad lim offset: ",
                  fs, fp, fo, fot, fc, asQuad, limit, offset)

        if fo:
            if isinstance(object, ResourceUri):
                object = object.uri
                fot = True
                objecttype = OBJECT_TYPE_RESOURCE
            elif not fot:
                objecttype = OBJECT_TYPE_LITERAL

        if not asQuad and not fc:
            query = select([
                self.vesper_stmts.c.subject,
                self.vesper_stmts.c.predicate,
                self.vesper_stmts.c.object,
                self.vesper_stmts.c.objecttype,
                func.min(self.vesper_stmts.c.context).label("context"),
            ])
        else:  # asQuad is True
            query = self.vesper_stmts.select()
        if fs:
            query = query.where(self.vesper_stmts.c.subject == subject)
        if fp:
            query = query.where(self.vesper_stmts.c.predicate == predicate)
        if fo:
            query = query.where(self.vesper_stmts.c.object == object)
        if fot:
            query = query.where(self.vesper_stmts.c.objecttype == objecttype)
        if fc:
            query = query.where(self.vesper_stmts.c.context == context)
        if not asQuad and not fc:
            query = query.group_by(
                self.vesper_stmts.c.subject,
                self.vesper_stmts.c.predicate,
                self.vesper_stmts.c.object,
                self.vesper_stmts.c.objecttype,
            )
        if limit is not None:
            query = query.limit(limit)
        if offset is not None:
            query = query.offset(offset)

        stmts = []
        self._checkConnection()
        result = self.conn.execute(query)
        for r in result:
            stmts.append(Statement(r["subject"], r["predicate"], r["object"],
                                   r["objecttype"], r["context"]))

        log.debug("stmts returned: ", len(stmts), stmts)
        return stmts

    def addStatement(self, stmt):
        """add the specified statement to the model"""
        log.debug("addStatement called with ", stmt)
        self._checkConnection()
        result = self.conn.execute(
            self.vesper_stmts.insert(prefixes=["OR IGNORE"]),
            {"subject": stmt[0], "predicate": stmt[1], "object": stmt[2],
             "objecttype": stmt[3], "context": stmt[4]},
        )
        return result.rowcount

    def addStatements(self, stmts):
        """adds multiple statements to the model"""
        log.debug("addStatement called with ", stmts)
        self._checkConnection()
        result = self.conn.execute(
            self.vesper_stmts.insert(prefixes=["OR IGNORE"]),
            [{"subject": stmt[0], "predicate": stmt[1], "object": stmt[2],
              "objecttype": stmt[3], "context": stmt[4]} for stmt in stmts],
        )
        return result.rowcount

    def removeStatement(self, stmt):
        """removes the statement from the model"""
        log.debug("removeStatement called with: ", stmt)
        rmv = self.vesper_stmts.delete().where(
            (self.vesper_stmts.c.subject == stmt[0]) &
            (self.vesper_stmts.c.predicate == stmt[1]) &
            (self.vesper_stmts.c.object == stmt[2]) &
            (self.vesper_stmts.c.objecttype == stmt[3]) &
            (self.vesper_stmts.c.context == stmt[4])
        )
        self._checkConnection()
        result = self.conn.execute(rmv)
        return result.rowcount

    def removeStatements(self, stmts):
        """removes multiple statements from the model"""
        log.debug("removeStatements called with: ", stmts)
        wc = []
        [wc.append(
            (self.vesper_stmts.c.subject == stmt[0]) &
            (self.vesper_stmts.c.predicate == stmt[1]) &
            (self.vesper_stmts.c.object == stmt[2]) &
            (self.vesper_stmts.c.objecttype == stmt[3]) &
            (self.vesper_stmts.c.context == stmt[4])
        ) for stmt in stmts]
        # no protection for singleton stmt here!
        rmv = self.vesper_stmts.delete().where(or_(*wc))
        self._checkConnection()
        result = self.conn.execute(rmv)
        return result.rowcount

    def commit(self, **kw):
        if self.conn is not None:
            if self.conn.in_transaction():
                self.trans.commit()

    def rollback(self):
        if self.conn is not None:
            if self.conn.in_transaction():
                self.trans.rollback()

    def close(self):
        log.debug("closing!")
        if self.conn is not None:
            self.conn.close()
            self.conn = None

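# A small sketch of the SQLite "INSERT OR IGNORE" trick that addStatement() and
# addStatements() above rely on via insert(prefixes=["OR IGNORE"]); assumes a
# legacy (pre-1.4) SQLAlchemy, and the 'stmts' table here is illustrative.
from sqlalchemy import Column, MetaData, String, Table, UniqueConstraint, create_engine

engine = create_engine('sqlite://')
meta = MetaData()
stmts = Table('stmts', meta,
              Column('s', String), Column('p', String), Column('o', String),
              UniqueConstraint('s', 'p', 'o'))
meta.create_all(engine)

conn = engine.connect()
ins = stmts.insert(prefixes=['OR IGNORE'])  # renders "INSERT OR IGNORE INTO stmts ..."
conn.execute(ins, {'s': 'a', 'p': 'b', 'o': 'c'})
result = conn.execute(ins, {'s': 'a', 'p': 'b', 'o': 'c'})
assert result.rowcount == 0  # the duplicate row was silently ignored
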
class Table(object): def __init__(self, db, schema, table, columns=None): self.db = db self.schema = schema self.name = table self.engine = create_engine(db.url) self.metadata = MetaData(schema=schema) self.metadata.bind = self.engine # http://docs.sqlalchemy.org/en/rel_1_0/core/metadata.html # if provided columns (SQLAlchemy columns), create the table if table: if columns: self.table = SQLATable( table, self.metadata, schema=self.schema, *columns ) self.table.create() # otherwise just load from db else: self.table = SQLATable( table, self.metadata, schema=self.schema, autoload=True ) self.indexes = dict((i.name, i) for i in self.table.indexes) self._is_dropped = False else: self._is_dropped = True self.table = None @property def _normalized_columns(self): return list(map(normalize_column_name, self.columns)) @property def columns(self): """Return list of all columns in table """ return list(self.table.columns.keys()) @property def sqla_columns(self): """Return all columns in table as sqlalchemy column types """ return self.table.columns @property def column_types(self): """Return a dict mapping column name to type for all columns in table """ column_types = {} for c in self.sqla_columns: column_types[c.name] = c.type return column_types @property def primary_key(self): """Return a list of columns making up the primary key constraint """ return [c.name for c in self.table.primary_key] @property def op(self): ctx = MigrationContext.configure(self.engine.connect()) return Operations(ctx) def _valid_table_name(self, table_name): """Check if the table name is obviously invalid. """ if table_name is None or not len(table_name.strip()): raise ValueError("Invalid table name: %r" % table_name) return table_name.strip() def _update_table(self, table_name): self.metadata = MetaData(schema=self.schema) self.metadata.bind = self.engine return SQLATable(table_name, self.metadata, schema=self.schema) def add_primary_key(self, column="id"): """Add primary key constraint to specified column """ if not self.primary_key: sql = """ALTER TABLE {s}.{t} ADD PRIMARY KEY ({c}) """.format( s=self.schema, t=self.name, c=column ) self.db.execute(sql) def drop(self): """Drop the table from the database """ if self._is_dropped is False: self.table.drop(self.engine) self._is_dropped = True def _check_dropped(self): if self._is_dropped: raise DatasetException( "the table has been dropped. this object should not be used again." ) def _args_to_clause(self, args): clauses = [] for k, v in args.items(): if isinstance(v, (list, tuple)): clauses.append(self.table.c[k].in_(v)) else: clauses.append(self.table.c[k] == v) return and_(*clauses) def create_column(self, name, type): """ Explicitely create a new column ``name`` of a specified type. ``type`` must be a `SQLAlchemy column type <http://docs.sqlalchemy.org/en/rel_0_8/core/types.html>`_. :: table.create_column('created_at', sqlalchemy.DateTime) """ self._check_dropped() if normalize_column_name(name) not in self._normalized_columns: self.op.add_column(self.table.name, Column(name, type), self.table.schema) self.table = self._update_table(self.table.name) def drop_column(self, name): """ Drop the column ``name`` :: table.drop_column('created_at') """ self._check_dropped() if name in list(self.table.columns.keys()): self.op.drop_column(self.table.name, name, schema=self.schema) self.table = self._update_table(self.table.name) def create_index(self, columns, name=None, index_type="btree"): """ Create an index to speed up queries on a table. 
If no ``name`` is given, one is derived from a hash of the column names and index type. :: table.create_index(['name', 'country']) """ self._check_dropped() if not name: sig = "||".join(columns + [index_type]) # This is a work-around for a bug in <=0.6.1 which would create # indexes based on hash() rather than a proper hash. key = abs(hash(sig)) name = "ix_%s_%s" % (self.table.name, key) if name in self.indexes: return self.indexes[name] key = sha1(sig.encode("utf-8")).hexdigest()[:16] name = "ix_%s_%s" % (self.table.name, key) if name in self.indexes: return self.indexes[name] # self.db._acquire() columns = [self.table.c[col] for col in columns] idx = Index(name, *columns, postgresql_using=index_type) idx.create(self.engine) # finally: # self.db._release() self.indexes[name] = idx return idx def create_index_geom(self, column="geom"): """Shortcut to create index on geometry """ self.create_index([column], index_type="gist") def distinct(self, *columns, **_filter): """ Returns all rows of a table, but removes rows with duplicate values in ``columns``. Internally this creates a `DISTINCT statement <http://www.w3schools.com/sql/sql_distinct.asp>`_. :: # returns only one row per year, ignoring the rest table.distinct('year') # works with multiple columns, too table.distinct('year', 'country') # you can also combine this with a filter table.distinct('year', country='China') """ self._check_dropped() qargs = [] try: columns = [self.table.c[c] for c in columns] for col, val in _filter.items(): qargs.append(self.table.c[col] == val) except KeyError: return [] q = expression.select( columns, distinct=True, whereclause=and_(*qargs), order_by=[c.asc() for c in columns], ) # if just looking at one column, return a simple list if len(columns) == 1: return itertools.chain.from_iterable(self.engine.execute(q)) # otherwise return specified row_type else: return ResultIter(self.engine.execute(q), row_type=self.db.row_type) def insert(self, row): """ Add a row (type: dict) by inserting it into the table. Columns must exist. :: data = dict(title='I am a banana!') table.insert(data) Returns the inserted row's primary key. """ self._check_dropped() res = self.engine.execute(self.table.insert(row)) if len(res.inserted_primary_key) > 0: return res.inserted_primary_key[0] def insert_many(self, rows, chunk_size=1000): """ Add many rows at a time, which is significantly faster than adding them one by one. By default the rows are processed in chunks of 1000 per commit, unless you specify a different ``chunk_size``. See :py:meth:`insert() <dataset.Table.insert>` for details on the other parameters. :: rows = [dict(name='Dolly')] * 10000 table.insert_many(rows) """ def _process_chunk(chunk): self.table.insert().execute(chunk) self._check_dropped() chunk = [] for i, row in enumerate(rows, start=1): chunk.append(row) if i % chunk_size == 0: _process_chunk(chunk) chunk = [] if chunk: _process_chunk(chunk) def rename(self, name): """Rename the table """ sql = """ALTER TABLE {s}.{t} RENAME TO {name} """.format( s=self.schema, t=self.name, name=name ) self.engine.execute(sql) self.table = SQLATable(name, self.metadata, schema=self.schema, autoload=True) def find_one(self, **kwargs): """ Works just like :py:meth:`find() <dataset.Table.find>` but returns one result, or None.
:: row = table.find_one(country='United States') """ kwargs["_limit"] = 1 iterator = self.find(**kwargs) try: return next(iterator) except StopIteration: return None def _args_to_order_by(self, order_by): if order_by[0] == "-": return self.table.c[order_by[1:]].desc() else: return self.table.c[order_by].asc() def find( self, _limit=None, _offset=0, _step=5000, order_by="id", return_count=False, **_filter ): """ Performs a simple search on the table. Simply pass keyword arguments as ``filter``. :: results = table.find(country='France') results = table.find(country='France', year=1980) Using ``_limit``:: # just return the first 10 rows results = table.find(country='France', _limit=10) You can sort the results by single or multiple columns. Append a minus sign to the column name for descending order:: # sort results by a column 'year' results = table.find(country='France', order_by='year') # return all rows sorted by multiple columns (by year in descending order) results = table.find(order_by=['country', '-year']) By default :py:meth:`find() <dataset.Table.find>` will break the query into chunks of ``_step`` rows to prevent huge tables from being loaded into memory at once. For more complex queries, please use :py:meth:`db.query()` instead.""" self._check_dropped() if not isinstance(order_by, (list, tuple)): order_by = [order_by] order_by = [ o for o in order_by if (o.startswith("-") and o[1:] or o) in self.table.columns ] order_by = [self._args_to_order_by(o) for o in order_by] args = self._args_to_clause(_filter) # query total number of rows first count_query = alias( self.table.select(whereclause=args, limit=_limit, offset=_offset), name="count_query_alias", ).count() rp = self.engine.execute(count_query) total_row_count = rp.fetchone()[0] if return_count: return total_row_count if _limit is None: _limit = total_row_count if _step is None or _step is False or _step == 0: _step = total_row_count if total_row_count > _step and not order_by: _step = total_row_count log.warn( "query cannot be broken into smaller sections because it is unordered" ) queries = [] for i in count(): qoffset = _offset + (_step * i) qlimit = min(_limit - (_step * i), _step) if qlimit <= 0: break queries.append( self.table.select( whereclause=args, limit=qlimit, offset=qoffset, order_by=order_by ) ) return ResultIter( (self.engine.execute(q) for q in queries), row_type=self.db.row_type ) def count(self, **_filter): """ Return the count of results for the given filter set (same filter options as with ``find()``). """ return self.find(return_count=True, **_filter) def __getitem__(self, item): """ This is an alias for distinct which allows the table to be queried using square-bracket syntax. :: # Same as distinct: print list(table['year']) """ if not isinstance(item, tuple): item = (item,) return self.distinct(*item) def all(self): """ Returns all rows of the table as simple dictionaries. This is simply a shortcut to *find()* called with no arguments. :: rows = table.all()""" return self.find() def __iter__(self): """ Allows for iterating over all rows in the table without explicitly calling :py:meth:`all() <dataset.Table.all>`. :: for row in table: print(row) """ return self.all() def __repr__(self): return "<Table(%s)>" % self.table.name
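A hypothetical end-to-end usage sketch of the wrapper above. The db object is a stand-in for whatever the wrapper expects (judging from the class body it must expose at least .url, .row_type, and .execute), the schema/table/column names are illustrative only, and SQLAlchemy 1.x is assumed, since the wrapper itself uses 1.x-only idioms such as engine.execute and bound MetaData.

# Illustrative usage only; `db`, the URL, and all names below are assumptions.
from types import SimpleNamespace
import sqlalchemy

url = 'postgresql:///testdb'  # assumption: a reachable PostgreSQL database
db = SimpleNamespace(url=url, row_type=dict,
                     execute=sqlalchemy.create_engine(url).execute)

# Creating the table (assumes 'places' does not already exist in schema 'public')
table = Table(db, schema='public', table='places', columns=[
    sqlalchemy.Column('id', sqlalchemy.Integer, primary_key=True),
    sqlalchemy.Column('name', sqlalchemy.Unicode(50)),
    sqlalchemy.Column('year', sqlalchemy.Integer),
])
table.insert({'id': 1, 'name': 'Paris', 'year': 1980})  # returns the primary key
table.create_index(['name'])  # index name is derived from a hash of the columns
for row in table.find(year=1980, _limit=10, order_by='-year'):
    print(row)
print(table.count(year=1980))
years = list(table.distinct('year'))  # single column: returns a flat list of values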
class SQLTable(Component): _selects = 0 _inserts = 0 _updates = 0 _finalized = False STORE_MODE_LOOKUP = "lookup" STORE_MODE_INSERT = "insert" STORE_MODE_UPSERT = "upsert" _pk = False columns = [] create = True _unicode_errors = 0 _lookup_changed_fields = None def __init__(self, name, connection, columns, label=None): super(SQLTable, self).__init__() self.sa_table = None self.sa_metadata = None self.name = name self.connection = connection self.label = label if label else name self.columns = columns or [] for col in columns: col.sqltable = self def _get_sa_type(self, column): if (column.type == "Integer"): return Integer elif (column.type == "String"): #if (column.length is None): column.length = 128 return Unicode(length=128) elif (column.type == "Float"): return Float elif (column.type == "Boolean"): return Boolean elif (column.type == "AutoIncrement"): return Integer elif (column.type == "Date"): return Date elif (column.type == "Time"): return Time elif (column.type == "DateTime"): return DateTime elif (column.type == "Binary"): return Binary else: raise Exception("Invalid data type (%s): %s" % (column, column.type)) def finalize(self, ctx): if (not SQLTable._finalized): SQLTable._finalized = True if (SQLTable._inserts + SQLTable._selects > 0): logger.info("SQLTable Totals ins/upd/sel: %d/%d/%d " % (SQLTable._inserts, SQLTable._updates, SQLTable._selects)) if (self._inserts + self._selects > 0): logger.info("SQLTable %-18s ins/upd/sel: %6d/%6d/%-6d " % (self.name, self._inserts, self._updates, self._selects)) if (self._unicode_errors > 0): logger.warning("SQLTable %s found %d warnings assigning non-unicode fields to unicode columns" % (self.name, self._unicode_errors)) ctx.comp.finalize(self.connection) super(SQLTable, self).finalize(ctx) def initialize(self, ctx): super(SQLTable, self).initialize(ctx) if self._lookup_changed_fields is None: self._lookup_changed_fields = [] ctx.comp.initialize(self.connection) logger.debug("Loading table %s on %s" % (self.name, self)) self.sa_metadata = MetaData() self.sa_table = Table(self.name, self.sa_metadata) self._selects = 0 self._inserts = 0 self._updates = 0 self._unicode_errors = 0 # Drop? columns_ex = [] for column in self.columns: logger.debug("Adding column to %s: %s" % (self, column)) column.sqltable = self # Check for duplicate names if (column.name in columns_ex): raise ETLConfigurationException("Duplicate column name '%s' in %s" % (column.name, self)) columns_ex.append(column.name) # Configure column if isinstance(column, SQLColumnFK): if column.fk_sqlcolumn.sqltable.sa_table is None: logger.warning("Column %s foreign key %s table (%s) has not been defined in backend (ignoring).", column, column.fk_sqlcolumn, column.fk_sqlcolumn.sqltable) continue self.sa_table.append_column(Column(column.name, self._get_sa_type(column), ForeignKey(column.fk_sqlcolumn.sqltable.sa_table.columns[column.fk_sqlcolumn.name]), primary_key=column.pk, nullable=column.nullable, autoincrement=(True if column.type == "AutoIncrement" else False))) else: self.sa_table.append_column(Column(column.name, self._get_sa_type(column), primary_key=column.pk, nullable=column.nullable, autoincrement=(True if column.type == "AutoIncrement" else False))) # Check schema: # Create if doesn't exist if (not self.connection.engine().has_table(self.name)): logger.info("Creating table %s" % self.name) self.sa_table.create(self.connection.connection()) # TODO:? Extend? (unsafe, allow read-only connections and make them default?) # TODO:?
Delete columns (unsafe, allow read-only connections and make them default?) def pk(self, ctx): """ Returns the primary key column definition, or None if none defined. """ #if (self._pk == False): if True: pk_cols = [] for col in self.columns: if col.pk: pk_cols.append(col) if (len(pk_cols) > 1): raise Exception("Table %s has multiple primary keys: %s" % (self.name, pk_cols)) elif (len(pk_cols) == 1): self._pk = pk_cols[0] else: self._pk = None return self._pk def _attribsToClause(self, attribs): clauses = [] for k, v in attribs.items(): if isinstance(v, (list, tuple)): clauses.append(self.sa_table.c[k].in_(v)) else: clauses.append(self.sa_table.c[k] == v) return and_(*clauses) def _rowtodict(self, row): d = {} for column in self.columns: #print column d[column.name] = getattr(row, column.name) return d def _find(self, ctx, attribs): self._selects = self._selects + 1 SQLTable._selects = SQLTable._selects + 1 query = self.sa_table.select(self._attribsToClause(attribs)) rows = self.connection.connection().execute(query) for r in rows: # Ensure we return dicts, not RowProxys from SqlAlchemy yield self._rowtodict(r) def lookup(self, ctx, attribs, find_function=None): logger.debug("Lookup on '%s' attribs: %s" % (self, attribs)) if (len(attribs.keys()) == 0): raise Exception("Cannot lookup on table '%s' with no criteria (empty attribute set)" % self.name) find_function = find_function or self._find rows = find_function(ctx, attribs) rows = list(rows) if (len(rows) > 1): raise Exception("Found more than one row when searching for just one in table %s: %s" % (self.name, attribs)) elif (len(rows) == 1): row = rows[0] else: row = None logger.debug("Lookup result on %s: %s = %s" % (self.name, attribs, row)) return row def upsert(self, ctx, data, keys=[]): """ Upsert checks if the row exists and has changed. It does a lookup followed by an update or insert as appropriate.
""" # TODO: Check for AutoIncrement in keys, shall not be used # If keys qfilter = {} if (len(keys) > 0): for key in keys: try: qfilter[key] = data[key] except KeyError as e: raise Exception( "Could not find attribute '%s' in data when storing row data: %s" % (key, data)) else: pk = self.pk(ctx) qfilter[pk.name] = data[pk.name] # Do lookup if len(qfilter) > 0: row = self.lookup(ctx, qfilter) if (row): # Check row is identical for c in self.columns: if c.type != "AutoIncrement": v1 = row[c.name] v2 = data[c.name] if c.type == "Date": v1 = row[c.name].strftime('%Y-%m-%d') v2 = data[c.name].strftime('%Y-%m-%d') if (isinstance(v1, str) or isinstance(v2, str)): if (not isinstance(v1, str)): v1 = str(v1) if (not isinstance(v2, str)): v2 = str(v2) if (v1 != v2): if (c.name not in self._lookup_changed_fields): logger.warning( "%s updating an entity that exists with different attributes, overwriting (field=%s, existing_value=%s, tried_value=%s)" % (self, c.name, v1, v2)) #self._lookup_changed_fields.append(c["name"]) # Update the row row = self.update(ctx, data, keys) return row row_with_id = self.insert(ctx, data) return row_with_id def _prepare_row(self, ctx, data): row = {} for column in self.columns: if column.type != "AutoIncrement": try: row[column.name] = data[column.name] except KeyError as e: raise Exception( "Missing attribute for column %s in table '%s' while inserting row: %s" % (e, self.name, data)) # Checks if (column.type == "String") and (not isinstance( row[column.name], str)): self._unicode_errors = self._unicode_errors + 1 if (ctx.debug): logger.warning( "Unicode column %r received non-unicode string: %r " % (column.name, row[column.name])) return row def insert(self, ctx, data): row = self._prepare_row(ctx, data) logger.debug("Inserting in table '%s' row: %s" % (self.name, row)) res = self.connection.connection().execute(self.sa_table.insert(row)) pk = self.pk(ctx) if pk: row[pk.name] = res.inserted_primary_key[0] self._inserts = self._inserts + 1 SQLTable._inserts = SQLTable._inserts + 1 if pk is not None: return row else: return row # None def update(self, ctx, data, keys=[]): row = self._prepare_row(ctx, data) # Automatically calculate lookup if necessary qfilter = {} if (len(keys) > 0): for key in keys: try: qfilter[key] = data[key] except KeyError as e: raise Exception( "Could not find attribute '%s' in data when storing row data: %s" % (key, data)) else: pk = self.pk(ctx) qfilter[pk.name] = data[pk.name] logger.debug("Updating in table '%s' row: %s" % (self.name, row)) res = self.connection.connection().execute( self.sa_table.update(self._attribsToClause(qfilter), row)) self._updates = self._updates + 1 SQLTable._updates = SQLTable._updates + 1 if pk is not None: return row else: return None
class SQLTable(Component): _selects = 0 _inserts = 0 _updates = 0 _finalized = False STORE_MODE_LOOKUP = "lookup" STORE_MODE_INSERT = "insert" STORE_MODE_UPSERT = "upsert" _pk = False name = None connection = None columns = [] create = True sa_table = None sa_metadata = None _unicode_errors = 0 _lookup_changed_fields = None def __init__(self): super(SQLTable, self).__init__() self.columns = [] def _get_sa_type(self, column): if (column["type"] == "Integer"): return Integer elif (column["type"] == "String"): if (not "length" in column): column["length"] = 128 return Unicode(length = column["length"]) elif (column["type"] == "Float"): return Float elif (column["type"] == "Boolean"): return Boolean elif (column["type"] == "AutoIncrement"): return Integer elif (column["type"] == "Date"): return Date elif (column["type"] == "Time"): return Time elif (column["type"] == "DateTime"): return DateTime else: raise Exception("Invalid data type: %s" % column["type"]) def finalize(self, ctx): if (not SQLTable._finalized): SQLTable._finalized = True if (SQLTable._inserts + SQLTable._selects > 0): logger.info("SQLTable Totals ins/upd/sel: %d/%d/%d " % (SQLTable._inserts, SQLTable._updates, SQLTable._selects)) if (self._inserts + self._selects > 0): logger.info("SQLTable %-18s ins/upd/sel: %6d/%6d/%-6d " % (self.name, self._inserts, self._updates, self._selects)) if (self._unicode_errors > 0): logger.warn("SQLTable %s found %d warnings assigning non-unicode fields to unicode columns" % (self.name, self._unicode_errors)) ctx.comp.finalize(self.connection) super(SQLTable, self).finalize(ctx) def initialize(self, ctx): super(SQLTable, self).initialize(ctx) if self._lookup_changed_fields is None: self._lookup_changed_fields = [] ctx.comp.initialize(self.connection) logger.debug("Loading table %s on %s" % (self.name, self)) self.sa_metadata = MetaData() self.sa_table = Table(self.name, self.sa_metadata) # Drop? columns_ex = [] for column in self.columns: logger.debug("Adding column to %s: %s" % (self, column)) # Check for duplicate names if (column["name"] in columns_ex): raise Exception("Duplicate column name '%s' in %s" % (column["name"], self)) columns_ex.append(column["name"]) # Configure column column["pk"] = False if (not "pk" in column) else parsebool(column["pk"]) if (not "type" in column): column["type"] = "String" #if (not "value" in column): column["value"] = None self.sa_table.append_column( Column(column["name"], self._get_sa_type(column), primary_key = column["pk"], autoincrement = (True if column["type"] == "AutoIncrement" else False) )) # Check schema # Create if doesn't exist if (not self.connection.engine().has_table(self.name)): logger.info("Creating table %s" % self.name) self.sa_table.create(self.connection.connection()) # Extend? # Delete columns? def pk(self, ctx): """ Returns the primary key column definition, or None if none defined.
""" if (self._pk == False): pk_cols = [] for col in self.columns: if ("pk" in col): if parsebool(col["pk"]): pk_cols.append(col) if (len(pk_cols) > 1): raise Exception("Table %s has multiple primary keys: %s" % (self.name, pk_cols)) elif (len(pk_cols) == 1): self._pk = pk_cols[0] else: self._pk = None return self._pk def _attribsToClause(self, attribs): clauses = [] for k, v in attribs.items(): if isinstance(v, (list, tuple)): clauses.append(self.sa_table.c[k].in_(v)) else: clauses.append(self.sa_table.c[k] == v) return and_(*clauses) def _rowtodict(self, row): d = {} for column in self.columns: #print column d[column["name"]] = getattr(row, column["name"]) return d def _find(self, ctx, attribs): self._selects = self._selects + 1 SQLTable._selects = SQLTable._selects + 1 query = self.sa_table.select(self._attribsToClause(attribs)) rows = self.connection.connection().execute(query) for r in rows: # Ensure we return dicts, not RowProxys from SqlAlchemy yield self._rowtodict(r) def lookup(self, ctx, attribs): logger.debug ("Lookup on '%s' attribs: %s" % (self, attribs)) if (len(attribs.keys()) == 0): raise Exception("Cannot lookup on table '%s' with no criteria (empty attribute set)" % self.name) rows = self._find(ctx, attribs) rows = list(rows) if (len(rows) > 1): raise Exception("Found more than one row when searching for just one in table %s: %s" % (self.name, attribs)) elif (len(rows) == 1): row = rows[0] else: row = None logger.debug("Lookup result on %s: %s = %s" % (self.name, attribs, row)) return row def upsert(self, ctx, data, keys = []): """ Upsert checks if the row exists and has changed. It does a lookup followe by an update or insert as appropriate. """ # TODO: Check for AutoIncrement in keys, shall not be used # If keys qfilter = {} if (len(keys) > 0): for key in keys: try: qfilter[key] = data[key] except KeyError as e: raise Exception("Could not find attribute '%s' in data when storing row data: %s" % (key, data)) else: pk = self.pk(ctx) qfilter[pk["name"]] = data[pk["name"]] # Do lookup if len(qfilter) > 0: row = self.lookup(ctx, qfilter) if (row): # Check row is identical for c in self.columns: if c["type"] != "AutoIncrement": v1 = row[c['name']] v2 = data[c['name']] if c["type"] == "Date": v1 = row[c['name']].strftime('%Y-%m-%d') v2 = data[c['name']].strftime('%Y-%m-%d') if (isinstance(v1, basestring) or isinstance(v2, basestring)): if (not isinstance(v1, basestring)): v1 = str(v1) if (not isinstance(v2, basestring)): v2 = str(v2) if (v1 != v2): if (c["name"] not in self._lookup_changed_fields): logger.warn("%s updating an entity that exists with different attributes, overwriting (field=%s, existing_value=%s, tried_value=%s)" % (self, c["name"], v1, v2)) #self._lookup_changed_fields.append(c["name"]) # Update the row row = self.update(ctx, data, keys) return row row_with_id = self.insert(ctx, data) return row_with_id def _prepare_row(self, ctx, data): row = {} for column in self.columns: if (column["type"] != "AutoIncrement"): try: row[column["name"]] = data[column["name"]] except KeyError, e: raise Exception("Missing attribute for column %s in table '%s' while inserting row: %s" % (e, self.name, data)) # Checks if ((column["type"] == "String") and (not isinstance(row[column["name"]], unicode))): self._unicode_errors = self._unicode_errors + 1 if (ctx.debug): logger.warn("Unicode column %r received non-unicode string: %r " % (column["name"], row[column["name"]])) return row
def test_reflect_select_shared_table(engine): one_row = Table('bigquery-public-data.samples.natality', MetaData(bind=engine), autoload=True) row = one_row.select().limit(1).execute().first() assert len(row) >= 1
metadata = MetaData() # Create the table, registered on the metadata object # The engine uses the schema types to create a concrete table object testonly_tableobj = Table("testonly", metadata, Column("id", Integer, primary_key=True), Column("name", String(20)), Column('bytes', VARBINARY(20))) # color = Table("color", metadata, # Column("id", Integer, primary_key=True), # Column("name", String(20))) metadata.create_all(engine) # create the table structure in the database conn = engine.connect() try: # Reflect the table structure from the database via metadata, then insert data metadata = MetaData(engine) testonly_tableobj_autoload = Table('testonly', metadata, autoload=True) conn.execute(testonly_tableobj_autoload.insert(), { 'id': 2, "name": "koka", 'bytes': b'1234' }) logger.info('insert data successfully') sql_str = testonly_tableobj_autoload.select() table_data = conn.execute(sql_str) for content in table_data.fetchall(): logger.info('%s', content) finally: conn.close()
def test_querying_wildcard_tables(engine): table = Table('bigquery-public-data.noaa_gsod.gsod*', MetaData(bind=engine), autoload=True) rows = table.select().limit(1).execute().first() assert len(rows) > 0
class SQLTable(Component): _selects = 0 _inserts = 0 _finalized = False def __init__(self): super(SQLTable, self).__init__() self._pk = False self.name = None self.connection = None self.columns = [] self.create = True self.sa_table = None self.sa_metadata = None self._selects = 0 self._inserts = 0 self._unicode_errors = 0 def _get_sa_type(self, column): if (column["type"] == "Integer"): return Integer elif (column["type"] == "String"): if (not "length" in column): column["length"] = 128 return Unicode(length = column["length"]) elif (column["type"] == "Float"): return Float elif (column["type"] == "Boolean"): return Boolean elif (column["type"] == "AutoIncrement"): return Integer else: raise Exception("Invalid data type: %s" % column["type"]) def finalize(self, ctx): if (not SQLTable._finalized): SQLTable._finalized = True if (SQLTable._inserts + SQLTable._selects > 0): logger.info("SQLTable Totals inserts/selects: %d/%d " % (SQLTable._inserts, SQLTable._selects)) if (self._inserts + self._selects > 0): logger.info("SQLTable %-18s inserts/selects: %6d/%-6d " % (self.name, self._inserts, self._selects)) if (self._unicode_errors > 0): logger.warn("SQLTable %s found %d warnings assigning non-unicode fields to unicode columns" % (self.name, self._unicode_errors)) ctx.comp.finalize(self.connection) super(SQLTable, self).finalize(ctx) def initialize(self, ctx): super(SQLTable, self).initialize(ctx) ctx.comp.initialize(self.connection) logger.debug("Loading table %s on %s" % (self.name, self)) self.sa_metadata = MetaData() self.sa_table = Table(self.name, self.sa_metadata) # Drop? columns_ex = [] for column in self.columns: # Check for duplicate names if (column["name"] in columns_ex): raise Exception("Duplicate column name %s in %s" % (column["name"], self)) columns_ex.append(column["name"]) # Configure column column["pk"] = False if (not "pk" in column) else parsebool(column["pk"]) if (not "type" in column): column["type"] = "String" #if (not "value" in column): column["value"] = None logger.debug("Adding column %s" % column) self.sa_table.append_column( Column(column["name"], self._get_sa_type(column), primary_key = column["pk"], autoincrement = (True if column["type"] == "AutoIncrement" else False) )) # Check schema # Create if doesn't exist if (not self.connection.engine().has_table(self.name)): logger.info("Creating table %s" % self.name) self.sa_table.create(self.connection.connection()) # Extend? # Delete columns? def pk(self, ctx): """ Returns the primary key column definition, or None if none defined.
""" if (self._pk == False): pk_cols = [] for col in self.columns: if ("pk" in col): if parsebool(col["pk"]): pk_cols.append(col) if (len(pk_cols) > 1): raise Exception("Table %s has multiple primary keys: %s" % (self.name, pk_cols)) elif (len(pk_cols) == 1): self._pk = pk_cols[0] else: self._pk = None return self._pk def _attribsToClause(self, attribs): clauses = [] for k, v in attribs.items(): if isinstance(v, (list, tuple)): clauses.append(self.sa_table.c[k].in_(v)) else: clauses.append(self.sa_table.c[k] == v) return and_(*clauses) def _rowtodict(self, row): d = {} for column in self.columns: d[column["name"]] = getattr(row, column["name"]) return d def _find(self, ctx, attribs): self._selects = self._selects + 1 SQLTable._selects = SQLTable._selects + 1 query = self.sa_table.select(self._attribsToClause(attribs)) rows = self.connection.connection().execute(query) for r in rows: # Ensure we return dicts, not RowProxys from SqlAlchemy yield self._rowtodict(r) def lookup(self, ctx, attribs): logger.debug ("Lookup on '%s' attribs: %s" % (self, attribs)) if (len(attribs.keys()) == 0): raise Exception("Cannot lookup on table with no criteria (empty attribute set)") rows = self._find(ctx, attribs) rows = list(rows) if (len(rows) > 1): raise Exception("Found more than one row when searching for just one in table %s: %s" % (self.name, attribs)) elif (len(rows) == 1): row = rows[0] else: row = None logger.debug("Lookup result on %s: %s = %s" % (self.name, attribs, row)) return row def upsert(self, ctx, data, keys = []): # TODO: Check for AutoIncrement in keys, shall not be used # If keys qfilter = {} if (len(keys) > 0): for key in keys: try: qfilter[key] = data[key] except KeyError as e: raise Exception("Could not find attribute '%s' in data when storing row data: %s" % (key, data)) row = self.lookup(ctx, qfilter) if (row): return row row_with_id = self.insert(ctx, data) return row_with_id def _prepare_row(self, ctx, data): row = {} for column in self.columns: if (column["type"] != "AutoIncrement"): try: row[column["name"]] = data[column["name"]] except KeyError, e: raise Exception("Missing attribute for column %s in table '%s' while inserting row: %s" % (e, self.name, data)) # Checks if ((column["type"] == "String") and (not isinstance(row[column["name"]], unicode))): self._unicode_errors = self._unicode_errors + 1 if (ctx.debug): logger.warn("Unicode column %r received non-unicode string: %r " % (column["name"], row[column["name"]])) return row
def test_column_proxy(self): t = Table('t', MetaData(), Column('foo', self._add_override_factory()) ) proxied = t.select().c.foo self._assert_add_override(proxied)