def upgrade(migrate_engine):
    meta.bind = migrate_engine

    # Load the database tables
    servers_table = Table('servers', meta, autoload=True)
    pool_attrib_table = Table('pool_attributes', meta, autoload=True)

    # Read in all the servers to migrate to pool_attributes table
    servers = select(
        columns=[
            servers_table.c.id,
            servers_table.c.created_at,
            servers_table.c.updated_at,
            servers_table.c.version,
            servers_table.c.name
        ]
    ).execute().fetchall()

    for server in servers:
        pool_attrib_table.insert().execute(
            id=server.id,
            created_at=server.created_at,
            updated_at=server.updated_at,
            version=server.version,
            key='name_server',
            value=server.name,
            pool_id=default_pool_id
        )
def downgrade(migrate_engine):
    meta.bind = migrate_engine

    # Load the pool_attributes and pool_ns_records table schema
    pool_attributes_table = Table('pool_attributes', meta, autoload=True)
    pool_ns_records_table = Table('pool_ns_records', meta, autoload=True)

    # Find the nameservers for the default_pool_id
    pool_ns_records = select(
        columns=[
            pool_ns_records_table.c.id,
            pool_ns_records_table.c.created_at,
            pool_ns_records_table.c.updated_at,
            pool_ns_records_table.c.version,
            pool_ns_records_table.c.hostname,
        ]
    ).where(pool_attributes_table.c.pool_id == default_pool_id)\
     .execute().fetchall()

    # Create matching entries in the new table.
    for pool_ns_record in pool_ns_records:
        pool_attributes_table.insert().execute(
            id=pool_ns_record.id,
            created_at=pool_ns_record.created_at,
            updated_at=pool_ns_record.updated_at,
            version=pool_ns_record.version,
            key='name_server',
            value=pool_ns_record.hostname,
        )

    # Delete the pool_ns_records table from the DB
    pool_ns_records_table.drop()
def shovel(eng, tbl_source, tbl_destination, fields_in, fields_out=None,
           where_not_null=None, preserve_source=True, scalars=None):
    """
    Attempts to move everything in tbl_source to tbl_destination.

    fields_out must either be None or a list with length len(fields_in).
    These will be used as "SELECT {field_in} AS {field_out}" to map values
    to differently named columns.

    If preserve_source is False, the table will be dropped after the
    transfer.

    If scalars is not None, must be a dictionary with key->value pairs to
    pass as constants for everything shoveled by this function.
    """
    if fields_out is not None:
        assert len(fields_out) == len(fields_in), \
            "If provided, fields_out must have the same length as fields_in."
        select_fields = [
            column(c[0]).label(c[1]) for c in zip(fields_in, fields_out)
        ]
        insert_fields = fields_out
    else:
        select_fields = [column(c) for c in fields_in]
        insert_fields = fields_in

    if scalars is not None:
        assert isinstance(scalars, dict), \
            "If provided, scalars must be a simple key->value dictionary."
        try:
            select_fields.extend(
                [column(v).label(k) for k, v in scalars.items()])
            insert_fields.extend([str(k) for k in scalars.keys()])
        except Exception:
            raise Exception(
                "Unable to tack on select_fields and insert_fields for "
                "scalars. Does scalars look like a simple key->value map? "
                "(Got %s)" % scalars)

    sel = select(select_fields).select_from(
        Table(tbl_source, MetaData(), autoload_with=eng))

    if where_not_null is not None:
        assert isinstance(where_not_null, list), \
            "If provided, where_not_null must be a list (got %s)." \
            % (type(where_not_null))
        assert all([x in fields_in for x in where_not_null]), \
            "If provided, all items in where_not_null must be items in " \
            "fields_in (got %s and %s, respectively)." \
            % (where_not_null, fields_in)
        for wnn in where_not_null:
            sel = sel.where(column(wnn) != None)

    tbl_destination = Table(tbl_destination, MetaData(), autoload_with=eng)
    # INSERT ... SELECT: Insert.from_select() is the SQLAlchemy API for this;
    # execute through the engine since the reflected MetaData is unbound.
    eng.execute(tbl_destination.insert().from_select(insert_fields, sel))
    return True
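# A minimal usage sketch for shovel() above, assuming a SQLAlchemy 1.x engine
# and that both tables already exist with compatible columns; the connection
# URL, table names, and column names here are illustrative only.
from sqlalchemy import create_engine

eng = create_engine('postgresql://user:pass@localhost/warehouse')  # assumed URL

# Copy key/visitlink into differently named destination columns, skipping rows
# where visitlink is NULL; the source table is left in place.
shovel(
    eng,
    tbl_source='staging_rows',
    tbl_destination='final_rows',
    fields_in=['key', 'visitlink'],
    fields_out=['row_key', 'visit_link'],
    where_not_null=['visitlink'],
    preserve_source=True,
)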
class TestDuplicateServiceStatus(tests.TestCase): def setUp(self): super(TestDuplicateServiceStatus, self).setUp() self.engine = session.get_engine('storage:sqlalchemy') self.meta = MetaData() self.meta.bind = self.engine self.service_statuses_table = Table('service_statuses', self.meta, autoload=True) def test_success(self): fake_record = { 'id': '1', 'service_name': 'worker', 'hostname': 'localhost', 'status': 'UP', 'stats': '', 'capabilities': '', } self.service_statuses_table.insert().execute(fake_record) # Different hostname should be fine fake_record['id'] = '2' fake_record['hostname'] = 'otherhost' self.service_statuses_table.insert().execute(fake_record) # Different service_name should be fine fake_record['id'] = '3' fake_record['service_name'] = 'producer' self.service_statuses_table.insert().execute(fake_record) checks = status.Checks() self.assertEqual(upgradecheck.Code.SUCCESS, checks._duplicate_service_status().code) def test_failure(self): # Drop unique constraint so we can test error cases constraint = UniqueConstraint('service_name', 'hostname', table=self.service_statuses_table, name="unique_service_status") constraint.drop() fake_record = { 'id': '1', 'service_name': 'worker', 'hostname': 'localhost', 'status': 'UP', 'stats': '', 'capabilities': '', } self.service_statuses_table.insert().execute(fake_record) fake_record['id'] = '2' self.service_statuses_table.insert().execute(fake_record) checks = status.Checks() self.assertEqual(upgradecheck.Code.FAILURE, checks._duplicate_service_status().code)
def upgrade(migrate_engine):
    meta.bind = migrate_engine

    pools.create()
    pool_attributes.create()

    # Get the default pool_id from the config file
    default_pool_id = cfg.CONF['service:central'].default_pool_id

    # Create the default pool with hard-coded name, which can be changed
    # later via the api, and the default_pool_id from the config file
    pools_table = Table('pools', meta, autoload=True)
    pools_table.insert().execute(id=default_pool_id, name='default', version=1)
class TestDuplicateServiceStatus(tests.TestCase): def setUp(self): super(TestDuplicateServiceStatus, self).setUp() self.engine = session.get_engine('storage:sqlalchemy') self.meta = MetaData() self.meta.bind = self.engine self.service_statuses_table = Table('service_statuses', self.meta, autoload=True) def test_success(self): fake_record = {'id': '1', 'service_name': 'worker', 'hostname': 'localhost', 'status': 'UP', 'stats': '', 'capabilities': '', } self.service_statuses_table.insert().execute(fake_record) # Different hostname should be fine fake_record['id'] = '2' fake_record['hostname'] = 'otherhost' self.service_statuses_table.insert().execute(fake_record) # Different service_name should be fine fake_record['id'] = '3' fake_record['service_name'] = 'producer' self.service_statuses_table.insert().execute(fake_record) checks = status.Checks() self.assertEqual(upgradecheck.Code.SUCCESS, checks._duplicate_service_status().code) def test_failure(self): # Drop unique constraint so we can test error cases constraint = UniqueConstraint('service_name', 'hostname', table=self.service_statuses_table, name="unique_service_status") constraint.drop() fake_record = {'id': '1', 'service_name': 'worker', 'hostname': 'localhost', 'status': 'UP', 'stats': '', 'capabilities': '', } self.service_statuses_table.insert().execute(fake_record) fake_record['id'] = '2' self.service_statuses_table.insert().execute(fake_record) checks = status.Checks() self.assertEqual(upgradecheck.Code.FAILURE, checks._duplicate_service_status().code)
def upgrade(migrate_engine): meta.bind = migrate_engine zone_attibutes_table = Table('zone_attributes', meta, autoload=True) connection = migrate_engine.connect() transaction = connection.begin() try: zone_masters_table.create() masters = select([ zone_attibutes_table.c.id, zone_attibutes_table.c.version, zone_attibutes_table.c.created_at, zone_attibutes_table.c.updated_at, zone_attibutes_table.c.value, zone_attibutes_table.c.zone_id ]).where(zone_attibutes_table.c.key == 'master').execute().fetchall() masters_input = [] for master in masters: host, port = utils.split_host_port( master[zone_attibutes_table.c.value]) masters_input.append({ 'id': master[zone_attibutes_table.c.id], 'version': master[zone_attibutes_table.c.version], 'created_at': master[zone_attibutes_table.c.created_at], 'updated_at': master[zone_attibutes_table.c.updated_at], 'zone_id': master[zone_attibutes_table.c.zone_id], 'host': host, 'port': port }) zone_attibutes_table.insert(masters_input) zone_attibutes_table.delete().where( zone_attibutes_table.c.key == 'master') zone_attibutes_table.c.key.alter(type=String(50)) transaction.commit() except Exception: transaction.rollback() raise
def upgrade(migrate_engine): meta.bind = migrate_engine zone_attibutes_table = Table('zone_attributes', meta, autoload=True) connection = migrate_engine.connect() transaction = connection.begin() try: zone_masters_table.create() masters = select( [ zone_attibutes_table.c.id, zone_attibutes_table.c.version, zone_attibutes_table.c.created_at, zone_attibutes_table.c.updated_at, zone_attibutes_table.c.value, zone_attibutes_table.c.zone_id ] ).where( zone_attibutes_table.c.key == 'master' ).execute().fetchall() masters_input = [] for master in masters: host, port = utils.split_host_port( master[zone_attibutes_table.c.value]) masters_input.append({ 'id': master[zone_attibutes_table.c.id], 'version': master[zone_attibutes_table.c.version], 'created_at': master[zone_attibutes_table.c.created_at], 'updated_at': master[zone_attibutes_table.c.updated_at], 'zone_id': master[zone_attibutes_table.c.zone_id], 'host': host, 'port': port }) zone_attibutes_table.insert(masters_input) zone_attibutes_table.delete().where( zone_attibutes_table.c.key == 'master') zone_attibutes_table.c.key.alter(type=String(50)) transaction.commit() except Exception: transaction.rollback() raise
def insert_model(model: Table,
                 values: Union[ValuesType, Iterable[ValuesType]]) -> None:
    query = model.insert()
    if isinstance(values, Dict):
        metadata.bind.execute(query, **values)
    else:
        metadata.bind.execute(query, list(values))
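# A usage sketch for insert_model() above, assuming the module-level `metadata`
# it references is bound to an engine (SQLAlchemy 1.x style) and a hypothetical
# `users` table; the in-memory SQLite URL and the rows are illustrative.
from typing import Any, Dict

from sqlalchemy import Column, Integer, MetaData, String, Table, create_engine

ValuesType = Dict[str, Any]  # assumed alias, mirroring the annotation above

metadata = MetaData(bind=create_engine('sqlite://'))
users = Table(
    'users', metadata,
    Column('id', Integer, primary_key=True),
    Column('name', String(50)),
)
metadata.create_all()

# A single dict inserts one row; any other iterable of dicts becomes an
# executemany-style bulk insert.
insert_model(users, {'id': 1, 'name': 'alice'})
insert_model(users, [{'id': 2, 'name': 'bob'}, {'id': 3, 'name': 'carol'}])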
def test_insert_table(engine_testaccount): metadata = MetaData() users = Table('users', metadata, Column('id', Integer, Sequence('user_id_seq'), primary_key=True), Column('name', String), Column('fullname', String), ) metadata.create_all(engine_testaccount) data = [{ 'id': 1, 'name': 'testname1', 'fullname': 'fulltestname1', }, { 'id': 2, 'name': 'testname2', 'fullname': 'fulltestname2', }] conn = engine_testaccount.connect() try: # using multivalue insert conn.execute(users.insert(data)) results = conn.execute(select([users]).order_by('id')) row = results.fetchone() assert row['name'] == 'testname1' finally: conn.close() users.drop(engine_testaccount)
def test_unnest(engine, bigquery_dataset):
    from sqlalchemy import select, func, String
    from sqlalchemy_bigquery import ARRAY

    conn = engine.connect()
    metadata = MetaData()
    table = Table(
        f"{bigquery_dataset}.test_unnest",
        metadata,
        Column("objects", ARRAY(String)),
    )
    metadata.create_all(engine)
    conn.execute(
        table.insert(), [dict(objects=["a", "b", "c"]), dict(objects=["x", "y"])]
    )
    query = select([func.unnest(table.c.objects).alias("foo_objects").column])
    compiled = str(query.compile(engine))
    assert " ".join(compiled.strip().split()) == (
        f"SELECT `foo_objects`"
        f" FROM"
        f" `{bigquery_dataset}.test_unnest` `{bigquery_dataset}.test_unnest_1`,"
        f" unnest(`{bigquery_dataset}.test_unnest_1`.`objects`) AS `foo_objects`"
    )
    assert sorted(r[0] for r in conn.execute(query)) == ["a", "b", "c", "x", "y"]
def test_insert_table(engine_testaccount): metadata = MetaData() users = Table( 'users', metadata, Column('id', Integer, Sequence('user_id_seq'), primary_key=True), Column('name', String), Column('fullname', String), ) metadata.create_all(engine_testaccount) data = [{ 'id': 1, 'name': 'testname1', 'fullname': 'fulltestname1', }, { 'id': 2, 'name': 'testname2', 'fullname': 'fulltestname2', }] conn = engine_testaccount.connect() try: # using multivalue insert conn.execute(users.insert(data)) results = conn.execute(select([users]).order_by('id')) row = results.fetchone() assert row['name'] == 'testname1' finally: conn.close() users.drop(engine_testaccount)
def update_item_saved_info(item):
    engine = get_onitsuka_db_engine()

    item_owner_id = item['owner_id']
    item_id = item['item_id']

    user_following = Table('user_following', metaData,
                           autoload=True, autoload_with=engine)
    s = select([user_following.c.user_id],
               (user_following.c.following_id == item_owner_id))
    result = engine.execute(s)

    user_feed_update_list = list()
    for follower in result:
        item_owner_follower_id = follower['user_id']
        print(item_owner_follower_id)
        user_feed_update_item = {}
        user_feed_update_item['user_id'] = item_owner_follower_id
        user_feed_update_item['owner_id'] = item_owner_id
        user_feed_update_item['item_id'] = item_id
        user_feed_update_list.append(user_feed_update_item)

    result.close()

    user_feed_table = Table('user_feed', metaData,
                            autoload=True, autoload_with=engine)
    ins = user_feed_table.insert().values(user_id=bindparam('user_id'),
                                          owner_id=bindparam('owner_id'),
                                          item_id=bindparam('item_id'))
    engine.execute(ins, user_feed_update_list)
def upgrade(migrate_engine):
    meta.bind = migrate_engine

    pools.create()
    pool_attributes.create()

    # Get the default pool_id from the config file
    default_pool_id = cfg.CONF['service:central'].default_pool_id

    # Create the default pool with hard-coded name, which can be changed
    # later via the api, and the default_pool_id from the config file
    pools_table = Table('pools', meta, autoload=True)
    pools_table.insert().execute(
        id=default_pool_id,
        name='default',
        version=1
    )
def test_insert_values(self, engine, connection):
    table = Table('insert_test', MetaData(bind=engine),
                  Column('a', sqlalchemy.types.Integer))
    table.drop(checkfirst=True)
    table.create()
    connection.execute(table.insert([{'a': 1}, {'a': 2}]))

    result = table.select().execute().fetchall()
    expected = [(1,), (2,)]
    self.assertEqual(result, expected)
def outLog(self, logLevel, userNo, userName, farmNo, farmName,
           componentNo, componentName, instanceNo, instanceName,
           code, instanceType, platformNo, additions):
    # Get the event log message
    message = getMassage(code, additions)

    log_table = Table("EVENT_LOG", METADATA2, autoload=True)
    log_table.insert({
        "LOG_NO": None,
        "LOG_DATE": datetime.datetime.today(),
        "LOG_LEVEL": self.LOGLEVEL[logLevel],
        "USER_NO": userNo,
        "USER_NAME": userName,
        "FARM_NO": farmNo,
        "FARM_NAME": farmName,
        "COMPONENT_NO": componentNo,
        "COMPONENT_NAME": componentName,
        "INSTANCE_NO": instanceNo,
        "INSTANCE_NAME": instanceName,
        "MESSAGE_CODE": code,
        "MESSAGE": message,
        "INSTANCE_TYPE": instanceType,
        "PLATFORM_NO": platformNo
    }).execute()
async def _populate_table(
    db: Database,
    table: Table,
    values: Iterable[Dict[str, Any]],
):
    name: str = table.name
    query = table.insert()

    logger.info(f"Seeding table {name}")
    await db.execute_many(query, list(values))
    logger.info(f"Seeded table {name} successfully")
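# A possible way to call _populate_table() above, assuming the `databases`
# package; the SQLite URL, table definition, and seed rows are illustrative,
# and the table is assumed to already exist in the target database.
import asyncio

from databases import Database
from sqlalchemy import Column, Integer, MetaData, String, Table

metadata = MetaData()
users = Table(
    'users', metadata,
    Column('id', Integer, primary_key=True),
    Column('name', String(50)),
)


async def seed() -> None:
    db = Database('sqlite:///./seed.db')
    await db.connect()
    try:
        await _populate_table(db, users, [{'id': 1, 'name': 'alice'},
                                          {'id': 2, 'name': 'bob'}])
    finally:
        await db.disconnect()


asyncio.run(seed())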
def outLog(self, logLevel, userNo, userName, farmNo, farmName,
           componentNo, componentName, instanceNo, instanceName,
           code, instanceType, platformNo, additions):
    # Get the event log message
    message = getMassage(code, additions)

    log_table = Table("EVENT_LOG", METADATA2, autoload=True)
    log_table.insert([None, datetime.datetime.today(),
                      self.LOGLEVEL[logLevel], userNo, userName,
                      farmNo, farmName, componentNo, componentName,
                      instanceNo, instanceName, code, message,
                      instanceType, platformNo]).execute()
def test_insert_values(self, engine, connection):
    table = Table('insert_test', MetaData(bind=engine),
                  Column('a', sqlalchemy.types.Integer),
                  schema='pyhive_test_database')
    table.drop(checkfirst=True)
    table.create()
    connection.execute(table.insert([{'a': 1}, {'a': 2}]))

    result = table.select().execute().fetchall()
    expected = [(1,), (2,)]
    self.assertEqual(result, expected)
def test_insert_select(self, engine, connection):
    one_row = Table('one_row', MetaData(bind=engine), autoload=True)
    table = Table('insert_test', MetaData(bind=engine),
                  Column('a', sqlalchemy.types.Integer))
    table.drop(checkfirst=True)
    table.create()
    connection.execute(table.insert().from_select(['a'], one_row.select()))

    result = table.select().execute().fetchall()
    expected = [(1,)]
    self.assertEqual(result, expected)
def insert_rows(cls, output_rows: List[Dict[str, Any]],
                output_spec: Table) -> None:
    """
    Default implementation: insert the given rows into the output table.

    :param output_rows: rows to insert, one dict per row
    :param output_spec: target table; its MetaData must be bound to an engine
    :return: None
    """
    with output_spec.bind.begin() as conn:
        conn.execute(output_spec.insert(), *output_rows)
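# A small sketch of calling insert_rows() above, assuming the target table's
# MetaData is bound to an engine so that output_spec.bind is available; the
# engine URL, table, and rows are illustrative. The cls argument is unused by
# the function body, so None is passed here.
from sqlalchemy import Column, Integer, MetaData, String, Table, create_engine

metadata = MetaData(bind=create_engine('sqlite://'))
events = Table(
    'events', metadata,
    Column('id', Integer, primary_key=True),
    Column('kind', String(32)),
)
metadata.create_all()

insert_rows(None, [{'id': 1, 'kind': 'click'}, {'id': 2, 'kind': 'view'}], events)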
def test_migrate_data(self): meta = MetaData(bind=self.engine) # create TableA table_a = Table('TableA', meta, Column('id', Integer, primary_key=True), Column('first', String(8), nullable=False), Column('second', Integer)) table_a.create() # update it with sample data values = [ {'id': 1, 'first': 'a'}, {'id': 2, 'first': 'b'}, {'id': 3, 'first': 'c'} ] for value in values: self.engine.execute(table_a.insert(values=value)) # create TableB similar to TableA, except column 'second' table_b = Table('TableB', meta, Column('id', Integer, primary_key=True), Column('first', String(8), nullable=False)) table_b.create() # migrate data migrate_utils.migrate_data(self.engine, table_a, table_b, ['second']) # validate table_a is dropped self.assertTrue(self.engine.dialect.has_table( self.engine.connect(), 'TableA'), 'Data migration failed to drop source table') # validate table_b is updated with data from table_a table_b_rows = list(table_b.select().execute()) self.assertEqual(3, len(table_b_rows), "Data migration is failed") table_b_values = [] for row in table_b_rows: table_b_values.append({'id': row.id, 'first': row.first}) self.assertEqual(values, table_b_values, "Data migration failed with invalid data copy")
def load_raw(eng, handle, dummy_separator='\v', table_name=None):
    """
    Uses SQLAlchemy (and optionally psycopg2) to load raw data into a
    one-column table. Returns a SQLAlchemy Table object.

    Parameters
    ==========
    eng: required
        Must be a SQLAlchemy engine object.

    handle: required
        Must be a file-like object. I.e., returned by pyhcup.parser._open().

    dummy_separator: required (default: "\v")
        Must be a character not found in the data to be loaded. The
        psycopg2.cursor.copy_from() method uses a horizontal tab ('\t') by
        default, but I am afraid of occasional horizontal tabs in HCUP and
        PUDF data that we just haven't found yet.

    table_name: optional (default: None)
        Table name for the load. Will be generated automatically if not
        provided.
    """
    # get the filename sans extension for use in making a table name
    base_filename = os.path.split(handle.name)[-1].split('.')[0]

    # make a timestamp in YYYYMMDDhhmmss format
    # will be used as part of the table name
    now = datetime.datetime.now()
    timestamp = now.strftime('%Y%m%d%H%M%S')

    if table_name is None:
        table_name = '%s_%s_raw' % (base_filename, timestamp)

    # else SQLAlchemy will fail to reflect later (seems to coerce
    # case-sensitive)
    table_name = table_name.lower()

    # proceed to table creation
    table = Table(table_name, MetaData(), Column('line', Text()))
    table.create(bind=eng)

    if eng.driver == 'psycopg2':
        # use Postgres COPY FROM
        conn = eng.raw_connection()
        cursor = conn.cursor()  # acquire a cursor from the connection object

        # load the data using psycopg2.cursor.copy_from() method
        cursor.copy_from(handle, '"%s"' % table_name, sep=dummy_separator)
        conn.commit()
        conn.close()
    else:
        # fall back to line-by-line insert
        data = [{'line': l.strip()} for l in handle]
        eng.execute(table.insert(), data)

    return table
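# An illustrative call to load_raw() above, assuming a psycopg2-backed engine
# (so the COPY FROM branch is taken); the connection URL and file name are
# assumptions, not from the original source.
from sqlalchemy import create_engine

eng = create_engine('postgresql+psycopg2://user:pass@localhost/hcup')

with open('core_data.asc') as handle:
    raw_table = load_raw(eng, handle)

# The generated table name combines the file stem and a timestamp,
# e.g. 'core_data_20240101123000_raw'.
print(raw_table.name)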
def upgrade(migrate_engine):
    meta = MetaData()
    meta.bind = migrate_engine

    tables = [define_task_info_table(meta)]
    create_tables(tables)

    tasks_table = Table('tasks', meta, autoload=True)
    task_info_table = Table('task_info', meta, autoload=True)

    tasks = tasks_table.select().execute().fetchall()
    for task in tasks:
        values = {
            'task_id': task.id,
            'input': task.input,
            'result': task.result,
            'message': task.message,
        }
        task_info_table.insert(values=values).execute()

    for col_name in TASKS_MIGRATE_COLUMNS:
        tasks_table.columns[col_name].drop()
def test_insert_select(self, engine, connection):
    one_row = Table('one_row', MetaData(bind=engine), autoload=True)
    table = Table('insert_test', MetaData(bind=engine),
                  Column('a', sqlalchemy.types.Integer),
                  schema='pyhive_test_database')
    table.drop(checkfirst=True)
    table.create()
    connection.execute('SET mapred.job.tracker=local')
    # NOTE(jing) I'm stuck on a version of Hive without INSERT ... VALUES
    connection.execute(table.insert().from_select(['a'], one_row.select()))

    result = table.select().execute().fetchall()
    expected = [(1,)]
    self.assertEqual(result, expected)
class TableHandler(object): """ Used by automatically generated objects such as datasets and dimensions to generate, write and clear the table under its management. """ def _init_table(self, meta, namespace, name, id_type=Integer): """ Create the given table if it does not exist, otherwise reflect the current table schema from the database. """ name = namespace + '__' + name self.table = Table(name, meta) if id_type is not None: col = Column('id', id_type, primary_key=True) self.table.append_column(col) def _generate_table(self): """ Create the given table if it does not exist. """ # TODO: make this support some kind of migration? if not db.engine.has_table(self.table.name): self.table.create(db.engine) def _upsert(self, bind, data, unique_columns): """ Upsert a set of values into the table. This will query for the set of unique columns and either update an existing row or create a new one. In both cases, the ID of the changed row will be returned. """ key = and_(*[self.table.c[c] == data.get(c) for c in unique_columns]) q = self.table.update(key, data) if bind.execute(q).rowcount == 0: q = self.table.insert(data) rs = bind.execute(q) return rs.inserted_primary_key[0] else: q = self.table.select(key) row = bind.execute(q).fetchone() return row['id'] def _flush(self, bind): """ Delete all rows in the table. """ q = self.table.delete() bind.execute(q) def _drop(self, bind): """ Drop the table and the local reference to it. """ if db.engine.has_table(self.table.name): self.table.drop() del self.table
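# A standalone sketch of the same lookup-then-update-or-insert flow used by
# TableHandler._upsert() above, written against a plain Table with a
# SQLAlchemy 1.x engine instead of the class internals; the engine URL, table,
# columns, and data are illustrative.
from sqlalchemy import (Column, Integer, MetaData, String, Table, and_,
                        create_engine)

engine = create_engine('sqlite://')
metadata = MetaData()
people = Table(
    'people', metadata,
    Column('id', Integer, primary_key=True),
    Column('email', String(120)),
    Column('name', String(120)),
)
metadata.create_all(engine)


def upsert(bind, data, unique_columns):
    # Match on the unique columns; update if a row exists, insert otherwise,
    # and return the primary key of the affected row either way.
    key = and_(*[people.c[c] == data.get(c) for c in unique_columns])
    if bind.execute(people.update().where(key).values(data)).rowcount == 0:
        return bind.execute(people.insert().values(data)).inserted_primary_key[0]
    return bind.execute(people.select().where(key)).fetchone()['id']


with engine.begin() as conn:
    pk = upsert(conn, {'email': 'ann@example.com', 'name': 'Ann'}, ['email'])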
def outLog(self, logLevel, userNo, userName, farmNo, farmName, componentNo, componentName, instanceNo, instanceName, code, instanceType, platformNo, additions): # イベントログメッセージの取得 message = getMassage(code, additions) log_table = Table("EVENT_LOG", METADATA2, autoload=True) log_table.insert({ "LOG_NO": None, "LOG_DATE": datetime.datetime.today(), "LOG_LEVEL": self.LOGLEVEL[logLevel], "USER_NO": userNo, "USER_NAME": userName, "FARM_NO": farmNo, "FARM_NAME": farmName, "COMPONENT_NO": componentNo, "COMPONENT_NAME": componentName, "INSTANCE_NO": instanceNo, "INSTANCE_NAME": instanceName, "MESSAGE_CODE": code, "MESSAGE": message, "INSTANCE_TYPE": instanceType, "PLATFORM_NO": platformNo }).execute()
def test_migrate_data(self): meta = MetaData(bind=self.engine) # create TableA table_a = Table('TableA', meta, Column('id', Integer, primary_key=True), Column('first', String(8), nullable=False), Column('second', Integer)) table_a.create() # update it with sample data values = [{ 'id': 1, 'first': 'a' }, { 'id': 2, 'first': 'b' }, { 'id': 3, 'first': 'c' }] for value in values: self.engine.execute(table_a.insert(values=value)) # create TableB similar to TableA, except column 'second' table_b = Table('TableB', meta, Column('id', Integer, primary_key=True), Column('first', String(8), nullable=False)) table_b.create() # migrate data migrate_utils.migrate_data(self.engine, table_a, table_b, ['second']) # validate table_a is dropped self.assertTrue( self.engine.dialect.has_table(self.engine.connect(), 'TableA'), 'Data migration failed to drop source table') # validate table_b is updated with data from table_a table_b_rows = list(table_b.select().execute()) self.assertEqual(3, len(table_b_rows), "Data migration is failed") table_b_values = [] for row in table_b_rows: table_b_values.append({'id': row.id, 'first': row.first}) self.assertEqual(values, table_b_values, "Data migration failed with invalid data copy")
def dte_load(eng, handle, table_name): """ Uses SQLAlchemy (and optionally psycopg2) to load DaysToEvent data from a csv file into a database table. Uses default schema; no support for specifying schema inside this function. Returns the row count of the newly created table. Parameters ========== eng: required Must be a SQLAlchemy engine object. handle: required Must be a file-like object. I.e., returned by open(path). table_name: required Table name for the load. """ table = Table(table_name, MetaData(), Column('key', BigInteger), Column('visitlink', BigInteger), Column('daystoevent', BigInteger) ) table.create(eng, checkfirst=True) if eng.driver == 'psycopg2': # use Postgres COPY FROM conn = eng.raw_connection() cursor = conn.cursor() # acquire a cursor from the connection object cp_sql = "COPY %s FROM STDIN DELIMITER ',' CSV HEADER;" % (table_name) cursor.copy_expert(cp_sql, handle) conn.commit() conn.close() else: # fall back to generic bulk insert data = [] for line in handle: l = [int(x.strip()) for x in line.strip().split(',')] data.append({ 'key': l[0], 'visitlink': l[1], 'daystoevent': l[2] }) eng.execute(table.insert(), data) row_count = eng.execute(select([func.count()]).select_from(table)).fetchone()[0] return row_count
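# An illustrative call to dte_load() above; the engine URL, CSV path, and
# table name are assumptions, not from the original source.
from sqlalchemy import create_engine

eng = create_engine('postgresql+psycopg2://user:pass@localhost/hcup')

with open('daystoevent.csv') as handle:
    n_rows = dte_load(eng, handle, 'dte_2009')

print('loaded %d rows' % n_rows)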
def _round_trip(self, datatype, data):
    metadata = self.metadata
    int_table = Table(
        "integer_table",
        metadata,
        Column("id", Integer, Sequence('id_seq'), primary_key=True),
        Column("integer_data", datatype),
    )

    metadata.create_all(config.db)

    config.db.execute(int_table.insert(), {"integer_data": data})

    row = config.db.execute(select([int_table.c.integer_data])).first()

    eq_(row, (data,))

    if util.py3k:
        assert isinstance(row[0], int)
    else:
        assert isinstance(row[0], (long, int))  # noqa
def list2db(logger, conn, table, seq, lst):
    metadata = MetaData(conn)
    try:
        table = Table(table, metadata,
                      Column('id', Integer, Sequence(seq), primary_key=True),
                      autoload=True, autoload_with=conn)
        trans = conn.begin()
        for x in lst:
            conn.execute(table.insert(), x)
        trans.commit()
    except Exception as e:
        logger.error("db insert fail[%s]", str(e))
        try:
            trans.rollback()
        except Exception:
            logger.error("rollback() fail[%s]", str(e))
        return False
def np_to_sql(A, tbl_name, conn): """Converts a numpy structured array to an sql table Parameters ---------- A : numpy structured array Array to convert tbl_name : str Name of table to insert into sql database conn : sqlalchemy.engine.Connectable Connection for the sql database Returns ------- sqlalchemy.schema.table sqlalchemy table corresponding to the uploaded structured array """ dtype = A.dtype col_names = dtype.names def sql_dtype(col_dtype): if col_dtype.char == 'S': return sqlt.VARCHAR(col_dtype.itemsize) return np_to_sql_types[col_dtype][0] cols = [Column(name, sql_dtype(dtype[name])) for name in col_names] md = MetaData() tbl = Table(tbl_name, md, *cols) md.create_all(conn) # http://stackoverflow.com/questions/7043158/insert-numpy-array-into-mysql-database # TODO find a faster way to fix datetimes conn.execute( tbl.insert(), [ dict( it.izip( col_names, [ datetime64_to_datetime(cell) for cell in row])) for row in A]) return tbl
def np_to_sql(A, tbl_name, conn): """Converts a numpy structured array to an sql table Parameters ---------- A : numpy structured array Array to convert tbl_name : str Name of table to insert into sql database conn : sqlalchemy.engine.Connectable Connection for the sql database Returns ------- sqlalchemy.schema.table sqlalchemy table corresponding to the uploaded structured array """ dtype = A.dtype col_names = dtype.names def sql_dtype(col_dtype): if col_dtype.char == 'S': return sqlt.VARCHAR(col_dtype.itemsize) return np_to_sql_types[col_dtype][0] cols = [Column(name, sql_dtype(dtype[name])) for name in col_names] md = MetaData() tbl = Table(tbl_name, md, *cols) md.create_all(conn) # http://stackoverflow.com/questions/7043158/insert-numpy-array-into-mysql-database # TODO find a faster way to fix datetimes conn.execute(tbl.insert(), [ dict(it.izip(col_names, [datetime64_to_datetime(cell) for cell in row])) for row in A ]) return tbl
class SQLTable(Component): _selects = 0 _inserts = 0 _updates = 0 _finalized = False STORE_MODE_LOOKUP = "lookup" STORE_MODE_INSERT = "insert" STORE_MODE_UPSERT = "upsert" _pk = False columns = [] create = True _unicode_errors = 0 _lookup_changed_fields = None def __init__(self, name, connection, columns, label=None): super(SQLTable, self).__init__() self.sa_table = None self.sa_metadata = None self.name = name self.connection = connection self.label = label if label else name self.columns = columns or [] for col in columns: col.sqltable = self def _get_sa_type(self, column): if (column.type == "Integer"): return Integer elif (column.type == "String"): #if (column.length is None): column.length = 128 return Unicode(length=128) elif (column.type == "Float"): return Float elif (column.type == "Boolean"): return Boolean elif (column.type == "AutoIncrement"): return Integer elif (column.type == "Date"): return Date elif (column.type == "Time"): return Time elif (column.type == "DateTime"): return DateTime elif (column.type == "Binary"): return Binary else: raise Exception("Invalid data type (%s): %s" % (column, column.type)) def finalize(self, ctx): if (not SQLTable._finalized): SQLTable._finalized = True if (SQLTable._inserts + SQLTable._selects > 0): logger.info( "SQLTable Totals ins/upd/sel: %d/%d/%d " % (SQLTable._inserts, SQLTable._updates, SQLTable._selects)) if (self._inserts + self._selects > 0): logger.info( "SQLTable %-18s ins/upd/sel: %6d/%6d/%-6d " % (self.name, self._inserts, self._updates, self._selects)) if (self._unicode_errors > 0): logger.warning( "SQLTable %s found %d warnings assigning non-unicode fields to unicode columns" % (self.name, self._unicode_errors)) ctx.comp.finalize(self.connection) super(SQLTable, self).finalize(ctx) def initialize(self, ctx): super(SQLTable, self).initialize(ctx) if self._lookup_changed_fields == None: self._lookup_changed_fields = [] ctx.comp.initialize(self.connection) logger.debug("Loading table %s on %s" % (self.name, self)) self.sa_metadata = MetaData() self.sa_table = Table(self.name, self.sa_metadata) self._selects = 0 self._inserts = 0 self._updates = 0 self._unicode_errors = 0 # Drop? columns_ex = [] for column in self.columns: logger.debug("Adding column to %s: %s" % (self, column)) column.sqltable = self # Check for duplicate names if (column.name in columns_ex): raise ETLConfigurationException( "Duplicate column name '%s' in %s" % (column.name, self)) columns_ex.append(column.name) # Configure column if isinstance(column, SQLColumnFK): if column.fk_sqlcolumn.sqltable.sa_table is None: logger.warning( "Column %s foreign key %s table (%s) has not been defined in backend (ignoring).", column, column.fk_sqlcolumn, column.fk_sqlcolumn.sqltable) continue self.sa_table.append_column( Column(column.name, self._get_sa_type(column), ForeignKey( column.fk_sqlcolumn.sqltable.sa_table.columns[ column.fk_sqlcolumn.name]), primary_key=column.pk, nullable=column.nullable, autoincrement=(True if column.type == "AutoIncrement" else False))) else: self.sa_table.append_column( Column(column.name, self._get_sa_type(column), primary_key=column.pk, nullable=column.nullable, autoincrement=(True if column.type == "AutoIncrement" else False))) # Check schema: # Create if doesn't exist if (not self.connection.engine().has_table(self.name)): logger.info("Creating table %s" % self.name) self.sa_table.create(self.connection.connection()) # TODO:? Extend? (unsafe, allow read-only connections and make them default?) # TODO:? 
Delete columns (unsafe, allow read-only connections and make them default?) def pk(self, ctx): """ Returns the primary key column definitToClauion, or None if none defined. """ #if (self._pk == False): if True: pk_cols = [] for col in self.columns: if col.pk: pk_cols.append(col) if (len(pk_cols) > 1): raise Exception("Table %s has multiple primary keys: %s" % (self.name, pk_cols)) elif (len(pk_cols) == 1): self._pk = pk_cols[0] else: self._pk = None return self._pk def _attribsToClause(self, attribs): clauses = [] for k, v in attribs.items(): if isinstance(v, (list, tuple)): clauses.append(self.sa_table.c[k].in_(v)) else: clauses.append(self.sa_table.c[k] == v) return and_(*clauses) def _rowtodict(self, row): d = {} for column in self.columns: #print column d[column.name] = getattr(row, column.name) return d def _find(self, ctx, attribs): self._selects = self._selects + 1 SQLTable._selects = SQLTable._selects + 1 query = self.sa_table.select(self._attribsToClause(attribs)) rows = self.connection.connection().execute(query) for r in rows: # Ensure we return dicts, not RowProxys from SqlAlchemy yield self._rowtodict(r) def lookup(self, ctx, attribs, find_function=None): logger.debug("Lookup on '%s' attribs: %s" % (self, attribs)) if (len(attribs.keys()) == 0): raise Exception( "Cannot lookup on table '%s' with no criteria (empty attribute set)" % self.name) find_function = find_function or self._find rows = find_function(ctx, attribs) rows = list(rows) if (len(rows) > 1): raise Exception( "Found more than one row when searching for just one in table %s: %s" % (self.name, attribs)) elif (len(rows) == 1): row = rows[0] else: row = None logger.debug("Lookup result on %s: %s = %s" % (self.name, attribs, row)) return row def upsert(self, ctx, data, keys=[]): """ Upsert checks if the row exists and has changed. It does a lookup followed by an update or insert as appropriate. 
""" # TODO: Check for AutoIncrement in keys, shall not be used # If keys qfilter = {} if (len(keys) > 0): for key in keys: try: qfilter[key] = data[key] except KeyError as e: raise Exception( "Could not find attribute '%s' in data when storing row data: %s" % (key, data)) else: pk = self.pk(ctx) qfilter[pk.name] = data[pk.name] # Do lookup if len(qfilter) > 0: row = self.lookup(ctx, qfilter) if (row): # Check row is identical for c in self.columns: if c.type != "AutoIncrement": v1 = row[c.name] v2 = data[c.name] if c.type == "Date": v1 = row[c.name].strftime('%Y-%m-%d') v2 = data[c.name].strftime('%Y-%m-%d') if (isinstance(v1, str) or isinstance(v2, str)): if (not isinstance(v1, str)): v1 = str(v1) if (not isinstance(v2, str)): v2 = str(v2) if (v1 != v2): if (c.name not in self._lookup_changed_fields): logger.warning( "%s updating an entity that exists with different attributes, overwriting (field=%s, existing_value=%s, tried_value=%s)" % (self, c.name, v1, v2)) #self._lookup_changed_fields.append(c["name"]) # Update the row row = self.update(ctx, data, keys) return row row_with_id = self.insert(ctx, data) return row_with_id def _prepare_row(self, ctx, data): row = {} for column in self.columns: if column.type != "AutoIncrement": try: row[column.name] = data[column.name] except KeyError as e: raise Exception( "Missing attribute for column %s in table '%s' while inserting row: %s" % (e, self.name, data)) # Checks if (column.type == "String") and (not isinstance( row[column.name], str)): self._unicode_errors = self._unicode_errors + 1 if (ctx.debug): logger.warning( "Unicode column %r received non-unicode string: %r " % (column.name, row[column.name])) return row def insert(self, ctx, data): row = self._prepare_row(ctx, data) logger.debug("Inserting in table '%s' row: %s" % (self.name, row)) res = self.connection.connection().execute(self.sa_table.insert(row)) pk = self.pk(ctx) if pk: row[pk.name] = res.inserted_primary_key[0] self._inserts = self._inserts + 1 SQLTable._inserts = SQLTable._inserts + 1 if pk is not None: return row else: return row # None def update(self, ctx, data, keys=[]): row = self._prepare_row(ctx, data) # Automatically calculate lookup if necessary qfilter = {} if (len(keys) > 0): for key in keys: try: qfilter[key] = data[key] except KeyError as e: raise Exception( "Could not find attribute '%s' in data when storing row data: %s" % (key, data)) else: pk = self.pk(ctx) qfilter[pk.name] = data[pk.name] logger.debug("Updating in table '%s' row: %s" % (self.name, row)) res = self.connection.connection().execute( self.sa_table.update(self._attribsToClause(qfilter), row)) self._updates = self._updates + 1 SQLTable._updates = SQLTable._updates + 1 if pk is not None: return row else: return None
# insert
# - insert(values=None, inline=False, **kwargs)

# compile
# - compile(bind=None, dialect=None, **kw)
# - Compiles the SQL expression.
# - The return value is the compiled object.
# - Using params, you can retrieve the compiled object's parameter names and values.

# Example
# The insert object for the users table

# In[24]:

insert = users.insert()

# In[25]:

type(insert)

# In[26]:

print(insert)

# In[27]:

insert = users.insert().values(name='kim', fullname='Anonymou, Kim ')
print(insert)

# In[28]:
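# Added sketch (not part of the original notebook): compile the insert above
# and inspect its bound parameters via Compiled.params, as described in the
# notes on compile(); the printed output in the comments is illustrative.
compiled = insert.compile()
print(compiled)         # e.g. INSERT INTO users (name, fullname) VALUES (:name, :fullname)
print(compiled.params)  # e.g. {'name': 'kim', 'fullname': 'Anonymou, Kim '}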
class Table(object): def __init__(self, db, schema, table, columns=None): self.db = db self.schema = schema self.name = table self.engine = create_engine(db.url) self.metadata = MetaData(schema=schema) self.metadata.bind = self.engine # http://docs.sqlalchemy.org/en/rel_1_0/core/metadata.html # if provided columns (SQLAlchemy columns), create the table if table: if columns: self.table = SQLATable( table, self.metadata, schema=self.schema, *columns ) self.table.create() # otherwise just load from db else: self.table = SQLATable( table, self.metadata, schema=self.schema, autoload=True ) self.indexes = dict((i.name, i) for i in self.table.indexes) self._is_dropped = False else: self._is_dropped = True self.table = None @property def _normalized_columns(self): return list(map(normalize_column_name, self.columns)) @property def columns(self): """Return list of all columns in table """ return list(self.table.columns.keys()) @property def sqla_columns(self): """Return all columns in table as sqlalchemy column types """ return self.table.columns @property def column_types(self): """Return a dict mapping column name to type for all columns in table """ column_types = {} for c in self.sqla_columns: column_types[c.name] = c.type return column_types @property def primary_key(self): """Return a list of columns making up the primary key constraint """ return [c.name for c in self.table.primary_key] @property def op(self): ctx = MigrationContext.configure(self.engine.connect()) return Operations(ctx) def _valid_table_name(self, table_name): """Check if the table name is obviously invalid. """ if table_name is None or not len(table_name.strip()): raise ValueError("Invalid table name: %r" % table_name) return table_name.strip() def _update_table(self, table_name): self.metadata = MetaData(schema=self.schema) self.metadata.bind = self.engine return SQLATable(table_name, self.metadata, schema=self.schema) def add_primary_key(self, column="id"): """Add primary key constraint to specified column """ if not self.primary_key: sql = """ALTER TABLE {s}.{t} ADD PRIMARY KEY ({c}) """.format( s=self.schema, t=self.name, c=column ) self.db.execute(sql) def drop(self): """Drop the table from the database """ if self._is_dropped is False: self.table.drop(self.engine) self._is_dropped = True def _check_dropped(self): if self._is_dropped: raise DatasetException( "the table has been dropped. this object should not be used again." ) def _args_to_clause(self, args): clauses = [] for k, v in args.items(): if isinstance(v, (list, tuple)): clauses.append(self.table.c[k].in_(v)) else: clauses.append(self.table.c[k] == v) return and_(*clauses) def create_column(self, name, type): """ Explicitely create a new column ``name`` of a specified type. ``type`` must be a `SQLAlchemy column type <http://docs.sqlalchemy.org/en/rel_0_8/core/types.html>`_. :: table.create_column('created_at', sqlalchemy.DateTime) """ self._check_dropped() if normalize_column_name(name) not in self._normalized_columns: self.op.add_column(self.table.name, Column(name, type), self.table.schema) self.table = self._update_table(self.table.name) def drop_column(self, name): """ Drop the column ``name`` :: table.drop_column('created_at') """ self._check_dropped() if name in list(self.table.columns.keys()): self.op.drop_column(self.table.name, name, schema=self.schema) self.table = self._update_table(self.table.name) def create_index(self, columns, name=None, index_type="btree"): """ Create an index to speed up queries on a table. 
If no ``name`` is given a random name is created. :: table.create_index(['name', 'country']) """ self._check_dropped() if not name: sig = "||".join(columns + [index_type]) # This is a work-around for a bug in <=0.6.1 which would create # indexes based on hash() rather than a proper hash. key = abs(hash(sig)) name = "ix_%s_%s" % (self.table.name, key) if name in self.indexes: return self.indexes[name] key = sha1(sig.encode("utf-8")).hexdigest()[:16] name = "ix_%s_%s" % (self.table.name, key) if name in self.indexes: return self.indexes[name] # self.db._acquire() columns = [self.table.c[col] for col in columns] idx = Index(name, *columns, postgresql_using=index_type) idx.create(self.engine) # finally: # self.db._release() self.indexes[name] = idx return idx def create_index_geom(self, column="geom"): """Shortcut to create index on geometry """ self.create_index([column], index_type="gist") def distinct(self, *columns, **_filter): """ Returns all rows of a table, but removes rows in with duplicate values in ``columns``. Interally this creates a `DISTINCT statement <http://www.w3schools.com/sql/sql_distinct.asp>`_. :: # returns only one row per year, ignoring the rest table.distinct('year') # works with multiple columns, too table.distinct('year', 'country') # you can also combine this with a filter table.distinct('year', country='China') """ self._check_dropped() qargs = [] try: columns = [self.table.c[c] for c in columns] for col, val in _filter.items(): qargs.append(self.table.c[col] == val) except KeyError: return [] q = expression.select( columns, distinct=True, whereclause=and_(*qargs), order_by=[c.asc() for c in columns], ) # if just looking at one column, return a simple list if len(columns) == 1: return itertools.chain.from_iterable(self.engine.execute(q)) # otherwise return specified row_type else: return ResultIter(self.engine.execute(q), row_type=self.db.row_type) def insert(self, row): """ Add a row (type: dict) by inserting it into the table. Columns must exist. :: data = dict(title='I am a banana!') table.insert(data) Returns the inserted row's primary key. """ self._check_dropped() res = self.engine.execute(self.table.insert(row)) if len(res.inserted_primary_key) > 0: return res.inserted_primary_key[0] def insert_many(self, rows, chunk_size=1000): """ Add many rows at a time, which is significantly faster than adding them one by one. Per default the rows are processed in chunks of 1000 per commit, unless you specify a different ``chunk_size``. See :py:meth:`insert() <dataset.Table.insert>` for details on the other parameters. :: rows = [dict(name='Dolly')] * 10000 table.insert_many(rows) """ def _process_chunk(chunk): self.table.insert().execute(chunk) self._check_dropped() chunk = [] for i, row in enumerate(rows, start=1): chunk.append(row) if i % chunk_size == 0: _process_chunk(chunk) chunk = [] if chunk: _process_chunk(chunk) def rename(self, name): """Rename the table """ sql = """ALTER TABLE {s}.{t} RENAME TO {name} """.format( s=self.schema, t=self.name, name=name ) self.engine.execute(sql) self.table = SQLATable(name, self.metadata, schema=self.schema, autoload=True) def find_one(self, **kwargs): """ Works just like :py:meth:`find() <dataset.Table.find>` but returns one result, or None. 
:: row = table.find_one(country='United States') """ kwargs["_limit"] = 1 iterator = self.find(**kwargs) try: return next(iterator) except StopIteration: return None def _args_to_order_by(self, order_by): if order_by[0] == "-": return self.table.c[order_by[1:]].desc() else: return self.table.c[order_by].asc() def find( self, _limit=None, _offset=0, _step=5000, order_by="id", return_count=False, **_filter ): """ Performs a simple search on the table. Simply pass keyword arguments as ``filter``. :: results = table.find(country='France') results = table.find(country='France', year=1980) Using ``_limit``:: # just return the first 10 rows results = table.find(country='France', _limit=10) You can sort the results by single or multiple columns. Append a minus sign to the column name for descending order:: # sort results by a column 'year' results = table.find(country='France', order_by='year') # return all rows sorted by multiple columns (by year in descending order) results = table.find(order_by=['country', '-year']) By default :py:meth:`find() <dataset.Table.find>` will break the query into chunks of ``_step`` rows to prevent huge tables from being loaded into memory at once. For more complex queries, please use :py:meth:`db.query()` instead.""" self._check_dropped() if not isinstance(order_by, (list, tuple)): order_by = [order_by] order_by = [ o for o in order_by if (o.startswith("-") and o[1:] or o) in self.table.columns ] order_by = [self._args_to_order_by(o) for o in order_by] args = self._args_to_clause(_filter) # query total number of rows first count_query = alias( self.table.select(whereclause=args, limit=_limit, offset=_offset), name="count_query_alias", ).count() rp = self.engine.execute(count_query) total_row_count = rp.fetchone()[0] if return_count: return total_row_count if _limit is None: _limit = total_row_count if _step is None or _step is False or _step == 0: _step = total_row_count if total_row_count > _step and not order_by: _step = total_row_count log.warn( "query cannot be broken into smaller sections because it is unordered" ) queries = [] for i in count(): qoffset = _offset + (_step * i) qlimit = min(_limit - (_step * i), _step) if qlimit <= 0: break queries.append( self.table.select( whereclause=args, limit=qlimit, offset=qoffset, order_by=order_by ) ) return ResultIter( (self.engine.execute(q) for q in queries), row_type=self.db.row_type ) def count(self, **_filter): """ Return the count of results for the given filter set (same filter options as with ``find()``). """ return self.find(return_count=True, **_filter) def __getitem__(self, item): """ This is an alias for distinct which allows the table to be queried as using square bracket syntax. :: # Same as distinct: print list(table['year']) """ if not isinstance(item, tuple): item = (item,) return self.distinct(*item) def all(self): """ Returns all rows of the table as simple dictionaries. This is simply a shortcut to *find()* called with no arguments. :: rows = table.all()""" return self.find() def __iter__(self): """ Allows for iterating over all rows in the table without explicetly calling :py:meth:`all() <dataset.Table.all>`. :: for row in table: print(row) """ return self.all() def __repr__(self): return "<Table(%s)>" % self.table.name
class SqlAlchemyFdw(ForeignDataWrapper): """An SqlAlchemy foreign data wrapper. The sqlalchemy foreign data wrapper performs simple selects on a remote database using the sqlalchemy framework. Accepted options: db_url -- the sqlalchemy connection string. schema -- (optional) schema name to qualify table name with tablename -- the table name in the remote database. """ def __init__(self, fdw_options, fdw_columns): super(SqlAlchemyFdw, self).__init__(fdw_options, fdw_columns) if 'tablename' not in fdw_options: log_to_postgres('The tablename parameter is required', ERROR) self.metadata = MetaData() url = _parse_url_from_options(fdw_options) self.engine = create_engine(url) schema = fdw_options['schema'] if 'schema' in fdw_options else None tablename = fdw_options['tablename'] sqlacols = [] for col in fdw_columns.values(): col_type = self._get_column_type(col.type_name) sqlacols.append(Column(col.column_name, col_type)) self.table = Table(tablename, self.metadata, schema=schema, *sqlacols) self.transaction = None self._connection = None self._row_id_column = fdw_options.get('primary_key', None) def _need_explicit_null_ordering(self, key): support = SORT_SUPPORT[self.engine.dialect.name] default = support['default'] no = None if key.is_reversed: no = nullsfirst if default == 'higher' else nullslast else: no = nullslast if default == 'higher' else nullsfirst if key.nulls_first: if no != nullsfirst: return nullsfirst return None else: if no != nullslast: return nullslast return None def can_sort(self, sortkeys): if SORT_SUPPORT.get(self.engine.dialect.name) is None: # We have no idea about defaults return [] can_order_null = SORT_SUPPORT[self.engine.dialect.name]['support'] if (any((self._need_explicit_null_ordering(x) is not None for x in sortkeys)) and not can_order_null): return [] return sortkeys def explain(self, quals, columns, sortkeys=None, verbose=False): sortkeys = sortkeys or [] statement = self._build_statement(quals, columns, sortkeys) return [str(statement)] def _build_statement(self, quals, columns, sortkeys): statement = select([self.table]) clauses = [] for qual in quals: operator = OPERATORS.get(qual.operator, None) if operator: clauses.append(operator(self.table.c[qual.field_name], qual.value)) else: log_to_postgres('Qual not pushed to foreign db: %s' % qual, WARNING) if clauses: statement = statement.where(and_(*clauses)) if columns: columns = [self.table.c[col] for col in columns] else: columns = self.table.c statement = statement.with_only_columns(columns) orders = [] for sortkey in sortkeys: column = self.table.c[sortkey.attname] if sortkey.is_reversed: column = column.desc() if sortkey.collate: column = column.collate('"%s"' % sortkey.collate) null_ordering = self._need_explicit_null_ordering(sortkey) if null_ordering: column = null_ordering(column) statement = statement.order_by(column) return statement def execute(self, quals, columns, sortkeys=None): """ The quals are turned into an and'ed where clause. 
""" sortkeys = sortkeys or [] statement = self._build_statement(quals, columns, sortkeys) log_to_postgres(str(statement), DEBUG) rs = (self.connection .execution_options(stream_results=True) .execute(statement)) # Workaround pymssql "trash old results on new query" # behaviour (See issue #100) if self.engine.driver == 'pymssql' and self.transaction is not None: rs = list(rs) for item in rs: yield dict(item) @property def connection(self): if self._connection is None: self._connection = self.engine.connect() return self._connection def begin(self, serializable): self.transaction = self.connection.begin() def pre_commit(self): if self.transaction is not None: self.transaction.commit() self.transaction = None def commit(self): # Pre-commit hook does this on 9.3 if self.transaction is not None: self.transaction.commit() self.transaction = None def rollback(self): if self.transaction is not None: self.transaction.rollback() self.transaction = None @property def rowid_column(self): if self._row_id_column is None: log_to_postgres( 'You need to declare a primary key option in order ' 'to use the write features') return self._row_id_column def insert(self, values): self.connection.execute(self.table.insert(values=values)) def update(self, rowid, newvalues): self.connection.execute( self.table.update() .where(self.table.c[self._row_id_column] == rowid) .values(newvalues)) def delete(self, rowid): self.connection.execute( self.table.delete() .where(self.table.c[self._row_id_column] == rowid)) def _get_column_type(self, format_type): """Blatant ripoff from PG_Dialect.get_column_info""" # strip (*) from character varying(5), timestamp(5) # with time zone, geometry(POLYGON), etc. attype = re.sub(r'\(.*\)', '', format_type) # strip '[]' from integer[], etc. attype = re.sub(r'\[\]', '', attype) is_array = format_type.endswith('[]') charlen = re.search('\(([\d,]+)\)', format_type) if charlen: charlen = charlen.group(1) args = re.search('\((.*)\)', format_type) if args and args.group(1): args = tuple(re.split('\s*,\s*', args.group(1))) else: args = () kwargs = {} if attype == 'numeric': if charlen: prec, scale = charlen.split(',') args = (int(prec), int(scale)) else: args = () elif attype == 'double precision': args = (53, ) elif attype == 'integer': args = () elif attype in ('timestamp with time zone', 'time with time zone'): kwargs['timezone'] = True if charlen: kwargs['precision'] = int(charlen) args = () elif attype in ('timestamp without time zone', 'time without time zone', 'time'): kwargs['timezone'] = False if charlen: kwargs['precision'] = int(charlen) args = () elif attype == 'bit varying': kwargs['varying'] = True if charlen: args = (int(charlen),) else: args = () elif attype in ('interval', 'interval year to month', 'interval day to second'): if charlen: kwargs['precision'] = int(charlen) args = () elif charlen: args = (int(charlen),) coltype = ischema_names.get(attype, None) if coltype: coltype = coltype(*args, **kwargs) if is_array: coltype = ARRAY(coltype) else: coltype = sqltypes.NULLTYPE return coltype @classmethod def import_schema(self, schema, srv_options, options, restriction_type, restricts): """ Reflects the remote schema. 
""" metadata = MetaData() url = _parse_url_from_options(srv_options) engine = create_engine(url) dialect = PGDialect() if restriction_type == 'limit': only = restricts elif restriction_type == 'except': only = lambda t, _: t not in restricts else: only = None metadata.reflect(bind=engine, schema=schema, only=only) to_import = [] for _, table in sorted(metadata.tables.items()): ftable = TableDefinition(table.name) ftable.options['schema'] = schema ftable.options['tablename'] = table.name for c in table.c: # Force collation to None to prevent imcompatibilities setattr(c.type, "collation", None) # If the type is specialized, call the generic # superclass method if type(c.type) in CONVERSION_MAP: class_name = CONVERSION_MAP[type(c.type)] old_args = c.type.__dict__ c.type = class_name() c.type.__dict__.update(old_args) if c.primary_key: ftable.options['primary_key'] = c.name ftable.columns.append(ColumnDefinition( c.name, type_name=c.type.compile(dialect))) to_import.append(ftable) return to_import
# -*- coding: utf-8 -*-

from sqlalchemy import create_engine
from sqlalchemy.schema import MetaData, Table

engine = create_engine('mysql+mysqlconnector://[email protected]/momoka')
meta = MetaData(bind=engine)

t1 = Table('users', meta, autoload=True)
s = t1.insert([t1.c.name]).values({'id': 1, 'name': 'hello'})
rs = engine.execute(s)
if rs.rowcount < 0:
    print(rs)
class SQLTable(Component): _selects = 0 _inserts = 0 _updates = 0 _finalized = False STORE_MODE_LOOKUP = "lookup" STORE_MODE_INSERT = "insert" STORE_MODE_UPSERT = "upsert" _pk = False columns = [] create = True _unicode_errors = 0 _lookup_changed_fields = None def __init__(self, name, connection, columns, label=None): super(SQLTable, self).__init__() self.sa_table = None self.sa_metadata = None self.name = name self.connection = connection self.label = label if label else name self.columns = columns or [] for col in columns: col.sqltable = self def _get_sa_type(self, column): if (column.type == "Integer"): return Integer elif (column.type == "String"): #if (column.length is None): column.length = 128 return Unicode(length = 128) elif (column.type == "Float"): return Float elif (column.type == "Boolean"): return Boolean elif (column.type == "AutoIncrement"): return Integer elif (column.type == "Date"): return Date elif (column.type == "Time"): return Time elif (column.type == "DateTime"): return DateTime elif (column.type == "Binary"): return Binary else: raise Exception("Invalid data type (%s): %s" % (column, column.type)) def finalize(self, ctx): if (not SQLTable._finalized): SQLTable._finalized = True if (SQLTable._inserts + SQLTable._selects > 0): logger.info("SQLTable Totals ins/upd/sel: %d/%d/%d " % (SQLTable._inserts, SQLTable._updates, SQLTable._selects)) if (self._inserts + self._selects > 0): logger.info("SQLTable %-18s ins/upd/sel: %6d/%6d/%-6d " % (self.name, self._inserts, self._updates, self._selects)) if (self._unicode_errors > 0): logger.warning("SQLTable %s found %d warnings assigning non-unicode fields to unicode columns" % (self.name, self._unicode_errors)) ctx.comp.finalize(self.connection) super(SQLTable, self).finalize(ctx) def initialize(self, ctx): super(SQLTable, self).initialize(ctx) if self._lookup_changed_fields == None: self._lookup_changed_fields = [] ctx.comp.initialize(self.connection) logger.debug("Loading table %s on %s" % (self.name, self)) self.sa_metadata = MetaData() self.sa_table = Table(self.name, self.sa_metadata) self._selects = 0 self._inserts = 0 self._updates = 0 self._unicode_errors = 0 # Drop? columns_ex = [] for column in self.columns: logger.debug("Adding column to %s: %s" % (self, column)) column.sqltable = self # Check for duplicate names if (column.name in columns_ex): raise ETLConfigurationException("Duplicate column name '%s' in %s" % (column.name, self)) columns_ex.append(column.name) # Configure column if isinstance(column, SQLColumnFK): if column.fk_sqlcolumn.sqltable.sa_table is None: logger.warning("Column %s foreign key %s table (%s) has not been defined in backend (ignoring).", column, column.fk_sqlcolumn, column.fk_sqlcolumn.sqltable) continue self.sa_table.append_column(Column(column.name, self._get_sa_type(column), ForeignKey(column.fk_sqlcolumn.sqltable.sa_table.columns[column.fk_sqlcolumn.name]), primary_key=column.pk, nullable=column.nullable, autoincrement=(True if column.type == "AutoIncrement" else False))) else: self.sa_table.append_column(Column(column.name, self._get_sa_type(column), primary_key=column.pk, nullable=column.nullable, autoincrement=(True if column.type == "AutoIncrement" else False))) # Check schema: # Create if doesn't exist if (not self.connection.engine().has_table(self.name)): logger.info("Creating table %s" % self.name) self.sa_table.create(self.connection.connection()) # TODO:? Extend? (unsafe, allow read-only connections and make them default?) # TODO:? 
Delete columns (unsafe, allow read-only connections and make them default?) def pk(self, ctx): """ Returns the primary key column definitToClauion, or None if none defined. """ #if (self._pk == False): if True: pk_cols = [] for col in self.columns: if col.pk: pk_cols.append(col) if (len(pk_cols) > 1): raise Exception("Table %s has multiple primary keys: %s" % (self.name, pk_cols)) elif (len(pk_cols) == 1): self._pk = pk_cols[0] else: self._pk = None return self._pk def _attribsToClause(self, attribs): clauses = [] for k, v in attribs.items(): if isinstance(v, (list, tuple)): clauses.append(self.sa_table.c[k].in_(v)) else: clauses.append(self.sa_table.c[k] == v) return and_(*clauses) def _rowtodict(self, row): d = {} for column in self.columns: #print column d[column.name] = getattr(row, column.name) return d def _find(self, ctx, attribs): self._selects = self._selects + 1 SQLTable._selects = SQLTable._selects + 1 query = self.sa_table.select(self._attribsToClause(attribs)) rows = self.connection.connection().execute(query) for r in rows: # Ensure we return dicts, not RowProxys from SqlAlchemy yield self._rowtodict(r) def lookup(self, ctx, attribs, find_function=None): logger.debug ("Lookup on '%s' attribs: %s" % (self, attribs)) if (len(attribs.keys()) == 0): raise Exception("Cannot lookup on table '%s' with no criteria (empty attribute set)" % self.name) find_function = find_function or self._find rows = find_function(ctx, attribs) rows = list(rows) if (len(rows) > 1): raise Exception("Found more than one row when searching for just one in table %s: %s" % (self.name, attribs)) elif (len(rows) == 1): row = rows[0] else: row = None logger.debug("Lookup result on %s: %s = %s" % (self.name, attribs, row)) return row def upsert(self, ctx, data, keys = []): """ Upsert checks if the row exists and has changed. It does a lookup followed by an update or insert as appropriate. 
""" # TODO: Check for AutoIncrement in keys, shall not be used # If keys qfilter = {} if (len(keys) > 0): for key in keys: try: qfilter[key] = data[key] except KeyError as e: raise Exception("Could not find attribute '%s' in data when storing row data: %s" % (key, data)) else: pk = self.pk(ctx) qfilter[pk.name] = data[pk.name] # Do lookup if len(qfilter) > 0: row = self.lookup(ctx, qfilter) if (row): # Check row is identical for c in self.columns: if c.type != "AutoIncrement": v1 = row[c.name] v2 = data[c.name] if c.type == "Date": v1 = row[c.name].strftime('%Y-%m-%d') v2 = data[c.name].strftime('%Y-%m-%d') if (isinstance(v1, str) or isinstance(v2, str)): if (not isinstance(v1, str)): v1 = str(v1) if (not isinstance(v2, str)): v2 = str(v2) if (v1 != v2): if (c.name not in self._lookup_changed_fields): logger.warn("%s updating an entity that exists with different attributes, overwriting (field=%s, existing_value=%s, tried_value=%s)" % (self, c.name, v1, v2)) #self._lookup_changed_fields.append(c["name"]) # Update the row row = self.update(ctx, data, keys) return row row_with_id = self.insert(ctx, data) return row_with_id def _prepare_row(self, ctx, data): row = {} for column in self.columns: if column.type != "AutoIncrement": try: row[column.name] = data[column.name] except KeyError as e: raise Exception("Missing attribute for column %s in table '%s' while inserting row: %s" % (e, self.name, data)) # Checks if (column.type == "String") and (not isinstance(row[column.name], str)): self._unicode_errors = self._unicode_errors + 1 if (ctx.debug): logger.warn("Unicode column %r received non-unicode string: %r " % (column.name, row[column.name])) return row def insert(self, ctx, data): row = self._prepare_row(ctx, data) logger.debug("Inserting in table '%s' row: %s" % (self.name, row)) res = self.connection.connection().execute(self.sa_table.insert(row)) pk = self.pk(ctx) if pk: row[pk.name] = res.inserted_primary_key[0] self._inserts = self._inserts + 1 SQLTable._inserts = SQLTable._inserts + 1 if pk is not None: return row else: return row # None def update(self, ctx, data, keys = []): row = self._prepare_row(ctx, data) # Automatically calculate lookup if necessary qfilter = {} if (len(keys) > 0): for key in keys: try: qfilter[key] = data[key] except KeyError as e: raise Exception("Could not find attribute '%s' in data when storing row data: %s" % (key, data)) else: pk = self.pk(ctx) qfilter[pk.name] = data[pk.name] logger.debug("Updating in table '%s' row: %s" % (self.name, row)) res = self.connection.connection().execute(self.sa_table.update(self._attribsToClause(qfilter), row)) self._updates = self._updates +1 SQLTable._updates = SQLTable._updates + 1 if pk is not None: return row else: return None
def rename_vo(old_vo, new_vo, insert_new_vo=False, description=None, email=None, commit_changes=False, skip_history=False): """ Updates rows so that entries associated with `old_vo` are now associated with `new_vo` as part of multi-VO migration. :param old_vo: The 3 character string for the current VO (for a single-VO instance this will be 'def'). :param new_vo: The 3 character string for the new VO. :param insert_new_vo: If True then an entry for `new_vo` is created in the database. :param description: Full description of the new VO, unused if `insert_new_vo` is False. :param email: Admin email for the new VO, unused if `insert_new_vo` is False. :param commit_changes: If True then changes are made against the database directly. If False, then nothing is commited and the commands needed are dumped to be run later. :param skip_history: If True then tables without FKC containing historical data will not be converted to save time. """ success = True engine = session.get_engine() conn = engine.connect() trans = conn.begin() inspector = reflection.Inspector.from_engine(engine) metadata = MetaData(bind=conn, reflect=True) dialect = engine.dialect.name # Gather all the columns that need updating and all relevant foreign key constraints all_fks = [] tables_and_columns = [] for table_name in inspector.get_table_names(): if skip_history and ('_history' in table_name or '_hist_recent' in table_name): continue fks = [] table = Table(table_name, metadata) for column in table.c: if 'scope' in column.name or column.name == 'account': tables_and_columns.append((table, column)) for fk in inspector.get_foreign_keys(table_name): if not fk['name']: continue if 'scope' in fk['referred_columns'] or 'account' in fk['referred_columns']: fks.append(ForeignKeyConstraint(fk['constrained_columns'], [fk['referred_table'] + '.' 
+ r for r in fk['referred_columns']], name=fk['name'], table=table, **fk['options'])) all_fks.extend(fks) try: bound_params = {'old_vo': old_vo, 'new_vo': new_vo, 'old_vo_suffix': '' if old_vo == 'def' else old_vo, 'new_vo_suffix': '' if new_vo == 'def' else '@%s' % new_vo, 'split_character': '@', 'int_1': 1, 'int_2': 2, 'new_description': description, 'new_email': email, 'datetime': datetime.utcnow()} bound_params_text = {} for key in bound_params: if isinstance(bound_params[key], int): bound_params_text[key] = bound_params[key] else: bound_params_text[key] = "'%s'" % bound_params[key] if insert_new_vo: table = Table('vos', metadata) insert_command = table.insert().values(vo=bindparam('new_vo'), description=bindparam('new_description'), email=bindparam('new_email'), updated_at=bindparam('datetime'), created_at=bindparam('datetime')) print(str(insert_command) % bound_params_text + ';') if commit_changes: conn.execute(insert_command, bound_params) # Drop all FKCs affecting InternalAccounts/Scopes for fk in all_fks: print(str(DropConstraint(fk)) + ';') if commit_changes: conn.execute(DropConstraint(fk)) # Update columns for table, column in tables_and_columns: update_command = table.update().where(split_vo(dialect, column, return_vo=True) == bindparam('old_vo_suffix')) if new_vo == 'def': update_command = update_command.values({column.name: split_vo(dialect, column)}) else: update_command = update_command.values({column.name: split_vo(dialect, column) + cast(bindparam('new_vo_suffix'), CHAR(4))}) print(str(update_command) % bound_params_text + ';') if commit_changes: conn.execute(update_command, bound_params) table = Table('rses', metadata) update_command = table.update().where(table.c.vo == bindparam('old_vo')).values(vo=bindparam('new_vo')) print(str(update_command) % bound_params_text + ';') if commit_changes: conn.execute(update_command, bound_params) # Re-add the FKCs we dropped for fkc in all_fks: print(str(AddConstraint(fkc)) + ';') if commit_changes: conn.execute(AddConstraint(fkc)) except: success = False print(format_exc()) print('Exception occured, changes not committed to DB.') if commit_changes and success: trans.commit() trans.close() return success
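A hedged example of driving this helper; the target VO name and admin e-mail below are placeholders, and `commit_changes=False` makes the first call a dry run that only dumps the SQL it would execute, as described in the docstring above.

# Dry run: print the INSERT/UPDATE/constraint statements without touching the DB.
ok = rename_vo('def', 'abc',
               insert_new_vo=True,
               description='Example VO used for the multi-VO migration',
               email='admin@example.org',
               commit_changes=False,
               skip_history=True)

# Once the dumped SQL has been reviewed, the same call with commit_changes=True
# applies the changes inside a single transaction.
if ok:
    rename_vo('def', 'abc', insert_new_vo=True,
              description='Example VO used for the multi-VO migration',
              email='admin@example.org', commit_changes=True, skip_history=True)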
loader = PostgresLoader(engine)
testmodule = loader.load_module('testmodule')

metadata = MetaData(bind=engine)
table = Table('testtable', metadata,
              Column('test', Unicode),
              Column('test2', Unicode))
table.drop(checkfirst=True)
table.create(checkfirst=True)

for i in range(20):
    table.insert({'test': 'test%d' % i, 'test2': 'test%d' % i}).execute()

print(engine.execute(testmodule.pyconcat(table.c.test, table.c.test2)).fetchall())

statement = """
CREATE TRIGGER mytrigger
BEFORE INSERT ON %s
FOR EACH ROW EXECUTE PROCEDURE %s();
"""
engine.execute(statement % (table.name, testmodule.nullifying_trigger.__name__))

table.insert({'test': 'grou', 'test2': 'grou'}).execute()
print(engine.execute(testmodule.pyconcat(table.c.test, table.c.test2)).fetchall())
import os

from sqlalchemy import create_engine, Column, Integer, MetaData, Table
from sqlalchemy.dialects.postgresql import array
from hstore import HSTORE, hstore

meta = MetaData()
test_table = Table('test_table', meta,
                   Column('id', Integer, primary_key=True),
                   Column('hash', HSTORE))

if __name__ == '__main__':
    user = os.getenv('USER')
    engine = create_engine('postgresql://%s@localhost/%s' % (user, user))
    meta.create_all(engine)
    conn = engine.connect()

    ins = test_table.insert().values(hash={'foo': '1', 'bar': None})
    conn.execute(ins)

    hashcol = test_table.c.hash
    where_tests = [
        hashcol.has_key('foo'),
        hashcol.has_all(array(['foo', 'bar'])),
        hashcol.has_any(array(['baz', 'foo'])),
        hashcol.defined('bar'),
        hashcol.contains({'foo': '1'}),
        hashcol.contained_by({'foo': '1', 'bar': None}),
        hashcol['bar'] == None,
    ]
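As a small follow-up sketch, the predicates collected above can be run one by one as WHERE clauses; this assumes the `conn`, `test_table`, and `where_tests` names from the snippet and adds nothing beyond a plain SELECT per clause.

from sqlalchemy import select

# Run each hstore predicate as a WHERE clause and show the matching ids.
for clause in where_tests:
    rows = conn.execute(select([test_table.c.id]).where(clause)).fetchall()
    print(clause, '->', [r.id for r in rows])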
class BaseTestCase(unittest.TestCase): def setUp(self): engine = create_engine('postgresql://postgres@localhost/pypet') self.metadata = MetaData(bind=engine) self.store_table = Table('store', self.metadata, Column('store_id', types.Integer, primary_key=True), Column('store_name', types.String), Column('country_id', types.Integer, ForeignKey('country.country_id'))) self.country_table = Table('country', self.metadata, Column('country_id', types.Integer, primary_key=True), Column('country_name', types.String), Column('region_id', types.Integer, ForeignKey('region.region_id'))) self.region_table = Table('region', self.metadata, Column('region_id', types.Integer, primary_key=True), Column('region_name', types.String)) self.product_table = Table('product', self.metadata, Column('product_id', types.Integer, primary_key=True), Column('product_name', types.String), Column('product_category_id', types.Integer, ForeignKey('product_category.product_category_id'))) self.product_category_table = Table('product_category', self.metadata, Column('product_category_id', types.Integer, primary_key=True), Column('product_category_name', types.String)) self.facts_table = Table('facts_table', self.metadata, Column('store_id', types.Integer, ForeignKey('store.store_id')), Column('date', types.Date), Column('product_id', types.Integer, ForeignKey('product.product_id')), Column('price', types.Float), Column('qty', types.Integer)) agg_name = ('agg_time_month_product_product_store_store' '_Unit Price_Quantity') self.agg_by_month_table = Table(agg_name, self.metadata, Column('store_store', types.Integer, ForeignKey('store.store_id')), Column('time_month', types.Date), Column('product_product', types.Integer, ForeignKey('product.product_id')), Column('Unit Price', types.Float), Column('Quantity', types.Integer), Column('fact_count', types.Integer)) agg_name = ('agg_time_year_store_country_product_product' '_Unit Price_Quantity') self.agg_by_year_country_table = Table(agg_name, self.metadata, Column('store_country', types.Integer, ForeignKey('country.country_id')), Column('time_year', types.Date), Column('product_product', types.Integer, ForeignKey('product.product_id')), Column('Unit Price', types.Float), Column('Quantity', types.Integer), Column('fact_count', types.Integer)) self.metadata.create_all() self.store_dim = Dimension('store', [ Hierarchy('default', [ Level('region', self.region_table.c.region_id, self.region_table.c.region_name), Level('country', self.country_table.c.country_id, self.country_table.c.country_name), Level('store', self.store_table.c.store_id, self.store_table.c.store_name)])]) self.product_dim = Dimension('product', [ Hierarchy('default', [ Level('category', self.product_category_table.c.product_category_id, self.product_category_table.c .product_category_name), Level('product', self.product_table.c.product_id, self.product_table.c.product_name)])]) self.time_dim = TimeDimension('time', self.facts_table.c.date, ['year', 'month', 'day']) unit_price = Measure('Unit Price', self.facts_table.c.price, aggregates.avg) quantity = Measure('Quantity', self.facts_table.c.qty, aggregates.sum) price = ((unit_price.aggregate_with(None) * quantity.aggregate_with(None)) .aggregate_with(aggregates.sum).label('Price')) self.cube = Cube(self.metadata, self.facts_table, [self.store_dim, self.product_dim, self.time_dim], [unit_price, quantity, price], fact_count_column=self.facts_table.c.qty) self.region_table.insert({'region_id': 1, 'region_name': 'Europe'}).execute() self.country_table.insert({'region_id': 1, 
'country_name': 'France', 'country_id': 1}).execute() self.country_table.insert({'region_id': 1, 'country_name': 'Germany', 'country_id': 2}).execute() self.region_table.insert({'region_id': 2, 'region_name': 'America'}).execute() self.country_table.insert({'region_id': 2, 'country_name': 'USA', 'country_id': 3}).execute() self.country_table.insert({'region_id': 2, 'country_name': 'Canada', 'country_id': 4}).execute() self.store_table.insert({ 'store_id': 1, 'store_name': 'ACME.fr', 'country_id': 1}).execute() self.store_table.insert({ 'store_id': 2, 'store_name': 'ACME.de', 'country_id': 2}).execute() self.store_table.insert({ 'store_id': 3, 'store_name': 'Food Mart.fr', 'country_id': 1}).execute() self.store_table.insert({ 'store_id': 4, 'store_name': 'Food Mart.de', 'country_id': 2}).execute() self.store_table.insert({ 'store_id': 5, 'store_name': 'ACME.us', 'country_id': 3}).execute() self.store_table.insert({ 'store_id': 6, 'store_name': 'Food Mart.us', 'country_id': 3}).execute() self.store_table.insert({ 'store_id': 7, 'store_name': 'ACME.ca', 'country_id': 4}).execute() self.store_table.insert({ 'store_id': 8, 'store_name': 'Food Mart.ca', 'country_id': 4}).execute() self.product_category_table.insert({ 'product_category_id': 1, 'product_category_name': 'Vegetables'}).execute() self.product_category_table.insert({ 'product_category_id': 2, 'product_category_name': 'Shoes'}).execute() self.product_table.insert({ 'product_id': 1, 'product_category_id': 1, 'product_name': 'Carrots'}).execute() self.product_table.insert({ 'product_id': 2, 'product_category_id': 1, 'product_name': 'Bananas'}).execute() self.product_table.insert({ 'product_id': 3, 'product_category_id': 2, 'product_name': 'Red shoes'}).execute() self.product_table.insert({ 'product_id': 4, 'product_category_id': 2, 'product_name': 'Green shoes'}).execute() self.product_table.insert({ 'product_id': 5, 'product_category_id': 2, 'product_name': 'Blue shoes'}).execute() years = cycle([2009, 2010, 2011]) months = cycle([1, 5, 8, 9, 11]) days = cycle([3, 12, 21, 29]) prices = iter(cycle([100, 500, 1000])) quantities = iter(cycle([1, 5, 1, 2, 3, 20, 8])) values = iter((date(*value) for value in izip(years, months, days))) for value in self.product_table.select().with_only_columns([ self.product_table.c.product_id, self.store_table.c.store_id]).execute(): self.facts_table.insert({ 'product_id': value.product_id, 'store_id': value.store_id, 'date': next(values), 'qty': next(quantities), 'price': next(prices)}).execute() results = (self.facts_table.select().with_only_columns([ (func.sum(self.facts_table.c.price * self.facts_table.c.qty) / func.sum(self.facts_table.c.qty)) .label('Unit Price'), func.sum(self.facts_table.c.qty).label('Quantity'), func.sum(self.facts_table.c.qty).label('fact_count'), self.facts_table.c.product_id.label('product_product'), self.facts_table.c.store_id.label('store_store'), func.date_trunc('month', self.facts_table.c.date).label('time_month')]) .group_by(func.date_trunc('month', self.facts_table.c.date), self.facts_table.c.product_id, self.facts_table.c.store_id) .execute()) for res in results: self.agg_by_month_table.insert().execute(dict(res)) second_agg = (self.facts_table.select().with_only_columns([ (func.sum(self.facts_table.c.price * self.facts_table.c.qty) / func.sum(self.facts_table.c.qty)) .label('Unit Price'), func.sum(self.facts_table.c.qty).label('Quantity'), func.sum(self.facts_table.c.qty).label('fact_count'), self.facts_table.c.product_id.label('product_product'), 
self.store_table.c.country_id.label('store_country'), func.date_trunc('year', self.facts_table.c.date).label('time_year')]) .where(self.facts_table.c.store_id == self.store_table.c.store_id) .group_by(self.facts_table.c.product_id.label('product_product'), self.store_table.c.country_id.label('store_country'), func.date_trunc('year', self.facts_table.c.date).label('time_year')) .execute()) for res in second_agg: self.agg_by_year_country_table.insert().execute(dict(res)) def tearDown(self): self.metadata.drop_all()
metadata = MetaData()

# Define the table against the MetaData object; the engine uses these
# schema types to create the concrete table structure.
testonly_tableobj = Table("testonly", metadata,
                          Column("id", Integer, primary_key=True),
                          Column("name", String(20)),
                          Column('bytes', VARBINARY(20)))
# color = Table("color", metadata,
#               Column("id", Integer, primary_key=True),
#               Column("name", String(20)))

metadata.create_all(engine)  # Create the table structure

conn = engine.connect()
try:
    # Reflect the table structure from the database via MetaData, then insert data
    metadata = MetaData(engine)
    testonly_tableobj_autoload = Table('testonly', metadata, autoload=True)
    conn.execute(testonly_tableobj_autoload.insert(),
                 {'id': 2, "name": "koka", 'bytes': b'1234'})
    logger.info('insert data successfully')

    sql_str = testonly_tableobj_autoload.select()
    table_data = conn.execute(sql_str)
    for content in table_data.fetchall():
        logger.info('%s', content)
finally:
    conn.close()
class SqlAlchemyFdw(ForeignDataWrapper): """An SqlAlchemy foreign data wrapper. The sqlalchemy foreign data wrapper performs simple selects on a remote database using the sqlalchemy framework. Accepted options: db_url -- the sqlalchemy connection string. schema -- (optional) schema name to qualify table name with tablename -- the table name in the remote database. """ def __init__(self, fdw_options, fdw_columns): super(SqlAlchemyFdw, self).__init__(fdw_options, fdw_columns) if 'tablename' not in fdw_options: log_to_postgres('The tablename parameter is required', ERROR) self.metadata = MetaData() url = _parse_url_from_options(fdw_options) self.engine = create_engine(url) schema = fdw_options['schema'] if 'schema' in fdw_options else None tablename = fdw_options['tablename'] sqlacols = [] for col in fdw_columns.values(): col_type = self._get_column_type(col.type_name) sqlacols.append(Column(col.column_name, col_type)) self.table = Table(tablename, self.metadata, schema=schema, *sqlacols) self.transaction = None self._connection = None self._row_id_column = fdw_options.get('primary_key', None) def _need_explicit_null_ordering(self, key): support = SORT_SUPPORT[self.engine.dialect.name] default = support['default'] no = None if key.is_reversed: no = nullsfirst if default == 'higher' else nullslast else: no = nullslast if default == 'higher' else nullsfirst if key.nulls_first: if no != nullsfirst: return nullsfirst return None else: if no != nullslast: return nullslast return None def can_sort(self, sortkeys): if SORT_SUPPORT.get(self.engine.dialect.name) is None: # We have no idea about defaults return [] can_order_null = SORT_SUPPORT[self.engine.dialect.name]['support'] if (any((self._need_explicit_null_ordering(x) is not None for x in sortkeys)) and not can_order_null): return [] return sortkeys def explain(self, quals, columns, sortkeys=None, verbose=False): sortkeys = sortkeys or [] statement = self._build_statement(quals, columns, sortkeys) return [str(statement)] def _build_statement(self, quals, columns, sortkeys): statement = select([self.table]) clauses = [] for qual in quals: operator = OPERATORS.get(qual.operator, None) if operator: clauses.append( operator(self.table.c[qual.field_name], qual.value)) else: log_to_postgres('Qual not pushed to foreign db: %s' % qual, WARNING) if clauses: statement = statement.where(and_(*clauses)) if columns: columns = [self.table.c[col] for col in columns] else: columns = self.table.c statement = statement.with_only_columns(columns) orders = [] for sortkey in sortkeys: column = self.table.c[sortkey.attname] if sortkey.is_reversed: column = column.desc() if sortkey.collate: column = column.collate('"%s"' % sortkey.collate) null_ordering = self._need_explicit_null_ordering(sortkey) if null_ordering: column = null_ordering(column) statement = statement.order_by(column) return statement def execute(self, quals, columns, sortkeys=None): """ The quals are turned into an and'ed where clause. 
""" sortkeys = sortkeys or [] statement = self._build_statement(quals, columns, sortkeys) log_to_postgres(str(statement), DEBUG) rs = (self.connection.execution_options( stream_results=True).execute(statement)) # Workaround pymssql "trash old results on new query" # behaviour (See issue #100) if self.engine.driver == 'pymssql' and self.transaction is not None: rs = list(rs) for item in rs: yield dict(item) @property def connection(self): if self._connection is None: self._connection = self.engine.connect() return self._connection def begin(self, serializable): self.transaction = self.connection.begin() def pre_commit(self): if self.transaction is not None: self.transaction.commit() self.transaction = None def commit(self): # Pre-commit hook does this on 9.3 if self.transaction is not None: self.transaction.commit() self.transaction = None def rollback(self): if self.transaction is not None: self.transaction.rollback() self.transaction = None @property def rowid_column(self): if self._row_id_column is None: log_to_postgres( 'You need to declare a primary key option in order ' 'to use the write features') return self._row_id_column def insert(self, values): self.connection.execute(self.table.insert(values=values)) def update(self, rowid, newvalues): self.connection.execute(self.table.update().where( self.table.c[self._row_id_column] == rowid).values(newvalues)) def delete(self, rowid): self.connection.execute(self.table.delete().where( self.table.c[self._row_id_column] == rowid)) def _get_column_type(self, format_type): """Blatant ripoff from PG_Dialect.get_column_info""" # strip (*) from character varying(5), timestamp(5) # with time zone, geometry(POLYGON), etc. attype = re.sub(r'\(.*\)', '', format_type) # strip '[]' from integer[], etc. attype = re.sub(r'\[\]', '', attype) is_array = format_type.endswith('[]') charlen = re.search('\(([\d,]+)\)', format_type) if charlen: charlen = charlen.group(1) args = re.search('\((.*)\)', format_type) if args and args.group(1): args = tuple(re.split('\s*,\s*', args.group(1))) else: args = () kwargs = {} if attype == 'numeric': if charlen: prec, scale = charlen.split(',') args = (int(prec), int(scale)) else: args = () elif attype == 'double precision': args = (53, ) elif attype == 'integer': args = () elif attype in ('timestamp with time zone', 'time with time zone'): kwargs['timezone'] = True if charlen: kwargs['precision'] = int(charlen) args = () elif attype in ('timestamp without time zone', 'time without time zone', 'time'): kwargs['timezone'] = False if charlen: kwargs['precision'] = int(charlen) args = () elif attype == 'bit varying': kwargs['varying'] = True if charlen: args = (int(charlen), ) else: args = () elif attype in ('interval', 'interval year to month', 'interval day to second'): if charlen: kwargs['precision'] = int(charlen) args = () elif charlen: args = (int(charlen), ) coltype = ischema_names.get(attype, None) if coltype: coltype = coltype(*args, **kwargs) if is_array: coltype = ARRAY(coltype) else: coltype = sqltypes.NULLTYPE return coltype @classmethod def import_schema(self, schema, srv_options, options, restriction_type, restricts): """ Reflects the remote schema. 
""" metadata = MetaData() url = _parse_url_from_options(srv_options) engine = create_engine(url) dialect = PGDialect() if restriction_type == 'limit': only = restricts elif restriction_type == 'except': only = lambda t, _: t not in restricts else: only = None metadata.reflect(bind=engine, schema=schema, only=only) to_import = [] for _, table in sorted(metadata.tables.items()): ftable = TableDefinition(table.name) ftable.options['schema'] = schema ftable.options['tablename'] = table.name for c in table.c: # Force collation to None to prevent imcompatibilities setattr(c.type, "collation", None) # If the type is specialized, call the generic # superclass method if type(c.type) in CONVERSION_MAP: class_name = CONVERSION_MAP[type(c.type)] old_args = c.type.__dict__ c.type = class_name() c.type.__dict__.update(old_args) if c.primary_key: ftable.options['primary_key'] = c.name ftable.columns.append( ColumnDefinition(c.name, type_name=c.type.compile(dialect))) to_import.append(ftable) return to_import
class AlchemySqlStore(Model): """ datastore using SQLAlchemy meta-SQL Python package create table vesper_stmts ( subject UNIQUE predicate UNIQUE object UNIQUE objecttype UNIQUE context UNIQUE ) """ def __init__(self, source=None, defaultStatements=None, autocommit=False, **kw): if source is None: # this seems like a reasonable default thing to do source = "sqlite://" log.debug("SQLite in-memory database being opened") # We take source to be a SQLAlchemy-style dbapi spec: # dialect+driver://username:password@host:port/database # connection is made JIT on first connect() log.debug("sqla engine being created with:", source) self.engine = create_engine(source) self.md = sqlalchemy.schema.MetaData() # utterly insufficient datatypes. just for first pass # technically the keep_existing bool is redundant as create_all() default is "check first" self.vesper_stmts = Table( "vesper_stmts", self.md, Column("subject", String(255)), # primary_key = True), Column("predicate", String(255)), # primary_key = True), Column("object", String(255)), # primary_key = True), Column("objecttype", String(8)), Column("context", String(8)), UniqueConstraint("subject", "predicate", "object", "objecttype", "context"), keep_existing=True, ) Index("idx_vs", self.vesper_stmts.c.subject, self.vesper_stmts.c.predicate, self.vesper_stmts.c.object) self.md.create_all(self.engine) # Set up our state machine and grab a connection from the sqlalchemy pool self.conn = self.engine.connect() self.trans = None self.autocommit = autocommit def _checkConnection(self): if self.conn is None: self.conn = self.engine.connect() if self.autocommit is False: if not self.conn.in_transaction(): self.trans = self.conn.begin() self.conn.execution_options(autocommit=self.autocommit) def getStatements( self, subject=None, predicate=None, object=None, objecttype=None, context=None, asQuad=True, hints=None ): """ Return all the statements in the model that match the given arguments. Any combination of subject and predicate can be None, and any None slot is treated as a wildcard that matches any value in the model. 
""" fs = subject is not None fp = predicate is not None fo = object is not None fot = objecttype is not None fc = context is not None hints = hints or {} limit = hints.get("limit") offset = hints.get("offset") log.debug("s p o ot c quad lim offset: ", fs, fp, fo, fot, fc, asQuad, limit, offset) if fo: if isinstance(object, ResourceUri): object = object.uri fot = True objecttype = OBJECT_TYPE_RESOURCE elif not fot: objecttype = OBJECT_TYPE_LITERAL if not asQuad and not fc: query = select( [ self.vesper_stmts.c.subject, self.vesper_stmts.c.predicate, self.vesper_stmts.c.object, self.vesper_stmts.c.objecttype, func.min(self.vesper_stmts.c.context).label("context"), ] ) else: # asQuad is True query = self.vesper_stmts.select() if fs: query = query.where(self.vesper_stmts.c.subject == subject) if fp: query = query.where(self.vesper_stmts.c.predicate == predicate) if fo: query = query.where(self.vesper_stmts.c.object == object) if fot: query = query.where(self.vesper_stmts.c.objecttype == objecttype) if fc: query = query.where(self.vesper_stmts.c.context == context) if not asQuad and not fc: query = query.group_by( self.vesper_stmts.c.subject, self.vesper_stmts.c.predicate, self.vesper_stmts.c.object, self.vesper_stmts.c.objecttype, ) if limit is not None: query = query.limit(limit) if offset is not None: query = query.offset(offset) stmts = [] self._checkConnection() result = self.conn.execute(query) for r in result: stmts.append(Statement(r["subject"], r["predicate"], r["object"], r["objecttype"], r["context"])) log.debug("stmts returned: ", len(stmts), stmts) return stmts def addStatement(self, stmt): """add the specified statement to the model""" log.debug("addStatement called with ", stmt) self._checkConnection() result = self.conn.execute( self.vesper_stmts.insert(prefixes=["OR IGNORE"]), {"subject": stmt[0], "predicate": stmt[1], "object": stmt[2], "objecttype": stmt[3], "context": stmt[4]}, ) return result.rowcount def addStatements(self, stmts): """adds multiple statements to the model""" log.debug("addStatement called with ", stmts) self._checkConnection() result = self.conn.execute( self.vesper_stmts.insert(prefixes=["OR IGNORE"]), [ {"subject": stmt[0], "predicate": stmt[1], "object": stmt[2], "objecttype": stmt[3], "context": stmt[4]} for stmt in stmts ], ) return result.rowcount def removeStatement(self, stmt): """removes the statement from the model""" log.debug("removeStatement called with: ", stmt) rmv = self.vesper_stmts.delete().where( (self.vesper_stmts.c.subject == stmt[0]) & (self.vesper_stmts.c.predicate == stmt[1]) & (self.vesper_stmts.c.object == stmt[2]) & (self.vesper_stmts.c.objecttype == stmt[3]) & (self.vesper_stmts.c.context == stmt[4]) ) self._checkConnection() result = self.conn.execute(rmv) return result.rowcount def removeStatements(self, stmts): """removes multiple statements from the model""" log.debug("removeStatements called with: ", stmts) wc = [] [ wc.append( (self.vesper_stmts.c.subject == stmt[0]) & (self.vesper_stmts.c.predicate == stmt[1]) & (self.vesper_stmts.c.object == stmt[2]) & (self.vesper_stmts.c.objecttype == stmt[3]) & (self.vesper_stmts.c.context == stmt[4]) ) for stmt in stmts ] # no protection for singleton stmt here! 
rmv = self.vesper_stmts.delete().where(or_(*wc)) self._checkConnection() result = self.conn.execute(rmv) return result.rowcount def commit(self, **kw): if self.conn is not None: if self.conn.in_transaction(): self.trans.commit() def rollback(self): if self.conn is not None: if self.conn.in_transaction(): self.trans.rollback() def close(self): log.debug("closing!") if self.conn is not None: self.conn.close() self.conn = None
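A hedged usage sketch for the store above. It assumes the surrounding module provides `Statement` and `OBJECT_TYPE_LITERAL` (both referenced in the code), uses placeholder URIs, and relies on the default in-memory SQLite source, which is also what the `OR IGNORE` insert prefix expects.

store = AlchemySqlStore()   # defaults to an in-memory SQLite database

stmt = Statement('http://example.org/doc/1',          # subject (placeholder)
                 'http://purl.org/dc/terms/title',    # predicate (placeholder)
                 'Hello world',                       # object
                 OBJECT_TYPE_LITERAL,                 # objecttype constant used above
                 'context:1')                         # context (placeholder)
store.addStatement(stmt)
store.commit()

# None slots act as wildcards; asQuad=False collapses duplicate contexts.
for s in store.getStatements(subject='http://example.org/doc/1', asQuad=False):
    print(s)
store.close()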
class SqlAlchemyFdw(ForeignDataWrapper): """An SqlAlchemy foreign data wrapper. The sqlalchemy foreign data wrapper performs simple selects on a remote database using the sqlalchemy framework. Accepted options: db_url -- the sqlalchemy connection string. schema -- (optional) schema name to qualify table name with tablename -- the table name in the remote database. """ def __init__(self, fdw_options, fdw_columns): super(SqlAlchemyFdw, self).__init__(fdw_options, fdw_columns) if 'db_url' not in fdw_options: log_to_postgres('The db_url parameter is required', ERROR) if 'tablename' not in fdw_options: log_to_postgres('The tablename parameter is required', ERROR) self.engine = create_engine(fdw_options.get('db_url')) self.metadata = MetaData() schema = fdw_options['schema'] if 'schema' in fdw_options else None tablename = fdw_options['tablename'] self.table = Table(tablename, self.metadata, schema=schema, *[Column(col.column_name, ischema_names[col.type_name]) for col in fdw_columns.values()]) self.transaction = None self._connection = None self._row_id_column = fdw_options.get('primary_key', None) def execute(self, quals, columns): """ The quals are turned into an and'ed where clause. """ statement = select([self.table]) clauses = [] for qual in quals: operator = OPERATORS.get(qual.operator, None) if operator: clauses.append(operator(self.table.c[qual.field_name], qual.value)) else: log_to_postgres('Qual not pushed to foreign db: %s' % qual, WARNING) if clauses: statement = statement.where(and_(*clauses)) if columns: columns = [self.table.c[col] for col in columns] else: columns = self.table.c.values() statement = statement.with_only_columns(columns) log_to_postgres(str(statement), DEBUG) for item in self.connection.execute(statement): yield dict(item) @property def connection(self): if self._connection is None: self._connection = self.engine.connect() return self._connection def begin(self, serializable): self.transaction = self.connection.begin() def pre_commit(self): if self.transaction is not None: self.transaction.commit() self.transaction = None def commit(self): # Pre-commit hook does this on 9.3 if self.transaction is not None: self.transaction.commit() self.transaction = None def rollback(self): if self.transaction is not None: self.transaction.rollback() self.transaction = None @property def rowid_column(self): if self._row_id_column is None: log_to_postgres( 'You need to declare a primary key option in order ' 'to use the write features') return self._row_id_column def insert(self, values): self.connection.execute(self.table.insert(values=values)) def update(self, rowid, newvalues): self.connection.execute( self.table.update() .where(self.table.c[self._row_id_column] == rowid) .values(newvalues)) def delete(self, rowid): self.connection.execute( self.table.delete() .where(self.table.c[self._row_id_column] == rowid))
class SqlAlchemyFdw(ForeignDataWrapper): """An SqlAlchemy foreign data wrapper. The sqlalchemy foreign data wrapper performs simple selects on a remote database using the sqlalchemy framework. Accepted options: db_url -- the sqlalchemy connection string. schema -- (optional) schema name to qualify table name with tablename -- the table name in the remote database. """ def __init__(self, fdw_options, fdw_columns): super(SqlAlchemyFdw, self).__init__(fdw_options, fdw_columns) if 'tablename' not in fdw_options: log_to_postgres('The tablename parameter is required', ERROR) self.metadata = MetaData() if fdw_options.get('db_url'): url = make_url(fdw_options.get('db_url')) else: if 'drivername' not in fdw_options: log_to_postgres('Either a db_url, or drivername and other ' 'connection infos are needed', ERROR) url = URL(fdw_options['drivername']) for param in ('username', 'password', 'host', 'database', 'port'): if param in fdw_options: setattr(url, param, fdw_options[param]) self.engine = create_engine(url) schema = fdw_options['schema'] if 'schema' in fdw_options else None tablename = fdw_options['tablename'] sqlacols = [] for col in fdw_columns.values(): col_type = self._get_column_type(col.type_name) sqlacols.append(Column(col.column_name, col_type)) self.table = Table(tablename, self.metadata, schema=schema, *sqlacols) self.transaction = None self._connection = None self._row_id_column = fdw_options.get('primary_key', None) def execute(self, quals, columns): """ The quals are turned into an and'ed where clause. """ statement = select([self.table]) clauses = [] for qual in quals: operator = OPERATORS.get(qual.operator, None) if operator: clauses.append(operator(self.table.c[qual.field_name], qual.value)) else: log_to_postgres('Qual not pushed to foreign db: %s' % qual, WARNING) if clauses: statement = statement.where(and_(*clauses)) if columns: columns = [self.table.c[col] for col in columns] else: columns = self.table.c.values() statement = statement.with_only_columns(columns) log_to_postgres(str(statement), DEBUG) rs = (self.connection .execution_options(stream_results=True) .execute(statement)) for item in rs: yield dict(item) @property def connection(self): if self._connection is None: self._connection = self.engine.connect() return self._connection def begin(self, serializable): self.transaction = self.connection.begin() def pre_commit(self): if self.transaction is not None: self.transaction.commit() self.transaction = None def commit(self): # Pre-commit hook does this on 9.3 if self.transaction is not None: self.transaction.commit() self.transaction = None def rollback(self): if self.transaction is not None: self.transaction.rollback() self.transaction = None @property def rowid_column(self): if self._row_id_column is None: log_to_postgres( 'You need to declare a primary key option in order ' 'to use the write features') return self._row_id_column def insert(self, values): self.connection.execute(self.table.insert(values=values)) def update(self, rowid, newvalues): self.connection.execute( self.table.update() .where(self.table.c[self._row_id_column] == rowid) .values(newvalues)) def delete(self, rowid): self.connection.execute( self.table.delete() .where(self.table.c[self._row_id_column] == rowid)) def _get_column_type(self, format_type): """Blatant ripoff from PG_Dialect.get_column_info""" ## strip (*) from character varying(5), timestamp(5) # with time zone, geometry(POLYGON), etc. attype = re.sub(r'\(.*\)', '', format_type) # strip '[]' from integer[], etc. 
attype = re.sub(r'\[\]', '', attype) is_array = format_type.endswith('[]') charlen = re.search('\(([\d,]+)\)', format_type) if charlen: charlen = charlen.group(1) args = re.search('\((.*)\)', format_type) if args and args.group(1): args = tuple(re.split('\s*,\s*', args.group(1))) else: args = () kwargs = {} if attype == 'numeric': if charlen: prec, scale = charlen.split(',') args = (int(prec), int(scale)) else: args = () elif attype == 'double precision': args = (53, ) elif attype == 'integer': args = () elif attype in ('timestamp with time zone', 'time with time zone'): kwargs['timezone'] = True if charlen: kwargs['precision'] = int(charlen) args = () elif attype in ('timestamp without time zone', 'time without time zone', 'time'): kwargs['timezone'] = False if charlen: kwargs['precision'] = int(charlen) args = () elif attype == 'bit varying': kwargs['varying'] = True if charlen: args = (int(charlen),) else: args = () elif attype in ('interval', 'interval year to month', 'interval day to second'): if charlen: kwargs['precision'] = int(charlen) args = () elif charlen: args = (int(charlen),) coltype = ischema_names.get(attype, None) if coltype: coltype = coltype(*args, **kwargs) if is_array: coltype = ARRAY(coltype) else: coltype = sqltypes.NULLTYPE return coltype
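Since `_get_column_type` never touches `self`, it can be sanity-checked in isolation; the sample format strings below are illustrative, and calling the unbound method with `None` for `self` is only a convenience for this sketch (Python 3 semantics).

# Quick checks of the format-string -> SQLAlchemy type mapping.
samples = [
    'character varying(5)',
    'numeric(10,2)',
    'timestamp with time zone',
    'integer[]',
    'double precision',
]
for fmt in samples:
    coltype = SqlAlchemyFdw._get_column_type(None, fmt)
    print('%-28s -> %r' % (fmt, coltype))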