def test_reflect_no_such_table(self, engine, conn):
    self.assertRaises(
        NoSuchTableError,
        lambda: Table('this_does_not_exist',
                      MetaData(bind=engine),
                      autoload=True))
    self.assertRaises(
        NoSuchTableError,
        lambda: Table('this_does_not_exist',
                      MetaData(bind=engine),
                      schema='also_does_not_exist',
                      autoload=True))
def diff_table_data_complete(self, tablename):
    try:
        firsttable = Table(tablename, self.firstmeta, autoload=True)
        firstquery = self.firstsession.query(firsttable)
        secondtable = Table(tablename, self.secondmeta, autoload=True)
        secondquery = self.secondsession.query(secondtable)
        # -- to be able to run long queries
        self.firstsession.close()
        self.secondsession.close()
        if firstquery.count() != secondquery.count():
            return False, "counts are different" \
                          f" {firstquery.count()} != {secondquery.count()}"
        if firstquery.count() == 0:
            return None, "tables are empty"
        if self.count_only is True:
            return True, "Counts are the same"
        pk = ",".join(
            self.firstinspector.get_pk_constraint(tablename)
            ['constrained_columns'])
        if not pk:
            return None, "no primary key(s) on this table." \
                         " Comparison is not possible."
    except NoSuchTableError:
        return False, "table is missing"

    SQL_TEMPLATE_HASH = f"""
    SELECT md5(array_agg(md5((t.*)::varchar))::varchar)
    FROM (
        SELECT *
        FROM {tablename}
        ORDER BY {pk}
        LIMIT :row_limit OFFSET :row_offset
    ) AS t;
    """

    position = 0
    while position <= firstquery.count():
        firstresult = self.firstsession.execute(
            SQL_TEMPLATE_HASH, {
                "row_limit": self.chunk_size,
                "row_offset": position
            }).fetchone()
        secondresult = self.secondsession.execute(
            SQL_TEMPLATE_HASH, {
                "row_limit": self.chunk_size,
                "row_offset": position
            }).fetchone()
        # -- to be able to run long queries
        self.firstsession.close()
        self.secondsession.close()
        if firstresult != secondresult:
            return False, f"data is different - position {position} -" \
                          f" {position + self.chunk_size}"
        position += self.chunk_size
    return True, "data and count is identical."
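# A minimal driver sketch for the chunked-hash comparison above. It is
# not from the source: the `TableDiff` class name, its constructor, and
# the connection DSNs are assumptions standing in for however the real
# comparison object is built.
from sqlalchemy import create_engine, inspect

first = create_engine("postgresql:///first_db")      # assumed DSN
second = create_engine("postgresql:///second_db")    # assumed DSN
differ = TableDiff(first, second, chunk_size=10000)  # assumed constructor

# Compare every table present in the first database, chunk by chunk.
for name in inspect(first).get_table_names():
    same, message = differ.diff_table_data_complete(name)
    print(f"{name}: {same} - {message}")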
def test_semantic_version_field(self) -> None:
    id_colname = "id"
    version_colname = "version"
    id_col = Column(id_colname, Integer, primary_key=True)
    version_col = Column(version_colname, SemanticVersionColType)
    table = Table("testtable", self.meta, id_col, version_col)
    table.create()

    v1 = Version("1.1.0")
    v2 = Version("2.0.1")
    v3 = Version("14.0.0")
    table.insert().values([
        {id_colname: 1, version_colname: v1},
        {id_colname: 2, version_colname: v2},
        {id_colname: 3, version_colname: v3},
    ]).execute()

    select_fields = [id_col, version_col]
    rows = list(
        select(select_fields).select_from(table).order_by(id_col).execute())
    self._assert_version_equal(rows[0][version_col], v1)
    self._assert_version_equal(rows[1][version_col], v2)
    self._assert_version_equal(rows[2][version_col], v3)
def test_iso_duration_field(self) -> None:
    id_colname = "id"
    duration_colname = "duration_iso"
    id_col = Column(id_colname, Integer, primary_key=True)
    duration_col = Column(duration_colname, PendulumDurationAsIsoTextColType)
    table = Table("testtable", self.meta, id_col, duration_col)
    table.create()

    d1 = Duration(years=1, months=3, seconds=3, microseconds=4)
    d2 = Duration(seconds=987.654321)
    d3 = Duration(days=-5)
    table.insert().values([
        {id_colname: 1, duration_colname: d1},
        {id_colname: 2, duration_colname: d2},
        {id_colname: 3, duration_colname: d3},
    ]).execute()

    select_fields = [id_col, duration_col]
    rows = list(
        select(select_fields).select_from(table).order_by(id_col).execute())
    self._assert_duration_equal(rows[0][duration_col], d1)
    self._assert_duration_equal(rows[1][duration_col], d2)
    self._assert_duration_equal(rows[2][duration_col], d3)
def test_reflect_table_with_schema(self, engine, conn):
    one_row = Table("one_row",
                    MetaData(bind=engine),
                    schema=SCHEMA,
                    autoload=True)
    self.assertEqual(len(one_row.c), 1)
    self.assertIsNotNone(one_row.c.number_of_rows)
class Municipality(Entity):
    """Entity for municipalities."""
    __name__ = 'municipality'
    __table__ = Table(
        'municipalities', Entity.metadata,
        Column('id', Integer, nullable=False, primary_key=True),
        Column('microregion_id',
               Integer,
               ForeignKey('microregions.id', use_alter=True),
               nullable=False,
               index=True),
        Column('mesoregion_id',
               SmallInteger,
               ForeignKey('mesoregions.id', use_alter=True),
               nullable=False,
               index=True),
        Column('state_id',
               SmallInteger,
               ForeignKey('states.id', use_alter=True),
               nullable=False,
               index=True),
        Column('name', String(64), nullable=False, index=True))

    # Relationships
    state = Relationship('State', 'municipalities')
    mesoregion = Relationship('Mesoregion', 'municipalities')
    microregion = Relationship('Microregion', 'municipalities')
    districts = Relationship('District', 'municipality')
    subdistricts = Relationship('Subdistrict', 'municipality')
def test_reflect_table_include_columns(self, engine, connection):
    one_row_complex = Table('one_row_complex', MetaData(bind=engine))
    engine.dialect.reflecttable(connection,
                                one_row_complex,
                                include_columns=['col_int'],
                                exclude_columns=[])
    self.assertEqual(len(one_row_complex.c), 1)
    self.assertIsNotNone(one_row_complex.c.col_int)
    self.assertRaises(AttributeError,
                      lambda: one_row_complex.c.col_tinyint)
def save_data_to_database(self, df: pd.DataFrame, engine: Engine):
    if not AbstractPreprocessor._check_write_privilege(engine):
        AbstractPreprocessor.create_dir(self.output_dir, os.getlogin())
        path = Path.joinpath(self.output_dir, os.getlogin(),
                             datetime.now().isoformat() + '.json')
        with path.open("a") as f:
            df.to_json(f)
        return
    for idx, row in df.iterrows():
        with engine.connect() as conn:
            t = Table('judgment_map', MetaData(), autoload_with=engine)
            if row['judgments']:  # only insert when we find judgments
                # Delete and reinsert, as no upsert command is available
                stmt = t.delete().where(delete_stmt_decisions_with_df(df))
                conn.execute(stmt)
                for k in row['judgments']:
                    judgment_type_id = Judgment(k).value
                    stmt = t.insert().values([{
                        "decision_id": str(row['decision_id']),
                        "judgment_id": judgment_type_id
                    }])
                    conn.execute(stmt)
            else:
                self.logger.warning(
                    f"No judgments found for {row['html_url']}")
class LegacyMunicipality(LegacyEntity):
    '''
    Legacy entity for municipalities.
    '''
    __table__ = Table(
        _('municipalities'),
        LegacyEntity.metadata,
        Column(_('id'), Integer, nullable=False, primary_key=True),
        Column(_('state_id'),
               SmallInteger,
               ForeignKey(_('states.id'), use_alter=True),
               nullable=False,
               index=True),
        Column(_('name'), String(64), nullable=False, index=True)
    )

    # Columns mapping
    __columns__ = {
        _('id'): 'municipality_id',
        _('state_id'): 'state_id',
        _('name'): 'municipality_name',
    }
class Microregion(Entity):
    '''
    Entity for microregions.
    '''
    __table__ = Table(
        _('microregions'),
        Entity.metadata,
        Column(_('id'), Integer, nullable=False, primary_key=True),
        Column(_('mesoregion_id'),
               SmallInteger,
               ForeignKey(_('mesoregions.id'), use_alter=True),
               nullable=False,
               index=True),
        Column(_('state_id'),
               SmallInteger,
               ForeignKey(_('states.id'), use_alter=True),
               nullable=False,
               index=True),
        Column(_('name'), String(64), nullable=False, index=True)
    )

    # Columns mapping
    __columns__ = {
        _('id'): 'microregion_id',
        _('mesoregion_id'): 'mesoregion_id',
        _('state_id'): 'state_id',
        _('name'): 'microregion_name',
    }
def connection(sync_engine):
    with sync_engine.connect() as conn:
        metadata = MetaData()
        Table("table", metadata,
              Column("column1", Integer, primary_key=True))
        Table("table2", metadata,
              Column("fk_column", ForeignKey("table.column1")))
        if conn.dialect.name != "sqlite":
            conn.execute(CreateSchema("altschema"))
            Table("table3", metadata,
                  Column("fk_column", Integer),
                  schema="altschema")
        metadata.create_all(conn)

        yield conn

        if conn.dialect.name != "sqlite":
            metadata.drop_all(conn)
            conn.execute(DropSchema("altschema"))
def test_unicode(self, engine, conn):
    unicode_str = "密林"
    one_row = Table("one_row", MetaData(bind=engine))
    returned_str = sqlalchemy.select(
        [expression.bindparam("あまぞん", unicode_str)],
        from_obj=one_row,
    ).scalar()
    self.assertEqual(returned_str, unicode_str)
def create_single_table(self, table_name, engine):
    meta_data = MetaData(engine)
    stock = Table(
        table_name, meta_data,
        Column('id', Integer, primary_key=True),
        Column('PB', Numeric(8, 4)),
        Column('PE', Numeric(8, 4)),
        Column('PE1', Numeric(8, 4)),
        Column('accumAdjFactor', Integer),
        Column('actPreClosePrice', Numeric(8, 4)),
        Column('chgPct', Numeric(8, 4)),
        Column('closePrice', Numeric(8, 4)),
        Column('dealAmount', BigInteger),
        Column('highestPrice', Numeric(8, 4)),
        Column('isOpen', Integer),
        Column('lowestPrice', Numeric(8, 4)),
        Column('marketValue', BigInteger),
        Column('negMarketValue', BigInteger),
        Column('openPrice', Numeric(8, 4)),
        Column('preClosePrice', Numeric(8, 4)),
        Column('turnoverRate', Numeric(8, 4)),
        Column('turnoverValue', BigInteger),
        Column('turnoverVol', BigInteger),
        Column('vwap', Numeric(8, 4)),
        Column('date', DateTime),
    )
    meta_data.create_all(engine)
def get_dest_table_for_est(self,
                           est: "ExtraSummaryTable",
                           add_extra_id_cols: bool = False) -> Table:
    """
    Add an additional summary table to the dump, if it's not there already.
    Return the table (from the destination database).

    Args:
        est:
            a :class:`camcops_server.cc_modules.cc_summaryelement.ExtraSummaryTable`
        add_extra_id_cols:
            Add extra ID columns, for the ``DB_PATIENT_ID_PER_ROW``
            export option?
    """  # noqa
    tablename = est.tablename
    if tablename in self.dst_tables:
        return self.dst_tables[tablename]
    columns = est.columns.copy()
    if add_extra_id_cols:
        columns.extend(all_extra_id_columns(self.req))
        columns.extend(est.extra_task_xref_columns())
    table = Table(tablename, self.dst_metadata, *columns)
    # ... that modifies the metadata, so:
    self.dst_tables[tablename] = table
    return table
def drop_all(session):
    inspector = reflection.Inspector.from_engine(session.bind)
    # gather all data first before dropping anything.
    # some DBs lock after things have been dropped in
    # a transaction.
    metadata = MetaData()
    tbs = []
    all_fks = []
    for table_name in inspector.get_table_names():
        fks = []
        for fk in inspector.get_foreign_keys(table_name):
            if not fk['name']:
                continue
            fks.append(ForeignKeyConstraint((), (), name=fk['name']))
        t = Table(table_name, metadata, *fks)
        tbs.append(t)
        all_fks.extend(fks)

    for fkc in all_fks:
        session.execute(DropConstraint(fkc))
    for table in tbs:
        session.execute(DropTable(table))
    session.commit()
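# A short usage sketch, not from the source: drop_all only needs a plain
# ORM session bound to an engine (the DSN below is hypothetical). Note
# the helper is written against the pre-1.4 reflection.Inspector API.
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker

engine = create_engine("postgresql:///scratch_db")  # hypothetical DSN
session = sessionmaker(bind=engine)()

# Constraints are dropped first, so tables can then go in any order.
drop_all(session)
session.close()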
def test_reflect_table_include_columns(self, engine, conn):
    one_row_complex = Table("one_row_complex", MetaData(bind=engine))
    version = float(
        re.search(r"^([\d]+\.[\d]+)\..+", sqlalchemy.__version__).group(1))
    if version <= 1.2:
        engine.dialect.reflecttable(conn,
                                    one_row_complex,
                                    include_columns=["col_int"],
                                    exclude_columns=[])
    elif version == 1.3:
        # https://docs.sqlalchemy.org/en/13/changelog/changelog_13.html
        # #change-64ac776996da1a5c3e3460b4c0f0b257
        engine.dialect.reflecttable(
            conn,
            one_row_complex,
            include_columns=["col_int"],
            exclude_columns=[],
            resolve_fks=True,
        )
    else:  # version >= 1.4
        # https://docs.sqlalchemy.org/en/14/changelog/changelog_14.html
        # #change-0215fae622c01f9409eb1ba2754f4792
        # https://docs.sqlalchemy.org/en/14/core/reflection.html
        # #sqlalchemy.engine.reflection.Inspector.reflect_table
        insp = sqlalchemy.inspect(engine)
        insp.reflect_table(
            one_row_complex,
            include_columns=["col_int"],
            exclude_columns=[],
            resolve_fks=True,
        )
    self.assertEqual(len(one_row_complex.c), 1)
    self.assertIsNotNone(one_row_complex.c.col_int)
    self.assertRaises(AttributeError,
                      lambda: one_row_complex.c.col_tinyint)
def test_reserved_words(self, engine, connection):
    """Presto uses double quotes, not backticks."""
    fake_table = Table('select', MetaData(bind=engine),
                       Column('current_timestamp', STRINGTYPE))
    query = str(fake_table.select(fake_table.c.current_timestamp == 'a'))
    self.assertIn('"select"', query)
    self.assertIn('"current_timestamp"', query)
    self.assertNotIn('`select`', query)
    self.assertNotIn('`current_timestamp`', query)
class WxArticle(Base):
    __table__ = Table(
        'wx_public_no', Base.metadata,
        Column('id', Integer, primary_key=True, autoincrement=True),
        Column('article_name', VARCHAR(256)),
        Column('article_content', TEXT),
        Column('article_link', VARCHAR(256)),
        Column('link_md5', VARCHAR(128)),
        Column('public_no', VARCHAR(128)),
        Column('public_no_link', VARCHAR(256)),
        Column('published_time', DATE))
def test_char_length(self, engine, conn):
    one_row_complex = Table("one_row_complex",
                            MetaData(bind=engine),
                            autoload=True)
    result = (sqlalchemy.select([
        sqlalchemy.func.char_length(one_row_complex.c.col_string)
    ]).execute().scalar())
    self.assertEqual(result, len("a string"))
class WhaleTransactionsStats(Base):
    __table__ = Table(
        'WhaleTransactionsStats', Base.metadata,
        Column('address', String(255), primary_key=True),
        Column('transaction_type', String(255)),
        Column('total_sum_in_usdt', Float),
        Column('total_transactions', Integer),
        Column('count_small', Integer),
        Column('count_medium', Integer),
        Column('count_big', Integer))
class Collection(Base):
    __table__ = Table('collection',
                      Base.metadata,
                      autoload=True,
                      autoload_with=engine)
    __mapper_args__ = {
        'polymorphic_identity': 'collection',
        'polymorphic_on': 'is_calendar'
    }
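# A minimal sketch of how a subclass would plug into the polymorphic
# mapping above. Not from the source: the Calendar name and the
# 'calendar' discriminator value stored in is_calendar are assumptions.
class Calendar(Collection):
    # Rows whose is_calendar column equals 'calendar' load as Calendar
    # instances instead of plain Collection (single-table inheritance).
    __mapper_args__ = {
        'polymorphic_identity': 'calendar',
    }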
def save_data_to_database(self, df: pd.DataFrame, engine: Engine):
    with engine.connect() as conn:
        t = Table('lower_court', MetaData(), autoload_with=conn)
        # Delete and reinsert as no upsert command is available
        stmt = t.delete().where(delete_stmt_decisions_with_df(df))
        conn.execute(stmt)
        for _, row in df.iterrows():
            if 'lower_court' not in row or row['lower_court'] is None:
                continue
            lower_court = row["lower_court"]
            res = {}
            if 'court' in lower_court and lower_court['court'] is not None:
                res['court_id'] = list(
                    self.select(
                        engine, 'court', 'court_id',
                        f"court_string = '{lower_court['court']}'")
                )[0]['court_id'][0]
                res['court_id'] = int(
                    res['court_id']
                ) if res['court_id'] is not None else None
            if 'canton' in lower_court and lower_court['canton'] is not None:
                res['canton_id'] = list(
                    self.select(engine, 'canton', 'canton_id',
                                f"short_code = '{lower_court['canton']}'")
                )[0]['canton_id'][0]
                res['canton_id'] = int(
                    res['canton_id']
                ) if res['canton_id'] is not None else None
            if 'chamber' in lower_court and lower_court['chamber'] is not None:
                res['chamber_id'] = list(
                    self.select(
                        engine, 'chamber', 'chamber_id',
                        f"chamber_string = '{lower_court['chamber']}'")
                )[0]['chamber_id'][0]
                res['chamber_id'] = int(
                    res['chamber_id']
                ) if res['chamber_id'] is not None else None
            stmt = t.insert().values([{
                "decision_id": str(row['decision_id']),
                "court_id": res.get('court_id'),
                "canton_id": res.get('canton_id'),
                "chamber_id": res.get('chamber_id'),
                "date": lower_court.get('date'),
                "file_number": lower_court.get('file_number')
            }])
            conn.execute(stmt)
def table_override(original, base, overrides=None):
    # Avoid a mutable default argument; treat None as "no overrides".
    overrides = overrides or {}
    result = Table(original.name, base.metadata, schema=original.schema)
    for col in original.columns:
        if col.name not in overrides:
            result.append_column(col.copy())
        else:
            new_col = overrides[col.name].copy()
            new_col.name = col.name
            result.append_column(new_col)
    return result
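# A self-contained usage sketch (assumed table and column names, not
# from the source): copy a table into a fresh metadata while swapping
# one column's type.
from sqlalchemy import Column, Integer, MetaData, String, Table, Text

source_meta = MetaData()
users = Table("users", source_meta,
              Column("id", Integer, primary_key=True),
              Column("bio", String(50)))

class DestBase:                      # stand-in for the `base` argument
    metadata = MetaData()

# Copy `users`, widening its `bio` column to TEXT in the destination.
users_copy = table_override(users, DestBase, {"bio": Column(Text())})
assert str(users_copy.c.bio.type) == "TEXT"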
class RapidEyeFootPrintsMexicoOld(BASE):
    # TODO: this is just an example of three columns in the table
    # and two columns mapped
    __table__ = Table('rapideye_footprints_mexico_old', BASE.metadata,
                      Column('gid', Integer, primary_key=True),
                      Column('code', Integer),
                      Column('mapgrid', Integer),
                      Column('mapgrid2', Integer))
    __table_args__ = {'autoload': True, 'autoload_with': ENGINE}
    code = __table__.c.code
    mapgrid = __table__.c.mapgrid
    mapgrid2 = __table__.c.mapgrid2
def budgets_labels_table():
    """The association table for linking budgets to labels"""
    return Table(
        'budgets_labels',
        EntityManager.get_base().metadata,
        Column('budget_id', Integer, ForeignKey('budgets.id')),
        Column('label_id', Integer, ForeignKey('labels.id'))
    )
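# A minimal wiring sketch, not from the source: the Budget and Label
# models below are assumptions; only budgets_labels_table() comes from
# the code above.
from sqlalchemy import Column, Integer
from sqlalchemy.orm import relationship

Base = EntityManager.get_base()
association = budgets_labels_table()  # build the Table exactly once

class Budget(Base):
    __tablename__ = 'budgets'
    id = Column(Integer, primary_key=True)
    labels = relationship('Label', secondary=association,
                          back_populates='budgets')

class Label(Base):
    __tablename__ = 'labels'
    id = Column(Integer, primary_key=True)
    budgets = relationship('Budget', secondary=association,
                           back_populates='labels')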
def diff_table_data(self, tablename):
    try:
        firsttable = Table(tablename, self.firstmeta, autoload=True)
        secondtable = Table(tablename, self.secondmeta, autoload=True)
        pk = ",".join(
            self.firstinspector.get_pk_constraint(tablename)
            ['constrained_columns'])
        if not pk:
            return None, "no primary key(s) on this table." \
                         " Comparison is not possible."
    except NoSuchTableError:
        return False, "table is missing"

    # Hash only the first and last :row_limit rows (ordered by primary
    # key) instead of scanning the whole table.
    SQL_TEMPLATE_HASH = f"""
    SELECT md5(array_agg(md5((t.*)::varchar))::varchar) hash
    FROM (
        SELECT * FROM (
            (SELECT * FROM {tablename} ORDER BY {pk} LIMIT :row_limit)
            UNION
            (SELECT * FROM {tablename} ORDER BY {pk} DESC LIMIT :row_limit)
        ) AS topbottomselect ORDER BY {pk}
    ) AS t;
    """

    position = 0
    firstresult = self.firstsession.execute(
        SQL_TEMPLATE_HASH, {
            "row_limit": self.chunk_size,
            "row_offset": position
        }).fetchone()
    secondresult = self.secondsession.execute(
        SQL_TEMPLATE_HASH, {
            "row_limit": self.chunk_size,
            "row_offset": position
        }).fetchone()
    # -- to be able to run long queries
    self.firstsession.close()
    self.secondsession.close()
    if firstresult != secondresult:
        return False, f"data is different - position {position} -" \
                      f" {position + self.chunk_size}"
    return True, "data and count (implicit) is identical."
def test_basic_create_table(self):
    expected_result = re.sub(
        r'\s+', '', "CREATE TABLE testtable (col1 INTEGER)").strip()
    engine = create_engine('redshift+psycopg2://')
    table_model = Table("testtable", MetaData(), Column("col1", Integer))
    ddl_statement = re.sub(
        r'\s+', '',
        str(CreateTable(table_model).compile(engine)).strip())
    self.assertEqual(expected_result, ddl_statement)
class jianshu_blog(Base):
    __table__ = Table(
        'jianshu_blog', Base.metadata,
        Column('id', Integer, primary_key=True, autoincrement=True),
        Column('main_topic', VARCHAR(64)),
        Column('topic_link', VARCHAR(256)),
        Column('blog_name', VARCHAR(256)),
        Column('blog_content', TEXT),
        Column('blog_link', VARCHAR(256)),
        Column('published_time', DATE),
        Column('blog_author', VARCHAR(128)),
        Column('author_link', VARCHAR(256)),
        Column('read_num', Integer),
        Column('comment_num', Integer),
        Column('like_num', Integer),
        Column('support_num', Integer))
class NewsContent(Base):
    __table__ = Table(
        'news_content', Base.metadata,
        Column('id', Integer, primary_key=True, autoincrement=True),
        Column('WEBSITE_ID', Integer),
        Column('CRAWL_URL', VARCHAR(100)),
        Column('NEWS_NAME', VARCHAR(100)),
        Column('NEWS_URL', VARCHAR(100)),
        Column('NEWS_IMAGE', VARCHAR(100)),
        Column('NEWS_DESC', TEXT),
        Column('KEYWORDS', VARCHAR(100)),
        Column('PUBLISH_TIME', DATE),
        Column('NEWS_RESOURCE', VARCHAR(50)),
        Column('NEWS_AUTHOR', VARCHAR(50)),
        Column('COMMENT_NUM', Integer),
        Column('READ_NUM', Integer))
def test_to_sql(self, engine, conn):
    # TODO pyathena.error.OperationalError: SYNTAX_ERROR: line 1:305:
    # Column 'foobar' cannot be resolved.
    # def _format_bytes(formatter, escaper, val):
    #     return val.decode()
    table_name = "to_sql_{0}".format(str(uuid.uuid4()).replace("-", ""))
    df = pd.DataFrame({
        "col_int": np.int32([1]),
        "col_bigint": np.int64([12345]),
        "col_float": np.float32([1.0]),
        "col_double": np.float64([1.2345]),
        "col_string": ["a"],
        "col_boolean": np.bool_([True]),
        "col_timestamp": [datetime(2020, 1, 1, 0, 0, 0)],
        "col_date": [date(2020, 12, 31)],
        # "col_binary": "foobar".encode(),
    })
    # Explicitly specify column order
    df = df[[
        "col_int",
        "col_bigint",
        "col_float",
        "col_double",
        "col_string",
        "col_boolean",
        "col_timestamp",
        "col_date",
        # "col_binary",
    ]]
    df.to_sql(
        table_name,
        engine,
        schema=SCHEMA,
        index=False,
        if_exists="replace",
        method="multi",
    )

    table = Table(table_name, MetaData(bind=engine), autoload=True)
    self.assertEqual(
        table.select().execute().fetchall(),
        [(
            1,
            12345,
            1.0,
            1.2345,
            "a",
            True,
            datetime(2020, 1, 1, 0, 0, 0),
            date(2020, 12, 31),
            # "foobar".encode(),
        )],
    )