def get_physical_table_metadata(
    database: Database,
    table_name: str,
    schema_name: Optional[str] = None,
) -> List[Dict[str, str]]:
    """Use SQLAlchemy inspector to get table metadata.

    :param database: The database model whose engine/inspector is used
    :param table_name: Name of the table to inspect
    :param schema_name: Optional schema the table lives in; empty string is
        treated the same as ``None``
    :return: The inspector's column dicts, each updated in place with
        ``type`` (string form), ``type_generic`` and ``is_dttm`` keys
    :raises NoSuchTableError: if the table is missing or not visible to the
        connection
    """
    db_engine_spec = database.db_engine_spec
    db_dialect = database.get_dialect()
    # Normalize an empty-string schema to None so the inspector treats it
    # as "no schema".
    _schema_name = schema_name if schema_name else None

    # Table does not exist or is not visible to a connection.
    if not database.has_table_by_name(table_name, schema=_schema_name):
        raise NoSuchTableError

    cols = database.get_columns(table_name, schema=_schema_name)
    for col in cols:
        try:
            if isinstance(col["type"], TypeEngine):
                db_type = db_engine_spec.column_datatype_to_string(
                    col["type"], db_dialect
                )
                type_spec = db_engine_spec.get_column_spec(db_type)
                col.update(
                    {
                        "type": db_type,
                        "type_generic": type_spec.generic_type if type_spec else None,
                        "is_dttm": type_spec.is_dttm if type_spec else None,
                    }
                )
        # Broad exception catch, because there are multiple possible exceptions
        # from different drivers that fall outside CompileError
        except Exception:  # pylint: disable=broad-except
            # BUG FIX: the fallback previously wrote "generic_type" while the
            # success path writes "type_generic"; consumers reading
            # "type_generic" would miss the fallback value. Use one key.
            col.update(
                {
                    "type": "UNKNOWN",
                    "type_generic": None,
                    "is_dttm": None,
                }
            )
    return cols
def get_table_metadata(
    database: Database, table_name: str, schema_name: Optional[str]
) -> Dict:
    """
    Get table metadata information, including type, pk, fks.

    This function raises SQLAlchemyError when a schema is not found.

    :param database: The database model
    :param table_name: Table name
    :param schema_name: schema name
    :return: Dict table metadata ready for API response
    """
    keys: List = []
    columns = database.get_columns(table_name, schema_name)
    # Column name -> comment text; only populated for presto/hive below.
    comment_dict: Dict = {}
    primary_key = database.get_pk_constraint(table_name, schema_name)
    if primary_key and primary_key.get("constrained_columns"):
        primary_key["column_names"] = primary_key.pop("constrained_columns")
        primary_key["type"] = "pk"
        keys += [primary_key]

    dialect_name = database.get_dialect().name
    # Some drivers report the dialect name as bytes.
    if isinstance(dialect_name, bytes):
        dialect_name = dialect_name.decode()

    # presto & hive do not expose column comments through the inspector,
    # so run a DESC statement and parse the comment column out of the result.
    if dialect_name in ("presto", "hive"):
        db_engine_spec = database.db_engine_spec
        sql = ParsedQuery("desc {a}.{b}".format(a=schema_name, b=table_name)).stripped()
        engine = database.get_sqla_engine(schema_name)
        conn = engine.raw_connection()
        try:
            cursor = conn.cursor()
            # A throwaway Query model instance is used only to supply the
            # default row limit (query.limit) to fetch_data.
            query = Query()
            query.executed_sql = sql
            query.__tablename__ = table_name
            db_engine_spec.execute(cursor, sql, async_=False)
            data = db_engine_spec.fetch_data(cursor, query.limit)
            # DESC output layout differs by dialect: presto has the comment in
            # column 3, hive in column 2; column 0 is the column name in both.
            comment_idx = 3 if dialect_name == "presto" else 2
            for row in data:
                comment_dict[row[0]] = row[comment_idx]
        finally:
            # FIX: the raw connection was previously never closed (only a
            # no-op commit after a read-only DESC) — release it here.
            conn.close()

    foreign_keys = get_foreign_keys_metadata(database, table_name, schema_name)
    indexes = get_indexes_metadata(database, table_name, schema_name)
    keys += foreign_keys + indexes

    payload_columns: List[Dict] = []
    for col in columns:
        dtype = get_col_type(col)
        payload = {
            "name": col["name"],
            # Strip a parenthesized precision, e.g. VARCHAR(255) -> VARCHAR.
            "type": dtype.split("(")[0] if "(" in dtype else dtype,
            "longType": dtype,
            "keys": [k for k in keys if col["name"] in k.get("column_names")],
        }
        if comment_dict:
            # .get avoids a KeyError when a column reported by the inspector
            # is absent from the DESC output (e.g. a case mismatch).
            payload["comment"] = comment_dict.get(col["name"])
        elif dialect_name == "mysql":
            # MySQL exposes comments directly via the inspector.
            payload["comment"] = col["comment"]
        # Other dialects: no comment key, matching the previous behavior.
        payload_columns.append(payload)

    return {
        "name": table_name,
        "columns": payload_columns,
        "selectStar": database.select_star(
            table_name,
            schema=schema_name,
            show_cols=True,
            indent=True,
            cols=columns,
            latest_partition=True,
        ),
        "primaryKey": primary_key,
        "foreignKeys": foreign_keys,
        "indexes": keys,
    }