def create_hive_external_table(db, table, conn, **op_kwargs):
    """Build and run the Hive CREATE TABLE statement for one MySQL table.

    Steps, in order:

    1. Call ``SqoopSchemaUpdate().update_hive_schema`` for the target Hive
       table. Its return value is not inspected; the DDL below is always
       executed.
    2. Read the table's column metadata from MySQL
       ``information_schema.COLUMNS`` ordered by ``ORDINAL_POSITION``.
    3. Render one column definition per MySQL column and fill the
       module-level ``ods_create_table_hql`` template.
    4. Execute the resulting HQL through ``HiveCliHook.run_cli``.

    Args:
        db: MySQL schema name.
        table: MySQL table name; also substituted as ``bs`` into the
            module-level ``hive_table`` and ``hdfs_path`` templates.
        conn: Connection identifier handed to ``get_db_conn`` and to
            ``update_hive_schema``.
        **op_kwargs: Accepted and ignored.

    Returns:
        None.
    """
    SqoopSchemaUpdate().update_hive_schema(
        hive_db=hive_db,
        hive_table=hive_table.format(bs=table),
        mysql_db=db,
        mysql_table=table,
        mysql_conn=conn
    )

    sql = (
        " select COLUMN_NAME, DATA_TYPE, COLUMN_COMMENT, COLUMN_TYPE"
        " from information_schema.COLUMNS"
        " where TABLE_SCHEMA='{db}' and TABLE_NAME='{table}'"
        " order by ORDINAL_POSITION "
    ).format(db=db, table=table)

    mysql_conn = get_db_conn(conn)
    try:
        mcursor = mysql_conn.cursor()
        mcursor.execute(sql)
        column_rows = mcursor.fetchall()
    finally:
        # Release the MySQL connection even when the metadata query fails.
        mysql_conn.close()

    columns = []
    for name, data_type, comment, column_type in column_rows:
        if data_type.upper() == 'DECIMAL':
            # COLUMN_TYPE carries precision/scale, e.g. "decimal(10,2) unsigned";
            # drop the signedness keywords and keep the rest.
            hive_type = column_type.replace('unsigned', '').replace('signed', '')
        else:
            # Types missing from the mapping fall back to 'string'.
            hive_type = mysql_type_to_hive.get(data_type.upper(), 'string')
        # Backslash-escape so a comment containing a quote cannot terminate
        # the single-quoted literal in the generated DDL.
        safe_comment = comment.replace('\\', '\\\\').replace("'", "\\'")
        columns.append("`%s` %s comment '%s'" % (name, hive_type, safe_comment))

    # SQL that creates the Hive table
    hql = ods_create_table_hql.format(
        db_name=hive_db,
        table_name=hive_table.format(bs=table),
        columns=",\n".join(columns),
        hdfs_path=hdfs_path.format(bs=table)
    )
    logging.info('Executing: %s', hql)
    HiveCliHook().run_cli(hql)
def run_sqoop_check_table(mysql_db_name, mysql_table_name, conn_id, hive_table_name, **kwargs):
    """Ensure the Hive table for a MySQL source table exists, creating it if needed.

    First calls ``SqoopSchemaUpdate().update_hive_schema``; when that returns
    a truthy value the function returns ``True`` immediately. Otherwise it
    runs ``SHOW TABLES`` against ``HIVE_DB`` and, if the table is not listed,
    reads the MySQL column metadata, renders ``ODS_SQOOP_CREATE_TABLE_SQL``
    and executes it through ``HiveCliHook.run_cli``.

    Args:
        mysql_db_name: MySQL schema name.
        mysql_table_name: MySQL table name.
        conn_id: Connection id passed to ``MySqlHook`` and to
            ``update_hive_schema``.
        hive_table_name: Name of the Hive table to check / create.
        **kwargs: Accepted and ignored.

    Returns:
        ``True`` when ``update_hive_schema`` returned a truthy value,
        otherwise ``None``.
    """
    response = SqoopSchemaUpdate().update_hive_schema(
        hive_db=HIVE_SQOOP_TEMP_DB,
        hive_table=hive_table_name,
        mysql_db=mysql_db_name,
        mysql_table=mysql_table_name,
        mysql_conn=conn_id
    )
    if response:
        return True

    # e.g. SHOW TABLES in oride_db LIKE 'data_aa'
    check_sql = 'SHOW TABLES in %s LIKE \'%s\'' % (HIVE_DB, hive_table_name)
    hive2_conn = HiveServer2Hook().get_conn()
    try:
        cursor = hive2_conn.cursor()
        cursor.execute(check_sql)
        table_exists = bool(cursor.fetchall())
    finally:
        # Do not leave the HiveServer2 session open after the lookup.
        hive2_conn.close()
    if table_exists:
        return None

    # NOTE(review): existence is checked (and logged) against HIVE_DB, but the
    # schema update above and the CREATE below use HIVE_SQOOP_TEMP_DB --
    # confirm this asymmetry is intended.
    logging.info('Create Hive Table: %s.%s', HIVE_DB, hive_table_name)

    # Fetch the table's columns. ORDER BY ORDINAL_POSITION keeps the Hive
    # column order aligned with the MySQL table definition; without it the
    # row order of information_schema is unspecified.
    column_sql = (
        " SELECT COLUMN_NAME, DATA_TYPE, NUMERIC_PRECISION, NUMERIC_SCALE,COLUMN_COMMENT"
        " FROM information_schema.columns"
        " WHERE table_schema='{db_name}' and table_name='{table_name}'"
        " ORDER BY ORDINAL_POSITION "
    ).format(db_name=mysql_db_name, table_name=mysql_table_name)

    mysql_conn = MySqlHook(conn_id).get_conn()
    try:
        mysql_cursor = mysql_conn.cursor()
        mysql_cursor.execute(column_sql)
        results = mysql_cursor.fetchall()
    finally:
        # Release the MySQL connection even when the metadata query fails.
        mysql_conn.close()

    # MySQL types that are stored as Hive `string`.
    string_types = {
        'timestamp', 'varchar', 'char', 'text',
        'longtext', 'mediumtext', 'enum', 'datetime',
    }

    rows = []
    for column_name, mysql_type, precision, scale, comment in results:
        # A source column literally named `dt` is renamed to `_dt`.
        col_name = '_dt' if column_name == 'dt' else column_name

        if mysql_type in string_types:
            data_type = 'string'
        elif mysql_type == 'decimal':
            data_type = '%s(%s,%s)' % (mysql_type, precision, scale)
        elif mysql_type == 'mediumint':
            data_type = 'int'
        else:
            # Any other type name is passed through unchanged.
            data_type = mysql_type

        # Backslash-escape so a comment containing a quote cannot terminate
        # the single-quoted literal in the generated DDL.
        safe_comment = comment.replace('\\', '\\\\').replace("'", "\\'")
        rows.append("`%s` %s comment '%s'" % (col_name, data_type, safe_comment))

    # Hive create-table statement
    sql = ODS_SQOOP_CREATE_TABLE_SQL.format(
        db_name=HIVE_SQOOP_TEMP_DB,
        table_name=hive_table_name,
        columns=",\n".join(rows),
        ufile_path=UFILE_PATH % (mysql_db_name, mysql_table_name)
    )
    logging.info('Executing: %s', sql)
    HiveCliHook().run_cli(sql)
    return None