def test_postgres_to_postgres(self):
    """Round-trip a query between two Postgres connections via GenericTransfer.

    Recreates the destination table with ``LIKE`` so the schema always
    matches the source, then runs the transfer for a single execution date.
    """
    query = "SELECT * FROM INFORMATION_SCHEMA.TABLES LIMIT 100;"
    setup_statements = [
        "DROP TABLE IF EXISTS test_postgres_to_postgres",
        "CREATE TABLE IF NOT EXISTS "
        "test_postgres_to_postgres (LIKE INFORMATION_SCHEMA.TABLES)",
    ]
    transfer = GenericTransfer(
        task_id='test_p2p',
        preoperator=setup_statements,
        source_conn_id='postgres_default',
        destination_conn_id='postgres_default',
        destination_table="test_postgres_to_postgres",
        sql=query,
        dag=self.dag,
    )
    transfer.run(
        start_date=DEFAULT_DATE,
        end_date=DEFAULT_DATE,
        ignore_ti_state=True,
    )
def test_mysql_to_mysql(self):
    """Round-trip a query between two MySQL connections via GenericTransfer.

    The destination table is dropped and recreated with ``LIKE`` before the
    transfer so repeated runs start from a clean, schema-matched table.
    """
    query = "SELECT * FROM INFORMATION_SCHEMA.TABLES LIMIT 100;"
    setup_statements = [
        "DROP TABLE IF EXISTS test_mysql_to_mysql",
        "CREATE TABLE IF NOT EXISTS "
        "test_mysql_to_mysql LIKE INFORMATION_SCHEMA.TABLES",
    ]
    transfer = GenericTransfer(
        task_id='test_m2m',
        preoperator=setup_statements,
        source_conn_id='airflow_db',
        destination_conn_id='airflow_db',
        destination_table="test_mysql_to_mysql",
        sql=query,
        dag=self.dag,
    )
    transfer.run(
        start_date=DEFAULT_DATE,
        end_date=DEFAULT_DATE,
        ignore_ti_state=True,
    )
def test_postgres_to_postgres(self):
    """Copy rows Postgres -> Postgres using GenericTransfer.

    Imports the operator locally so the module stays importable when the
    Postgres extras are not installed.
    """
    query = "SELECT * FROM INFORMATION_SCHEMA.TABLES LIMIT 100;"
    from airflow.operators.generic_transfer import GenericTransfer

    op = GenericTransfer(
        task_id='test_p2p',
        preoperator=[
            "DROP TABLE IF EXISTS test_postgres_to_postgres",
            "CREATE TABLE IF NOT EXISTS "
            "test_postgres_to_postgres (LIKE INFORMATION_SCHEMA.TABLES)",
        ],
        source_conn_id='postgres_default',
        destination_conn_id='postgres_default',
        destination_table="test_postgres_to_postgres",
        sql=query,
        dag=self.dag,
    )
    op.run(
        start_date=DEFAULT_DATE,
        end_date=DEFAULT_DATE,
        ignore_ti_state=True,
    )
def test_mysql_to_mysql(self, client):
    """Copy the ``connection`` table onto itself with GenericTransfer.

    Runs inside ``MySqlContext`` so the test exercises the given MySQL
    ``client`` backend; the destination table is recreated via ``LIKE``
    before the transfer.
    """
    with MySqlContext(client):
        query = "SELECT * FROM connection;"
        transfer = GenericTransfer(
            task_id='test_m2m',
            preoperator=[
                "DROP TABLE IF EXISTS test_mysql_to_mysql",
                "CREATE TABLE IF NOT EXISTS test_mysql_to_mysql LIKE connection",
            ],
            source_conn_id='airflow_db',
            destination_conn_id='airflow_db',
            destination_table="test_mysql_to_mysql",
            sql=query,
            dag=self.dag,
        )
        transfer.run(
            start_date=DEFAULT_DATE,
            end_date=DEFAULT_DATE,
            ignore_ti_state=True,
        )
def match_datatransfer_operater(self):
    """Return the transfer operator best matching the connection pair.

    Resolves the DB type of ``self.source_conn_id`` and
    ``self.destination_conn_id`` (via ``self.get_conn_db_type``) and picks
    a specialized transfer operator for the known (source, destination)
    combinations, falling back to ``GenericTransfer`` for everything else.

    Returns:
        An unscheduled operator instance wired with ``self.sql``,
        ``self.destination_table`` and the two connection ids.
    """
    source_db_type = self.get_conn_db_type(self.source_conn_id)
    destination_db_type = self.get_conn_db_type(self.destination_conn_id)

    if source_db_type == 'mysql' and destination_db_type == 'hiveserver2':
        return MySqlToHiveTransfer(
            sql=self.sql,
            hive_table=self.destination_table,
            create=False,
            recreate=False,
            partition=None,
            delimiter=chr(1),  # Hive's default ^A field delimiter
            mysql_conn_id=self.source_conn_id,
            hive_cli_conn_id=self.destination_conn_id,
            tblproperties=None)

    if source_db_type == 'mssql' and destination_db_type == 'hiveserver2':
        # Fix: MsSqlToHiveTransfer's source-connection parameter is
        # ``mssql_conn_id``; the previous ``mysql_conn_id=`` keyword (a
        # copy-paste from the MySQL branch) never reached the operator's
        # source connection.
        return MsSqlToHiveTransfer(
            sql=self.sql,
            hive_table=self.destination_table,
            create=False,
            recreate=False,
            partition=None,
            delimiter=chr(1),  # Hive's default ^A field delimiter
            mssql_conn_id=self.source_conn_id,
            hive_cli_conn_id=self.destination_conn_id,
            tblproperties=None)

    if source_db_type == 'hiveserver2' and destination_db_type == 'mysql':
        return HiveToMySqlTransfer(
            sql=self.sql,
            mysql_table=self.destination_table,
            hiveserver2_conn_id=self.source_conn_id,
            mysql_conn_id=self.destination_conn_id,
            mysql_preoperator=None,
            mysql_postoperator=None,
            bulk_load=False)

    if source_db_type == 'oracle' and destination_db_type == 'oracle':
        return OracleToOracleTransfer(
            source_sql=self.sql,
            destination_table=self.destination_table,
            oracle_source_conn_id=self.source_conn_id,
            oracle_destination_conn_id=self.destination_conn_id,
            source_sql_params=None,
            rows_chunk=5000)

    # Default: same-dialect or unrecognized pairs go through the generic
    # SQL->SQL transfer. NOTE(review): no task_id is passed here (same as
    # the specialized branches) — presumably the caller assigns one;
    # confirm against call sites.
    return GenericTransfer(
        sql=self.sql,
        destination_table=self.destination_table,
        source_conn_id=self.source_conn_id,
        destination_conn_id=self.destination_conn_id,
        preoperator=None)
from airflow.operators.generic_transfer import GenericTransfer
from airflow.models import DAG
from datetime import datetime, timedelta

# Anchor the DAG two days in the past (at midnight) so the single @once
# run is immediately schedulable.
two_days_ago = datetime.combine(datetime.today() - timedelta(2),
                                datetime.min.time())

args = {
    'owner': 'viet',
    'start_date': two_days_ago,
    'email': '*****@*****.**',
}

dag = DAG(
    dag_id='example_generic_transfer',
    default_args=args,
    schedule_interval='@once',
    catchup=False,
)

sql = "SELECT * FROM holy;"

# Source and destination share a connection: copy the query result from
# table ``holy`` into table ``sensei`` on the same Postgres instance.
generic_transfer = GenericTransfer(
    task_id='generic_transfer',
    dag=dag,
    source_conn_id='airflow_pg',
    destination_conn_id='airflow_pg',
    destination_table='sensei',
    sql=sql,
)
from mydb1.sample2 ) select id , name from ( select id , name from w1 union all select id , name from w2 ) w3 """ t2 = GenericTransfer(task_id='test_m2m', source_conn_id='database-1-aws', destination_conn_id='database-2-aws', destination_table="mydb2.sample", sql=sql, dag=dag) t1 >> t2