Example #1
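    # Checks that a configured mysql_preoperator is executed via MySqlHook.run()
    # (the hooks are assumed to be injected as mocks, e.g. through @mock.patch decorators).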
    def test_execute_mysql_preoperator(self, mock_hive_hook, mock_mysql_hook):
        self.kwargs.update(dict(mysql_preoperator='preoperator'))

        HiveToMySqlTransfer(**self.kwargs).execute(context={})

        mock_mysql_hook.return_value.run.assert_called_once_with(
            self.kwargs['mysql_preoperator'])
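Example #2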
    def test_execute(self, mock_hive_hook, mock_mysql_hook):
        HiveToMySqlTransfer(**self.kwargs).execute(context={})

        mock_hive_hook.assert_called_once_with(hiveserver2_conn_id=self.kwargs['hiveserver2_conn_id'])
        mock_hive_hook.return_value.get_records.assert_called_once_with('sql', hive_conf={})
        mock_mysql_hook.assert_called_once_with(mysql_conn_id=self.kwargs['mysql_conn_id'])
        mock_mysql_hook.return_value.insert_rows.assert_called_once_with(
            table=self.kwargs['mysql_table'],
            rows=mock_hive_hook.return_value.get_records.return_value
        )
Example #3
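    # Shows that a user-supplied hive_conf is merged on top of the context-derived
    # Airflow variables (context_to_airflow_vars) before get_records() is called.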
    def test_execute_with_hive_conf(self, mock_hive_hook, mock_mysql_hook):
        context = {}
        self.kwargs.update(
            dict(hive_conf={'mapreduce.job.queuename': 'fake_queue'}))

        HiveToMySqlTransfer(**self.kwargs).execute(context=context)

        hive_conf = context_to_airflow_vars(context)
        hive_conf.update(self.kwargs['hive_conf'])
        mock_hive_hook.return_value.get_records.assert_called_once_with(
            self.kwargs['sql'], hive_conf=hive_conf)
Example #4
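    # End-to-end style test: HiveServer2Hook and MySqlHook are swapped for mocks,
    # the operator is run for one execution date, and the rendered query, hive_conf,
    # preoperator statements and inserted rows are all asserted afterwards.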
    def test_hive_to_mysql(self):
        test_hive_results = 'test_hive_results'

        mock_hive_hook = MockHiveServer2Hook()
        mock_hive_hook.get_records = MagicMock(return_value=test_hive_results)

        mock_mysql_hook = MockMySqlHook()
        mock_mysql_hook.run = MagicMock()
        mock_mysql_hook.insert_rows = MagicMock()

        with patch('airflow.operators.hive_to_mysql.HiveServer2Hook',
                   return_value=mock_hive_hook):
            with patch('airflow.operators.hive_to_mysql.MySqlHook',
                       return_value=mock_mysql_hook):

                op = HiveToMySqlTransfer(
                    mysql_conn_id='airflow_db',
                    task_id='hive_to_mysql_check',
                    sql="""
                        SELECT name
                        FROM airflow.static_babynames
                        LIMIT 100
                        """,
                    mysql_table='test_static_babynames',
                    mysql_preoperator=[
                        'DROP TABLE IF EXISTS test_static_babynames;',
                        'CREATE TABLE test_static_babynames (name VARCHAR(500))',
                    ],
                    dag=self.dag)
                op.clear(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE)
                op.run(start_date=DEFAULT_DATE,
                       end_date=DEFAULT_DATE,
                       ignore_ti_state=True)

        raw_select_name_query = mock_hive_hook.get_records.call_args_list[0][
            0][0]
        actual_select_name_query = re.sub(r'\s{2,}', ' ',
                                          raw_select_name_query).strip()
        expected_select_name_query = 'SELECT name FROM airflow.static_babynames LIMIT 100'
        self.assertEqual(expected_select_name_query, actual_select_name_query)

        actual_hive_conf = mock_hive_hook.get_records.call_args_list[0][1][
            'hive_conf']
        expected_hive_conf = {
            'airflow.ctx.dag_owner': 'airflow',
            'airflow.ctx.dag_id': 'test_dag_id',
            'airflow.ctx.task_id': 'hive_to_mysql_check',
            'airflow.ctx.execution_date': '2015-01-01T00:00:00+00:00'
        }
        self.assertEqual(expected_hive_conf, actual_hive_conf)

        expected_mysql_preoperator = [
            'DROP TABLE IF EXISTS test_static_babynames;',
            'CREATE TABLE test_static_babynames (name VARCHAR(500))'
        ]
        mock_mysql_hook.run.assert_called_with(expected_mysql_preoperator)

        mock_mysql_hook.insert_rows.assert_called_with(
            table='test_static_babynames', rows=test_hive_results)
Example #5
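    # Picks a concrete transfer operator based on the source and destination
    # connection types, falling back to GenericTransfer when no specific match exists.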
    def match_datatransfer_operater(self):
        source_db_type = self.get_conn_db_type(self.source_conn_id)
        destination_db_type = self.get_conn_db_type(self.destination_conn_id)

        if source_db_type == 'mysql' and destination_db_type == 'hiveserver2':
            return MySqlToHiveTransfer(
                sql=self.sql,
                hive_table=self.destination_table,
                create=False,
                recreate=False,
                partition=None,
                delimiter=chr(1),
                mysql_conn_id=self.source_conn_id,
                hive_cli_conn_id=self.destination_conn_id,
                tblproperties=None)

        if source_db_type == 'mssql' and destination_db_type == 'hiveserver2':
            return MsSqlToHiveTransfer(
                sql=self.sql,
                hive_table=self.destination_table,
                create=False,
                recreate=False,
                partition=None,
                delimiter=chr(1),
                mssql_conn_id=self.source_conn_id,
                hive_cli_conn_id=self.destination_conn_id,
                tblproperties=None)

        if source_db_type == 'hiveserver2' and destination_db_type == 'mysql':
            return HiveToMySqlTransfer(sql=self.sql,
                                       mysql_table=self.destination_table,
                                       hiveserver2_conn_id=self.source_conn_id,
                                       mysql_conn_id=self.destination_conn_id,
                                       mysql_preoperator=None,
                                       mysql_postoperator=None,
                                       bulk_load=False)

        if source_db_type == 'oracle' and destination_db_type == 'oracle':
            return OracleToOracleTransfer(
                source_sql=self.sql,
                destination_table=self.destination_table,
                oracle_source_conn_id=self.source_conn_id,
                oracle_destination_conn_id=self.destination_conn_id,
                source_sql_params=None,
                rows_chunk=5000)

        return GenericTransfer(sql=self.sql,
                               destination_table=self.destination_table,
                               source_conn_id=self.source_conn_id,
                               destination_conn_id=self.destination_conn_id,
                               preoperator=None)
Example #6
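    # With bulk_load=True the operator dumps the Hive result to a temporary file via
    # to_csv() and loads it with MySqlHook.bulk_load() instead of insert_rows()
    # (the temp file and both hooks are assumed to be patched with mocks).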
    def test_execute_bulk_load(self, mock_hive_hook, mock_tmp_file,
                               mock_mysql_hook):
        type(mock_tmp_file).name = PropertyMock(return_value='tmp_file')
        context = {}
        self.kwargs.update(dict(bulk_load=True))

        HiveToMySqlTransfer(**self.kwargs).execute(context=context)

        mock_tmp_file.assert_called_once_with()
        mock_hive_hook.return_value.to_csv.assert_called_once_with(
            self.kwargs['sql'],
            mock_tmp_file.return_value.name,
            delimiter='\t',
            lineterminator='\n',
            output_header=False,
            hive_conf=context_to_airflow_vars(context))
        mock_mysql_hook.return_value.bulk_load.assert_called_once_with(
            table=self.kwargs['mysql_table'],
            tmp_file=mock_tmp_file.return_value.name)
        mock_tmp_file.return_value.close.assert_called_once_with()
Example #7
    def test_hive_to_mysql(self):
        op = HiveToMySqlTransfer(
            mysql_conn_id='airflow_db',
            task_id='hive_to_mysql_check',
            sql="""
                SELECT name
                FROM airflow.static_babynames
                LIMIT 100
                """,
            mysql_table='test_static_babynames',
            mysql_preoperator=[
                'DROP TABLE IF EXISTS test_static_babynames;',
                'CREATE TABLE test_static_babynames (name VARCHAR(500))',
            ],
            dag=self.dag)
        op.clear(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE)
        op.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE,
               ignore_ti_state=True)
Example #8
    dag=dag)

insert_bdm_dim_metrics = HiveToMySqlTransfer(
    task_id='insert_bdm_dim_metrics',
    sql="""
        select
        null,
        dt,
        area_name,
        bdm_id,
        bdm_name,
        hbdm_name,
        take_time_avg,
        delivery_time_avg,
        score_peisong_avg,
        cancel_order_cnt,
        user_cancel_order_cnt,
        merchant_cancel_order_cnt,
        sys_cancel_order_cnt
        from ofood_bi.ofood_bdm_area_metrics_report
        where dt = '{{ ds }}'
        """,
    mysql_conn_id='mysql_bi',
    mysql_table='ofood_bdm_area_metrics_report',
    dag=dag)

insert_shop_list_metrics = HiveToMySqlTransfer(
Example #9
    """,
    mysql_conn_id='opay_spread_mysql',
    dag=dag)

promoter_detail_to_msyql = HiveToMySqlTransfer(
    task_id='promoter_detail_to_msyql',
    sql="""
            SELECT
                null as id,
                t.code as code,
                t.bind_number as phone,
                t.bind_device as device_id,
                0 as register_time,
                t.bind_time as bind_time,
                {{ ds_nodash }},
                {{ execution_date.hour}},
                ip
            FROM oride_dw.dwd_oride_driver_cheating_detection_hi 
            LATERAL VIEW json_tuple(event_value, 'bind_refferal_code', 'bind_number', 'bind_device_id', 'bind_time') t AS 
                code, bind_number, bind_device, bind_time  
            WHERE
                  dt='{{ ds }}'
                  AND hour='{{ execution_date.strftime("%H") }}'
        """,
    mysql_conn_id='opay_spread_mysql',
    mysql_table='promoter_users_device',
    dag=dag
)

promoter_hour_to_msyql = HiveToMySqlTransfer(
    task_id='promoter_hour_to_msyql',
    sql="""
Example #10
        ''',
    schema='ofood_bi',
    dag=dag)

insert_mkt_metrics = HiveToMySqlTransfer(
    task_id='insert_mkt_metrics',
    sql="""
        select
        null,
        dt,
        mkt_id,
        mkt_phone,
        bdm_id,
        bdm_name,
        new_user_cnt,
        pay_order_cnt
        from ofood_bi.ofood_mkt_daily_metrics_info
        where dt = '{{ ds }}'
        """,
    mysql_conn_id='mysql_bi',
    mysql_table='ofood_mkt_daily_metrics_info',
    dag=dag)

validate_partition_data >> jh_order_validate_task >> create_mkt_metrics
validate_partition_data >> bd_admin_users_validate_task >> create_mkt_metrics
validate_partition_data >> bd_invitation_info_validate_task >> create_mkt_metrics
create_mkt_metrics >> insert_mkt_metrics
Example #11
from airflow import DAG
from airflow.operators.hive_to_mysql import HiveToMySqlTransfer
from airflow.operators.mysql_operator import MySqlOperator
from airflow.utils.dates import days_ago
from datetime import datetime, timedelta

default_args = {
    'owner': 'airflow',
    'start_date': days_ago(2),
    'email_on_failure': False,
    'email_on_retry': False,
    'retries': 1,
    'retry_delay': timedelta(minutes=1)
}

dag = DAG(dag_id='hive_to_mysql',
          schedule_interval='@once',
          default_args=default_args)

t1 = MySqlOperator(task_id='drop_table',
                   sql='drop table if exists temp_drivers',
                   dag=dag)
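# HiveToMySqlTransfer re-creates temp_drivers via mysql_preoperator and then
# loads the result of the Hive query into the MySQL table.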
t2 = HiveToMySqlTransfer(
    task_id='hive_to_mysql',
    mysql_table='temp_drivers',
    mysql_preoperator='create table temp_drivers(col_value varchar(256))',
    sql='select * from temp_drivers',
    dag=dag)

t1 >> t2
Example #12
insertoverwrite_final = HiveOperator(task_id='insertoverwrite_final',
                                     hql=hiveSQL_insertoverwrite_final,
                                     hive_cli_conn_id='beeline',
                                     dag=dag)

createTableToMysql = MySqlOperator(mysql_conn_id='mysql',
                                   task_id='createTableToMysql',
                                   sql=hiveSQL_create_mysql,
                                   database='spotify_data',
                                   dag=dag)

exportToMysql = HiveToMySqlTransfer(
    task_id='exportToMysql',
    sql="SELECT * FROM final",
    mysql_preoperator='DELETE FROM spotify_data.titledata',
    mysql_table='spotify_data.titledata',
    mysql_conn_id='mysql',
    hiveserver2_conn_id='beeline',
    dag=dag)

########################################################### C O P Y ################################################################
i = 0
if os.path.exists('/home/airflow/requestData/playlist'):
    for filename in os.listdir('/home/airflow/requestData/playlist'):
        if filename.endswith(".json"):
            reqDataCopyPlaylist = HdfsPutFileOperator(
                task_id='requestdata_copy_playlist' + str(i),
                local_file='/home/airflow/requestData/playlist/' + filename,
                remote_file='/user/hadoop/spotify/rawPlaylist/' + filename,
                hdfs_conn_id='hdfs',
                dag=dag,
Example #13
    bucket_key=
    'okash/okash/{table}/dt={{{{ ds }}}}/hour={{{{ execution_date.strftime("%H") }}}}/'
    .format(table='client'),
    bucket_name='okash',
    timeout=3600,
    dag=dag)

add_client_partitions = HiveOperator(task_id='add_client_partitions',
                                     hql="""
            ALTER TABLE ods_log_client_hi ADD IF NOT EXISTS PARTITION (dt = '{{ ds }}', hour = '{{ execution_date.strftime("%H") }}');
    """,
                                     schema='okash_dw',
                                     dag=dag)

export_to_mysql = HiveToMySqlTransfer(task_id='export_to_mysql',
                                      sql="""
            --SET mapreduce.job.queuename=root.airflow;
            SELECT
                *,
                from_unixtime(unix_timestamp())
            FROM
                okash_dw.ods_log_client_hi
            WHERE
                dt='{{ ds }}'
                AND hour='{{ execution_date.strftime("%H") }}'
        """,
                                      mysql_conn_id='mysql_bi',
                                      mysql_table='okash_dw.ods_log_client_hi',
                                      dag=dag)

check_client_file >> add_client_partitions >> export_to_mysql
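Example #14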
insert_crm_metrics = HiveToMySqlTransfer(
    task_id='insert_crm_metrics',
    sql=""" 
        select 
        null,
        bd_id,
        dt,
        username,
        area_name,
        shop_id,
        title,
        total_number_of_merchants,
        total_number_of_new_merchants,
        trade_number_of_merchants,
        have_price_number_of_merchants,
        number_of_new_merchants,
        number_of_pay_orders,
        number_of_new_users,
        gmv,
        paid_in_amount,
        platform_subsidies,
        amount_and_first_price,
        net_turnover,
        total_number_of_order,
        number_of_valid_order,
        number_of_cancel_order_before_payment,
        number_of_cancel_order_after_payment,
        total_number_of_invitation_new_users,
        number_of_invitation_new_users,
        roof_plat_coupon,
        roof_coupon
        from ofood_bi.ofood_area_shop_metrics_info
        where dt = '{{ ds }}'
        """,
    mysql_conn_id='mysql_bi',
    mysql_table='ofood_area_shop_metrics_info',
    dag=dag)
Example #15
order_location_info_to_msyql = HiveToMySqlTransfer(
    task_id='order_location_info_to_msyql',
    sql="""
            select
            null,
            dt,
            order_id,
            user_id,
            driver_id,
            create_time,
            status,
            start_loc,
            end_loc,
            looking_for_a_driver_show,
            successful_order_show,
            accept_order_click,
            rider_arrive_show,
            confirm_arrive_click_arrived,
            pick_up_passengers_sliding_arrived,
            start_ride_show,
            start_ride_sliding,
            complete_the_order_show,
            start_ride_sliding_arrived,
            loc_list
            from oride_dw.dwd_oride_order_location_di
            where country_code = 'nal' and dt='{{ ds }}'
        """,
    mysql_conn_id='mysql_oride_location',
    mysql_table='oride_order_location_info',
    dag=dag)
Example #16
hive_insert_overwrite_final_address_data >> create_table_remote

# --------------------------------------------------------------------------------
# clear table remote
# --------------------------------------------------------------------------------

delete_from_remote = MySqlOperator(
    task_id='delete_from_remote',
    sql="DELETE FROM final_address_data;",
    mysql_conn_id='mysql_default',
    database='db'
)

create_table_remote >> delete_from_remote

# --------------------------------------------------------------------------------
# copy table to remote
# --------------------------------------------------------------------------------

previous_task = delete_from_remote
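# One transfer task per country, chained off delete_from_remote so the loads run sequentially.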
for country in coutry_list:

    hive_to_mysql = HiveToMySqlTransfer(
        task_id='hive_to_mysql_' + country,
        sql="select * from default.final_address_data where country like '" + country + "'",
        hiveserver2_conn_id='hiveserver2',
        mysql_table='db.final_address_data',
        mysql_conn_id='mysql_default',
    )
    previous_task >> hive_to_mysql
    previous_task = hive_to_mysql