def test_runs_for_hive_stats(self, mock_hive_metastore_hook):
    """Run the stats operator with stubbed Presto and MySQL hooks and check its queries.

    ``PrestoHook`` and ``MySqlHook`` are patched in the ``hive_stats``
    operator module so that they return local mock hooks. After the operator
    has run for ``DEFAULT_DATE``, the test checks that:

    * the Presto mock's ``get_first`` was last called with the row-count query,
    * the first ``get_records`` call on the MySQL mock received the expected
      ``hive_stats`` lookup query (compared after collapsing whitespace runs),
    * ``insert_rows`` was last called with the expected row and target fields.

    :param mock_hive_metastore_hook: not referenced in this body; presumably
        injected by a patch decorator outside this view -- TODO confirm.
    """
    mysql_stub = MockMySqlHook()
    presto_stub = MockPrestoHook()

    # Build both patchers first; nothing is patched until the ``with`` enters them.
    presto_patcher = patch(
        'airflow.providers.apache.hive.operators.hive_stats.PrestoHook',
        return_value=presto_stub,
    )
    mysql_patcher = patch(
        'airflow.providers.apache.hive.operators.hive_stats.MySqlHook',
        return_value=mysql_stub,
    )
    with presto_patcher, mysql_patcher:
        stats_operator = HiveStatsCollectionOperator(
            task_id='hive_stats_check',
            table="airflow.static_babynames_partitioned",
            partition={'ds': DEFAULT_DATE_DS},
            dag=self.dag,
        )
        stats_operator.run(
            start_date=DEFAULT_DATE,
            end_date=DEFAULT_DATE,
            ignore_ti_state=True,
        )

    # The row count must have been requested through the Presto hook.
    expected_count_hql = (
        "SELECT COUNT(*) AS __count FROM airflow."
        "static_babynames_partitioned WHERE ds = '2015-01-01';"
    )
    presto_stub.get_first.assert_called_with(hql=expected_count_hql)

    # First positional argument of the first ``get_records`` call is the
    # lookup SQL; collapse whitespace runs so the query layout does not matter.
    expected_lookup_sql = (
        "SELECT 1 FROM hive_stats WHERE table_name='airflow."
        "static_babynames_partitioned' AND "
        "partition_repr='{\"ds\": \"2015-01-01\"}' AND "
        "dttm='2015-01-01T00:00:00+00:00' "
        "LIMIT 1;"
    )
    first_call = mysql_stub.get_records.call_args_list[0]
    positional_args = first_call[0]
    issued_lookup_sql = re.sub(r'\s{2,}', ' ', positional_args[0]).strip()
    assert expected_lookup_sql == issued_lookup_sql

    # Finally, the collected stats row must have been written to ``hive_stats``.
    expected_row = (
        '2015-01-01',
        '2015-01-01T00:00:00+00:00',
        'airflow.static_babynames_partitioned',
        '{"ds": "2015-01-01"}',
        '',
        'count',
        ['val_0', 'val_1'],
    )
    stats_columns = [
        'ds',
        'dttm',
        'table_name',
        'partition_repr',
        'col',
        'metric',
        'value',
    ]
    mysql_stub.insert_rows.assert_called_with(
        table='hive_stats',
        rows=[expected_row],
        target_fields=stats_columns,
    )
def test_hive_stats(self):
    """Smoke-test the stats operator on the ``DEFAULT_DATE_DS`` partition.

    No hooks are patched inside this method and nothing is asserted; the
    test passes when ``run`` completes without raising.
    """
    partition_spec = {'ds': DEFAULT_DATE_DS}
    stats_operator = HiveStatsCollectionOperator(
        task_id='hive_stats_check',
        table="airflow.static_babynames_partitioned",
        partition=partition_spec,
        dag=self.dag,
    )
    # ignore_ti_state=True is passed through to ``run`` unchanged.
    stats_operator.run(
        start_date=DEFAULT_DATE,
        end_date=DEFAULT_DATE,
        ignore_ti_state=True,
    )