def test_execute_delete_previous_runs_rows(
    self,
    mock_hive_metastore_hook,
    mock_presto_hook,
    mock_mysql_hook,
    mock_json_dumps,
):
    """Check the DELETE statement issued when earlier rows are reported.

    The MySQL hook mock answers ``get_records`` with ``True``; after
    ``execute`` the test asserts that ``run`` was called exactly once
    with the DELETE statement formatted from the operator's table, the
    mocked ``json.dumps`` return value and the operator's ``dttm``.
    """
    # Arrange: one fake column in the metastore, and a truthy answer
    # from the MySQL lookup.
    metastore_table = mock_hive_metastore_hook.return_value.get_table.return_value
    metastore_table.sd.cols = [fake_col]
    mysql = mock_mysql_hook.return_value
    mysql.get_records.return_value = True

    # Act.
    operator = HiveStatsCollectionOperator(**self.kwargs)
    operator.execute(context={})

    # Assert: the statement text must match character for character.
    delete_template = """ DELETE FROM hive_stats WHERE table_name='{}' AND partition_repr='{}' AND dttm='{}'; """
    expected_sql = delete_template.format(
        operator.table,
        mock_json_dumps.return_value,
        operator.dttm,
    )
    mysql.run.assert_called_once_with(expected_sql)
def test_execute(
    self,
    mock_hive_metastore_hook,
    mock_presto_hook,
    mock_mysql_hook,
    mock_json_dumps,
):
    """Check the hook calls and inserted rows for a plain ``execute`` run.

    The MySQL hook mock answers ``get_records`` with ``False``. The test
    then asserts how each hook mock and ``json.dumps`` were called, and
    that ``insert_rows`` received rows rebuilt here from the operator's
    ``get_default_exprs`` for every metastore column.
    """
    # Arrange: one fake column in the metastore, a falsy MySQL lookup.
    metastore = mock_hive_metastore_hook.return_value
    metastore.get_table.return_value.sd.cols = [fake_col]
    mysql = mock_mysql_hook.return_value
    mysql.get_records.return_value = False

    # Act.
    operator = HiveStatsCollectionOperator(**self.kwargs)
    operator.execute(context={})

    # Assert: every collaborator was called once with the operator's
    # own configuration.
    mock_hive_metastore_hook.assert_called_once_with(
        metastore_conn_id=operator.metastore_conn_id)
    metastore.get_table.assert_called_once_with(table_name=operator.table)
    mock_presto_hook.assert_called_once_with(
        presto_conn_id=operator.presto_conn_id)
    mock_mysql_hook.assert_called_once_with(operator.mysql_conn_id)
    mock_json_dumps.assert_called_once_with(
        operator.partition, sort_keys=True)

    # Rebuild the expected expressions: the row count first, then the
    # default expressions of each column, in insertion order.
    field_types = {
        column.name: column.type
        for column in metastore.get_table.return_value.sd.cols
    }
    expected_exprs = OrderedDict([(('', 'count'), 'COUNT(*)')])
    for name, hive_type in field_types.items():
        expected_exprs.update(operator.get_default_exprs(name, hive_type))

    # Pair each expression key with the matching value of the mocked
    # Presto result row.
    presto_row = mock_presto_hook.return_value.get_first.return_value
    expected_rows = [
        (operator.ds, operator.dttm, operator.table,
         mock_json_dumps.return_value) + (key[0], key[1], value)
        for key, value in zip(expected_exprs, presto_row)
    ]

    expected_fields = [
        'ds',
        'dttm',
        'table_name',
        'partition_repr',
        'col',
        'metric',
        'value',
    ]
    mysql.insert_rows.assert_called_once_with(
        table='hive_stats',
        rows=expected_rows,
        target_fields=expected_fields,
    )
def test_execute_with_assignment_func(
    self,
    mock_hive_metastore_hook,
    mock_presto_hook,
    mock_mysql_hook,
    mock_json_dumps,
):
    """Check the inserted rows when a custom ``assignment_func`` is given.

    A local ``assignment_func`` is added to ``self.kwargs`` before the
    operator is built. After ``execute`` the test asserts that
    ``insert_rows`` received rows rebuilt here by calling the operator's
    ``assignment_func`` for every metastore column.
    """
    def assignment_func(col, _):
        return {(col, 'test'): 'TEST({})'.format(col)}

    # Arrange: register the custom function, one fake metastore column,
    # and a falsy MySQL lookup.
    self.kwargs.update(assignment_func=assignment_func)
    metastore_table = mock_hive_metastore_hook.return_value.get_table.return_value
    metastore_table.sd.cols = [fake_col]
    mysql = mock_mysql_hook.return_value
    mysql.get_records.return_value = False

    # Act.
    operator = HiveStatsCollectionOperator(**self.kwargs)
    operator.execute(context={})

    # Rebuild the expected expressions: the row count first, then
    # whatever the operator's assignment_func yields per column.
    field_types = {
        column.name: column.type
        for column in metastore_table.sd.cols
    }
    expected_exprs = OrderedDict([(('', 'count'), 'COUNT(*)')])
    for name, hive_type in field_types.items():
        expected_exprs.update(operator.assignment_func(name, hive_type))

    # Pair each expression key with the matching value of the mocked
    # Presto result row.
    presto_row = mock_presto_hook.return_value.get_first.return_value
    expected_rows = [
        (operator.ds, operator.dttm, operator.table,
         mock_json_dumps.return_value) + (key[0], key[1], value)
        for key, value in zip(expected_exprs, presto_row)
    ]

    expected_fields = [
        'ds',
        'dttm',
        'table_name',
        'partition_repr',
        'col',
        'metric',
        'value',
    ]
    mysql.insert_rows.assert_called_once_with(
        table='hive_stats',
        rows=expected_rows,
        target_fields=expected_fields,
    )