def test_load_df_with_data_types(self, mock_run_cli):
    # One column per pandas/numpy data type the hook maps to a Hive type.
    ord_dict = OrderedDict()
    ord_dict['b'] = [True]
    ord_dict['i'] = [-1]
    ord_dict['t'] = [1]
    ord_dict['f'] = [0.0]
    ord_dict['c'] = ['c']
    ord_dict['M'] = [datetime.datetime(2018, 1, 1)]
    ord_dict['O'] = [object()]
    ord_dict['S'] = [b'STRING']
    ord_dict['U'] = ['STRING']
    ord_dict['V'] = [None]
    df = pd.DataFrame(ord_dict)

    hook = MockHiveCliHook()
    hook.load_df(df, 't')

    query = """
        CREATE TABLE IF NOT EXISTS t (
            `b` BOOLEAN,
            `i` BIGINT,
            `t` BIGINT,
            `f` DOUBLE,
            `c` STRING,
            `M` TIMESTAMP,
            `O` STRING,
            `S` STRING,
            `U` STRING,
            `V` STRING)
        ROW FORMAT DELIMITED
        FIELDS TERMINATED BY ','
        STORED AS textfile
        ;
    """
    assert_equal_ignore_multiple_spaces(self, mock_run_cli.call_args_list[0][0][0], query)
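# Every test in this section compares SQL through assert_equal_ignore_multiple_spaces,
# imported from tests.test_utils.asserts (visible in the MySQL test below). A minimal
# sketch of what such a helper plausibly looks like, assuming it only collapses runs
# of whitespace before delegating to unittest's assertEqual:
import re


def assert_equal_ignore_multiple_spaces(case, first, second, msg=None):
    """Assert two SQL strings match once all whitespace runs are collapsed."""

    def _trim(string):
        return re.sub(r"\s+", " ", string.strip())

    return case.assertEqual(_trim(first), _trim(second), msg)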
def test_execute(self, mock_run):
    s3_keys = ['1.csv', '2.csv']
    table = 'table'
    stage = 'stage'
    file_format = 'file_format'
    schema = 'schema'

    S3ToSnowflakeTransferOperator(
        s3_keys=s3_keys,
        table=table,
        stage=stage,
        file_format=file_format,
        schema=schema,
        columns_array=None,
        task_id="task_id",
        dag=None,
    ).execute(None)

    # Render the Python list as a SQL tuple: ['1.csv', '2.csv'] -> ('1.csv', '2.csv')
    files = str(s3_keys)
    files = files.replace('[', '(')
    files = files.replace(']', ')')
    base_sql = """
        FROM @{stage}/
        files={files}
        file_format={file_format}
    """.format(stage=stage, files=files, file_format=file_format)

    copy_query = """
        COPY INTO {schema}.{table} {base_sql}
    """.format(schema=schema, table=table, base_sql=base_sql)

    assert mock_run.call_count == 1
    assert_equal_ignore_multiple_spaces(self, mock_run.call_args[0][0], copy_query)
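# For s3_keys=['1.csv', '2.csv'] the test's own template renders (whitespace aside) as:
#
#     COPY INTO schema.table
#     FROM @stage/
#     files=('1.csv', '2.csv')
#     file_format=file_format
#
# i.e. the Python list is rewritten into a SQL tuple before the comparison.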
def test_execute(self, mock_run, mock_session):
    access_key = "aws_access_key_id"
    secret_key = "aws_secret_access_key"
    mock_session.return_value = Session(access_key, secret_key)
    schema = "schema"
    table = "table"
    s3_bucket = "bucket"
    s3_key = "key"
    copy_options = ""

    op = S3ToRedshiftOperator(
        schema=schema,
        table=table,
        s3_bucket=s3_bucket,
        s3_key=s3_key,
        copy_options=copy_options,
        redshift_conn_id="redshift_conn_id",
        aws_conn_id="aws_conn_id",
        task_id="task_id",
        dag=None,
    )
    op.execute(None)

    # The expected statement is spelled out inline here; the variant below derives it
    # from the operator's own query builder instead.
    copy_query = f"""
        COPY {schema}.{table}
        FROM 's3://{s3_bucket}/{s3_key}'
        with credentials
        'aws_access_key_id={access_key};aws_secret_access_key={secret_key}'
        {copy_options};
    """

    assert mock_run.call_count == 1
    assert_equal_ignore_multiple_spaces(self, mock_run.call_args[0][0], copy_query)
def test_execute(self, mock_run, mock_session):
    access_key = "aws_access_key_id"
    secret_key = "aws_secret_access_key"
    mock_session.return_value = Session(access_key, secret_key)
    mock_session.return_value.access_key = access_key
    mock_session.return_value.secret_key = secret_key
    mock_session.return_value.token = None
    schema = "schema"
    table = "table"
    s3_bucket = "bucket"
    s3_key = "key"
    copy_options = ""

    op = S3ToRedshiftOperator(
        schema=schema,
        table=table,
        s3_bucket=s3_bucket,
        s3_key=s3_key,
        copy_options=copy_options,
        redshift_conn_id="redshift_conn_id",
        aws_conn_id="aws_conn_id",
        task_id="task_id",
        dag=None,
    )
    op.execute(None)

    credentials_block = build_credentials_block(mock_session.return_value)
    copy_query = op._build_copy_query(credentials_block, copy_options)

    assert mock_run.call_count == 1
    assert access_key in copy_query
    assert secret_key in copy_query
    assert_equal_ignore_multiple_spaces(self, mock_run.call_args[0][0], copy_query)
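# build_credentials_block comes from the Amazon provider's utilities. These tests only
# rely on it producing the aws_access_key_id=...;aws_secret_access_key=... line seen in
# the inline variants, plus a token field for STS sessions. A plausible sketch under
# that assumption, not the provider's actual code:
def build_credentials_block(credentials):
    """Format boto3-style credentials for a Redshift 'with credentials' clause."""
    if credentials.token:
        return (
            f"aws_access_key_id={credentials.access_key};"
            f"aws_secret_access_key={credentials.secret_key};"
            f"token={credentials.token}"
        )
    return (
        f"aws_access_key_id={credentials.access_key};"
        f"aws_secret_access_key={credentials.secret_key}"
    )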
def test_execute(
    self,
    table_as_file_name,
    expected_s3_key,
    mock_run,
    mock_session,
):
    access_key = "aws_access_key_id"
    secret_key = "aws_secret_access_key"
    mock_session.return_value = Session(access_key, secret_key)
    schema = "schema"
    table = "table"
    s3_bucket = "bucket"
    s3_key = "key"
    unload_options = ['HEADER']

    RedshiftToS3Operator(
        schema=schema,
        table=table,
        s3_bucket=s3_bucket,
        s3_key=s3_key,
        unload_options=unload_options,
        include_header=True,
        redshift_conn_id="redshift_conn_id",
        aws_conn_id="aws_conn_id",
        task_id="task_id",
        table_as_file_name=table_as_file_name,
        dag=None,
    ).execute(None)

    unload_options = '\n\t\t\t'.join(unload_options)
    select_query = "SELECT * FROM {schema}.{table}".format(schema=schema, table=table)
    unload_query = """
        UNLOAD ('{select_query}')
        TO 's3://{s3_bucket}/{s3_key}'
        with credentials
        'aws_access_key_id={access_key};aws_secret_access_key={secret_key}'
        {unload_options};
    """.format(
        select_query=select_query,
        s3_bucket=s3_bucket,
        s3_key=expected_s3_key,
        access_key=access_key,
        secret_key=secret_key,
        unload_options=unload_options,
    )

    assert mock_run.call_count == 1
    assert_equal_ignore_multiple_spaces(self, mock_run.call_args[0][0], unload_query)
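# table_as_file_name and expected_s3_key arrive via a pytest parametrization that this
# excerpt omits. A hypothetical pairing consistent with appending the table name to the
# S3 prefix when table_as_file_name=True (the real suite's values may differ):
import pytest

UNLOAD_KEY_CASES = pytest.mark.parametrize(
    "table_as_file_name, expected_s3_key",
    [
        (True, "key/table_"),  # table name appended to the key prefix
        (False, "key"),        # key used verbatim
    ],
)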
def test_execute_sts_token(
    self,
    table_as_file_name,
    expected_s3_key,
    mock_run,
    mock_session,
):
    # An ASIA-prefixed access key marks temporary STS credentials, which carry a token.
    access_key = "ASIA_aws_access_key_id"
    secret_key = "aws_secret_access_key"
    token = "token"
    mock_session.return_value = Session(access_key, secret_key, token)
    mock_session.return_value.access_key = access_key
    mock_session.return_value.secret_key = secret_key
    mock_session.return_value.token = token
    schema = "schema"
    table = "table"
    s3_bucket = "bucket"
    s3_key = "key"
    unload_options = ['HEADER']

    op = RedshiftToS3Operator(
        schema=schema,
        table=table,
        s3_bucket=s3_bucket,
        s3_key=s3_key,
        unload_options=unload_options,
        include_header=True,
        redshift_conn_id="redshift_conn_id",
        aws_conn_id="aws_conn_id",
        task_id="task_id",
        table_as_file_name=table_as_file_name,
        dag=None,
    )
    op.execute(None)

    unload_options = '\n\t\t\t'.join(unload_options)
    select_query = f"SELECT * FROM {schema}.{table}"
    credentials_block = build_credentials_block(mock_session.return_value)
    unload_query = op._build_unload_query(credentials_block, select_query, expected_s3_key, unload_options)

    assert mock_run.call_count == 1
    assert access_key in unload_query
    assert secret_key in unload_query
    assert token in unload_query
    assert_equal_ignore_multiple_spaces(self, mock_run.call_args[0][0], unload_query)
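# Here the expected SQL comes from the operator's own _build_unload_query instead of an
# inline template. Judging from the inline variant above, the builder presumably fills
# the same UNLOAD skeleton; a rough sketch, not the provider's actual implementation:
def _build_unload_query(self, credentials_block, select_query, s3_key, unload_options):
    return f"""
        UNLOAD ('{select_query}')
        TO 's3://{self.s3_bucket}/{s3_key}'
        with credentials
        '{credentials_block}'
        {unload_options};
    """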
def test_custom_select_query_unloading(
    self,
    table,
    table_as_file_name,
    expected_s3_key,
    mock_run,
    mock_session,
):
    access_key = "aws_access_key_id"
    secret_key = "aws_secret_access_key"
    mock_session.return_value = Session(access_key, secret_key)
    mock_session.return_value.access_key = access_key
    mock_session.return_value.secret_key = secret_key
    mock_session.return_value.token = None
    s3_bucket = "bucket"
    s3_key = "key"
    unload_options = ['HEADER']
    # A user-supplied query replaces the default "SELECT * FROM schema.table".
    select_query = "select column from table"

    op = RedshiftToS3Operator(
        select_query=select_query,
        table=table,
        table_as_file_name=table_as_file_name,
        s3_bucket=s3_bucket,
        s3_key=s3_key,
        unload_options=unload_options,
        include_header=True,
        redshift_conn_id="redshift_conn_id",
        aws_conn_id="aws_conn_id",
        task_id="task_id",
        dag=None,
    )
    op.execute(None)

    unload_options = '\n\t\t\t'.join(unload_options)
    credentials_block = build_credentials_block(mock_session.return_value)
    unload_query = op._build_unload_query(credentials_block, select_query, expected_s3_key, unload_options)

    assert mock_run.call_count == 1
    assert access_key in unload_query
    assert secret_key in unload_query
    assert_equal_ignore_multiple_spaces(self, mock_run.call_args[0][0], unload_query)
def test_mysql_hook_test_bulk_dump_mock(self, mock_get_conn):
    mock_execute = mock.MagicMock()
    mock_get_conn.return_value.cursor.return_value.execute = mock_execute

    hook = MySqlHook('airflow_db')
    table = "INFORMATION_SCHEMA.TABLES"
    tmp_file = "/path/to/output/file"
    hook.bulk_dump(table, tmp_file)

    from tests.test_utils.asserts import assert_equal_ignore_multiple_spaces

    assert mock_execute.call_count == 1
    query = """
        SELECT * INTO OUTFILE '{tmp_file}'
        FROM {table}
    """.format(tmp_file=tmp_file, table=table)
    assert_equal_ignore_multiple_spaces(self, mock_execute.call_args[0][0], query)
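# The mock_get_conn argument implies a @mock.patch decorator that this excerpt drops.
# A plausible reconstruction, assuming get_conn is patched on the hook class (the exact
# target string in the real suite may differ):
from unittest import mock

@mock.patch("airflow.providers.mysql.hooks.mysql.MySqlHook.get_conn")
def test_mysql_hook_test_bulk_dump_mock(self, mock_get_conn):
    ...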
def test_truncate(self, mock_run, mock_session):
    access_key = "aws_access_key_id"
    secret_key = "aws_secret_access_key"
    mock_session.return_value = Session(access_key, secret_key)
    schema = "schema"
    table = "table"
    s3_bucket = "bucket"
    s3_key = "key"
    copy_options = ""

    op = S3ToRedshiftOperator(
        schema=schema,
        table=table,
        s3_bucket=s3_bucket,
        s3_key=s3_key,
        copy_options=copy_options,
        truncate_table=True,
        redshift_conn_id="redshift_conn_id",
        aws_conn_id="aws_conn_id",
        task_id="task_id",
        dag=None,
    )
    op.execute(None)

    copy_statement = f"""
        COPY {schema}.{table}
        FROM 's3://{s3_bucket}/{s3_key}'
        with credentials
        'aws_access_key_id={access_key};aws_secret_access_key={secret_key}'
        {copy_options};
    """
    truncate_statement = f'TRUNCATE TABLE {schema}.{table};'
    # truncate_table=True wraps the TRUNCATE and COPY in a single transaction.
    transaction = f"""
        BEGIN;
        {truncate_statement}
        {copy_statement}
        COMMIT
    """
    assert_equal_ignore_multiple_spaces(self, mock_run.call_args[0][0], transaction)
    assert mock_run.call_count == 1
def test_truncate(self, mock_run, mock_session):
    access_key = "aws_access_key_id"
    secret_key = "aws_secret_access_key"
    mock_session.return_value = Session(access_key, secret_key)
    mock_session.return_value.access_key = access_key
    mock_session.return_value.secret_key = secret_key
    mock_session.return_value.token = None
    schema = "schema"
    table = "table"
    s3_bucket = "bucket"
    s3_key = "key"
    copy_options = ""

    op = S3ToRedshiftOperator(
        schema=schema,
        table=table,
        s3_bucket=s3_bucket,
        s3_key=s3_key,
        copy_options=copy_options,
        truncate_table=True,
        redshift_conn_id="redshift_conn_id",
        aws_conn_id="aws_conn_id",
        task_id="task_id",
        dag=None,
    )
    op.execute(None)

    credentials_block = build_credentials_block(mock_session.return_value)
    copy_statement = op._build_copy_query(credentials_block, copy_options)
    truncate_statement = f'TRUNCATE TABLE {schema}.{table};'
    transaction = f"""
        BEGIN;
        {truncate_statement}
        {copy_statement}
        COMMIT
    """
    assert_equal_ignore_multiple_spaces(self, mock_run.call_args[0][0], transaction)
    assert mock_run.call_count == 1
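# _build_copy_query mirrors _build_unload_query: judging by the inline expected
# statement in the first S3ToRedshiftOperator test, it presumably emits the COPY
# skeleton below. A rough sketch, not the provider's actual code:
def _build_copy_query(self, credentials_block, copy_options):
    return f"""
        COPY {self.schema}.{self.table}
        FROM 's3://{self.s3_bucket}/{self.s3_key}'
        with credentials
        '{credentials_block}'
        {copy_options};
    """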