def test_file_present(self, sftp_hook_mock):
    sftp_hook_mock.return_value.get_mod_time.return_value = '19700101000000'
    sftp_sensor = SFTPSensor(task_id='unit_test',
                             path='/path/to/file/1970-01-01.txt')
    context = {'ds': '1970-01-01'}
    output = sftp_sensor.poke(context)
    sftp_hook_mock.return_value.get_mod_time.assert_called_with(
        '/path/to/file/1970-01-01.txt')
    self.assertTrue(output)
def test_sftp_failure(self, sftp_hook_mock):
    sftp_hook_mock.return_value.get_mod_time.side_effect = OSError(
        SFTP_FAILURE, 'SFTP failure')
    sftp_sensor = SFTPSensor(task_id='unit_test',
                             path='/path/to/file/1970-01-01.txt')
    context = {'ds': '1970-01-01'}
    with self.assertRaises(OSError):
        sftp_sensor.poke(context)
    sftp_hook_mock.return_value.get_mod_time.assert_called_with(
        '/path/to/file/1970-01-01.txt')
def test_file_absent(self, sftp_hook_mock):
    sftp_hook_mock.return_value.get_mod_time.side_effect = OSError(
        SFTP_NO_SUCH_FILE, 'File missing')
    sftp_sensor = SFTPSensor(task_id='unit_test',
                             path='/path/to/file/1970-01-01.txt')
    context = {'ds': '1970-01-01'}
    output = sftp_sensor.poke(context)
    sftp_hook_mock.return_value.get_mod_time.assert_called_with(
        '/path/to/file/1970-01-01.txt')
    self.assertFalse(output)
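# The three tests above assume unittest scaffolding that patches SFTPHook at
# the path where the sensor module imports it, so each test method receives
# the patched class as sftp_hook_mock. A minimal sketch of that boilerplate
# (the contrib-era import paths are assumptions, not shown in the snippets):

import unittest
from unittest import mock

from paramiko.sftp import SFTP_FAILURE, SFTP_NO_SUCH_FILE

from airflow.contrib.sensors.sftp_sensor import SFTPSensor


@mock.patch('airflow.contrib.sensors.sftp_sensor.SFTPHook')
class SFTPSensorTest(unittest.TestCase):
    # mock.patch on the class decorates every test_* method, passing the
    # patched SFTPHook class in as the extra sftp_hook_mock argument
    ...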
default_args = {
    # ... earlier keys, including 'start_date', elided ...
    'email': [my_email_address],
    'email_on_failure': True,
    'email_on_retry': True,
    'retries': 1,
    'retry_delay': timedelta(minutes=5),
}

dag = DAG(dag_name,
          catchup=False,
          default_args=default_args,
          schedule_interval="30 8 * * *")  # daily at 08:30

# the following tasks are created by instantiating operators
detect_file = SFTPSensor(task_id='detect_file',
                         poke_interval=10,
                         timeout=3600,
                         sftp_conn_id='sftp_default',
                         path=source_path + filename,
                         dag=dag)

update_nb_of_chunks = SFTPUpdateNbOfChunksOperator(
    task_id='update_nb_of_chunks',
    conn_id='sftp_default',
    file_path=source_path + filename,
    master_variable=dag_name,
    chunks_variable_name="number_of_chunks",
    chunk_size=chunk_size,
    dag=dag)

# start_date is passed in through default_args, but it does not seem to reach
# the DAG (an apparent Airflow bug), so set it explicitly:
dag.start_date = default_args['start_date']
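# SFTPUpdateNbOfChunksOperator is a custom operator, not part of Airflow. A
# minimal sketch of what it might look like, assuming it stats the remote
# file through SFTPHook and records the chunk count in an Airflow Variable;
# the attribute names mirror the constructor call above, the body is
# hypothetical:

import math

from airflow.contrib.hooks.sftp_hook import SFTPHook
from airflow.models import BaseOperator, Variable
from airflow.utils.decorators import apply_defaults


class SFTPUpdateNbOfChunksOperator(BaseOperator):
    @apply_defaults
    def __init__(self, conn_id, file_path, master_variable,
                 chunks_variable_name, chunk_size, *args, **kwargs):
        super(SFTPUpdateNbOfChunksOperator, self).__init__(*args, **kwargs)
        self.conn_id = conn_id
        self.file_path = file_path
        self.master_variable = master_variable
        self.chunks_variable_name = chunks_variable_name
        self.chunk_size = chunk_size

    def execute(self, context):
        # stat the remote file and derive how many chunks it splits into
        hook = SFTPHook(ftp_conn_id=self.conn_id)
        size = hook.get_conn().stat(self.file_path).st_size
        nb_of_chunks = int(math.ceil(float(size) / self.chunk_size))
        # store the result under the DAG's master variable
        var = Variable.get(self.master_variable, deserialize_json=True,
                           default_var={})
        var[self.chunks_variable_name] = nb_of_chunks
        Variable.set(self.master_variable, var, serialize_json=True)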
}

dag = DAG(
    'sftpSensorTest',
    default_args=default_args,
    schedule_interval=timedelta(days=1),
    dagrun_timeout=timedelta(minutes=5),
)

start = DummyOperator(task_id='run_this_first', dag=dag)

sftp = SFTPSensor(
    task_id='sftp_check',
    path='data/filelist.txt',
    sftp_conn_id='sftp_beefy',
    poke_interval=10,
    mode='poke',
    soft_fail=False,
    dag=dag,
)

nextStep = KubernetesPodOperator(
    namespace='airflow',
    image="python:3.6-stretch",
    image_pull_policy="Always",
    cmds=["python", "-c"],
    arguments=["print('hello world')"],
    name="python",
    task_id="startPython",
    is_delete_operator_pod=True,
    hostnetwork=False,
    dag=dag,
)

start >> sftp >> nextStep
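# With a 10-second poke_interval in 'poke' mode, the sensor holds a worker
# slot for its entire wait. On Airflow releases that support sensor modes
# (1.10.2 and later), 'reschedule' frees the slot between pokes; a sketch of
# the same sensor in that mode:

sftp = SFTPSensor(
    task_id='sftp_check',
    path='data/filelist.txt',
    sftp_conn_id='sftp_beefy',
    poke_interval=60,
    mode='reschedule',
    soft_fail=False,
    dag=dag,
)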
def load_file_subdag(parent_dag_name, child_dag_name, sftp_conn_id, args):
    """Build the subdag that detects, lands, validates and stages one file."""
    dag_subdag_subdag = DAG(
        dag_id='{0}.{1}'.format(parent_dag_name, child_dag_name),
        default_args=args,
    )

    src_system = parent_dag_name.split(".")[1]

    # the same dated SFTP path and S3 keys are shared by several tasks,
    # so build each of them once
    sftp_path = '{}{}{}{}{}'.format(
        SFTP_PATH_DICT[child_dag_name],
        src_system,
        SFTP_FILE_NAME[child_dag_name],
        '%s',
        SFTP_FILE_EXTN[child_dag_name]) % DATE_STR_FORMATTED
    raw_s3_key = '{}{}{}{}{}'.format(
        S3_KEY_DICT[child_dag_name],
        src_system,
        SFTP_FILE_NAME[child_dag_name],
        '%s',
        SFTP_FILE_EXTN[child_dag_name]) % DATE_STR_FORMATTED
    stage_s3_path = '{}{}{}{}{}'.format(
        S3_STAGE_BUCKET[src_system],
        S3_KEY_DICT_STAGE[child_dag_name],
        src_system,
        STAGE_SFTP_FILE_NAME[child_dag_name],
        '/%s/') % DATE_STR_FORMATTED
    stage_out_s3_path = '{}{}{}{}{}'.format(
        S3_STAGE_BUCKET[src_system],
        S3_KEY_DICT_STAGE_OUT[child_dag_name],
        src_system,
        STAGE_SFTP_FILE_NAME[child_dag_name],
        '/%s/') % DATE_STR_FORMATTED

    with dag_subdag_subdag:
        file_check = SFTPSensor(
            task_id='file_check',
            sftp_conn_id=sftp_conn_id,
            poke_interval=60,
            timeout=600,
            soft_fail=False,
            path=sftp_path,
        )
        file_transfer_raw = SFTPToS3Operator(
            task_id='file_transfer_raw',
            sftp_conn_id=sftp_conn_id,
            sftp_path=sftp_path,
            s3_conn_id=JOB_ARGS['s3_conn_id'],
            s3_bucket=BUCKET_NAME_RAW[src_system],
            s3_key=raw_s3_key,
        )
        abc_validations = SFTPS3FileSizeOperator(
            task_id='abc_validations',
            sftp_conn_id=sftp_conn_id,
            sftp_path=sftp_path,
            s3_conn_id=JOB_ARGS['s3_conn_id'],
            s3_bucket=BUCKET_NAME_RAW[src_system],
            s3_key=raw_s3_key,
        )
        file_stage_copy = SSHOperator(
            task_id='file_stage_copy',
            ssh_conn_id=ADSALES_EMR,
            command='{}{}{}'.format(
                JOB_ARGS['spark_submit'],
                JOB_ARGS['spark_jars'],
                FW_STAGE_CODE_PATH)
            + '{}{}{}{}{}{}'.format(
                S3_RAW_BUCKET[src_system],
                S3_KEY_DICT[child_dag_name],
                src_system,
                SFTP_FILE_NAME[child_dag_name],
                '%s',
                STAGE_SFTP_FILE_EXTN[child_dag_name]) % DATE_STR_FORMATTED
            + '{}{}{}{}{}'.format(
                S3_STAGE_BUCKET[src_system],
                S3_KEY_DICT_STAGE[child_dag_name],
                src_system,
                STAGE_SFTP_FILE_NAME[child_dag_name],
                '/%s/ ') % DATE_STR_FORMATTED
            + SRC_SYS_ID[src_system],
        )
        dq_check = SSHOperator(
            task_id='dq_check',
            ssh_conn_id=ADSALES_EMR,
            command='{}{}{}'.format(
                JOB_ARGS['spark_submit'],
                JOB_ARGS['spark_jars'],
                FW_STAGE_DQ_CODE_PATH)
            + ' ' + stage_s3_path
            + ' ' + stage_s3_path
            + ' ' + stage_out_s3_path
            + ' ' + '{}{}{}{}'.format(
                DUPLICATE_COLUMN_LIST[child_dag_name],
                NULL_COLUMN_LIST[child_dag_name],
                str(JOB_ARGS['stage_dup_check']),
                str(JOB_ARGS['stage_null_check'])),
        )

        file_check >> file_transfer_raw >> abc_validations >> file_stage_copy >> dq_check

    return dag_subdag_subdag
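# load_file_subdag is a subdag factory: the returned DAG's dag_id must be
# '<parent>.<child>', and the .split(".")[1] lookups mean the parent name is
# itself expected to be dotted (the factory is called from another subdag).
# A sketch of wiring it in with SubDagOperator; PARENT_DAG_NAME, parent_dag
# and default_args are illustrative assumptions:

from airflow.operators.subdag_operator import SubDagOperator

load_file = SubDagOperator(
    task_id='load_file',  # must match the child_dag_name given to the factory
    subdag=load_file_subdag(
        parent_dag_name=PARENT_DAG_NAME,  # e.g. 'adsales.src_system' (dotted)
        child_dag_name='load_file',
        sftp_conn_id='sftp_default',
        args=default_args,
    ),
    dag=parent_dag,
)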
def test_hook_not_created_during_init(self):
    sftp_sensor = SFTPSensor(task_id='unit_test',
                             path='/path/to/file/1970-01-01.txt')
    self.assertIsNone(sftp_sensor.hook)
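# test_hook_not_created_during_init pins down lazy initialization: the sensor
# must not build a hook (or open a connection) at DAG-parse time, when
# __init__ runs. A minimal sketch of the pattern being verified, modeled on
# the contrib SFTPSensor but simplified, not the verbatim Airflow source:

from airflow.contrib.hooks.sftp_hook import SFTPHook
from airflow.sensors.base_sensor_operator import BaseSensorOperator
from paramiko.sftp import SFTP_NO_SUCH_FILE


class LazySFTPSensor(BaseSensorOperator):
    def __init__(self, path, sftp_conn_id='sftp_default', *args, **kwargs):
        super(LazySFTPSensor, self).__init__(*args, **kwargs)
        self.path = path
        self.sftp_conn_id = sftp_conn_id
        self.hook = None  # parse time: no hook, no connection

    def poke(self, context):
        # the hook is created only when the scheduler actually pokes
        self.hook = SFTPHook(self.sftp_conn_id)
        try:
            self.hook.get_mod_time(self.path)
        except OSError as e:
            if e.errno != SFTP_NO_SUCH_FILE:
                raise
            return False  # file not there yet, keep poking
        return True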