def test_arg_checking(self):
    """Check how SFTPOperator resolves ``ssh_hook`` versus ``ssh_conn_id``.

    Covers four cases: neither argument given (must raise), an invalid
    ``ssh_hook`` plus a connection id, a connection id only, and a valid
    hook together with a connection id.
    """
    # Arguments shared by every operator built in this test.
    transfer_kwargs = dict(
        local_filepath=self.test_local_filepath,
        remote_filepath=self.test_remote_filepath,
        operation=SFTPOperation.PUT,
        dag=self.dag,
    )

    def execute_ignoring_errors(task):
        # Errors raised by execute() are deliberately ignored: the assertions
        # below only inspect the ssh_hook left on the task afterwards.
        try:
            task.execute(None)
        except Exception:  # pylint: disable=broad-except
            pass

    # Exception should be raised if neither ssh_hook nor ssh_conn_id is provided
    with pytest.raises(AirflowException, match="Cannot operate without ssh_hook or ssh_conn_id."):
        no_hook_no_conn = SFTPOperator(task_id="test_sftp_0", **transfer_kwargs)
        no_hook_no_conn.execute(None)

    # if ssh_hook is invalid/not provided, use ssh_conn_id to create SSHHook
    invalid_hook_task = SFTPOperator(
        task_id="test_sftp_1",
        ssh_hook="string_rather_than_SSHHook",  # invalid ssh_hook
        ssh_conn_id=TEST_CONN_ID,
        **transfer_kwargs,
    )
    execute_ignoring_errors(invalid_hook_task)
    assert invalid_hook_task.ssh_hook.ssh_conn_id == TEST_CONN_ID

    conn_only_task = SFTPOperator(
        task_id="test_sftp_2",
        ssh_conn_id=TEST_CONN_ID,  # no ssh_hook provided
        **transfer_kwargs,
    )
    execute_ignoring_errors(conn_only_task)
    assert conn_only_task.ssh_hook.ssh_conn_id == TEST_CONN_ID

    # if both valid ssh_hook and ssh_conn_id are provided, ignore ssh_conn_id
    hook_and_conn_task = SFTPOperator(
        task_id="test_sftp_3",
        ssh_hook=self.hook,
        ssh_conn_id=TEST_CONN_ID,
        **transfer_kwargs,
    )
    execute_ignoring_errors(hook_and_conn_task)
    assert hook_and_conn_task.ssh_hook.ssh_conn_id == self.hook.ssh_conn_id
def test_pickle_file_transfer_put(self):
    """PUT a local file to the remote path and verify its content with ``cat``."""
    payload = (
        b"This is local file content \n which is multiline "
        b"continuing....with other character\nanother line here \n this is last line"
    )

    # Write the local file that is going to be uploaded.
    with open(self.test_local_filepath, 'wb') as local_file:
        local_file.write(payload)

    # Upload the file (operator is built with create_intermediate_dirs=True).
    upload_task = SFTPOperator(
        task_id="test_sftp",
        ssh_hook=self.hook,
        local_filepath=self.test_local_filepath,
        remote_filepath=self.test_remote_filepath,
        operation=SFTPOperation.PUT,
        create_intermediate_dirs=True,
        dag=self.dag,
    )
    self.assertIsNotNone(upload_task)
    upload_ti = TaskInstance(task=upload_task, execution_date=timezone.utcnow())
    upload_ti.run()

    # Read the remote file back over SSH; the command output is pushed to XCom.
    cat_command = "cat {0}".format(self.test_remote_filepath)
    verify_task = SSHOperator(
        task_id="test_check_file",
        ssh_hook=self.hook,
        command=cat_command,
        do_xcom_push=True,
        dag=self.dag,
    )
    self.assertIsNotNone(verify_task)
    verify_ti = TaskInstance(task=verify_task, execution_date=timezone.utcnow())
    verify_ti.run()

    remote_content = verify_ti.xcom_pull(task_ids='test_check_file', key='return_value')
    self.assertEqual(remote_content.strip(), payload)
def test_file_transfer_no_intermediate_dir_error_get(self):
    """GET into a local path under ``test_local_filepath_int_dir`` must fail.

    The operator is built without ``create_intermediate_dirs``; the test
    expects an exception whose message contains "No such file".
    """
    remote_text = (
        "This is remote file content \n which is also multiline "
        "another line here \n this is last line. EOF"
    )

    # Create the source file on the remote side.
    echo_command = "echo '{0}' > {1}".format(remote_text, self.test_remote_filepath)
    seed_task = SSHOperator(
        task_id="test_create_file",
        ssh_hook=self.hook,
        command=echo_command,
        do_xcom_push=True,
        dag=self.dag,
    )
    self.assertIsNotNone(seed_task)
    seed_ti = TaskInstance(task=seed_task, execution_date=timezone.utcnow())
    seed_ti.run()

    # Try to GET the file from remote.
    # This should raise an error with "No such file" as the directory
    # does not exist.
    with self.assertRaises(Exception) as raised:
        download_task = SFTPOperator(
            task_id="test_sftp",
            ssh_hook=self.hook,
            local_filepath=self.test_local_filepath_int_dir,
            remote_filepath=self.test_remote_filepath,
            operation=SFTPOperation.GET,
            dag=self.dag,
        )
        self.assertIsNotNone(download_task)
        download_ti = TaskInstance(task=download_task, execution_date=timezone.utcnow())
        download_ti.run()

    self.assertIn('No such file', str(raised.exception))
def test_pickle_file_transfer_get(self):
    """Create a remote file over SSH, GET it locally and compare the content."""
    remote_text = (
        "This is remote file content \n which is also multiline "
        "another line here \n this is last line. EOF"
    )

    # Create the source file on the remote side.
    echo_command = "echo '{0}' > {1}".format(remote_text, self.test_remote_filepath)
    seed_task = SSHOperator(
        task_id="test_create_file",
        ssh_hook=self.hook,
        command=echo_command,
        do_xcom_push=True,
        dag=self.dag,
    )
    self.assertIsNotNone(seed_task)
    seed_ti = TaskInstance(task=seed_task, execution_date=timezone.utcnow())
    seed_ti.run()

    # Download the remote file to the local path.
    download_task = SFTPOperator(
        task_id="test_sftp",
        ssh_hook=self.hook,
        local_filepath=self.test_local_filepath,
        remote_filepath=self.test_remote_filepath,
        operation=SFTPOperation.GET,
        dag=self.dag,
    )
    self.assertIsNotNone(download_task)
    download_ti = TaskInstance(task=download_task, execution_date=timezone.utcnow())
    download_ti.run()

    # Compare what was received with what was written remotely.
    with open(self.test_local_filepath, 'r') as local_file:
        received = local_file.read()
    self.assertEqual(received.strip(), remote_text)
def test_file_transfer_no_intermediate_dir_error_put(self):
    """PUT to ``test_remote_filepath_int_dir`` with directory creation off must fail.

    The operator is built with ``create_intermediate_dirs=False``; the test
    expects an exception whose message contains "No such file".
    """
    payload = (
        b"This is local file content \n which is multiline "
        b"continuing....with other character\nanother line here \n this is last line"
    )

    # Write the local file that the operator will try to upload.
    with open(self.test_local_filepath, 'wb') as local_file:
        local_file.write(payload)

    # Try to put the file to remote.
    # This should raise an error with "No such file" as the directory
    # does not exist.
    with self.assertRaises(Exception) as raised:
        upload_task = SFTPOperator(
            task_id="test_sftp",
            ssh_hook=self.hook,
            local_filepath=self.test_local_filepath,
            remote_filepath=self.test_remote_filepath_int_dir,
            operation=SFTPOperation.PUT,
            create_intermediate_dirs=False,
            dag=self.dag,
        )
        self.assertIsNotNone(upload_task)
        upload_ti = TaskInstance(task=upload_task, execution_date=timezone.utcnow())
        upload_ti.run()

    self.assertIn('No such file', str(raised.exception))
def test_pickle_file_transfer_put(self):
    """PUT a local file to the remote path and verify its content with ``cat``."""
    payload = (
        b"This is local file content \n which is multiline "
        b"continuing....with other character\nanother line here \n this is last line"
    )

    # Write the local file that is going to be uploaded.
    with open(self.test_local_filepath, 'wb') as local_file:
        local_file.write(payload)

    # Upload the file (operator is built with create_intermediate_dirs=True).
    upload_task = SFTPOperator(
        task_id="put_test_task",
        ssh_hook=self.hook,
        local_filepath=self.test_local_filepath,
        remote_filepath=self.test_remote_filepath,
        operation=SFTPOperation.PUT,
        create_intermediate_dirs=True,
        dag=self.dag,
    )
    assert upload_task is not None
    upload_ti = TaskInstance(task=upload_task, execution_date=timezone.utcnow())
    upload_ti.run()

    # Read the remote file back over SSH; the command output is pushed to XCom.
    verify_task = SSHOperator(
        task_id="check_file_task",
        ssh_hook=self.hook,
        command=f"cat {self.test_remote_filepath}",
        do_xcom_push=True,
        dag=self.dag,
    )
    assert verify_task is not None
    verify_ti = TaskInstance(task=verify_task, execution_date=timezone.utcnow())
    verify_ti.run()

    remote_content = verify_ti.xcom_pull(task_ids=verify_task.task_id, key='return_value')
    assert remote_content.strip() == payload
def test_file_transfer_with_intermediate_dir_error_get(self):
    """GET into ``test_local_filepath_int_dir`` with ``create_intermediate_dirs=True``.

    Despite "error" in the test name, no exception is expected here: the
    downloaded file is opened and its content compared with what was
    written on the remote side.
    """
    test_remote_file_content = (
        "This is remote file content \n which is also multiline "
        "another line here \n this is last line. EOF"
    )

    # Create the source file on the remote side.
    seed_task = SSHOperator(
        task_id="test_create_file",
        ssh_hook=self.hook,
        command=f"echo '{test_remote_file_content}' > {self.test_remote_filepath}",
        do_xcom_push=True,
        dag=self.dag,
    )
    assert seed_task is not None
    seed_ti = TaskInstance(task=seed_task, execution_date=timezone.utcnow())
    seed_ti.run()

    # Download the remote file to the local path.
    download_task = SFTPOperator(
        task_id="test_sftp",
        ssh_hook=self.hook,
        local_filepath=self.test_local_filepath_int_dir,
        remote_filepath=self.test_remote_filepath,
        operation=SFTPOperation.GET,
        create_intermediate_dirs=True,
        dag=self.dag,
    )
    assert download_task is not None
    download_ti = TaskInstance(task=download_task, execution_date=timezone.utcnow())
    download_ti.run()

    # Compare what was received with what was written remotely.
    with open(self.test_local_filepath_int_dir) as local_file:
        received = local_file.read()
    assert received.strip() == test_remote_file_content
def test_file_transfer_no_intermediate_dir_error_get(self):
    """GET into a local path under ``test_local_filepath_int_dir`` must fail.

    The operator is built without ``create_intermediate_dirs``; the test
    expects an exception whose message contains "No such file".
    """
    test_remote_file_content = (
        "This is remote file content \n which is also multiline "
        "another line here \n this is last line. EOF"
    )

    # Create the source file on the remote side.
    seed_task = SSHOperator(
        task_id="test_create_file",
        ssh_hook=self.hook,
        command=f"echo '{test_remote_file_content}' > {self.test_remote_filepath}",
        do_xcom_push=True,
        dag=self.dag,
    )
    assert seed_task is not None
    seed_ti = TaskInstance(task=seed_task, execution_date=timezone.utcnow())
    seed_ti.run()

    # Try to GET the file from remote.
    # This should raise an error with "No such file" as the directory
    # does not exist.
    with pytest.raises(Exception) as raised:
        download_task = SFTPOperator(
            task_id="test_sftp",
            ssh_hook=self.hook,
            local_filepath=self.test_local_filepath_int_dir,
            remote_filepath=self.test_remote_filepath,
            operation=SFTPOperation.GET,
            dag=self.dag,
        )
        assert download_task is not None
        download_ti = TaskInstance(task=download_task, execution_date=timezone.utcnow())
        download_ti.run()

    assert 'No such file' in str(raised.value)
# For every service, build a delete -> query -> SFTP upload chain.
# `queries` collects the query tasks in creation order.
queries = []

for service in services:
    delete = PythonOperator(
        task_id=f'delete_older_{service}_file',
        python_callable=delete_older_file,
        op_kwargs={'service': service},
        dag=dag,
    )

    query = PythonOperator(
        task_id=f'query_narrativedx_{service}',
        python_callable=query_narrativedx,
        op_kwargs={'service': service},
        dag=dag,
    )

    sftp = SFTPOperator(
        task_id=f'upload_{service}_to_sftp',
        ssh_conn_id='coh_sftp',
        local_filepath=str(basepath.joinpath(f'NarrativeDX - {service} - {exec_date}.csv')),
        remote_filepath=f'/sftp/NarrativeDX - {service} - {exec_date}.csv',
        operation='put',
        create_intermediate_dirs=True,
        dag=dag,
    )

    # set each query downstream from the previous one in order not to slam the db
    if queries:
        queries[-1] >> query
    queries.append(query)

    delete >> query >> sftp
@task def delete_sftp_file(): """Delete a file at SFTP SERVER""" SFTPHook().delete_file(SFTP_FILE_COMPLETE_PATH) with DAG( "example_sftp_to_wasb", schedule_interval=None, catchup=False, start_date=datetime(2021, 1, 1), # Override to match your needs ) as dag: transfer_files_to_sftp_step = SFTPOperator( task_id="transfer_files_from_local_to_sftp", local_filepath=FILE_COMPLETE_PATH, remote_filepath=SFTP_FILE_COMPLETE_PATH, ) # [START how_to_sftp_to_wasb] transfer_files_to_azure = SFTPToWasbOperator( task_id="transfer_files_from_sftp_to_wasb", # SFTP args sftp_source_path=SFTP_SRC_PATH, # AZURE args container_name=AZURE_CONTAINER_NAME, blob_prefix=BLOB_PREFIX, ) # [END how_to_sftp_to_wasb] delete_blob_file_step = WasbDeleteBlobOperator(
command=query_cmd_patient, dag=dag) copy_patient = SSHOperator(ssh_conn_id='tableau_server', task_id='copy_claro_patient', command=copy_cmd_patient, dag=dag) encrypt_patient = BashOperator(task_id='encrypt_file_patient', bash_command=encrypt_cmd_patient, dag=dag) sftp_patient = SFTPOperator( task_id='upload_claro_to_sftp_patient', ssh_conn_id='claro_sftp', local_filepath=f'{basepath}/files/{output_file_patient}.gpg', remote_filepath=f'/{output_file_patient}.gpg', create_intermediate_dirs=True, dag=dag) query_patient >> copy_patient >> encrypt_patient >> sftp_patient # physician roster output_file_roster = 'Claro_Physician_Roster_{{ next_ds_nodash }}.txt' output_path_roster = f'C:\\Airflow\\claro\\{output_file_roster}' query_cmd_roster = (f'sqlcmd -S {claro_server} -d Clarity_PRD_Report ' f'-i {Variable.get("claro_query_filepath_roster")} ' f'-o {output_path_roster} ' f'-s"," -W -X -I -l 30 -h -1')