Example #1
    def execute(self, context):
        file_msg = None
        try:
            if self.ssh_conn_id and not self.ssh_hook:
                self.ssh_hook = SSHHook(ssh_conn_id=self.ssh_conn_id)

            if not self.ssh_hook:
                raise AirflowException(
                    "can not operate without ssh_hook or ssh_conn_id")

            if self.remote_host is not None:
                self.ssh_hook.remote_host = self.remote_host

            ssh_client = self.ssh_hook.get_conn()
            sftp_client = ssh_client.open_sftp()
            if self.operation.lower() == SFTPOperation.GET:
                file_msg = "from {0} to {1}".format(self.remote_filepath,
                                                    self.local_filepath)
                self.log.debug("Starting to transfer %s", file_msg)
                sftp_client.get(self.remote_filepath, self.local_filepath)
            else:
                file_msg = "from {0} to {1}".format(self.local_filepath,
                                                    self.remote_filepath)
                self.log.debug("Starting to transfer file %s", file_msg)
                sftp_client.put(self.local_filepath,
                                self.remote_filepath,
                                confirm=self.confirm)

        except Exception as e:
            raise AirflowException(
                "Error while transferring {0}, error: {1}".format(
                    file_msg, str(e)))

        return None
Example #2
    def setUp(self):
        configuration.load_test_config()
        from airflow.contrib.hooks.ssh_hook import SSHHook
        from airflow.hooks.S3_hook import S3Hook

        hook = SSHHook(ssh_conn_id='ssh_default')
        s3_hook = S3Hook('aws_default')
        hook.no_host_key_check = True
        args = {
            'owner': 'airflow',
            'start_date': DEFAULT_DATE,
            'provide_context': True
        }
        dag = DAG(TEST_DAG_ID + 'test_schedule_dag_once', default_args=args)
        dag.schedule_interval = '@once'

        self.hook = hook
        self.s3_hook = s3_hook

        self.ssh_client = self.hook.get_conn()
        self.sftp_client = self.ssh_client.open_sftp()

        self.dag = dag
        self.s3_bucket = BUCKET
        self.sftp_path = SFTP_PATH
        self.s3_key = S3_KEY
Example #3
    def execute(self, context):
        ssh_hook = SSHHook(ssh_conn_id=self.sftp_conn_id)
        ssh_client = ssh_hook.get_conn()
        sftp_client = ssh_client.open_sftp()

        # Get list of files in sftp_path
        self.log.info(f"Getting list of files in sftp_path: `{self.sftp_folder_path}`")
        path_content = sftp_client.listdir(self.sftp_folder_path)
        files = [
            file for file in path_content if fnmatch.fnmatch(file, self.sftp_filename)
        ]
        sftp_object = None
        try:
            if not files:
                self.log.info(
                    f"No files found in folder that matches `{self.sftp_filename}` parameter."
                )
            for file in files:
                sftp_object = os.path.join(self.sftp_folder_path, file)
                sftp_client.remove(path=sftp_object)
                self.log.info(f"Deleted file `{sftp_object}`")
        except IOError as ex:
            # IOError raised by client does not consistently use the same
            # number of arguments when raised. When a file does not exist
            # the first argument is the error code `2`. If a folder is
            # passed then only a text error is used. If a permissions
            # error occurs then the first argument is error code 13.
            #
            # We only want to handle when a file does not exist, all other
            # exceptions should be reraised to fail the Airflow task.
            if ex.args[0] == 2:
                self.log.info(f"File does not exist `{sftp_object}`")
            else:
                raise
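The magic number 2 checked above corresponds to errno.ENOENT. As a side note, an equivalent guard can compare against the errno module instead of the literal; the sketch below is an illustration rather than part of the original operator, and the sftp_client, path and log arguments are assumed stand-ins for the operator's own attributes.

import errno


def remove_if_exists(sftp_client, path, log):
    """Delete `path` on the SFTP server, ignoring only 'file not found' errors."""
    try:
        sftp_client.remove(path=path)
        log.info("Deleted file `%s`", path)
    except IOError as ex:
        # Re-raise permission or directory errors so the Airflow task still fails.
        if ex.errno == errno.ENOENT:
            log.info("File does not exist `%s`", path)
        else:
            raise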
Example #4
 def execute(self, context):
     try:
         s3_hook = S3Hook(self.s3_conn_id)
         logging.info("Connected to S3 hook")
     except AirflowException as e:
         logging.info("Error in Connecting to S3 Hook")
         exit(1)
     try:
         ssh_hook = SSHHook(ssh_conn_id=self.sftp_conn_id)
         logging.info("Connected to SSH Hook")
         ssh_client = ssh_hook.get_conn()
         sftp_client = ssh_client.open_sftp()
         logging.info("Connecting to SFTP")
     except AirflowException as e:
         logging.info("Error in Connecting to SFTP")
         exit(1)
     try:
         with NamedTemporaryFile("w") as f:
             sftp_client.get(self.sftp_path, f.name)
             s3_hook.load_file(filename=f.name,
                               key=self.s3_key,
                               bucket_name=self.s3_bucket,
                               replace=True)
             logging.info("SUCCEEDED")
     except AirflowException as e:
         logging.info("Transfer to S3 FAILED", str(e))
         exit(1)
Example #5
def runDAP(**kwargs):
    """
    Connects to App Server via SSH and executes script, capturing and reporting output.
    """

    sshSource = SSHHook(ssh_conn_id='DAP_App_Server')

    command = 'E:\\Airflow_Test\\DAP\\DAPConsoleProcessor.exe -config "E:\\Airflow_Test\\DAP\\Configuration\\DAPOrderCancellation.xml" -jobname "DAPOrderCancellation.xml"'

    sshConn = None
    exitStatus = None
    stdOutput = ''
    errorMessage = ''

    try:
        sshConn = sshSource.get_conn()

        stdIn, stdOut, stdErr = sshConn.exec_command(command=command)

        exitStatus = stdOut.channel.recv_exit_status()
        errorMessage = stdErr.read().decode('ascii')
        stdOutput = stdOut.read().decode('ascii')

        if exitStatus == 0:
            print('DAP Started Successfully.')

        if errorMessage:
            logging.error('DAP Processor Failure. See Exception')
            raise Exception(errorMessage)

    finally:
        print('Exit Status: {}'.format(exitStatus))
        print('StdOut: {}'.format(stdOutput))
        print('StdErr: {}'.format(errorMessage))

        if sshConn:
            sshConn.close()
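A minimal sketch of how runDAP might be scheduled from a DAG; only the 'DAP_App_Server' connection id comes from the snippet above, while the dag id, schedule and default args are assumptions.

from datetime import datetime

from airflow import DAG
from airflow.operators.python_operator import PythonOperator

default_args = {'owner': 'airflow', 'start_date': datetime(2021, 1, 1)}

with DAG(dag_id='dap_order_cancellation',
         default_args=default_args,
         schedule_interval=None,
         catchup=False) as dag:
    run_dap = PythonOperator(
        task_id='run_dap',
        python_callable=runDAP,
        provide_context=True,  # runDAP accepts **kwargs
    )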
Example #6
    def setUp(self):
        from airflow.contrib.hooks.ssh_hook import SSHHook
        from airflow.hooks.S3_hook import S3Hook

        hook = SSHHook(ssh_conn_id='ssh_default')
        s3_hook = S3Hook('aws_default')
        hook.no_host_key_check = True
        args = {
            'owner': 'airflow',
            'start_date': DEFAULT_DATE,
            'provide_context': True
        }
        dag = DAG(TEST_DAG_ID + 'test_schedule_dag_once', default_args=args)
        dag.schedule_interval = '@once'

        self.hook = hook
        self.s3_hook = s3_hook

        self.ssh_client = self.hook.get_conn()
        self.sftp_client = self.ssh_client.open_sftp()

        self.dag = dag
        self.s3_bucket = BUCKET
        self.sftp_path = SFTP_PATH
        self.s3_key = S3_KEY
Example #7
    def execute(self, context):
        try:
            if self.ssh_conn_id:
                if self.ssh_hook and isinstance(self.ssh_hook, SSHHook):
                    self.log.info("ssh_conn_id is ignored when ssh_hook is provided.")
                else:
                    self.log.info("ssh_hook is not provided or invalid. " +
                                  "Trying ssh_conn_id to create SSHHook.")
                    self.ssh_hook = SSHHook(ssh_conn_id=self.ssh_conn_id)

            if not self.ssh_hook:
                raise AirflowException("Cannot operate without ssh_hook or ssh_conn_id.")

            with self.ssh_hook.get_conn() as ssh_client:
                sftp_client = ssh_client.open_sftp()

                remote_folder = os.path.dirname(self.remote_filepath)
                if self.create_intermediate_dirs:
                    _make_intermediate_dirs(
                        sftp_client=sftp_client,
                        remote_directory=remote_folder,
                    )
                self.log.info("Starting to transfer file to %s", self.remote_filepath)

                file_contents_fo = StringIO(self.file_contents)
                sftp_client.putfo(file_contents_fo, self.remote_filepath)

                if self.file_mode is not None:
                    sftp_client.chmod(self.remote_filepath, self.file_mode)
        except Exception as e:
            raise AirflowException("Error while uploading to {0}, error: {1}"
                                   .format(self.remote_filepath, str(e)))

        return self.remote_filepath
Example #8
    def setUp(self):
        from airflow.contrib.hooks.ssh_hook import SSHHook

        hook = SSHHook(ssh_conn_id='ssh_default')
        hook.no_host_key_check = True
        args = {
            'owner': 'airflow',
            'start_date': DEFAULT_DATE,
        }
        dag = DAG(TEST_DAG_ID + 'test_schedule_dag_once', default_args=args)
        dag.schedule_interval = '@once'
        self.hook = hook
        self.dag = dag
        self.test_dir = "/tmp"
        self.test_local_dir = "/tmp/tmp2"
        self.test_remote_dir = "/tmp/tmp1"
        self.test_local_filename = 'test_local_file'
        self.test_remote_filename = 'test_remote_file'
        self.test_local_filepath = '{0}/{1}'.format(self.test_dir,
                                                    self.test_local_filename)
        # Local Filepath with Intermediate Directory
        self.test_local_filepath_int_dir = '{0}/{1}'.format(
            self.test_local_dir, self.test_local_filename)
        self.test_remote_filepath = '{0}/{1}'.format(self.test_dir,
                                                     self.test_remote_filename)
        # Remote Filepath with Intermediate Directory
        self.test_remote_filepath_int_dir = '{0}/{1}'.format(
            self.test_remote_dir, self.test_remote_filename)
Example #9
 def setUp(self):
     configuration.load_test_config()
     from airflow.contrib.hooks.ssh_hook import SSHHook
     hook = SSHHook(ssh_conn_id='ssh_default')
     hook.no_host_key_check = True
     args = {
         'owner': 'airflow',
         'start_date': DEFAULT_DATE,
         'provide_context': True
     }
     dag = DAG(TEST_DAG_ID + 'test_schedule_dag_once', default_args=args)
     dag.schedule_interval = '@once'
     self.hook = hook
     self.dag = dag
     self.test_dir = "/tmp"
     self.test_local_dir = "/tmp/tmp2"
     self.test_remote_dir = "/tmp/tmp1"
     self.test_local_filename = 'test_local_file'
     self.test_remote_filename = 'test_remote_file'
     self.test_local_filepath = '{0}/{1}'.format(self.test_dir,
                                                 self.test_local_filename)
     # Local Filepath with Intermediate Directory
     self.test_local_filepath_int_dir = '{0}/{1}'.format(self.test_local_dir,
                                                         self.test_local_filename)
     self.test_remote_filepath = '{0}/{1}'.format(self.test_dir,
                                                  self.test_remote_filename)
     # Remote Filepath with Intermediate Directory
     self.test_remote_filepath_int_dir = '{0}/{1}'.format(self.test_remote_dir,
                                                          self.test_remote_filename)
Example #10
class SSHHookTest(unittest.TestCase):
    def setUp(self):
        configuration.load_test_config()
        from airflow.contrib.hooks.ssh_hook import SSHHook
        self.hook = SSHHook(ssh_conn_id='ssh_default', keepalive_interval=10)
        self.hook.no_host_key_check = True

    def test_ssh_connection(self):
        ssh_hook = self.hook.get_conn()
        self.assertIsNotNone(ssh_hook)

    def test_tunnel(self):
        print("Setting up remote listener")
        import subprocess
        import socket

        self.server_handle = subprocess.Popen(["python", "-c", HELLO_SERVER_CMD],
                                              stdout=subprocess.PIPE)
        print("Setting up tunnel")
        with self.hook.create_tunnel(2135, 2134):
            print("Tunnel up")
            server_output = self.server_handle.stdout.read(5)
            self.assertEqual(server_output, b"ready")
            print("Connecting to server via tunnel")
            s = socket.socket()
            s.connect(("localhost", 2135))
            print("Receiving...", )
            response = s.recv(5)
            self.assertEqual(response, b"hello")
            print("Closing connection")
            s.close()
            print("Waiting for listener...")
            output, _ = self.server_handle.communicate()
            self.assertEqual(self.server_handle.returncode, 0)
            print("Closing tunnel")
Example #11
    def execute(self, context):
        gcs_hook = GoogleCloudStorageHook(
            google_cloud_storage_conn_id=self.google_cloud_storage_conn_id,
            delegate_to=self.delegate_to)

        try:
            if self.ssh_conn_id and not self.ssh_hook:
                self.ssh_hook = SSHHook(ssh_conn_id=self.ssh_conn_id)

            if not self.ssh_hook:
                raise AirflowException(
                    'can not operate without ssh_hook or ssh_conn_id')

            if self.remote_host is not None:
                self.ssh_hook.remote_host = self.remote_host

            with self.ssh_hook.get_conn() as ssh_client:
                sftp_client = ssh_client.open_sftp()

                with NamedTemporaryFile('wb') as temp_file:
                    sftp_client.get(self.remote_filepath, temp_file.name)
                    gcs_hook.upload(self.destination_gcs_bucket,
                                    self.destination_gcs_path, temp_file.name)
        except Exception as error_object:
            raise AirflowException(
                'Error while transferring. Error details: {0}'.format(
                    str(error_object)))

        return None
Example #12
    def execute(self, context):
        if not self._adls_hook:
            self._adls_hook = ADLSGen2Hook(
                container=self.adls_container,
                azure_data_lake_conn_id=self.azure_data_lake_conn_id,
            )
        ssh_hook = SSHHook(ssh_conn_id=self.sftp_conn_id)
        ssh_client = ssh_hook.get_conn()
        sftp_client = ssh_client.open_sftp()

        # Get list of files in ADLS folder
        source_files = [
            os.path.split(file)[1]  # get only the file portion of the path
            for file in self._get_adls_files()
            if fnmatch.fnmatch(os.path.split(file)[1], self.source_object)
        ]
        self.log.info(f"Source Files: `{source_files}`")

        # Get list of files in sftp_path
        try:
            self.log.info(
                f"Getting list of files in sftp_path: `{self.sftp_folder_path}`"
            )
            sftp_files = sftp_client.listdir(self.sftp_folder_path)
        except IOError as e:
            self.log.error(
                f"The folder `{self.sftp_folder_path}` does not exist on the sftp server."
            )
            raise e

        # Determine the files to be processed. If all files are to be reloaded,
        # process all files in the ADLS folder that match `source_object`.
        # Otherwise, only process files whose names do not already exist in the
        # sftp folder.
        if self.reload_all:
            files_to_process = source_files
            self.log.info(f"Files to process: `{files_to_process}`")
        else:
            self.log.info(f"Existing files in sftp folder: `{sftp_files}`")
            files_to_process = set(source_files) - set(sftp_files)
            self.log.info(f"Files to process: `{files_to_process}`")

        # create temporary folder and process files
        with tempfile.TemporaryDirectory() as temp_folder:
            for file in files_to_process:
                temp_path = os.path.join(temp_folder, file)
                adls_object = os.path.join(self.adls_folder_path, file)
                sftp_object = os.path.join(self.sftp_folder_path, file)

                self.log.info(f"Processing file: `{adls_object}`")
                self._adls_hook.download_file(
                    local_path=temp_path, remote_path=adls_object, overwrite=True
                )
                sftp_client.put(localpath=temp_path, remotepath=sftp_object)
                os.remove(temp_path)

        # Close ADLS Connection
        self._adls_hook.connection.close()
Example #13
    def execute(self, context):
        file_msg = None
        try:
            if self.ssh_conn_id:
                if self.ssh_hook and isinstance(self.ssh_hook, SSHHook):
                    self.log.info(
                        "ssh_conn_id is ignored when ssh_hook is provided.")
                else:
                    self.log.info("ssh_hook is not provided or invalid. " +
                                  "Trying ssh_conn_id to create SSHHook.")
                    self.ssh_hook = SSHHook(ssh_conn_id=self.ssh_conn_id)

            if not self.ssh_hook:
                raise AirflowException(
                    "Cannot operate without ssh_hook or ssh_conn_id.")

            if self.remote_host is not None:
                self.log.info(
                    "remote_host is provided explicitly. " +
                    "It will replace the remote_host which was defined " +
                    "in ssh_hook or predefined in connection of ssh_conn_id.")
                self.ssh_hook.remote_host = self.remote_host

            with self.ssh_hook.get_conn() as ssh_client:
                sftp_client = ssh_client.open_sftp()
                if self.operation.lower() == SFTPOperation.GET:
                    local_folder = os.path.dirname(self.local_filepath)
                    if self.create_intermediate_dirs:
                        # Create intermediate directories if they don't exist
                        try:
                            os.makedirs(local_folder)
                        except OSError:
                            if not os.path.isdir(local_folder):
                                raise
                    file_msg = "from {0} to {1}".format(
                        self.remote_filepath, self.local_filepath)
                    self.log.debug("Starting to transfer %s", file_msg)
                    sftp_client.get(self.remote_filepath, self.local_filepath)
                else:
                    remote_folder = os.path.dirname(self.remote_filepath)
                    if self.create_intermediate_dirs:
                        _make_intermediate_dirs(
                            sftp_client=sftp_client,
                            remote_directory=remote_folder,
                        )
                    file_msg = "from {0} to {1}".format(
                        self.local_filepath, self.remote_filepath)
                    self.log.debug("Starting to transfer file %s", file_msg)
                    sftp_client.put(self.local_filepath,
                                    self.remote_filepath,
                                    confirm=self.confirm)

        except Exception as e:
            raise AirflowException(
                "Error while transferring {0}, error: {1}".format(
                    file_msg, str(e)))

        return None
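The _make_intermediate_dirs helper referenced here (and in a few later examples) is not part of the snippet. A plausible sketch, assumed rather than the verbatim Airflow implementation, creates the missing remote directories recursively with the same paramiko SFTP client:

import os


def _make_intermediate_dirs(sftp_client, remote_directory):
    """Recursively create `remote_directory` on the SFTP server if it is missing."""
    if remote_directory in ('/', ''):
        return
    try:
        # If stat succeeds, the directory already exists and there is nothing to do.
        sftp_client.stat(remote_directory)
    except IOError:
        # Make sure the parent exists first, then create this level.
        _make_intermediate_dirs(sftp_client, os.path.dirname(remote_directory))
        sftp_client.mkdir(remote_directory)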
Example #14
def check_for_file_py(**kwargs):
        path = kwargs.get('path', None)
        sftp_conn_id = kwargs.get('sftp_conn_id', None)
        #filename = kwargs.get('templates_dict').get('filename', None)
        ssh_hook = SSHHook(ssh_conn_id=sftp_conn_id)
        sftp_client = ssh_hook.get_conn().open_sftp()
        ftp_files = sftp_client.listdir(path)
        for filename in ftp_files:
            print(filename)
            logging.info('Filename: ' + str(filename))
Example #15
    def execute(self, context):
        self.log.info("Going to start Bulk sftp to s3 operator")
        sftp_hook = SFTPHook(ftp_conn_id=self.sftp_conn_id)
        sftp_hook.no_host_key_check = True
        list_dir = sftp_hook.list_directory(self.sftp_path)

        if len(list_dir) < 1:
            self.log.info("Got no files to process. Skipping")
            return False

        self.log.info(f"Got {len(list_dir)} files to move")
        temp_files = []
        file_path_list = []
        ssh_hook = SSHHook(ssh_conn_id=self.sftp_conn_id)
        sftp_client = ssh_hook.get_conn().open_sftp()
        s3_hook = S3Hook(self.aws_conn_id)
        for file_name in list_dir:
            file_path = os.path.join(self.sftp_path, file_name)
            file_path_list.append(file_path)
            s3_key = str(os.path.join(self.dest_path, file_name))
            file_metadata = {"ftp": NamedTemporaryFile("w"), "s3_key": s3_key}
            for i in range(0, 5):
                try:
                    self.log.info(f"Downloading {file_path}")
                    sftp_client.get(file_path, file_metadata["ftp"].name)
                    file_metadata["ftp"].flush()
                    temp_files.append(file_metadata)
                    break
                except Exception:
                    self.log.info(
                        f"Got no response from server, waiting for next try number {(i + 1)}"
                    )
                    if i < 4:
                        time.sleep(2 ** i + random.random())
                        sftp_client = (
                            SSHHook(ssh_conn_id=self.sftp_conn_id)
                            .get_conn()
                            .open_sftp()
                        )
                    else:
                        raise

        self.log.info(f"Uploading to S3 with {self.workers} workers")
        with Pool(self.workers) as pool:
            pool.starmap(
                s3_hook.load_file,
                [
                    (x["ftp"].name, x["s3_key"], self.dest_bucket, True, False)
                    for x in temp_files
                ],
            )

        self.log.info("Finished executing Bulk sftp to s3 operator")
        return file_path_list
Example #16
    def execute(self, context):
        self.s3_key = self.get_s3_key(self.s3_key)
        ssh_hook = SSHHook(ssh_conn_id=self.sftp_conn_id)
        s3_hook = S3Hook(self.s3_conn_id)

        s3_client = s3_hook.get_conn()
        sftp_client = ssh_hook.get_conn().open_sftp()

        with NamedTemporaryFile("w") as f:
            s3_client.download_file(self.s3_bucket, self.s3_key, f.name)
            sftp_client.put(f.name, self.sftp_path)
Example #17
    def execute(self, context):
        try:
            if self.ssh_conn_id and not self.ssh_hook:
                self.ssh_hook = SSHHook(ssh_conn_id=self.ssh_conn_id)

            if not self.ssh_hook:
                raise AirflowException(
                    "can not operate without ssh_hook or ssh_conn_id")

            if self.remote_host is not None:
                self.ssh_hook.remote_host = self.remote_host

            ssh_client = self.ssh_hook.get_conn()

            if not self.command:
                raise AirflowException(
                    "no command specified so nothing to execute here.")

            # Automatically request a TTY when it's required (e.g. for sudo)
            get_pty = False
            if self.command.startswith('sudo'):
                get_pty = True

            # the timeout is taken from the operator's params
            stdin, stdout, stderr = ssh_client.exec_command(
                command=self.command, get_pty=get_pty, timeout=self.timeout)
            stdin.close()
            output = b''
            for line in stdout:
                output += line.encode('utf-8')
                self.log.info(line.strip('\n'))

            exit_status = stdout.channel.recv_exit_status()
            if exit_status == 0:
                # only return output if do_xcom_push is set;
                # otherwise it is not supposed to be disclosed
                if self.do_xcom_push:
                    enable_pickling = configuration.getboolean(
                        'core', 'enable_xcom_pickling')
                    if enable_pickling:
                        return output
                    else:
                        return b64encode(output).decode('utf-8')

            else:
                error_msg = stderr.read()
                raise AirflowException(
                    "error running cmd: {0}, error: {1}".format(
                        self.command, error_msg))

        except Exception as e:
            raise AirflowException("SSH operator error: {0}".format(str(e)))

        return True
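The do_xcom_push branch above switches on the enable_xcom_pickling flag, which lives in the [core] section of airflow.cfg; the value shown is only an example:

[core]
# when True the raw bytes are pushed to XCom, when False they are base64-encoded first
enable_xcom_pickling = True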
Example #18
    def execute(self, context):
        self.s3_key = self.get_s3_key(self.s3_key)
        ssh_hook = SSHHook(ssh_conn_id=self.sftp_conn_id)
        s3_hook = S3Hook(self.s3_conn_id)

        s3_client = s3_hook.get_conn()
        sftp_client = ssh_hook.get_conn().open_sftp()

        with NamedTemporaryFile("w") as f:
            s3_client.download_file(self.s3_bucket, self.s3_key, f.name)
            sftp_client.put(f.name, self.sftp_path)
Example #19
def get_crawler_report() -> str:
    """Get crawler report."""
    ssh = SSHHook(ssh_conn_id='ssh_big_airflow')
    client = ssh.get_conn()
    stdin, stdout, stderr = client.exec_command("""
    docker exec `docker ps  --filter name=bigscrapy_projects_airflow -q` \
    sh -c 'cat /bigcrawler-scrapy/summary.txt'
    """)
    message = "".join([line for line in stdout.readlines()])
    print(f'crawler_report: {message}')
    return message
Example #20
 def setUp(self):
     from airflow.contrib.hooks.ssh_hook import SSHHook
     hook = SSHHook(ssh_conn_id='ssh_default')
     hook.no_host_key_check = True
     args = {
         'owner': 'airflow',
         'start_date': DEFAULT_DATE,
     }
     dag = DAG(TEST_DAG_ID + 'test_schedule_dag_once', default_args=args)
     dag.schedule_interval = '@once'
     self.hook = hook
     self.dag = dag
Example #21
    def execute(self, context):
        if not self._adls_hook:
            self._adls_hook = ADLSGen2Hook(
                container=self.adls_container,
                azure_data_lake_conn_id=self.azure_data_lake_conn_id,
            )
        ssh_hook = SSHHook(ssh_conn_id=self.sftp_conn_id)
        ssh_client = ssh_hook.get_conn()
        sftp_client = ssh_client.open_sftp()

        # Get list of files in sftp_path
        self.log.info(f"Getting list of files in sftp_path: `{self.sftp_folder_path}`")
        path_content = sftp_client.listdir(self.sftp_folder_path)
        files = [
            file for file in path_content if fnmatch.fnmatch(file, self.sftp_filename)
        ]

        # Get files that already exist in the ADLS folder
        existing_files = self._get_adls_files()

        # Determine the files to be processed. If all files are to be reloaded then process all files
        # in the sftp file list. If all files are not to be reloaded then only process files for
        # which the file name does not currently exist in the ADLS folder
        if self.reload_all:
            files_to_process = files
        else:
            existing_set = {os.path.split(filename)[1] for filename in existing_files}
            files_to_process = set(files) - existing_set
            self.log.info(f"Existing files in ADLS: `{existing_set}`")
            self.log.info(f"Files to process: `{files_to_process}`")

        # create temporary folder and process files
        with tempfile.TemporaryDirectory() as temp_folder:
            for file in files_to_process:
                temp_path = os.path.join(temp_folder, file)
                adls_object = os.path.join(self.adls_folder_path, file)
                sftp_object = os.path.join(self.sftp_folder_path, file)

                self.log.info(f"Processing: `{sftp_object}`")
                try:
                    sftp_client.get(sftp_object, temp_path)
                    self._adls_hook.upload_file(
                        local_path=temp_path,
                        remote_path=adls_object,
                        overwrite=self.reload_all,
                    )
                    os.remove(temp_path)
                except IOError:
                    self.log.info(f"Skipping directory `{sftp_object}`.")

        # Close ADLS Connection
        self._adls_hook.connection.close()
Example #22
 def test_conn_with_extra_parameters(self):
     from airflow.contrib.hooks.ssh_hook import SSHHook
     db.merge_conn(
         models.Connection(conn_id='ssh_with_extra',
                           host='localhost',
                           conn_type='ssh',
                           extra='{"compress" : true, "no_host_key_check" : "true"}'
                           )
     )
     ssh_hook = SSHHook(ssh_conn_id='ssh_with_extra', keepalive_interval=10)
     ssh_hook.get_conn()
     self.assertEqual(ssh_hook.compress, True)
     self.assertEqual(ssh_hook.no_host_key_check, True)
Example #23
 def test_conn_with_extra_parameters(self):
     from airflow.contrib.hooks.ssh_hook import SSHHook
     db.merge_conn(
         models.Connection(conn_id='ssh_with_extra',
                           host='localhost',
                           conn_type='ssh',
                           extra='{"compress" : true, "no_host_key_check" : "true"}'
                           )
     )
     ssh_hook = SSHHook(ssh_conn_id='ssh_with_extra', keepalive_interval=10)
     ssh_hook.get_conn()
     self.assertEqual(ssh_hook.compress, True)
     self.assertEqual(ssh_hook.no_host_key_check, True)
Example #24
    def execute(self, context):
        self.s3_key = self.get_s3_key(self.s3_key)
        ssh_hook = SSHHook(ssh_conn_id=self.sftp_conn_id)
        s3_hook = S3Hook(self.s3_conn_id)

        sftp_client = ssh_hook.get_conn().open_sftp()

        with NamedTemporaryFile("w") as f:
            sftp_client.get(self.sftp_path, f.name)

            s3_hook.load_file(filename=f.name,
                              key=self.s3_key,
                              bucket_name=self.s3_bucket,
                              replace=True)
Example #25
 def setUp(self):
     configuration.load_test_config()
     from airflow.contrib.hooks.ssh_hook import SSHHook
     hook = SSHHook()
     hook.no_host_key_check = True
     args = {
         'owner': 'airflow',
         'start_date': DEFAULT_DATE,
         'provide_context': True
     }
     dag = DAG(TEST_DAG_ID + 'test_schedule_dag_once', default_args=args)
     dag.schedule_interval = '@once'
     self.hook = hook
     self.dag = dag
Example #26
    def execute(self, context):
        ssh_hook = SSHHook(ssh_conn_id=self.sftp_conn_id)
        gcs_hook = GoogleCloudStorageHook(self.google_cloud_storage_conn_id)

        sftp_client = ssh_hook.get_conn().open_sftp()

        with NamedTemporaryFile("w") as f:
            filename = f.name
            gcs_hook.download(bucket=self.gcs_bucket,
                              object=self.gcs_dest,
                              filename=filename)
            file_msg = "from {0} to {1}".format(filename, self.sftp_dest_path)
            self.log.info("Starting to transfer file %s", file_msg)
            sftp_client.put(filename, self.sftp_dest_path, confirm=True)
Example #27
 def setUp(self):
     configuration.load_test_config()
     from airflow.contrib.hooks.ssh_hook import SSHHook
     hook = SSHHook()
     hook.no_host_key_check = True
     args = {
         'owner': 'airflow',
         'start_date': DEFAULT_DATE,
         'provide_context': True
     }
     dag = DAG(TEST_DAG_ID+'test_schedule_dag_once', default_args=args)
     dag.schedule_interval = '@once'
     self.hook = hook
     self.dag = dag
Example #28
    def execute(self, context):
        ssh_hook = SSHHook(ssh_conn_id=self.sftp_conn_id)
        s3_hook = S3Hook(self.s3_conn_id)

        s3_files = s3_hook.list_keys(bucket_name=self.s3_bucket, prefix=self.s3_path)

        s3_client = s3_hook.get_conn()
        sftp_client = ssh_hook.get_conn().open_sftp()

        for key in s3_files:
            file_name = key.split("/")[-1]

            with NamedTemporaryFile("w") as f:
                s3_client.download_file(self.s3_bucket, key, f.name)
                sftp_client.put(f.name, os.path.join(self.sftp_path, file_name))
Example #29
    def test_ssh_connection_without_password(self, ssh_mock):
        hook = SSHHook(remote_host='remote_host',
                       port='port',
                       username='******',
                       timeout=10,
                       key_file='fake.file')

        with hook.get_conn():
            ssh_mock.return_value.connect.assert_called_once_with(
                hostname='remote_host',
                username='******',
                key_filename='fake.file',
                timeout=10,
                compress=True,
                port='port',
                sock=None)
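This test (and the near-identical one in Example #35) receives an ssh_mock argument, which implies a mock.patch decorator that the snippet does not show. A hedged sketch of the likely wiring follows; the patch target is an assumption about where the hook imports paramiko.

import unittest
from unittest import mock

from airflow.contrib.hooks.ssh_hook import SSHHook


class SSHHookConnectionTest(unittest.TestCase):
    # Assumed patch target for the paramiko client used by the hook.
    @mock.patch('airflow.contrib.hooks.ssh_hook.paramiko.SSHClient')
    def test_ssh_connection_without_password(self, ssh_mock):
        hook = SSHHook(remote_host='remote_host',
                       username='user',
                       key_file='fake.file',
                       timeout=10)
        with hook.get_conn():
            self.assertTrue(ssh_mock.return_value.connect.called)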
Example #30
def get_sub_ssh_cmds_dag(parent_dag, task_id, args):
	ssh_dag = DAG(
		'%s.%s' % (parent_dag.dag_id, task_id),
		default_args=args,
		start_date=args['start_date'],
		schedule_interval=parent_dag.schedule_interval,
		)
	start = DummyOperator(
		task_id='ssh_start',
		dag=ssh_dag)
	end = DummyOperator(
		task_id='ssh_end',
		dag=ssh_dag)
	# generate the tasks to submit dynamically, depending on the number of hive scripts that need to be run
	response = s3_client.list_objects_v2(Bucket=wk_conf.get('s3_bucket'),Prefix=wk_conf.get('s3_hive_script_location'))
	hive_scripts = [c.get('Key') for c in response.get('Contents')]
	ssh_tasks = []
	if len(hive_scripts) > 0:
		ssh_emr_hook = SSHHook(conn_id='ssh_emr_default')
		ssh_tasks = [ SSHExecuteOperator(
			task_id=str(key.replace(':','_').replace('/','_')),
            		ssh_hook=ssh_emr_hook,
            		bash_command='hive -f "s3://'+wk_conf.get('s3_bucket')+'/'+str(key)+'"',
			dag=ssh_dag) for key in hive_scripts if key.endswith('hql')]
	start.set_downstream(ssh_tasks)
	end.set_upstream(ssh_tasks)
	# if no hive scripts are generated, short circuit the step at the beginning of the main dag
	return ssh_dag
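A hedged sketch of how the sub-DAG factory above might be attached to a parent DAG with SubDagOperator; main_dag and default_args are assumed to exist in the same module, and the task id is arbitrary:

from airflow.operators.subdag_operator import SubDagOperator

run_hive_scripts = SubDagOperator(
    task_id='run_hive_scripts',
    # The factory names the sub-DAG '<parent_dag_id>.<task_id>', which is the
    # naming convention SubDagOperator expects.
    subdag=get_sub_ssh_cmds_dag(main_dag, 'run_hive_scripts', default_args),
    dag=main_dag,
)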
Example #31
    def execute(self, context):
        file_msg = None
        try:
            if self.ssh_conn_id and not self.ssh_hook:
                self.ssh_hook = SSHHook(ssh_conn_id=self.ssh_conn_id)

            if not self.ssh_hook:
                raise AirflowException("can not operate without ssh_hook or ssh_conn_id")

            if self.remote_host is not None:
                self.ssh_hook.remote_host = self.remote_host

            ssh_client = self.ssh_hook.get_conn()
            sftp_client = ssh_client.open_sftp()
            if self.operation.lower() == SFTPOperation.GET:
                file_msg = "from {0} to {1}".format(self.remote_filepath,
                                                    self.local_filepath)
                logging.debug("Starting to transfer {0}".format(file_msg))
                sftp_client.get(self.remote_filepath, self.local_filepath)
            else:
                file_msg = "from {0} to {1}".format(self.local_filepath,
                                                    self.remote_filepath)
                logging.debug("Starting to transfer file {0}".format(file_msg))
                sftp_client.put(self.local_filepath, self.remote_filepath)

        except Exception as e:
            raise AirflowException("Error while transferring {0}, error: {1}"
                                   .format(file_msg, str(e)))

        return None
Example #32
    def execute(self, context):
        self.s3_key = self.get_s3_key(self.s3_key)
        ssh_hook = SSHHook(ssh_conn_id=self.sftp_conn_id)
        s3_hook = S3Hook(self.s3_conn_id)

        sftp_client = ssh_hook.get_conn().open_sftp()

        with NamedTemporaryFile("w") as f:
            sftp_client.get(self.sftp_path, f.name)

            s3_hook.load_file(
                filename=f.name,
                key=self.s3_key,
                bucket_name=self.s3_bucket,
                replace=True
            )
Example #33
    def execute(self, context):
        """
        :raises AirflowException: when the SSH endpoint of the HDI cluster cannot be found
        """
        azure_hook = AzureHDInsightHook(azure_conn_id=self.azure_conn_id)
        azure_conn_opts = azure_hook.get_connection(
            self.azure_conn_id).extra_dejson
        ssh_username = azure_conn_opts['SSH_USER_NAME']
        ssh_password = azure_conn_opts['SSH_PASSWORD']

        state = azure_hook.get_cluster_state(self.cluster_name)

        ssh_endpoint = None
        ssh_port = None
        for endpoint in state.connectivity_endpoints:
            if endpoint.name == 'SSH':
                ssh_endpoint = endpoint.location
                ssh_port = endpoint.port

        if not ssh_endpoint:
            raise AirflowException(
                "Could not find SSH endpoint for cluster {}".format(
                    self.cluster_name))

        self.ssh_hook = SSHHook(remote_host=ssh_endpoint,
                                port=ssh_port,
                                username=ssh_username,
                                password=ssh_password)

        self.log.info("Running SSH command on cluster (%s): %s",
                      self.cluster_name, self.command)
        super(AzureHDInsightSshOperator, self).execute(context)
Example #34
    def test_tunnel_without_password(self, ssh_mock):
        hook = SSHHook(remote_host='remote_host',
                       port='port',
                       username='******',
                       timeout=10,
                       key_file='fake.file')

        with hook.get_tunnel(1234):
            ssh_mock.assert_called_once_with('remote_host',
                                             ssh_port='port',
                                             ssh_username='******',
                                             ssh_pkey='fake.file',
                                             ssh_proxy=None,
                                             local_bind_address=('localhost', ),
                                             remote_bind_address=('localhost', 1234),
                                             host_pkey_directories=[],
                                             logger=hook.log)
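As with the connection tests, this tunnel test takes an ssh_mock argument, so a patch decorator is implied but not shown; a sketch follows, with the SSHTunnelForwarder patch target assumed.

import unittest
from unittest import mock

from airflow.contrib.hooks.ssh_hook import SSHHook


class SSHHookTunnelTest(unittest.TestCase):
    # Assumed patch target for the sshtunnel forwarder used by the hook.
    @mock.patch('airflow.contrib.hooks.ssh_hook.SSHTunnelForwarder')
    def test_tunnel_without_password(self, ssh_mock):
        hook = SSHHook(remote_host='remote_host',
                       username='user',
                       key_file='fake.file',
                       timeout=10)
        with hook.get_tunnel(1234):
            self.assertTrue(ssh_mock.called)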
Example #35
    def test_ssh_connection_without_password(self, ssh_mock):
        hook = SSHHook(remote_host='remote_host',
                       port='port',
                       username='******',
                       timeout=10,
                       key_file='fake.file')

        with hook.get_conn():
            ssh_mock.return_value.connect.assert_called_once_with(
                hostname='remote_host',
                username='******',
                key_filename='fake.file',
                timeout=10,
                compress=True,
                port='port',
                sock=None
            )
Example #36
    def test_tunnel_without_password(self, ssh_mock):
        hook = SSHHook(remote_host='remote_host',
                       port='port',
                       username='******',
                       timeout=10,
                       key_file='fake.file')

        with hook.get_tunnel(1234):
            ssh_mock.assert_called_once_with('remote_host',
                                             ssh_port='port',
                                             ssh_username='******',
                                             ssh_pkey='fake.file',
                                             ssh_proxy=None,
                                             local_bind_address=('localhost', ),
                                             remote_bind_address=('localhost', 1234),
                                             host_pkey_directories=[],
                                             logger=hook.log)
Example #37
    def test_tunnel(self):
        hook = SSHHook(ssh_conn_id='ssh_default')

        import subprocess
        import socket

        server_handle = subprocess.Popen(["python", "-c", HELLO_SERVER_CMD],
                                         stdout=subprocess.PIPE)
        with hook.create_tunnel(2135, 2134):
            server_output = server_handle.stdout.read(5)
            self.assertEqual(server_output, b"ready")
            s = socket.socket()
            s.connect(("localhost", 2135))
            response = s.recv(5)
            self.assertEqual(response, b"hello")
            s.close()
            output, _ = server_handle.communicate()
            self.assertEqual(server_handle.returncode, 0)
Example #38
    def test_tunnel(self):
        hook = SSHHook(ssh_conn_id='ssh_default')

        import subprocess
        import socket

        server_handle = subprocess.Popen(["python", "-c", HELLO_SERVER_CMD],
                                         stdout=subprocess.PIPE)
        with hook.create_tunnel(2135, 2134):
            server_output = server_handle.stdout.read(5)
            self.assertEqual(server_output, b"ready")
            s = socket.socket()
            s.connect(("localhost", 2135))
            response = s.recv(5)
            self.assertEqual(response, b"hello")
            s.close()
            output, _ = server_handle.communicate()
            self.assertEqual(server_handle.returncode, 0)
Example #39
    def test_ssh_connection_with_private_key_extra(self, ssh_mock):
        hook = SSHHook(
            ssh_conn_id=self.CONN_SSH_WITH_PRIVATE_KEY_EXTRA,
            remote_host='remote_host',
            port='port',
            username='******',
            timeout=10,
        )

        with hook.get_conn():
            ssh_mock.return_value.connect.assert_called_once_with(
                hostname='remote_host',
                username='******',
                pkey=TEST_PKEY,
                timeout=10,
                compress=True,
                port='port',
                sock=None)
Example #40
 def test_conn_with_extra_parameters(self):
     db.merge_conn(
         models.Connection(
             conn_id='ssh_with_extra',
             host='localhost',
             conn_type='ssh',
             extra='{"compress" : true, "no_host_key_check" : "true"}'))
     ssh_hook = SSHHook(ssh_conn_id='ssh_with_extra')
     self.assertEqual(ssh_hook.compress, True)
     self.assertEqual(ssh_hook.no_host_key_check, True)
Example #41
    def execute(self, context):
        file_msg = None
        try:
            if self.ssh_conn_id:
                if self.ssh_hook and isinstance(self.ssh_hook, SSHHook):
                    self.log.info("ssh_conn_id is ignored when ssh_hook is provided.")
                else:
                    self.log.info("ssh_hook is not provided or invalid. " +
                                  "Trying ssh_conn_id to create SSHHook.")
                    self.ssh_hook = SSHHook(ssh_conn_id=self.ssh_conn_id)

            if not self.ssh_hook:
                raise AirflowException("Cannot operate without ssh_hook or ssh_conn_id.")

            if self.remote_host is not None:
                self.log.info("remote_host is provided explicitly. " +
                              "It will replace the remote_host which was defined " +
                              "in ssh_hook or predefined in connection of ssh_conn_id.")
                self.ssh_hook.remote_host = self.remote_host

            with self.ssh_hook.get_conn() as ssh_client:
                sftp_client = ssh_client.open_sftp()
                if self.operation.lower() == SFTPOperation.GET:
                    local_folder = os.path.dirname(self.local_filepath)
                    if self.create_intermediate_dirs:
                        # Create intermediate directories if they don't exist
                        try:
                            os.makedirs(local_folder)
                        except OSError:
                            if not os.path.isdir(local_folder):
                                raise
                    file_msg = "from {0} to {1}".format(self.remote_filepath,
                                                        self.local_filepath)
                    self.log.info("Starting to transfer %s", file_msg)
                    sftp_client.get(self.remote_filepath, self.local_filepath)
                else:
                    remote_folder = os.path.dirname(self.remote_filepath)
                    if self.create_intermediate_dirs:
                        _make_intermediate_dirs(
                            sftp_client=sftp_client,
                            remote_directory=remote_folder,
                        )
                    file_msg = "from {0} to {1}".format(self.local_filepath,
                                                        self.remote_filepath)
                    self.log.info("Starting to transfer file %s", file_msg)
                    sftp_client.put(self.local_filepath,
                                    self.remote_filepath,
                                    confirm=self.confirm)

        except Exception as e:
            raise AirflowException("Error while transferring {0}, error: {1}"
                                   .format(file_msg, str(e)))

        return self.local_filepath
Example #42
class SSHHookTest(unittest.TestCase):
    def setUp(self):
        configuration.test_mode()
        from airflow.contrib.hooks.ssh_hook import SSHHook
        self.hook = SSHHook()
        self.hook.no_host_key_check = True

    def test_remote_cmd(self):
        output = self.hook.check_output(["echo", "-n", "airflow"])
        self.assertEqual(output, b"airflow")

    def test_tunnel(self):
        print("Setting up remote listener")
        import subprocess
        import socket

        self.handle = self.hook.Popen([
            "python", "-c", '"{0}"'.format(HELLO_SERVER_CMD)
        ], stdout=subprocess.PIPE)

        print("Setting up tunnel")
        with self.hook.tunnel(2135, 2134):
            print("Tunnel up")
            server_output = self.handle.stdout.read(5)
            self.assertEqual(server_output, b"ready")
            print("Connecting to server via tunnel")
            s = socket.socket()
            s.connect(("localhost", 2135))
            print("Receiving...",)
            response = s.recv(5)
            self.assertEqual(response, b"hello")
            print("Closing connection")
            s.close()
            print("Waiting for listener...")
            output, _ = self.handle.communicate()
            self.assertEqual(self.handle.returncode, 0)
            print("Closing tunnel")
Example #43
    def execute(self, context):
        try:
            if self.ssh_conn_id and not self.ssh_hook:
                self.ssh_hook = SSHHook(ssh_conn_id=self.ssh_conn_id)

            if not self.ssh_hook:
                raise AirflowException("can not operate without ssh_hook or ssh_conn_id")

            if self.remote_host is not None:
                self.ssh_hook.remote_host = self.remote_host

            ssh_client = self.ssh_hook.get_conn()

            if not self.command:
                raise AirflowException("no command specified so nothing to execute here.")

            # Automatically request a TTY when it's required (e.g. for sudo)
            get_pty = False
            if self.command.startswith('sudo'):
                get_pty = True

            # the timeout is taken from the operator's params
            stdin, stdout, stderr = ssh_client.exec_command(command=self.command,
                                                            get_pty=get_pty,
                                                            timeout=self.timeout
                                                            )
            exit_status = stdout.channel.recv_exit_status()
            if exit_status == 0:
                # only return output if do_xcom_push is set;
                # otherwise it is not supposed to be disclosed
                if self.do_xcom_push:
                    enable_pickling = configuration.getboolean('core',
                                                               'enable_xcom_pickling')
                    if enable_pickling:
                        return stdout.read()
                    else:
                        return b64encode(stdout.read()).decode('utf-8')

            else:
                error_msg = stderr.read()
                raise AirflowException("error running cmd: {0}, error: {1}"
                                        .format(self.command, error_msg))

        except Exception as e:
            raise AirflowException("SSH operator error: {0}".format(str(e)))

        return True
Example #44
    def execute(self, context):
        file_msg = None
        try:
            if self.ssh_conn_id:
                if self.ssh_hook and isinstance(self.ssh_hook, SSHHook):
                    self.log.info("ssh_conn_id is ignored when ssh_hook is provided.")
                else:
                    self.log.info("ssh_hook is not provided or invalid. " +
                                  "Trying ssh_conn_id to create SSHHook.")
                    self.ssh_hook = SSHHook(ssh_conn_id=self.ssh_conn_id)

            if not self.ssh_hook:
                raise AirflowException("Cannot operate without ssh_hook or ssh_conn_id.")

            if self.remote_host is not None:
                self.log.info("remote_host is provided explicitly. " +
                              "It will replace the remote_host which was defined " +
                              "in ssh_hook or predefined in connection of ssh_conn_id.")
                self.ssh_hook.remote_host = self.remote_host

            with self.ssh_hook.get_conn() as ssh_client:
                sftp_client = ssh_client.open_sftp()
                if self.operation.lower() == SFTPOperation.GET:
                    file_msg = "from {0} to {1}".format(self.remote_filepath,
                                                        self.local_filepath)
                    self.log.debug("Starting to transfer %s", file_msg)
                    sftp_client.get(self.remote_filepath, self.local_filepath)
                else:
                    file_msg = "from {0} to {1}".format(self.local_filepath,
                                                        self.remote_filepath)
                    self.log.debug("Starting to transfer file %s", file_msg)
                    sftp_client.put(self.local_filepath,
                                    self.remote_filepath,
                                    confirm=self.confirm)

        except Exception as e:
            raise AirflowException("Error while transferring {0}, error: {1}"
                                   .format(file_msg, str(e)))

        return None
Example #45
class SFTPOperator(BaseOperator):
    """
    SFTPOperator for transferring files from remote host to local or vice versa.
    This operator uses ssh_hook to open an sftp transport channel that serves
    as the basis for file transfer.

    :param ssh_hook: predefined ssh_hook to use for remote execution.
        Either `ssh_hook` or `ssh_conn_id` needs to be provided.
    :type ssh_hook: :class:`SSHHook`
    :param ssh_conn_id: connection id from airflow Connections.
        `ssh_conn_id` will be ignored if `ssh_hook` is provided.
    :type ssh_conn_id: str
    :param remote_host: remote host to connect (templated)
        Nullable. If provided, it will replace the `remote_host` which was
        defined in `ssh_hook` or predefined in the connection of `ssh_conn_id`.
    :type remote_host: str
    :param local_filepath: local file path to get or put. (templated)
    :type local_filepath: str
    :param remote_filepath: remote file path to get or put. (templated)
    :type remote_filepath: str
    :param operation: specify operation 'get' or 'put', defaults to 'put'
    :type operation: str
    :param confirm: specify if the SFTP operation should be confirmed, defaults to True
    :type confirm: bool
    """
    template_fields = ('local_filepath', 'remote_filepath', 'remote_host')

    @apply_defaults
    def __init__(self,
                 ssh_hook=None,
                 ssh_conn_id=None,
                 remote_host=None,
                 local_filepath=None,
                 remote_filepath=None,
                 operation=SFTPOperation.PUT,
                 confirm=True,
                 *args,
                 **kwargs):
        super(SFTPOperator, self).__init__(*args, **kwargs)
        self.ssh_hook = ssh_hook
        self.ssh_conn_id = ssh_conn_id
        self.remote_host = remote_host
        self.local_filepath = local_filepath
        self.remote_filepath = remote_filepath
        self.operation = operation
        self.confirm = confirm
        if not (self.operation.lower() == SFTPOperation.GET or
                self.operation.lower() == SFTPOperation.PUT):
            raise TypeError("unsupported operation value {0}, expected {1} or {2}"
                            .format(self.operation, SFTPOperation.GET, SFTPOperation.PUT))

    def execute(self, context):
        file_msg = None
        try:
            if self.ssh_conn_id:
                if self.ssh_hook and isinstance(self.ssh_hook, SSHHook):
                    self.log.info("ssh_conn_id is ignored when ssh_hook is provided.")
                else:
                    self.log.info("ssh_hook is not provided or invalid. " +
                                  "Trying ssh_conn_id to create SSHHook.")
                    self.ssh_hook = SSHHook(ssh_conn_id=self.ssh_conn_id)

            if not self.ssh_hook:
                raise AirflowException("Cannot operate without ssh_hook or ssh_conn_id.")

            if self.remote_host is not None:
                self.log.info("remote_host is provided explicitly. " +
                              "It will replace the remote_host which was defined " +
                              "in ssh_hook or predefined in connection of ssh_conn_id.")
                self.ssh_hook.remote_host = self.remote_host

            with self.ssh_hook.get_conn() as ssh_client:
                sftp_client = ssh_client.open_sftp()
                if self.operation.lower() == SFTPOperation.GET:
                    file_msg = "from {0} to {1}".format(self.remote_filepath,
                                                        self.local_filepath)
                    self.log.debug("Starting to transfer %s", file_msg)
                    sftp_client.get(self.remote_filepath, self.local_filepath)
                else:
                    file_msg = "from {0} to {1}".format(self.local_filepath,
                                                        self.remote_filepath)
                    self.log.debug("Starting to transfer file %s", file_msg)
                    sftp_client.put(self.local_filepath,
                                    self.remote_filepath,
                                    confirm=self.confirm)

        except Exception as e:
            raise AirflowException("Error while transferring {0}, error: {1}"
                                   .format(file_msg, str(e)))

        return None
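Based on the docstring above, a minimal hedged usage sketch of SFTPOperator; the connection id, file paths and DAG wiring are placeholders rather than values from the original project:

from datetime import datetime

from airflow import DAG

default_args = {'owner': 'airflow', 'start_date': datetime(2021, 1, 1)}

with DAG(dag_id='sftp_upload_example',
         default_args=default_args,
         schedule_interval='@daily') as dag:
    upload_file = SFTPOperator(
        task_id='upload_file',
        ssh_conn_id='ssh_default',
        local_filepath='/tmp/report.csv',
        remote_filepath='/upload/report.csv',
        operation=SFTPOperation.PUT,
        confirm=True,
    )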
Example #46
    def execute(self, context):
        try:
            if self.ssh_conn_id:
                if self.ssh_hook and isinstance(self.ssh_hook, SSHHook):
                    self.log.info("ssh_conn_id is ignored when ssh_hook is provided.")
                else:
                    self.log.info("ssh_hook is not provided or invalid. " +
                                  "Trying ssh_conn_id to create SSHHook.")
                    self.ssh_hook = SSHHook(ssh_conn_id=self.ssh_conn_id,
                                            timeout=self.timeout)

            if not self.ssh_hook:
                raise AirflowException("Cannot operate without ssh_hook or ssh_conn_id.")

            if self.remote_host is not None:
                self.log.info("remote_host is provided explicitly. " +
                              "It will replace the remote_host which was defined " +
                              "in ssh_hook or predefined in connection of ssh_conn_id.")
                self.ssh_hook.remote_host = self.remote_host

            if not self.command:
                raise AirflowException("SSH command not specified. Aborting.")

            with self.ssh_hook.get_conn() as ssh_client:
                # Automatically request a TTY when it's required (e.g. for sudo)
                get_pty = False
                if self.command.startswith('sudo'):
                    get_pty = True

                self.log.info("Running command: %s", self.command)

                # the timeout is taken from the operator's params
                stdin, stdout, stderr = ssh_client.exec_command(command=self.command,
                                                                get_pty=get_pty,
                                                                timeout=self.timeout
                                                                )
                # get channels
                channel = stdout.channel

                # closing stdin
                stdin.close()
                channel.shutdown_write()

                agg_stdout = b''
                agg_stderr = b''

                # capture any initial output in case channel is closed already
                stdout_buffer_length = len(stdout.channel.in_buffer)

                if stdout_buffer_length > 0:
                    agg_stdout += stdout.channel.recv(stdout_buffer_length)

                # read from both stdout and stderr
                while not channel.closed or \
                        channel.recv_ready() or \
                        channel.recv_stderr_ready():
                    readq, _, _ = select([channel], [], [], self.timeout)
                    for c in readq:
                        if c.recv_ready():
                            line = stdout.channel.recv(len(c.in_buffer))
                            agg_stdout += line
                            self.log.info(line.decode('utf-8').strip('\n'))
                        if c.recv_stderr_ready():
                            line = stderr.channel.recv_stderr(len(c.in_stderr_buffer))
                            agg_stderr += line
                            self.log.warning(line.decode('utf-8').strip('\n'))
                    if stdout.channel.exit_status_ready()\
                            and not stderr.channel.recv_stderr_ready()\
                            and not stdout.channel.recv_ready():
                        stdout.channel.shutdown_read()
                        stdout.channel.close()
                        break

                stdout.close()
                stderr.close()

                exit_status = stdout.channel.recv_exit_status()
                if exit_status == 0:
                    enable_pickling = configuration.conf.getboolean(
                        'core', 'enable_xcom_pickling'
                    )
                    if enable_pickling:
                        return agg_stdout
                    else:
                        return b64encode(agg_stdout).decode('utf-8')

                else:
                    error_msg = agg_stderr.decode('utf-8')
                    raise AirflowException("error running cmd: {0}, error: {1}"
                                           .format(self.command, error_msg))

        except Exception as e:
            raise AirflowException("SSH operator error: {0}".format(str(e)))

        return True
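When XCom pickling is disabled, the snippet above pushes stdout to XCom as a base64 string, so a downstream task has to decode it before use. A minimal sketch of such a consumer, assuming an upstream task_id of 'ssh_task' and a `dag` object defined elsewhere (both are illustrative, not from the source):

from base64 import b64decode

from airflow.operators.python_operator import PythonOperator


def read_ssh_output(**context):
    # xcom_pull returns whatever the SSH task's execute() returned above
    encoded = context['ti'].xcom_pull(task_ids='ssh_task')
    print(b64decode(encoded).decode('utf-8'))


decode_output = PythonOperator(
    task_id='decode_ssh_output',
    python_callable=read_ssh_output,
    provide_context=True,   # Airflow 1.x style: pass the context as **kwargs
    dag=dag,                # hypothetical DAG defined elsewhere
)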
Exemplo n.º 47
0
class SSHOperator(BaseOperator):

    """
    SSHOperator to execute commands on given remote host using the ssh_hook.

    :param ssh_hook: predefined ssh_hook to use for remote execution
    :type ssh_hook: :class:`SSHHook`
    :param ssh_conn_id: connection id from airflow Connections
    :type ssh_conn_id: str
    :param remote_host: remote host to connect
    :type remote_host: str
    :param command: command to execute on remote host
    :type command: str
    :param timeout: timeout (in seconds) for executing the command.
    :type timeout: int
    :param do_xcom_push: return the stdout, which also gets set in XCom by the Airflow platform
    :type do_xcom_push: bool
    """

    template_fields = ('command',)

    @apply_defaults
    def __init__(self,
                 ssh_hook=None,
                 ssh_conn_id=None,
                 remote_host=None,
                 command=None,
                 timeout=10,
                 do_xcom_push=False,
                 *args,
                 **kwargs):
        super(SSHOperator, self).__init__(*args, **kwargs)
        self.ssh_hook = ssh_hook
        self.ssh_conn_id = ssh_conn_id
        self.remote_host = remote_host
        self.command = command
        self.timeout = timeout
        self.do_xcom_push = do_xcom_push

    def execute(self, context):
        try:
            if self.ssh_conn_id and not self.ssh_hook:
                self.ssh_hook = SSHHook(ssh_conn_id=self.ssh_conn_id)

            if not self.ssh_hook:
                raise AirflowException("can not operate without ssh_hook or ssh_conn_id")

            if self.remote_host is not None:
                self.ssh_hook.remote_host = self.remote_host

            ssh_client = self.ssh_hook.get_conn()

            if not self.command:
                raise AirflowException("no command specified so nothing to execute here.")

            # Auto apply tty when it's required in case of sudo
            get_pty = False
            if self.command.startswith('sudo'):
                get_pty = True

            # run the command with the timeout taken from params
            stdin, stdout, stderr = ssh_client.exec_command(command=self.command,
                                                            get_pty=get_pty,
                                                            timeout=self.timeout
                                                            )
            exit_status = stdout.channel.recv_exit_status()
            if exit_status == 0:
                # only return output if do_xcom_push is set;
                # otherwise it is not supposed to be disclosed
                if self.do_xcom_push:
                    return stdout.read()
            else:
                error_msg = stderr.read()
                raise AirflowException("error running cmd: {0}, error: {1}"
                                        .format(self.command, error_msg))

        except Exception as e:
            raise AirflowException("SSH operator error: {0}".format(str(e)))

        return True

    def tunnel(self):
        ssh_client = self.ssh_hook.get_conn()
        ssh_client.get_transport()
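For context, a minimal usage sketch of the operator above inside a DAG; the connection id 'ssh_default' and the DAG settings are assumptions for illustration:

from datetime import datetime

from airflow import DAG
from airflow.contrib.operators.ssh_operator import SSHOperator

dag = DAG('ssh_example', start_date=datetime(2019, 1, 1), schedule_interval='@once')

run_uptime = SSHOperator(
    task_id='run_uptime',
    ssh_conn_id='ssh_default',   # Airflow connection holding host and credentials
    command='uptime',
    timeout=10,
    do_xcom_push=True,           # push stdout to XCom, as handled in execute()
    dag=dag,
)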
Exemplo n.º 48
0
    def execute(self, context):
        try:
            if self.ssh_conn_id and not self.ssh_hook:
                self.ssh_hook = SSHHook(ssh_conn_id=self.ssh_conn_id)

            if not self.ssh_hook:
                raise AirflowException("can not operate without ssh_hook or ssh_conn_id")

            if self.remote_host is not None:
                self.ssh_hook.remote_host = self.remote_host

            ssh_client = self.ssh_hook.get_conn()

            if not self.command:
                raise AirflowException("no command specified so nothing to execute here.")

            # Auto apply tty when it's required in case of sudo
            get_pty = False
            if self.command.startswith('sudo'):
                get_pty = True

            # run the command with the timeout taken from params
            stdin, stdout, stderr = ssh_client.exec_command(command=self.command,
                                                            get_pty=get_pty,
                                                            timeout=self.timeout
                                                            )
            # get channels
            channel = stdout.channel

            # closing stdin
            stdin.close()
            channel.shutdown_write()

            agg_stdout = b''
            agg_stderr = b''

            # capture any initial output in case channel is closed already
            stdout_buffer_length = len(stdout.channel.in_buffer)

            if stdout_buffer_length > 0:
                agg_stdout += stdout.channel.recv(stdout_buffer_length)

            # read from both stdout and stderr
            while not channel.closed or channel.recv_ready() or channel.recv_stderr_ready():
                readq, _, _ = select([channel], [], [], self.timeout)
                for c in readq:
                    if c.recv_ready():
                        line = stdout.channel.recv(len(c.in_buffer))
                        agg_stdout += line
                        self.log.info(line.decode('utf-8').strip('\n'))
                    if c.recv_stderr_ready():
                        line = stderr.channel.recv_stderr(len(c.in_stderr_buffer))
                        agg_stderr += line
                        self.log.warning(line.decode('utf-8').strip('\n'))
                if stdout.channel.exit_status_ready()\
                        and not stderr.channel.recv_stderr_ready()\
                        and not stdout.channel.recv_ready():
                    stdout.channel.shutdown_read()
                    stdout.channel.close()
                    break

            stdout.close()
            stderr.close()

            exit_status = stdout.channel.recv_exit_status()
            if exit_status == 0:
                # return output only if do_xcom_push is set
                if self.do_xcom_push:
                    enable_pickling = configuration.getboolean('core',
                                                               'enable_xcom_pickling')
                    if enable_pickling:
                        return agg_stdout
                    else:
                        return b64encode(agg_stdout).decode('utf-8')

            else:
                error_msg = agg_stderr.decode('utf-8')
                raise AirflowException("error running cmd: {0}, error: {1}"
                                       .format(self.command, error_msg))

        except Exception as e:
            raise AirflowException("SSH operator error: {0}".format(str(e)))

        return True
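The select-based read loop above can also be exercised outside Airflow. A stripped-down paramiko sketch of the same pattern, assuming a reachable host with key-based auth (host name and username are placeholders):

from select import select

import paramiko

client = paramiko.SSHClient()
client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
client.connect('example.com', username='airflow')  # placeholder host/user

stdin, stdout, stderr = client.exec_command('ls -l', timeout=10)
channel = stdout.channel
stdin.close()
channel.shutdown_write()

agg_stdout, agg_stderr = b'', b''
while not channel.closed or channel.recv_ready() or channel.recv_stderr_ready():
    ready, _, _ = select([channel], [], [], 10)
    for c in ready:
        if c.recv_ready():
            agg_stdout += c.recv(len(c.in_buffer))
        if c.recv_stderr_ready():
            agg_stderr += c.recv_stderr(len(c.in_stderr_buffer))
    if channel.exit_status_ready() \
            and not channel.recv_ready() \
            and not channel.recv_stderr_ready():
        break

print(channel.recv_exit_status(), agg_stdout.decode(), agg_stderr.decode())
client.close()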
Exemplo n.º 49
0
class SFTPOperator(BaseOperator):
    """
    SFTPOperator for transferring files from remote host to local or vice versa.
    This operator uses ssh_hook to open an SFTP transport channel that serves as
    the basis for file transfer.

    :param ssh_hook: predefined ssh_hook to use for remote execution
    :type ssh_hook: :class:`SSHHook`
    :param ssh_conn_id: connection id from airflow Connections
    :type ssh_conn_id: str
    :param remote_host: remote host to connect
    :type remote_host: str
    :param local_filepath: local file path to get or put
    :type local_filepath: str
    :param remote_filepath: remote file path to get or put
    :type remote_filepath: str
    :param operation: specify operation 'get' or 'put', defaults to put
    :type operation: str
    """
    template_fields = ('local_filepath', 'remote_filepath')

    @apply_defaults
    def __init__(self,
                 ssh_hook=None,
                 ssh_conn_id=None,
                 remote_host=None,
                 local_filepath=None,
                 remote_filepath=None,
                 operation=SFTPOperation.PUT,
                 *args,
                 **kwargs):
        super(SFTPOperator, self).__init__(*args, **kwargs)
        self.ssh_hook = ssh_hook
        self.ssh_conn_id = ssh_conn_id
        self.remote_host = remote_host
        self.local_filepath = local_filepath
        self.remote_filepath = remote_filepath
        self.operation = operation
        if not (self.operation.lower() == SFTPOperation.GET or
                self.operation.lower() == SFTPOperation.PUT):
            raise TypeError("unsupported operation value {0}, expected {1} or {2}"
                            .format(self.operation, SFTPOperation.GET, SFTPOperation.PUT))

    def execute(self, context):
        file_msg = None
        try:
            if self.ssh_conn_id and not self.ssh_hook:
                self.ssh_hook = SSHHook(ssh_conn_id=self.ssh_conn_id)

            if not self.ssh_hook:
                raise AirflowException("can not operate without ssh_hook or ssh_conn_id")

            if self.remote_host is not None:
                self.ssh_hook.remote_host = self.remote_host

            ssh_client = self.ssh_hook.get_conn()
            sftp_client = ssh_client.open_sftp()
            if self.operation.lower() == SFTPOperation.GET:
                file_msg = "from {0} to {1}".format(self.remote_filepath,
                                                    self.local_filepath)
                logging.debug("Starting to transfer {0}".format(file_msg))
                sftp_client.get(self.remote_filepath, self.local_filepath)
            else:
                file_msg = "from {0} to {1}".format(self.local_filepath,
                                                    self.remote_filepath)
                logging.debug("Starting to transfer file {0}".format(file_msg))
                sftp_client.put(self.local_filepath, self.remote_filepath)

        except Exception as e:
            raise AirflowException("Error while transferring {0}, error: {1}"
                                   .format(file_msg, str(e)))

        return None
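A minimal usage sketch of the operator above performing a 'get'; the connection id, file paths and `dag` object are illustrative assumptions:

from airflow.contrib.operators.sftp_operator import SFTPOperation, SFTPOperator

fetch_report = SFTPOperator(
    task_id='fetch_report',
    ssh_conn_id='sftp_default',          # assumed Airflow connection
    remote_filepath='/data/report.csv',  # file on the SFTP server
    local_filepath='/tmp/report.csv',    # where the worker stores it
    operation=SFTPOperation.GET,
    dag=dag,                             # hypothetical DAG defined elsewhere
)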
Exemplo n.º 50
0
class SSHOperator(BaseOperator):
    """
    SSHOperator to execute commands on given remote host using the ssh_hook.

    :param ssh_hook: predefined ssh_hook to use for remote execution
    :type ssh_hook: :class:`SSHHook`
    :param ssh_conn_id: connection id from airflow Connections
    :type ssh_conn_id: str
    :param remote_host: remote host to connect
    :type remote_host: str
    :param command: command to execute on remote host
    :type command: str
    :param timeout: timeout (in seconds) for executing the command.
    :type timeout: int
    :param do_xcom_push: return the stdout, which also gets set in XCom by the Airflow platform
    :type do_xcom_push: bool
    """

    template_fields = ('command',)

    @apply_defaults
    def __init__(self,
                 ssh_hook=None,
                 ssh_conn_id=None,
                 remote_host=None,
                 command=None,
                 timeout=10,
                 do_xcom_push=False,
                 *args,
                 **kwargs):
        super(SSHOperator, self).__init__(*args, **kwargs)
        self.ssh_hook = ssh_hook
        self.ssh_conn_id = ssh_conn_id
        self.remote_host = remote_host
        self.command = command
        self.timeout = timeout
        self.do_xcom_push = do_xcom_push

    def execute(self, context):
        try:
            if self.ssh_conn_id and not self.ssh_hook:
                self.ssh_hook = SSHHook(ssh_conn_id=self.ssh_conn_id)

            if not self.ssh_hook:
                raise AirflowException("can not operate without ssh_hook or ssh_conn_id")

            if self.remote_host is not None:
                self.ssh_hook.remote_host = self.remote_host

            ssh_client = self.ssh_hook.get_conn()

            if not self.command:
                raise AirflowException("no command specified so nothing to execute here.")

            # Auto apply tty when it's required in case of sudo
            get_pty = False
            if self.command.startswith('sudo'):
                get_pty = True

            # run the command with the timeout taken from params
            stdin, stdout, stderr = ssh_client.exec_command(command=self.command,
                                                            get_pty=get_pty,
                                                            timeout=self.timeout
                                                            )
            # get channels
            channel = stdout.channel

            # closing stdin
            stdin.close()
            channel.shutdown_write()

            agg_stdout = b''
            agg_stderr = b''

            # capture any initial output in case channel is closed already
            stdout_buffer_length = len(stdout.channel.in_buffer)

            if stdout_buffer_length > 0:
                agg_stdout += stdout.channel.recv(stdout_buffer_length)

            # read from both stdout and stderr
            while not channel.closed or channel.recv_ready() or channel.recv_stderr_ready():
                readq, _, _ = select([channel], [], [], self.timeout)
                for c in readq:
                    if c.recv_ready():
                        line = stdout.channel.recv(len(c.in_buffer))
                        agg_stdout += line
                        self.log.info(line.decode('utf-8').strip('\n'))
                    if c.recv_stderr_ready():
                        line = stderr.channel.recv_stderr(len(c.in_stderr_buffer))
                        agg_stderr += line
                        self.log.warning(line.decode('utf-8').strip('\n'))
                if stdout.channel.exit_status_ready()\
                        and not stderr.channel.recv_stderr_ready()\
                        and not stdout.channel.recv_ready():
                    stdout.channel.shutdown_read()
                    stdout.channel.close()
                    break

            stdout.close()
            stderr.close()

            exit_status = stdout.channel.recv_exit_status()
            if exit_status == 0:
                # return output only if do_xcom_push is set
                if self.do_xcom_push:
                    enable_pickling = configuration.getboolean('core',
                                                               'enable_xcom_pickling')
                    if enable_pickling:
                        return agg_stdout
                    else:
                        return b64encode(agg_stdout).decode('utf-8')

            else:
                error_msg = agg_stderr.decode('utf-8')
                raise AirflowException("error running cmd: {0}, error: {1}"
                                       .format(self.command, error_msg))

        except Exception as e:
            raise AirflowException("SSH operator error: {0}".format(str(e)))

        return True

    def tunnel(self):
        ssh_client = self.ssh_hook.get_conn()
        ssh_client.get_transport()
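Since get_pty is requested only when the command starts with 'sudo', a task like the following sketch would get a pseudo-terminal automatically; the connection id and command are assumptions for illustration:

from airflow.contrib.operators.ssh_operator import SSHOperator

restart_service = SSHOperator(
    task_id='restart_nginx',
    ssh_conn_id='ssh_default',
    # starts with "sudo", so execute() above sets get_pty=True automatically
    command='sudo systemctl restart nginx',
    timeout=30,
    do_xcom_push=False,   # nothing is returned, so nothing lands in XCom
    dag=dag,              # hypothetical DAG defined elsewhere
)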
Exemplo n.º 51
0
    def setUp(self):
        configuration.test_mode()
        from airflow.contrib.hooks.ssh_hook import SSHHook
        self.hook = SSHHook()
        self.hook.no_host_key_check = True
Exemplo n.º 52
0
    def setUp(self):
        configuration.load_test_config()
        from airflow.contrib.hooks.ssh_hook import SSHHook
        self.hook = SSHHook(ssh_conn_id='ssh_default', keepalive_interval=10)
        self.hook.no_host_key_check = True
Exemplo n.º 53
0
class SSHOperator(BaseOperator):
    """
    SSHOperator to execute commands on given remote host using the ssh_hook.

    :param ssh_hook: predefined ssh_hook to use for remote execution.
        Either `ssh_hook` or `ssh_conn_id` needs to be provided.
    :type ssh_hook: airflow.contrib.hooks.ssh_hook.SSHHook
    :param ssh_conn_id: connection id from airflow Connections.
        `ssh_conn_id` will be ignored if `ssh_hook` is provided.
    :type ssh_conn_id: str
    :param remote_host: remote host to connect (templated)
        Nullable. If provided, it will replace the `remote_host` which was
        defined in `ssh_hook` or predefined in the connection of `ssh_conn_id`.
    :type remote_host: str
    :param command: command to execute on remote host. (templated)
    :type command: str
    :param timeout: timeout (in seconds) for executing the command.
    :type timeout: int
    """

    template_fields = ('command', 'remote_host')
    template_ext = ('.sh',)

    @apply_defaults
    def __init__(self,
                 ssh_hook=None,
                 ssh_conn_id=None,
                 remote_host=None,
                 command=None,
                 timeout=10,
                 *args,
                 **kwargs):
        super(SSHOperator, self).__init__(*args, **kwargs)
        self.ssh_hook = ssh_hook
        self.ssh_conn_id = ssh_conn_id
        self.remote_host = remote_host
        self.command = command
        self.timeout = timeout

    def execute(self, context):
        try:
            if self.ssh_conn_id:
                if self.ssh_hook and isinstance(self.ssh_hook, SSHHook):
                    self.log.info("ssh_conn_id is ignored when ssh_hook is provided.")
                else:
                    self.log.info("ssh_hook is not provided or invalid. " +
                                  "Trying ssh_conn_id to create SSHHook.")
                    self.ssh_hook = SSHHook(ssh_conn_id=self.ssh_conn_id,
                                            timeout=self.timeout)

            if not self.ssh_hook:
                raise AirflowException("Cannot operate without ssh_hook or ssh_conn_id.")

            if self.remote_host is not None:
                self.log.info("remote_host is provided explicitly. " +
                              "It will replace the remote_host which was defined " +
                              "in ssh_hook or predefined in connection of ssh_conn_id.")
                self.ssh_hook.remote_host = self.remote_host

            if not self.command:
                raise AirflowException("SSH command not specified. Aborting.")

            with self.ssh_hook.get_conn() as ssh_client:
                # Auto apply tty when it's required in case of sudo
                get_pty = False
                if self.command.startswith('sudo'):
                    get_pty = True

                self.log.info("Running command: %s", self.command)

                # run the command with the timeout taken from params
                stdin, stdout, stderr = ssh_client.exec_command(command=self.command,
                                                                get_pty=get_pty,
                                                                timeout=self.timeout
                                                                )
                # get channels
                channel = stdout.channel

                # closing stdin
                stdin.close()
                channel.shutdown_write()

                agg_stdout = b''
                agg_stderr = b''

                # capture any initial output in case channel is closed already
                stdout_buffer_length = len(stdout.channel.in_buffer)

                if stdout_buffer_length > 0:
                    agg_stdout += stdout.channel.recv(stdout_buffer_length)

                # read from both stdout and stderr
                while not channel.closed or \
                        channel.recv_ready() or \
                        channel.recv_stderr_ready():
                    readq, _, _ = select([channel], [], [], self.timeout)
                    for c in readq:
                        if c.recv_ready():
                            line = stdout.channel.recv(len(c.in_buffer))
                            agg_stdout += line
                            self.log.info(line.decode('utf-8').strip('\n'))
                        if c.recv_stderr_ready():
                            line = stderr.channel.recv_stderr(len(c.in_stderr_buffer))
                            agg_stderr += line
                            self.log.warning(line.decode('utf-8').strip('\n'))
                    if stdout.channel.exit_status_ready()\
                            and not stderr.channel.recv_stderr_ready()\
                            and not stdout.channel.recv_ready():
                        stdout.channel.shutdown_read()
                        stdout.channel.close()
                        break

                stdout.close()
                stderr.close()

                exit_status = stdout.channel.recv_exit_status()
                if exit_status == 0:
                    enable_pickling = configuration.conf.getboolean(
                        'core', 'enable_xcom_pickling'
                    )
                    if enable_pickling:
                        return agg_stdout
                    else:
                        return b64encode(agg_stdout).decode('utf-8')

                else:
                    error_msg = agg_stderr.decode('utf-8')
                    raise AirflowException("error running cmd: {0}, error: {1}"
                                           .format(self.command, error_msg))

        except Exception as e:
            raise AirflowException("SSH operator error: {0}".format(str(e)))

        return True

    def tunnel(self):
        ssh_client = self.ssh_hook.get_conn()
        ssh_client.get_transport()
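Because 'command' is a template field and '.sh' is registered in template_ext, the command can embed Jinja directly or point at a script file that gets rendered before execution. A sketch under those assumptions (file path, connection id and `dag` object are illustrative):

from airflow.contrib.operators.ssh_operator import SSHOperator

# Inline Jinja in the command string
echo_ds = SSHOperator(
    task_id='echo_ds',
    ssh_conn_id='ssh_default',
    command='echo "running for {{ ds }}"',
    dag=dag,   # hypothetical DAG defined elsewhere
)

# A .sh file relative to the DAG folder is rendered as a Jinja template first
run_script = SSHOperator(
    task_id='run_cleanup',
    ssh_conn_id='ssh_default',
    command='scripts/cleanup.sh',   # hypothetical template file
    dag=dag,
)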
Exemplo n.º 54
0
class SFTPOperator(BaseOperator):
    """
    SFTPOperator for transferring files from remote host to local or vice versa.
    This operator uses ssh_hook to open an SFTP transport channel that serves as
    the basis for file transfer.

    :param ssh_hook: predefined ssh_hook to use for remote execution.
        Either `ssh_hook` or `ssh_conn_id` needs to be provided.
    :type ssh_hook: airflow.contrib.hooks.ssh_hook.SSHHook
    :param ssh_conn_id: connection id from airflow Connections.
        `ssh_conn_id` will be ignored if `ssh_hook` is provided.
    :type ssh_conn_id: str
    :param remote_host: remote host to connect (templated)
        Nullable. If provided, it will replace the `remote_host` which was
        defined in `ssh_hook` or predefined in the connection of `ssh_conn_id`.
    :type remote_host: str
    :param local_filepath: local file path to get or put. (templated)
    :type local_filepath: str
    :param remote_filepath: remote file path to get or put. (templated)
    :type remote_filepath: str
    :param operation: specify operation 'get' or 'put', defaults to put
    :type operation: str
    :param confirm: specify if the SFTP operation should be confirmed, defaults to True
    :type confirm: bool
    :param create_intermediate_dirs: create missing intermediate directories when
        copying from remote to local and vice-versa. Default is False.

        Example: The following task would copy ``file.txt`` to the remote host
        at ``/tmp/tmp1/tmp2/`` while creating ``tmp``, ``tmp1`` and ``tmp2`` if they
        don't exist. If the parameter is not passed, the task would fail because the
        remote directory does not exist. ::

            put_file = SFTPOperator(
                task_id="test_sftp",
                ssh_conn_id="ssh_default",
                local_filepath="/tmp/file.txt",
                remote_filepath="/tmp/tmp1/tmp2/file.txt",
                operation="put",
                create_intermediate_dirs=True,
                dag=dag
            )

    :type create_intermediate_dirs: bool
    """
    template_fields = ('local_filepath', 'remote_filepath', 'remote_host')

    @apply_defaults
    def __init__(self,
                 ssh_hook=None,
                 ssh_conn_id=None,
                 remote_host=None,
                 local_filepath=None,
                 remote_filepath=None,
                 operation=SFTPOperation.PUT,
                 confirm=True,
                 create_intermediate_dirs=False,
                 *args,
                 **kwargs):
        super(SFTPOperator, self).__init__(*args, **kwargs)
        self.ssh_hook = ssh_hook
        self.ssh_conn_id = ssh_conn_id
        self.remote_host = remote_host
        self.local_filepath = local_filepath
        self.remote_filepath = remote_filepath
        self.operation = operation
        self.confirm = confirm
        self.create_intermediate_dirs = create_intermediate_dirs
        if not (self.operation.lower() == SFTPOperation.GET or
                self.operation.lower() == SFTPOperation.PUT):
            raise TypeError("unsupported operation value {0}, expected {1} or {2}"
                            .format(self.operation, SFTPOperation.GET, SFTPOperation.PUT))

    def execute(self, context):
        file_msg = None
        try:
            if self.ssh_conn_id:
                if self.ssh_hook and isinstance(self.ssh_hook, SSHHook):
                    self.log.info("ssh_conn_id is ignored when ssh_hook is provided.")
                else:
                    self.log.info("ssh_hook is not provided or invalid. " +
                                  "Trying ssh_conn_id to create SSHHook.")
                    self.ssh_hook = SSHHook(ssh_conn_id=self.ssh_conn_id)

            if not self.ssh_hook:
                raise AirflowException("Cannot operate without ssh_hook or ssh_conn_id.")

            if self.remote_host is not None:
                self.log.info("remote_host is provided explicitly. " +
                              "It will replace the remote_host which was defined " +
                              "in ssh_hook or predefined in connection of ssh_conn_id.")
                self.ssh_hook.remote_host = self.remote_host

            with self.ssh_hook.get_conn() as ssh_client:
                sftp_client = ssh_client.open_sftp()
                if self.operation.lower() == SFTPOperation.GET:
                    local_folder = os.path.dirname(self.local_filepath)
                    if self.create_intermediate_dirs:
                        # Create intermediate directories if they don't exist
                        try:
                            os.makedirs(local_folder)
                        except OSError:
                            if not os.path.isdir(local_folder):
                                raise
                    file_msg = "from {0} to {1}".format(self.remote_filepath,
                                                        self.local_filepath)
                    self.log.info("Starting to transfer %s", file_msg)
                    sftp_client.get(self.remote_filepath, self.local_filepath)
                else:
                    remote_folder = os.path.dirname(self.remote_filepath)
                    if self.create_intermediate_dirs:
                        _make_intermediate_dirs(
                            sftp_client=sftp_client,
                            remote_directory=remote_folder,
                        )
                    file_msg = "from {0} to {1}".format(self.local_filepath,
                                                        self.remote_filepath)
                    self.log.info("Starting to transfer file %s", file_msg)
                    sftp_client.put(self.local_filepath,
                                    self.remote_filepath,
                                    confirm=self.confirm)

        except Exception as e:
            raise AirflowException("Error while transferring {0}, error: {1}"
                                   .format(file_msg, str(e)))

        return self.local_filepath
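_make_intermediate_dirs is referenced above but not shown in this snippet. A rough sketch of what such a helper could look like, offered as an assumption about its behaviour rather than the actual Airflow implementation:

import os


def _make_intermediate_dirs(sftp_client, remote_directory):
    """Recursively create remote_directory on the SFTP server if it is missing."""
    if remote_directory in ('', '/'):
        return
    try:
        sftp_client.stat(remote_directory)   # already exists, nothing to do
    except IOError:
        parent = os.path.dirname(remote_directory)
        _make_intermediate_dirs(sftp_client, parent)   # ensure the parent first
        sftp_client.mkdir(remote_directory)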
Exemplo n.º 55
0
    def test_ssh_connection(self):
        hook = SSHHook(ssh_conn_id='ssh_default')
        with hook.get_conn() as client:
            (_, stdout, _) = client.exec_command('ls')
            self.assertIsNotNone(stdout.read())