def _execute(self, cmd, **kwargs):
    """
    Run a command on the remote host through the existing control socket.

    The command text is fed to `ssh` via a here-document.

    Parameters
    ----------
    cmd : string
        Command to be executed on remote host.
    kwargs : keywords
        Options forwarded to `run_in_subprocess`; they override the
        entries of `self._subprocess_config` with the same name.

    Returns
    -------
    proc : Popen subprocess
        Subprocess used to run remote command (the value returned by
        `run_in_subprocess`).
    """
    logger.info('Sending command...')

    # Client-level defaults first, per-call overrides on top.
    subprocess_options = dict(self._subprocess_config, **kwargs)

    remote_command = 'ssh {login} -T -o ControlPath={socket} << EOF\n{cmd}\nEOF'.format(
        login=self._login_info,
        socket=self._socket_path,
        cmd=cmd,
    )
    return run_in_subprocess(remote_command, check_output=True, **subprocess_options)
def _disconnect(self):
    """
    Close the persistent connection to the remote host.

    Sends an `-O exit` control request to the control socket at
    `self._socket_path`. The outcome of the command is not inspected.
    """
    exit_request = "ssh {login} -T -S {socket} -O exit".format(
        login=self._login_info,
        socket=self._socket_path,
    )
    run_in_subprocess(exit_request)
def _is_connected(self):
    """
    Report whether the control socket answers an `-O check` request.

    Returns
    -------
    bool
        True when the check command exits with status 0, False otherwise.
    """
    logger.info('Checking control socket...')
    check_command = "ssh {login} -T -S {socket} -O check".format(
        login=self._login_info,
        socket=self._socket_path,
    )
    exit_status = run_in_subprocess(check_command).returncode
    return exit_status == 0
def _port_forward_stop(self, local_port, remote_host, remote_port, connection):
    """
    Cancel a local port forward previously registered on the control socket.

    Parameters
    ----------
    local_port : int or str
        Local port of the forward to cancel.
    remote_host : str
        Remote host the forward points at.
    remote_port : int or str
        Remote port the forward points at.
    connection : object
        Accepted for interface compatibility; not used by this method.

    The result is only logged; no exception is raised when the cancel
    request fails.
    """
    logger.info('Cancelling port forward...')
    cancel_command = (
        'ssh {login} -T -O cancel -S {socket} '
        '-L localhost:{local_port}:{remote_host}:{remote_port}'
    ).format(
        login=self._login_info,
        socket=self._socket_path,
        local_port=local_port,
        remote_host=remote_host,
        remote_port=remote_port,
    )
    result = run_in_subprocess(cancel_command)
    if result.returncode == 0:
        logger.info('Port forward succesfully stopped.')
    else:
        logger.info('Failed to stop port forwarding.')
def _is_connected(self):
    """
    Report whether the control socket answers an `-O check` request.

    When the check fails and a file still exists at `self._socket_path`,
    that file is removed before returning.

    Returns
    -------
    bool
        True when the check command exits with status 0, False otherwise.
    """
    check_command = "ssh {login} -T -S {socket} -O check".format(
        login=self._login_info,
        socket=self._socket_path,
    )
    if run_in_subprocess(check_command).returncode == 0:
        return True

    # The check failed: clear away a leftover socket file, if any.
    if os.path.exists(self._socket_path):
        os.remove(self._socket_path)
    return False
def _run_in_hivecli(self, cmd):
    """
    Run a query using the hive cli in a subprocess.

    The query is embedded in `hive -e "..."`: every backslash is doubled
    and every double quote is backslash-escaped so the text survives the
    surrounding double quotes. The command goes through `self.remote`
    when it is set, otherwise it runs in a local subprocess.
    """
    # Backslashes must be doubled before quotes are escaped; otherwise the
    # backslashes introduced for the quotes would be doubled as well.
    quotable = cmd.replace('\\', '\\\\').replace('"', '\\"')
    hive_command = 'hive -e "{0}"'.format(quotable)

    if self.remote:
        return self.remote.execute(hive_command)
    return run_in_subprocess(hive_command, check_output=True)
def _is_connected(self):
    """
    Return whether SSHClient is connected by checking the control socket.

    A failed check also removes the file at `self._socket_path` when one
    exists.
    """
    check_command = "ssh {login} -T -S {socket} -O check".format(
        login=self._login_info,
        socket=self._socket_path,
    )
    connected = run_in_subprocess(check_command).returncode == 0

    # Only touch the filesystem when the check reported a dead connection.
    if not connected and os.path.exists(self._socket_path):
        os.remove(self._socket_path)
    return connected
def download(self, source, dest=None, overwrite=False, fs=None):
    """
    Download files to another filesystem.

    This method (recursively) downloads a file/folder from path `source` on
    this filesystem to the path `dest` on filesystem `fs`.

    Args:
        source (str): The path on this (remote) filesystem of the file or
            folder to download.
        dest (str): The destination path on filesystem `fs`. If not
            specified, the basename of `source` is used.
        overwrite (bool): `True` if the contents of any existing file by the
            same name should be overwritten, `False` otherwise. Only
            forwarded to the standard implementation; the `scp` branch does
            not consult it.
        fs (FileSystemClient): The FileSystemClient into which the nominated
            file/folder `source` should be downloaded. If not specified,
            defaults to the local filesystem.

    Returns:
        None for local downloads; otherwise whatever the parent
        implementation returns.

    SSHClient Quirks:
        This method is overloaded so that remote-to-local downloads can be
        handled specially using `scp`. Downloads to any non-local filesystem
        are handled using the standard implementation.
    """
    from ..filesystems.local import LocalFsClient

    if fs is None or isinstance(fs, LocalFsClient):
        self.connect()
        logger.info('Copying file to local...')
        dest = dest or posixpath.basename(source)
        cmd = (
            "scp -r -o ControlPath={socket} {login}:'{remote_file}' '{local_file}'"
            .format(
                socket=self._socket_path,
                login=self._login_info,
                # `source` is the path on the remote host and `dest` the
                # local target; these were previously passed the wrong way
                # round, so scp fetched `dest` from the remote instead.
                remote_file=source.replace('"', r'\"'),  # quote escaped for bash
                local_file=dest.replace('"', r'\"'),
            ))
        proc = run_in_subprocess(cmd, check_output=True)
        logger.info(proc.stderr or 'Success')
    else:
        return super(RemoteClient, self).download(source, dest, overwrite, fs)
def _port_forward_start(self, local_port, remote_host, remote_port):
    """
    Register a local port forward on the control socket.

    Parameters
    ----------
    local_port : int or str
        Port on localhost to listen on.
    remote_host : str
        Host the forwarded traffic is sent to.
    remote_port : int or str
        Port on `remote_host` the forwarded traffic is sent to.

    Returns
    -------
    proc
        The result of `run_in_subprocess` for the forward request.

    Raises
    ------
    Exception
        If the forward request exits with a non-zero status.
    """
    self.connect()
    logger.info('Establishing port forward...')
    forward_command = (
        'ssh {login} -T -O forward -S {socket} '
        '-L localhost:{local_port}:{remote_host}:{remote_port}'
    ).format(
        login=self._login_info,
        socket=self._socket_path,
        local_port=local_port,
        remote_host=remote_host,
        remote_port=remote_port,
    )
    result = run_in_subprocess(forward_command)
    if result.returncode == 0:
        logger.info(result.stderr or 'Success')
        return result
    raise Exception('Unable to port forward with command: {}'.format(forward_command))
def _run_in_hivecli(self, cmd):
    """
    Run a query using the hive cli in a subprocess.

    The query is embedded in `hive -e "..."`: every backslash is doubled
    and every double quote is backslash-escaped. The command goes through
    `self.remote` when it is set. Otherwise it runs locally, provided
    `self.__env__.allows_direct_querying()` is truthy.

    Raises
    ------
    Exception
        If there is no remote and direct querying is not allowed.
    """
    # Backslashes must be doubled before quotes are escaped; otherwise the
    # backslashes introduced for the quotes would be doubled as well.
    quotable = cmd.replace('\\', '\\\\').replace('"', '\\"')
    hive_command = 'hive -e "{0}"'.format(quotable)

    if self.remote:
        return self.remote.execute(hive_command)
    if self.__env__.allows_direct_querying():
        return run_in_subprocess(hive_command, check_output=True)
    raise Exception(
        "No ssh connection and environment does not allow direct databases"
    )
def update_host_keys(self):
    """
    Refresh the SSH host keys stored in `~/.ssh/known_hosts` for `self.host`.

    The existing entry is removed with `ssh-keygen -R` and a fresh one is
    appended from `ssh-keyscan`, which lets connections succeed after a
    server has been redeployed with different host keys.

    Returns
    -------
    bool
        True when the keys were updated.

    Raises
    ------
    AssertionError
        If this client has a remote set.
    RuntimeError
        If the update command exits with a non-zero status; the message
        carries the decoded stdout and stderr of the command.
    """
    assert not self.remote, "Updating host key only works for local connections."

    refresh_command = "ssh-keygen -R {host} && ssh-keyscan {host} >> ~/.ssh/known_hosts".format(
        host=self.host)
    # NOTE(review): the positional True is presumably `check_output` - confirm
    # against the signature of run_in_subprocess.
    result = run_in_subprocess(refresh_command, True)
    if result.returncode == 0:
        return True

    captured_output = [result.stdout.decode('utf-8'), result.stderr.decode('utf-8')]
    raise RuntimeError(
        "Could not update host keys! Please handle this manually. The "
        "error was:\n" + '\n'.join(captured_output)
    )
def download(self, source, dest=None, overwrite=False, fs=None):
    """
    This method (recursively) downloads a file/folder from path `source` on
    this filesystem to the path `dest` on filesystem `fs`.

    Parameters:
        source (str): The path on this (remote) filesystem of the file or
            folder to download.
        dest (str): The destination path on filesystem `fs`. If not
            specified, the basename of `source` is used.
        overwrite (bool): `True` if the contents of any existing file by the
            same name should be overwritten, `False` otherwise. Only
            forwarded to the standard implementation; the `scp` branch does
            not consult it.
        fs (FileSystemClient): The FileSystemClient into which the nominated
            file/folder `source` should be downloaded. If not specified,
            defaults to the local filesystem.

    Returns:
        None for local downloads; otherwise whatever the parent
        implementation returns.

    SSHClient Quirks:
        This method is overloaded so that remote-to-local downloads can be
        handled specially using `scp`. Downloads to any non-local filesystem
        are handled using the standard implementation.
    """
    from ..filesystems.local import LocalFsClient

    if fs is None or isinstance(fs, LocalFsClient):
        self.connect()
        logger.info('Copying file to local...')
        dest = dest or posixpath.basename(source)
        cmd = (
            "scp -r -o ControlPath={socket} {login}:'{remote_file}' '{local_file}'".format(
                socket=self._socket_path,
                login=self._login_info,
                # The remote side of the copy is `source`; the local target
                # is `dest`. The two were previously passed swapped, which
                # made scp fetch `dest` from the remote host.
                remote_file=source.replace('"', r'\"'),  # quote escaped for bash
                local_file=dest.replace('"', r'\"'),
            )
        )
        proc = run_in_subprocess(cmd, check_output=True)
        logger.info(proc.stderr or 'Success')
    else:
        return super(RemoteClient, self).download(source, dest, overwrite, fs)
def _execute(self, cmd, skip_cwd=False, **kwargs):
    """
    Run a command on the remote host through the existing control socket.

    Unless `skip_cwd` is set, the command is preceded by a `cd` into
    `self.path_cwd`.

    Additional Parameters:
        skip_cwd (bool): Whether to skip changing to the current working
            directory associated with this client before executing the
            command. This is mainly useful to methods internal to this
            class.

    Remaining keyword arguments override the entries of
    `self._subprocess_config` and are forwarded to `run_in_subprocess`,
    whose result is returned.
    """
    # Client-level defaults first, per-call overrides on top.
    subprocess_options = dict(self._subprocess_config, **kwargs)

    if skip_cwd:
        cd_prefix = ''
    else:
        cd_prefix = 'cd "{path}"\n'.format(path=escape_path(self.path_cwd))

    remote_command = 'ssh {login} -T -o ControlPath={socket} << EOF\n{cwd}{cmd}\nEOF'.format(
        login=self._login_info,
        socket=self._socket_path,
        cwd=cd_prefix,
        cmd=cmd,
    )
    return run_in_subprocess(remote_command, check_output=True, **subprocess_options)
def _copy_from_local(self, source, dest=None):
    """
    Copy a local file to the remote host with scp.

    Parameters
    ----------
    source : string
        Path of the local file to copy.
    dest : string
        Target location on the remote host. Defaults to the final path
        component of `source`.
    """
    self.connect()
    logger.info('Copying file from local...')

    remote_target = dest or os.path.basename(source)
    copy_command = 'scp -o ControlPath={socket} {local_file} {login}:{remote_file}'.format(
        socket=self._socket_path,
        login=self._login_info,
        local_file=source,
        remote_file=remote_target,
    )
    result = run_in_subprocess(copy_command, check_output=True)
    logger.info(result.stderr or 'Success')
def update_host_keys(self):
    """
    Refresh the SSH host keys stored in `~/.ssh/known_hosts` for `self.host`.

    The existing entry is removed with `ssh-keygen -R` and a fresh one is
    appended from `ssh-keyscan`.

    Returns
    -------
    bool
        True when the command exits with status 0, False otherwise.

    Raises
    ------
    AssertionError
        If this client has a remote set.
    """
    assert not self.remote, "Updating host key only works for local connections."

    refresh_command = "ssh-keygen -R {host} && ssh-keyscan {host} >> ~/.ssh/known_hosts".format(
        host=self.host)
    # NOTE(review): the positional False is presumably `check_output` - confirm
    # against the signature of run_in_subprocess.
    result = run_in_subprocess(refresh_command, False)
    return result.returncode == 0
def _disconnect(self):
    """
    Close the persistent connection to the remote host.

    Sends an `-O exit` control request to the control socket at
    `self._socket_path`. The outcome of the command is not inspected.
    """
    exit_request = "ssh {login} -T -S {socket} -O exit".format(
        login=self._login_info,
        socket=self._socket_path,
    )
    run_in_subprocess(exit_request)
def _push(self, df, table, partition_clause='', overwrite=False, schema='omniduct', sep='\t'):
    """
    Create a new table in hive from pandas DataFrame.

    The dataframe is written to a temporary file in the current directory,
    copied to the remote host when `self.remote` is set, loaded through the
    hive cli, and the temporary file(s) are removed afterwards.

    Parameters
    ----------
    df : pandas.DataFrame or Series
        Data to be pushed into a hive table.
    table : str
        Table name for new hive table.
    schema : str
        Schema (or database) for new hive table.
    partition_clause : str
        The hive partition clause specifying which partitions to load data
        into.
    overwrite : bool, optional
        Whether to overwrite the table data if it exists. Default: False.
    sep : str
        Field delimiter for data.

    Returns
    -------
    proc
        The result of running the create-table and load-data statements
        through `_run_in_hivecli`. A non-zero return code is logged as an
        error but not raised.

    See Also
    --------
    https://cwiki.apache.org/confluence/display/Hive/LanguageManual+DML
    """
    # Save dataframe to file.
    fd, tmp_path = tempfile.mkstemp(dir='.')
    # mkstemp hands back an already-open descriptor; the file is rewritten
    # by name below, so close the descriptor right away instead of leaking it.
    os.close(fd)
    tmp_fname = os.path.basename(tmp_path)
    logger.info('Saving dataframe to file... {}'.format(tmp_fname))
    df.to_csv(tmp_fname, index=False, header=False, sep=sep, encoding='utf-8')

    # Create table statement. The drop flag is only set when overwriting
    # without a partition clause.
    cts = _create_table_statement_from_df(df=df, table=table, schema=schema,
                                          drop=overwrite and not partition_clause,
                                          text=True, sep=sep)

    # Load data statement.
    lds = '\nLOAD DATA LOCAL INPATH "{path}" {overwrite} INTO TABLE {schema}.{table} {partition_clause};'.format(
        path=tmp_fname,
        overwrite="OVERWRITE" if overwrite else "",
        schema=schema,
        table=table,
        partition_clause=partition_clause)

    # SCP data if SSHClient is set.
    if self.remote:
        logger.info('Uploading data to remote host...')
        self.remote.copy_from_local(tmp_fname, tmp_fname)

    # Run create table statement and load data statement.
    logger.info('Creating hive table and loading data...')
    proc = self._run_in_hivecli('\n'.join([cts, lds]))
    if proc.returncode != 0:
        logger.error(proc.stderr)

    # Clean up files, locally and (when applicable) on the remote host.
    logger.info('Cleaning up files...')
    rm_cmd = 'rm -rf {0}'.format(tmp_fname)
    run_in_subprocess(rm_cmd)
    if self.remote:
        self.remote.execute(rm_cmd)
    return proc