Example #1
    def install(
            self,
            ssh_client: paramiko.client.SSHClient,
            cluster: FlintrockCluster):
        print("[{h}] Installing HDFS...".format(
            h=ssh_client.get_transport().getpeername()[0]))

        with ssh_client.open_sftp() as sftp:
            sftp.put(
                localpath=os.path.join(SCRIPTS_DIR, 'download-hadoop.py'),
                remotepath='/tmp/download-hadoop.py')

        ssh_check_output(
            client=ssh_client,
            command="""
                set -e

                python /tmp/download-hadoop.py "{version}" "{download_source}"

                mkdir "hadoop"
                mkdir "hadoop/conf"

                tar xzf "hadoop-{version}.tar.gz" -C "hadoop" --strip-components=1
                rm "hadoop-{version}.tar.gz"

                for f in $(find hadoop/bin -type f -executable -not -name '*.cmd'); do
                    sudo ln -s "$(pwd)/$f" "/usr/local/bin/$(basename $f)"
                done
            """.format(version=self.version, download_source=self.download_source))
Example #2
    def install(
            self,
            ssh_client: paramiko.client.SSHClient,
            cluster: FlintrockCluster):
        print("[{h}] Installing HDFS...".format(
            h=ssh_client.get_transport().getpeername()[0]))

        with ssh_client.open_sftp() as sftp:
            sftp.put(
                localpath=os.path.join(SCRIPTS_DIR, 'download-hadoop.py'),
                remotepath='/tmp/download-hadoop.py')

        ssh_check_output(
            client=ssh_client,
            command="""
                set -e

                python /tmp/download-hadoop.py "{version}" "{download_source}"

                mkdir "hadoop"
                mkdir "hadoop/conf"

                tar xzf "hadoop-{version}.tar.gz" -C "hadoop" --strip-components=1
                rm "hadoop-{version}.tar.gz"
            """.format(version=self.version, download_source=self.download_source))
Example #3
def ssh_check_output(client: paramiko.client.SSHClient, command: str):
    """
    Run a command via the provided SSH client and return the output captured
    on stdout.

    Raise an exception if the command returns a non-zero code.
    """
    stdin, stdout, stderr = client.exec_command(command, get_pty=True)

    # NOTE: Paramiko doesn't clearly document this, but we must read() before
    #       calling recv_exit_status().
    #       See: https://github.com/paramiko/paramiko/issues/448#issuecomment-159481997
    stdout_output = stdout.read().decode('utf8').rstrip('\n')
    stderr_output = stderr.read().decode('utf8').rstrip('\n')
    exit_status = stdout.channel.recv_exit_status()

    if exit_status:
        # TODO: Return a custom exception that includes the return code.
        #       See: https://docs.python.org/3/library/subprocess.html#subprocess.check_output
        # NOTE: We are losing the output order here since output from stdout and stderr
        #       may be interleaved.
        raise SSHError(host=client.get_transport().getpeername()[0],
                       message=stdout_output + stderr_output)

    return stdout_output
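
A minimal usage sketch for the helper above (the host, user, and key path are placeholders, not part of the original example):

import paramiko

client = paramiko.client.SSHClient()
client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
client.connect(hostname='198.51.100.10', username='ec2-user', key_filename='/path/to/key.pem')

# Runs the command remotely and returns captured stdout, or raises SSHError
# on a non-zero exit status.
print(ssh_check_output(client=client, command='echo hello'))
client.close()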
Example #4
    def install(
            self,
            ssh_client: paramiko.client.SSHClient,
            cluster: FlintrockCluster):

        print("[{h}] Installing Spark...".format(
            h=ssh_client.get_transport().getpeername()[0]))

        try:
            if self.version:
                with ssh_client.open_sftp() as sftp:
                    sftp.put(
                        localpath=os.path.join(SCRIPTS_DIR, 'install-spark.sh'),
                        remotepath='/tmp/install-spark.sh')
                    sftp.chmod(path='/tmp/install-spark.sh', mode=0o755)
                url = self.download_source.format(v=self.version)
                ssh_check_output(
                    client=ssh_client,
                    command="""
                        set -e
                        /tmp/install-spark.sh {url}
                        rm -f /tmp/install-spark.sh
                    """.format(url=shlex.quote(url)))
            else:
                ssh_check_output(
                    client=ssh_client,
                    command="""
                        set -e
                        sudo yum install -y git
                        sudo yum install -y java-devel
                        """)
                ssh_check_output(
                    client=ssh_client,
                    command="""
                        set -e
                        git clone {repo} spark
                        cd spark
                        git reset --hard {commit}
                        if [ -e "make-distribution.sh" ]; then
                            ./make-distribution.sh -Phadoop-2.6
                        else
                            ./dev/make-distribution.sh -Phadoop-2.6
                        fi
                    """.format(
                        repo=shlex.quote(self.git_repository),
                        commit=shlex.quote(self.git_commit)))
            ssh_check_output(
                client=ssh_client,
                command="""
                    set -e
                    for f in $(find spark/bin -type f -executable -not -name '*.cmd'); do
                        sudo ln -s "$(pwd)/$f" "/usr/local/bin/$(basename $f)"
                    done
                    echo "export SPARK_HOME='$(pwd)/spark'" >> .bashrc
                """)
        except Exception as e:
            # TODO: This should be a more specific exception.
            print("Error: Failed to install Spark.", file=sys.stderr)
            print(e, file=sys.stderr)
            raise
Example #6
    def install(
        self,
        ssh_client: paramiko.client.SSHClient,
        cluster: FlintrockCluster,
    ):
        logger.info("[{h}] Installing HDFS...".format(
            h=ssh_client.get_transport().getpeername()[0]))

        with ssh_client.open_sftp() as sftp:
            sftp.put(localpath=os.path.join(SCRIPTS_DIR,
                                            'download-package.py'),
                     remotepath='/tmp/download-package.py')

        logger.debug("[{h}] Downloading Hadoop from: {s}".format(
            h=ssh_client.get_transport().getpeername()[0],
            s=self.download_source,
        ))

        ssh_check_output(
            client=ssh_client,
            command="""
                set -e

                python /tmp/download-package.py "{download_source}" "hadoop"

                for f in $(find hadoop/bin -type f -executable -not -name '*.cmd'); do
                    sudo ln -s "$(pwd)/$f" "/usr/local/bin/$(basename $f)"
                done

                echo "export HADOOP_LIBEXEC_DIR='$(pwd)/hadoop/libexec'" >> .bashrc
            """.format(
                # version=self.version,
                download_source=self.download_source.format(v=self.version), ))
Example #7
    def install(self, ssh_client: paramiko.client.SSHClient,
                cluster: FlintrockCluster):
        logger.info("[{h}] Installing HDFS...".format(
            h=ssh_client.get_transport().getpeername()[0]))

        with ssh_client.open_sftp() as sftp:
            sftp.put(localpath=os.path.join(SCRIPTS_DIR, 'download-hadoop.py'),
                     remotepath='/tmp/download-hadoop.py')

        ssh_check_output(client=ssh_client,
                         command="""
                set -e

                python /tmp/download-hadoop.py "{version}" "{download_source}"

                mkdir "hadoop"
                mkdir "hadoop/conf"

                tar xzf "hadoop-{version}.tar.gz" -C "hadoop" --strip-components=1
                rm "hadoop-{version}.tar.gz"

                for f in $(find hadoop/bin -type f -executable -not -name '*.cmd'); do
                    sudo ln -s "$(pwd)/$f" "/usr/local/bin/$(basename $f)"
                done
                echo "export HADOOP_LIBEXEC_DIR='$(pwd)/hadoop/libexec'" >> .bashrc
            """.format(version=self.version,
                       download_source=self.download_source))
Example #8
    def install(
            self,
            ssh_client: paramiko.client.SSHClient,
            cluster: FlintrockCluster):
        print("[{h}] Installing HDFS...".format(
            h=ssh_client.get_transport().getpeername()[0]))

        with ssh_client.open_sftp() as sftp:
            sftp.put(
                localpath=os.path.join(SCRIPTS_DIR, 'download-hadoop.py'),
                remotepath='/tmp/download-hadoop.py')

        ssh_check_output(
            client=ssh_client,
            command="""
                set -e

                python /tmp/download-hadoop.py "{version}"

                mkdir "hadoop"
                mkdir "hadoop/conf"

                tar xzf "hadoop-{version}.tar.gz" -C "hadoop" --strip-components=1
                rm "hadoop-{version}.tar.gz"
            """.format(version=self.version))
Example #9
    def install(
            self,
            ssh_client: paramiko.client.SSHClient,
            cluster: FlintrockCluster):
        logger.info("[{h}] Installing HDFS...".format(
            h=ssh_client.get_transport().getpeername()[0]))

        with ssh_client.open_sftp() as sftp:
            sftp.put(
                localpath=os.path.join(SCRIPTS_DIR, 'download-package.py'),
                remotepath='/tmp/download-package.py')

        ssh_check_output(
            client=ssh_client,
            command="""
                set -e

                python /tmp/download-package.py "{download_source}" "hadoop"

                for f in $(find hadoop/bin -type f -executable -not -name '*.cmd'); do
                    sudo ln -s "$(pwd)/$f" "/usr/local/bin/$(basename $f)"
                done

                echo "export HADOOP_LIBEXEC_DIR='$(pwd)/hadoop/libexec'" >> .bashrc
            """.format(
                version=self.version,
                download_source=self.download_source.format(v=self.version),
            ))
Example #10
def run_experiment(
    ssh_client: paramiko.client.SSHClient,
    experiment_name: str,
    duration: int,
    exposure_time: float = None,
) -> ExperimentStreams:
    """Run run_experiment (image capture program) on the cosmobot with the given name and duration

    Args:
        experiment_name: experiment name to pass to run_experiment
        duration: duration to pass to run_experiment

    Returns: ExperimentStreams object
    """

    run_experiment_command = _generate_run_experiment_command(
        experiment_name, duration, exposure_time
    )

    hostname = ssh_client.get_transport().hostname
    logger.info(
        f"Starting image capture on cosmobot {hostname}\n"
        f"Command: {run_experiment_command}"
    )

    return ExperimentStreams(*ssh_client.exec_command(run_experiment_command))
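
A hedged usage sketch (assuming ssh_client is already connected; ExperimentStreams appears to wrap the (stdin, stdout, stderr) triple returned by exec_command, as the call above suggests):

streams = run_experiment(
    ssh_client=ssh_client,
    experiment_name='calibration_test',  # placeholder name
    duration=300,
    exposure_time=0.8,
)
# The capture program keeps running on the cosmobot; the returned streams
# can be read later to monitor its output.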
Example #11
def ssh_check_output(
        client: paramiko.client.SSHClient,
        command: str,
        timeout_seconds: int=None,
):
    """
    Run a command via the provided SSH client and return the output captured
    on stdout.

    Raise an exception if the command returns a non-zero code.
    """
    stdin, stdout, stderr = client.exec_command(
        command,
        get_pty=True,
        timeout=timeout_seconds)

    # NOTE: Paramiko doesn't clearly document this, but we must read() before
    #       calling recv_exit_status().
    #       See: https://github.com/paramiko/paramiko/issues/448#issuecomment-159481997
    stdout_output = stdout.read().decode('utf8').rstrip('\n')
    stderr_output = stderr.read().decode('utf8').rstrip('\n')
    exit_status = stdout.channel.recv_exit_status()

    if exit_status:
        # TODO: Return a custom exception that includes the return code.
        #       See: https://docs.python.org/3/library/subprocess.html#subprocess.check_output
        # NOTE: We are losing the output order here since output from stdout and stderr
        #       may be interleaved.
        raise SSHError(
            host=client.get_transport().getpeername()[0],
            message=stdout_output + stderr_output)

    return stdout_output
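
A hedged sketch of how the timeout variant above is typically used: when the channel timeout elapses before the command finishes, Paramiko raises socket.timeout, which the caller can catch and retry, as the configure_master examples later in this listing do (the command string is a placeholder):

import socket

try:
    output = ssh_check_output(
        client=ssh_client,
        command='./hadoop/sbin/start-dfs.sh',
        timeout_seconds=90,
    )
except socket.timeout:
    # The remote command did not produce its output within 90 seconds.
    output = None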
Example #12
    def install(
            self,
            ssh_client: paramiko.client.SSHClient,
            cluster: FlintrockCluster):
        logger.info("[{h}] Installing Spark...".format(
            h=ssh_client.get_transport().getpeername()[0]))

        if self.version:
            with ssh_client.open_sftp() as sftp:
                sftp.put(
                    localpath=os.path.join(SCRIPTS_DIR, 'download-package.py'),
                    remotepath='/tmp/download-package.py')

            ssh_check_output(
                client=ssh_client,
                command="""
                    python /tmp/download-package.py "{download_source}" "spark"
                """.format(
                    version=self.version,
                    download_source=self.download_source.format(v=self.version),
                ))

        else:
            ssh_check_output(
                client=ssh_client,
                command="""
                    set -e
                    sudo yum install -y git
                    sudo yum install -y java-devel
                    """)
            ssh_check_output(
                client=ssh_client,
                command="""
                    set -e
                    git clone {repo} spark
                    cd spark
                    git reset --hard {commit}
                    if [ -e "make-distribution.sh" ]; then
                        ./make-distribution.sh -Phadoop-{hadoop_short_version}
                    else
                        ./dev/make-distribution.sh -Phadoop-{hadoop_short_version}
                    fi
                """.format(
                    repo=shlex.quote(self.git_repository),
                    commit=shlex.quote(self.git_commit),
                    # Hardcoding this here until we figure out a better way to handle
                    # the supported build profiles.
                    hadoop_short_version='2.7',
                ))
        ssh_check_output(
            client=ssh_client,
            command="""
                set -e
                for f in $(find spark/bin -type f -executable -not -name '*.cmd'); do
                    sudo ln -s "$(pwd)/$f" "/usr/local/bin/$(basename $f)"
                done
                echo "export SPARK_HOME='$(pwd)/spark'" >> .bashrc
            """)
Example #13
def setup_node(
        *,
        # Change this to take host, user, and identity_file?
        # Add some kind of caching for SSH connections so that they
        # can be looked up by host and reused?
        ssh_client: paramiko.client.SSHClient,
        services: list,
        java_version: int,
        cluster: FlintrockCluster):
    """
    Set up a new node.

    Cluster methods like provision_node() and add_slaves_node() should
    delegate the main work of setting up new nodes to this function.
    """
    host = ssh_client.get_transport().getpeername()[0]
    ssh_check_output(client=ssh_client,
                     command="""
            set -e

            echo {private_key} > "$HOME/.ssh/id_rsa"
            echo {public_key} >> "$HOME/.ssh/authorized_keys"

            chmod 400 "$HOME/.ssh/id_rsa"
        """.format(private_key=shlex.quote(cluster.ssh_key_pair.private),
                   public_key=shlex.quote(cluster.ssh_key_pair.public)))

    with ssh_client.open_sftp() as sftp:
        sftp.put(localpath=os.path.join(SCRIPTS_DIR,
                                        'setup-ephemeral-storage.py'),
                 remotepath='/tmp/setup-ephemeral-storage.py')

    logger.info("[{h}] Configuring ephemeral storage...".format(h=host))
    # TODO: Print some kind of warning if storage is large, since formatting
    #       will take several minutes (~4 minutes for 2TB).
    storage_dirs_raw = ssh_check_output(client=ssh_client,
                                        command="""
            set -e
            python /tmp/setup-ephemeral-storage.py
            rm -f /tmp/setup-ephemeral-storage.py
        """)
    storage_dirs = json.loads(storage_dirs_raw)

    cluster.storage_dirs.root = storage_dirs['root']
    cluster.storage_dirs.ephemeral = storage_dirs['ephemeral']

    ensure_java(ssh_client, java_version)

    for service in services:
        try:
            service.install(
                ssh_client=ssh_client,
                cluster=cluster,
            )
        except Exception as e:
            raise Exception("Failed to install {}.".format(
                type(service).__name__)) from e
Example #14
    def install(
            self,
            ssh_client: paramiko.client.SSHClient,
            cluster: FlintrockCluster):
        # TODO: Allow users to specify the Spark "distribution". (?)
        distribution = 'hadoop2.6'

        print("[{h}] Installing Spark...".format(
            h=ssh_client.get_transport().getpeername()[0]))

        try:
            if self.version:
                with ssh_client.open_sftp() as sftp:
                    sftp.put(
                        localpath=os.path.join(SCRIPTS_DIR, 'install-spark.sh'),
                        remotepath='/tmp/install-spark.sh')
                    sftp.chmod(path='/tmp/install-spark.sh', mode=0o755)
                ssh_check_output(
                    client=ssh_client,
                    command="""
                        set -e
                        /tmp/install-spark.sh {spark_version} {distribution}
                        rm -f /tmp/install-spark.sh
                    """.format(
                            spark_version=shlex.quote(self.version),
                            distribution=shlex.quote(distribution)))
            else:
                ssh_check_output(
                    client=ssh_client,
                    command="""
                        set -e
                        sudo yum install -y git
                        sudo yum install -y java-devel
                        """)
                ssh_check_output(
                    client=ssh_client,
                    command="""
                        set -e
                        git clone {repo} spark
                        cd spark
                        git reset --hard {commit}
                        if [ -e "make-distribution.sh" ]; then
                            ./make-distribution.sh -Phadoop-2.6
                        else
                            ./dev/make-distribution.sh -Phadoop-2.6
                        fi
                    """.format(
                        repo=shlex.quote(self.git_repository),
                        commit=shlex.quote(self.git_commit)))
        except Exception as e:
            # TODO: This should be a more specific exception.
            print("Error: Failed to install Spark.", file=sys.stderr)
            print(e, file=sys.stderr)
            raise
Example #15
    def install(self, ssh_client: paramiko.client.SSHClient,
                cluster: FlintrockCluster):
        logger.info("[{h}] Installing Spark...".format(
            h=ssh_client.get_transport().getpeername()[0]))

        if self.version:
            with ssh_client.open_sftp() as sftp:
                sftp.put(localpath=os.path.join(SCRIPTS_DIR,
                                                'download-package.py'),
                         remotepath='/tmp/download-package.py')

            ssh_check_output(client=ssh_client,
                             command="""
                    python /tmp/download-package.py "{download_source}" "spark"
                """.format(
                                 version=self.version,
                                 download_source=self.download_source.format(
                                     v=self.version),
                             ))

        else:
            ssh_check_output(client=ssh_client,
                             command="""
                    set -e
                    sudo yum install -y git
                    sudo yum install -y java-devel
                    """)
            ssh_check_output(
                client=ssh_client,
                command="""
                    set -e
                    git clone {repo} spark
                    cd spark
                    git reset --hard {commit}
                    if [ -e "make-distribution.sh" ]; then
                        ./make-distribution.sh -Phadoop-{hadoop_short_version}
                    else
                        ./dev/make-distribution.sh -Phadoop-{hadoop_short_version}
                    fi
                """.format(
                    repo=shlex.quote(self.git_repository),
                    commit=shlex.quote(self.git_commit),
                    # Hardcoding this here until we figure out a better way to handle
                    # the supported build profiles.
                    hadoop_short_version='2.7',
                ))
        ssh_check_output(client=ssh_client,
                         command="""
                set -e
                for f in $(find spark/bin -type f -executable -not -name '*.cmd'); do
                    sudo ln -s "$(pwd)/$f" "/usr/local/bin/$(basename $f)"
                done
                echo "export SPARK_HOME='$(pwd)/spark'" >> .bashrc
            """)
Example #16
def setup_node(
        *,
        # Change this to take host, user, and identity_file?
        # Add some kind of caching for SSH connections so that they
        # can be looked up by host and reused?
        ssh_client: paramiko.client.SSHClient,
        services: list,
        cluster: FlintrockCluster):
    """
    Set up a new node.

    Cluster methods like provision_node() and add_slaves_node() should
    delegate the main work of setting up new nodes to this function.
    """
    host = ssh_client.get_transport().getpeername()[0]
    ssh_check_output(
        client=ssh_client,
        command="""
            set -e

            echo {private_key} > "$HOME/.ssh/id_rsa"
            echo {public_key} >> "$HOME/.ssh/authorized_keys"

            chmod 400 "$HOME/.ssh/id_rsa"
        """.format(
            private_key=shlex.quote(cluster.ssh_key_pair.private),
            public_key=shlex.quote(cluster.ssh_key_pair.public)))

    with ssh_client.open_sftp() as sftp:
        sftp.put(
            localpath=os.path.join(SCRIPTS_DIR, 'setup-ephemeral-storage.py'),
            remotepath='/tmp/setup-ephemeral-storage.py')

    logger.info("[{h}] Configuring ephemeral storage...".format(h=host))
    # TODO: Print some kind of warning if storage is large, since formatting
    #       will take several minutes (~4 minutes for 2TB).
    storage_dirs_raw = ssh_check_output(
        client=ssh_client,
        command="""
            set -e
            python /tmp/setup-ephemeral-storage.py
            rm -f /tmp/setup-ephemeral-storage.py
        """)
    storage_dirs = json.loads(storage_dirs_raw)

    cluster.storage_dirs.root = storage_dirs['root']
    cluster.storage_dirs.ephemeral = storage_dirs['ephemeral']

    ensure_java8(ssh_client)

    for service in services:
        service.install(
            ssh_client=ssh_client,
            cluster=cluster)
Example #17
def attempt_to_close_connection(ssh_client: paramiko.client.SSHClient):
    """Call ssh_client.close() and log exception and cosmobot hostname if it fails"""

    # get hostname up here in case the transport isn't available after a failed close()
    hostname = ssh_client.get_transport().hostname

    try:
        ssh_client.close()
    except Exception as e:
        logging.error(
            f"exception occured while trying to close ssh connection to cosmobot {hostname}"
        )
        logging.exception(e)
Example #18
def uploadFile(sourcePath: str,
               targetPath: str,
               sshc: paramiko.client.SSHClient,
               compress_method: str = None,
               verbose: bool = True) -> pathlib.Path:
    def show_progress(filename, size, sent):
        print(f"Uploading {filename} progress: " +
              f"{float(sent)/float(size)*100:.2f}%",
              end="\r")

    progress = show_progress if verbose else None

    try:
        if compress_method:
            fileName = pathlib.Path(sourcePath).name
            # change targetPath for uploading to
            # targetPath's directory / sourcePath's name + ext.
            targetPath = pathlib.Path(
                str(pathlib.Path(targetPath).parent / fileName) + "." +
                compress_method)
            sourcePath = archiveFile(sourcePath,
                                     verbose=verbose,
                                     method=compress_method)
            isArchived = True

        with scp.SCPClient(sshc.get_transport(), progress=progress) as scpc:
            # in case Path is PosixPath, casting them to str
            scpc.put(str(sourcePath), str(targetPath))
            print("\n")  # nextline

        if compress_method:
            unarchiveSSH(targetPath,
                         sshc,
                         method=compress_method,
                         verbose=verbose)
            isUnarchived = True
            # change targetPath to uploaded raw file
            uploadedPath = str(pathlib.Path(targetPath).parent / fileName)
    finally:  # delete archive files
        if 'isArchived' in locals():
            with verbosity_context(f"Deleting archive {sourcePath}", verbose):
                os.remove(sourcePath)
        if 'isUnarchived' in locals():
            sftp = sshc.open_sftp()
            with verbosity_context(f"Deleting archive {targetPath} via SCP",
                                   verbose):
                sftp.remove(str(targetPath))

    return uploadedPath if compress_method else targetPath
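
A hedged usage sketch (paths and the compression method are placeholders; sshc is assumed to be an already-connected paramiko client):

uploaded_path = uploadFile(
    sourcePath='data/measurements.csv',
    targetPath='/home/user/measurements.csv',
    sshc=sshc,
    compress_method='gz',  # compress locally, upload, then unarchive remotely
)
print(f"Uploaded to {uploaded_path}")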
Example #19
def tdloadViaSSH(engine: sqlalchemy.engine.base.Engine,
                 sshc: paramiko.client.SSHClient,
                 tablename: str,
                 targetPath: str,
                 jobname: str,
                 dbname: str = None,
                 skipRowNum: int = 0,
                 verbose: bool = True) -> None:

    targetPath = pathlib.Path(targetPath)
    if dbname is None:
        dbname = engine.url.database
    # always use option: QuotedData = 'Optional'
    options = "--DCPQuotedData 'Optional'"
    if skipRowNum > 0:
        options += f" --SourceSkipRows {skipRowNum}"

    tdload_command = (f"tdload -f {targetPath} -t {dbname}.{tablename}" +
                      f" -h {engine.url.host} -u {engine.url.username}" +
                      f" -p {engine.url.password}" +
                      f" --TargetWorkingDatabase {dbname}" +
                      f" {options} {jobname}")

    # drop error log table if exists
    dropIfExists(tablename + "_ET", dbname, engine)
    dropIfExists(tablename + "_UV", dbname, engine)

    # execute command via ssh
    stdin, stdout, stderr = sshc.exec_command(tdload_command)
    for line in stdout:
        if verbose:
            print(line)
        else:
            if re.match(r".*(Total Rows|successfully).*", line):
                print(line)
Example #20
def ensure_java8(client: paramiko.client.SSHClient):
    host = client.get_transport().getpeername()[0]
    java_major_version = get_java_major_version(client)

    if not java_major_version or java_major_version < (1, 8):
        logger.info("[{h}] Installing Oracle Java 1.8...".format(h=host))

        ssh_check_output(client=client,
                         command="""
                set -e

                cd /tmp

                readonly url="http://www.oracle.com"
                readonly jdk_download_url1="$url/technetwork/java/javase/downloads/index.html"
                readonly jdk_download_url2=$(curl -s $jdk_download_url1 | egrep -o "\/technetwork\/java/\javase\/downloads\/jdk8-downloads-.+?\.html" | head -1 | cut -d '"' -f 1)
                [[ -z "$jdk_download_url2" ]] && error "Could not get jdk download url - $jdk_download_url1"
                
                readonly jdk_download_url3="${url}${jdk_download_url2}"
                readonly jdk_download_url4=$(curl -s $jdk_download_url3 | egrep -o "http\:\/\/download.oracle\.com\/otn-pub\/java\/jdk\/[7-8]u[0-9]+\-(.*)+\/jdk-[7-8]u[0-9]+(.*)linux-x64.rpm")
                
                for dl_url in ${jdk_download_url4[@]}; do
                    wget --no-cookies --no-check-certificate --header "Cookie: oraclelicense=accept-securebackup-cookie" -N $dl_url
                done
                
                sudo yum remove -y java-1.8.0-openjdk*
                
                sudo yum localinstall -y ./jdk-8u144-linux-x64.rpm
                
                sudo yum remove -y java-1.6.0-openjdk java-1.7.0-openjdk java-1.8.0-openjdk*

                sudo sh -c "echo export JAVA_HOME=/usr/java/default/jre >> /etc/environment"
                source /etc/environment
            """)
Example #21
    def configure_master(
            self,
            ssh_client: paramiko.client.SSHClient,
            cluster: FlintrockCluster):
        host = ssh_client.get_transport().getpeername()[0]
        print("[{h}] Configuring Spark master...".format(h=host))

        # TODO: Maybe move this shell script out to some separate file/folder
        #       for the Spark service.
        # TODO: Add some timeout for waiting on master UI to come up.
        ssh_check_output(
            client=ssh_client,
            command="""
                set -e

                spark/sbin/start-master.sh

                set +e

                master_ui_response_code=0
                while [ "$master_ui_response_code" -ne 200 ]; do
                    sleep 1
                    master_ui_response_code="$(
                        curl --head --silent --output /dev/null \
                             --write-out "%{{http_code}}" {m}:8080
                    )"
                done

                set -e

                spark/sbin/start-slaves.sh
            """.format(
                m=shlex.quote(cluster.master_host)))
Example #22
def reboot_pc(ssh_client: paramiko.client.SSHClient, password: str):
    ip, port = ssh_client.get_transport().getpeername()
    cmd = 'sudo -S -p " " reboot'
    ch, stdin, stdout, stderr = sudo_exec(ssh_client, cmd, password)
    print_stdout_blocking(stdout, "reboot_pc", ip, port)
    print_stderr_blocking(stderr, "reboot_pc", ip, port)
    return
Example #23
    def configure_master(
            self,
            ssh_client: paramiko.client.SSHClient,
            cluster: FlintrockCluster):
        host = ssh_client.get_transport().getpeername()[0]
        print("[{h}] Configuring Spark master...".format(h=host))

        # TODO: Maybe move this shell script out to some separate file/folder
        #       for the Spark service.
        # TODO: Add some timeout for waiting on master UI to come up.
        ssh_check_output(
            client=ssh_client,
            command="""
                spark/sbin/start-all.sh

                master_ui_response_code=0
                while [ "$master_ui_response_code" -ne 200 ]; do
                    sleep 1
                    master_ui_response_code="$(
                        curl --head --silent --output /dev/null \
                             --write-out "%{{http_code}}" {m}:8080
                    )"
                done
            """.format(
                m=shlex.quote(cluster.master_host)))
Example #24
    def configure_master(self, ssh_client: paramiko.client.SSHClient,
                         cluster: FlintrockCluster):
        host = ssh_client.get_transport().getpeername()[0]
        logger.info("[{h}] Configuring Spark master...".format(h=host))

        # This loop is a band-aid for: https://github.com/nchammas/flintrock/issues/129
        attempt_limit = 3
        for attempt in range(attempt_limit):
            try:
                ssh_check_output(
                    client=ssh_client,
                    # Maybe move this shell script out to some separate
                    # file/folder for the Spark service.
                    command="""
                        spark/sbin/start-all.sh

                        master_ui_response_code=0
                        while [ "$master_ui_response_code" -ne 200 ]; do
                            sleep 1
                            master_ui_response_code="$(
                                curl --head --silent --output /dev/null \
                                    --write-out "%{{http_code}}" {m}:8080
                            )"
                        done
                    """.format(m=shlex.quote(cluster.master_host)),
                    timeout_seconds=90)
                break
            except socket.timeout as e:
                logger.debug(
                    "Timed out waiting for Spark master to come up.{}".format(
                        " Trying again..." if attempt < attempt_limit -
                        1 else ""))
        else:
            raise Exception("Timed out waiting for Spark master to come up.")
Example #25
def run_one_sudo_cmd(ssh_client: paramiko.client.SSHClient, cmd: str,
                     password: str):
    ip, port = ssh_client.get_transport().getpeername()
    cmd_s = 'sudo -S -p " " ' + cmd
    ch, stdin, stdout, stderr = sudo_exec(ssh_client, cmd_s, password)
    print_stdout_blocking(stdout, "run_one_sudo_cmd", ip, port)
    print_stderr_blocking(stderr, "run_one_sudo_cmd", ip, port)
    return
Example #26
def ensure_java(client: paramiko.client.SSHClient, java_version: int):
    """
    Ensures that Java is available on the machine and that it has a
    version of at least java_version.

    The specified version of Java will be installed if it does not
    exist or the existing version has a major version lower than java_version.

    :param client:
    :param java_version:
        minimum version of Java required
    :return:
    """
    host = client.get_transport().getpeername()[0]
    installed_java_version = get_installed_java_version(client)

    if installed_java_version == java_version:
        logger.info(
            "Java {j} is already installed, skipping Java install".format(
                j=installed_java_version))
        return

    if installed_java_version and installed_java_version > java_version:
        logger.warning("""
            Existing Java {j} installation is newer than the configured version {java_version}.
            Your applications will be executed with Java {j}.
            Please choose a different AMI if this does not work for you.
            """.format(j=installed_java_version, java_version=java_version))
        return

    if installed_java_version and installed_java_version < java_version:
        logger.info("""
                Existing Java {j} will be upgraded to AdoptOpenJDK {java_version}
                """.format(j=installed_java_version,
                           java_version=java_version))

    # We will install AdoptOpenJDK because it gives us access to Java 8 through 15
    # Right now, Amazon Extras only provides Corretto Java 8, 11 and 15
    logger.info("[{h}] Installing AdoptOpenJDK Java {j}...".format(
        h=host, j=java_version))

    install_adoptopenjdk_repo(client)
    java_package = "adoptopenjdk-{j}-hotspot".format(j=java_version)
    ssh_check_output(client=client,
                     command="""
            set -e

            # Install Java first to protect packages that depend on Java from being removed.
            sudo yum install -q -y {jp}

            # Remove any older versions of Java to force the default Java to the requested version.
            # We don't use /etc/alternatives because it does not seem to update links in /usr/lib/jvm correctly,
            # and we don't just rely on JAVA_HOME because some programs use java directly in the PATH.
            sudo yum remove -y java-1.6.0-openjdk java-1.7.0-openjdk

            sudo sh -c "echo export JAVA_HOME=/usr/lib/jvm/{jp} >> /etc/environment"
            source /etc/environment
        """.format(jp=java_package))
Example #27
def no_sudo_exec(ssh_client: paramiko.client.SSHClient, cmd: str, block=True):
    ch = ssh_client.get_transport().open_session()
    if block:
        ch.get_pty()
    ch.exec_command(cmd)
    stdin = ch.makefile_stdin("wb", -1)
    stdout = ch.makefile("r", -1)
    stderr = ch.makefile_stderr("r", -1)
    return ch, stdin, stdout, stderr
Example #28
    def configure_master(self, ssh_client: paramiko.client.SSHClient,
                         cluster: FlintrockCluster):
        host = ssh_client.get_transport().getpeername()[0]
        print("[{h}] Configuring HDFS master...".format(h=host))

        ssh_check_output(client=ssh_client,
                         command="""
                ./hadoop/bin/hdfs namenode -format -nonInteractive
                ./hadoop/sbin/start-dfs.sh
            """)
Example #29
    def configure_master(
            self,
            ssh_client: paramiko.client.SSHClient,
            cluster: FlintrockCluster):
        host = ssh_client.get_transport().getpeername()[0]
        print("[{h}] Configuring HDFS master...".format(h=host))

        ssh_check_output(
            client=ssh_client,
            command="""
                ./hadoop/bin/hdfs namenode -format -nonInteractive
                ./hadoop/sbin/start-dfs.sh
            """)
Example #30
    def install(
            self,
            ssh_client: paramiko.client.SSHClient,
            cluster_info: ClusterInfo):
        """
        Downloads and installs Spark on a given node.
        """
        # TODO: Allow users to specify the Spark "distribution".
        distribution = 'hadoop1'

        print("[{h}] Installing Spark...".format(
            h=ssh_client.get_transport().getpeername()[0]))

        try:
            # TODO: Figure out how these non-template paths should work.
            ssh_check_output(
                client=ssh_client,
                command="""
                    set -e

                    echo {f} > /tmp/install-spark.sh
                    chmod 755 /tmp/install-spark.sh

                    /tmp/install-spark.sh {spark_version} {distribution}
                """.format(
                    f=shlex.quote(
                        get_formatted_template(
                            path='./install-spark.sh',
                            mapping=vars(cluster_info))),
                    spark_version=shlex.quote(self.version),
                    distribution=shlex.quote(distribution)))
        except Exception as e:
            print("Could not find package for Spark {s} / {d}.".format(
                    s=self.version,
                    d=distribution
                ), file=sys.stderr)
            raise

        template_path = "./spark/conf/spark-env.sh"
        ssh_check_output(
            client=ssh_client,
            command="""
                echo {f} > {p}
            """.format(
                f=shlex.quote(
                    get_formatted_template(
                        path="templates/" + template_path,
                        mapping=vars(cluster_info))),
                p=shlex.quote(template_path)))
Example #31
def sudo_exec(ssh_client: paramiko.client.SSHClient,
              cmd: str,
              password: str,
              block=True):
    ch = ssh_client.get_transport().open_session()
    if block:
        ch.get_pty()
    ch.exec_command(cmd)
    stdin = ch.makefile_stdin("wb", -1)
    stdin.write(password + '\n')
    stdin.flush()
    stdout = ch.makefile("r", -1)
    stderr = ch.makefile_stderr("r", -1)
    stdout.readline()
    stdout.readline()
    return ch, stdin, stdout, stderr
Example #32
def upload_file(ssh_client: paramiko.client.SSHClient, local_path,
                remote_path):
    def print_progress(filename, size, sent, trans):
        print(
            trans, "%s\'s progress: %.2f%%   \r" %
            (filename, float(sent) / float(size) * 100))

    scp_client = scp.SCPClient(ssh_client.get_transport(),
                               progress4=print_progress)

    def fnmatch_list(fn, l):
        for n in l:
            if fnmatch.fnmatch(fn, n):
                return True
        return False

    ex_list = [
        '.svn', '.cvs', '.idea', '.DS_Store', '.git', '.hg', '.hprof', '*.pyc',
        'build', '*-build-*', '__pycache__', 'events*', '*.pt'
    ]

    for path, dirs, files in os.walk(local_path, topdown=True):
        dirs[:] = [d for d in dirs if not fnmatch_list(d, ex_list)]
        for d in dirs:
            local_p_to_d = os.path.join(path, d)
            relative_p_to_d = os.path.relpath(local_p_to_d, local_path)
            remote_p_to_d = os.path.join(remote_path, relative_p_to_d)
            print('cp from:	', relative_p_to_d)
            print('cp to:	', remote_p_to_d, scp_client.peername)
            run_one_no_sudo_cmd(ssh_client, 'mkdir ' + remote_p_to_d)

        for filename in files:
            if fnmatch_list(filename, ex_list):
                continue
            local_p_to_f = os.path.join(path, filename)
            relative_p_to_f = os.path.relpath(local_p_to_f, local_path)
            remote_p_to_f = os.path.join(remote_path, relative_p_to_f)
            print('local file', local_p_to_f)
            print('remote file', remote_p_to_f)

            print(
                scp_client.put(local_p_to_f,
                               remote_path=remote_p_to_f,
                               preserve_times=False))

    return
Example #33
    def configure_master(
            self,
            ssh_client: paramiko.client.SSHClient,
            cluster: FlintrockCluster):
        host = ssh_client.get_transport().getpeername()[0]
        logger.info("[{h}] Configuring HDFS master...".format(h=host))

        ssh_check_output(
            client=ssh_client,
            command="""
                # `|| true` because on cluster restart this command will fail.
                ./hadoop/bin/hdfs namenode -format -nonInteractive || true
            """)

        # This loop is a band-aid for: https://github.com/nchammas/flintrock/issues/157
        attempt_limit = 3
        for attempt in range(attempt_limit):
            try:
                ssh_check_output(
                    client=ssh_client,
                    command="""
                        ./hadoop/sbin/stop-dfs.sh
                        ./hadoop/sbin/start-dfs.sh

                        master_ui_response_code=0
                        while [ "$master_ui_response_code" -ne 200 ]; do
                            sleep 1
                            master_ui_response_code="$(
                                curl --head --silent --output /dev/null \
                                    --write-out "%{{http_code}}" {m}:50070
                            )"
                        done
                    """.format(m=shlex.quote(cluster.master_host)),
                    timeout_seconds=90
                )
                break
            except socket.timeout as e:
                logger.debug(
                    "Timed out waiting for HDFS master to come up.{}"
                    .format(" Trying again..." if attempt < attempt_limit - 1 else "")
                )
        else:
            raise Exception("Time out waiting for HDFS master to come up.")
Example #34
    def configure_master(self, ssh_client: paramiko.client.SSHClient,
                         cluster: FlintrockCluster):
        host = ssh_client.get_transport().getpeername()[0]
        logger.info("[{h}] Configuring HDFS master...".format(h=host))

        ssh_check_output(client=ssh_client,
                         command="""
                # `|| true` because on cluster restart this command will fail.
                ./hadoop/bin/hdfs namenode -format -nonInteractive || true
            """)

        # This loop is a band-aid for: https://github.com/nchammas/flintrock/issues/157
        attempt_limit = 3
        for attempt in range(attempt_limit):
            try:
                ssh_check_output(client=ssh_client,
                                 command="""
                        ./hadoop/sbin/stop-dfs.sh
                        ./hadoop/sbin/start-dfs.sh

                        master_ui_response_code=0
                        while [ "$master_ui_response_code" -ne 200 ]; do
                            sleep 1
                            master_ui_response_code="$(
                                curl \
                                    --location --head --silent \
                                    --output /dev/null \
                                    --write-out "%{{http_code}}" \
                                    {m}:{p}
                            )"
                        done
                    """.format(m=shlex.quote(cluster.master_private_host),
                               p=self.name_node_ui_port),
                                 timeout_seconds=90)
                break
            except socket.timeout as e:
                logger.debug(
                    "Timed out waiting for HDFS master to come up.{}".format(
                        " Trying again..." if attempt < attempt_limit -
                        1 else ""))
        else:
            raise Exception("Time out waiting for HDFS master to come up.")
Example #35
def ensure_java8(client: paramiko.client.SSHClient):
    host = client.get_transport().getpeername()[0]
    java_major_version = get_java_major_version(client)

    if not java_major_version or java_major_version < (1, 8):
        logger.info("[{h}] Installing Java 1.8...".format(h=host))

        ssh_check_output(client=client,
                         command="""
                set -e

                # Install Java 1.8 first to protect packages that depend on Java from being removed.
                sudo yum install -y java-1.8.0-openjdk

                # Remove any older versions of Java to force the default Java to 1.8.
                # We don't use /etc/alternatives because it does not seem to update links in /usr/lib/jvm correctly,
                # and we don't just rely on JAVA_HOME because some programs use java directly in the PATH.
                sudo yum remove -y java-1.6.0-openjdk java-1.7.0-openjdk

                sudo sh -c "echo export JAVA_HOME=/usr/lib/jvm/jre >> /etc/environment"
                source /etc/environment
            """)
Example #36
def unarchiveSSH(archivePath: pathlib.PosixPath,
                 sshc: paramiko.client.SSHClient,
                 unarchiveFolder: pathlib.PosixPath = None,
                 method: str = "gz",
                 verbose: bool = True) -> None:
    if unarchiveFolder is None:
        unarchiveFolder = archivePath.parent

    if method == "7z":
        command = f"7z e {archivePath} -o{unarchiveFolder}"
    elif method in ["gz", "xz", "bz2"]:
        d_format_option = {"gz": "z", "xz": "J", "bz2": "j"}
        command = (f"tar -xv{d_format_option[method]}f" +
                   f"{archivePath} -C {unarchiveFolder}")
    else:
        raise ValueError(f"method only supports ['7z', 'gz', 'xz', 'bz2']")

    with verbosity_context(f"Unarchiving {archivePath}", verbose):
        stdin, stdout, stderr = sshc.exec_command(command)
        if verbose:
            for line in stdout:
                print(line)
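
A hedged usage sketch for the helper above (the remote archive path is a placeholder; sshc is assumed to be a connected paramiko client):

import pathlib

unarchiveSSH(
    archivePath=pathlib.Path('/home/user/dataset.tar.gz'),
    sshc=sshc,
    method='gz',  # extracts into the archive's own directory by default
)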
Example #37
def ensure_java8(client: paramiko.client.SSHClient):
    host = client.get_transport().getpeername()[0]
    java_major_version = get_java_major_version(client)

    if not java_major_version or java_major_version < (1, 8):
        logger.info("[{h}] Installing Java 1.8...".format(h=host))

        ssh_check_output(
            client=client,
            command="""
                set -e

                # Install Java 1.8 first to protect packages that depend on Java from being removed.
                sudo yum install -y java-1.8.0-openjdk

                # Remove any older versions of Java to force the default Java to 1.8.
                # We don't use /etc/alternatives because it does not seem to update links in /usr/lib/jvm correctly,
                # and we don't just rely on JAVA_HOME because some programs use java directly in the PATH.
                sudo yum remove -y java-1.6.0-openjdk java-1.7.0-openjdk

                sudo sh -c "echo export JAVA_HOME=/usr/lib/jvm/jre >> /etc/environment"
                source /etc/environment
            """)
Example #38
def download_checkpoint(ssh_client: paramiko.client.SSHClient,
                        sftp_client: paramiko.sftp_client.SFTPClient,
                        run_id: str) -> None:
    remote_checkpoint_dir = f'/home/{user}/proj/Wave-U-Net/checkpoints/{run_id}/'
    _stdin, stdout, _stderr = ssh_client.exec_command(
        f'ls -1 -v {remote_checkpoint_dir} -I checkpoint | tail -n 1')
    last_checkpoint_filename = stdout.read().decode('utf8')
    if not last_checkpoint_filename:
        print('File not found')
        return
    last_checkpoint_name = os.path.splitext(last_checkpoint_filename)[0]
    checkpoint_files = [
        last_checkpoint_name + '.index', last_checkpoint_name + '.meta',
        last_checkpoint_name + '.data-00000-of-00001'
    ]

    local_folder = os.path.join(output_dir, run_id)
    os.makedirs(local_folder, exist_ok=True)
    for filename in checkpoint_files:
        local_path = os.path.join(local_folder, filename)
        print(os.path.join(remote_checkpoint_dir, filename))
        sftp_client.get(os.path.join(remote_checkpoint_dir, filename),
                        local_path)
Example #39
    def configure_master(
            self,
            ssh_client: paramiko.client.SSHClient,
            cluster: FlintrockCluster):
        host = ssh_client.get_transport().getpeername()[0]
        logger.info("[{h}] Configuring Spark master...".format(h=host))

        # This loop is a band-aid for: https://github.com/nchammas/flintrock/issues/129
        attempt_limit = 3
        for attempt in range(attempt_limit):
            try:
                ssh_check_output(
                    client=ssh_client,
                    # Maybe move this shell script out to some separate
                    # file/folder for the Spark service.
                    command="""
                        spark/sbin/start-all.sh

                        master_ui_response_code=0
                        while [ "$master_ui_response_code" -ne 200 ]; do
                            sleep 1
                            master_ui_response_code="$(
                                curl --head --silent --output /dev/null \
                                    --write-out "%{{http_code}}" {m}:8080
                            )"
                        done
                    """.format(m=shlex.quote(cluster.master_host)),
                    timeout_seconds=90
                )
                break
            except socket.timeout as e:
                logger.debug(
                    "Timed out waiting for Spark master to come up.{}"
                    .format(" Trying again..." if attempt < attempt_limit - 1 else "")
                )
        else:
            raise Exception("Timed out waiting for Spark master to come up.")
Example #40
def run_one_no_sudo_cmd(ssh_client: paramiko.client.SSHClient, cmd: str):
    ip, port = ssh_client.get_transport().getpeername()
    ch, stdin, stdout, stderr = no_sudo_exec(ssh_client, cmd)
    print_stdout_blocking(stdout, "run_one_no_sudo_cmd", ip, port)
    print_stderr_blocking(stderr, "run_one_no_sudo_cmd", ip, port)
    return
Example #41
    def configure_master(
            self,
            ssh_client: paramiko.client.SSHClient,
            cluster_info: ClusterInfo):
        """
        Configures the Spark master and starts both the master and slaves.
        """
        host = ssh_client.get_transport().getpeername()[0]
        print("[{h}] Configuring Spark master...".format(h=host))

        # TODO: Maybe move this shell script out to some separate file/folder
        #       for the Spark module.
        ssh_check_output(
            client=ssh_client,
            command="""
                set -e

                echo {s} > spark/conf/slaves

                spark/sbin/start-master.sh

                set +e

                master_ui_response_code=0
                while [ "$master_ui_response_code" -ne 200 ]; do
                    sleep 1
                    master_ui_response_code="$(
                        curl --head --silent --output /dev/null \
                             --write-out "%{{http_code}}" {m}:8080
                    )"
                done

                set -e

                spark/sbin/start-slaves.sh
            """.format(
                s=shlex.quote('\n'.join(cluster_info.slave_hosts)),
                m=shlex.quote(cluster_info.master_host)))

        # Spark health check
        # TODO: Move to health_check() module method?
        # TODO: Research (or implement) way to get Spark to tell you when
        #       it's ready, as opposed to checking after a time delay.
        time.sleep(30)

        spark_master_ui = 'http://{m}:8080/json/'.format(m=cluster_info.master_host)

        spark_ui_info = json.loads(
            urllib.request.urlopen(spark_master_ui).read().decode('utf-8'))

        print(textwrap.dedent(
            """\
            Spark Health Report:
              * Master: {status}
              * Workers: {workers}
              * Cores: {cores}
              * Memory: {memory:.1f} GB\
            """.format(
                status=spark_ui_info['status'],
                workers=len(spark_ui_info['workers']),
                cores=spark_ui_info['cores'],
                memory=spark_ui_info['memory'] / 1024)))