Exemplo n.º 1
0
 def _setup_package_repos(self):
     """Register the Mesosphere apt repository in addition to the stock ones."""
     # Stock repositories first, then the Mesosphere signing key and source list.
     super(MesosBox, self)._setup_package_repos()
     sudo('apt-key adv --keyserver keyserver.ubuntu.com --recv E56151BF')
     codename = self.release().codename
     source_line = fmt('echo "deb http://repos.mesosphere.io/ubuntu {codename} main" '
                       '> /etc/apt/sources.list.d/mesosphere.list')
     sudo(source_line)
Exemplo n.º 2
0
 def __setup_mesos(self):
     """
     Configure Mesos after package installation: disable the packaged init
     jobs, create data/log directories, set up credentials and register our
     own Upstart jobs.
     """
     # The Mesosphere packages ship their own Upstart jobs; remove them so the
     # jobs registered below are the only ones managing Mesos.
     sudo("rm /etc/init/mesos-{master,slave}.conf")
     self._lazy_mkdir(log_dir, 'mesos', persistent=False)  # logs are expendable
     self._lazy_mkdir('/var/lib', 'mesos', persistent=True)  # work dir should survive restarts
     self.__prepare_credentials()
     self.__register_upstart_jobs(mesos_services)
     self._post_install_mesos()
Exemplo n.º 3
0
 def _setup_package_repos( self ):
     """
     Add the Mesosphere apt repository matching the remote box's distribution
     and release so Mesos packages can be installed via apt-get.
     """
     super( ToilJenkinsSlave, self )._setup_package_repos( )
     sudo( "apt-key adv --keyserver keyserver.ubuntu.com --recv E56151BF" )
     # Derive distro id and codename from the remote box, lower-cased for the URL.
     distro = run( "lsb_release -is | tr '[:upper:]' '[:lower:]'" )
     codename = run( "lsb_release -cs" )
     # The two adjacent string literals are concatenated before .format() is
     # applied, so both placeholders get filled.
     run( 'echo "deb http://repos.mesosphere.io/{} {} main"'
          '| sudo tee /etc/apt/sources.list.d/mesosphere.list'.format( distro, codename ) )
Exemplo n.º 4
0
    def __install_apache_package(self, path):
        """
        Download the given file from an Apache download mirror and extract it
        under the installation directory.

        Some mirrors may be down or serve crap, so we may need to retry this a couple of times.

        :param str path: the URL path of the package on the Apache download
               server and its mirrors; the first path component names the
               package, the last one the tarball.
        """
        # TODO: run Fabric tasks with a different manager, so we don't need to catch SystemExit
        components = path.split('/')
        package, tarball = components[0], components[-1]
        # Up to three attempts; the iterator doubles as the retry counter.
        tries = iter(xrange(3))
        while True:
            try:
                # Prefer the S3 mirror, falling back to the official mirror on failure.
                mirror_url = self.__apache_s3_mirror_url(path)
                if run("curl -Ofs '%s'" % mirror_url, warn_only=True).failed:
                    mirror_url = self.__apache_official_mirror_url(path)
                    run("curl -Ofs '%s'" % mirror_url)
                try:
                    # install_dir is expected to be in scope for fmt() — TODO confirm
                    sudo(fmt('mkdir -p {install_dir}/{package}'))
                    sudo(
                        fmt('tar -C {install_dir}/{package} '
                            '--strip-components=1 -xzf {tarball}'))
                    return
                finally:
                    # Always clean up the downloaded tarball, extracted or not.
                    run(fmt('rm {tarball}'))
            except SystemExit:
                # Fabric aborts raise SystemExit; retry until attempts run out.
                if next(tries, None) is None:
                    raise
                else:
                    log.warn(
                        "Could not download or extract the package, retrying ..."
                    )
Exemplo n.º 5
0
    def __register_systemd_jobs( self, service_map ):
        """
        Install a start script and a systemd unit for every service in the
        given mapping from node type to list of service descriptors.

        :param service_map: a dict mapping node type to an iterable of service
               objects exposing init_name, command, description and user.
        """
        for node_type, services in service_map.iteritems( ):
            for service in services:
                service_command_path = '/usr/sbin/%s-start.sh' % service.init_name

                # Upload the service's command as a root-owned, executable shell script.
                put( local_path=StringIO( "#!/bin/sh\n" + service.command ), remote_path=service_command_path, use_sudo=True )
                sudo( "chown root:root '%s'" % service_command_path )
                sudo( "chmod +x '%s'" % service_command_path )

                # Each unit is ordered after mesosbox and before docker.
                self._register_init_script(
                    service.init_name,
                    heredoc( """
                        [Unit]
                        Description={service.description}
                        Before=docker.service
                        Wants=docker.service
                        Requires=mesosbox.service
                        After=mesosbox.service

                        [Service]
                        Type=simple
                        ExecStart={service_command_path}
                        User={service.user}
                        Group={service.user}
                        Environment="USER={user}"
                        LimitNOFILE=8000:8192
                        UMask=022

                        [Install]
                        WantedBy=multi-user.target
                        """ ) )
Exemplo n.º 6
0
    def _install_apache_package( self, remote_path, install_dir ):
        """
        Download the given package from an Apache download mirror and extract it to a child
        directory of the directory at the given path.

        :param str remote_path: the URL path of the package on the Apache download server and its
               mirrors.

        :param str install_dir: The path to a local directory in which to create the directory
               containing the extracted package.
        """
        # TODO: run Fabric tasks with a different manager, so we don't need to catch SystemExit
        components = remote_path.split( '/' )
        package, tarball = components[ 0 ], components[ -1 ]
        # Some mirrors may be down or serve crap, so we may need to retry this a couple of times.
        tries = iter( xrange( 3 ) )
        while True:
            try:
                # Prefer the S3 mirror, falling back to the official mirror on failure.
                mirror_url = self.__apache_s3_mirror_url( remote_path )
                if run( "curl -Ofs '%s'" % mirror_url, warn_only=True ).failed:
                    mirror_url = self.__apache_official_mirror_url( remote_path )
                    run( "curl -Ofs '%s'" % mirror_url )
                try:
                    sudo( fmt( 'mkdir -p {install_dir}/{package}' ) )
                    sudo( fmt( 'tar -C {install_dir}/{package} '
                               '--strip-components=1 -xzf {tarball}' ) )
                    return
                finally:
                    # Always clean up the downloaded tarball, extracted or not.
                    run( fmt( 'rm {tarball}' ) )
            except SystemExit:
                # Fabric aborts raise SystemExit; retry until attempts run out.
                if next( tries, None ) is None:
                    raise
                else:
                    log.warn( "Could not download or extract the package, retrying ..." )
Exemplo n.º 7
0
 def __setup_application_user(self):
     """Create the application user account with a home directory, its own group and a bash shell."""
     command = fmt('useradd --home /home/{user} --create-home '
                   '--user-group --shell /bin/bash {user}')
     sudo(command)
Exemplo n.º 8
0
 def __install_mesos_egg( self ):
     """
     Download and install the Mesos Python bindings egg for Ubuntu 14.04.
     """
     # FIXME: this is the ubuntu 14.04 version. Won't work with other versions.
     run( "wget http://downloads.mesosphere.io/master/ubuntu/14.04/"
          "mesos-0.22.0-py2.7-linux-x86_64.egg" )
     # we need a newer version of protobuf than comes default on ubuntu
     sudo( "pip install --upgrade protobuf", pty=False )
     sudo( "easy_install mesos-0.22.0-py2.7-linux-x86_64.egg" )
Exemplo n.º 9
0
    def _lazy_mkdir(self, parent, name, persistent=False):
        """
        _lazy_mkdir( '/foo', 'dir', True ) creates /foo/dir now and ensures that
        /mnt/persistent/foo/dir is created and bind-mounted into /foo/dir when the box starts.
        Likewise, _lazy_mkdir( '/foo', 'dir', False) creates /foo/dir now and ensures that
        /mnt/ephemeral/foo/dir is created and bind-mounted into /foo/dir when the box starts.

        Note that at start-up time, /mnt/persistent may be reassigned  to /mnt/ephemeral if no
        EBS volume is mounted at /mnt/persistent.

        _lazy_mkdir( '/foo', 'dir', None ) will look up an instance tag named 'persist_foo_dir'
        when the box starts and then behave like _lazy_mkdir( '/foo', 'dir', True ) if that tag's
        value is 'True', or _lazy_mkdir( '/foo', 'dir', False ) if that tag's value is False.

        :param str parent: absolute path of the parent directory; must not nest
               with either the persistent or the ephemeral mount point.
        :param str name: name of the directory to create; must not contain '/'.
        :param persistent: True, False or None, as described above.
        :return: the logical path parent + '/' + name.
        """
        # Registration must happen before lazy_dirs is frozen (set to None).
        assert self.lazy_dirs is not None
        assert '/' not in name
        assert parent.startswith('/')
        for location in (persistent_dir, ephemeral_dir):
            assert location.startswith('/')
            assert not location.startswith(parent) and not parent.startswith(
                location)
        logical_path = parent + '/' + name
        sudo('mkdir -p "%s"' % logical_path)
        # Defer the bind-mount decision to boot time via the lazy_dirs registry.
        self.lazy_dirs.add((parent, name, persistent))
        return logical_path
Exemplo n.º 10
0
 def __install_toil(self):
     """
     Install Toil via pip and point TOIL_WORKDIR at a lazily bind-mounted
     directory under /var/lib.
     """
     # FIXME: consider using a virtualenv for Toil like we do for s3am
     # Older versions of pip don't support the 'extra' mechanism used by Toil's setup.py
     pip('install --upgrade pip', use_sudo=True)
     pip(concat('install', self._toil_pip_args()), use_sudo=True)
     # persistent=None: persistence is decided at boot time from an instance tag
     self._lazy_mkdir('/var/lib', 'toil', persistent=None)
     sudo('echo "TOIL_WORKDIR=/var/lib/toil" >> /etc/environment')
Exemplo n.º 11
0
 def __setup_mesos( self ):
     """
     Configure Mesos after package installation: disable the packaged init
     jobs, create data/log directories, set up credentials and register our
     own Upstart jobs.
     """
     # The Mesosphere packages ship their own Upstart jobs; remove them so the
     # jobs registered below are the only ones managing Mesos.
     sudo( "rm /etc/init/mesos-{master,slave}.conf" )
     self._lazy_mkdir( log_dir, 'mesos', persistent=False )  # logs are expendable
     self._lazy_mkdir( '/var/lib', 'mesos', persistent=True )  # work dir should survive restarts
     self.__prepare_credentials( )
     self.__register_upstart_jobs( mesos_services )
     self._post_install_mesos( )
Exemplo n.º 12
0
 def __install_toil( self ):
     """
     Install Toil via pip and point TOIL_WORKDIR at a lazily bind-mounted
     directory under /var/lib.
     """
     # FIXME: consider using a virtualenv for Toil like we do for s3am
     # Older versions of pip don't support the 'extra' mechanism used by Toil's setup.py
     pip( 'install --upgrade pip', use_sudo=True )
     pip( concat( 'install', self._toil_pip_args( ) ), use_sudo=True )
     # persistent=None: persistence is decided at boot time from an instance tag
     self._lazy_mkdir( '/var/lib', 'toil', persistent=None )
     sudo( 'echo "TOIL_WORKDIR=/var/lib/toil" >> /etc/environment' )
Exemplo n.º 13
0
    def __install_apache_package( self, path ):
        """
        Download the given file from an Apache download mirror and extract it
        under the installation directory.

        Some mirrors may be down or serve crap, so we may need to retry this a couple of times.

        :param str path: the URL path of the package on the Apache download
               server and its mirrors; the first path component names the
               package, the last one the tarball.
        """
        # TODO: run Fabric tasks with a different manager, so we don't need to catch SystemExit
        components = path.split( '/' )
        package, tarball = components[ 0 ], components[ -1 ]
        # Up to three attempts; the iterator doubles as the retry counter.
        tries = iter( xrange( 3 ) )
        while True:
            try:
                # Prefer the S3 mirror, falling back to the official mirror on failure.
                mirror_url = self.__apache_s3_mirror_url( path )
                if run( "curl -Ofs '%s'" % mirror_url, warn_only=True ).failed:
                    mirror_url = self.__apache_official_mirror_url( path )
                    run( "curl -Ofs '%s'" % mirror_url )
                try:
                    # install_dir is expected to be in scope for fmt() — TODO confirm
                    sudo( fmt( 'mkdir -p {install_dir}/{package}' ) )
                    sudo( fmt( 'tar -C {install_dir}/{package} '
                               '--strip-components=1 -xzf {tarball}' ) )
                    return
                finally:
                    # Always clean up the downloaded tarball, extracted or not.
                    run( fmt( 'rm {tarball}' ) )
            except SystemExit:
                # Fabric aborts raise SystemExit; retry until attempts run out.
                if next( tries, None ) is None:
                    raise
                else:
                    log.warn( "Could not download or extract the package, retrying ..." )
Exemplo n.º 14
0
 def _setup_build_user(self):
     """
     Let the jenkins build user run the Debian package-management tools via
     passwordless sudo, and drop the tty requirement for it.
     """
     super(UbuntuGenericJenkinsSlave, self)._setup_build_user()
     sudo("echo 'Defaults:jenkins !requiretty' >> /etc/sudoers")
     sudoers_template = "echo 'jenkins ALL=(ALL) NOPASSWD: /usr/bin/%s' >> /etc/sudoers"
     for tool in ('apt-get', 'dpkg', 'gdebi'):
         sudo(sudoers_template % tool)
Exemplo n.º 15
0
 def __install_mesos_egg( self ):
     """
     Download and install the Mesos Python bindings egg matching this box's
     Ubuntu release, then remove the downloaded file.
     """
     egg = 'mesos-' + self._mesos_egg_version( ) + '-py2.7-linux-x86_64.egg'
     version = self.release( ).version
     run( fmt( 'wget http://downloads.mesosphere.io/master/ubuntu/{version}/{egg}' ) )
     # We need a newer version of protobuf than what comes default on Ubuntu
     pip( 'install --upgrade protobuf', use_sudo=True )
     # NOTE(review): -a is easy_install's --always-copy switch — confirm intent
     sudo( 'easy_install -a ' + egg )
     run( 'rm ' + egg )
Exemplo n.º 16
0
 def _setup_docker( self ):
     """
     Patch the stock Docker systemd unit so it is ordered after the mesosbox
     job, then reload systemd.
     """
     # The docker and dockerbox init jobs depend on /mnt/persistent which is set up by the
     # mesosbox job. Adding a dependency of the docker job on mesosbox should satisfy that
     # dependency.
     super( ToilBoxSupport, self )._setup_docker( )
     with remote_sudo_popen( 'patch -d /lib/systemd/system' ) as patch:
         patch.write( self._docker_patch_heredoc( ) )
         # Reload unit files so the patched docker unit takes effect.
         sudo ( "systemctl daemon-reload")
Exemplo n.º 17
0
 def _install_mesos_egg( self ):
     """
     Download and install the Mesos Python bindings egg matching this box's
     Ubuntu release, then remove the downloaded file.
     """
     egg = 'mesos-' + self._mesos_egg_version( ) + '-py2.7-linux-x86_64.egg'
     version = self.release( ).version
     run( fmt( 'wget http://downloads.mesosphere.io/master/ubuntu/{version}/{egg}' ) )
     # We need a newer version of protobuf than what comes default on Ubuntu
     pip( 'install --upgrade protobuf', use_sudo=True )
     # NOTE(review): -a is easy_install's --always-copy switch — confirm intent
     sudo( 'easy_install -a ' + egg )
     run( 'rm ' + egg )
Exemplo n.º 18
0
 def _setup_build_user( self ):
     """
     Grant the jenkins build user passwordless sudo for mount and umount so
     tests can manage an isolated loopback filesystem for TMPDIR.
     """
     super( ToilJenkinsSlave, self )._setup_build_user( )
     # Allow mount and umount such that Toil tests can use an isolated loopback filesystem for
     # TMPDIR (and therefore Toil's work directory), thereby preventing the tracking of
     # left-over files from being skewed by other activities on the ephemeral file system,
     # like build logs, creation of .pyc files, etc.
     sudoers_template = "echo 'jenkins ALL=(ALL) NOPASSWD: /bin/%s' >> /etc/sudoers"
     for command in ('mount', 'umount'):
         sudo( sudoers_template % command )
Exemplo n.º 19
0
 def _setup_build_user(self):
     """
     Grant the jenkins build user passwordless sudo for mount and umount.
     """
     super(ToilJenkinsSlave, self)._setup_build_user()
     # Allow mount and umount such that Toil tests can use an isolated loopback filesystem for
     # TMPDIR (and therefore Toil's work directory), thereby preventing the tracking of
     # left-over files from being skewed by other activities on the ephemeral file system,
     # like build logs, creation of .pyc files, etc.
     for prog in ('mount', 'umount'):
         sudo("echo 'jenkins ALL=(ALL) NOPASSWD: /bin/%s' >> /etc/sudoers" %
              prog)
Exemplo n.º 20
0
 def _setup_package_repos( self ):
     """
     Register the official Docker apt repository for this box's release.
     """
     # apt must be able to fetch over HTTPS in order to use the Docker repo.
     assert run( 'test -e /usr/lib/apt/methods/https', warn_only=True ).succeeded, \
         "Need HTTPS support in apt-get in order to install from the Docker repository"
     super( DockerBox, self )._setup_package_repos( )
     sudo( ' '.join( [ 'apt-key', 'adv',
                         '--keyserver', 'hkp://p80.pool.sks-keyservers.net:80',
                         '--recv-keys', '58118E89F3A912897C070ADBF76221572C52609D' ] ) )
     codename = self.release( ).codename
     sudo( fmt( 'echo deb https://apt.dockerproject.org/repo ubuntu-{codename} main '
                '> /etc/apt/sources.list.d/docker.list' ) )
Exemplo n.º 21
0
 def _setup_package_repos( self ):
     """
     Register the official Docker apt repository for this box's release.
     """
     # apt must be able to fetch over HTTPS in order to use the Docker repo.
     assert run( 'test -e /usr/lib/apt/methods/https', warn_only=True ).succeeded, \
         "Need HTTPS support in apt-get in order to install from the Docker repository"
     super( DockerBox, self )._setup_package_repos( )
     sudo( ' '.join( [ 'apt-key', 'adv',
                         '--keyserver', 'hkp://p80.pool.sks-keyservers.net:80',
                         '--recv-keys', '58118E89F3A912897C070ADBF76221572C52609D' ] ) )
     codename = self.release( ).codename
     sudo( fmt( 'echo deb https://apt.dockerproject.org/repo ubuntu-{codename} main '
                '> /etc/apt/sources.list.d/docker.list' ) )
Exemplo n.º 22
0
 def qconf_dict( opt, d=None, file_name='qconf.tmp' ):
     """
     Read or write a Grid Engine configuration object as a dict.

     With d given, upload it as a "key value"-per-line temp file and feed it
     to qconf with the given option; without d, run qconf and parse its
     output into a dict.
     """
     if not d:
         # Read mode: split output into lines, skip blanks and comments, and
         # break each remaining line into a key/value pair.
         output = run( 'SGE_SINGLE_LINE=1 qconf ' + opt )
         return dict( tuple( ws.split( line, 1 ) )
                      for line in nl.split( output )
                      if line and not line.startswith( '#' ) )
     # qconf can't read from stdin for some reason, neither -, /dev/stdin or /dev/fd/0 works
     contents = '\n'.join( ' '.join( item ) for item in d.iteritems( ) ) + '\n'
     put( remote_path=file_name, local_path=StringIO( contents ) )
     sudo( ' '.join( [ 'qconf', opt, file_name ] ) )
     run( ' '.join( [ 'rm', file_name ] ) )
Exemplo n.º 23
0
 def qconf_dict( opt, d=None, file_name='qconf.tmp' ):
     """
     Read or write a Grid Engine configuration object via qconf.

     If d is given, upload it as a "key value"-per-line file and pass it to
     qconf with the given option; otherwise run qconf and parse its output
     into a dict. Assumes ws and nl are precompiled whitespace and newline
     regexes — TODO confirm.
     """
     if d:
         # qconf can't read from stdin for some reason, neither -, /dev/stdin or /dev/fd/0 works
         s = '\n'.join( ' '.join( i ) for i in d.iteritems( ) ) + '\n'
         put( remote_path=file_name, local_path=StringIO( s ) )
         sudo( ' '.join( [ 'qconf', opt, file_name ] ) )
         run( ' '.join( [ 'rm', file_name ] ) )
     else:
         # Skip blank lines and comments; split each line into key and value.
         return dict( tuple( ws.split( l, 1 ) )
                          for l in nl.split( run( 'SGE_SINGLE_LINE=1 qconf ' + opt ) )
                          if l and not l.startswith( '#' ) )
Exemplo n.º 24
0
    def __install_tools( self ):
        """
        Installs the mesos-master-discovery init script and its companion mesos-tools. The latter
        is a Python package distribution that's included in cgcloud-mesos as a resource. This is
        in contrast to the cgcloud agent, which is a standalone distribution.
        """
        tools_dir = install_dir + '/tools'
        admin = self.admin_account( )
        # Build a dedicated virtualenv for the tools, temporarily owned by the
        # admin account so pip can install into it without sudo.
        sudo( fmt( 'mkdir -p {tools_dir}' ) )
        sudo( fmt( 'chown {admin}:{admin} {tools_dir}' ) )
        sudo( fmt( 'virtualenv --no-pip {tools_dir}' ) )
        sudo( fmt( '{tools_dir}/bin/easy_install pip==1.5.2' ) )

        # Agent forwarding lets pip reach private project artifacts over SSH.
        with settings( forward_agent=True ):
            with self._project_artifacts( 'mesos-tools' ) as artifacts:
                pip( use_sudo=True,
                     path=tools_dir + '/bin/pip',
                     args=concat( 'install', artifacts ) )
        # Hand ownership back to root once installation is complete.
        sudo( fmt( 'chown -R root:root {tools_dir}' ) )

        # Serialize the MesosTools constructor call for embedding in the job below.
        mesos_tools = "MesosTools(**%r)" % dict( user=user,
                                                 shared_dir=self._shared_dir( ),
                                                 ephemeral_dir=ephemeral_dir,
                                                 persistent_dir=persistent_dir,
                                                 lazy_dirs=self.lazy_dirs )

        self.lazy_dirs = None  # make sure it can't be used anymore once we are done with it

        self._register_init_script(
            "mesosbox",
            heredoc( """
                description "Mesos master discovery"
                console log
                start on (local-filesystems and net-device-up IFACE!=lo)
                stop on runlevel [!2345]
                pre-start script
                {tools_dir}/bin/python2.7 - <<END
                import logging
                logging.basicConfig( level=logging.INFO )
                from cgcloud.mesos_tools import MesosTools
                mesos_tools = {mesos_tools}
                mesos_tools.start()
                END
                end script
                post-stop script
                {tools_dir}/bin/python2.7 - <<END
                import logging
                logging.basicConfig( level=logging.INFO )
                from cgcloud.mesos_tools import MesosTools
                mesos_tools = {mesos_tools}
                mesos_tools.stop()
                END
                end script""" ) )
Exemplo n.º 25
0
 def setup_repo_host_keys(self, user=None):
     """
     Pre-seed the SSH host keys for bitbucket.org and github.com so that ssh
     doesn't prompt during the initial checkouts.

     :param user: None to scan as the current remote user, 'root' to scan as
            root, or any other account name to scan via a login sudo shell.
     """
     for repo_host in ('bitbucket.org', 'github.com'):
         keyscan = 'ssh-keyscan -t rsa %s >> ~/.ssh/known_hosts' % repo_host
         if user is None:
             run( keyscan )
         elif user == 'root':
             sudo( keyscan )
         else:
             sudo( keyscan, user=user, sudo_args='-i' )
Exemplo n.º 26
0
 def qconf_dict(opt, d=None, file_name="qconf.tmp"):
     """
     Read or write a Grid Engine configuration object via qconf.

     If d is given, upload it as a "key value"-per-line file and pass it to
     qconf with the given option; otherwise run qconf and parse its output
     into a dict. Assumes ws and nl are precompiled whitespace and newline
     regexes — TODO confirm.
     """
     if d:
         # qconf can't read from stdin for some reason, neither -, /dev/stdin or /dev/fd/0 works
         s = "\n".join(" ".join(i) for i in d.iteritems()) + "\n"
         put(remote_path=file_name, local_path=StringIO(s))
         sudo(" ".join(["qconf", opt, file_name]))
         run(" ".join(["rm", file_name]))
     else:
         # Skip blank lines and comments; split each line into key and value.
         return dict(
             tuple(ws.split(l, 1))
             for l in nl.split(run("SGE_SINGLE_LINE=1 qconf " + opt))
             if l and not l.startswith("#")
         )
Exemplo n.º 27
0
    def __install_hadoop(self):
        """
        Download, extract and configure Hadoop/HDFS on this box, then register
        its Upstart jobs.
        """
        # Download and extract Hadoop
        path = fmt(
            'hadoop/common/hadoop-{hadoop_version}/hadoop-{hadoop_version}.tar.gz'
        )
        self._install_apache_package(path, install_dir)

        # Add environment variables to hadoop_env.sh
        hadoop_env = dict(HADOOP_LOG_DIR=self._lazy_mkdir(log_dir, "hadoop"),
                          JAVA_HOME='/usr/lib/jvm/java-8-oracle')
        hadoop_env_sh_path = fmt(
            "{install_dir}/hadoop/etc/hadoop/hadoop-env.sh")
        with remote_open(hadoop_env_sh_path, use_sudo=True) as hadoop_env_sh:
            hadoop_env_sh.write('\n')
            for name, value in hadoop_env.iteritems():
                hadoop_env_sh.write(fmt('export {name}="{value}"\n'))

        # Configure HDFS: data/name/checkpoint dirs are lazily bind-mounted
        # persistent directories; the web UIs live on the master node.
        hdfs_dir = var_dir + "/hdfs"
        put(use_sudo=True,
            remote_path=fmt('{install_dir}/hadoop/etc/hadoop/hdfs-site.xml'),
            local_path=StringIO(
                self.__to_hadoop_xml_config({
                    'dfs.replication':
                    str(hdfs_replication),
                    'dfs.permissions':
                    'false',
                    'dfs.name.dir':
                    self._lazy_mkdir(hdfs_dir, 'name', persistent=True),
                    'dfs.data.dir':
                    self._lazy_mkdir(hdfs_dir, 'data', persistent=True),
                    'fs.checkpoint.dir':
                    self._lazy_mkdir(hdfs_dir, 'checkpoint', persistent=True),
                    'dfs.namenode.http-address':
                    'spark-master:50070',
                    'dfs.namenode.secondary.http-address':
                    'spark-master:50090'
                })))

        # Configure Hadoop
        put(use_sudo=True,
            remote_path=fmt('{install_dir}/hadoop/etc/hadoop/core-site.xml'),
            local_path=StringIO(
                self.__to_hadoop_xml_config(
                    {'fs.default.name': 'hdfs://spark-master:8020'})))

        # Make shell auto completion easier
        sudo(fmt('find {install_dir}/hadoop -name "*.cmd" | xargs rm'))

        # Install upstart jobs
        self.__register_upstart_jobs(hadoop_services)
Exemplo n.º 28
0
 def _setup_docker(self):
     """
     Add the configured users to the docker group and, if data prefixes are
     configured, relocate /var/lib/docker onto the first mounted prefix via
     the 'dockerbox' Upstart job.
     """
     for docker_user in set(self._docker_users()):
         sudo("usermod -aG docker " + docker_user)
     prefixes = self._docker_data_prefixes()
     if prefixes:
         # The prefixes are interpolated into the shell script below.
         prefixes = " ".join(map(quote, prefixes))
         self._run_init_script("docker", "stop")
         # Make sure Docker's aufs backend isn't mounted anymore
         sudo("umount /var/lib/docker/aufs", warn_only=True)
         # Backup initial state of data directory so we can initialize an empty ephemeral volume
         sudo("tar -czC /var/lib docker > /var/lib/docker.tar.gz")
         # Then delete it and recreate it as an empty directory to serve as the bind mount point
         sudo("rm -rf /var/lib/docker && mkdir /var/lib/docker")
         self._register_init_script(
             "dockerbox",
             heredoc(
                 """
                 description "Placement of /var/lib/docker"
                 console log
                 start on starting docker
                 stop on stopped docker
                 pre-start script
                     echo
                     echo "This is the dockerbox pre-start script"
                     set -ex
                     if mountpoint -q /var/lib/docker; then
                         echo "The directory '/var/lib/docker' is already mounted, exiting."
                     else
                         for prefix in {prefixes}; do
                             # Prefix must refer to a separate volume, e.g. ephemeral or EBS
                             if mountpoint -q "$prefix"; then
                                 # Make sure Docker's aufs backend isn't mounted anymore
                                 umount /var/lib/docker/aufs || true
                                 if test -d "$prefix/var/lib/docker"; then
                                     echo "The directory '$prefix/var/lib/docker' already exists, using it."
                                 else
                                     mkdir -p "$prefix/var/lib"
                                     # If /var/lib/docker contains files ...
                                     if python -c 'import os, sys; sys.exit( 0 if os.listdir( sys.argv[1] ) else 1 )' /var/lib/docker; then
                                         # ... move it to prefix ...
                                         mv /var/lib/docker "$prefix/var/lib"
                                         # ... and recreate it as an empty mount point, ...
                                         mkdir -p /var/lib/docker
                                     else
                                         # ... otherwise untar the initial backup.
                                         tar -xzC "$prefix/var/lib" < /var/lib/docker.tar.gz
                                     fi
                                 fi
                                 # Now bind-mount into /var/lib/docker
                                 mount --bind "$prefix/var/lib/docker" /var/lib/docker
                                 break
                             else
                                 echo "The prefix directory '$prefix' is not a mount point, skipping."
                             fi
                         done
                     fi
                 end script"""
             ),
         )
         self._run_init_script("docker", "start")
Exemplo n.º 29
0
 def _setup_docker(self):
     """
     Add the configured users to the docker group and, if data prefixes are
     configured, relocate /var/lib/docker onto the first mounted prefix via
     the 'dockerbox' Upstart job.
     """
     for docker_user in set(self._docker_users()):
         sudo("usermod -aG docker " + docker_user)
     prefixes = self._docker_data_prefixes()
     if prefixes:
         # The prefixes are interpolated into the shell script below.
         prefixes = ' '.join(map(quote, prefixes))
         self._run_init_script('docker', 'stop')
         # Make sure Docker's aufs backend isn't mounted anymore
         sudo('umount /var/lib/docker/aufs', warn_only=True)
         # Backup initial state of data directory so we can initialize an empty ephemeral volume
         sudo('tar -czC /var/lib docker > /var/lib/docker.tar.gz')
         # Then delete it and recreate it as an empty directory to serve as the bind mount point
         sudo('rm -rf /var/lib/docker && mkdir /var/lib/docker')
         self._register_init_script(
             'dockerbox',
             heredoc("""
                 description "Placement of /var/lib/docker"
                 console log
                 start on starting docker
                 stop on stopped docker
                 pre-start script
                     echo
                     echo "This is the dockerbox pre-start script"
                     set -ex
                     if mountpoint -q /var/lib/docker; then
                         echo "The directory '/var/lib/docker' is already mounted, exiting."
                     else
                         for prefix in {prefixes}; do
                             # Prefix must refer to a separate volume, e.g. ephemeral or EBS
                             if mountpoint -q "$prefix"; then
                                 # Make sure Docker's aufs backend isn't mounted anymore
                                 umount /var/lib/docker/aufs || true
                                 if test -d "$prefix/var/lib/docker"; then
                                     echo "The directory '$prefix/var/lib/docker' already exists, using it."
                                 else
                                     mkdir -p "$prefix/var/lib"
                                     # If /var/lib/docker contains files ...
                                     if python -c 'import os, sys; sys.exit( 0 if os.listdir( sys.argv[1] ) else 1 )' /var/lib/docker; then
                                         # ... move it to prefix ...
                                         mv /var/lib/docker "$prefix/var/lib"
                                         # ... and recreate it as an empty mount point, ...
                                         mkdir -p /var/lib/docker
                                     else
                                         # ... otherwise untar the initial backup.
                                         tar -xzC "$prefix/var/lib" < /var/lib/docker.tar.gz
                                     fi
                                 fi
                                 # Now bind-mount into /var/lib/docker
                                 mount --bind "$prefix/var/lib/docker" /var/lib/docker
                                 break
                             else
                                 echo "The prefix directory '$prefix' is not a mount point, skipping."
                             fi
                         done
                     fi
                 end script"""))
         self._run_init_script('docker', 'start')
Exemplo n.º 30
0
 def __install_sparkbox_tools( self ):
     """
     Installs the spark-master-discovery init script and its companion spark-tools. The latter
     is a Python package distribution that's included in cgcloud-spark as a resource. This is
     in contrast to the cgcloud agent, which is a standalone distribution.
     """
     tools_dir = install_dir + '/tools'
     admin = self.admin_account( )
     # Build a dedicated virtualenv for the tools, temporarily owned by the
     # admin account so pip can install into it without sudo.
     sudo( fmt( 'mkdir -p {tools_dir} {persistent_dir} {ephemeral_dir}' ) )
     sudo( fmt( 'chown {admin}:{admin} {tools_dir}' ) )
     run( fmt( 'virtualenv --no-pip {tools_dir}' ) )
     run( fmt( '{tools_dir}/bin/easy_install pip==1.5.2' ) )
     spark_tools_artifacts = ' '.join( self._project_artifacts( 'spark-tools' ) )
     # Agent forwarding lets pip reach private project artifacts over SSH.
     with settings( forward_agent=True ):
         run( fmt( '{tools_dir}/bin/pip install {spark_tools_artifacts}' ) )
     sudo( fmt( 'chown -R root:root {tools_dir}' ) )
     # Serialize the SparkTools constructor call for embedding in the job below.
     spark_tools = "SparkTools(**%r)" % dict( user=user,
                                              install_dir=install_dir,
                                              ephemeral_dir=ephemeral_dir,
                                              persistent_dir=persistent_dir,
                                              lazy_dirs=self.lazy_dirs )
     # BUGFIX: the pre-start section was missing the END terminator for the
     # inline python heredoc (<<END), so 'end script' would have been consumed
     # by the shell heredoc instead of ending the Upstart stanza. The post-stop
     # section (and the analogous mesosbox job) show the intended shape.
     self._register_init_script(
         "sparkbox",
         heredoc( """
             description "Spark/HDFS master discovery"
             console log
             start on runlevel [2345]
             stop on runlevel [016]
             pre-start script
             {tools_dir}/bin/python2.7 - <<END
             import logging
             logging.basicConfig( level=logging.INFO )
             from cgcloud.spark_tools import SparkTools
             spark_tools = {spark_tools}
             spark_tools.start()
             END
             end script
             post-stop script
             {tools_dir}/bin/python2.7 - <<END
             import logging
             logging.basicConfig( level=logging.INFO )
             from cgcloud.spark_tools import SparkTools
             spark_tools = {spark_tools}
             spark_tools.stop()
             END
             end script""" ) )
     # Helper script for adding/removing slaves at runtime.
     script_path = "/usr/local/bin/sparkbox-manage-slaves"
     put( remote_path=script_path, use_sudo=True, local_path=StringIO( heredoc( """
         #!{tools_dir}/bin/python2.7
         import sys
         import logging
         logging.basicConfig( level=logging.INFO )
         from cgcloud.spark_tools import SparkTools
         spark_tools = {spark_tools}
         spark_tools.manage_slaves( slaves_to_add=sys.argv[1:] )""" ) ) )
     sudo( fmt( "chown root:root {script_path} && chmod 755 {script_path}" ) )
Exemplo n.º 31
0
 def __lazy_mkdir( self, parent, name, persistent=False ):
     """
     __lazy_mkdir( '/foo', 'dir', True ) creates /foo/dir now and ensures that
     /mnt/persistent/foo/dir is created and bind-mounted into /foo/dir when the box starts.
     Likewise, __lazy_mkdir( '/foo', 'dir', False) creates /foo/dir now and ensures that
     /mnt/ephemeral/foo/dir is created and bind-mounted into /foo/dir when the box starts.
     Note that at start-up time, /mnt/persistent may be reassigned  to /mnt/ephemeral if no
     EBS volume is mounted at /mnt/persistent.

     :param str parent: absolute path of the parent directory; must not nest
            with either the persistent or the ephemeral mount point.
     :param str name: name of the directory to create; must not contain '/'.
     :param bool persistent: whether the directory should survive restarts.
     :return: the logical path parent + '/' + name.
     """
     assert '/' not in name
     assert parent.startswith( '/' )
     for location in ( persistent_dir, ephemeral_dir ):
         assert location.startswith( '/' )
         assert not location.startswith( parent ) and not parent.startswith( location )
     logical_path = parent + '/' + name
     sudo( 'mkdir -p "%s"' % logical_path )
     # Defer the bind-mount decision to boot time via the lazy_dirs registry.
     self.lazy_dirs.add( ( parent, name, persistent ) )
     return logical_path
Exemplo n.º 32
0
    def __setup_application_user( self ):
        """
        Create the application user account and grant it unrestricted,
        passwordless sudo via a sudoers.d drop-in file.
        """
        sudo( fmt( 'useradd '
                   '--home /home/{user} '
                   '--create-home '
                   '--user-group '
                   '--shell /bin/bash {user}' ) )

        # NOTE(review): the "User rules" stanza below is duplicated — likely
        # unintentional but harmless; confirm before deduplicating.
        sudoer_file = heredoc( """
            # CGcloud - MesosBox

            # User rules for ubuntu
            mesosbox ALL=(ALL) NOPASSWD:ALL

            # User rules for ubuntu
            mesosbox ALL=(ALL) NOPASSWD:ALL
            """ )

        sudoer_file_path = '/etc/sudoers.d/89-mesosbox-user'
        # mode=0440 is the permission sudo requires for sudoers.d files.
        put( local_path=StringIO( sudoer_file ), remote_path=sudoer_file_path, use_sudo=True, mode=0440 )
        sudo( "chown root:root '%s'" % sudoer_file_path )
Exemplo n.º 33
0
 def _setup_build_user(self):
     """
     Extend the build-user setup with the sudo rights needed for RPM builds.
     """
     super( CentosRpmbuildJenkinsSlave, self )._setup_build_user( )
     # Some RPM builds depend on the product of other RPM builds to be installed,
     # so the jenkins user must be able to run rpm (without a tty) between builds.
     for command in ( "echo 'Defaults:jenkins !requiretty' >> /etc/sudoers",
                      "echo 'jenkins ALL=(ALL) NOPASSWD: /bin/rpm' >> /etc/sudoers",
                      # The mockbuild user goes with the mock package
                      "useradd -s /sbin/nologin mockbuild" ):
         sudo( command )
Exemplo n.º 34
0
 def _setup_package_repos( self ):
     """
     Add the WebUpd8 Java PPA and pre-accept the Oracle license via debconf.
     """
     super( SparkBox, self )._setup_package_repos( )
     sudo( 'add-apt-repository -y ppa:webupd8team/java' )
     # Pre-seed debconf so the Oracle JDK package installs without prompting
     # for license acceptance.
     for state in ( 'select', 'seen' ):
         sudo( 'echo debconf shared/accepted-oracle-license-v1-1 %s true '
               '| sudo debconf-set-selections' % state )
 def _setup_build_user(self):
     """
     Grant the Jenkins build user the sudo privileges required to install RPMs.
     """
     super( CentosRpmbuildJenkinsSlave, self )._setup_build_user( )
     append_sudoers = lambda rule: sudo( "echo '%s' >> /etc/sudoers" % rule )
     # Some RPM builds depend on the product of other RPM builds to be installed
     # so we need to be able to run rpm in between RPM builds.
     append_sudoers( 'Defaults:jenkins !requiretty' )
     append_sudoers( 'jenkins ALL=(ALL) NOPASSWD: /bin/rpm' )
     # The 'mockbuild' user goes with the mock package.
     sudo( "useradd -s /sbin/nologin mockbuild" )
Exemplo n.º 36
0
    def __install_hadoop( self ):
        """
        Download and extract Hadoop and configure a minimal HDFS whose name, data and
        checkpoint directories are backed by persistent storage, with the master
        daemons expected on the host called 'spark-master'.
        """
        # Download and extract Hadoop
        path = fmt( 'hadoop/common/hadoop-{hadoop_version}/hadoop-{hadoop_version}.tar.gz' )
        self.__install_apache_package( path )

        # Add environment variables to hadoop_env.sh
        hadoop_env = dict(
            HADOOP_LOG_DIR=self._lazy_mkdir( log_dir, "hadoop" ),
            JAVA_HOME='/usr/lib/jvm/java-7-oracle' )
        hadoop_env_sh_path = fmt( "{install_dir}/hadoop/etc/hadoop/hadoop-env.sh" )
        with remote_open( hadoop_env_sh_path, use_sudo=True ) as hadoop_env_sh:
            hadoop_env_sh.write( '\n' )
            for name, value in hadoop_env.iteritems( ):
                hadoop_env_sh.write( fmt( 'export {name}="{value}"\n' ) )

        # Configure HDFS
        hdfs_dir = var_dir + "/hdfs"
        put( use_sudo=True,
             remote_path=fmt( '{install_dir}/hadoop/etc/hadoop/hdfs-site.xml' ),
             local_path=StringIO( self.__to_hadoop_xml_config( {
                 # Name, data and checkpoint dirs are created via _lazy_mkdir so they
                 # get bind-mounted from persistent storage at boot.
                 'dfs.replication': str( hdfs_replication ),
                 'dfs.permissions': 'false',
                 'dfs.name.dir': self._lazy_mkdir( hdfs_dir, 'name', persistent=True ),
                 'dfs.data.dir': self._lazy_mkdir( hdfs_dir, 'data', persistent=True ),
                 'fs.checkpoint.dir': self._lazy_mkdir( hdfs_dir, 'checkpoint', persistent=True ),
                 'dfs.namenode.http-address': 'spark-master:50070',
                 'dfs.namenode.secondary.http-address': 'spark-master:50090' } ) ) )

        # Configure Hadoop
        put( use_sudo=True,
             remote_path=fmt( '{install_dir}/hadoop/etc/hadoop/core-site.xml' ),
             local_path=StringIO( self.__to_hadoop_xml_config( {
                 'fs.default.name': 'hdfs://spark-master:8020' } ) ) )

        # Make shell auto completion easier
        # (the Windows .cmd scripts are useless on this box and only pollute it)
        sudo( fmt( 'find {install_dir}/hadoop -name "*.cmd" | xargs rm' ) )

        # Install upstart jobs
        self.__register_upstart_jobs( hadoop_services )
Exemplo n.º 37
0
 def _setup_package_repos(self):
     """
     Register Docker's apt repository and import its signing key.

     The Docker repository is served over HTTPS only, so apt-get must have the
     HTTPS transport available before the repository can be used.
     """
     assert run(
         "test -e /usr/lib/apt/methods/https", warn_only=True
     ).succeeded, "Need HTTPS support in apt-get in order to install from the Docker repository"
     super(DockerBox, self)._setup_package_repos()
     # Import the repository's GPG key from the keyserver pool.
     key_server = "hkp://p80.pool.sks-keyservers.net:80"
     key_id = "58118E89F3A912897C070ADBF76221572C52609D"
     sudo("apt-key adv --keyserver %s --recv-keys %s" % (key_server, key_id))
     # Register the repository matching this Ubuntu release's codename.
     codename = self.release().codename
     sudo(
         fmt(
             "echo deb https://apt.dockerproject.org/repo ubuntu-{codename} main "
             "> /etc/apt/sources.list.d/docker.list"
         )
     )
Exemplo n.º 38
0
    def _lazy_mkdir( self, parent, name, persistent=False ):
        """
        Create parent/name now and arrange for a backing directory to be bind-mounted
        over it when the box boots.

        _lazy_mkdir( '/foo', 'dir', True ) creates /foo/dir immediately and ensures
        that /mnt/persistent/foo/dir is created and bind-mounted into /foo/dir at
        start-up; with persistent=False the backing directory is
        /mnt/ephemeral/foo/dir instead.

        Note that at start-up time, /mnt/persistent may be reassigned to
        /mnt/ephemeral if no EBS volume is mounted at /mnt/persistent.

        _lazy_mkdir( '/foo', 'dir', None ) will look up an instance tag named
        'persist_foo_dir' when the box starts and then behave as if persistent had
        been passed as True if that tag's value is 'True', or as False if the tag's
        value is False.
        """
        # Registration must happen while the lazy-dir set is still being collected.
        assert self.lazy_dirs is not None
        # A single path component under an absolute parent.
        assert '/' not in name
        assert parent.startswith( '/' )
        for mount_root in ( persistent_dir, ephemeral_dir ):
            assert mount_root.startswith( '/' )
            # Overlapping paths would make the bind mount self-referential.
            assert not ( mount_root.startswith( parent ) or parent.startswith( mount_root ) )
        full_path = '%s/%s' % ( parent, name )
        sudo( 'mkdir -p "%s"' % full_path )
        self.lazy_dirs.add( ( parent, name, persistent ) )
        return full_path
Exemplo n.º 39
0
    def __install_mesosbox_tools( self ):
        """
        Installs the mesos-master-discovery init script and its companion mesos-tools. The latter
        is a Python package distribution that's included in cgcloud-mesos as a resource. This is
        in contrast to the cgcloud agent, which is a standalone distribution.
        """
        tools_dir = install_dir + '/tools'
        sudo( fmt( 'mkdir -p {tools_dir}') )
        # Create the virtualenv without pip, then pin a specific pip version via
        # easy_install.
        sudo( fmt( 'virtualenv --no-pip {tools_dir}' ) )
        sudo( fmt( '{tools_dir}/bin/easy_install pip==1.5.2' ) )

        mesos_tools_artifacts = ' '.join( self._project_artifacts( 'mesos-tools' ) )
        # Forward the SSH agent so pip can fetch artifacts that need authentication.
        with settings( forward_agent=True ):
            sudo( fmt( '{tools_dir}/bin/pip install {mesos_tools_artifacts}' ) )

        # The repr() of this dict is spliced into the init script below, where it is
        # evaluated to construct a MesosTools instance with the same arguments.
        mesos_tools = "MesosTools(**%r)" % dict(user=user )
        # Upstart job: run MesosTools.start() before the service comes up and
        # MesosTools.stop() after it goes down.
        self._register_init_script(
            "mesosbox",
            heredoc( """
                description "Mesos master discovery"
                console log
                start on runlevel [2345]
                stop on runlevel [016]
                pre-start script
                {tools_dir}/bin/python2.7 - <<END
                import logging
                logging.basicConfig( level=logging.INFO )
                from cgcloud.mesos_tools import MesosTools
                mesos_tools = {mesos_tools}
                mesos_tools.start()
                END
                end script
                post-stop script
                {tools_dir}/bin/python2.7 - <<END
                import logging
                logging.basicConfig( level=logging.INFO )
                from cgcloud.mesos_tools import MesosTools
                mesos_tools = {mesos_tools}
                mesos_tools.stop()
                END
                end script""" ) )
Exemplo n.º 40
0
    def __setup_agent(self):
        """
        Install the cgcloud agent into its own virtualenv and register it as an
        init script so it starts with the box.
        """
        availability_zone = self.ctx.availability_zone
        namespace = self.ctx.namespace
        ec2_keypair_globs = ' '.join(
            shell.quote(_) for _ in self.ec2_keypair_globs)
        accounts = ' '.join([self.admin_account()] + self.other_accounts())
        admin_account = self.admin_account()
        run_dir = '/var/run/cgcloudagent'
        log_dir = '/var/log'
        install_dir = '/opt/cgcloudagent'

        # Lucid & CentOS 5 have an ancient pip
        pip('install --upgrade pip==1.5.2', use_sudo=True)
        pip('install --upgrade virtualenv', use_sudo=True)
        sudo(fmt('mkdir -p {install_dir}'))
        # Hand the install dir to the admin user so the virtualenv can be created
        # without sudo.
        sudo(fmt('chown {admin_account}:{admin_account} {install_dir}'))
        # By default, virtualenv installs the latest version of pip. We want a specific
        # version, so we tell virtualenv not to install pip and then install that version of
        # pip using easy_install.
        run(fmt('virtualenv --no-pip {install_dir}'))
        run(fmt('{install_dir}/bin/easy_install pip==1.5.2'))

        with settings(forward_agent=True):
            venv_pip = install_dir + '/bin/pip'
            if self._enable_agent_metrics():
                # psutil backs the agent's optional instance metrics.
                pip(path=venv_pip, args='install psutil==3.4.1')
            with self._project_artifacts('agent') as artifacts:
                pip(
                    path=venv_pip,
                    args=concat(
                        'install',
                        '--allow-external',
                        'argparse',  # needed on CentOS 5 and 6
                        artifacts))

        sudo(fmt('mkdir {run_dir}'))
        # The agent emits its own init script; it is gzip+base64-encoded on the wire
        # so it survives the trip through the remote shell unmangled.
        script = self.__gunzip_base64_decode(
            run(
                fmt('{install_dir}/bin/cgcloudagent'
                    ' --init-script'
                    ' --zone {availability_zone}'
                    ' --namespace {namespace}'
                    ' --accounts {accounts}'
                    ' --keypairs {ec2_keypair_globs}'
                    ' --user root'
                    ' --group root'
                    ' --pid-file {run_dir}/cgcloudagent.pid'
                    ' --log-spill {log_dir}/cgcloudagent.out'
                    '| gzip -c | base64')))
        self._register_init_script('cgcloudagent', script)
        self._run_init_script('cgcloudagent')
Exemplo n.º 41
0
    def __setup_agent( self ):
        """
        Install the cgcloud agent into a dedicated virtualenv and register it as an
        init script so it is started together with the box.
        """
        availability_zone = self.ctx.availability_zone
        namespace = self.ctx.namespace
        ec2_keypair_globs = ' '.join( shell.quote( _ ) for _ in self.ec2_keypair_globs )
        accounts = ' '.join( [ self.admin_account( ) ] + self.other_accounts( ) )
        admin_account = self.admin_account( )
        run_dir = '/var/run/cgcloudagent'
        log_dir = '/var/log'
        install_dir = '/opt/cgcloudagent'

        # Lucid & CentOS 5 have an ancient pip
        pip( 'install --upgrade pip==1.5.2', use_sudo=True )
        pip( 'install --upgrade virtualenv', use_sudo=True )
        sudo( fmt( 'mkdir -p {install_dir}' ) )
        # Give the admin user the install dir so the virtualenv can be created
        # without sudo.
        sudo( fmt( 'chown {admin_account}:{admin_account} {install_dir}' ) )
        # By default, virtualenv installs the latest version of pip. We want a specific
        # version, so we tell virtualenv not to install pip and then install that version of
        # pip using easy_install.
        run( fmt( 'virtualenv --no-pip {install_dir}' ) )
        run( fmt( '{install_dir}/bin/easy_install pip==1.5.2' ) )

        with settings( forward_agent=True ):
            venv_pip = install_dir + '/bin/pip'
            if self._enable_agent_metrics( ):
                # psutil backs the agent's optional instance metrics.
                pip( path=venv_pip, args='install psutil==3.4.1' )
            with self._project_artifacts( 'agent' ) as artifacts:
                pip( path=venv_pip,
                     args=concat( 'install',
                                  '--allow-external', 'argparse',  # needed on CentOS 5 and 6
                                  artifacts ) )

        sudo( fmt( 'mkdir {run_dir}' ) )
        # The agent emits its own init script; it is gzip+base64-encoded on the wire
        # so it survives the trip through the remote shell unmangled.
        script = self.__gunzip_base64_decode( run( fmt(
            '{install_dir}/bin/cgcloudagent'
            ' --init-script'
            ' --zone {availability_zone}'
            ' --namespace {namespace}'
            ' --accounts {accounts}'
            ' --keypairs {ec2_keypair_globs}'
            ' --user root'
            ' --group root'
            ' --pid-file {run_dir}/cgcloudagent.pid'
            ' --log-spill {log_dir}/cgcloudagent.out'
            '| gzip -c | base64' ) ) )
        self._register_init_script( 'cgcloudagent', script )
        self._run_init_script( 'cgcloudagent' )
Exemplo n.º 42
0
    def __install_spark(self):
        """
        Download and extract Spark, point it at the box's Hadoop configuration and
        register the Upstart jobs for the Spark services.
        """
        # Download and extract Spark
        path = fmt(
            'spark/spark-{spark_version}/spark-{spark_version}-bin-hadoop{spark_hadoop_version}.tgz'
        )
        self._install_apache_package(path, install_dir)

        spark_dir = var_dir + "/spark"

        # Add environment variables to spark_env.sh
        spark_env_sh_path = fmt("{install_dir}/spark/conf/spark-env.sh")
        sudo(fmt("cp {spark_env_sh_path}.template {spark_env_sh_path}"))
        spark_env = dict(
            # Log, work and local dirs are created via _lazy_mkdir so they get
            # bind-mounted onto instance storage at boot.
            SPARK_LOG_DIR=self._lazy_mkdir(log_dir, "spark"),
            SPARK_WORKER_DIR=self._lazy_mkdir(spark_dir, "work"),
            SPARK_LOCAL_DIRS=self._lazy_mkdir(spark_dir, "local"),
            JAVA_HOME='/usr/lib/jvm/java-8-oracle',
            SPARK_MASTER_IP='spark-master',
            HADOOP_CONF_DIR=fmt("{install_dir}/hadoop/etc/hadoop"))
        with remote_open(spark_env_sh_path, use_sudo=True) as spark_env_sh:
            spark_env_sh.write('\n')
            for name, value in spark_env.iteritems():
                spark_env_sh.write(fmt('export {name}="{value}"\n'))

        # Configure Spark properties
        spark_defaults = {
            'spark.eventLog.enabled': 'true',
            'spark.eventLog.dir': self._lazy_mkdir(spark_dir, "history"),
            'spark.master': 'spark://spark-master:7077'
        }
        spark_defaults_conf_path = fmt(
            "{install_dir}/spark/conf/spark-defaults.conf")
        sudo(
            fmt("cp {spark_defaults_conf_path}.template {spark_defaults_conf_path}"
                ))
        with remote_open(spark_defaults_conf_path,
                         use_sudo=True) as spark_defaults_conf:
            for name, value in spark_defaults.iteritems():
                spark_defaults_conf.write(fmt("{name}\t{value}\n"))

        # Make shell auto completion easier
        # (the Windows .cmd scripts are useless on this box)
        sudo(fmt('find {install_dir}/spark -name "*.cmd" | xargs rm'))

        # Install upstart jobs
        self.__register_upstart_jobs(spark_services)
Exemplo n.º 43
0
    def __install_spark( self ):
        """
        Download and extract Spark, wire it up to Hadoop's configuration and register
        the Upstart jobs for the Spark services.
        """
        # Download and extract Spark
        path = fmt( 'spark/spark-{spark_version}/spark-{spark_version}-bin-hadoop{spark_hadoop_version}.tgz' )
        self._install_apache_package( path, install_dir )

        spark_dir = var_dir + "/spark"

        # Add environment variables to spark_env.sh
        spark_env_sh_path = fmt( "{install_dir}/spark/conf/spark-env.sh" )
        sudo( fmt( "cp {spark_env_sh_path}.template {spark_env_sh_path}" ) )
        spark_env = dict(
            # Log, work and local dirs are created via _lazy_mkdir so they get
            # bind-mounted onto instance storage at boot.
            SPARK_LOG_DIR=self._lazy_mkdir( log_dir, "spark" ),
            SPARK_WORKER_DIR=self._lazy_mkdir( spark_dir, "work" ),
            SPARK_LOCAL_DIRS=self._lazy_mkdir( spark_dir, "local" ),
            JAVA_HOME='/usr/lib/jvm/java-8-oracle',
            SPARK_MASTER_IP='spark-master',
            HADOOP_CONF_DIR=fmt( "{install_dir}/hadoop/etc/hadoop" ),
            # The $(...) substitution is written verbatim into spark-env.sh and is
            # evaluated when that file is sourced, querying the EC2 instance
            # metadata service for the box's public hostname.
            SPARK_PUBLIC_DNS="$(curl -s http://169.254.169.254/latest/meta-data/public-hostname)" )
        with remote_open( spark_env_sh_path, use_sudo=True ) as spark_env_sh:
            spark_env_sh.write( '\n' )
            for name, value in spark_env.iteritems( ):
                spark_env_sh.write( fmt( 'export {name}="{value}"\n' ) )

        # Configure Spark properties
        spark_defaults = {
            'spark.eventLog.enabled': 'true',
            'spark.eventLog.dir': self._lazy_mkdir( spark_dir, "history" ),
            'spark.master': 'spark://spark-master:7077'
        }
        spark_defaults_conf_path = fmt( "{install_dir}/spark/conf/spark-defaults.conf" )
        sudo( fmt( "cp {spark_defaults_conf_path}.template {spark_defaults_conf_path}" ) )
        with remote_open( spark_defaults_conf_path, use_sudo=True ) as spark_defaults_conf:
            for name, value in spark_defaults.iteritems( ):
                spark_defaults_conf.write( fmt( "{name}\t{value}\n" ) )

        # Make shell auto completion easier
        # (the Windows .cmd scripts are useless on this box)
        sudo( fmt( 'find {install_dir}/spark -name "*.cmd" | xargs rm' ) )

        # Install upstart jobs
        self.__register_upstart_jobs( spark_services )
Exemplo n.º 44
0
    def __configure_slurm(self):
        """
        Configure SLURM in a single-node configuration with text-file accounting.

        Writes /etc/slurm-llnl/slurm.conf with placeholder CPU/memory values and
        registers a pre-start init script that patches in the real CPU count and
        memory at boot time.
        """
        # Create munge key and start
        # (munge provides the authentication SLURM's daemons require)
        sudo('/usr/sbin/create-munge-key')
        sudo('/usr/sbin/service munge start')

        slurm_acct_file = '/var/log/slurm-llnl/slurm-acct.txt'

        # Default values placed into compute node config, will be replaced by pre script
        slurm_conf = heredoc("""
            ClusterName=jenkins-testing
            ControlMachine=localhost
            SlurmUser=slurm
            SlurmctldPort=6817
            SlurmdPort=6818
            StateSaveLocation=/tmp
            SlurmdSpoolDir=/tmp/slurmd
            SwitchType=switch/none
            MpiDefault=none
            SlurmctldPidFile=/var/run/slurmctld.pid
            SlurmdPidFile=/var/run/slurmd.pid
            ProctrackType=proctrack/pgid
            CacheGroups=0
            ReturnToService=0
            SlurmctldTimeout=300
            SlurmdTimeout=300
            InactiveLimit=0
            MinJobAge=300
            KillWait=30
            Waittime=0
            SchedulerType=sched/backfill
            SelectType=select/cons_res
            FastSchedule=1

            # LOGGING
            SlurmctldDebug=3
            SlurmdDebug=3
            JobCompType=jobcomp/none

            # ACCOUNTING
            AccountingStorageLoc={slurm_acct_file}
            AccountingStorageType=accounting_storage/filetxt
            AccountingStoreJobComment=YES
            JobAcctGatherFrequency=30
            JobAcctGatherType=jobacct_gather/linux

            # COMPUTE NODES
            NodeName=localhost CPUs=1 State=UNKNOWN RealMemory=256
            PartitionName=debug Nodes=localhost Default=YES MaxTime=INFINITE State=UP
        """)
        slurm_conf_tmp = '/tmp/slurm.conf'
        slurm_conf_file = '/etc/slurm-llnl/slurm.conf'
        # Put config file in: /etc/slurm-llnl/slurm.conf
        # (uploaded to /tmp first, then moved into place with sudo)
        put(remote_path=slurm_conf_tmp, local_path=StringIO(slurm_conf))
        sudo('mkdir -p /etc/slurm-llnl')
        sudo('mv %s %s' % (slurm_conf_tmp, slurm_conf_file))
        sudo('chown root:root %s' % slurm_conf_file)

        # Touch the accounting job file and make sure it's owned by slurm user
        sudo('mkdir -p /var/log/slurm-llnl')
        sudo('touch %s' % slurm_acct_file)
        sudo('chown slurm:slurm %s' % slurm_acct_file)
        sudo('chmod 644 %s' % slurm_acct_file)

        # Register an init-script that sets the CPUs and RealMemory in slurm.conf
        # slurm.conf needs cpus and memory in order to handle jobs with these resource requests
        self._register_init_script(
            'slurm-llnl-pre',
            heredoc("""
            description "Slurm pre-start configuration"
            console log
            start on filesystem
            pre-start script
                CPUS=$(/usr/bin/nproc)
                MEMORY=$(cat /proc/meminfo | grep MemTotal | awk '{{print $2, "/ 1024"}}' | bc)
                sed -i "s/CPUs=[0-9]\+/CPUs=${{CPUS}}/" {slurm_conf_file}
                sed -i "s/RealMemory=[0-9]\+/RealMemory=${{MEMORY}}/" {slurm_conf_file}
            end script"""))

        # Start slurm services
        self._run_init_script('slurm-llnl-pre')
        self._run_init_script('slurm-llnl')

        # Ensure partition is up
        # (cycling the node through Down and Resume clears any stale node state)
        sudo('scontrol update NodeName=localhost State=Down')
        sudo('scontrol update NodeName=localhost State=Resume')
Exemplo n.º 45
0
    def __install_tools( self ):
        """
        Installs the spark-master-discovery init script and its companion spark-tools. The latter
        is a Python package distribution that's included in cgcloud-spark as a resource. This is
        in contrast to the cgcloud agent, which is a standalone distribution.
        """
        tools_dir = install_dir + '/tools'
        admin = self.admin_account( )
        sudo( fmt( 'mkdir -p {tools_dir}' ) )
        # Temporarily hand the tools dir to the admin user so the virtualenv can be
        # created without sudo; ownership is returned to root below.
        sudo( fmt( 'chown {admin}:{admin} {tools_dir}' ) )
        # Create the virtualenv without pip, then pin a specific pip version via
        # easy_install.
        run( fmt( 'virtualenv --no-pip {tools_dir}' ) )
        run( fmt( '{tools_dir}/bin/easy_install pip==1.5.2' ) )

        with settings( forward_agent=True ):
            with self._project_artifacts( 'spark-tools' ) as artifacts:
                pip( use_sudo=True,
                     path=tools_dir + '/bin/pip',
                     args=concat( 'install', artifacts ) )
        sudo( fmt( 'chown -R root:root {tools_dir}' ) )

        # The repr() of this dict is spliced into the scripts below, where it is
        # evaluated to construct an equivalent SparkTools instance.
        spark_tools = "SparkTools(**%r)" % dict( user=user,
                                                 shared_dir=self._shared_dir( ),
                                                 install_dir=install_dir,
                                                 ephemeral_dir=ephemeral_dir,
                                                 persistent_dir=persistent_dir,
                                                 lazy_dirs=self.lazy_dirs )

        self.lazy_dirs = None  # make sure it can't be used anymore once we are done with it

        # Upstart job whose pre-start retries master discovery up to three times,
        # one minute apart, before giving up.
        self._register_init_script(
            "sparkbox",
            heredoc( """
                description "Spark/HDFS master discovery"
                console log
                start on (local-filesystems and net-device-up IFACE!=lo)
                stop on runlevel [!2345]
                pre-start script
                for i in 1 2 3; do if {tools_dir}/bin/python2.7 - <<END
                import logging
                logging.basicConfig( level=logging.INFO )
                from cgcloud.spark_tools import SparkTools
                spark_tools = {spark_tools}
                spark_tools.start()
                END
                then exit 0; fi; echo Retrying in 60s; sleep 60; done; exit 1
                end script
                post-stop script
                {tools_dir}/bin/python2.7 - <<END
                import logging
                logging.basicConfig( level=logging.INFO )
                from cgcloud.spark_tools import SparkTools
                spark_tools = {spark_tools}
                spark_tools.stop()
                END
                end script""" ) )

        # Helper script used to add slaves to the master's bookkeeping.
        script_path = "/usr/local/bin/sparkbox-manage-slaves"
        put( remote_path=script_path, use_sudo=True, local_path=StringIO( heredoc( """
            #!{tools_dir}/bin/python2.7
            import sys
            import logging
            # Prefix each log line to make it more obvious that it's the master logging when the
            # slave calls this script via ssh.
            logging.basicConfig( level=logging.INFO,
                                 format="manage_slaves: " + logging.BASIC_FORMAT )
            from cgcloud.spark_tools import SparkTools
            spark_tools = {spark_tools}
            spark_tools.manage_slaves( slaves_to_add=sys.argv[1:] )""" ) ) )
        sudo( fmt( "chown root:root {script_path} && chmod 755 {script_path}" ) )
Exemplo n.º 46
0
 def __disable_mesos_daemons(self):
     """
     Keep Upstart from auto-starting the Mesos master and slave daemons.
     """
     # A 'manual' override file stops Upstart from starting the job at boot.
     sudo('echo manual > /etc/init/mesos-master.override')
     sudo('echo manual > /etc/init/mesos-slave.override')
Exemplo n.º 47
0
 def _setup_package_repos( self ):
     """
     Add the WebUpd8 PPA that provides the Oracle Java packages.
     """
     super( SparkBox, self )._setup_package_repos( )
     java_ppa = 'ppa:webupd8team/java'
     sudo( 'add-apt-repository -y ' + java_ppa )
Exemplo n.º 48
0
 def __setup_application_user( self ):
     """
     Create the application's user account along with its home directory,
     primary group and bash login shell.
     """
     useradd_template = ( 'useradd '
                          '--home /home/{user} '
                          '--create-home '
                          '--user-group '
                          '--shell /bin/bash {user}' )
     sudo( fmt( useradd_template ) )
Exemplo n.º 49
0
 def __prepare_credentials( self ):
     """
     Write the Mesos authentication credentials file and hand it over to the
     mesosbox user.
     """
     # Create the credentials file and transfer ownership to mesosbox
     for command in ( 'mkdir -p /etc/mesos',
                      'echo toil liot > /etc/mesos/credentials',
                      'chown mesosbox:mesosbox /etc/mesos/credentials' ):
         sudo( command )
Exemplo n.º 50
0
 def __install_parasol( self ):
     """
     Install the Parasol batch-system binaries into /usr/local/bin.

     Clones the prebuilt-binaries repository over HTTPS (anonymous access — the
     previous SSH-style remote required deploy credentials and its URL was
     mangled), copies the binaries into place and removes the working copy,
     matching the sibling implementation of this method.
     """
     run( "git clone https://github.com/BD2KGenomics/parasol-binaries.git" )
     sudo( "cp parasol-binaries/* /usr/local/bin" )
     # Don't leave the clone lying around in the admin user's home directory.
     run( "rm -rf parasol-binaries" )
Exemplo n.º 51
0
 def __install_parasol(self):
     """
     Fetch the prebuilt Parasol binaries and install them system-wide.
     """
     repo = "https://github.com/BD2KGenomics/parasol-binaries.git"
     run("git clone " + repo)
     sudo("cp parasol-binaries/* /usr/local/bin")
     # Clean up the checkout once the binaries have been copied.
     run("rm -rf parasol-binaries")
Exemplo n.º 52
0
 def __disable_mesos_daemons( self ):
     """
     Prevent Upstart from automatically launching the Mesos daemons.
     """
     for daemon in ( 'master', 'slave' ):
         # Upstart treats a job with a 'manual' override as start-on-demand only.
         override_path = '/etc/init/mesos-%s.override' % daemon
         sudo( 'echo manual > ' + override_path )
Exemplo n.º 53
0
    def __install_tools(self):
        """
        Installs the spark-master-discovery init script and its companion spark-tools. The latter
        is a Python package distribution that's included in cgcloud-spark as a resource. This is
        in contrast to the cgcloud agent, which is a standalone distribution.
        """
        tools_dir = install_dir + '/tools'
        admin = self.admin_account()
        sudo(fmt('mkdir -p {tools_dir}'))
        # Temporarily hand the tools dir to the admin user so the virtualenv can be
        # created without sudo; ownership is returned to root below.
        sudo(fmt('chown {admin}:{admin} {tools_dir}'))
        # Create the virtualenv without pip, then pin a specific pip version via
        # easy_install.
        run(fmt('virtualenv --no-pip {tools_dir}'))
        run(fmt('{tools_dir}/bin/easy_install pip==1.5.2'))

        with settings(forward_agent=True):
            with self._project_artifacts('spark-tools') as artifacts:
                pip(use_sudo=True,
                    path=tools_dir + '/bin/pip',
                    args=concat('install', artifacts))
        sudo(fmt('chown -R root:root {tools_dir}'))

        # The repr() of this dict is spliced into the scripts below, where it is
        # evaluated to construct an equivalent SparkTools instance.
        spark_tools = "SparkTools(**%r)" % dict(user=user,
                                                shared_dir=self._shared_dir(),
                                                install_dir=install_dir,
                                                ephemeral_dir=ephemeral_dir,
                                                persistent_dir=persistent_dir,
                                                lazy_dirs=self.lazy_dirs)

        self.lazy_dirs = None  # make sure it can't be used anymore once we are done with it

        # Upstart job whose pre-start retries master discovery up to three times,
        # one minute apart, before giving up.
        self._register_init_script(
            "sparkbox",
            heredoc("""
                description "Spark/HDFS master discovery"
                console log
                start on (local-filesystems and net-device-up IFACE!=lo)
                stop on runlevel [!2345]
                pre-start script
                for i in 1 2 3; do if {tools_dir}/bin/python2.7 - <<END
                import logging
                logging.basicConfig( level=logging.INFO )
                from cgcloud.spark_tools import SparkTools
                spark_tools = {spark_tools}
                spark_tools.start()
                END
                then exit 0; fi; echo Retrying in 60s; sleep 60; done; exit 1
                end script
                post-stop script
                {tools_dir}/bin/python2.7 - <<END
                import logging
                logging.basicConfig( level=logging.INFO )
                from cgcloud.spark_tools import SparkTools
                spark_tools = {spark_tools}
                spark_tools.stop()
                END
                end script"""))

        # Helper script used to add slaves to the master's bookkeeping.
        script_path = "/usr/local/bin/sparkbox-manage-slaves"
        put(remote_path=script_path,
            use_sudo=True,
            local_path=StringIO(
                heredoc("""
            #!{tools_dir}/bin/python2.7
            import sys
            import logging
            # Prefix each log line to make it more obvious that it's the master logging when the
            # slave calls this script via ssh.
            logging.basicConfig( level=logging.INFO,
                                 format="manage_slaves: " + logging.BASIC_FORMAT )
            from cgcloud.spark_tools import SparkTools
            spark_tools = {spark_tools}
            spark_tools.manage_slaves( slaves_to_add=sys.argv[1:] )""")))
        sudo(fmt("chown root:root {script_path} && chmod 755 {script_path}"))
Exemplo n.º 54
0
    def __configure_gridengine( self ):
        """
        Configure the GridEngine daemons (master and exec) and creata a default queue. Ensure
        that the queue is updated to reflect the number of cores actually available.
        """

        ws = re.compile( r'\s+' )
        nl = re.compile( r'[\r\n]+' )

        def qconf( opt, **kwargs ):
            return qconf_dict( opt, kwargs )

        def qconf_dict( opt, d=None, file_name='qconf.tmp' ):
            if d:
                # qconf can't read from stdin for some reason, neither -, /dev/stdin or /dev/fd/0 works
                s = '\n'.join( ' '.join( i ) for i in d.iteritems( ) ) + '\n'
                put( remote_path=file_name, local_path=StringIO( s ) )
                sudo( ' '.join( [ 'qconf', opt, file_name ] ) )
                run( ' '.join( [ 'rm', file_name ] ) )
            else:
                return dict( tuple( ws.split( l, 1 ) )
                                 for l in nl.split( run( 'SGE_SINGLE_LINE=1 qconf ' + opt ) )
                                 if l and not l.startswith( '#' ) )

        # Add the user defined in fname to the Sun Grid Engine cluster.
        qconf( '-Auser', name=Jenkins.user, oticket='0', fshare='0', delete_time='0',
               default_project='NONE' )

        # Adds users to Sun Grid Engine user access lists (ACLs).
        sudo( 'qconf -au %s arusers' % Jenkins.user )

        # Add hosts hostname to the list of hosts allowed to submit Sun Grid Engine jobs and
        # control their behavior only.
        sudo( 'qconf -as localhost' )

        # Remove all currently defined execution hosts
        run( 'for i in `qconf -sel`; do sudo qconf -de $i ; done' )

        # Add an execution host
        qconf( '-Ae', hostname='localhost', load_scaling='NONE', complex_values='NONE',
               user_lists='arusers', xuser_lists='NONE', projects='NONE', xprojects='NONE',
               usage_scaling='NONE', report_variables='NONE' )

        # Add a parallel environment
        qconf( '-Ap', pe_name='smp', slots='999', user_lists='NONE', xuser_lists='NONE',
               start_proc_args='/bin/true', stop_proc_args='/bin/true', allocation_rule='$pe_slots',
               control_slaves='FALSE', job_is_first_task='TRUE', urgency_slots='min',
               accounting_summary='FALSE' )

        # Add a queue, the slots and processors will be adjusted dynamically, by an init script
        qconf( '-Aq', qname='all.q', processors='1', slots='1', hostlist='localhost', seq_no='0',
               load_thresholds='np_load_avg=1.75', suspend_thresholds='NONE', nsuspend='1',
               suspend_interval='00:05:00', priority='0', min_cpu_interval='00:05:00',
               qtype='BATCH INTERACTIVE', ckpt_list='NONE', pe_list='make smp', rerun='FALSE',
               tmpdir='/tmp', shell='/bin/bash', prolog='NONE', epilog='NONE',
               shell_start_mode='posix_compliant', starter_method='NONE', suspend_method='NONE',
               resume_method='NONE', terminate_method='NONE', notify='00:00:60', owner_list='NONE',
               user_lists='arusers', xuser_lists='NONE', subordinate_list='NONE',
               complex_values='NONE', projects='NONE', xprojects='NONE', calendar='NONE',
               initial_state='default', s_rt='INFINITY', h_rt='INFINITY', s_cpu='INFINITY',
               h_cpu='INFINITY', s_fsize='INFINITY', h_fsize='INFINITY', s_data='INFINITY',
               h_data='INFINITY', s_stack='INFINITY', h_stack='INFINITY', s_core='INFINITY',
               h_core='INFINITY', s_rss='INFINITY', h_rss='INFINITY', s_vmem='INFINITY',
               h_vmem='INFINITY' )

        # Enable on-demand scheduling. This will eliminate the long time that jobs spend waiting
        # in the qw state. There is no -Asconf so we have to fake it using -ssconf and -Msconf.
        sconf = qconf( '-ssconf' )
        sconf.update( dict( flush_submit_sec='1', flush_finish_sec='1',
                            schedule_interval='0:0:1' ) )
        qconf_dict( '-Msconf', sconf )

        # Enable immediate flushing of the accounting file. The SGE batch system in Toil uses the
        #  qacct program to determine the exit code of a finished job. The qacct program reads
        # the accounting file. By default, this file is written to every 15 seconds which means
        # that it may take up to 15 seconds before a finished job is seen by Toil. An
        # accounting_flush_time value of 00:00:00 causes the accounting file to be flushed
        # immediately, allowing qacct to report the status of finished jobs immediately. Again,
        # there is no -Aconf, so we fake it with -sconf and -Mconf. Also, the file name has to be
        # 'global'.
        conf = qconf( '-sconf' )
        params = dict( tuple( e.split( '=' ) ) for e in conf[ 'reporting_params' ].split( ' ' ) )
        params[ 'accounting_flush_time' ] = '00:00:00'
        conf[ 'reporting_params' ] = ' '.join( '='.join( e ) for e in params.iteritems( ) )
        qconf_dict( '-Mconf', conf, file_name='global' )

        # Register an init-script that ensures GridEngine uses localhost instead of hostname
        path = '/var/lib/gridengine/default/common/'
        self._register_init_script( 'gridengine-pre', heredoc( """
            description "GridEngine pre-start configuration"
            console log
            start on filesystem
            pre-start script
                echo localhost > {path}/act_qmaster ; chown sgeadmin:sgeadmin {path}/act_qmaster
                echo localhost `hostname -f` > {path}/host_aliases
            end script""" ) )

        # Register an init-script that adjust the queue config to reflect the number of cores
        self._register_init_script( 'gridengine-post', heredoc( """
            description "GridEngine post-start configuration"
            console log
            # I would rather depend on the gridengine daemons but don't know how as they are
            # started by SysV init scripts. Supposedly the 'rc' job is run last.
            start on started rc
            pre-start script
                cores=$(grep -c '^processor' /proc/cpuinfo)
                qconf -mattr queue processors $cores `qselect`
                qconf -mattr queue slots $cores `qselect`
            end script""" ) )

        # Run pre-start script
        for daemon in ('exec', 'master'):
            sudo( '/etc/init.d/gridengine-%s stop' % daemon )
        sudo( "killall -9 -r 'sge_.*'" )  # the exec daemon likes to hang
        self._run_init_script( 'gridengine-pre' )
        for daemon in ('master', 'exec'):
            sudo( '/etc/init.d/gridengine-%s start' % daemon )

        # Run post-start script
        self._run_init_script( 'gridengine-post' )
        while 'execd is in unknown state' in run( 'qstat -f -q all.q -explain a', warn_only=True ):
            time.sleep( 1 )
Exemplo n.º 55
0
    def _setup_docker( self ):
        """
        Add the configured users to the 'docker' group and, if data-volume prefixes are
        configured, install a boot-time script that bind-mounts /var/lib/docker onto the
        first prefix that is an actual mount point (e.g. an ephemeral or EBS volume),
        preserving or seeding any pre-existing Docker data.
        """
        for docker_user in set( self._docker_users( ) ):
            sudo( "usermod -aG docker " + docker_user )
        prefixes = self._docker_data_prefixes( )
        if prefixes:
            # NOTE: heredoc() interpolates {names} from the caller's locals, so the local
            # names 'prefixes' and 'dockerbox_path' must match the placeholders below.
            prefixes = ' '.join( map( quote, prefixes ) )
            setup_docker_script = heredoc( """
                #!/bin/sh
                echo
                echo "This is the dockerbox pre-start script"
                set -ex
                if mountpoint -q /var/lib/docker; then
                    echo "The directory '/var/lib/docker' is already mounted, exiting."
                else
                    for prefix in {prefixes}; do
                        # Prefix must refer to a separate volume, e.g. ephemeral or EBS
                        if mountpoint -q "$prefix"; then
                            # Make sure Docker's aufs backend isn't mounted anymore
                            umount /var/lib/docker/aufs || true
                            if test -d "$prefix/var/lib/docker"; then
                                echo "The directory '$prefix/var/lib/docker' already exists, using it."
                            else
                                mkdir -p "$prefix/var/lib"
                                # If /var/lib/docker contains files ...
                                if python -c 'import os, sys; sys.exit( 0 if os.listdir( sys.argv[1] ) else 1 )' /var/lib/docker; then
                                    # ... move it to prefix ...
                                    mv /var/lib/docker "$prefix/var/lib"
                                    # ... and recreate it as an empty mount point, ...
                                    mkdir -p /var/lib/docker
                                else
                                    # ... otherwise untar the initial backup.
                                    tar -xzC "$prefix/var/lib" < /var/lib/docker.tar.gz
                                fi
                            fi
                            # Now bind-mount into /var/lib/docker
                            mount --bind "$prefix/var/lib/docker" /var/lib/docker
                            break
                        else
                            echo "The prefix directory '$prefix' is not a mount point, skipping."
                        fi
                    done
                fi""" )

            dockerbox_path = '/usr/sbin/dockerbox-setup.sh'
            # The placement script must run *before* the Docker daemon starts, since it
            # relocates /var/lib/docker. The previous unit declared Requires=/After=
            # docker.service, which ordered it to run only after Docker was already up.
            # Type=oneshot with RemainAfterExit makes docker.service wait for the script
            # to finish and keeps this unit reported as active afterwards.
            systemd_heredoc = heredoc( """
                [Unit]
                Description=Placement of /var/lib/docker
                Before=docker.service

                [Service]
                Type=oneshot
                RemainAfterExit=yes
                ExecStart={dockerbox_path}

                [Install]
                WantedBy=docker.service
            """)

            self._run_init_script( 'docker', 'stop' )
            # Make sure Docker's aufs backend isn't mounted anymore
            sudo( 'umount /var/lib/docker/aufs', warn_only=True )
            # Backup initial state of data directory so we can initialize an empty ephemeral volume
            sudo( 'tar -czC /var/lib docker > /var/lib/docker.tar.gz' )
            # Then delete it and recreate it as an empty directory to serve as the bind mount point
            sudo( 'rm -rf /var/lib/docker && mkdir /var/lib/docker' )

            # Install the placement script and register it as an init script
            put( local_path=StringIO( setup_docker_script ), remote_path=dockerbox_path, use_sudo=True )
            sudo( "chown root:root '%s'" % dockerbox_path )
            sudo( "chmod +x '%s'" % dockerbox_path )
            self._register_init_script(
                'dockerbox',
                systemd_heredoc )
            self._run_init_script( 'docker', 'start' )
Exemplo n.º 56
0
    def __configure_gridengine(self):
        """
        Configure the GridEngine daemons (master and exec) and create a default queue. Ensure
        that the queue is updated to reflect the number of cores actually available.

        The configuration is pushed with qconf's file-based modify options since qconf
        cannot read configuration objects from stdin.
        """

        # Splitter for 'key   value' pairs in qconf output, and for the line endings the
        # remote shell may emit (\r\n as well as \n).
        ws = re.compile(r'\s+')
        nl = re.compile(r'[\r\n]+')

        def qconf(opt, **kwargs):
            # Convenience wrapper: pass keyword arguments as the configuration dict.
            return qconf_dict(opt, kwargs)

        def qconf_dict(opt, d=None, file_name='qconf.tmp'):
            # With a dict: upload it as a 'key value' file and run 'qconf <opt> <file>'
            # (modify mode). Without a dict: run 'qconf <opt>' (show mode) and parse the
            # output back into a dict, skipping '#' comment lines.
            if d:
                # qconf can't read from stdin for some reason, neither -, /dev/stdin or /dev/fd/0 works
                s = '\n'.join(' '.join(i) for i in d.iteritems()) + '\n'
                put(remote_path=file_name, local_path=StringIO(s))
                sudo(' '.join(['qconf', opt, file_name]))
                run(' '.join(['rm', file_name]))
            else:
                # SGE_SINGLE_LINE=1 keeps each attribute on a single line so it can be
                # split on the first whitespace run into a (key, value) pair.
                return dict(
                    tuple(ws.split(l, 1))
                    for l in nl.split(run('SGE_SINGLE_LINE=1 qconf ' + opt))
                    if l and not l.startswith('#'))

        # Add the user defined in fname to the Sun Grid Engine cluster.
        qconf('-Auser',
              name=Jenkins.user,
              oticket='0',
              fshare='0',
              delete_time='0',
              default_project='NONE')

        # Adds users to Sun Grid Engine user access lists (ACLs).
        sudo('qconf -au %s arusers' % Jenkins.user)

        # Add hosts hostname to the list of hosts allowed to submit Sun Grid Engine jobs and
        # control their behavior only.
        sudo('qconf -as localhost')

        # Remove all currently defined execution hosts
        run('for i in `qconf -sel`; do sudo qconf -de $i ; done')

        # Add an execution host
        qconf('-Ae',
              hostname='localhost',
              load_scaling='NONE',
              complex_values='NONE',
              user_lists='arusers',
              xuser_lists='NONE',
              projects='NONE',
              xprojects='NONE',
              usage_scaling='NONE',
              report_variables='NONE')

        # Add a parallel environment
        qconf('-Ap',
              pe_name='smp',
              slots='999',
              user_lists='NONE',
              xuser_lists='NONE',
              start_proc_args='/bin/true',
              stop_proc_args='/bin/true',
              allocation_rule='$pe_slots',
              control_slaves='FALSE',
              job_is_first_task='TRUE',
              urgency_slots='min',
              accounting_summary='FALSE')

        # Add a queue, the slots and processors will be adjusted dynamically, by an init script
        qconf('-Aq',
              qname='all.q',
              processors='1',
              slots='1',
              hostlist='localhost',
              seq_no='0',
              load_thresholds='np_load_avg=1.75',
              suspend_thresholds='NONE',
              nsuspend='1',
              suspend_interval='00:05:00',
              priority='0',
              min_cpu_interval='00:05:00',
              qtype='BATCH INTERACTIVE',
              ckpt_list='NONE',
              pe_list='make smp',
              rerun='FALSE',
              tmpdir='/tmp',
              shell='/bin/bash',
              prolog='NONE',
              epilog='NONE',
              shell_start_mode='posix_compliant',
              starter_method='NONE',
              suspend_method='NONE',
              resume_method='NONE',
              terminate_method='NONE',
              notify='00:00:60',
              owner_list='NONE',
              user_lists='arusers',
              xuser_lists='NONE',
              subordinate_list='NONE',
              complex_values='NONE',
              projects='NONE',
              xprojects='NONE',
              calendar='NONE',
              initial_state='default',
              s_rt='INFINITY',
              h_rt='INFINITY',
              s_cpu='INFINITY',
              h_cpu='INFINITY',
              s_fsize='INFINITY',
              h_fsize='INFINITY',
              s_data='INFINITY',
              h_data='INFINITY',
              s_stack='INFINITY',
              h_stack='INFINITY',
              s_core='INFINITY',
              h_core='INFINITY',
              s_rss='INFINITY',
              h_rss='INFINITY',
              s_vmem='INFINITY',
              h_vmem='INFINITY')

        # Enable on-demand scheduling. This will eliminate the long time that jobs spend waiting
        # in the qw state. There is no -Asconf so we have to fake it using -ssconf and -Msconf.
        sconf = qconf('-ssconf')
        sconf.update(
            dict(flush_submit_sec='1',
                 flush_finish_sec='1',
                 schedule_interval='0:0:1'))
        qconf_dict('-Msconf', sconf)

        # Enable immediate flushing of the accounting file. The SGE batch system in Toil uses the
        #  qacct program to determine the exit code of a finished job. The qacct program reads
        # the accounting file. By default, this file is written to every 15 seconds which means
        # that it may take up to 15 seconds before a finished job is seen by Toil. An
        # accounting_flush_time value of 00:00:00 causes the accounting file to be flushed
        # immediately, allowing qacct to report the status of finished jobs immediately. Again,
        # there is no -Aconf, so we fake it with -sconf and -Mconf. Also, the file name has to be
        # 'global'.
        conf = qconf('-sconf')
        params = dict(
            tuple(e.split('=')) for e in conf['reporting_params'].split(' '))
        params['accounting_flush_time'] = '00:00:00'
        conf['reporting_params'] = ' '.join('='.join(e)
                                            for e in params.iteritems())
        qconf_dict('-Mconf', conf, file_name='global')

        # Register an init-script that ensures GridEngine uses localhost instead of hostname
        # NOTE(review): heredoc() appears to interpolate {path} from this local variable —
        # renaming 'path' would silently break the generated script. TODO confirm.
        path = '/var/lib/gridengine/default/common/'
        self._register_init_script(
            'gridengine-pre',
            heredoc("""
            description "GridEngine pre-start configuration"
            console log
            start on filesystem
            pre-start script
                echo localhost > {path}/act_qmaster ; chown sgeadmin:sgeadmin {path}/act_qmaster
                echo localhost `hostname -f` > {path}/host_aliases
            end script"""))

        # Register an init-script that adjust the queue config to reflect the number of cores
        self._register_init_script(
            'gridengine-post',
            heredoc("""
            description "GridEngine post-start configuration"
            console log
            # I would rather depend on the gridengine daemons but don't know how as they are
            # started by SysV init scripts. Supposedly the 'rc' job is run last.
            start on started rc
            pre-start script
                cores=$(grep -c '^processor' /proc/cpuinfo)
                qconf -mattr queue processors $cores `qselect`
                qconf -mattr queue slots $cores `qselect`
            end script"""))

        # Run pre-start script
        for daemon in ('exec', 'master'):
            sudo('/etc/init.d/gridengine-%s stop' % daemon)
        sudo("killall -9 -r 'sge_.*'",
             warn_only=True)  # the exec daemon likes to hang
        self._run_init_script('gridengine-pre')
        for daemon in ('master', 'exec'):
            sudo('/etc/init.d/gridengine-%s start' % daemon)

        # Run post-start script
        self._run_init_script('gridengine-post')
        # Wait until the exec daemon has registered with the master and the queue is usable
        while 'execd is in unknown state' in run(
                'qstat -f -q all.q -explain a', warn_only=True):
            time.sleep(1)
Exemplo n.º 57
0
    def __install_tools( self ):
        """
        Installs the mesos-master-discovery init script and its companion mesos-tools. The latter
        is a Python package distribution that's included in cgcloud-mesos as a resource. This is
        in contrast to the cgcloud agent, which is a standalone distribution.

        The tools are installed into a dedicated virtualenv under the install directory and
        invoked from generated start/stop scripts that are registered as the 'mesosbox'
        systemd service.
        """
        tools_dir = install_dir + '/tools'
        admin = self.admin_account( )
        sudo( fmt( 'mkdir -p {tools_dir}' ) )
        # Temporarily hand the tools dir to the admin account so the virtualenv can be
        # created and populated without sudo; ownership is returned to root further below.
        sudo( fmt( 'chown {admin}:{admin} {tools_dir}' ) )
        # --no-pip: a specific pip version is installed via easy_install instead
        run( fmt( 'virtualenv --no-pip {tools_dir}' ) )
        run( fmt( '{tools_dir}/bin/easy_install pip==1.5.2' ) )

        # Forward the SSH agent — presumably so pip can fetch project artifacts over
        # SSH-authenticated channels. TODO confirm against _project_artifacts.
        with settings( forward_agent=True ):
            with self._project_artifacts( 'mesos-tools' ) as artifacts:
                pip( use_sudo=True,
                     path=tools_dir + '/bin/pip',
                     args=concat( 'install', artifacts ) )
        sudo( fmt( 'chown -R root:root {tools_dir}' ) )

        # Render the MesosTools constructor call as a Python source literal; this string is
        # pasted verbatim into the generated start/stop scripts below.
        mesos_tools = "MesosTools(**%r)" % dict( user=user,
                                                 shared_dir=self._shared_dir( ),
                                                 ephemeral_dir=ephemeral_dir,
                                                 persistent_dir=persistent_dir,
                                                 lazy_dirs=self.lazy_dirs )

        self.lazy_dirs = None  # make sure it can't be used anymore once we are done with it

        # NOTE(review): heredoc() appears to interpolate {names} from this method's locals
        # (e.g. {tools_dir}, {mesos_tools}, {mesosbox_start_path}) — renaming these locals
        # would silently break the generated scripts. TODO confirm.
        mesosbox_start_path = '/usr/sbin/mesosbox-start.sh'
        mesosbox_stop_path = '/usr/sbin/mesosbox-stop.sh'
        systemd_heredoc = heredoc( """
            [Unit]
            Description=Mesos master discovery
            Requires=networking.service network-online.target
            After=networking.service network-online.target

            [Service]
            Type=simple
            ExecStart={mesosbox_start_path}
            RemainAfterExit=true
            ExecStop={mesosbox_stop_path}

            [Install]
            WantedBy=multi-user.target
            """ )

        # Run MesosTools.start() via the tools virtualenv's Python, retrying up to three
        # times with a 60 second pause between attempts.
        mesosbox_setup_start_script = heredoc( """
                #!/bin/sh
                for i in 1 2 3; do if {tools_dir}/bin/python2.7 - <<END
                import logging
                logging.basicConfig( level=logging.INFO )
                from cgcloud.mesos_tools import MesosTools
                mesos_tools = {mesos_tools}
                mesos_tools.start()
                END
                then exit 0; fi; echo Retrying in 60s; sleep 60; done; exit 1""" )

        # The stop script runs directly under the tools virtualenv's interpreter
        mesosbox_setup_stop_script = heredoc ("""
                #!/{tools_dir}/bin/python2.7
                import logging
                logging.basicConfig( level=logging.INFO )
                from cgcloud.mesos_tools import MesosTools
                mesos_tools = {mesos_tools}
                mesos_tools.stop()""" )

        put( local_path=StringIO( mesosbox_setup_start_script ), remote_path=mesosbox_start_path, use_sudo=True )
        sudo( "chown root:root '%s'" % mesosbox_start_path )
        sudo( "chmod +x '%s'" % mesosbox_start_path )

        put( local_path=StringIO( mesosbox_setup_stop_script ), remote_path=mesosbox_stop_path, use_sudo=True )
        sudo( "chown root:root '%s'" % mesosbox_stop_path )
        sudo( "chmod +x '%s'" % mesosbox_stop_path )

        self._register_init_script(
            "mesosbox",
            systemd_heredoc )

        # Enable mesosbox to start on boot
        sudo( "systemctl enable mesosbox" )

        # Explicitly start the mesosbox service to achieve creation of lazy directories right
        # now. This makes a generic mesosbox useful for adhoc tests that involve Mesos and Toil.
        self._run_init_script( 'mesosbox' )