Пример #1
0
    def bootstrap(self, tar_file):
        """Install Hive in all cluster nodes from the specified tar.gz file.

        The required Java packages are checked (and installed if missing),
        the tar file is copied to every host and uncompressed under /tmp,
        the result is moved to the base dir and hive-env.sh is extended
        with the environment variables of this installation.

        Args:
          tar_file (str):
            The file containing Hive binaries.
        """

        # 0. Check that required packages are present
        required_packages = "openjdk-7-jre openjdk-7-jdk"
        pkg_check = TaktukRemote("dpkg -s " + required_packages,
                                 self.hosts)
        # A failing query is an expected outcome when packages are missing;
        # the nolog_* flags presumably keep that failure out of the logs
        for p in pkg_check.processes:
            p.nolog_exit_code = p.nolog_error = True
        pkg_check.run()
        packages_ok = pkg_check.ok
        if not packages_ok:
            logger.info("Packages not installed, trying to install")
            # DEBIAN_FRONTEND (not DEBIAN_MASTER) is the variable read by
            # debconf to disable interactive prompts
            install_packages = TaktukRemote(
                "export DEBIAN_FRONTEND=noninteractive ; " +
                "apt-get update && apt-get install -y --force-yes " +
                required_packages, self.hosts).run()
            packages_ok = install_packages.ok
            if not packages_ok:
                logger.error("Unable to install the packages")

        # The Java home is derived from the real location of javac in master
        get_java_home = SshProcess(
            'echo $(readlink -f /usr/bin/javac | '
            'sed "s:/bin/javac::")', self.master)
        get_java_home.run()
        self.java_home = get_java_home.stdout.strip()

        # Do not claim success when the installation has just failed
        if packages_ok:
            logger.info("All required packages are present")

        # 1. Copy Hive tar file and uncompress
        logger.info("Copy " + tar_file + " to hosts and uncompress")
        rm_dirs = TaktukRemote(
            "rm -rf " + self.base_dir + " " + self.conf_dir + " " +
            self.warehouse_dir + " " + self.logs_dir, self.hosts)
        put_tar = TaktukPut(self.hosts, [tar_file], "/tmp")
        tar_xf = TaktukRemote(
            "tar xf /tmp/" + os.path.basename(tar_file) + " -C /tmp",
            self.hosts)
        SequentialActions([rm_dirs, put_tar, tar_xf]).run()

        # 2. Move installation to base dir
        logger.info("Create installation directories")
        mv_base_dir = TaktukRemote(
            "mv /tmp/" + os.path.basename(tar_file).replace(".tar.gz", "") +
            " " + self.base_dir, self.hosts)
        mkdirs = TaktukRemote(
            "mkdir -p " + self.conf_dir + " && mkdir -p " + self.warehouse_dir,
            self.hosts)
        chmods = TaktukRemote(
            "chmod g+w " + self.base_dir + " && chmod g+w " + self.conf_dir +
            " && chmod g+w " + self.warehouse_dir, self.hosts)
        SequentialActions([mv_base_dir, mkdirs, chmods]).run()

        # 3. Specify environment variables (appended through a here-document)
        command = "cat >> " + self.conf_dir + "/hive-env.sh << EOF\n"
        command += "JAVA_HOME=" + self.java_home + "\n"
        command += "HIVE_HOME=" + self.base_dir + "\n"
        command += "HIVE_CONF_DIR=" + self.conf_dir + "\n"
        command += "HADOOP_HOME=" + self.hc.base_dir + "\n"
        command += "EOF\n"
        command += "chmod +x " + self.conf_dir + "/hive-env.sh"
        action = Remote(command, self.hosts)
        action.run()
Пример #2
0
    def bootstrap(self, tar_file):
        """Install Spark in all cluster nodes from the specified tgz file.

        The required Java packages are checked (and installed if missing),
        the tar file is copied to every host and uncompressed under /tmp,
        the result is moved to the base dir and spark-env.sh is extended
        with the environment variables of this installation.

        Args:
          tar_file (str):
            The file containing Spark binaries.
        """

        # 0. Check that required packages are present
        required_packages = "openjdk-7-jre openjdk-7-jdk"
        pkg_check = TaktukRemote("dpkg -s " + required_packages,
                                 self.hosts)
        # A failing query is an expected outcome when packages are missing;
        # the nolog_* flags presumably keep that failure out of the logs
        for p in pkg_check.processes:
            p.nolog_exit_code = p.nolog_error = True
        pkg_check.run()
        packages_ok = pkg_check.ok
        if not packages_ok:
            logger.info("Packages not installed, trying to install")
            # DEBIAN_FRONTEND (not DEBIAN_MASTER) is the variable read by
            # debconf to disable interactive prompts
            install_packages = TaktukRemote(
                "export DEBIAN_FRONTEND=noninteractive ; " +
                "apt-get update && apt-get install -y --force-yes " +
                required_packages, self.hosts).run()
            packages_ok = install_packages.ok
            if not packages_ok:
                logger.error("Unable to install the packages")

        # The Java home is derived from the real location of javac in master
        get_java_home = SshProcess('echo $(readlink -f /usr/bin/javac | '
                                   'sed "s:/bin/javac::")', self.master)
        get_java_home.run()
        self.java_home = get_java_home.stdout.strip()

        # Do not claim success when the installation has just failed
        if packages_ok:
            logger.info("All required packages are present")

        # 1. Copy Spark tar file and uncompress
        logger.info("Copy " + tar_file + " to hosts and uncompress")
        rm_dirs = TaktukRemote("rm -rf " + self.base_dir +
                               " " + self.conf_dir,
                               self.hosts)
        put_tar = TaktukPut(self.hosts, [tar_file], "/tmp")
        tar_xf = TaktukRemote(
            "tar xf /tmp/" + os.path.basename(tar_file) + " -C /tmp",
            self.hosts)
        SequentialActions([rm_dirs, put_tar, tar_xf]).run()

        # 2. Move installation to base dir
        logger.info("Create installation directories")
        mv_base_dir = TaktukRemote(
            "mv /tmp/" + os.path.basename(tar_file).replace(".tgz", "") + " " +
            self.base_dir,
            self.hosts)
        mkdirs = TaktukRemote("mkdir -p " + self.conf_dir, self.hosts)
        chmods = TaktukRemote("chmod g+w " + self.base_dir +
                              " && chmod g+w " + self.conf_dir,
                              self.hosts)
        SequentialActions([mv_base_dir, mkdirs, chmods]).run()

        # 3. Specify environment variables (appended through a here-document)
        command = "cat >> " + self.conf_dir + "/spark-env.sh << EOF\n"
        command += "JAVA_HOME=" + self.java_home + "\n"
        command += "SPARK_LOG_DIR=" + self.logs_dir + "\n"
        if self.hc:
            command += "HADOOP_CONF_DIR=" + self.hc.conf_dir + "\n"
        # NOTE(review): this branch reads self.hc without checking it, so
        # YARN mode presumably always comes with a Hadoop cluster - confirm
        if self.mode == YARN_MODE:
            command += "YARN_CONF_DIR=" + self.hc.conf_dir + "\n"
        command += "EOF\n"
        command += "chmod +x " + self.conf_dir + "/spark-env.sh"
        action = Remote(command, self.hosts)
        action.run()
Пример #3
0
def check_java_version(java_major_version, hosts):
    """Check that all hosts run at least the given Java major version.

    The output of ``java -version`` is parsed in every host. Both the legacy
    ``1.N`` numbering (e.g. ``1.7.0_79`` is major version 7) and the plain
    ``N[.x.y]`` numbering (e.g. ``11.0.2`` is major version 11) are
    understood.

    Args:
      java_major_version (int):
        The minimum major version required (e.g. 7 for Java 1.7).
      hosts:
        The hosts where the version is checked, as accepted by TaktukRemote.

    Returns:
      bool: True if every host reports a version greater or equal than the
      requested one. False otherwise, including the case in which the
      version of some host cannot be determined (e.g. Java not installed).
    """

    tr = TaktukRemote("java -version 2>&1 | grep version", hosts)
    tr.run()

    # Legacy scheme first so that "1.7.0" keeps being read as 7; the second
    # pattern is only a fallback for version strings without the "1." prefix
    legacy_scheme = re.compile(r'.*[^.0-9]1\.([0-9]+).[0-9].*')
    plain_scheme = re.compile(r'.*version "([0-9]+)')

    for p in tr.processes:
        match = legacy_scheme.match(p.stdout) or plain_scheme.match(p.stdout)
        if match is None:
            # No recognizable version in the output: requirement not met
            return False
        if java_major_version > int(match.group(1)):
            return False

    return True
Пример #4
0
def check_java_version(java_major_version, hosts):
    """Check that all hosts run at least the given Java major version.

    The output of ``java -version`` is parsed in every host. Both the legacy
    ``1.N`` numbering (e.g. ``1.7.0_79`` is major version 7) and the plain
    ``N[.x.y]`` numbering (e.g. ``11.0.2`` is major version 11) are
    understood.

    Args:
      java_major_version (int):
        The minimum major version required (e.g. 7 for Java 1.7).
      hosts:
        The hosts where the version is checked, as accepted by TaktukRemote.

    Returns:
      bool: True if every host reports a version greater or equal than the
      requested one. False otherwise, including the case in which the
      version of some host cannot be determined (e.g. Java not installed).
    """

    tr = TaktukRemote("java -version 2>&1 | grep version", hosts)
    tr.run()

    # Legacy scheme first so that "1.7.0" keeps being read as 7; the second
    # pattern is only a fallback for version strings without the "1." prefix
    legacy_scheme = re.compile(r'.*[^.0-9]1\.([0-9]+).[0-9].*')
    plain_scheme = re.compile(r'.*version "([0-9]+)')

    for p in tr.processes:
        match = legacy_scheme.match(p.stdout) or plain_scheme.match(p.stdout)
        if match is None:
            # No recognizable version in the output: requirement not met
            return False
        if java_major_version > int(match.group(1)):
            return False

    return True
Пример #5
0
    def bootstrap(self, tar_file):
        """Install Cassandra in all cluster nodes from the specified tar.gz file.

        The required Java packages are checked (and installed if missing),
        the tar file is copied to every host and uncompressed under /tmp and
        the result is moved to the base dir. Configuration and logs dirs are
        created afterwards.

        Args:
          tar_file (str):
            The file containing Cassandra binaries.
        """

        # 0. Check that required packages are present
        required_packages = "openjdk-7-jre openjdk-7-jdk"
        pkg_check = TaktukRemote("dpkg -s " + required_packages, self.hosts)
        # A failing query is an expected outcome when packages are missing;
        # the nolog_* flags presumably keep that failure out of the logs
        for p in pkg_check.processes:
            p.nolog_exit_code = p.nolog_error = True
        pkg_check.run()
        packages_ok = pkg_check.ok
        if not packages_ok:
            logger.info("Packages not installed, trying to install")
            # DEBIAN_FRONTEND (not DEBIAN_MASTER) is the variable read by
            # debconf to disable interactive prompts
            install_packages = TaktukRemote(
                "export DEBIAN_FRONTEND=noninteractive ; "
                + "apt-get update && apt-get install -y --force-yes "
                + required_packages,
                self.hosts,
            ).run()
            packages_ok = install_packages.ok
            if not packages_ok:
                logger.error("Unable to install the packages")

        # The Java home is derived from the real location of javac in master
        get_java_home = SshProcess("echo $(readlink -f /usr/bin/javac | " 'sed "s:/bin/javac::")', self.master)
        get_java_home.run()
        self.java_home = get_java_home.stdout.strip()

        # Do not claim success when the installation has just failed
        if packages_ok:
            logger.info("All required packages are present")

        # 1. Copy Cassandra tar file and uncompress
        logger.info("Copy " + tar_file + " to hosts and uncompress")
        rm_dirs = TaktukRemote("rm -rf " + self.base_dir + " " + self.conf_dir + " " + self.logs_dir, self.hosts)
        put_tar = TaktukPut(self.hosts, [tar_file], "/tmp")
        tar_xf = TaktukRemote("tar xf /tmp/" + os.path.basename(tar_file) + " -C /tmp", self.hosts)
        SequentialActions([rm_dirs, put_tar, tar_xf]).run()

        # 2. Move installation to base dir and create other dirs
        logger.info("Create installation directories")
        mv_base_dir = TaktukRemote(
            "mv /tmp/" + os.path.basename(tar_file).replace(".tar.gz", "") + " " + self.base_dir, self.hosts
        )
        mkdirs = TaktukRemote("mkdir -p " + self.conf_dir + " && mkdir -p " + self.logs_dir, self.hosts)
        chmods = TaktukRemote(
            "chmod g+w " + self.base_dir + " && chmod g+w " + self.conf_dir + " && chmod g+w " + self.logs_dir,
            self.hosts,
        )
        SequentialActions([mv_base_dir, mkdirs, chmods]).run()
Пример #6
0
    def start(self):
        """Start Cassandra in all the hosts of the cluster.

        Nothing is launched if Cassandra is already flagged as running. The
        running flags are only set when the launch command finishes ok.
        """

        self._check_initialization()

        logger.info("Starting Cassandra")

        if not self.running_cassandra:
            launcher = TaktukRemote(self.bin_dir + "/cassandra", self.hosts)
            launcher.run()

            if launcher.finished_ok:
                self.running_cassandra = True
                self.running = True
            else:
                logger.warn("Error while starting Cassandra")
        else:
            logger.warn("Cassandra was already started")
Пример #7
0
    def bootstrap(self, tar_file):
        """Install MongoDB in all cluster nodes from the specified tgz file.

        Previous directories are removed, the tar file is copied to every
        host and uncompressed under /tmp, the result is moved to the base
        dir, the data, conf and logs dirs are created together with an empty
        configuration file and, finally, the initial configuration is
        generated.

        Args:
          tar_file (str):
            The file containing MongoDB binaries.
        """

        # 1. Copy mongo tar file and uncompress
        logger.info("Copy " + tar_file + " to hosts and uncompress")
        tar_name = os.path.basename(tar_file)
        clean_cmd = " ".join(["rm -rf", self.base_dir, self.conf_dir,
                              self.data_dir, self.logs_dir])
        copy_steps = [
            TaktukRemote(clean_cmd, self.hosts),
            TaktukPut(self.hosts, [tar_file], "/tmp"),
            TaktukRemote("tar xf /tmp/" + tar_name + " -C /tmp", self.hosts),
        ]
        SequentialActions(copy_steps).run()

        # 2. Move installation to base dir
        logger.info("Create installation directories")
        extracted_name = os.path.basename(tar_file).replace(".tgz", "")
        move_cmd = "mv /tmp/" + extracted_name + " " + self.base_dir
        Remote(move_cmd, self.hosts).run()

        # 3. Create other dirs (and an empty configuration file)
        dirs_cmd = " && ".join([
            "mkdir -p " + self.data_dir,
            "mkdir -p " + self.conf_dir,
            "mkdir -p " + self.logs_dir,
            "touch " + os.path.join(self.conf_dir, CONF_FILE),
        ])
        TaktukRemote(dirs_cmd, self.hosts).run()

        # 4. Generate initial configuration
        self._initialize_conf()
Пример #8
0
    def stop(self):
        """Stop MongoDB servers.

        A mongod shutdown is requested in every host and, afterwards, the
        process whose pid is stored in the mongos pid file is killed in the
        master. The cluster is flagged as not running in any case.
        """

        self._check_initialization()

        logger.info("Stopping MongoDB")

        # Shutdown of the mongod servers, identified by their data dir
        shutdown_cmd = (self.bin_dir + "/mongod --shutdown --dbpath " +
                        self.data_dir)
        mongod_shutdown = TaktukRemote(shutdown_cmd, self.hosts)
        mongod_shutdown.run()

        # Kill of the mongos process, only in the master
        kill_cmd = "kill $(more " + self.mongos_pid_file + ")"
        mongos_kill = TaktukRemote(kill_cmd, [self.master])
        mongos_kill.run()

        self.running = False
Пример #9
0
    def load(self, hc, dest, desired_size=None):
        """Load the dataset in the given dfs folder by copying it from the
        local folder.

        The files are first distributed among the hosts of the cluster
        (round-robin) and copied to a temporary dir in each of them. Then,
        every host puts its own files into the dfs, one thread per host.

        Args:
          hc (HadoopCluster):
            The Hadoop cluster where to deploy the dataset.
          dest (str):
            The dfs destination folder.
          desired_size (int, optional):
            The size of the data to be copied. If indicated only the first files
            of the dataset up to the given size are copied, if not, the whole
            dataset is transferred.
        """

        dataset_files = [os.path.join(self.local_path, f) for f in
                         os.listdir(self.local_path)]
        hosts = hc.hosts

        # Define and create temp dir
        tmp_dir = "/tmp" + dest
        action_remove = TaktukRemote("rm -rf " + tmp_dir, hosts)
        action_remove.run()
        action_create = TaktukRemote("mkdir -p " + tmp_dir, hosts)
        action_create.run()

        # Generate list of files to copy
        real_size = 0
        if desired_size:
            # Take files in name order until the desired size is reached
            all_files_to_copy = []
            for f in sorted(dataset_files):
                if real_size >= desired_size:
                    break
                all_files_to_copy.append(f)
                real_size += os.path.getsize(f)

            if real_size < desired_size:
                logger.warn(
                    "Dataset files do not fill up to desired size "
                    "(real size = " + str(real_size) + ")")
        else:
            all_files_to_copy = dataset_files
            for f in all_files_to_copy:
                real_size += os.path.getsize(f)

        # Assign files to hosts (round-robin)
        num_hosts = len(hosts)
        files_per_host = [all_files_to_copy[idx::num_hosts]
                          for idx in range(num_hosts)]

        # Create threads and launch them
        logger.info(
            "Loading dataset in parallel into " + str(len(hosts)) + " hosts")
        if not hc.running:
            hc.start()

        class SizeCollector:
            """Accumulator shared by the threads, protected by a lock."""

            def __init__(self):
                self.size = 0
                self.lock = threading.Lock()

            def increment(self, qty):
                with self.lock:
                    self.size += qty

        def copy_function(host, files_to_copy, collector=None):
            # Copy the files assigned to this host into its temp dir
            action = Put([host], files_to_copy, tmp_dir)
            action.run()

            local_final_size = 0

            for f in files_to_copy:
                src_file = os.path.join(tmp_dir, os.path.basename(f))
                if self.pre_load_function:
                    # The pre-load function returns the file to be loaded,
                    # whose size is measured in the host itself
                    src_file = self.pre_load_function(src_file, host)

                    action = SshProcess("du -b " + src_file + "| cut -f1", host)
                    action.run()

                    local_final_size += int(action.stdout.strip())

                hc.execute("fs -put " + src_file + " " +
                           os.path.join(dest, os.path.basename(src_file)),
                           host, True, False)

            if collector is not None:
                collector.increment(local_final_size)

        if self.pre_load_function:
            final_size = SizeCollector()
        else:
            final_size = None

        threads = []
        for idx, h in enumerate(hosts):
            if files_per_host[idx]:
                t = threading.Thread(target=copy_function,
                                     args=(h, files_per_host[idx], final_size))
                t.start()
                threads.append(t)

        # Wait for the threads to finish
        for t in threads:
            t.join()

        # Sizes are only collected remotely when a pre-load function exists.
        # Otherwise the files are uploaded as they are, so the local size is
        # reported (reading final_size.size would fail on None).
        if final_size is not None:
            remote_size = final_size.size
        else:
            remote_size = real_size

        logger.info("Loading completed: real local size = " + str(real_size) +
                    ", final remote size = " + str(remote_size))

        self.deployments[hc, desired_size] = dest
Пример #10
0
    def load(self, hc, dest, desired_size=None):
        """Load the dataset in the given dfs folder by copying it from the
        local folder.

        The files are first distributed among the hosts of the cluster
        (round-robin) and copied to a temporary dir in each of them. Then,
        every host puts its own files into the dfs, one thread per host.

        Args:
          hc (HadoopCluster):
            The Hadoop cluster where to deploy the dataset.
          dest (str):
            The dfs destination folder.
          desired_size (int, optional):
            The size of the data to be copied. If indicated only the first files
            of the dataset up to the given size are copied, if not, the whole
            dataset is transferred.
        """

        dataset_files = [os.path.join(self.local_path, f) for f in
                         os.listdir(self.local_path)]
        hosts = hc.hosts

        # Define and create temp dir
        tmp_dir = "/tmp" + dest
        action_remove = TaktukRemote("rm -rf " + tmp_dir, hosts)
        action_remove.run()
        action_create = TaktukRemote("mkdir -p " + tmp_dir, hosts)
        action_create.run()

        # Generate list of files to copy
        real_size = 0
        if desired_size:
            # Take files in name order until the desired size is reached
            all_files_to_copy = []
            for f in sorted(dataset_files):
                if real_size >= desired_size:
                    break
                all_files_to_copy.append(f)
                real_size += os.path.getsize(f)

            if real_size < desired_size:
                logger.warn(
                    "Dataset files do not fill up to desired size "
                    "(real size = " + str(real_size) + ")")
        else:
            all_files_to_copy = dataset_files
            for f in all_files_to_copy:
                real_size += os.path.getsize(f)

        # Assign files to hosts (round-robin)
        num_hosts = len(hosts)
        files_per_host = [all_files_to_copy[idx::num_hosts]
                          for idx in range(num_hosts)]

        # Create threads and launch them
        logger.info(
            "Loading dataset in parallel into " + str(len(hosts)) + " hosts")
        if not hc.running:
            hc.start()

        class SizeCollector:
            """Accumulator shared by the threads, protected by a lock."""

            def __init__(self):
                self.size = 0
                self.lock = threading.Lock()

            def increment(self, qty):
                with self.lock:
                    self.size += qty

        def copy_function(host, files_to_copy, collector=None):
            # Copy the files assigned to this host into its temp dir
            action = Put([host], files_to_copy, tmp_dir)
            action.run()

            local_final_size = 0

            for f in files_to_copy:
                src_file = os.path.join(tmp_dir, os.path.basename(f))
                if self.pre_load_function:
                    # The pre-load function returns the file to be loaded,
                    # whose size is measured in the host itself
                    src_file = self.pre_load_function(src_file, host)

                    action = SshProcess("du -b " + src_file + "| cut -f1", host)
                    action.run()

                    local_final_size += int(action.stdout.strip())

                hc.execute("fs -put " + src_file + " " +
                           os.path.join(dest, os.path.basename(src_file)),
                           host, True, False)

            if collector is not None:
                collector.increment(local_final_size)

        if self.pre_load_function:
            final_size = SizeCollector()
        else:
            final_size = None

        threads = []
        for idx, h in enumerate(hosts):
            if files_per_host[idx]:
                t = threading.Thread(target=copy_function,
                                     args=(h, files_per_host[idx], final_size))
                t.start()
                threads.append(t)

        # Wait for the threads to finish
        for t in threads:
            t.join()

        # Sizes are only collected remotely when a pre-load function exists.
        # Otherwise the files are uploaded as they are, so the local size is
        # reported (reading final_size.size would fail on None).
        if final_size is not None:
            remote_size = final_size.size
        else:
            remote_size = real_size

        logger.info("Loading completed: real local size = " + str(real_size) +
                    ", final remote size = " + str(remote_size))

        self.deployments[hc, desired_size] = dest
Пример #11
0
def check_packages(packages, hosts):
    """Query dpkg for the given packages in all the hosts.

    Args:
      packages (str):
        The names of the packages, appended as they are to "dpkg -s ".
      hosts:
        The hosts where the query is executed, as accepted by TaktukRemote.

    Returns:
      The ``ok`` attribute of the remote action after running it.
    """
    query = TaktukRemote("dpkg -s " + packages, hosts)
    # The nolog_* flags are set before running; presumably they prevent the
    # failures of the query from being logged
    for proc in query.processes:
        proc.nolog_exit_code = True
        proc.nolog_error = True
    query.run()
    return query.ok
Пример #12
0
    def bootstrap(self, tar_file):
        """Install Spark in all cluster nodes from the specified tgz file.

        The Java version is checked, the tar file is copied to every host
        and uncompressed under /tmp, the result is moved to the base dir,
        the additional dirs are created, spark-env.sh is extended with the
        environment variables of this installation and the initial
        configuration is generated.

        Args:
          tar_file (str):
            The file containing Spark binaries.

        Raises:
          SparkException:
            If check_java_version reports that the required Java version is
            not available in the hosts.
        """

        # 0. Check requirements
        java_major_version = 7
        if not check_java_version(java_major_version, self.hosts):
            msg = "Java 1.%d+ required" % java_major_version
            logger.error(msg)
            raise SparkException(msg)

        self.java_home = get_java_home(self.master)

        # 1. Copy Spark tar file and uncompress
        logger.info("Copy " + tar_file + " to hosts and uncompress")
        rm_dirs = TaktukRemote("rm -rf " + self.base_dir +
                               " " + self.conf_dir,
                               self.hosts)
        put_tar = TaktukPut(self.hosts, [tar_file], "/tmp")
        tar_xf = TaktukRemote(
            "tar xf /tmp/" + os.path.basename(tar_file) + " -C /tmp",
            self.hosts)
        rm_tar = TaktukRemote(
            "rm /tmp/" + os.path.basename(tar_file),
            self.hosts)
        SequentialActions([rm_dirs, put_tar, tar_xf, rm_tar]).run()

        # 2. Move installation to base dir
        logger.info("Create installation directories")
        mv_base_dir = TaktukRemote(
            "mv /tmp/" + os.path.basename(tar_file).replace(".tgz", "") + " " +
            self.base_dir,
            self.hosts)
        mkdirs = TaktukRemote("mkdir -p " + self.conf_dir +
                              " && mkdir -p " + self.logs_dir,
                              self.hosts)
        chmods = TaktukRemote("chmod g+w " + self.base_dir +
                              " && chmod g+w " + self.conf_dir +
                              " && chmod g+w " + self.logs_dir,
                              self.hosts)
        SequentialActions([mv_base_dir, mkdirs, chmods]).run()

        # 2.1. Create spark-events dir
        if self.evs_log_dir:
            if self.evs_log_dir.startswith("file://") or \
                    "://" not in self.evs_log_dir:
                # mkdir and chmod need a plain path: given the "file://" URI
                # as it is they would create a relative dir named "file:"
                local_evs_dir = self.evs_log_dir
                if local_evs_dir.startswith("file://"):
                    local_evs_dir = local_evs_dir[len("file://"):]
                if local_evs_dir:
                    mk_evs_dir = TaktukRemote(
                        "mkdir -p " + local_evs_dir +
                        " && chmod g+w " + local_evs_dir,
                        self.hosts)
                    mk_evs_dir.run()
            elif self.evs_log_dir.startswith("hdfs://"):
                self.hc.execute("fs -mkdir -p " + self.evs_log_dir)

        # 3. Specify environment variables (appended through a here-document)
        env_file = self.conf_dir + "/spark-env.sh"

        command = "cat >> " + env_file + " << EOF\n"
        command += "JAVA_HOME=" + self.java_home + "\n"
        command += "SPARK_LOG_DIR=" + self.logs_dir + "\n"
        if self.hc:
            command += "HADOOP_CONF_DIR=" + self.hc.conf_dir + "\n"
        # NOTE(review): this branch reads self.hc without checking it, so
        # YARN mode presumably always comes with a Hadoop cluster - confirm
        if self.mode == YARN_MODE:
            command += "YARN_CONF_DIR=" + self.hc.conf_dir + "\n"
        command += "EOF\n"
        # Added outside the here-document: $(hostname) is evaluated by the
        # shell of each host when the line is echoed
        command += "echo SPARK_PUBLIC_DNS=$(hostname) >> " + env_file
        command += " && chmod +x " + env_file
        action = Remote(command, self.hosts)
        action.run()

        # 4. Generate initial configuration
        self._initialize_conf()
Пример #13
0
    def bootstrap(self, tar_file):
        """Install Hadoop in all cluster nodes from the specified tar.gz file.

        The required Java packages are checked (and installed if missing),
        the tar file is copied to every host and uncompressed under /tmp,
        the result is moved to the base dir, the additional dirs are created
        and hadoop-env.sh is extended with the environment variables of this
        installation.

        Args:
          tar_file (str):
            The file containing Hadoop binaries.

        Returns:
          The value returned by self._check_version_compliance().
        """

        # 0. Check that required packages are present
        required_packages = "openjdk-7-jre openjdk-7-jdk"
        pkg_check = TaktukRemote("dpkg -s " + required_packages,
                                 self.hosts)
        # A failing query is an expected outcome when packages are missing;
        # the nolog_* flags presumably keep that failure out of the logs
        for p in pkg_check.processes:
            p.nolog_exit_code = p.nolog_error = True
        pkg_check.run()
        packages_ok = pkg_check.ok
        if not packages_ok:
            logger.info("Packages not installed, trying to install")
            # DEBIAN_FRONTEND (not DEBIAN_MASTER) is the variable read by
            # debconf to disable interactive prompts
            install_packages = TaktukRemote(
                "export DEBIAN_FRONTEND=noninteractive ; " +
                "apt-get update && apt-get install -y --force-yes " +
                required_packages, self.hosts).run()
            packages_ok = install_packages.ok
            if not packages_ok:
                logger.error("Unable to install the packages")

        # The Java home is derived from the real location of javac in master
        get_java_home = SshProcess('echo $(readlink -f /usr/bin/javac | '
                                   'sed "s:/bin/javac::")', self.master)
        get_java_home.run()
        self.java_home = get_java_home.stdout.strip()

        # Do not claim success when the installation has just failed
        if packages_ok:
            logger.info("All required packages are present")

        # 1. Copy hadoop tar file and uncompress
        logger.info("Copy " + tar_file + " to hosts and uncompress")
        rm_dirs = Remote("rm -rf " + self.base_dir +
                         " " + self.conf_dir +
                         " " + self.logs_dir +
                         " " + self.hadoop_temp_dir,
                         self.hosts)
        put_tar = TaktukPut(self.hosts, [tar_file], "/tmp")
        tar_xf = TaktukRemote(
            "tar xf /tmp/" + os.path.basename(tar_file) + " -C /tmp",
            self.hosts)
        SequentialActions([rm_dirs, put_tar, tar_xf]).run()

        # 2. Move installation to base dir and create other dirs
        logger.info("Create installation directories")
        mv_base_dir = TaktukRemote(
            "mv /tmp/" +
            os.path.basename(tar_file).replace(".tar.gz", "") + " " +
            self.base_dir,
            self.hosts)
        mkdirs = TaktukRemote("mkdir -p " + self.conf_dir +
                              " && mkdir -p " + self.logs_dir +
                              " && mkdir -p " + self.hadoop_temp_dir,
                              self.hosts)
        chmods = TaktukRemote("chmod g+w " + self.base_dir +
                              " && chmod g+w " + self.conf_dir +
                              " && chmod g+w " + self.logs_dir +
                              " && chmod g+w " + self.hadoop_temp_dir,
                              self.hosts)
        SequentialActions([mv_base_dir, mkdirs, chmods]).run()

        # 3. Specify environment variables (appended through a here-document)
        command = "cat >> " + self.conf_dir + "/hadoop-env.sh << EOF\n"
        command += "export JAVA_HOME=" + self.java_home + "\n"
        command += "export HADOOP_LOG_DIR=" + self.logs_dir + "\n"
        command += "HADOOP_HOME_WARN_SUPPRESS=\"TRUE\"\n"
        command += "EOF"
        action = Remote(command, self.hosts)
        action.run()

        # 4. Check version
        return self._check_version_compliance()
Пример #14
0
def check_packages(packages, hosts):
    """Run ``dpkg -s`` for the given packages on the hosts and report success.

    Args:
      packages (str):
        Text appended verbatim to the ``dpkg -s`` command line (the package
        names to query).
      hosts:
        The hosts passed to the TaktukRemote action.

    Returns:
      The ``ok`` attribute of the action after it has been run.
    """
    query = TaktukRemote("dpkg -s " + packages, hosts)
    # NOTE(review): presumably keeps an expected dpkg failure (package not
    # installed) from being reported in the logs -- confirm against execo.
    for proc in query.processes:
        proc.nolog_exit_code = True
        proc.nolog_error = True
    query.run()
    return query.ok
Пример #15
0
    def bootstrap(self, tar_file):
        """Deploy Hadoop on every host of the cluster from a tar.gz archive.

        Stores the Java home reported for the master in ``self.java_home``,
        replaces any previous installation directories, appends the
        environment settings to ``hadoop-env.sh`` and, if the version check
        passes, generates the initial configuration.

        Args:
          tar_file (str):
            Path of the archive containing the Hadoop binaries.

        Returns:
          False when the version compliance check fails, True otherwise.

        Raises:
          HadoopException: when the Java version check fails.
        """

        # 0. Requirements: Java must be recent enough on all hosts
        min_java = 7
        java_ok = check_java_version(min_java, self.hosts)
        if not java_ok:
            error_text = "Java 1.%d+ required" % min_java
            logger.error(error_text)
            raise HadoopException(error_text)

        self.java_home = get_java_home(self.master)

        # 1. Wipe old directories, ship the archive, unpack it, drop it
        logger.info("Copy " + tar_file + " to hosts and uncompress")
        archive = os.path.basename(tar_file)
        managed_dirs = [self.base_dir, self.conf_dir, self.logs_dir,
                        self.hadoop_temp_dir]
        cleanup = TaktukRemote("rm -rf " + " ".join(managed_dirs),
                               self.hosts)
        upload = TaktukPut(self.hosts, [tar_file], "/tmp")
        extract = TaktukRemote("tar xf /tmp/" + archive + " -C /tmp",
                               self.hosts)
        drop_archive = TaktukRemote("rm /tmp/" + archive, self.hosts)
        SequentialActions([cleanup, upload, extract, drop_archive]).run()

        # 2. Put the unpacked tree in place and create the remaining dirs
        logger.info("Create installation directories")
        unpacked_name = archive.replace(".tar.gz", "")
        relocate = TaktukRemote(
            "mv /tmp/" + unpacked_name + " " + self.base_dir, self.hosts)
        # base_dir already exists after the move; only the others are created
        make_dirs = TaktukRemote(
            " && ".join("mkdir -p " + d for d in managed_dirs[1:]),
            self.hosts)
        grant_group_write = TaktukRemote(
            " && ".join("chmod g+w " + d for d in managed_dirs),
            self.hosts)
        SequentialActions([relocate, make_dirs, grant_group_write]).run()

        # 3. Append the environment variables through a shell here-document
        env_lines = [
            "cat >> " + self.conf_dir + "/hadoop-env.sh << EOF",
            "export JAVA_HOME=" + self.java_home,
            "export HADOOP_LOG_DIR=" + self.logs_dir,
            "HADOOP_HOME_WARN_SUPPRESS=\"TRUE\"",
            "EOF",
        ]
        Remote("\n".join(env_lines), self.hosts).run()

        # 4. The version can only be checked once the binaries are in place;
        #    the initial configuration is generated only when it is compliant
        if self._check_version_compliance():
            self._initialize_conf()
            return True
        return False
Пример #16
0
    def bootstrap(self, tar_file):
        """Install Hadoop in all cluster nodes from the specified tar.gz file.

        Checks (and if needed tries to install) the required Java packages,
        stores the Java home found on the master in ``self.java_home``,
        replaces any previous installation directories with the contents of
        the archive and appends the environment settings to
        ``hadoop-env.sh``.

        Args:
          tar_file (str):
            The file containing Hadoop binaries.

        Returns:
          The value returned by ``self._check_version_compliance()``.
        """

        # 0. Check that required packages are present
        required_packages = "openjdk-7-jre openjdk-7-jdk"
        check_packages = TaktukRemote("dpkg -s " + required_packages,
                                      self.hosts)
        for p in check_packages.processes:
            p.nolog_exit_code = p.nolog_error = True
        check_packages.run()
        packages_ready = check_packages.ok
        if not packages_ready:
            logger.info("Packages not installed, trying to install")
            # DEBIAN_FRONTEND (not DEBIAN_MASTER) is the variable debconf
            # reads to run without interactive prompts.
            install_packages = TaktukRemote(
                "export DEBIAN_FRONTEND=noninteractive ; " +
                "apt-get update && apt-get install -y --force-yes " +
                required_packages, self.hosts).run()
            packages_ready = install_packages.ok
            if not packages_ready:
                # Best effort: the failure is reported but bootstrap goes on.
                logger.error("Unable to install the packages")

        # Derive JAVA_HOME on the master from the real location of javac
        get_java_home = SshProcess('echo $(readlink -f /usr/bin/javac | '
                                   'sed "s:/bin/javac::")', self.master)
        get_java_home.run()
        self.java_home = get_java_home.stdout.strip()

        # Only claim success when the check or the installation succeeded
        if packages_ready:
            logger.info("All required packages are present")

        # 1. Copy hadoop tar file and uncompress
        logger.info("Copy " + tar_file + " to hosts and uncompress")
        rm_dirs = Remote("rm -rf " + self.base_dir +
                         " " + self.conf_dir +
                         " " + self.logs_dir +
                         " " + self.hadoop_temp_dir,
                         self.hosts)
        put_tar = TaktukPut(self.hosts, [tar_file], "/tmp")
        tar_xf = TaktukRemote(
            "tar xf /tmp/" + os.path.basename(tar_file) + " -C /tmp",
            self.hosts)
        SequentialActions([rm_dirs, put_tar, tar_xf]).run()

        # 2. Move installation to base dir and create other dirs
        logger.info("Create installation directories")
        mv_base_dir = TaktukRemote(
            "mv /tmp/" +
            os.path.basename(tar_file).replace(".tar.gz", "") + " " +
            self.base_dir,
            self.hosts)
        mkdirs = TaktukRemote("mkdir -p " + self.conf_dir +
                              " && mkdir -p " + self.logs_dir +
                              " && mkdir -p " + self.hadoop_temp_dir,
                              self.hosts)
        chmods = TaktukRemote("chmod g+w " + self.base_dir +
                              " && chmod g+w " + self.conf_dir +
                              " && chmod g+w " + self.logs_dir +
                              " && chmod g+w " + self.hadoop_temp_dir,
                              self.hosts)
        SequentialActions([mv_base_dir, mkdirs, chmods]).run()

        # 3. Specify environment variables (appended with a here-document)
        command = "cat >> " + self.conf_dir + "/hadoop-env.sh << EOF\n"
        command += "export JAVA_HOME=" + self.java_home + "\n"
        command += "export HADOOP_LOG_DIR=" + self.logs_dir + "\n"
        command += "HADOOP_HOME_WARN_SUPPRESS=\"TRUE\"\n"
        command += "EOF"
        action = Remote(command, self.hosts)
        action.run()

        # 4. Check version
        return self._check_version_compliance()
Пример #17
0
    def bootstrap(self, tar_file):
        """Install Spark in all cluster nodes from the specified .tgz file.

        Stores the Java home reported for the master in ``self.java_home``,
        replaces any previous installation directories with the contents of
        the archive, creates the event log directory when one is configured,
        appends the environment settings to ``spark-env.sh`` and generates
        the initial configuration.

        Args:
          tar_file (str):
            The file containing the Spark binaries.

        Raises:
          SparkException: when the Java version check fails.
        """

        # 0. Check requirements
        java_major_version = 7
        if not check_java_version(java_major_version, self.hosts):
            msg = "Java 1.%d+ required" % java_major_version
            logger.error(msg)
            raise SparkException(msg)

        self.java_home = get_java_home(self.master)

        # 1. Copy Spark tar file and uncompress
        logger.info("Copy " + tar_file + " to hosts and uncompress")
        rm_dirs = TaktukRemote("rm -rf " + self.base_dir + " " + self.conf_dir,
                               self.hosts)
        put_tar = TaktukPut(self.hosts, [tar_file], "/tmp")
        tar_xf = TaktukRemote(
            "tar xf /tmp/" + os.path.basename(tar_file) + " -C /tmp",
            self.hosts)
        rm_tar = TaktukRemote("rm /tmp/" + os.path.basename(tar_file),
                              self.hosts)
        SequentialActions([rm_dirs, put_tar, tar_xf, rm_tar]).run()

        # 2. Move installation to base dir
        logger.info("Create installation directories")
        mv_base_dir = TaktukRemote(
            "mv /tmp/" + os.path.basename(tar_file).replace(".tgz", "") + " " +
            self.base_dir, self.hosts)
        mkdirs = TaktukRemote(
            "mkdir -p " + self.conf_dir + " && mkdir -p " + self.logs_dir,
            self.hosts)
        chmods = TaktukRemote(
            "chmod g+w " + self.base_dir + " && chmod g+w " + self.conf_dir +
            " && chmod g+w " + self.logs_dir, self.hosts)
        SequentialActions([mv_base_dir, mkdirs, chmods]).run()

        # 2.1. Create spark-events dir
        if self.evs_log_dir:
            if self.evs_log_dir.startswith("file://") or \
                    "://" not in self.evs_log_dir:
                # mkdir/chmod take a filesystem path, not a URI: drop the
                # "file://" scheme so the directory is created where the URI
                # points instead of in a relative "file:" directory.
                local_evs_dir = self.evs_log_dir
                if local_evs_dir.startswith("file://"):
                    local_evs_dir = local_evs_dir[len("file://"):]
                mk_evs_dir = TaktukRemote(
                    "mkdir -p " + local_evs_dir + " && chmod g+w " +
                    local_evs_dir, self.hosts)
                mk_evs_dir.run()
            elif self.evs_log_dir.startswith("hdfs://"):
                self.hc.execute("fs -mkdir -p " + self.evs_log_dir)
            # Any other scheme is left untouched (no directory is created).

        # 3. Specify environment variables
        env_file = self.conf_dir + "/spark-env.sh"

        # The static settings are appended with a here-document; the public
        # DNS line is echoed separately so $(hostname) is evaluated on each
        # host when the command runs.
        command = "cat >> " + env_file + " << EOF\n"
        command += "JAVA_HOME=" + self.java_home + "\n"
        command += "SPARK_LOG_DIR=" + self.logs_dir + "\n"
        if self.hc:
            command += "HADOOP_CONF_DIR=" + self.hc.conf_dir + "\n"
        if self.mode == YARN_MODE:
            # NOTE(review): assumes self.hc is set whenever the mode is
            # YARN_MODE -- confirm against the constructor.
            command += "YARN_CONF_DIR=" + self.hc.conf_dir + "\n"
        command += "EOF\n"
        command += "echo SPARK_PUBLIC_DNS=$(hostname) >> " + env_file
        command += " && chmod +x " + env_file
        action = Remote(command, self.hosts)
        action.run()

        # 4. Generate initial configuration
        self._initialize_conf()
Пример #18
0
    def start(self):
        """Start MongoDB server."""

        self._check_initialization()

        logger.info("Starting MongoDB")

        if self.running:
            logger.warn("MongoDB was already started")
            return

        # Start nodes
        procs = []
        for h in self.hosts:
            mongo_command = (NUMA_PREFIX + " " +
                             self.bin_dir + "/mongod "
                             " --fork "
                             " --config " + os.path.join(self.conf_dir,
                                                         CONF_FILE) +
                             " --bind_ip " + h.address +
                             " --port " + str(self.md_port))

            logger.debug(mongo_command)

            proc = SshProcess(mongo_command, h)
            proc.start()
            procs.append(proc)

        finished_ok = True
        for p in procs:
            p.wait()
            if not p.finished_ok:
                finished_ok = False

        if not finished_ok:
            logger.warn("Error while starting MongoDB")
            return
        else:
            self.running = True

        # Start replication
        if self.do_replication:
            logger.info("Configuring replication")
            mongo_command = "rs.initiate();"
            mongo_command += ';'.join(
                'rs.add("' + h.address + ':' + str(self.md_port) + '")'
                for h in self.hosts)

            logger.debug(mongo_command)

            proc = TaktukRemote(self.bin_dir + "/mongo "
                                "--eval '" + mongo_command + "' " +
                                self.master.address,
                                [self.master])
            proc.run()

            if not proc.finished_ok:
                logger.warn("Not able to start replication")

        if self.do_sharding:
            if not self.initialized_sharding:
                logger.info("Configuring sharding")
                time.sleep(2)
                mongo_command = (
                    'rs.initiate({'
                    '_id : "%s",'
                    'configsvr : true,'
                    'members : [%s]})' % (
                        self.rs_name,
                        ",".join('{ _id : %d, host : "%s:%d" }' %
                                 (_id, h.address, self.md_port)
                                 for (_id, h) in enumerate(self.hosts))
                    )
                )

                logger.debug(mongo_command)

                proc = SshProcess(self.bin_dir + "/mongo " +
                                  "--eval '" + mongo_command + "' " +
                                  self.master.address,
                                  self.master)
                proc.run()
                if proc.finished_ok:
                    self.initialized_sharding = True
                else:
                    logger.warn("Not able to configure sharding")

            logger.info("Starting sharding servers")
            mongo_command = (
                NUMA_PREFIX + " " +
                self.bin_dir + "/mongos"
                " --configdb " + self.rs_name + "/" +
                ",".join('%s:%d' % (h.address, self.md_port)
                         for h in self.hosts) +
                " --bind_ip " + self.master.address +
                " --port " + str(self.ms_port) +
                " --fork"
                " --logpath " + self.logs_dir + "/mongos.log"
                " --pidfilepath " + self.mongos_pid_file
            )

            logger.debug(mongo_command)

            start_ms = TaktukRemote(mongo_command, [self.master])
            start_ms.run()