Exemplo n.º 1
0
    def prepare_dataset(self, comb):
        """Prepare the dataset to be used in the next set of experiments.

        Args:
          comb (dict): The combination containing the dataset's parameters.

        Returns:
          dict: The dataset parameters.

        """

        # Create ds_comb
        (ds_class_name, ds_params) = self.comb_manager.get_ds_class_params(comb)

        local_path = ds_params["local_path"]
        remote_path = os.path.join(self.div_p2p.remote_dir,
                                   os.path.basename(local_path))

        ds_comb = {"ds.class.path": remote_path, "ds.class": ds_class_name}

        # Copy dataset to host
        logger.info(self._th_prefix() + "Prepare dataset with combination " +
                    str(self.comb_manager.get_ds_parameters(comb)))

        copy_code = TaktukPut([self.div_p2p.host], [local_path], remote_path)
        copy_code.run()

        # Notify stats manager
        self.stats_manager.add_ds(self.ds_id, comb)

        return ds_comb
Exemplo n.º 2
0
    def _copy_conf(self, conf_dir, hosts=None):

        if not hosts:
            hosts = self.hosts

        conf_files = [os.path.join(conf_dir, f) for f in os.listdir(conf_dir)]

        action = TaktukPut(hosts, conf_files, self.conf_dir)
        action.run()

        if not action.finished_ok:
            logger.warn("Error while copying configuration")
            if not action.ended:
                action.kill()
Exemplo n.º 3
0
    def setup(self):
        """Setup the cluster of hosts. Optionally deploy env and then copy the
        executable jar to all the nodes.
        """

        self.hosts = get_oar_job_nodes(self.oar_job_id, self.frontend)

        if self.use_kadeploy:
            (deployed, undeployed) = self.deploy_nodes()
            return (len(deployed) != 0)

        copy_code = TaktukPut(self.hosts, [self.jar_file], self.remote_dir)
        copy_code.run()

        return True
Exemplo n.º 4
0
    def bootstrap(self, tar_file):

        # 0. Check that required packages are present
        required_packages = "openjdk-7-jre openjdk-7-jdk"
        check_packages = TaktukRemote("dpkg -s " + required_packages,
                                      self.hosts)
        for p in check_packages.processes:
            p.nolog_exit_code = p.nolog_error = True
        check_packages.run()
        if not check_packages.ok:
            logger.info("Packages not installed, trying to install")
            install_packages = TaktukRemote(
                "export DEBIAN_MASTER=noninteractive ; " +
                "apt-get update && apt-get install -y --force-yes " +
                required_packages, self.hosts).run()
            if not install_packages.ok:
                logger.error("Unable to install the packages")

        get_java_home = SshProcess(
            'echo $(readlink -f /usr/bin/javac | '
            'sed "s:/bin/javac::")', self.master)
        get_java_home.run()
        self.java_home = get_java_home.stdout.strip()

        logger.info("All required packages are present")

        # 1. Copy Hive tar file and uncompress
        logger.info("Copy " + tar_file + " to hosts and uncompress")
        rm_dirs = TaktukRemote(
            "rm -rf " + self.base_dir + " " + self.conf_dir + " " +
            self.warehouse_dir + " " + self.logs_dir, self.hosts)
        put_tar = TaktukPut(self.hosts, [tar_file], "/tmp")
        tar_xf = TaktukRemote(
            "tar xf /tmp/" + os.path.basename(tar_file) + " -C /tmp",
            self.hosts)
        SequentialActions([rm_dirs, put_tar, tar_xf]).run()

        # 2. Move installation to base dir
        logger.info("Create installation directories")
        mv_base_dir = TaktukRemote(
            "mv /tmp/" + os.path.basename(tar_file).replace(".tar.gz", "") +
            " " + self.base_dir, self.hosts)
        mkdirs = TaktukRemote(
            "mkdir -p " + self.conf_dir + " && mkdir -p " + self.warehouse_dir,
            self.hosts)
        chmods = TaktukRemote(
            "chmod g+w " + self.base_dir + " && chmod g+w " + self.conf_dir +
            " && chmod g+w " + self.warehouse_dir, self.hosts)
        SequentialActions([mv_base_dir, mkdirs, chmods]).run()

        # 3. Specify environment variables
        command = "cat >> " + self.conf_dir + "/hive-env.sh << EOF\n"
        command += "JAVA_HOME=" + self.java_home + "\n"
        command += "HIVE_HOME=" + self.base_dir + "\n"
        command += "HIVE_CONF_DIR=" + self.conf_dir + "\n"
        command += "HADOOP_HOME=" + self.hc.base_dir + "\n"
        command += "EOF\n"
        command += "chmod +x " + self.conf_dir + "/hive-env.sh"
        action = Remote(command, self.hosts)
        action.run()
Exemplo n.º 5
0
    def _copy_conf(self, conf_dir, hosts=None):
        """Copy configuration files from given dir to remote dir in cluster
        hosts.
        
        Args:
          conf_dir (str):
            The remote configuration dir.
          hosts (list of Host, optional):
            The list of hosts where the configuration is going to be copied. If
            not specified, all the hosts of the Hadoop cluster are used.
        """

        if not hosts:
            hosts = self.hosts

        conf_files = [os.path.join(conf_dir, f) for f in os.listdir(conf_dir)]

        action = TaktukPut(hosts, conf_files, self.conf_dir)
        action.run()

        if not action.finished_ok:
            logger.warn("Error while copying configuration")
            if not action.ended:
                action.kill()
Exemplo n.º 6
0
    def _copy_conf(self, conf_dir, hosts=None):
        """Copy configuration files from given dir to remote dir in cluster
        hosts.

        Args:
          conf_dir (str):
            The remote configuration dir.
          hosts (list of Host, optional):
            The list of hosts where the configuration is going to be copied. If
            not specified, all the hosts of the Spark cluster are used.
        """

        if not hosts:
            hosts = self.hosts

        conf_files = [os.path.join(conf_dir, f) for f in os.listdir(conf_dir)]

        action = TaktukPut(hosts, conf_files, self.conf_dir)
        action.run()

        if not action.finished_ok:
            logger.warn("Error while copying configuration")
            if not action.ended:
                action.kill()
Exemplo n.º 7
0
    def bootstrap(self, tar_file):

        # 0. Check requirements
        java_major_version = 7
        if not check_java_version(java_major_version, self.hosts):
            msg = "Java 1.%d+ required" % java_major_version
            logger.error(msg)
            raise SparkException(msg)

        self.java_home = get_java_home(self.master)

        # 1. Copy hadoop tar file and uncompress
        logger.info("Copy " + tar_file + " to hosts and uncompress")
        rm_dirs = TaktukRemote("rm -rf " + self.base_dir + " " + self.conf_dir,
                               self.hosts)
        put_tar = TaktukPut(self.hosts, [tar_file], "/tmp")
        tar_xf = TaktukRemote(
            "tar xf /tmp/" + os.path.basename(tar_file) + " -C /tmp",
            self.hosts)
        rm_tar = TaktukRemote("rm /tmp/" + os.path.basename(tar_file),
                              self.hosts)
        SequentialActions([rm_dirs, put_tar, tar_xf, rm_tar]).run()

        # 2. Move installation to base dir
        logger.info("Create installation directories")
        mv_base_dir = TaktukRemote(
            "mv /tmp/" + os.path.basename(tar_file).replace(".tgz", "") + " " +
            self.base_dir, self.hosts)
        mkdirs = TaktukRemote(
            "mkdir -p " + self.conf_dir + " && mkdir -p " + self.logs_dir,
            self.hosts)
        chmods = TaktukRemote(
            "chmod g+w " + self.base_dir + " && chmod g+w " + self.conf_dir +
            " && chmod g+w " + self.logs_dir, self.hosts)
        SequentialActions([mv_base_dir, mkdirs, chmods]).run()

        # 2.1. Create spark-events dir
        if self.evs_log_dir:
            if self.evs_log_dir.startswith("file://") or \
                            "://" not in self.evs_log_dir:
                mk_evs_dir = TaktukRemote(
                    "mkdir -p " + self.evs_log_dir + " && chmod g+w " +
                    self.evs_log_dir, self.hosts)
                mk_evs_dir.run()
            elif self.evs_log_dir.startswith("hdfs://"):
                self.hc.execute("fs -mkdir -p " + self.evs_log_dir)

        # 3. Specify environment variables
        env_file = self.conf_dir + "/spark-env.sh"

        command = "cat >> " + env_file + " << EOF\n"
        command += "JAVA_HOME=" + self.java_home + "\n"
        command += "SPARK_LOG_DIR=" + self.logs_dir + "\n"
        if self.hc:
            command += "HADOOP_CONF_DIR=" + self.hc.conf_dir + "\n"
        if self.mode == YARN_MODE:
            command += "YARN_CONF_DIR=" + self.hc.conf_dir + "\n"
        command += "EOF\n"
        command += "echo SPARK_PUBLIC_DNS=$(hostname) >> " + env_file
        command += " && chmod +x " + env_file
        action = Remote(command, self.hosts)
        action.run()

        # 4. Generate initial configuration
        self._initialize_conf()
Exemplo n.º 8
0
    def bootstrap(self, tar_file):
        """Install Hadoop in all cluster nodes from the specified tar.gz file.
        
        Args:
          tar_file (str):
            The file containing Hadoop binaries.
        """

        # 0. Check that required packages are present
        required_packages = "openjdk-7-jre openjdk-7-jdk"
        check_packages = TaktukRemote("dpkg -s " + required_packages,
                                      self.hosts)
        for p in check_packages.processes:
            p.nolog_exit_code = p.nolog_error = True
        check_packages.run()
        if not check_packages.ok:
            logger.info("Packages not installed, trying to install")
            install_packages = TaktukRemote(
                "export DEBIAN_MASTER=noninteractive ; " +
                "apt-get update && apt-get install -y --force-yes " +
                required_packages, self.hosts).run()
            if not install_packages.ok:
                logger.error("Unable to install the packages")

        get_java_home = SshProcess('echo $(readlink -f /usr/bin/javac | '
                                   'sed "s:/bin/javac::")', self.master)
        get_java_home.run()
        self.java_home = get_java_home.stdout.strip()

        logger.info("All required packages are present")

        # 1. Copy hadoop tar file and uncompress
        logger.info("Copy " + tar_file + " to hosts and uncompress")
        rm_dirs = Remote("rm -rf " + self.base_dir +
                         " " + self.conf_dir +
                         " " + self.logs_dir +
                         " " + self.hadoop_temp_dir,
                         self.hosts)
        put_tar = TaktukPut(self.hosts, [tar_file], "/tmp")
        tar_xf = TaktukRemote(
            "tar xf /tmp/" + os.path.basename(tar_file) + " -C /tmp",
            self.hosts)
        SequentialActions([rm_dirs, put_tar, tar_xf]).run()

        # 2. Move installation to base dir and create other dirs
        logger.info("Create installation directories")
        mv_base_dir = TaktukRemote(
            "mv /tmp/" +
            os.path.basename(tar_file).replace(".tar.gz", "") + " " +
            self.base_dir,
            self.hosts)
        mkdirs = TaktukRemote("mkdir -p " + self.conf_dir +
                              " && mkdir -p " + self.logs_dir +
                              " && mkdir -p " + self.hadoop_temp_dir,
                              self.hosts)
        chmods = TaktukRemote("chmod g+w " + self.base_dir +
                              " && chmod g+w " + self.conf_dir +
                              " && chmod g+w " + self.logs_dir +
                              " && chmod g+w " + self.hadoop_temp_dir,
                              self.hosts)
        SequentialActions([mv_base_dir, mkdirs, chmods]).run()

        # 4. Specify environment variables
        command = "cat >> " + self.conf_dir + "/hadoop-env.sh << EOF\n"
        command += "export JAVA_HOME=" + self.java_home + "\n"
        command += "export HADOOP_LOG_DIR=" + self.logs_dir + "\n"
        command += "HADOOP_HOME_WARN_SUPPRESS=\"TRUE\"\n"
        command += "EOF"
        action = Remote(command, self.hosts)
        action.run()

        # 5. Check version
        return self._check_version_compliance()
Exemplo n.º 9
0
    def bootstrap(self, tar_file):
        """Install Hadoop in all cluster nodes from the specified tar.gz file.
        
        Args:
          tar_file (str):
            The file containing Hadoop binaries.
        """

        # 0. Check requirements
        java_major_version = 7
        if not check_java_version(java_major_version, self.hosts):
            msg = "Java 1.%d+ required" % java_major_version
            logger.error(msg)
            raise HadoopException(msg)

        self.java_home = get_java_home(self.master)

        # 1. Copy hadoop tar file and uncompress
        logger.info("Copy " + tar_file + " to hosts and uncompress")
        rm_dirs = TaktukRemote(
            "rm -rf " + self.base_dir + " " + self.conf_dir + " " +
            self.logs_dir + " " + self.hadoop_temp_dir, self.hosts)
        put_tar = TaktukPut(self.hosts, [tar_file], "/tmp")
        tar_xf = TaktukRemote(
            "tar xf /tmp/" + os.path.basename(tar_file) + " -C /tmp",
            self.hosts)
        rm_tar = TaktukRemote("rm /tmp/" + os.path.basename(tar_file),
                              self.hosts)
        SequentialActions([rm_dirs, put_tar, tar_xf, rm_tar]).run()

        # 2. Move installation to base dir and create other dirs
        logger.info("Create installation directories")
        mv_base_dir = TaktukRemote(
            "mv /tmp/" + os.path.basename(tar_file).replace(".tar.gz", "") +
            " " + self.base_dir, self.hosts)
        mkdirs = TaktukRemote(
            "mkdir -p " + self.conf_dir + " && mkdir -p " + self.logs_dir +
            " && mkdir -p " + self.hadoop_temp_dir, self.hosts)
        chmods = TaktukRemote(
            "chmod g+w " + self.base_dir + " && chmod g+w " + self.conf_dir +
            " && chmod g+w " + self.logs_dir + " && chmod g+w " +
            self.hadoop_temp_dir, self.hosts)
        SequentialActions([mv_base_dir, mkdirs, chmods]).run()

        # 4. Specify environment variables
        command = "cat >> " + self.conf_dir + "/hadoop-env.sh << EOF\n"
        command += "export JAVA_HOME=" + self.java_home + "\n"
        command += "export HADOOP_LOG_DIR=" + self.logs_dir + "\n"
        command += "HADOOP_HOME_WARN_SUPPRESS=\"TRUE\"\n"
        command += "EOF"
        action = Remote(command, self.hosts)
        action.run()

        # 5. Check version (cannot do it before)
        if not self._check_version_compliance():
            return False

        # 6. Generate initial configuration
        self._initialize_conf()

        return True