Пример #1
0
    def prepare_dataset(self, comb):
        """Prepare the dataset to be used in the next set of experiments.

        The dataset class name and parameters are obtained from the
        combination manager. The file at ``ds_params["local_path"]`` is then
        sent with TaktukPut to ``self.div_p2p.host``, using as destination the
        file's base name under ``self.div_p2p.remote_dir``. Finally the dataset
        is registered in the stats manager.

        Args:
          comb (dict): The combination containing the dataset's parameters.

        Returns:
          dict: The dataset description, with two keys: "ds.class.path" (the
            destination path used for the copy) and "ds.class" (the dataset
            class name).

        """

        # Create ds_comb
        (ds_class_name, ds_params) = self.comb_manager.get_ds_class_params(comb)

        local_path = ds_params["local_path"]
        remote_path = os.path.join(self.div_p2p.remote_dir,
                                   os.path.basename(local_path))

        ds_comb = {"ds.class.path": remote_path, "ds.class": ds_class_name}

        # Copy dataset to host
        logger.info(self._th_prefix() + "Prepare dataset with combination " +
                    str(self.comb_manager.get_ds_parameters(comb)))

        copy_code = TaktukPut([self.div_p2p.host], [local_path], remote_path)
        copy_code.run()

        # Do not let a failed transfer go unnoticed: the experiments that
        # follow would otherwise run against a missing or partial dataset
        # without any hint in the logs.
        if not copy_code.finished_ok:
            logger.warning(self._th_prefix() +
                           "Error while copying dataset " + str(local_path) +
                           " to " + str(remote_path))

        # Notify stats manager
        self.stats_manager.add_ds(self.ds_id, comb)

        return ds_comb
Пример #2
0
    def _copy_conf(self, conf_dir, hosts=None):
        """Copy the files found in the given dir to the configuration dir of
        the hosts.

        Args:
          conf_dir (str):
            The directory whose entries are copied. It is listed with
            os.listdir, so it must be readable from the machine running this
            code.
          hosts (list, optional):
            The hosts where the configuration is going to be copied. If not
            specified (or empty), self.hosts is used.
        """

        if not hosts:
            hosts = self.hosts

        conf_files = [os.path.join(conf_dir, f) for f in os.listdir(conf_dir)]

        action = TaktukPut(hosts, conf_files, self.conf_dir)
        action.run()

        if not action.finished_ok:
            # Logger.warn is a deprecated alias; use warning instead.
            logger.warning("Error while copying configuration")
            # Make sure no transfer is left running after a failure.
            if not action.ended:
                action.kill()
Пример #3
0
    def setup(self):
        """Setup the cluster of hosts.

        The hosts are obtained from the OAR job and stored in self.hosts.
        Then, depending on self.use_kadeploy:

        * If it is not set, the executable jar is copied to the remote dir of
          all the hosts.
        * If it is set, the nodes are deployed and the method returns right
          after the deployment, without copying the jar.
          NOTE(review): this method's previous docstring said the jar is
          copied after deploying - confirm whether skipping the copy in this
          case is intended.

        Returns:
          bool: When deploying, whether at least one node was deployed;
            otherwise always True (the result of the copy is not checked).
        """

        self.hosts = get_oar_job_nodes(self.oar_job_id, self.frontend)

        if not self.use_kadeploy:
            # Plain reservation: just push the jar to every host
            TaktukPut(self.hosts, [self.jar_file], self.remote_dir).run()
            return True

        # Deployment requested: success means that some node was deployed
        deployed, _ = self.deploy_nodes()
        return len(deployed) != 0
Пример #4
0
    def _copy_conf(self, conf_dir, hosts=None):
        """Copy configuration files from given dir to remote dir in cluster
        hosts.

        Args:
          conf_dir (str):
            The directory containing the configuration files to copy. It is
            listed with os.listdir, so it must be readable from the machine
            running this code; the files are sent to self.conf_dir in the
            hosts.
          hosts (list of Host, optional):
            The list of hosts where the configuration is going to be copied. If
            not specified (or empty), all the hosts of the Spark cluster are
            used.
        """

        if not hosts:
            hosts = self.hosts

        conf_files = [os.path.join(conf_dir, f) for f in os.listdir(conf_dir)]

        action = TaktukPut(hosts, conf_files, self.conf_dir)
        action.run()

        if not action.finished_ok:
            # Logger.warn is a deprecated alias; use warning instead.
            logger.warning("Error while copying configuration")
            # Make sure no transfer is left running after a failure.
            if not action.ended:
                action.kill()
Пример #5
0
    def _copy_conf(self, conf_dir, hosts=None):
        """Copy configuration files from given dir to remote dir in cluster
        hosts.

        Args:
          conf_dir (str):
            The directory containing the configuration files to copy. It is
            listed with os.listdir, so it must be readable from the machine
            running this code; the files are sent to self.conf_dir in the
            hosts.
          hosts (list of Host, optional):
            The list of hosts where the configuration is going to be copied. If
            not specified (or empty), all the hosts of the Hadoop cluster are
            used.
        """

        if not hosts:
            hosts = self.hosts

        conf_files = [os.path.join(conf_dir, f) for f in os.listdir(conf_dir)]

        action = TaktukPut(hosts, conf_files, self.conf_dir)
        action.run()

        if not action.finished_ok:
            # Logger.warn is a deprecated alias; use warning instead.
            logger.warning("Error while copying configuration")
            # Make sure no transfer is left running after a failure.
            if not action.ended:
                action.kill()
Пример #6
0
    def bootstrap(self, tar_file):
        """Install Hadoop in all cluster nodes from the specified tar.gz file.

        The installation checks the Java version of the hosts, removes the
        base, configuration, logs and temp dirs, copies the tar file to /tmp
        in the hosts and decompresses it there, moves the result to the base
        dir, recreates the other dirs, appends the environment variables to
        hadoop-env.sh and, if the version check passes, generates the initial
        configuration.

        Args:
          tar_file (str):
            The file containing Hadoop binaries.

        Returns:
          bool: False if the version compliance check fails (the initial
            configuration is not generated in that case), True otherwise.

        Raises:
          HadoopException: If the Java version check of the hosts fails.
        """

        # 0. Check requirements
        required_java = 7
        if not check_java_version(required_java, self.hosts):
            error_msg = "Java 1.%d+ required" % required_java
            logger.error(error_msg)
            raise HadoopException(error_msg)

        self.java_home = get_java_home(self.master)

        # 1. Copy hadoop tar file and uncompress
        stale_dirs = [self.base_dir, self.conf_dir,
                      self.logs_dir, self.hadoop_temp_dir]
        clean_target = Remote("rm -rf " + " ".join(stale_dirs), self.hosts)
        logger.info("Cleaning target")
        clean_target.run()

        logger.info("Copy " + tar_file + " to hosts")
        upload = TaktukPut(self.hosts, [tar_file], "/tmp")
        upload.run()

        logger.info("Decompressing tar file on hosts")
        tar_name = os.path.basename(tar_file)
        unpack = Remote("tar xf /tmp/%s -C /tmp" % tar_name, self.hosts)
        drop_tar = Remote("rm /tmp/%s" % tar_name, self.hosts)
        SequentialActions([unpack, drop_tar]).run()

        # 2. Move installation to base dir and create other dirs
        logger.info("Create installation directories")
        # The name of the decompressed dir is taken to be that of the tar
        # file without its ".tar.gz" part
        unpacked_name = tar_name.replace(".tar.gz", "")
        move_install = Remote(
            "mv /tmp/" + unpacked_name + " " + self.base_dir,
            self.hosts)

        extra_dirs = (self.conf_dir, self.logs_dir, self.hadoop_temp_dir)
        make_dirs = Remote(
            " && ".join("mkdir -p " + d for d in extra_dirs),
            self.hosts)

        all_dirs = (self.base_dir,) + extra_dirs
        set_perms = Remote(
            " && ".join("chmod g+w " + d for d in all_dirs),
            self.hosts)

        SequentialActions([move_install, make_dirs, set_perms]).run()

        # 3. Specify environment variables (appended through a here-document)
        env_lines = [
            "cat >> " + self.conf_dir + "/hadoop-env.sh << EOF",
            "export JAVA_HOME=" + self.java_home,
            "export HADOOP_LOG_DIR=" + self.logs_dir,
            "HADOOP_HOME_WARN_SUPPRESS=\"TRUE\"",
            "EOF",
        ]
        Remote("\n".join(env_lines), self.hosts).run()

        # 4. Check version (cannot do it before)
        if self._check_version_compliance():
            # 5. Generate initial configuration
            self._initialize_conf()
            return True

        return False