def prepare_dataset(self, comb):
    """Prepare the dataset to be used in the next set of experiments.

    Args:
      comb (dict):
        The combination containing the dataset's parameters.

    Returns:
      dict: The dataset parameters.
    """

    # Create ds_comb
    (ds_class_name, ds_params) = self.comb_manager.get_ds_class_params(comb)

    local_path = ds_params["local_path"]
    remote_path = os.path.join(self.div_p2p.remote_dir,
                               os.path.basename(local_path))
    ds_comb = {"ds.class.path": remote_path, "ds.class": ds_class_name}

    # Copy dataset to host
    logger.info(self._th_prefix() + "Prepare dataset with combination " +
                str(self.comb_manager.get_ds_parameters(comb)))

    copy_code = TaktukPut([self.div_p2p.host], [local_path], remote_path)
    copy_code.run()

    # Notify stats manager
    self.stats_manager.add_ds(self.ds_id, comb)

    return ds_comb
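For reference, a minimal standalone sketch of the same TaktukPut copy step, assuming execo is installed; the host name and paths below are hypothetical placeholders, not values from the engine above.

from execo import Host, TaktukPut

host = Host("node-1.example.com")     # hypothetical target host
local_path = "/tmp/dataset.csv"       # hypothetical local dataset file
remote_dir = "/tmp/experiment"        # hypothetical remote directory

# Push the local file to the host and check that the transfer succeeded.
copy = TaktukPut([host], [local_path], remote_dir)
copy.run()
if not copy.finished_ok:
    print("Dataset copy failed")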
def _copy_conf(self, conf_dir, hosts=None):
    if not hosts:
        hosts = self.hosts

    conf_files = [os.path.join(conf_dir, f) for f in os.listdir(conf_dir)]

    action = TaktukPut(hosts, conf_files, self.conf_dir)
    action.run()

    if not action.finished_ok:
        logger.warn("Error while copying configuration")
        if not action.ended:
            action.kill()
def setup(self):
    """Set up the cluster of hosts: deploy the environment with kadeploy if
    requested, otherwise copy the executable jar to all the nodes.
    """

    self.hosts = get_oar_job_nodes(self.oar_job_id, self.frontend)

    if self.use_kadeploy:
        (deployed, undeployed) = self.deploy_nodes()
        return len(deployed) != 0
    else:
        copy_code = TaktukPut(self.hosts, [self.jar_file], self.remote_dir)
        copy_code.run()
        return True
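A minimal sketch of the non-kadeploy path above, assuming execo and execo_g5k are available; the OAR job id, frontend and paths are hypothetical placeholders.

from execo import TaktukPut
from execo_g5k import get_oar_job_nodes

oar_job_id = 123456                # hypothetical OAR job id
frontend = "rennes"                # hypothetical Grid'5000 frontend
jar_file = "/home/user/app.jar"    # hypothetical local jar file
remote_dir = "/tmp"                # hypothetical remote directory

# Resolve the nodes assigned to the job, then push the jar to all of them.
hosts = get_oar_job_nodes(oar_job_id, frontend)
copy_code = TaktukPut(hosts, [jar_file], remote_dir)
copy_code.run()
print("Copied jar to %d hosts" % len(hosts))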
def _copy_conf(self, conf_dir, hosts=None):
    """Copy configuration files from the given dir to the remote conf dir on
    the cluster hosts.

    Args:
      conf_dir (str):
        The local directory containing the configuration files to copy.
      hosts (list of Host, optional):
        The list of hosts where the configuration is going to be copied. If
        not specified, all the hosts of the Spark cluster are used.
    """

    if not hosts:
        hosts = self.hosts

    conf_files = [os.path.join(conf_dir, f) for f in os.listdir(conf_dir)]

    action = TaktukPut(hosts, conf_files, self.conf_dir)
    action.run()

    if not action.finished_ok:
        logger.warn("Error while copying configuration")
        if not action.ended:
            action.kill()
def _copy_conf(self, conf_dir, hosts=None):
    """Copy configuration files from the given dir to the remote conf dir on
    the cluster hosts.

    Args:
      conf_dir (str):
        The local directory containing the configuration files to copy.
      hosts (list of Host, optional):
        The list of hosts where the configuration is going to be copied. If
        not specified, all the hosts of the Hadoop cluster are used.
    """

    if not hosts:
        hosts = self.hosts

    conf_files = [os.path.join(conf_dir, f) for f in os.listdir(conf_dir)]

    action = TaktukPut(hosts, conf_files, self.conf_dir)
    action.run()

    if not action.finished_ok:
        logger.warn("Error while copying configuration")
        if not action.ended:
            action.kill()
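A minimal sketch of the configuration-push pattern that _copy_conf relies on: generate the files in a local directory, then send every file to all hosts with a single TaktukPut. The hosts and paths below are hypothetical placeholders.

import os
import tempfile

from execo import Host, TaktukPut

hosts = [Host("node-1.example.com"), Host("node-2.example.com")]  # hypothetical
remote_conf_dir = "/tmp/hadoop/conf"                              # hypothetical

# Write the configuration files locally first...
local_conf_dir = tempfile.mkdtemp()
with open(os.path.join(local_conf_dir, "core-site.xml"), "w") as f:
    f.write("<configuration></configuration>\n")

# ...then push every file in the directory to all hosts at once.
conf_files = [os.path.join(local_conf_dir, f) for f in os.listdir(local_conf_dir)]
action = TaktukPut(hosts, conf_files, remote_conf_dir)
action.run()
if not action.finished_ok:
    print("Error while copying configuration")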
def bootstrap(self, tar_file):
    """Install Hadoop in all cluster nodes from the specified tar.gz file.

    Args:
      tar_file (str):
        The file containing Hadoop binaries.
    """

    # 0. Check requirements
    java_major_version = 7
    if not check_java_version(java_major_version, self.hosts):
        msg = "Java 1.%d+ required" % java_major_version
        logger.error(msg)
        raise HadoopException(msg)

    self.java_home = get_java_home(self.master)

    # 1. Copy hadoop tar file and uncompress
    rm_command = ("rm -rf " + self.base_dir +
                  " " + self.conf_dir +
                  " " + self.logs_dir +
                  " " + self.hadoop_temp_dir)
    rm_dirs = Remote(rm_command, self.hosts)
    logger.info("Cleaning target")
    rm_dirs.run()

    logger.info("Copy " + tar_file + " to hosts")
    put_tar = TaktukPut(self.hosts, [tar_file], "/tmp")
    put_tar.run()

    logger.info("Decompressing tar file on hosts")
    tar_xf = Remote(
        "tar xf /tmp/" + os.path.basename(tar_file) + " -C /tmp",
        self.hosts)
    rm_tar = Remote(
        "rm /tmp/" + os.path.basename(tar_file),
        self.hosts)
    SequentialActions([tar_xf, rm_tar]).run()

    # 2. Move installation to base dir and create other dirs
    logger.info("Create installation directories")
    mv_base_dir = Remote(
        "mv /tmp/" + os.path.basename(tar_file).replace(".tar.gz", "") +
        " " + self.base_dir,
        self.hosts)
    mkdirs = Remote("mkdir -p " + self.conf_dir +
                    " && mkdir -p " + self.logs_dir +
                    " && mkdir -p " + self.hadoop_temp_dir,
                    self.hosts)
    chmods = Remote("chmod g+w " + self.base_dir +
                    " && chmod g+w " + self.conf_dir +
                    " && chmod g+w " + self.logs_dir +
                    " && chmod g+w " + self.hadoop_temp_dir,
                    self.hosts)
    SequentialActions([mv_base_dir, mkdirs, chmods]).run()

    # 4. Specify environment variables
    command = "cat >> " + self.conf_dir + "/hadoop-env.sh << EOF\n"
    command += "export JAVA_HOME=" + self.java_home + "\n"
    command += "export HADOOP_LOG_DIR=" + self.logs_dir + "\n"
    command += "HADOOP_HOME_WARN_SUPPRESS=\"TRUE\"\n"
    command += "EOF"
    action = Remote(command, self.hosts)
    action.run()

    # 5. Check version (cannot do it before)
    if not self._check_version_compliance():
        return False

    # 6. Generate initial configuration
    self._initialize_conf()

    return True
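The copy-and-decompress step (step 1 above) in isolation, as a rough sketch using the same execo actions; hosts, tarball and target directory are hypothetical placeholders, and the remote commands assume a standard GNU userland on the nodes.

import os

from execo import Host, Remote, SequentialActions, TaktukPut

hosts = [Host("node-1.example.com"), Host("node-2.example.com")]  # hypothetical
tar_file = "/home/user/hadoop-2.7.7.tar.gz"                       # hypothetical
base_dir = "/tmp/hadoop"                                          # hypothetical

# Push the tarball to /tmp on every host.
TaktukPut(hosts, [tar_file], "/tmp").run()

# Decompress it, move the extracted tree to base_dir and drop the tarball.
tar_name = os.path.basename(tar_file)
tar_xf = Remote("tar xf /tmp/" + tar_name + " -C /tmp", hosts)
mv_dir = Remote("mv /tmp/" + tar_name.replace(".tar.gz", "") + " " + base_dir, hosts)
rm_tar = Remote("rm /tmp/" + tar_name, hosts)
SequentialActions([tar_xf, mv_dir, rm_tar]).run()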