def execute_job(self, job, node=None, verbose=True):
    """Execute the given Spark job in the specified node.

    Args:
      job (SparkJob):
        The job object.
      node (Host, optional):
        The host where the command should be executed. If not provided,
        self.master is chosen.
      verbose (bool, optional):
        If True, stdout and stderr of the remote process are displayed.

    Returns (tuple of str):
      A tuple with the standard and error outputs of the process executing
      the job.
    """

    if not self.running:
        logger.warn("The cluster was stopped. Starting it automatically")
        self.start()

    if node is None:
        node = self.master

    exec_dir = "/tmp"

    # Copy necessary files to cluster
    files_to_copy = job.get_files_to_copy()
    action = Put([node], files_to_copy, exec_dir)
    action.run()

    # Get command
    command = job.get_command(exec_dir)

    # Execute
    logger.info("Executing spark job. Command = {" + self.bin_dir +
                "/spark-submit " + command + "} in " + str(node))

    proc = SshProcess(self.bin_dir + "/spark-submit " + command, node)

    if verbose:
        red_color = '\033[01;31m'
        proc.stdout_handlers.append(sys.stdout)
        proc.stderr_handlers.append(
            ColorDecorator(sys.stderr, red_color))

    proc.start()
    proc.wait()

    # Get job info
    job.stdout = proc.stdout
    job.stderr = proc.stderr
    job.success = (proc.exit_code == 0)

    return proc.stdout, proc.stderr
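# Usage sketch for execute_job above (illustrative only): "cluster" is assumed
# to be an already configured and started Spark cluster object exposing this
# method, and "job" an existing SparkJob instance; neither is defined in this
# snippet, and run_and_report is a hypothetical helper.
def run_and_report(cluster, job):
    out, err = cluster.execute_job(job, verbose=False)
    if job.success:
        logger.info("Spark job finished correctly")
    else:
        logger.error("Spark job failed:\n" + err)
    return out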
def change_conf(self, params):
    """Create a new properties file from configuration and transfer it to
    the host.

    Args:
      params (dict):
        The parameters of the test.
    """

    # Create a local temporary file with the params
    (_, temp_file) = tempfile.mkstemp("", "div_p2p-conf-", "/tmp")
    props = open(temp_file, "w")
    for key in params:
        props.write(str(key) + "=" + str(params[key]) + "\n")
    props.close()

    # Copy the file to the remote location
    copy_props = Put([self.host], [temp_file], self.props_path)
    copy_props.run()

    # Remove temporary file
    os.remove(temp_file)
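# Usage sketch for change_conf (illustrative only): "engine" stands for the
# object that owns the method, and the parameter names are hypothetical
# placeholders, not options defined by this module.
def apply_test_params(engine):
    engine.change_conf({
        "test.iterations": 10,
        "test.input.size": "1G",
    })
    # The generated properties file would contain:
    #   test.iterations=10
    #   test.input.size=1G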
def bootstrap(self, tar_file):

    # 1. Remove used dirs if existing
    action = Remote("rm -rf " + self.base_dir, self.hc.hosts)
    action.run()
    action = Remote("rm -rf " + self.conf_dir, self.hc.hosts)
    action.run()

    # 2. Copy Mahout tar file and uncompress
    logger.info("Copy " + tar_file + " to hosts and uncompress")
    action = Put(self.hc.hosts, [tar_file], "/tmp")
    action.run()
    action = Remote(
        "tar xf /tmp/" + os.path.basename(tar_file) + " -C /tmp",
        self.hc.hosts)
    action.run()

    # 3. Move installation to base dir
    logger.info("Create installation directories")
    action = Remote(
        "mv /tmp/" + os.path.basename(tar_file).replace(".tar.gz", "") +
        " " + self.base_dir,
        self.hc.hosts)
    action.run()

    # 4. Create other dirs
    action = Remote("mkdir -p " + self.conf_dir, self.hc.hosts)
    action.run()

    # 5. Include libraries in Hadoop's classpath
    list_dirs = SshProcess("ls -1 " + self.base_dir + "/*.jar",
                           self.hc.master)
    list_dirs.run()
    libs = " ".join(list_dirs.stdout.splitlines())
    action = Remote("cp " + libs + " " + self.hc.base_dir + "/lib",
                    self.hc.hosts)
    action.run()

    self.initialized = True  # No need to call initialize()
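# Usage sketch for bootstrap (illustrative only): "mahout" stands for the
# object owning the method and the tarball path is a placeholder. Note that
# the "mv" step above assumes the archive unpacks into a directory named
# after the file without its ".tar.gz" suffix.
def install_mahout(mahout):
    mahout.bootstrap("/home/user/mahout-distribution-0.9.tar.gz")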
# Per-host copy closure: tmp_dir, dest, hc and self.pre_load_function come
# from the enclosing scope.
def copy_function(host, files_to_copy, collector=None):
    # Copy the files to a temporary dir on the host
    action = Put([host], files_to_copy, tmp_dir)
    action.run()

    local_final_size = 0

    for f in files_to_copy:
        src_file = os.path.join(tmp_dir, os.path.basename(f))
        if self.pre_load_function:
            src_file = self.pre_load_function(src_file, host)

        # Measure the (possibly transformed) file size
        action = SshProcess("du -b " + src_file + "| cut -f1", host)
        action.run()
        local_final_size += int(action.stdout.strip())

        # Load the file into the dfs
        hc.execute("fs -put " + src_file + " " +
                   os.path.join(dest, os.path.basename(src_file)),
                   host, True, False)

    if collector:
        collector.increment(local_final_size)
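# Sketch of how a per-host copy_function like the one above could be fanned
# out in parallel (illustrative only; the use of ThreadPoolExecutor is an
# assumption, not necessarily how the original code parallelises the copies).
from concurrent.futures import ThreadPoolExecutor

def copy_to_all(hosts, files_to_copy, copy_function, collector=None):
    with ThreadPoolExecutor(max_workers=max(1, len(hosts))) as pool:
        futures = [pool.submit(copy_function, h, files_to_copy, collector)
                   for h in hosts]
        for f in futures:
            f.result()  # propagate any per-host failure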
def execute_job(self, job, node=None, verbose=True):
    """Execute the given MapReduce job in the specified node.

    Args:
      job (HadoopJarJob):
        The job object.
      node (Host, optional):
        The host where the command should be executed. If not provided,
        self.master is chosen.
      verbose (bool, optional):
        If True, stdout and stderr of the remote process are displayed.

    Returns (tuple of str):
      A tuple with the standard and error outputs of the process executing
      the job.
    """

    self._check_initialization()

    if not self.running:
        logger.warn("The cluster was stopped. Starting it automatically")
        self.start()

    if not node:
        node = self.master

    exec_dir = "/tmp"

    # Copy necessary files to cluster
    files_to_copy = job.get_files_to_copy()
    action = Put([node], files_to_copy, exec_dir)
    action.run()

    # Get command
    command = job.get_command(exec_dir)

    # Execute
    logger.info("Executing jar job. Command = {" + self.bin_dir +
                "/hadoop " + command + "} in " + str(node))

    proc = SshProcess(self.bin_dir + "/hadoop " + command, node)

    if verbose:
        red_color = '\033[01;31m'
        proc.stdout_handlers.append(sys.stdout)
        proc.stderr_handlers.append(
            ColorDecorator(sys.stderr, red_color))

    proc.start()
    proc.wait()

    # Get job info
    job.stdout = proc.stdout
    job.stderr = proc.stderr
    job.success = (proc.exit_code == 0)

    # Try to extract the job id from the output
    for line in job.stdout.splitlines():
        if "Running job" in line:
            if "mapred.JobClient" in line or "mapreduce.Job" in line:
                # TODO: more possible formats?
                try:
                    match = re.match('.*Running job: (.*)', line)
                    job.job_id = match.group(1)
                    break
                except:
                    pass

    return (proc.stdout, proc.stderr)
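# Usage sketch for execute_job above (illustrative only): "cluster" is assumed
# to be the Hadoop cluster object exposing this method; the HadoopJarJob
# constructor arguments, jar path and job parameters shown here are
# assumptions, not taken from the original code.
def run_wordcount(cluster):
    job = HadoopJarJob("/tmp/hadoop-examples.jar",
                       ["wordcount", "input_dir", "output_dir"])
    cluster.execute_job(job, verbose=False)
    if job.success:
        logger.info("MapReduce job " + str(job.job_id) + " completed")
    return job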
jobs_specs = get_jobs_specs(wanted, name=job_name)
for sub, frontend in jobs_specs:
    sub.walltime = walltime
    sub.job_type = "deploy"
job = oarsub(jobs_specs)[0]
nodes = get_oar_job_nodes(job[0], job[1])
logger.info('Deploying host %s', nodes[0].address)
deployed, undeployed = deploy(Deployment(nodes,
                                          env_name="jessie-x64-base"))
execware_host = list(deployed)[0]
logger.info('Installing required packages %s', style.emph(packages))
install_packages = SshProcess('apt-get update && apt-get install -y ' +
                              packages,
                              execware_host).run()
logger.info('Copying files to host')
put_files = Put(execware_host, [source_code],
                remote_location="/tmp").run()
xml_file = """
<settings>
  <proxies>
    <proxy>
      <id>g5k-proxy</id>
      <active>true</active>
      <protocol>http</protocol>
      <host>proxy</host>
      <port>3128</port>
    </proxy>
    <proxy>
      <id>g5k-proxy-https</id>
      <active>true</active>
      <protocol>https</protocol>
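# The snippet above is cut off inside the settings string. One plausible way
# to push such a string to the deployed host, mirroring the temp-file + Put
# pattern used in change_conf earlier, is sketched below (illustrative only;
# push_settings and the remote directory are placeholders, not taken from the
# original script).
def push_settings(host, settings_xml):
    fd, local_path = tempfile.mkstemp(suffix=".xml")
    with os.fdopen(fd, "w") as f:
        f.write(settings_xml)
    Put([host], [local_path], remote_location="/root/.m2").run()
    os.remove(local_path)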