def word_count(REDUCES=4):
    """Run the streaming word-count MapReduce job and save its output.

    Uploads everything under ``INPUT_PATH`` to the HDFS ``input`` directory,
    marks the mapper and reducer scripts as executable, runs the streaming
    job, and stores the job's stdout followed by its stderr in
    ``MAPRED_OUTPUT_FILE_PATH``.

    Args:
        REDUCES: Value passed to ``mapreduce.job.reduces``.
    """
    os.system(f"{HDFS_EXECUTABLE} dfs -put {INPUT_PATH}/* input")

    log.debug(f"Adding executable permissions to mapper file: {MAPPER_PATH}")
    os.system(f"chmod +x {MAPPER_PATH}")
    log.debug(
        f"Adding executable permissions to reducer file: {REDUCER_PATH}")
    os.system(f"chmod +x {REDUCER_PATH}")

    word_count_command = (
        f"{MAPRED_EXECUTABLE} streaming "
        f"-D mapreduce.job.reduces={REDUCES} "
        "-input input "
        "-output output "
        f"-mapper {MAPPER_PATH} "
        f"-reducer {REDUCER_PATH}"
    )

    log.debug('Executing Word Count Map Reduce command.')
    # With shell=True the command is handed to the shell as one string;
    # wrapping it in a list only happens to work on POSIX.
    result = subprocess.run(word_count_command,
                            stdout=subprocess.PIPE,
                            stderr=subprocess.PIPE,
                            shell=True)
    stderr = result.stderr.decode()
    stdout = result.stdout.decode()

    # Compare by value: `is not ''` tests object identity, which depends on
    # string interning and triggers a SyntaxWarning on Python 3.8+.
    if stderr.strip() != '':
        log.info(f"stdout:\n{stdout}")
        log.info(f"stderr:\n{stderr}")

    writeToFile(stdout, MAPRED_OUTPUT_FILE_PATH)
    appendToFile(stderr, MAPRED_OUTPUT_FILE_PATH)
    log.info(f"Output of mapreduce step saved to {MAPRED_OUTPUT_FILE_PATH}.")
def writeOutputs(self):
    """Used to write the output of the commands to the specified files.

    Each of ``self.standardOutputFile`` and ``self.standardErrorFile`` is
    checked with ``self.validateOutputFile``; a file that fails the check is
    not written and its status is recorded as -1. The writes happen while
    ``self.outputLock`` is held, when a lock is configured.

    When a write status is truthy and the corresponding file is set,
    ``self.stdError`` is replaced with a "Failed to write to file" message
    followed by the content that could not be written.

    Returns:
        ``self.results['exitCode']`` when it is truthy, otherwise the sum of
        the stdout and stderr write statuses.
    """
    lock = self.outputLock
    if lock is not None:
        lock.acquire()
    try:
        if self.validateOutputFile(self.standardOutputFile):
            stdoutStatus = writeToFile(self.results["stdOut"],
                                       self.standardOutputFile)
        else:
            stdoutStatus = -1
        if self.validateOutputFile(self.standardErrorFile):
            stderrStatus = writeToFile(self.results["stdError"],
                                       self.standardErrorFile)
        else:
            stderrStatus = -1
    finally:
        # Release even if validation or writing raises, so other users of the
        # lock are not blocked forever.
        if lock is not None:
            lock.release()

    # The file names are concatenated as text: encoding them to bytes first
    # makes the str + bytes concatenation raise TypeError on Python 3.
    if stdoutStatus and self.standardOutputFile is not None:
        self.stdError = ("Failed to write to file{" + self.standardOutputFile
                         + "}\r\n" + self.results["stdOut"])
    if stderrStatus and self.standardErrorFile is not None:
        self.stdError = ("Failed to write to file{" + self.standardErrorFile
                         + "}\r\n" + self.results["stdError"])

    if self.results['exitCode']:
        return self.results['exitCode']
    return stdoutStatus + stderrStatus
def writeOutputs(self):
    """Used to write the output of the commands to the specified files.

    The configured stdout/stderr file names are first passed through
    ``self.writeOutputsValidateOutputFile``; the captured output in
    ``self.results`` is then written to the names it returns. The whole
    sequence runs while ``self.outputLock`` is held, when a lock is
    configured.

    When a write status is truthy, ``self.stdError`` is replaced with a
    "Failed to write to file" message followed by the content that could not
    be written.

    Returns:
        ``self.results['exitCode']`` when it is truthy, otherwise the sum of
        the two write statuses.
    """
    lock = self.outputLock
    if lock is not None:
        lock.acquire()
    try:
        standardOut = self.writeOutputsValidateOutputFile(
            self.standardOutputFile)
        standardError = self.writeOutputsValidateOutputFile(
            self.standardErrorFile)

        # writeToFile takes (output, filename).
        stdoutStatus = writeToFile(self.results["stdOut"], standardOut)
        stderrStatus = writeToFile(self.results["stdError"], standardError)
    finally:
        # Release even if validation or writing raises, so other users of the
        # lock are not blocked forever.
        if lock is not None:
            lock.release()

    if stdoutStatus:
        self.stdError = ("Failed to write to file{" + standardOut + "}\r\n"
                         + self.results["stdOut"])
    if stderrStatus:
        self.stdError = ("Failed to write to file{" + standardError + "}\r\n"
                         + self.results["stdError"])

    if self.results['exitCode']:
        return self.results['exitCode']
    return stdoutStatus + stderrStatus
def write_output(self):
    """
    Persist each task's captured stdout/stderror to its configured files.

    Every task in ``self.groupTasks`` is handled while holding that task's
    ``outputLock``. A stream is written only when its target file attribute
    is truthy; a failed write is logged and does not stop the remaining
    writes.
    """
    # (attribute holding the target path, key of the stream in task.results)
    streams = (
        ("standardOutputFile", "stdout"),
        ("standardErrorFile", "stderror"),
    )
    for task in self.groupTasks:
        with task.outputLock:
            for path_attr, result_key in streams:
                target = getattr(task, path_attr)
                if not target:
                    continue
                try:
                    writeToFile(task.results[result_key], target)
                except Exception as err:
                    LOGGER.warning("Unable to write to: %s: %s",
                                   target, str(err))
                    LOGGER.exception(err)