def run(self):
    log.info("Setting up directories for {}".format(self.swathDir))

    swathDirName = os.path.basename(self.swathDir)
    workspaceRoot = os.path.join(self.paths["processingDir"], swathDirName)

    # Create the working, state and tmp directories for this swath;
    # exist_ok makes re-runs against an existing workspace safe.
    workingFileRoot = os.path.join(workspaceRoot, "working")
    os.makedirs(workingFileRoot, exist_ok=True)

    stateFileRoot = os.path.join(workspaceRoot, "state")
    os.makedirs(stateFileRoot, exist_ok=True)

    localTmpDir = os.path.join(workingFileRoot, "tmp")
    os.makedirs(localTmpDir, exist_ok=True)

    outputFile = {
        "swathDir": self.swathDir,
        "workspaceRoot": workspaceRoot,
        "workingFileRoot": workingFileRoot,
        "stateFileRoot": stateFileRoot,
        "localTmpDir": localTmpDir,
        "demFilename": self.demFilename,
        "arcsiReprojection": self.arcsiReprojection,
        "outWktFilename": self.outWktFilename,
        "projAbbv": self.projAbbv,
        "metadataConfigFile": self.metadataConfigFile,
        "metadataTemplate": self.metadataTemplate,
        "maxCogProcesses": self.maxCogProcesses
    }

    with self.output().open("w") as outFile:
        outFile.write(wc.getFormattedJson(outputFile))
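# A standalone sketch (illustration only, not part of the task) of the
# workspace layout the method above creates; the processing root and swath
# name here are hypothetical.
import os

def _sketchWorkspace(processingDir="/tmp/processing", swathDirName="swath_001"):
    workspaceRoot = os.path.join(processingDir, swathDirName)
    workingFileRoot = os.path.join(workspaceRoot, "working")  # intermediates
    stateFileRoot = os.path.join(workspaceRoot, "state")      # task state JSON
    localTmpDir = os.path.join(workingFileRoot, "tmp")        # scratch space
    for path in (workingFileRoot, stateFileRoot, localTmpDir):
        os.makedirs(path, exist_ok=True)  # idempotent across re-runs
    return workspaceRoot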
def run(self):
    with self.input().open('r') as getInputSwathsInfo:
        getInputSwaths = json.load(getInputSwathsInfo)

    # Schedule one SetupWorkDir task per swath; yielding the list makes
    # Luigi treat them as dynamic dependencies and run them all before
    # this method resumes.
    tasks = []
    for swath in getInputSwaths["swaths"]:
        task = SetupWorkDir(
            swathDir=swath["swathDir"],
            paths=self.paths,
            demFilename=self.demFilename,
            arcsiReprojection=self.arcsiReprojection,
            outWktFilename=self.outWktFilename,
            projAbbv=self.projAbbv,
            metadataConfigFile=self.metadataConfigFile,
            metadataTemplate=self.metadataTemplate,
            maxCogProcesses=self.maxCogProcesses)
        tasks.append(task)

    yield tasks

    # Collate the output of every completed SetupWorkDir task.
    outputFile = {"swathSetups": []}
    for task in tasks:
        with task.output().open('r') as taskOutput:
            swathSetup = json.load(taskOutput)
            outputFile["swathSetups"].append(swathSetup)

    with self.output().open("w") as outFile:
        outFile.write(wc.getFormattedJson(outputFile))
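# A minimal standalone sketch of Luigi's dynamic-dependency pattern used
# above: yielding a list of tasks from run() pauses this task until every
# yielded task is complete. Task names and paths here are illustrative only.
import luigi

class _Child(luigi.Task):
    name = luigi.Parameter()

    def output(self):
        return luigi.LocalTarget("/tmp/{}.done".format(self.name))

    def run(self):
        with self.output().open("w") as f:
            f.write("done")

class _Parent(luigi.Task):
    def output(self):
        return luigi.LocalTarget("/tmp/parent.done")

    def run(self):
        children = [_Child(name=str(i)) for i in range(3)]
        yield children  # scheduler runs all children, then resumes here
        with self.output().open("w") as f:
            f.write("all children complete")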
def run(self): basketDir = self.paths["basketDir"] inputFiles = [] for inputFile in glob.glob(os.path.join(basketDir, "S1*")): inputFiles.append(inputFile) outputFile = {"basket": basketDir, "inputFiles": inputFiles} with self.output().open("w") as outFile: outFile.write(wc.getFormattedJson(outputFile))
def run(self): basketDir = self.paths["basketDir"] basketSubDirs = next(os.walk(basketDir))[1] swaths = [] for subDir in basketSubDirs: subDirProducts = [] swathDir = os.path.join(basketDir, subDir) for product in glob.glob(os.path.join(swathDir, "S2*")): subDirProducts.append(product) if len(subDirProducts): swath = {"swathDir": swathDir, "productPaths": subDirProducts} swaths.append(swath) outputFile = {"basket": basketDir, "swaths": swaths} with self.output().open("w") as outFile: outFile.write(wc.getFormattedJson(outputFile))
def run(self):
    try:
        outputFile = {
            "productId": self.productName,
            "sbatchScriptPath": self.sbatchScriptPath,
            "jobId": None,
            "submitTime": None
        }

        if self.testProcessing:
            # Fake a queue listing so the job-ID regex below still matches.
            randomJobId = random.randint(1000000, 9999999)
            outputString = "JOBID USER STAT QUEUE FROM_HOST EXEC_HOST JOB_NAME SUBMIT_TIME\n" \
                + str(randomJobId) + " test001 RUN short-serial jasmin-sci1 16*host290. my-job1 Nov 16 16:51"
        else:
            sbatchCmd = "sbatch {}".format(self.sbatchScriptPath)
            log.info("Submitting job using command: %s", sbatchCmd)
            output = subprocess.check_output(
                sbatchCmd,
                stderr=subprocess.STDOUT,
                shell=True)
            outputString = output.decode("utf-8")

        # The job ID is the first run of at least five digits in the output.
        regex = '[0-9]{5,}'
        match = re.search(regex, outputString)
        self.jobId = match.group(0)

        log.info(
            "Successfully submitted lotus job <%s> for %s using sbatch script: %s",
            self.jobId, self.productName, self.sbatchScriptPath)

        outputFile["jobId"] = self.jobId
        outputFile["submitTime"] = str(datetime.datetime.now())

        with self.output().open('w') as out:
            out.write(wc.getFormattedJson(outputFile))

    except subprocess.CalledProcessError as e:
        errStr = "command '{}' returned with error (code {}): {}".format(
            e.cmd, e.returncode, e.output)
        log.error(errStr)
        raise RuntimeError(errStr)
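# The '[0-9]{5,}' pattern above grabs the first run of five or more digits,
# which matches both the fake test output and real sbatch output. A stricter
# sketch anchored to sbatch's usual confirmation line
# ("Submitted batch job 123456"); _parseSbatchJobId is illustrative only.
import re

def _parseSbatchJobId(outputString):
    match = re.search(r"Submitted batch job (\d+)", outputString)
    return match.group(1) if match else None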
def run(self):
    with self.input().open('r') as getInputsInfo:
        getInputs = json.load(getInputsInfo)

    tasks = []
    for inputFile in getInputs["inputFiles"]:
        task = SetupWorkDir(
            inputPath=inputFile,
            paths=self.paths,
            spatialConfig=self.spatialConfig,
            removeSourceFile=True)
        tasks.append(task)

    yield tasks

    outputFile = {"productSetups": []}
    for task in tasks:
        with task.output().open('r') as taskOutput:
            productSetup = json.load(taskOutput)
            outputFile["productSetups"].append(productSetup)

    with self.output().open("w") as outFile:
        outFile.write(wc.getFormattedJson(outputFile))
def run(self):
    with self.input()[0].open('r') as getInputSwathsInfo:
        getInputSwaths = json.load(getInputSwathsInfo)

    with self.input()[1].open('r') as setupWorkDirsInfo:
        setupWorkDirs = json.load(setupWorkDirsInfo)

    basketDir = self.paths["basketDir"]

    with open(os.path.join(self.paths["templatesDir"],
                           's2_serial_GenerateReport_job_template.bsub'), 'r') as t:
        bsubTemplate = Template(t.read())

    tasks = []
    for swathSetup in setupWorkDirs["swathSetups"]:
        productName = wc.getProductNameFromPath(swathSetup["swathDir"])

        # Granule count drives the job's maximum run time.
        for swath in getInputSwaths["swaths"]:
            if swath["swathDir"] == swathSetup["swathDir"]:
                noOfGranules = len(swath["productPaths"])
                break

        arcsiReprojection = "--outWkt={} --projAbbv={}".format(
            self.outWktFilename, self.projAbbv) if self.arcsiReprojection else ""

        metadataTemplate = ""
        if self.metadataTemplate is not None:
            metadataTemplate = "--metadataTemplate={}".format(self.metadataTemplate)

        arcsiCmdTemplate = ""
        if self.arcsiCmdTemplate is not None:
            arcsiCmdTemplate = "--arcsiCmdTemplate={}".format(self.arcsiCmdTemplate)

        reportFileName = "{}-{}.csv".format(
            os.path.basename(self.paths["basketDir"]),
            datetime.now().strftime("%Y%m%d%H%M"))

        bsubParams = {
            "maxRunTime": noOfGranules * self.hoursPerGranule,
            "jobWorkingDir": swathSetup["workspaceRoot"],
            "workingMount": swathSetup["workingFileRoot"],
            "stateMount": swathSetup["stateFileRoot"],
            "inputMount": swathSetup["swathDir"],
            "staticMount": self.paths["staticDir"],
            "outputMount": self.paths["outputDir"],
            "s2ArdContainer": self.paths["singularityImgPath"],
            "dem": self.demFilename,
            "arcsiReprojection": arcsiReprojection,
            "metadataConfigFile": self.metadataConfigFile,
            "metadataTemplate": metadataTemplate,
            "arcsiCmdTemplate": arcsiCmdTemplate,
            "reportFileName": reportFileName,
            "reportMount": self.paths["reportDir"],
            "databaseMount": self.paths["databaseDir"]
        }

        bsub = bsubTemplate.substitute(bsubParams)

        bsubScriptPath = os.path.join(
            swathSetup["workspaceRoot"],
            "submit_GenerateReport_job_for_{}.bsub".format(productName))

        with open(bsubScriptPath, 'w') as bsubScriptFile:
            bsubScriptFile.write(bsub)

        tasks.append(
            SubmitJob(paths=self.paths,
                      productName=productName,
                      bsubScriptPath=bsubScriptPath,
                      testProcessing=self.testProcessing))

    yield tasks

    outputFile = {"basket": basketDir, "submittedSwaths": []}
    for task in tasks:
        with task.output().open('r') as taskOutput:
            submittedSwath = json.load(taskOutput)
            outputFile["submittedSwaths"].append(submittedSwath)

    with self.output().open("w") as outFile:
        outFile.write(wc.getFormattedJson(outputFile))
def run(self):
    with self.input().open('r') as setupWorkDirsInfo:
        setupWorkDirs = json.load(setupWorkDirsInfo)

    basketDir = self.paths["basketDir"]

    with open(os.path.join(self.paths["templatesDir"], 's1_job_template.sbatch'), 'r') as t:
        sbatchTemplate = Template(t.read())

    reportFileName = "{}-{}.csv".format(
        os.path.basename(self.paths["basketDir"]),
        datetime.now().strftime("%Y%m%d%H%M"))

    tasks = []
    for productSetup in setupWorkDirs["productSetups"]:
        productName = wc.getProductNameFromPath(productSetup["inputPath"])
        inputDir = Path(productSetup["inputPath"]).parent

        removeSourceFileFlag = "--removeInputFile" if self.removeSourceFile else ""

        sbatchParams = {
            "jobWorkingDir": productSetup["workspaceRoot"],
            "reportMount": self.paths["reportDir"],
            "databaseMount": self.paths["databaseDir"],
            "workingMount": productSetup["workingFileRoot"],
            "stateMount": productSetup["stateFileRoot"],
            "inputMount": inputDir,
            "staticMount": self.paths["staticDir"],
            "outputMount": self.paths["outputDir"],
            "s1ArdContainer": self.paths["singularityImgPath"],
            "productName": productName,
            "snapConfigUtmProj": self.spatialConfig["snapConfigUtmProj"],
            "snapConfigCentralMeridian": self.spatialConfig["snapConfigCentralMeridian"],
            "snapConfigFalseNorthing": self.spatialConfig["snapConfigFalseNorthing"],
            "snapRunArguments": self.spatialConfig["snapRunArguments"],
            "sourceSrs": self.spatialConfig["sourceSrs"],
            "targetSrs": self.spatialConfig["targetSrs"],
            "filenameDemData": self.spatialConfig["filenameDemData"],
            "filenameSrs": self.spatialConfig["filenameSrs"],
            "demFilename": self.spatialConfig["demFilename"],
            "demTitle": self.spatialConfig["demTitle"],
            "metadataProjection": self.spatialConfig["metadataProjection"],
            "metadataPlaceName": self.spatialConfig["metadataPlaceName"],
            "metadataParentPlaceName": self.spatialConfig["metadataParentPlaceName"],
            "removeSourceFileFlag": removeSourceFileFlag,
            "reportFileName": reportFileName
        }

        sbatch = sbatchTemplate.substitute(sbatchParams)

        sbatchScriptPath = os.path.join(productSetup["workspaceRoot"], "process_s1_ard.sbatch")
        with open(sbatchScriptPath, 'w') as sbatchScriptFile:
            sbatchScriptFile.write(sbatch)

        task = SubmitJob(
            paths=self.paths,
            productName=productName,
            sbatchScriptPath=sbatchScriptPath,
            testProcessing=self.testProcessing)
        tasks.append(task)

    yield tasks

    outputFile = {
        "basket": basketDir,
        "submittedProducts": []
    }

    for task in tasks:
        with task.output().open('r') as taskOutput:
            submittedProduct = json.load(taskOutput)
            outputFile["submittedProducts"].append(submittedProduct)

    with self.output().open("w") as outFile:
        outFile.write(wc.getFormattedJson(outputFile))
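# The job scripts are rendered with string.Template, which replaces ${name}
# placeholders and raises KeyError from substitute() if a placeholder has no
# mapping. A minimal sketch with a hypothetical two-line template (the real
# s1_job_template.sbatch is not shown here):
from string import Template

def _renderSketch():
    template = Template(
        "#SBATCH --chdir=${jobWorkingDir}\n"
        "singularity exec ${s1ArdContainer} process ${productName}\n")
    # safe_substitute() would instead leave unmapped placeholders intact.
    return template.substitute(
        jobWorkingDir="/tmp/work",           # hypothetical values
        s1ArdContainer="/imgs/s1-ard.sif",
        productName="S1A_example_product")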
def run(self):
    with self.input()[0].open('r') as setupWorkDirsInfo:
        setupWorkDirs = json.load(setupWorkDirsInfo)

    with self.input()[1].open('r') as submitProcessRawToArdJobsInfo:
        prepareArdProcessingJobs = json.load(submitProcessRawToArdJobsInfo)

    basketDir = self.paths["basketDir"]

    with open(os.path.join(self.paths["templatesDir"],
                           's2_mpi_GenerateReport_job_template.sbatch'), 'r') as t:
        sbatchTemplate = Template(t.read())

    tasks = []
    for swathSetup in setupWorkDirs["swathSetups"]:
        productName = wc.getProductNameFromPath(swathSetup["swathDir"])

        # Chain this report job onto the ARD-processing job submitted upstream.
        for submittedSwath in prepareArdProcessingJobs["submittedSwaths"]:
            if submittedSwath["productId"] == productName:
                upstreamJobId = submittedSwath["jobId"]
                break

        arcsiReprojection = "--outWkt={} --projAbbv={}".format(
            self.outWktFilename, self.projAbbv) if self.arcsiReprojection else ""

        metadataTemplate = ""
        if self.metadataTemplate is not None:
            metadataTemplate = "--metadataTemplate={}".format(self.metadataTemplate)

        arcsiCmdTemplate = ""
        if self.arcsiCmdTemplate is not None:
            arcsiCmdTemplate = "--arcsiCmdTemplate={}".format(self.arcsiCmdTemplate)

        reportFileName = "{}-{}.csv".format(
            os.path.basename(self.paths["basketDir"]),
            datetime.now().strftime("%Y%m%d%H%M"))

        sbatchParams = {
            "upstreamJobId": upstreamJobId,
            "jobWorkingDir": swathSetup["workspaceRoot"],
            "workingMount": swathSetup["workingFileRoot"],
            "stateMount": swathSetup["stateFileRoot"],
            "inputMount": swathSetup["swathDir"],
            "staticMount": self.paths["staticDir"],
            "outputMount": self.paths["outputDir"],
            "s2ArdContainer": self.paths["singularityImgPath"],
            "arcsiReprojection": arcsiReprojection,
            "dem": self.demFilename,
            "metadataConfigFile": self.metadataConfigFile,
            "metadataTemplate": metadataTemplate,
            "arcsiCmdTemplate": arcsiCmdTemplate,
            "maxCogProcesses": self.maxCogProcesses,
            "reportFileName": reportFileName,
            "reportMount": self.paths["reportDir"],
            "databaseMount": self.paths["databaseDir"]
        }

        sbatch = sbatchTemplate.substitute(sbatchParams)

        sbatchScriptPath = os.path.join(
            swathSetup["workspaceRoot"],
            "submit_GenerateReport_job_for_{}.sbatch".format(productName))

        with open(sbatchScriptPath, 'w') as sbatchScriptFile:
            sbatchScriptFile.write(sbatch)

        tasks.append(
            SubmitJob(paths=self.paths,
                      productName=productName,
                      sbatchScriptPath=sbatchScriptPath,
                      testProcessing=self.testProcessing))

    yield tasks

    outputFile = {"basket": basketDir, "submittedSwaths": []}
    for task in tasks:
        with task.output().open('r') as taskOutput:
            submittedSwath = json.load(taskOutput)
            outputFile["submittedSwaths"].append(submittedSwath)

    with self.output().open("w") as outFile:
        outFile.write(wc.getFormattedJson(outputFile))
def run(self):
    with self.input()[0].open('r') as getInputSwathsInfo:
        getInputSwaths = json.load(getInputSwathsInfo)

    with self.input()[1].open('r') as setupWorkDirsInfo:
        setupWorkDirs = json.load(setupWorkDirsInfo)

    with self.input()[2].open('r') as submitPrepareArdProcessingJobsInfo:
        prepareArdProcessingJobs = json.load(submitPrepareArdProcessingJobsInfo)

    basketDir = self.paths["basketDir"]

    with open(os.path.join(self.paths["templatesDir"],
                           's2_mpi_ProcessRawToArd_job_template.sbatch'), 'r') as t:
        sbatchTemplate = Template(t.read())

    tasks = []
    for swathSetup in setupWorkDirs["swathSetups"]:
        productName = wc.getProductNameFromPath(swathSetup["swathDir"])

        # Granule count sets the MPI node count below: one node per granule
        # plus one extra, presumably for the coordinating rank.
        for swath in getInputSwaths["swaths"]:
            if swath["swathDir"] == swathSetup["swathDir"]:
                noOfGranules = len(swath["productPaths"])
                break

        # Chain this job onto the preparation job submitted upstream.
        for submittedSwath in prepareArdProcessingJobs["submittedSwaths"]:
            if submittedSwath["productId"] == productName:
                upstreamJobId = submittedSwath["jobId"]
                break

        testProcessing = "--testProcessing" if self.testProcessing else ""

        sbatchParams = {
            "upstreamJobId": upstreamJobId,
            "nodes": noOfGranules + 1,
            "jobWorkingDir": swathSetup["workspaceRoot"],
            "workingMount": swathSetup["workingFileRoot"],
            "stateMount": swathSetup["stateFileRoot"],
            "inputMount": swathSetup["swathDir"],
            "staticMount": self.paths["staticDir"],
            "singularityDir": self.paths["singularityDir"],
            "arcsiContainer": self.paths["arcsiMpiBaseImg"],
            "testProcessing": testProcessing
        }

        sbatch = sbatchTemplate.substitute(sbatchParams)

        sbatchScriptPath = os.path.join(
            swathSetup["workspaceRoot"],
            "submit_ProcessRawToArd_job_for_{}.sbatch".format(productName))

        with open(sbatchScriptPath, 'w') as sbatchScriptFile:
            sbatchScriptFile.write(sbatch)

        tasks.append(
            SubmitJob(paths=self.paths,
                      productName=productName,
                      sbatchScriptPath=sbatchScriptPath,
                      testProcessing=self.testProcessing))

    yield tasks

    outputFile = {"basket": basketDir, "submittedSwaths": []}
    for task in tasks:
        with task.output().open('r') as taskOutput:
            submittedSwath = json.load(taskOutput)
            outputFile["submittedSwaths"].append(submittedSwath)

    with self.output().open("w") as outFile:
        outFile.write(wc.getFormattedJson(outputFile))
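# upstreamJobId is presumably consumed by the template as a SLURM job
# dependency, so the MPI step starts only after the preparation job exits
# successfully. A hypothetical fragment of the rendered header (the real
# s2_mpi_ProcessRawToArd_job_template.sbatch is not shown here):

def _sketchSbatchHeader(nodes, upstreamJobId):
    # --dependency=afterok:<jobid> holds a job until the named job
    # completes with exit code 0.
    return ("#SBATCH --nodes={}\n"
            "#SBATCH --dependency=afterok:{}\n").format(nodes, upstreamJobId)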