def run(self):
        """Create the per-swath workspace layout and record it as JSON.

        Builds working/, state/ and working/tmp/ directories under
        processingDir/<swathName> and writes their paths (plus the task's
        pass-through configuration) to this task's output.
        """
        log.info("Setting up directories for {}".format(self.swathDir))

        swathDirName = os.path.basename(self.swathDir)
        workspaceRoot = os.path.join(self.paths["processingDir"], swathDirName)

        workingFileRoot = os.path.join(workspaceRoot, "working")
        stateFileRoot = os.path.join(workspaceRoot, "state")
        localTmpDir = os.path.join(workingFileRoot, "tmp")

        # exist_ok=True avoids the check-then-create race of the previous
        # os.path.exists()/os.makedirs() pairs (another worker could create
        # the directory between the two calls).
        for directory in (workingFileRoot, stateFileRoot, localTmpDir):
            os.makedirs(directory, exist_ok=True)

        outputFile = {
            "swathDir": self.swathDir,
            "workspaceRoot": workspaceRoot,
            "workingFileRoot": workingFileRoot,
            "stateFileRoot": stateFileRoot,
            "localTmpDir": localTmpDir,
            "demFilename": self.demFilename,
            "arcsiReprojection": self.arcsiReprojection,
            "outWktFilename": self.outWktFilename,
            "projAbbv": self.projAbbv,
            "metadataConfigFile": self.metadataConfigFile,
            "metadataTemplate": self.metadataTemplate,
            "maxCogProcesses": self.maxCogProcesses
        }

        with self.output().open("w") as outFile:
            outFile.write(wc.getFormattedJson(outputFile))
    def run(self):
        """Fan out one SetupWorkDir task per input swath, then collate the
        per-swath setup records into a single JSON output."""
        with self.input().open('r') as getInputSwathsInfo:
            getInputSwaths = json.load(getInputSwathsInfo)

        # Build all dynamic dependencies up front and yield them together so
        # luigi can schedule them in parallel.
        tasks = [SetupWorkDir(swathDir=swath["swathDir"],
                              paths=self.paths,
                              demFilename=self.demFilename,
                              arcsiReprojection=self.arcsiReprojection,
                              outWktFilename=self.outWktFilename,
                              projAbbv=self.projAbbv,
                              metadataConfigFile=self.metadataConfigFile,
                              metadataTemplate=self.metadataTemplate,
                              maxCogProcesses=self.maxCogProcesses)
                 for swath in getInputSwaths["swaths"]]
        yield tasks

        swathSetups = []
        for task in tasks:
            with task.output().open('r') as taskOutput:
                swathSetups.append(json.load(taskOutput))

        outputFile = {"swathSetups": swathSetups}

        with self.output().open("w") as outFile:
            outFile.write(wc.getFormattedJson(outputFile))
# --- Example #3 ---
    def run(self):
        """List Sentinel-1 products (S1*) in the basket directory and write
        them, with the basket path, to this task's JSON output."""
        basketDir = self.paths["basketDir"]

        # glob.glob already returns a list, so the previous manual
        # append-in-a-loop was redundant.
        inputFiles = glob.glob(os.path.join(basketDir, "S1*"))

        outputFile = {"basket": basketDir, "inputFiles": inputFiles}

        with self.output().open("w") as outFile:
            outFile.write(wc.getFormattedJson(outputFile))
    def run(self):
        """Discover swath sub-directories of the basket that contain
        Sentinel-2 products (S2*) and write them to the JSON output.

        Each entry records the swath directory and the product paths found
        inside it; sub-directories with no S2 products are skipped.
        """
        basketDir = self.paths["basketDir"]
        # First walk step yields (root, dirnames, filenames); [1] is the list
        # of immediate sub-directories.
        basketSubDirs = next(os.walk(basketDir))[1]
        swaths = []

        for subDir in basketSubDirs:
            swathDir = os.path.join(basketDir, subDir)
            # glob.glob returns a list directly; no append loop needed.
            subDirProducts = glob.glob(os.path.join(swathDir, "S2*"))

            if subDirProducts:  # truthiness instead of len(...)
                swaths.append({"swathDir": swathDir,
                               "productPaths": subDirProducts})

        outputFile = {"basket": basketDir, "swaths": swaths}
        with self.output().open("w") as outFile:
            outFile.write(wc.getFormattedJson(outputFile))
    def run(self):
        """Submit the prepared sbatch script to the scheduler and record the
        resulting job ID and submission time in this task's JSON output.

        In test mode a fake scheduler response containing a random job ID is
        generated instead of invoking sbatch.

        Raises:
            RuntimeError: if sbatch exits non-zero, or if no job ID can be
                parsed from the scheduler output.
        """
        try:
            outputFile = {
                "productId": self.productName,
                "sbatchScriptPath": self.sbatchScriptPath,
                "jobId": None,
                "submitTime": None
            }

            outputString = ""
            if self.testProcessing:
                randomJobId = random.randint(1000000, 9999999)
                outputString = "JOBID     USER    STAT  QUEUE      FROM_HOST   EXEC_HOST   JOB_NAME   SUBMIT_TIME"\
                                +str(randomJobId)+"   test001  RUN   short-serial jasmin-sci1 16*host290. my-job1 Nov 16 16:51"
            else:
                sbatchCmd = "sbatch {}".format(self.sbatchScriptPath)
                log.info("Submitting job using command: %s", sbatchCmd)
                # NOTE(review): shell=True with an interpolated path — safe
                # only while sbatchScriptPath is generated internally; confirm
                # it can never carry user-controlled content.
                output = subprocess.check_output(sbatchCmd,
                                                 stderr=subprocess.STDOUT,
                                                 shell=True)
                outputString = output.decode("utf-8")

            regex = '[0-9]{5,}'  # job ID is at least 5 digits
            match = re.search(regex, outputString)
            if match is None:
                # Previously this fell straight through to match.group(0),
                # raising an opaque AttributeError; fail descriptively instead.
                errStr = "could not parse job ID from sbatch output: {}".format(
                    outputString)
                log.error(errStr)
                raise RuntimeError(errStr)
            self.jobId = match.group(0)

            log.info(
                "Successfully submitted lotus job <%s> for %s using sbatch script: %s",
                self.jobId, self.productName, self.sbatchScriptPath)

            outputFile["jobId"] = self.jobId
            outputFile["submitTime"] = str(datetime.datetime.now())

            with self.output().open('w') as out:
                out.write(wc.getFormattedJson(outputFile))

        except subprocess.CalledProcessError as e:
            errStr = "command '{}' return with error (code {}): {}".format(
                e.cmd, e.returncode, e.output)
            log.error(errStr)
            raise RuntimeError(errStr)
    def run(self):
        """Spawn a SetupWorkDir task for each discovered input product, then
        gather the per-product setup records into one JSON output."""
        with self.input().open('r') as getInputsInfo:
            getInputs = json.load(getInputsInfo)

        # All dynamic dependencies are yielded at once so luigi can run them
        # in parallel.
        tasks = [SetupWorkDir(inputPath=inputFile,
                              paths=self.paths,
                              spatialConfig=self.spatialConfig,
                              removeSourceFile=True)
                 for inputFile in getInputs["inputFiles"]]
        yield tasks

        productSetups = []
        for task in tasks:
            with task.output().open('r') as taskOutput:
                productSetups.append(json.load(taskOutput))

        outputFile = {"productSetups": productSetups}

        with self.output().open("w") as outFile:
            outFile.write(wc.getFormattedJson(outputFile))
    def run(self):
        """Render and submit a serial GenerateReport bsub job per swath.

        Reads the swath list and per-swath workspace setup from the two
        upstream outputs, fills the bsub template for each swath, writes the
        script into the swath's workspace, yields SubmitJob tasks for all of
        them, and finally collates the submission records into the output.
        """
        getInputSwaths = {}
        with self.input()[0].open('r') as getInputSwathsInfo:
            getInputSwaths = json.load(getInputSwathsInfo)

        setupWorkDirs = {}
        with self.input()[1].open('r') as setupWorkDirsInfo:
            setupWorkDirs = json.load(setupWorkDirsInfo)

        basketDir = self.paths["basketDir"]

        with open(
                os.path.join(self.paths["templatesDir"],
                             's2_serial_GenerateReport_job_template.bsub'),
                'r') as t:
            bsubTemplate = Template(t.read())

        # The following values do not vary per swath, so compute them once
        # instead of on every loop iteration. Hoisting reportFileName also
        # gives every swath the same timestamp (matching the S1 submission
        # task, which computes it outside its loop).
        arcsiReprojection = "--outWkt={} --projAbbv={}".format(
            self.outWktFilename,
            self.projAbbv) if self.arcsiReprojection else ""

        metadataTemplate = ""
        if self.metadataTemplate is not None:
            metadataTemplate = "--metadataTemplate={}".format(
                self.metadataTemplate)

        arcsiCmdTemplate = ""
        if self.arcsiCmdTemplate is not None:
            arcsiCmdTemplate = "--arcsiCmdTemplate={}".format(
                self.arcsiCmdTemplate)

        reportFileName = "{}-{}.csv".format(
            os.path.basename(self.paths["basketDir"]),
            datetime.now().strftime("%Y%m%d%H%M"))

        tasks = []
        for swathSetup in setupWorkDirs["swathSetups"]:
            productName = wc.getProductNameFromPath(swathSetup["swathDir"])

            # NOTE(review): if no swath matches, noOfGranules stays unset and
            # the bsubParams below raise NameError — presumably the upstream
            # tasks guarantee a match; confirm.
            for swath in getInputSwaths["swaths"]:
                if swath["swathDir"] == swathSetup["swathDir"]:
                    noOfGranules = len(swath["productPaths"])
                    break

            bsubParams = {
                "maxRunTime": noOfGranules * self.hoursPerGranule,
                "jobWorkingDir": swathSetup["workspaceRoot"],
                "workingMount": swathSetup["workingFileRoot"],
                "stateMount": swathSetup["stateFileRoot"],
                "inputMount": swathSetup["swathDir"],
                "staticMount": self.paths["staticDir"],
                "outputMount": self.paths["outputDir"],
                "s2ArdContainer": self.paths["singularityImgPath"],
                "dem": self.demFilename,
                "arcsiReprojection": arcsiReprojection,
                "metadataConfigFile": self.metadataConfigFile,
                "metadataTemplate": metadataTemplate,
                "arcsiCmdTemplate": arcsiCmdTemplate,
                "reportFileName": reportFileName,
                "reportMount": self.paths["reportDir"],
                "databaseMount": self.paths["databaseDir"]
            }

            bsub = bsubTemplate.substitute(bsubParams)
            bsubScriptPath = os.path.join(
                swathSetup["workspaceRoot"],
                "submit_GenerateReport_job_for_{}.bsub".format(productName))

            with open(bsubScriptPath, 'w') as bsubScriptFile:
                bsubScriptFile.write(bsub)

            tasks.append(
                SubmitJob(paths=self.paths,
                          productName=productName,
                          bsubScriptPath=bsubScriptPath,
                          testProcessing=self.testProcessing))
        yield tasks

        outputFile = {"basket": basketDir, "submittedSwaths": []}

        for task in tasks:
            with task.output().open('r') as taskOutput:
                submittedSwath = json.load(taskOutput)
                outputFile["submittedSwaths"].append(submittedSwath)

        with self.output().open("w") as outFile:
            outFile.write(wc.getFormattedJson(outputFile))
    def run(self):
        """Render an sbatch script for every S1 product setup, submit each via
        a SubmitJob task, and collate the submission records."""
        with self.input().open('r') as setupWorkDirsInfo:
            setupWorkDirs = json.load(setupWorkDirsInfo)

        basketDir = self.paths["basketDir"]

        templatePath = os.path.join(self.paths["templatesDir"], 's1_job_template.sbatch')
        with open(templatePath, 'r') as t:
            sbatchTemplate = Template(t.read())

        reportFileName = "{}-{}.csv".format(os.path.basename(self.paths["basketDir"]), datetime.now().strftime("%Y%m%d%H%M"))

        tasks = []
        for productSetup in setupWorkDirs["productSetups"]:
            inputPath = productSetup["inputPath"]
            productName = wc.getProductNameFromPath(inputPath)
            inputDir = Path(inputPath).parent
            removeSourceFileFlag = "--removeInputFile" if self.removeSourceFile else ""

            sbatchParams = {
                "jobWorkingDir" : productSetup["workspaceRoot"],
                "reportMount": self.paths["reportDir"],
                "databaseMount": self.paths["databaseDir"],
                "workingMount": productSetup["workingFileRoot"],
                "stateMount": productSetup["stateFileRoot"],
                "inputMount" :inputDir,
                "staticMount" :self.paths["staticDir"],
                "outputMount": self.paths["outputDir"],
                "s1ArdContainer": self.paths["singularityImgPath"],
                "productName": productName,
                "snapConfigUtmProj": self.spatialConfig["snapConfigUtmProj"],
                "snapConfigCentralMeridian": self.spatialConfig["snapConfigCentralMeridian"],
                "snapConfigFalseNorthing": self.spatialConfig["snapConfigFalseNorthing"],
                "snapRunArguments": self.spatialConfig["snapRunArguments"],
                "sourceSrs": self.spatialConfig["sourceSrs"],
                "targetSrs": self.spatialConfig["targetSrs"],
                "filenameDemData": self.spatialConfig["filenameDemData"],
                "filenameSrs": self.spatialConfig["filenameSrs"],
                "demFilename": self.spatialConfig["demFilename"],
                "demTitle": self.spatialConfig["demTitle"],
                "metadataProjection": self.spatialConfig["metadataProjection"],
                "metadataPlaceName": self.spatialConfig["metadataPlaceName"],
                "metadataParentPlaceName": self.spatialConfig["metadataParentPlaceName"],
                "removeSourceFileFlag": removeSourceFileFlag,
                "reportFileName": reportFileName
            }

            # Write the rendered script into the product's workspace.
            scriptText = sbatchTemplate.substitute(sbatchParams)
            sbatchScriptPath = os.path.join(productSetup["workspaceRoot"], "process_s1_ard.sbatch")
            with open(sbatchScriptPath, 'w') as sbatchScriptFile:
                sbatchScriptFile.write(scriptText)

            tasks.append(SubmitJob(paths=self.paths,
                                   productName=productName,
                                   sbatchScriptPath=sbatchScriptPath,
                                   testProcessing=self.testProcessing))
        yield tasks

        submittedProducts = []
        for task in tasks:
            with task.output().open('r') as taskOutput:
                submittedProducts.append(json.load(taskOutput))

        outputFile = {
            "basket": basketDir,
            "submittedProducts": submittedProducts
        }

        with self.output().open("w") as outFile:
            outFile.write(wc.getFormattedJson(outputFile))
    def run(self):
        """Render and submit an MPI GenerateReport sbatch job per swath.

        Each job depends (via upstreamJobId) on the swath's previously
        submitted ARD-processing job. Scripts are written into each swath's
        workspace, SubmitJob tasks are yielded for all of them, and the
        submission records are collated into this task's JSON output.
        """
        setupWorkDirs = {}
        with self.input()[0].open('r') as setupWorkDirsInfo:
            setupWorkDirs = json.load(setupWorkDirsInfo)

        prepareArdProcessingJobs = {}
        with self.input()[1].open('r') as submitProcessRawToArdJobsInfo:
            prepareArdProcessingJobs = json.load(submitProcessRawToArdJobsInfo)

        basketDir = self.paths["basketDir"]

        with open(
                os.path.join(self.paths["templatesDir"],
                             's2_mpi_GenerateReport_job_template.sbatch'),
                'r') as t:
            sbatchTemplate = Template(t.read())

        # These values do not vary per swath; compute them once instead of on
        # every loop iteration. Hoisting reportFileName also gives all swaths
        # one consistent timestamp.
        arcsiReprojection = "--outWkt={} --projAbbv={}".format(
            self.outWktFilename,
            self.projAbbv) if self.arcsiReprojection else ""

        metadataTemplate = ""
        if self.metadataTemplate is not None:
            metadataTemplate = "--metadataTemplate={}".format(
                self.metadataTemplate)

        arcsiCmdTemplate = ""
        if self.arcsiCmdTemplate is not None:
            arcsiCmdTemplate = "--arcsiCmdTemplate={}".format(
                self.arcsiCmdTemplate)

        reportFileName = "{}-{}.csv".format(
            os.path.basename(self.paths["basketDir"]),
            datetime.now().strftime("%Y%m%d%H%M"))

        tasks = []
        for swathSetup in setupWorkDirs["swathSetups"]:
            productName = wc.getProductNameFromPath(swathSetup["swathDir"])

            # NOTE(review): if no submitted swath matches, upstreamJobId stays
            # unset and sbatchParams raises NameError — presumably upstream
            # guarantees one match per product; confirm.
            for submittedSwath in prepareArdProcessingJobs["submittedSwaths"]:
                if submittedSwath["productId"] == productName:
                    upstreamJobId = submittedSwath["jobId"]

            sbatchParams = {
                "upstreamJobId": upstreamJobId,
                "jobWorkingDir": swathSetup["workspaceRoot"],
                "workingMount": swathSetup["workingFileRoot"],
                "stateMount": swathSetup["stateFileRoot"],
                "inputMount": swathSetup["swathDir"],
                "staticMount": self.paths["staticDir"],
                "outputMount": self.paths["outputDir"],
                "s2ArdContainer": self.paths["singularityImgPath"],
                "arcsiReprojection": arcsiReprojection,
                "dem": self.demFilename,
                "metadataConfigFile": self.metadataConfigFile,
                "metadataTemplate": metadataTemplate,
                "arcsiCmdTemplate": arcsiCmdTemplate,
                "maxCogProcesses": self.maxCogProcesses,
                "reportFileName": reportFileName,
                "reportMount": self.paths["reportDir"],
                "databaseMount": self.paths["databaseDir"]
            }

            sbatch = sbatchTemplate.substitute(sbatchParams)
            sbatchScriptPath = os.path.join(
                swathSetup["workspaceRoot"],
                "submit_GenerateReport_job_for_{}.sbatch".format(productName))

            with open(sbatchScriptPath, 'w') as sbatchScriptFile:
                sbatchScriptFile.write(sbatch)

            tasks.append(
                SubmitJob(paths=self.paths,
                          productName=productName,
                          sbatchScriptPath=sbatchScriptPath,
                          testProcessing=self.testProcessing))
        yield tasks

        outputFile = {"basket": basketDir, "submittedSwaths": []}

        for task in tasks:
            with task.output().open('r') as taskOutput:
                submittedSwath = json.load(taskOutput)
                outputFile["submittedSwaths"].append(submittedSwath)

        with self.output().open("w") as outFile:
            outFile.write(wc.getFormattedJson(outputFile))
    def run(self):
        """Render and submit an MPI ProcessRawToArd sbatch job per swath.

        Node count is sized from the swath's granule count (+1 for the
        coordinator), and each job depends (via upstreamJobId) on the swath's
        preparation job. Scripts are written into each swath's workspace,
        SubmitJob tasks are yielded for all swaths, and the submission
        records are collated into this task's JSON output.
        """
        getInputSwaths = {}
        with self.input()[0].open('r') as getInputSwathsInfo:
            getInputSwaths = json.load(getInputSwathsInfo)

        setupWorkDirs = {}
        with self.input()[1].open('r') as setupWorkDirsInfo:
            setupWorkDirs = json.load(setupWorkDirsInfo)

        prepareArdProcessingJobs = {}
        with self.input()[2].open('r') as submitPrepareArdProcessingJobsInfo:
            prepareArdProcessingJobs = json.load(
                submitPrepareArdProcessingJobsInfo)

        basketDir = self.paths["basketDir"]

        with open(
                os.path.join(self.paths["templatesDir"],
                             's2_mpi_ProcessRawToArd_job_template.sbatch'),
                'r') as t:
            sbatchTemplate = Template(t.read())

        # Loop-invariant: the flag depends only on self.testProcessing, so
        # compute it once rather than on every iteration.
        testProcessing = "--testProcessing" if self.testProcessing else ""

        tasks = []
        for swathSetup in setupWorkDirs["swathSetups"]:
            productName = wc.getProductNameFromPath(swathSetup["swathDir"])

            # NOTE(review): if no swath / submitted swath matches below,
            # noOfGranules or upstreamJobId stays unset and sbatchParams
            # raises NameError — presumably upstream guarantees matches;
            # confirm.
            for swath in getInputSwaths["swaths"]:
                if swath["swathDir"] == swathSetup["swathDir"]:
                    noOfGranules = len(swath["productPaths"])
                    break

            for submittedSwath in prepareArdProcessingJobs["submittedSwaths"]:
                if submittedSwath["productId"] == productName:
                    upstreamJobId = submittedSwath["jobId"]

            sbatchParams = {
                "upstreamJobId": upstreamJobId,
                "nodes": noOfGranules + 1,
                "jobWorkingDir": swathSetup["workspaceRoot"],
                "workingMount": swathSetup["workingFileRoot"],
                "stateMount": swathSetup["stateFileRoot"],
                "inputMount": swathSetup["swathDir"],
                "staticMount": self.paths["staticDir"],
                "singularityDir": self.paths["singularityDir"],
                "arcsiContainer": self.paths["arcsiMpiBaseImg"],
                "testProcessing": testProcessing
            }

            sbatch = sbatchTemplate.substitute(sbatchParams)
            sbatchScriptPath = os.path.join(
                swathSetup["workspaceRoot"],
                "submit_ProcessRawToArd_job_for_{}.sbatch".format(productName))

            with open(sbatchScriptPath, 'w') as sbatchScriptFile:
                sbatchScriptFile.write(sbatch)

            tasks.append(
                SubmitJob(paths=self.paths,
                          productName=productName,
                          sbatchScriptPath=sbatchScriptPath,
                          testProcessing=self.testProcessing))
        yield tasks

        outputFile = {"basket": basketDir, "submittedSwaths": []}

        for task in tasks:
            with task.output().open('r') as taskOutput:
                submittedSwath = json.load(taskOutput)
                outputFile["submittedSwaths"].append(submittedSwath)

        with self.output().open("w") as outFile:
            outFile.write(wc.getFormattedJson(outputFile))