def __init__(self, localCache):
    self.baseUrl = "http://localhost:5984"   # CouchDB
    self.ngascon = NgasConnection()
    self.xtss = ExecutorClient('localhost', 'msgq', 'xtss')
    self.select = "ingest.JAO"
    self.mq = MqConnection('localhost', 'msgq', self.select)
    self.localCache = localCache
def __init__(self):
    self.weblogsBaseUrl = "http://localhost:8000"
    self.baseUrl = "http://localhost:5984"   # CouchDB
    self.dbconn = DbConnection(self.baseUrl)
    self.dbName = "pipeline-reports"
    self.xtss = ExecutorClient('localhost', 'msgq', 'xtss')
    self.select = "pipeline.report.JAO"
    self.mq = MqConnection('localhost', 'msgq', self.select)
def __init__(self, progID, ousUID, recipe):
    self._progID = progID
    self._ousUID = ousUID
    self._recipe = recipe

    # Initialization code for pipeline execution

    # Make sure we know where we are
    self.location = os.environ.get('DRAWS_LOCATION')
    if self.location is None:
        raise RuntimeError("DRAWS_LOCATION env variable is not defined")

    # Make sure we know where the local replicated cache directory is
    self.replicatedCache = os.environ.get('DRAWS_LOCAL_CACHE')
    if self.replicatedCache is None:
        raise RuntimeError("DRAWS_LOCAL_CACHE env variable is not defined")

    self.pipelineScript = "pipeline.py"
    self.thisDirectory = os.getcwd()
    # Assume the pipeline script is in this same directory
    self.pipelineExecutable = self.thisDirectory + "/" + self.pipelineScript
    self.pipelineRunDirectory = tempfile.mkdtemp(prefix="drw-")
    self.workingDirectory = tempfile.mkdtemp(prefix="drw-")
    print(">>> PipelineDriver: pipelineRunDirectory:", self.pipelineRunDirectory)
    print(">>> PipelineDriver: workingDirectory:", self.workingDirectory)

    self.xtss = ExecutorClient('localhost', 'msgq', 'xtss')
    self.mq = MqConnection('localhost', 'msgq')

    params = collections.OrderedDict()
    params['progID'] = self._progID
    params['ousUID'] = self._ousUID
    params['recipe'] = self._recipe
    params['location'] = self.location
    params['pipelineRunDir'] = self.pipelineRunDirectory
    params['replicatedCache'] = self.replicatedCache
    self._temp = Template(params)
def __init__(self, location):
    self._baseUrl = "http://localhost:5984"   # CouchDB
    self._dbconn = DbConnection(self._baseUrl)
    self._xtss = ExecutorClient('localhost', 'msgq', 'xtss')
    self._mq = MqConnection('localhost', 'msgq')
    self._broker = RabbitMqMessageBroker()
    self.location = location
# Make sure we know where we are
location = os.environ.get('DRAWS_LOCATION')
if location is None:
    raise RuntimeError("DRAWS_LOCATION env variable is not defined")

# Make sure we know where the local replicated cache directory is
replicatedCache = os.environ.get('DRAWS_LOCAL_CACHE')
if replicatedCache is None:
    raise RuntimeError("DRAWS_LOCAL_CACHE env variable is not defined")

parser = argparse.ArgumentParser(description='Pipeline Driver mock-up')
parser.add_argument(dest="progID", help="ID of the project containing the OUS")
parser.add_argument(dest="ousUID", help="ID of the OUS that should be processed")
parser.add_argument(dest="recipe", help="Pipeline recipe to run")
args = parser.parse_args()
print(">>> PipelineDriver: progID=%s, ousUID=%s, recipe=%s" %
      (args.progID, args.ousUID, args.recipe))

pipelineScript = "pipeline.py"
thisDirectory = os.getcwd()
# Assume the pipeline script is in this same directory
pipelineExecutable = thisDirectory + "/" + pipelineScript
pipelineRunDirectory = tempfile.mkdtemp(prefix="drw-")
workingDirectory = tempfile.mkdtemp(prefix="drw-")
print(">>> PipelineDriver: pipelineRunDirectory:", pipelineRunDirectory)
print(">>> PipelineDriver: workingDirectory:", workingDirectory)

xtss = ExecutorClient('localhost', 'msgq', 'xtss')
mq = MqConnection('localhost', 'msgq')

wrapPipeline(args.progID, args.ousUID, args.recipe)
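# A minimal sketch of how this driver might be invoked; the script name,
# env variable values and recipe name below are illustrative assumptions,
# not taken from the demo:
#
#   export DRAWS_LOCATION=EU
#   export DRAWS_LOCAL_CACHE=/tmp/draws-cache
#   ./pipeline-driver.py 2015.1.00657.S uid://X1/X1/Xaf PipelineCalibration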
def __init__(self):
    self._mq = MqConnection('localhost', 'msgq')
    self._xtss = ExecutorClient('localhost', 'msgq', 'xtss')
class APAFunc(Task):
    def __init__(self):
        self._mq = MqConnection('localhost', 'msgq')
        self._xtss = ExecutorClient('localhost', 'msgq', 'xtss')

    def execute(self, params):
        self._params = params
        # Check parameters!
        self._check(params, ['progID', 'ousUID', 'location',
                             'pipelineRunDir', 'replicatedCache'])
        self._progID = params['progID']
        self._ousUID = params['ousUID']
        self._pipelineRunDirectory = params['pipelineRunDir']
        self._replicatedCache = params['replicatedCache']
        self._location = params['location']

        # Copy the products directory to the replicating cache directory
        # and signal that to the JAO cache
        productsDir = self._findProductsDir(self._progID)
        productsBasedir = os.path.basename(productsDir)
        repCacheDir = os.path.join(self._replicatedCache, productsBasedir)
        print(">>> PipelineDriver: Products dir name:", productsDir)
        print(">>> PipelineDriver: Replicating dir name:", repCacheDir)
        self._copyAndReplaceDir(productsDir, repCacheDir)
        message = {}
        message["fileType"] = "productsdir"
        message["cachedAt"] = self._location
        message["name"] = productsBasedir
        selector = "cached.JAO"
        self._mq.send(message, selector)

        # Copy the weblog to the replicating cache directory
        # and signal that to the JAO *and* the local cache (if
        # they are not one and the same)
        weblog = self._findWeblog(productsDir, self._ousUID)
        print(">>> PipelineDriver: weblog: copying", weblog, "to", self._replicatedCache)
        shutil.copy(weblog, self._replicatedCache)
        message = {}
        message["fileType"] = "weblog"
        message["cachedAt"] = self._location
        message["name"] = os.path.basename(weblog)
        selector = "cached.JAO"
        self._mq.send(message, selector)
        if self._location != "JAO":
            selector = "cached.%s" % self._location
            self._mq.send(message, selector)

        # Send the XML text of the pipeline report to AQUA at JAO
        # We need to BASE64-encode it because it will be wrapped in a JSON field
        timestamp = self._getTimestamp(productsBasedir)
        plReportFile = self._findPipelineReport(productsDir, self._ousUID)
        plReport = dbdrwutils.readTextFileIntoString(plReportFile)
        plReport = dbdrwutils.b64encode(plReport)
        message = '''
            {
                "ousUID" : "%s",
                "timestamp" : "%s",
                "source" : "%s",
                "report" : "%s",
                "productsDir": "%s"
            }
        ''' % (self._ousUID, timestamp, self._location, plReport, productsBasedir)
        message = json.loads(message)   # convert to a Python dict
        selector = "pipeline.report.JAO"
        self._mq.send(message, selector)

        # We are done, set the OUS state to ReadyForReview
        dbdrwutils.setState(self._xtss, self._ousUID, "ReadyForReview")

    def _isProductsDirectory(self, f, progID):
        return (not os.path.isfile(os.path.join(self._pipelineRunDirectory, f))
                and f.startswith(progID))

    def _findProductsDir(self, progID):
        "Get the most recent products directory"
        allFiles = os.listdir(self._pipelineRunDirectory)
        prodDirs = [f for f in allFiles if self._isProductsDirectory(f, progID)]
        prodDir = sorted(prodDirs)[-1]
        print(">>> PipelineDriver: prodDir:", prodDir)
        return os.path.join(self._pipelineRunDirectory, prodDir)

    def _findWeblog(self, productsDir, ousUID):
        # DEMO ONLY: the "products" subdirectory should be looked for;
        # here we just take the hardcoded path
        ousUID = dbdrwutils.encode(ousUID)
        productsDir = os.path.join(productsDir, "SOUS", "GOUS", ousUID, "products")
        for file in os.listdir(productsDir):
            print(">>> PipelineDriver: file:", file)
            if file.startswith("weblog-") and file.endswith(".zip"):
                return os.path.join(productsDir, file)
        raise RuntimeError("No weblog found in %s" % productsDir)

    def _findPipelineReport(self, productsDir, ousUID):
        # DEMO ONLY: the "products" subdirectory should be looked for;
        # here we just take the hardcoded path
        ousUID = dbdrwutils.encode(ousUID)
        productsDir = os.path.join(productsDir, "SOUS", "GOUS", ousUID, "products")
        for file in os.listdir(productsDir):
            print(">>> PipelineDriver: file:", file)
            if file.startswith("pl-report-") and file.endswith(".xml"):
                return os.path.join(productsDir, file)
        raise RuntimeError("No pipeline report found in %s" % productsDir)

    def _copyAndReplaceDir(self, from_path, to_path):
        if os.path.exists(to_path):
            shutil.rmtree(to_path)
        shutil.copytree(from_path, to_path)

    def _getTimestamp(self, productsDirName):
        '''
        If productsDirName is something like '2015.1.00657.S_2018_07_19T08_50_10.228'
        will return 2018-07-19T08:50:10.228
        '''
        n = productsDirName.index('_')
        timestamp = productsDirName[n + 1:]
        timestamp = timestamp.replace('_', '-', 2)
        timestamp = timestamp.replace('_', ':')
        return timestamp
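# A minimal sketch of driving APAFunc directly, with illustrative values; in
# the demo these parameters come from the PipelineDriver's Template (see the
# OrderedDict built in its constructor), and the two paths below are
# hypothetical stand-ins for the mkdtemp result and DRAWS_LOCAL_CACHE:
#
#   params = {
#       'progID': '2015.1.00657.S',
#       'ousUID': 'uid://X1/X1/Xaf',
#       'location': 'EU',                       # executive where the Pipeline ran
#       'pipelineRunDir': '/tmp/drw-abc123',    # hypothetical
#       'replicatedCache': '/tmp/draws-cache'   # hypothetical
#   }
#   APAFunc().execute(params)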
###################################################################
## Main program
###################################################################

# Make sure we know where we are
location = os.environ.get('DRAWS_LOCATION')
if location is None:
    raise RuntimeError("DRAWS_LOCATION env variable is not defined")

# Make sure we know where the local replicated cache directory is
lcache = os.environ.get('DRAWS_LOCAL_CACHE')
if lcache is None:
    raise RuntimeError("DRAWS_LOCAL_CACHE env variable is not defined")

parser = argparse.ArgumentParser(description='Replicated cache')
parser.add_argument("--eacache", "-eac", dest="eacache",
                    help="Absolute pathname or rsync location of the EA cache dir")
parser.add_argument("--nacache", "-nac", dest="nacache",
                    help="Absolute pathname or rsync location of the NA cache dir")
parser.add_argument("--eucache", "-euc", dest="eucache",
                    help="Absolute pathname or rsync location of the EU cache dir")
parser.add_argument("--port", "-p", dest="port", default=8000,
                    help="Port number of the embedded Web server, default is 8000")
args = parser.parse_args()

listen_to = "cached." + location
port = int(args.port)
mq = MqConnection('localhost', 'msgq', listen_to)
ngas = NgasConnection()
dbdrwutils.bgRunHttpServer(port, lcache)
print(' [*] Waiting for messages matching %s' % listen_to)
mq.listen(callback)
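# A minimal sketch of how this cache service might be started; the script
# name, paths and rsync location below are illustrative assumptions:
#
#   export DRAWS_LOCATION=JAO
#   export DRAWS_LOCAL_CACHE=/tmp/draws-cache
#   ./replicated-cache.py --eucache rsync://eu-host/cache --port 8000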
class QA2():
    def __init__(self):
        self.weblogsBaseUrl = "http://localhost:8000"
        self.baseUrl = "http://localhost:5984"   # CouchDB
        self.dbconn = DbConnection(self.baseUrl)
        self.dbName = "pipeline-reports"
        self.xtss = ExecutorClient('localhost', 'msgq', 'xtss')
        self.select = "pipeline.report.JAO"
        self.mq = MqConnection('localhost', 'msgq', self.select)

    def start(self):
        # Launch the listener in the background
        print(' [*] Waiting for messages matching %s' % self.select)
        dbdrwutils.bgRun(self.mq.listen, (self.callback,))

        # This is the program's text-based UI
        # Loop forever:
        #    Show Pipeline runs awaiting review
        #    Ask for an OUS UID
        #    Lookup the most recent PL execution for that OUS
        #    Print it out
        #    Ask for Fail, Pass, or SemiPass
        #    Set the OUS state accordingly
        while True:
            print()
            print()
            print('------------------------------------------')
            print()
            print("OUSs ready to be reviewed")
            ouss = self.findReadyForReview()
            if ouss is None or len(ouss) == 0:
                print("(none)")
            else:
                for ous in ouss:
                    print(ous['entityId'])
            print()

            ousUID = input('Please enter an OUS UID: ')
            plReport = self.findMostRecentPlReport(ousUID)
            if plReport is None:
                print("No Pipeline executions for OUS", ousUID)
                continue

            # We are reviewing this OUS, set its state accordingly
            dbdrwutils.setState(self.xtss, ousUID, "Reviewing")

            timestamp = plReport['timestamp']
            report = dbdrwutils.b64decode(plReport['encodedReport'])
            productsDir = plReport['productsDir']
            source = plReport['source']
            print("Pipeline report for UID %s, processed %s" % (ousUID, timestamp))
            print(report)
            print()
            print("Weblog available at: %s/weblogs/%s" %
                  (self.weblogsBaseUrl, dbdrwutils.makeWeblogName(ousUID, timestamp)))
            print()

            while True:
                reply = input("Enter [F]ail, [P]ass, [S]emipass, [C]ancel: ")
                reply = reply[0:1].upper()
                if reply in ('F', 'P', 'S', 'C'):
                    break
            if reply == 'C':
                continue

            # Set the OUS state according to the QA2 flag
            self.processQA2flag(ousUID, reply)
            if reply == 'F':
                continue

            # Tell the Product Ingestor that it should ingest those Pipeline products
            selector = "ingest.JAO"
            message = {}
            message["ousUID"] = ousUID
            message["timestamp"] = timestamp
            message["productsDir"] = productsDir
            self.mq.send(message, selector)

            # Wait some, mainly for effect
            waitTime = random.randint(3, 8)
            time.sleep(waitTime)

            # Now we can set the state of the OUS to DeliveryInProgress
            dbdrwutils.setState(self.xtss, ousUID, "DeliveryInProgress")

    def savePlReport(self, ousUID, timestamp, encodedReport, productsDir, source):
        ''' Saves a pipeline run report to 'Oracle' '''
        plReport = {}
        plReport['ousUID'] = ousUID
        plReport['timestamp'] = timestamp
        plReport['encodedReport'] = encodedReport
        plReport['productsDir'] = productsDir
        plReport['source'] = source
        plReportID = timestamp + "." + ousUID
        retcode, msg = self.dbconn.save(self.dbName, plReportID, plReport)
        if retcode != 201:
            raise RuntimeError("Error saving Pipeline report: %d, %s" % (retcode, msg))

    def findMostRecentPlReport(self, ousUID):
        selector = {"selector": {"ousUID": ousUID}}
        retcode, reports = self.dbconn.find(self.dbName, selector)
        if len(reports) == 0:
            return None
        if retcode != 200:
            print(reports)
            return None
        # Find the most recent report and return it
        reports.sort(key=lambda x: x['timestamp'], reverse=True)
        return reports[0]

    def findReadyForReview(self):
        selector = {"selector": {"state": "ReadyForReview"}}
        retcode, ouss = self.dbconn.find("status-entities", selector)
        if len(ouss) == 0:
            return None
        if retcode != 200:
            print(ouss)
            return None
        ouss.sort(key=lambda x: x['entityId'])
        return ouss

    def processQA2flag(self, ousUID, flag):
        "Flag should be one of 'F' (fail), 'P' (pass) or 'S' (semi-pass)"
        newState = "ReadyForProcessing" if (flag == "F") else "Verified"
        print(">>> Setting the state of", ousUID, "to", newState)
        # Set the OUS state according to the input flag
        dbdrwutils.setState(self.xtss, ousUID, newState)
        if flag == "F":
            dbdrwutils.setSubstate(self.xtss, ousUID, "")   # Clear Pipeline recipe

    def callback(self, message):
        """
        Message is a JSON object:
            ousUID      is the UID of the OUS
            source      is the executive where the Pipeline was running
            report      is the report's XML text, BASE64-encoded
            timestamp   is the Pipeline run's timestamp
            productsDir is the name of the products directory for that Pipeline run

        For instance
            {
                "ousUID" : "uid://X1/X1/Xaf",
                "source" : "EU",
                "report" : "Cjw/eG1sIHZlcnNpb2...",
                "timestamp" : "2018-07-19T08:50:10.228",
                "productsDir": "2015.1.00657.S_2018_07_19T08_50_10.228"
            }
        """
        ousUID = message["ousUID"]
        source = message["source"]
        encodedReport = message["report"]
        timestamp = message["timestamp"]
        productsDir = message["productsDir"]

        # Save the report to Oracle
        self.savePlReport(ousUID, timestamp, encodedReport, productsDir, source)
        print(">>> AQUA/QA2: saved PL report: ousUID=%s, timestamp=%s" % (ousUID, timestamp))
    time.sleep(2)   # pretend this actually takes time

    # Now populate the ASA metadata and delivery status tables
    writeDeliveryStatus(progID, ousUID, timestamp, ingestedDataProds, complete=True)
    setSubstate(ousUID, 'ProductsIngested')

###################################################################
## Main program
###################################################################

baseUrl = "http://localhost:5984"   # CouchDB
ngascon = NgasConnection()
xtss = ExecutorClient('localhost', 'msgq', 'xtss')
select = "ingest.JAO"
mq = MqConnection('localhost', 'msgq', select)

# Make sure we know where the local replicated cache directory is
localCache = os.environ.get('DRAWS_LOCAL_CACHE')
if localCache is None:
    raise RuntimeError("DRAWS_LOCAL_CACHE env variable is not defined")

print(' [*] Waiting for messages matching', select)
mq.listen(callback)
    savePlReport(ousUID, timestamp, encodedReport, productsDir, source)
    print(">>> AQUA/QA2: saved PL report: ousUID=%s, timestamp=%s" % (ousUID, timestamp))

###################################################################
## Main program
###################################################################

weblogsBaseUrl = "http://localhost:8000"
baseUrl = "http://localhost:5984"   # CouchDB
dbconn = DbConnection(baseUrl)
dbName = "pipeline-reports"
xtss = ExecutorClient('localhost', 'msgq', 'xtss')
select = "pipeline.report.JAO"
mq = MqConnection('localhost', 'msgq', select)

# Launch the listener in the background
print(' [*] Waiting for messages matching %s' % select)
dbdrwutils.bgRun(mq.listen, (callback,))

# This is the program's text-based UI
# Loop forever:
#    Show Pipeline runs awaiting review
#    Ask for an OUS UID
#    Lookup the most recent PL execution for that OUS
#    Print it out
#    Ask for Fail, Pass, or SemiPass
#    Set the OUS state accordingly
while True:
    print()
#!/usr/bin/env python3

import sys
import argparse

sys.path.insert(0, "../shared")
from dbmsgq import MqConnection

parser = argparse.ArgumentParser(description='')
parser.add_argument(dest="groupName")
args = parser.parse_args()

queue = MqConnection("localhost", "msgq")
queue.joinGroup(args.groupName, listener="listener.NA")
queue.joinGroup(args.groupName, listener="listener.EA")
queue.joinGroup(args.groupName, listener="listener.EU")

msg = {}
msg['text'] = "all is fine"
queue.send(msg, "selector.*")
queue.send(msg, "selector.NA")
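# A minimal sketch of a matching consumer for the messages sent above,
# assuming MqConnection's listen() behaves as in the other listings; the
# callback name is a hypothetical illustration:
#
#   from dbmsgq import MqConnection
#
#   def callback(message):
#       print("received:", message['text'])
#
#   mq = MqConnection('localhost', 'msgq', 'selector.NA')
#   mq.listen(callback)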
class ProductIngestor():
    def __init__(self, localCache):
        self.baseUrl = "http://localhost:5984"   # CouchDB
        self.ngascon = NgasConnection()
        self.xtss = ExecutorClient('localhost', 'msgq', 'xtss')
        self.select = "ingest.JAO"
        self.mq = MqConnection('localhost', 'msgq', self.select)
        self.localCache = localCache

    def start(self):
        print(' [*] Waiting for messages matching', self.select)
        self.mq.listen(self.callback)

    def encode(self, entityID):
        return entityID.replace(":", "_").replace("/", "_")

    def setSubstate(self, ousUID, substate):
        dbdrwutils.setSubstate(self.xtss, ousUID, substate)

    def writeMetadata(self, progID, ousUID, timestamp, dataProduct):
        # No error checking!
        dbcon = DbConnection(self.baseUrl)
        dbName = "products-metadata"
        metadata = {}
        metadata['progID'] = progID
        metadata['ousUID'] = ousUID
        metadata['timestamp'] = timestamp
        retcode, retmsg = dbcon.save(dbName, dataProduct, metadata)
        if retcode != 201:
            raise RuntimeError("writeMetadata: error %d, %s" % (retcode, retmsg))

    def writeDeliveryStatus(self, progID, ousUID, timestamp, dataProducts, complete=False):
        # No error checking!
        dbcon = DbConnection(self.baseUrl)
        dbName = "delivery-status"
        delStatus = {}
        delStatus['progID'] = progID
        delStatus['timestamp'] = timestamp
        delStatus['dataProducts'] = sorted(dataProducts)
        delStatus['complete'] = complete
        delStatus['ousUID'] = ousUID
        retcode, retmsg = dbcon.save(dbName, ousUID, delStatus)
        if retcode != 201:
            raise RuntimeError("writeDeliveryStatus: error %d, %s" % (retcode, retmsg))

    def callback(self, message):
        """
        Message is a JSON object:
            ousUID      is the UID of the OUS
            timestamp   is the Pipeline run's timestamp
            productsDir is the name of the products directory for that Pipeline run

        For instance
            {
                "ousUID" : "uid://X1/X1/Xc1",
                "timestamp" : "2018-07-23T09:44:13.604",
                "productsDir" : "2015.1.00657.S_2018_07_23T09_44_13.604"
            }
        """
        print(">>> message:", message)
        ousUID = message["ousUID"]
        timestamp = message["timestamp"]
        productsDir = message["productsDir"]

        self.setSubstate(ousUID, 'IngestionTriggered')

        # Locate the data products in the replicated cache dir
        dataProductsDir = os.path.join(self.localCache, productsDir,
                                       "SOUS", "GOUS", self.encode(ousUID), "products")
        dataProdNames = os.listdir(dataProductsDir)
        time.sleep(5)   # pretend this actually takes time

        self.setSubstate(ousUID, 'AnalyzingProducts')
        time.sleep(3)   # pretend this actually takes time

        self.setSubstate(ousUID, 'IngestingProducts')
        progID = productsDir.split('_')[0]
        ingestedDataProds = []
        for dataProdName in sorted(dataProdNames):
            if dataProdName.startswith("product") and dataProdName.endswith(".data"):
                dataProdPathname = os.path.join(dataProductsDir, dataProdName)
                print(">>> Ingesting:", dataProdPathname)
                self.ngascon.put(dataProdPathname)
                self.writeMetadata(progID, ousUID, timestamp, dataProdName)
                ingestedDataProds.append(dataProdName)
                self.writeDeliveryStatus(progID, ousUID, timestamp, ingestedDataProds)
        time.sleep(2)   # pretend this actually takes time

        # Now populate the ASA metadata and delivery status tables
        self.writeDeliveryStatus(progID, ousUID, timestamp, ingestedDataProds, complete=True)
        self.setSubstate(ousUID, 'ProductsIngested')
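# A minimal sketch of a main program for this class, mirroring the procedural
# version shown earlier; the DRAWS_LOCAL_CACHE lookup follows the demo's own
# convention:
#
#   localCache = os.environ.get('DRAWS_LOCAL_CACHE')
#   if localCache is None:
#       raise RuntimeError("DRAWS_LOCAL_CACHE env variable is not defined")
#   ProductIngestor(localCache).start()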