# Imports below reflect the usual WMCore layout; the exact module paths may differ by release.
import logging

from WMCore.Database.CMSCouch import CouchServer
from WMCore.Lexicon import sanitizeURL, splitCouchServiceURL


class WMStatsReader():

    def __init__(self, couchURL, dbName=None):
        couchURL = sanitizeURL(couchURL)['url']
        # set the connection for local couchDB call
        if dbName:
            self.couchURL = couchURL
            self.dbName = dbName
        else:
            self.couchURL, self.dbName = splitCouchServiceURL(couchURL)
        self.couchServer = CouchServer(self.couchURL)
        self.couchDB = CouchServer(self.couchURL).connectDatabase(self.dbName, False)

    def workflowsByStatus(self, statusList, format="list", stale="update_after"):
        keys = statusList
        options = {}
        if stale:
            options = {"stale": stale}
        result = self.couchDB.loadView("WMStats", "requestByStatus", options, keys)

        if format == "dict":
            workflowDict = {}
            for item in result["rows"]:
                workflowDict[item["id"]] = None
            return workflowDict
        else:
            workflowList = []
            for item in result["rows"]:
                workflowList.append(item["id"])
            return workflowList

    def workflowStatus(self, stale="update_after"):
        """
        _workflowStatus_

        Return a dictionary with all available workflows, grouped by status
        and with the timestamp of the status.
        """
        options = {}
        if stale:
            options = {"stale": stale}
        result = self.couchDB.loadView("WMStats", "requestByStatus", options)

        stateDict = {}
        for item in result['rows']:
            if item["key"] not in stateDict:
                stateDict[item["key"]] = {}
            stateDict[item["key"]][item["id"]] = item["value"]
        return stateDict

    def getDBInstance(self):
        return self.couchDB

    def getHeartbeat(self):
        try:
            return self.couchDB.info()
        except Exception as ex:
            return {'error_message': str(ex)}
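# Hedged usage sketch (not part of the original source): how WMStatsReader might be
# driven against a local WMStats CouchDB instance. The URL and the status names below
# are illustrative assumptions only.
def _exampleWMStatsReaderUsage():
    reader = WMStatsReader("http://localhost:5984/wmstats")
    # request names currently in the given states, first as a list ...
    running = reader.workflowsByStatus(["running-open", "running-closed"])
    # ... then keyed by request name (the values are placeholders, always None)
    runningDict = reader.workflowsByStatus(["running-open", "running-closed"], format="dict")
    # all workflows grouped by status; the value is the timestamp of that status
    byStatus = reader.workflowStatus()
    return running, runningDict, byStatus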
class LocalCouchDBData():

    def __init__(self, couchURL, summaryLevel):
        # set the connection for local couchDB call
        self.couchURL = couchURL
        self.couchURLBase, self.dbName = splitCouchServiceURL(couchURL)
        self.jobCouchDB = CouchServer(self.couchURLBase).connectDatabase(self.dbName + "/jobs", False)
        self.fwjrsCouchDB = CouchServer(self.couchURLBase).connectDatabase(self.dbName + "/fwjrs", False)
        self.summaryLevel = summaryLevel

    def getJobSummaryByWorkflowAndSite(self):
        """
        Gets the job status information by workflow.

        example
        {"rows":[
            {"key": ["request_name1", "task_name1", "queued_first", "siteA"], "value": 100},
            {"key": ["request_name1", "task_name1", "queued_first", "siteB"], "value": 100},
            {"key": ["request_name1", "task_name2", "running", "siteA"], "value": 100},
            {"key": ["request_name1", "task_name2", "success", "siteB"], "value": 100}
        ]}
        and converts it to
        {'request_name1': {'queued_first': {'siteA': 100, 'siteB': 100},
                           'running': {'siteA': 100},
                           'success': {'siteB': 100}}}
        If the summary level is "task", it converts to
        {'request_name1': {'tasks': {'task_name1': {'queued_first': {'siteA': 100,
                                                                     'siteB': 100}},
                                     'task_name2': {'running': {'siteA': 100},
                                                    'success': {'siteB': 100}}}}}
        """
        options = {"group": True, "stale": "ok"}
        # size of the data should be relatively small (~1MB) to hold in memory;
        # if not, find a way to stream it
        results = self.jobCouchDB.loadView("JobDump", "jobStatusByWorkflowAndSite", options)
        # reformat the doc to upload to reqmon db
        data = {}
        if self.summaryLevel == "task":
            for x in results.get('rows', []):
                data.setdefault(x['key'][0], {})
                data[x['key'][0]].setdefault('tasks', {})
                data[x['key'][0]]['tasks'].setdefault(x['key'][1], {})
                data[x['key'][0]]['tasks'][x['key'][1]].setdefault(x['key'][2], {})
                data[x['key'][0]]['tasks'][x['key'][1]][x['key'][2]][x['key'][3]] = x['value']
        else:
            for x in results.get('rows', []):
                data.setdefault(x['key'][0], {})
                data[x['key'][0]].setdefault(x['key'][2], {})
                data[x['key'][0]][x['key'][2]][x['key'][3]] = x['value']
        logging.info("Found %i requests", len(data))
        return data

    def getJobPerformanceByTaskAndSite(self):
        """
        Gets the job performance information by workflow, task and site.

        example
        {"rows":[
            {"key": ["request_name1", "task_name1", "siteA"],
             "value": {wrappedTotalJobTime: 1612,
                       cmsRunCPUPerformance: {totalJobCPU: 20.132924000000003,
                                              totalJobTime: 421.0489,
                                              totalEventCPU: 4.064402},
                       inputEvents: 0,
                       dataset: {/TestLHE/TEST_Subscriptions_WMA-Test-v1/GEN:
                                     {size: 5093504, events: 10000, totalLumis: 100}}}},
            {"key": ["request_name1", "task_name2", "siteA"],
             "value": {wrappedTotalJobTime: 1612,
                       cmsRunCPUPerformance: {...},
                       inputEvents: 0,
                       dataset: {...}}}
        ]}
        and converts it to
        {'request_name1': {'tasks':
            {'task_name1': {'sites': {'siteA': {wrappedTotalJobTime: 1612,
                                                cmsRunCPUPerformance: {...},
                                                inputEvents: 0,
                                                dataset: {...}}}},
             'task_name2': {'sites': {'siteA': {...}}}}}}
        """
        options = {"group": True, "stale": "ok", "reduce": True}
        # size of the data should be relatively small (~1MB) to hold in memory;
        # if not, find a way to stream it
        results = self.fwjrsCouchDB.loadView("FWJRDump", "performanceSummaryByTask", options)
        data = {}
        for x in results.get('rows', []):
            data.setdefault(x['key'][0], {})
            data[x['key'][0]].setdefault('tasks', {})
            data[x['key'][0]]['tasks'].setdefault(x['key'][1], {})
            data[x['key'][0]]['tasks'][x['key'][1]].setdefault('sites', {})
            data[x['key'][0]]['tasks'][x['key'][1]]['sites'][x['key'][2]] = x['value']
            data[x['key'][0]]['tasks'][x['key'][1]]['sites'][x['key'][2]].setdefault('dataset', {})
            if x['key'][3]:
                data[x['key'][0]]['tasks'][x['key'][1]]['sites'][x['key'][2]]['dataset'][x['key'][3]] = x['value']['datasetStat']
                # there are duplicate datasets under each site entry; just ignore them
        return data

    def getEventSummaryByWorkflow(self):
        """
        Gets the output size and event counts by workflow.

        example
        {"rows":[
            {"key": ["request_name1", "/test/output_dataset1"],
             "value": {size: 20286644784714, events: 38938099, count: 6319,
                       dataset: "/test/output_dataset1"}},
            {"key": ["request_name1", "/test/output_dataset2"],
             "value": {size: 20286644784714, events: 38938099, count: 6319,
                       dataset: "/test/output_dataset2"}},
            ...
        ]}
        and converts it to
        {'request_name1': [{size: 20286644784714, events: 38938099, count: 6319,
                            dataset: "/test/output_dataset1"},
                           {size: 20286644784714, events: 38938099, count: 6319,
                            dataset: "/test/output_dataset2"}],
         'request_name2': ...}
        """
        options = {"group": True, "stale": "ok", "reduce": True}
        # size of the data should be relatively small (~1MB) to hold in memory;
        # if not, find a way to stream it
        results = self.fwjrsCouchDB.loadView("FWJRDump", "outputByWorkflowName", options)
        # reformat the doc to upload to reqmon db
        data = {}
        for x in results.get('rows', []):
            data.setdefault(x['key'][0], [])
            data[x['key'][0]].append(x['value'])
        logging.info("Found %i requests", len(data))
        return data

    def getHeartbeat(self):
        try:
            return self.jobCouchDB.info()
        except Exception as ex:
            return {'error_message': str(ex)}
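# Hedged illustration (not in the original source): walking the nested dictionary
# returned by LocalCouchDBData.getJobSummaryByWorkflowAndSite() at the default
# (non-task) summary level and totalling job counts per status across sites.
# The sample input in the comment below is an assumption shaped like the docstring example.
def _totalJobsByStatus(jobSummary):
    totals = {}
    for request, statusMap in jobSummary.items():
        totals[request] = {}
        for status, siteMap in statusMap.items():
            # siteMap maps a site name to the number of jobs in that status
            totals[request][status] = sum(siteMap.values())
    return totals

# _totalJobsByStatus({'request_name1': {'queued_first': {'siteA': 100, 'siteB': 100}}})
# -> {'request_name1': {'queued_first': 200}}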
# A later variant of LocalCouchDBData: it reads framework job report data through
# FWJRDBAPI and job performance data from a dedicated summary-statistics database,
# instead of querying the fwjrs views directly.
# The FWJRDBAPI import path below is assumed from the usual WMCore layout; it may differ by release.
from WMCore.Services.FWJRDB.FWJRDBAPI import FWJRDBAPI


class LocalCouchDBData(object):

    def __init__(self, couchURL, statSummaryDB, summaryLevel):
        # set the connection for local couchDB call
        self.couchURL = couchURL
        self.couchURLBase, self.dbName = splitCouchServiceURL(couchURL)
        self.jobCouchDB = CouchServer(self.couchURLBase).connectDatabase(self.dbName + "/jobs", False)
        fwjrDBname = "%s/fwjrs" % self.dbName
        self.fwjrAPI = FWJRDBAPI(self.couchURLBase, fwjrDBname)
        self.summaryStatsDB = CouchServer(self.couchURLBase).connectDatabase(statSummaryDB, False)
        self.summaryLevel = summaryLevel

    def getJobSummaryByWorkflowAndSite(self):
        """
        Gets the job status information by workflow.

        example
        {"rows":[
            {"key": ["request_name1", "task_name1", "queued_first", "siteA"], "value": 100},
            {"key": ["request_name1", "task_name1", "queued_first", "siteB"], "value": 100},
            {"key": ["request_name1", "task_name2", "running", "siteA"], "value": 100},
            {"key": ["request_name1", "task_name2", "success", "siteB"], "value": 100}
        ]}
        and converts it to
        {'request_name1': {'queued_first': {'siteA': 100, 'siteB': 100},
                           'running': {'siteA': 100},
                           'success': {'siteB': 100}}}
        If the summary level is "task", it converts to
        {'request_name1': {'tasks': {'task_name1': {'queued_first': {'siteA': 100,
                                                                     'siteB': 100}},
                                     'task_name2': {'running': {'siteA': 100},
                                                    'success': {'siteB': 100}}}}}
        """
        options = {"group": True, "stale": "ok"}
        # size of the data should be relatively small (~1MB) to hold in memory;
        # if not, find a way to stream it
        results = self.jobCouchDB.loadView("JobDump", "jobStatusByWorkflowAndSite", options)
        # reformat the doc to upload to reqmon db
        data = {}
        if self.summaryLevel == "task":
            for x in results.get('rows', []):
                data.setdefault(x['key'][0], {})
                data[x['key'][0]].setdefault('tasks', {})
                data[x['key'][0]]['tasks'].setdefault(x['key'][1], {})
                data[x['key'][0]]['tasks'][x['key'][1]].setdefault(x['key'][2], {})
                data[x['key'][0]]['tasks'][x['key'][1]][x['key'][2]][x['key'][3]] = x['value']
        else:
            for x in results.get('rows', []):
                data.setdefault(x['key'][0], {})
                data[x['key'][0]].setdefault(x['key'][2], {})
                data[x['key'][0]][x['key'][2]][x['key'][3]] = x['value']
        logging.info("Found %i requests", len(data))
        return data

    def getJobPerformanceByTaskAndSiteFromSummaryDB(self):
        options = {"include_docs": True}
        results = self.summaryStatsDB.allDocs(options)
        data = {}
        for row in results['rows']:
            if not row['id'].startswith("_"):
                data[row['id']] = {}
                data[row['id']]['tasks'] = row['doc']['tasks']
        return data

    def getEventSummaryByWorkflow(self):
        """
        Gets the output size and event counts by workflow.

        example
        {"rows":[
            {"key": ["request_name1", "/test/output_dataset1"],
             "value": {size: 20286644784714, events: 38938099, count: 6319,
                       dataset: "/test/output_dataset1"}},
            {"key": ["request_name1", "/test/output_dataset2"],
             "value": {size: 20286644784714, events: 38938099, count: 6319,
                       dataset: "/test/output_dataset2"}},
            ...
        ]}
        and converts it to
        {'request_name1': [{size: 20286644784714, events: 38938099, count: 6319,
                            dataset: "/test/output_dataset1"},
                           {size: 20286644784714, events: 38938099, count: 6319,
                            dataset: "/test/output_dataset2"}],
         'request_name2': ...}
        """
        results = self.fwjrAPI.outputByWorkflowName()
        # reformat the doc to upload to reqmon db
        data = {}
        for x in results.get('rows', []):
            data.setdefault(x['key'][0], [])
            data[x['key'][0]].append(x['value'])
        logging.info("Found %i requests", len(data))
        return data

    def getHeartbeat(self):
        try:
            return self.jobCouchDB.info()
        except Exception as ex:
            return {'error_message': str(ex)}

    def getSkippedFilesSummaryByWorkflow(self):
        """
        Gets the skipped-file summary.

        Gets the data in the following format:
        {u'rows': [{u'value': {u'skippedFile': 5},
                    u'key': ["sryu_StepChain_MC_reqmgr2_170609_180852_5295",
                             "/sryu_StepChain_MC_reqmgr2_170609_180852_5295/GENSIM/GENSIMMergeRAWSIMoutput",
                             "T1_US_FNAL_Disk"]}]}
        and converts it to
        {'sryu_StepChain_MC_reqmgr2_170609_180852_5295':
            {'tasks': {'/sryu_StepChain_MC_reqmgr2_170609_180852_5295/GENSIM/GENSIMMergeRAWSIMoutput':
                           {'T1_US_FNAL_Disk': {'skippedFile': 5}}},
             'skipped': True}}
        """
        results = self.fwjrAPI.getFWJRWithSkippedFiles()
        # reformat the doc to upload to reqmon db
        data = {}
        for x in results.get('rows', []):
            data.setdefault(x['key'][0], {})
            data[x['key'][0]].setdefault('tasks', {})
            data[x['key'][0]]['tasks'].setdefault(x['key'][1], {})
            data[x['key'][0]]['tasks'][x['key'][1]][x['key'][2]] = x['value']
            data[x['key'][0]]['skipped'] = True
        return data
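# Hedged sketch (not in the original source): combining the per-request dictionaries
# produced by the class above, e.g. the job summary and the skipped-file summary, into
# a single summary keyed by request name. The deepUpdate helper and the merge policy
# are assumptions for illustration only.
def _mergeRequestSummaries(jobSummary, skippedSummary):
    """Merge two {request_name: {...}} dictionaries, nesting dict values recursively."""
    def deepUpdate(target, source):
        for key, value in source.items():
            if isinstance(value, dict) and isinstance(target.get(key), dict):
                deepUpdate(target[key], value)
            else:
                target[key] = value

    merged = {}
    for summary in (jobSummary, skippedSummary):
        for request, info in summary.items():
            merged.setdefault(request, {})
            deepUpdate(merged[request], info)
    return merged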
# An intermediate variant of LocalCouchDBData: it still queries the fwjrs views directly
# but also reads job performance data from the summary-statistics database.
class LocalCouchDBData():

    def __init__(self, couchURL, statSummaryDB, summaryLevel):
        # set the connection for local couchDB call
        self.couchURL = couchURL
        self.couchURLBase, self.dbName = splitCouchServiceURL(couchURL)
        self.jobCouchDB = CouchServer(self.couchURLBase).connectDatabase(self.dbName + "/jobs", False)
        self.fwjrsCouchDB = CouchServer(self.couchURLBase).connectDatabase(self.dbName + "/fwjrs", False)
        # TODO: remove the hard coded name (wma_summarydb)
        self.summaryStatsDB = CouchServer(self.couchURLBase).connectDatabase(statSummaryDB, False)
        self.summaryLevel = summaryLevel

    def getJobSummaryByWorkflowAndSite(self):
        """
        Gets the job status information by workflow.

        example
        {"rows":[
            {"key": ["request_name1", "task_name1", "queued_first", "siteA"], "value": 100},
            {"key": ["request_name1", "task_name1", "queued_first", "siteB"], "value": 100},
            {"key": ["request_name1", "task_name2", "running", "siteA"], "value": 100},
            {"key": ["request_name1", "task_name2", "success", "siteB"], "value": 100}
        ]}
        and converts it to
        {'request_name1': {'queued_first': {'siteA': 100, 'siteB': 100},
                           'running': {'siteA': 100},
                           'success': {'siteB': 100}}}
        If the summary level is "task", it converts to
        {'request_name1': {'tasks': {'task_name1': {'queued_first': {'siteA': 100,
                                                                     'siteB': 100}},
                                     'task_name2': {'running': {'siteA': 100},
                                                    'success': {'siteB': 100}}}}}
        """
        options = {"group": True, "stale": "ok"}
        # size of the data should be relatively small (~1MB) to hold in memory;
        # if not, find a way to stream it
        results = self.jobCouchDB.loadView("JobDump", "jobStatusByWorkflowAndSite", options)
        # reformat the doc to upload to reqmon db
        data = {}
        if self.summaryLevel == "task":
            for x in results.get('rows', []):
                data.setdefault(x['key'][0], {})
                data[x['key'][0]].setdefault('tasks', {})
                data[x['key'][0]]['tasks'].setdefault(x['key'][1], {})
                data[x['key'][0]]['tasks'][x['key'][1]].setdefault(x['key'][2], {})
                data[x['key'][0]]['tasks'][x['key'][1]][x['key'][2]][x['key'][3]] = x['value']
        else:
            for x in results.get('rows', []):
                data.setdefault(x['key'][0], {})
                data[x['key'][0]].setdefault(x['key'][2], {})
                data[x['key'][0]][x['key'][2]][x['key'][3]] = x['value']
        logging.info("Found %i requests", len(data))
        return data

    def getJobPerformanceByTaskAndSiteFromSummaryDB(self):
        options = {"include_docs": True}
        results = self.summaryStatsDB.allDocs(options)
        data = {}
        for row in results['rows']:
            if not row['id'].startswith("_"):
                data[row['id']] = {}
                data[row['id']]['tasks'] = row['doc']['tasks']
        return data

    def getEventSummaryByWorkflow(self):
        """
        Gets the output size and event counts by workflow.

        example
        {"rows":[
            {"key": ["request_name1", "/test/output_dataset1"],
             "value": {size: 20286644784714, events: 38938099, count: 6319,
                       dataset: "/test/output_dataset1"}},
            {"key": ["request_name1", "/test/output_dataset2"],
             "value": {size: 20286644784714, events: 38938099, count: 6319,
                       dataset: "/test/output_dataset2"}},
            ...
        ]}
        and converts it to
        {'request_name1': [{size: 20286644784714, events: 38938099, count: 6319,
                            dataset: "/test/output_dataset1"},
                           {size: 20286644784714, events: 38938099, count: 6319,
                            dataset: "/test/output_dataset2"}],
         'request_name2': ...}
        """
        options = {"group": True, "stale": "ok", "reduce": True}
        # size of the data should be relatively small (~1MB) to hold in memory;
        # if not, find a way to stream it
        results = self.fwjrsCouchDB.loadView("FWJRDump", "outputByWorkflowName", options)
        # reformat the doc to upload to reqmon db
        data = {}
        for x in results.get('rows', []):
            data.setdefault(x['key'][0], [])
            data[x['key'][0]].append(x['value'])
        logging.info("Found %i requests", len(data))
        return data

    def getHeartbeat(self):
        try:
            return self.jobCouchDB.info()
        except Exception as ex:
            return {'error_message': str(ex)}