def testAverageStdDev(self):
    """
    _testAverageStdDev_

    Test average, standard deviation function.
    """
    # Non-numeric input must raise the package's math exception.
    numList = ['a', 'b', 'c']
    self.assertRaises(MathAlgos.MathAlgoException,
                      MathAlgos.getAverageStdDev, numList)

    # Constant list: the average equals the constant, stdDev is zero.
    numList = [1, 1, 1, 1, 1, 1, 1, 1]
    result = MathAlgos.getAverageStdDev(numList=numList)
    self.assertEqual(result[0], 1.0)  # Average should be one
    self.assertEqual(result[1], 0.0)  # stdDev should be zero

    # 1..8: average is 4.5; population stdDev is sqrt(5.25) ~= 2.2912878...
    numList = [1, 2, 3, 4, 5, 6, 7, 8]
    result = MathAlgos.getAverageStdDev(numList=numList)
    self.assertEqual(result[0], 4.5)
    self.assertEqual(result[1], 2.2912878474779199)
    return
def testAverageStdDev(self):
    """
    _testAverageStdDev_

    Test average, standard deviation function.
    """
    # A list of non-numbers should raise MathAlgoException.
    numList = ['a', 'b', 'c']
    self.assertRaises(MathAlgos.MathAlgoException,
                      MathAlgos.getAverageStdDev, numList)

    # All-identical values: average is the value itself, stdDev is zero.
    numList = [1, 1, 1, 1, 1, 1, 1, 1]
    result = MathAlgos.getAverageStdDev(numList=numList)
    self.assertEqual(result[0], 1.0)  # Average should be one
    self.assertEqual(result[1], 0.0)  # stdDev should be zero

    # For 1..8 the average is 4.5 and the population stdDev is
    # sqrt(5.25) == 2.2912878474779199.
    numList = [1, 2, 3, 4, 5, 6, 7, 8]
    result = MathAlgos.getAverageStdDev(numList=numList)
    self.assertEqual(result[0], 4.5)
    self.assertEqual(result[1], 2.2912878474779199)
    return
def handleCouchPerformance(self, workflowName):
    """
    _handleCouchPerformance_

    The couch performance stuff is convoluted enough I think I
    want to handle it separately: pull every performance record for
    the workflow from the FWJRDump couch views, bucket the records by
    task and step, then summarize each metric with either a histogram
    (for keys in self.histogramKeys) or average/stdDev, and attach the
    worst offending jobs plus their logArchive/logCollect tarballs.

    :param workflowName: name of the workflow to summarize.

    NOTE(review): as captured here, the per-task `final` dict is rebuilt
    each iteration and `finalTask` is never filled or returned — the
    method presumably continues past this excerpt; confirm.
    """
    perf = self.fwjrdatabase.loadView("FWJRDump", "performanceByWorkflowName",
                                      options={"startkey": [workflowName],
                                               "endkey": [workflowName]})['rows']
    taskList = {}
    finalTask = {}

    # Bucket the raw couch rows by taskName, then stepName.
    for row in perf:
        taskName = row['value']['taskName']
        stepName = row['value']['stepName']
        if taskName not in taskList:
            taskList[taskName] = {}
        if stepName not in taskList[taskName]:
            taskList[taskName][stepName] = []
        value = row['value']
        taskList[taskName][stepName].append(value)

    for taskName in taskList.keys():
        final = {}
        for stepName in taskList[taskName].keys():
            output = {'jobTime': []}
            final[stepName] = {}
            masterList = []

            # For each step put the data into a dictionary called output
            # keyed by the name of the value
            for row in taskList[taskName][stepName]:
                masterList.append(row)
                for key in row.keys():
                    # Bookkeeping fields are not performance metrics.
                    if key in ['startTime', 'stopTime', 'taskName',
                               'stepName', 'jobID']:
                        continue
                    if key not in output:
                        output[key] = []
                    try:
                        output[key].append(float(row[key]))
                    except TypeError:
                        # Why do we get None values here?
                        # We may want to look into it
                        logging.debug("Got a None performance value for key %s" % key)
                        if row[key] is None:
                            # Treat a missing measurement as 0.0.
                            output[key].append(0.0)
                        else:
                            raise
                try:
                    # Derive wallclock time; TypeError means one of the
                    # timestamps was None, in which case we just skip it.
                    jobTime = row.get('stopTime', None) - row.get('startTime', None)
                    output['jobTime'].append(jobTime)
                    row['jobTime'] = jobTime
                except TypeError:
                    # One of those didn't have a real value
                    pass

            # Now that we've sorted the data, we process it one key at a time
            for key in output.keys():
                final[stepName][key] = {}
                # Assemble the 'worstOffenders'
                # These are the top [self.nOffenders] in that particular category
                # i.e., those with the highest values
                offenders = MathAlgos.getLargestValues(dictList=masterList,
                                                       key=key, n=self.nOffenders)
                for x in offenders:
                    try:
                        # Chase the offender's logArchive LFN to the
                        # logCollect tarball that finally holds its logs.
                        logArchive = self.fwjrdatabase.loadView(
                            "FWJRDump", "logArchivesByJobID",
                            options={"startkey": [x['jobID']],
                                     "endkey": [x['jobID'],
                                                x['retry_count']]})['rows'][0]['value']['lfn']
                        logCollectID = self.jobsdatabase.loadView(
                            "JobDump", "jobsByInputLFN",
                            options={"startkey": [workflowName, logArchive],
                                     "endkey": [workflowName, logArchive]})['rows'][0]['value']
                        logCollect = self.fwjrdatabase.loadView(
                            "FWJRDump", "outputByJobID",
                            options={"startkey": logCollectID,
                                     "endkey": logCollectID})['rows'][0]['value']['lfn']
                        x['logArchive'] = logArchive.split('/')[-1]
                        x['logCollect'] = logCollect
                    except (IndexError, KeyError) as ex:
                        # Both exception types were handled identically,
                        # so the two handlers are merged into one.
                        logging.debug("Unable to find final logArchive tarball for %i" % x['jobID'])
                        logging.debug(str(ex))

                if key in self.histogramKeys:
                    histogram = MathAlgos.createHistogram(numList=output[key],
                                                          nBins=self.histogramBins,
                                                          limit=self.histogramLimit)
                    final[stepName][key]['histogram'] = histogram
                else:
                    average, stdDev = MathAlgos.getAverageStdDev(numList=output[key])
                    final[stepName][key]['average'] = average
                    final[stepName][key]['stdDev'] = stdDev

                final[stepName][key]['worstOffenders'] = [{'jobID': x['jobID'],
                                                           'value': x.get(key, 0.0),
                                                           'log': x.get('logArchive', None),
                                                           'logCollect': x.get('logCollect', None)}
                                                          for x in offenders]
def handleCouchPerformance(self, workflowName):
    """
    _handleCouchPerformance_

    The couch performance stuff is convoluted enough I think I
    want to handle it separately: pull every performance record for the
    workflow, bucket it by task and step, then summarize each metric
    with a histogram (for keys in self.histogramKeys) or average/stdDev,
    attach worst offenders, and additionally build an 'errorsHistogram'
    restricted to failed jobs when any exist.

    :param workflowName: name of the workflow to summarize.

    NOTE(review): `finalTask` is never filled or returned in this
    excerpt — the method presumably continues past it; confirm.
    """
    perf = self.fwjrdatabase.loadView("FWJRDump", "performanceByWorkflowName",
                                      options={"startkey": [workflowName],
                                               "endkey": [workflowName]})['rows']
    failedJobs = self.getFailedJobs(workflowName)
    taskList = {}
    finalTask = {}

    # Bucket the raw couch rows by taskName, then stepName.
    for row in perf:
        taskName = row['value']['taskName']
        stepName = row['value']['stepName']
        if taskName not in taskList:
            taskList[taskName] = {}
        if stepName not in taskList[taskName]:
            taskList[taskName][stepName] = []
        value = row['value']
        taskList[taskName][stepName].append(value)

    for taskName in taskList.keys():
        final = {}
        for stepName in taskList[taskName].keys():
            output = {'jobTime': []}
            # This will be same, but only for failed jobs
            outputFailed = {'jobTime': []}
            final[stepName] = {}
            masterList = []

            # For each step put the data into a dictionary called output
            # keyed by the name of the value
            for row in taskList[taskName][stepName]:
                masterList.append(row)
                for key in row.keys():
                    # Bookkeeping fields are not performance metrics.
                    if key in ['startTime', 'stopTime', 'taskName',
                               'stepName', 'jobID']:
                        continue
                    if key not in output:
                        output[key] = []
                        if len(failedJobs) > 0:
                            outputFailed[key] = []
                    try:
                        output[key].append(float(row[key]))
                        if row['jobID'] in failedJobs:
                            outputFailed[key].append(float(row[key]))
                    except TypeError:
                        # Why do we get None values here?
                        # We may want to look into it
                        logging.debug("Got a None performance value for key %s" % key)
                        if row[key] is None:
                            # NOTE(review): a None value is recorded as 0.0 in
                            # output but never in outputFailed — confirm this
                            # asymmetry is intentional.
                            output[key].append(0.0)
                        else:
                            raise
                try:
                    # TypeError here means a timestamp was None; skip the row.
                    jobTime = row.get('stopTime', None) - row.get('startTime', None)
                    output['jobTime'].append(jobTime)
                    row['jobTime'] = jobTime
                    # Account job running time here only if the job has failed
                    if row['jobID'] in failedJobs:
                        outputFailed['jobTime'].append(jobTime)
                except TypeError:
                    # One of those didn't have a real value
                    pass

            # Now that we've sorted the data, we process it one key at a time
            for key in output.keys():
                final[stepName][key] = {}
                # Assemble the 'worstOffenders'
                # These are the top [self.nOffenders] in that particular category
                # i.e., those with the highest values
                offenders = MathAlgos.getLargestValues(dictList=masterList,
                                                       key=key, n=self.nOffenders)
                for x in offenders:
                    try:
                        # Chase the offender's logArchive LFN to the
                        # logCollect tarball that finally holds its logs.
                        logArchive = self.fwjrdatabase.loadView(
                            "FWJRDump", "logArchivesByJobID",
                            options={"startkey": [x['jobID']],
                                     "endkey": [x['jobID'],
                                                x['retry_count']]})['rows'][0]['value']['lfn']
                        logCollectID = self.jobsdatabase.loadView(
                            "JobDump", "jobsByInputLFN",
                            options={"startkey": [workflowName, logArchive],
                                     "endkey": [workflowName, logArchive]})['rows'][0]['value']
                        logCollect = self.fwjrdatabase.loadView(
                            "FWJRDump", "outputByJobID",
                            options={"startkey": logCollectID,
                                     "endkey": logCollectID})['rows'][0]['value']['lfn']
                        x['logArchive'] = logArchive.split('/')[-1]
                        x['logCollect'] = logCollect
                    except (IndexError, KeyError) as ex:
                        # Both exception types were handled identically,
                        # so the two handlers are merged into one.
                        logging.debug("Unable to find final logArchive tarball for %i" % x['jobID'])
                        logging.debug(str(ex))

                if key in self.histogramKeys:
                    # Usual histogram that was always done
                    histogram = MathAlgos.createHistogram(numList=output[key],
                                                          nBins=self.histogramBins,
                                                          limit=self.histogramLimit)
                    final[stepName][key]['histogram'] = histogram
                    # Histogram only picking values from failed jobs.
                    # Operators can use it to find out quicker why a
                    # workflow/task/step is failing
                    if len(failedJobs) > 0:
                        failedJobsHistogram = MathAlgos.createHistogram(numList=outputFailed[key],
                                                                        nBins=self.histogramBins,
                                                                        limit=self.histogramLimit)
                        final[stepName][key]['errorsHistogram'] = failedJobsHistogram
                else:
                    average, stdDev = MathAlgos.getAverageStdDev(numList=output[key])
                    final[stepName][key]['average'] = average
                    final[stepName][key]['stdDev'] = stdDev

                final[stepName][key]['worstOffenders'] = [{'jobID': x['jobID'],
                                                           'value': x.get(key, 0.0),
                                                           'log': x.get('logArchive', None),
                                                           'logCollect': x.get('logCollect', None)}
                                                          for x in offenders]
def handleCouchPerformance(self, workflowName):
    """
    _handleCouchPerformance_

    The couch performance stuff is convoluted enough I think I
    want to handle it separately: pull every performance record for the
    workflow, bucket it by task and step, then summarize each metric
    with a histogram (for keys in self.histogramKeys) or average/stdDev,
    attach worst offenders, and additionally build an 'errorsHistogram'
    restricted to failed jobs when any exist.

    :param workflowName: name of the workflow to summarize.

    NOTE(review): `finalTask` is never filled or returned in this
    excerpt — the method presumably continues past it; confirm.
    """
    perf = self.fwjrdatabase.loadView(
        "FWJRDump",
        "performanceByWorkflowName",
        options={"startkey": [workflowName], "endkey": [workflowName]},
    )["rows"]
    failedJobs = self.getFailedJobs(workflowName)
    taskList = {}
    finalTask = {}

    # Bucket the raw couch rows by taskName, then stepName.
    for row in perf:
        taskName = row["value"]["taskName"]
        stepName = row["value"]["stepName"]
        if taskName not in taskList:
            taskList[taskName] = {}
        if stepName not in taskList[taskName]:
            taskList[taskName][stepName] = []
        value = row["value"]
        taskList[taskName][stepName].append(value)

    for taskName in taskList.keys():
        final = {}
        for stepName in taskList[taskName].keys():
            output = {"jobTime": []}
            outputFailed = {"jobTime": []}  # This will be same, but only for failed jobs
            final[stepName] = {}
            masterList = []

            # For each step put the data into a dictionary called output
            # keyed by the name of the value
            for row in taskList[taskName][stepName]:
                masterList.append(row)
                for key in row.keys():
                    # Bookkeeping fields are not performance metrics.
                    if key in ["startTime", "stopTime", "taskName", "stepName", "jobID"]:
                        continue
                    if key not in output:
                        output[key] = []
                        if len(failedJobs) > 0:
                            outputFailed[key] = []
                    try:
                        output[key].append(float(row[key]))
                        if row["jobID"] in failedJobs:
                            outputFailed[key].append(float(row[key]))
                    except TypeError:
                        # Why do we get None values here?
                        # We may want to look into it
                        logging.debug("Got a None performance value for key %s" % key)
                        if row[key] is None:
                            # NOTE(review): a None value is recorded as 0.0 in
                            # output but never in outputFailed — confirm this
                            # asymmetry is intentional.
                            output[key].append(0.0)
                        else:
                            raise
                try:
                    # TypeError here means a timestamp was None; skip the row.
                    jobTime = row.get("stopTime", None) - row.get("startTime", None)
                    output["jobTime"].append(jobTime)
                    row["jobTime"] = jobTime
                    # Account job running time here only if the job has failed
                    if row["jobID"] in failedJobs:
                        outputFailed["jobTime"].append(jobTime)
                except TypeError:
                    # One of those didn't have a real value
                    pass

            # Now that we've sorted the data, we process it one key at a time
            for key in output.keys():
                final[stepName][key] = {}
                # Assemble the 'worstOffenders'
                # These are the top [self.nOffenders] in that particular category
                # i.e., those with the highest values
                offenders = MathAlgos.getLargestValues(
                    dictList=masterList, key=key, n=self.nOffenders
                )
                for x in offenders:
                    try:
                        # Chase the offender's logArchive LFN to the
                        # logCollect tarball that finally holds its logs.
                        logArchive = self.fwjrdatabase.loadView(
                            "FWJRDump",
                            "logArchivesByJobID",
                            options={
                                "startkey": [x["jobID"]],
                                "endkey": [x["jobID"], x["retry_count"]],
                            },
                        )["rows"][0]["value"]["lfn"]
                        logCollectID = self.jobsdatabase.loadView(
                            "JobDump",
                            "jobsByInputLFN",
                            options={
                                "startkey": [workflowName, logArchive],
                                "endkey": [workflowName, logArchive],
                            },
                        )["rows"][0]["value"]
                        logCollect = self.fwjrdatabase.loadView(
                            "FWJRDump",
                            "outputByJobID",
                            options={"startkey": logCollectID, "endkey": logCollectID},
                        )["rows"][0]["value"]["lfn"]
                        x["logArchive"] = logArchive.split("/")[-1]
                        x["logCollect"] = logCollect
                    except (IndexError, KeyError) as ex:
                        # Both exception types were handled identically,
                        # so the two handlers are merged into one.
                        logging.debug(
                            "Unable to find final logArchive tarball for %i" % x["jobID"]
                        )
                        logging.debug(str(ex))

                if key in self.histogramKeys:
                    # Usual histogram that was always done
                    histogram = MathAlgos.createHistogram(
                        numList=output[key],
                        nBins=self.histogramBins,
                        limit=self.histogramLimit,
                    )
                    final[stepName][key]["histogram"] = histogram
                    # Histogram only picking values from failed jobs.
                    # Operators can use it to find out quicker why a
                    # workflow/task/step is failing
                    if len(failedJobs) > 0:
                        failedJobsHistogram = MathAlgos.createHistogram(
                            numList=outputFailed[key],
                            nBins=self.histogramBins,
                            limit=self.histogramLimit,
                        )
                        final[stepName][key]["errorsHistogram"] = failedJobsHistogram
                else:
                    average, stdDev = MathAlgos.getAverageStdDev(numList=output[key])
                    final[stepName][key]["average"] = average
                    final[stepName][key]["stdDev"] = stdDev

                final[stepName][key]["worstOffenders"] = [
                    {
                        "jobID": x["jobID"],
                        "value": x.get(key, 0.0),
                        "log": x.get("logArchive", None),
                        "logCollect": x.get("logCollect", None),
                    }
                    for x in offenders
                ]