示例#1
0
    def testHistogram(self):
        """
        _testHistogram_

        Test our ability to build histogram objects out of lists.

        Three scenarios are exercised: a uniform list, an evenly spread
        list split into two bins, and a limit small enough to produce
        underflow and overflow bins.
        """

        # Check that we override correctly when we give a uniform list:
        # all ten entries are expected in the first bin, spanning 0.0 to 2.0
        numList = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
        result = MathAlgos.createHistogram(numList=numList, nBins=10, limit=10)
        self.assertEqual(result[0]['lowerEdge'], 0.0)
        self.assertEqual(result[0]['upperEdge'], 2.0)
        self.assertEqual(result[0]['nEvents'], 10)

        # Two bins holding five entries each.  The averages and standard
        # deviations are computed floats, so they are compared with a
        # tolerance rather than bit-for-bit.
        numList = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
        result = MathAlgos.createHistogram(numList=numList, nBins=2, limit=10)
        self.assertEqual(len(result), 2)
        self.assertEqual(result[0]['nEvents'], 5)
        self.assertEqual(result[1]['nEvents'], 5)
        self.assertAlmostEqual(result[0]['average'], 3.0)
        self.assertAlmostEqual(result[1]['average'], 8.0)
        # 1.4142... is sqrt(2), an irrational value that cannot be
        # represented exactly
        self.assertAlmostEqual(result[0]['stdDev'], 1.4142135623730951)
        self.assertAlmostEqual(result[1]['stdDev'], 1.4142135623730951)

        # Check that we generate overflow and underflow bins correctly:
        # four bins are expected, with the underflow bin first and the
        # overflow bin second
        result = MathAlgos.createHistogram(numList=numList, nBins=2, limit=1)
        self.assertEqual(len(result), 4)
        self.assertEqual(result[0]['type'], 'underflow')
        self.assertEqual(result[1]['type'], 'overflow')
        self.assertEqual(result[0]['nEvents'], 2)
        self.assertEqual(result[1]['nEvents'], 2)
        self.assertAlmostEqual(result[0]['stdDev'], 0.5)
        self.assertAlmostEqual(result[1]['stdDev'], 0.5)
示例#2
0
    def testHistogram(self):
        """
        _testHistogram_

        Test our ability to build histogram objects out of lists.

        Checks the single-bin result for a uniform list, the two-bin split
        of the integers 1..10, and the creation of underflow/overflow bins
        when the limit is tightened to 1.
        """

        # Check that we override correctly when we give a uniform list
        numList = [1] * 10
        result = MathAlgos.createHistogram(numList=numList, nBins=10, limit=10)
        self.assertEqual(result[0]['lowerEdge'], 0.0)
        self.assertEqual(result[0]['upperEdge'], 2.0)
        self.assertEqual(result[0]['nEvents'], 10)

        numList = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
        result = MathAlgos.createHistogram(numList=numList, nBins=2, limit=10)
        self.assertEqual(len(result), 2)
        self.assertEqual(result[0]['nEvents'], 5)
        self.assertEqual(result[1]['nEvents'], 5)
        # Averages and standard deviations come out of floating point
        # arithmetic; assertAlmostEqual avoids failures caused by a
        # last-digit rounding difference.
        self.assertAlmostEqual(result[0]['average'], 3.0)
        self.assertAlmostEqual(result[1]['average'], 8.0)
        # Expected spread of each bin is sqrt(2)
        self.assertAlmostEqual(result[0]['stdDev'], 1.4142135623730951)
        self.assertAlmostEqual(result[1]['stdDev'], 1.4142135623730951)

        # Check that we generate overflow and underflow bins correctly
        result = MathAlgos.createHistogram(numList=numList, nBins=2, limit=1)
        self.assertEqual(len(result), 4)
        self.assertEqual(result[0]['type'], 'underflow')
        self.assertEqual(result[1]['type'], 'overflow')
        self.assertEqual(result[0]['nEvents'], 2)
        self.assertEqual(result[1]['nEvents'], 2)
        self.assertAlmostEqual(result[0]['stdDev'], 0.5)
        self.assertAlmostEqual(result[1]['stdDev'], 0.5)
示例#3
0
    def testSortListByKeys(self):
        """
        _testSortListByKeys_

        Sort a list of dictionaries on a single numerical key, in both
        directions, and make sure that asking for an unknown key does not
        blow up.
        """

        records = [
            dict(a=102, b=200, name='One'),
            dict(a=101, b=199, name='Two'),
            dict(a=100, b=198, name='Three'),
            dict(a=103, b=197, name='Four'),
        ]

        # Ascending on 'a'
        ordered = MathAlgos.sortDictionaryListByKey(dictList=records, key='a')
        for position, expected in enumerate(('Three', 'Two', 'One', 'Four')):
            self.assertEqual(ordered[position]['name'], expected)

        # Ascending on 'b'
        ordered = MathAlgos.sortDictionaryListByKey(dictList=records, key='b')
        for position, expected in enumerate(('Four', 'Three', 'Two', 'One')):
            self.assertEqual(ordered[position]['name'], expected)

        # Descending on 'b'; entries are checked from the tail to the head
        ordered = MathAlgos.sortDictionaryListByKey(dictList=records,
                                                    key='b',
                                                    reverse=True)
        tailFirst = ((3, 'Four'), (2, 'Three'), (1, 'Two'), (0, 'One'))
        for position, expected in tailFirst:
            self.assertEqual(ordered[position]['name'], expected)

        # This shouldn't fail, it just should return a flat list
        MathAlgos.sortDictionaryListByKey(dictList=records,
                                          key='c',
                                          reverse=True)
示例#4
0
    def testGetLargestValue(self):
        """
        _testGetLargestValue_

        Ask for the n entries with the largest value of a given key out of
        a list of dictionaries and verify both content and order.
        """

        def entry(a, b, name):
            # Always hand back a fresh dictionary so that the expected
            # values never share objects with the input list
            return {'a': a, 'b': b, 'name': name}

        candidates = [
            entry(102, 200, 'One'),
            entry(101, 199, 'Two'),
            entry(100, 198, 'Three'),
            entry(103, 197, 'Four'),
        ]

        # Top two by 'a'
        topByA = MathAlgos.getLargestValues(dictList=candidates, key='a', n=2)
        expectedByA = [entry(103, 197, 'Four'), entry(102, 200, 'One')]
        self.assertEqual(topByA, expectedByA)

        # Top three by 'b'
        topByB = MathAlgos.getLargestValues(dictList=candidates, key='b', n=3)
        expectedByB = [
            entry(102, 200, 'One'),
            entry(101, 199, 'Two'),
            entry(100, 198, 'Three'),
        ]
        self.assertEqual(topByB, expectedByB)
示例#5
0
    def testTruncate(self):
        """
        _testTruncate_

        floorTruncate is expected to cut 1.23456 down to 1.234
        """

        truncated = MathAlgos.floorTruncate(1.23456)
        self.assertEqual(truncated, 1.234)
示例#6
0
    def testTruncate(self):
        """
        _testTruncate_

        Exercise the floorTruncate function on a single known value
        """

        rawValue, expected = 1.23456, 1.234
        self.assertEqual(MathAlgos.floorTruncate(rawValue), expected)
示例#7
0
    def testGetLargestValue(self):
        """
        _testGetLargestValue_

        Verify that the n largest entries for a specific key are returned,
        largest first, from a list of dictionaries
        """

        def build(rows):
            # Turn (a, b, name) tuples into brand new dictionaries
            return [{'a': a, 'b': b, 'name': name} for a, b, name in rows]

        entries = build([(102, 200, 'One'),
                         (101, 199, 'Two'),
                         (100, 198, 'Three'),
                         (103, 197, 'Four')])

        largest = MathAlgos.getLargestValues(dictList=entries, key='a', n=2)
        self.assertEqual(largest, build([(103, 197, 'Four'),
                                         (102, 200, 'One')]))

        largest = MathAlgos.getLargestValues(dictList=entries, key='b', n=3)
        self.assertEqual(largest, build([(102, 200, 'One'),
                                         (101, 199, 'Two'),
                                         (100, 198, 'Three')]))
示例#8
0
    def testAverageStdDev(self):
        """
        _testAverageStdDev_

        Test the average / standard deviation function: non-numeric input
        must raise MathAlgoException, a constant list has no spread, and
        the integers 1..8 give the expected mean and standard deviation.
        """

        # Non-numeric entries have to be rejected
        numList = ['a', 'b', 'c']
        self.assertRaises(MathAlgos.MathAlgoException,
                          MathAlgos.getAverageStdDev, numList)

        # Results are computed floats, so compare with a tolerance instead
        # of demanding bit-for-bit equality
        numList = [1, 1, 1, 1, 1, 1, 1, 1]
        result = MathAlgos.getAverageStdDev(numList=numList)
        self.assertAlmostEqual(result[0], 1.0)  # Average should be one
        self.assertAlmostEqual(result[1], 0.0)  # stdDev should be zero

        numList = [1, 2, 3, 4, 5, 6, 7, 8]
        result = MathAlgos.getAverageStdDev(numList=numList)
        self.assertAlmostEqual(result[0], 4.5)
        # Population standard deviation: sqrt(42 / 8) = sqrt(5.25)
        self.assertAlmostEqual(result[1], 2.2912878474779199)

        return
示例#9
0
    def testSortListByKeys(self):
        """
        _testSortListByKeys_

        Check that a list of dictionaries can be ordered by one numerical
        key, forwards and backwards, and that a missing key is tolerated
        """

        entries = [{'a': 102, 'b': 200, 'name': 'One'},
                   {'a': 101, 'b': 199, 'name': 'Two'},
                   {'a': 100, 'b': 198, 'name': 'Three'},
                   {'a': 103, 'b': 197, 'name': 'Four'}]

        def checkNames(sortedEntries, indexedNames):
            # Compare names one position at a time, in the order given
            for index, name in indexedNames:
                self.assertEqual(sortedEntries[index]['name'], name)

        byA = MathAlgos.sortDictionaryListByKey(dictList=entries, key='a')
        checkNames(byA, [(0, 'Three'), (1, 'Two'), (2, 'One'), (3, 'Four')])

        byB = MathAlgos.sortDictionaryListByKey(dictList=entries, key='b')
        checkNames(byB, [(0, 'Four'), (1, 'Three'), (2, 'Two'), (3, 'One')])

        byBReversed = MathAlgos.sortDictionaryListByKey(dictList=entries,
                                                        key='b',
                                                        reverse=True)
        checkNames(byBReversed,
                   [(3, 'Four'), (2, 'Three'), (1, 'Two'), (0, 'One')])

        # This shouldn't fail, it just should return a flat list
        MathAlgos.sortDictionaryListByKey(dictList=entries, key='c',
                                          reverse=True)
示例#10
0
    def testAverageStdDev(self):
        """
        _testAverageStdDev_

        Test average, standard deviation function.

        Covers rejection of non-numeric input, a constant list (mean 1,
        no spread) and the integers 1..8.
        """

        numList = ['a', 'b', 'c']
        self.assertRaises(MathAlgos.MathAlgoException,
                          MathAlgos.getAverageStdDev, numList)

        # The returned pair holds floats produced by arithmetic, hence the
        # tolerant comparisons below
        numList = [1, 1, 1, 1, 1, 1, 1, 1]
        average, stdDev = MathAlgos.getAverageStdDev(numList=numList)
        self.assertAlmostEqual(average, 1.0)  # Average should be one
        self.assertAlmostEqual(stdDev, 0.0)  # stdDev should be zero

        numList = [1, 2, 3, 4, 5, 6, 7, 8]
        average, stdDev = MathAlgos.getAverageStdDev(numList=numList)
        self.assertAlmostEqual(average, 4.5)
        # sqrt(5.25): the squared deviations from 4.5 sum to 42, divided by
        # the 8 entries (population standard deviation)
        self.assertAlmostEqual(stdDev, 2.2912878474779199)

        return
示例#11
0
    def handleCouchPerformance(self, workflowName):
        """
        _handleCouchPerformance_

        The couch performance stuff is convoluted enough I think I want to handle it separately.

        Reads the "performanceByWorkflowName" view for workflowName, groups
        the records by task and step, and builds for every step and every
        performance key:
          - a histogram (keys in self.histogramKeys), plus an
            'errorsHistogram' restricted to failed jobs when there are any,
            or otherwise the average and standard deviation;
          - the 'worstOffenders', i.e. the self.nOffenders records with the
            largest value, annotated with log locations when available.
        """
        perf = self.fwjrdatabase.loadView("FWJRDump",
                                          "performanceByWorkflowName",
                                          options={
                                              "startkey": [workflowName],
                                              "endkey": [workflowName]
                                          })['rows']

        failedJobs = self.getFailedJobs(workflowName)

        taskList = {}
        # NOTE(review): neither 'finalTask' nor the per-task 'final' is
        # stored or returned in the code visible here -- presumably this
        # happens further down; confirm.
        finalTask = {}

        # Bookkeeping fields that are not performance measurements
        skippedKeys = ('startTime', 'stopTime', 'taskName', 'stepName',
                       'jobID')

        # Group the performance records by task name, then by step name
        for row in perf:
            value = row['value']
            taskName = value['taskName']
            stepName = value['stepName']
            taskList.setdefault(taskName, {}).setdefault(stepName,
                                                         []).append(value)

        for taskName in taskList:
            final = {}
            for stepName in taskList[taskName]:
                output = {'jobTime': []}
                # This will be same, but only for failed jobs
                outputFailed = {'jobTime': []}
                final[stepName] = {}
                masterList = []

                # For each step put the data into a dictionary called output
                # keyed by the name of the value
                for row in taskList[taskName][stepName]:
                    masterList.append(row)
                    for key in row:
                        if key in skippedKeys:
                            continue
                        if key not in output:
                            output[key] = []
                            if len(failedJobs) > 0:
                                outputFailed[key] = []
                        try:
                            output[key].append(float(row[key]))
                            if row['jobID'] in failedJobs:
                                outputFailed[key].append(float(row[key]))
                        except TypeError:
                            # Only a None value is tolerated; anything else
                            # is a genuine error
                            if row[key] is not None:
                                raise
                            # Why do we get None values here?
                            # We may want to look into it
                            logging.debug(
                                "Got a None performance value for key %s",
                                key)
                            output[key].append(0.0)
                    try:
                        jobTime = row.get('stopTime') - row.get('startTime')
                        output['jobTime'].append(jobTime)
                        # Stored on the record so that it can be ranked
                        # like any other key below
                        row['jobTime'] = jobTime
                        # Account job running time here only if the job has failed
                        if row['jobID'] in failedJobs:
                            outputFailed['jobTime'].append(jobTime)
                    except TypeError:
                        # One of those didn't have a real value
                        pass

                # Now that we've sorted the data, we process it one key at a time
                for key in output:
                    final[stepName][key] = {}
                    # Assemble the 'worstOffenders'
                    # These are the top [self.nOffenders] in that particular category
                    # i.e., those with the highest values
                    offenders = MathAlgos.getLargestValues(dictList=masterList,
                                                           key=key,
                                                           n=self.nOffenders)
                    for x in offenders:
                        try:
                            # Chain of three view lookups: the job's log
                            # archive LFN, the ID returned by
                            # "jobsByInputLFN" for that archive, and the
                            # output LFN of that ID.  Any missing row or
                            # field aborts the chain.
                            logArchive = self.fwjrdatabase.loadView(
                                "FWJRDump",
                                "logArchivesByJobID",
                                options={
                                    "startkey": [x['jobID']],
                                    "endkey": [x['jobID'], x['retry_count']]
                                })['rows'][0]['value']['lfn']
                            logCollectID = self.jobsdatabase.loadView(
                                "JobDump",
                                "jobsByInputLFN",
                                options={
                                    "startkey": [workflowName, logArchive],
                                    "endkey": [workflowName, logArchive]
                                })['rows'][0]['value']
                            logCollect = self.fwjrdatabase.loadView(
                                "FWJRDump",
                                "outputByJobID",
                                options={
                                    "startkey": logCollectID,
                                    "endkey": logCollectID
                                })['rows'][0]['value']['lfn']
                            x['logArchive'] = logArchive.split('/')[-1]
                            x['logCollect'] = logCollect
                        except (IndexError, KeyError) as ex:
                            # Best effort: the offender is still reported,
                            # just without log locations
                            logging.debug(
                                "Unable to find final logArchive tarball for %i",
                                x['jobID'])
                            logging.debug(str(ex))

                    if key in self.histogramKeys:
                        # Usual histogram that was always done
                        histogram = MathAlgos.createHistogram(
                            numList=output[key],
                            nBins=self.histogramBins,
                            limit=self.histogramLimit)
                        final[stepName][key]['histogram'] = histogram
                        # Histogram only picking values from failed jobs
                        # Operators can use it to find out quicker why a workflow/task/step is failing
                        if len(failedJobs) > 0:
                            failedJobsHistogram = MathAlgos.createHistogram(
                                numList=outputFailed[key],
                                nBins=self.histogramBins,
                                limit=self.histogramLimit)

                            final[stepName][key][
                                'errorsHistogram'] = failedJobsHistogram
                    else:
                        average, stdDev = MathAlgos.getAverageStdDev(
                            numList=output[key])
                        final[stepName][key]['average'] = average
                        final[stepName][key]['stdDev'] = stdDev

                    final[stepName][key]['worstOffenders'] = [{
                        'jobID': x['jobID'],
                        'value': x.get(key, 0.0),
                        'log': x.get('logArchive', None),
                        'logCollect': x.get('logCollect', None)
                    } for x in offenders]
示例#12
0
    def handleCouchPerformance(self, workflowName):
        """
        _handleCouchPerformance_

        The couch performance stuff is convoluted enough I think I want to handle it separately.

        Reads the "performanceByWorkflowName" view for workflowName, groups
        the records by task and step, and for every step and performance key
        builds either a histogram (keys in self.histogramKeys) or the
        average and standard deviation, plus the 'worstOffenders': the
        self.nOffenders records with the largest value, annotated with log
        locations when those can be looked up.
        """
        perf = self.fwjrdatabase.loadView("FWJRDump", "performanceByWorkflowName",
                                          options={"startkey": [workflowName],
                                                   "endkey": [workflowName]})['rows']

        taskList = {}
        # NOTE(review): neither 'finalTask' nor the per-task 'final' is
        # stored or returned in the code visible here -- presumably this
        # happens further down; confirm.
        finalTask = {}

        # Bookkeeping fields that are not performance measurements
        skippedKeys = ('startTime', 'stopTime', 'taskName', 'stepName', 'jobID')

        # Group the performance records by task name, then by step name
        for row in perf:
            value = row['value']
            taskName = value['taskName']
            stepName = value['stepName']
            taskList.setdefault(taskName, {}).setdefault(stepName, []).append(value)

        for taskName in taskList:
            final = {}
            for stepName in taskList[taskName]:
                output = {'jobTime': []}
                final[stepName] = {}
                masterList = []

                # For each step put the data into a dictionary called output
                # keyed by the name of the value
                for row in taskList[taskName][stepName]:
                    masterList.append(row)
                    for key in row:
                        if key in skippedKeys:
                            continue
                        if key not in output:
                            output[key] = []
                        try:
                            output[key].append(float(row[key]))
                        except TypeError:
                            # Only a None value is tolerated; anything else
                            # is a genuine error
                            if row[key] is not None:
                                raise
                            # Why do we get None values here?
                            # We may want to look into it
                            logging.debug("Got a None performance value for key %s", key)
                            output[key].append(0.0)
                    try:
                        jobTime = row.get('stopTime') - row.get('startTime')
                        output['jobTime'].append(jobTime)
                        # Stored on the record so that it can be ranked
                        # like any other key below
                        row['jobTime'] = jobTime
                    except TypeError:
                        # One of those didn't have a real value
                        pass

                # Now that we've sorted the data, we process it one key at a time
                for key in output:
                    final[stepName][key] = {}
                    # Assemble the 'worstOffenders'
                    # These are the top [self.nOffenders] in that particular category
                    # i.e., those with the highest values
                    offenders = MathAlgos.getLargestValues(dictList=masterList, key=key,
                                                           n=self.nOffenders)
                    for x in offenders:
                        try:
                            # Chain of three view lookups: the job's log
                            # archive LFN, the ID returned by
                            # "jobsByInputLFN" for that archive, and the
                            # output LFN of that ID.  Any missing row or
                            # field aborts the chain.
                            logArchive = self.fwjrdatabase.loadView(
                                "FWJRDump", "logArchivesByJobID",
                                options={"startkey": [x['jobID']],
                                         "endkey": [x['jobID'], x['retry_count']]}
                            )['rows'][0]['value']['lfn']
                            logCollectID = self.jobsdatabase.loadView(
                                "JobDump", "jobsByInputLFN",
                                options={"startkey": [workflowName, logArchive],
                                         "endkey": [workflowName, logArchive]}
                            )['rows'][0]['value']
                            logCollect = self.fwjrdatabase.loadView(
                                "FWJRDump", "outputByJobID",
                                options={"startkey": logCollectID,
                                         "endkey": logCollectID}
                            )['rows'][0]['value']['lfn']
                            x['logArchive'] = logArchive.split('/')[-1]
                            x['logCollect'] = logCollect
                        except (IndexError, KeyError) as ex:
                            # Best effort: the offender is still reported,
                            # just without log locations
                            logging.debug("Unable to find final logArchive tarball for %i", x['jobID'])
                            logging.debug(str(ex))

                    if key in self.histogramKeys:
                        histogram = MathAlgos.createHistogram(numList=output[key],
                                                              nBins=self.histogramBins,
                                                              limit=self.histogramLimit)
                        final[stepName][key]['histogram'] = histogram
                    else:
                        average, stdDev = MathAlgos.getAverageStdDev(numList=output[key])
                        final[stepName][key]['average'] = average
                        final[stepName][key]['stdDev'] = stdDev

                    final[stepName][key]['worstOffenders'] = [{'jobID': x['jobID'],
                                                               'value': x.get(key, 0.0),
                                                               'log': x.get('logArchive', None),
                                                               'logCollect': x.get('logCollect', None)}
                                                              for x in offenders]
示例#13
0
    def handleCouchPerformance(self, workflowName):
        """
        _handleCouchPerformance_

        The couch performance stuff is convoluted enough I think I want to handle it separately.

        Reads the "performanceByWorkflowName" view for workflowName, groups
        the records by task and step, and builds for every step and every
        performance key:
          - a histogram (keys in self.histogramKeys), plus an
            "errorsHistogram" restricted to failed jobs when there are any,
            or otherwise the average and standard deviation;
          - the "worstOffenders", i.e. the self.nOffenders records with the
            largest value, annotated with log locations when available.
        """
        perf = self.fwjrdatabase.loadView(
            "FWJRDump", "performanceByWorkflowName", options={"startkey": [workflowName], "endkey": [workflowName]}
        )["rows"]

        failedJobs = self.getFailedJobs(workflowName)

        taskList = {}
        # NOTE(review): neither 'finalTask' nor the per-task 'final' is
        # stored or returned in the code visible here -- presumably this
        # happens further down; confirm.
        finalTask = {}

        # Bookkeeping fields that are not performance measurements
        skippedKeys = ("startTime", "stopTime", "taskName", "stepName", "jobID")

        # Group the performance records by task name, then by step name
        for row in perf:
            value = row["value"]
            taskName = value["taskName"]
            stepName = value["stepName"]
            taskList.setdefault(taskName, {}).setdefault(stepName, []).append(value)

        for taskName in taskList:
            final = {}
            for stepName in taskList[taskName]:
                output = {"jobTime": []}
                outputFailed = {"jobTime": []}  # This will be same, but only for failed jobs
                final[stepName] = {}
                masterList = []

                # For each step put the data into a dictionary called output
                # keyed by the name of the value
                for row in taskList[taskName][stepName]:
                    masterList.append(row)
                    for key in row:
                        if key in skippedKeys:
                            continue
                        if key not in output:
                            output[key] = []
                            if len(failedJobs) > 0:
                                outputFailed[key] = []
                        try:
                            output[key].append(float(row[key]))
                            if row["jobID"] in failedJobs:
                                outputFailed[key].append(float(row[key]))
                        except TypeError:
                            # Only a None value is tolerated; anything else
                            # is a genuine error
                            if row[key] is not None:
                                raise
                            # Why do we get None values here?
                            # We may want to look into it
                            logging.debug("Got a None performance value for key %s", key)
                            output[key].append(0.0)
                    try:
                        jobTime = row.get("stopTime") - row.get("startTime")
                        output["jobTime"].append(jobTime)
                        # Stored on the record so that it can be ranked
                        # like any other key below
                        row["jobTime"] = jobTime
                        # Account job running time here only if the job has failed
                        if row["jobID"] in failedJobs:
                            outputFailed["jobTime"].append(jobTime)
                    except TypeError:
                        # One of those didn't have a real value
                        pass

                # Now that we've sorted the data, we process it one key at a time
                for key in output:
                    final[stepName][key] = {}
                    # Assemble the 'worstOffenders'
                    # These are the top [self.nOffenders] in that particular category
                    # i.e., those with the highest values
                    offenders = MathAlgos.getLargestValues(dictList=masterList, key=key, n=self.nOffenders)
                    for x in offenders:
                        try:
                            # Chain of three view lookups: the job's log
                            # archive LFN, the ID returned by
                            # "jobsByInputLFN" for that archive, and the
                            # output LFN of that ID.  Any missing row or
                            # field aborts the chain.
                            logArchive = self.fwjrdatabase.loadView(
                                "FWJRDump",
                                "logArchivesByJobID",
                                options={"startkey": [x["jobID"]], "endkey": [x["jobID"], x["retry_count"]]},
                            )["rows"][0]["value"]["lfn"]
                            logCollectID = self.jobsdatabase.loadView(
                                "JobDump",
                                "jobsByInputLFN",
                                options={"startkey": [workflowName, logArchive], "endkey": [workflowName, logArchive]},
                            )["rows"][0]["value"]
                            logCollect = self.fwjrdatabase.loadView(
                                "FWJRDump", "outputByJobID", options={"startkey": logCollectID, "endkey": logCollectID}
                            )["rows"][0]["value"]["lfn"]
                            x["logArchive"] = logArchive.split("/")[-1]
                            x["logCollect"] = logCollect
                        except (IndexError, KeyError) as ex:
                            # Best effort: the offender is still reported,
                            # just without log locations
                            logging.debug("Unable to find final logArchive tarball for %i", x["jobID"])
                            logging.debug(str(ex))

                    if key in self.histogramKeys:
                        # Usual histogram that was always done
                        histogram = MathAlgos.createHistogram(
                            numList=output[key], nBins=self.histogramBins, limit=self.histogramLimit
                        )
                        final[stepName][key]["histogram"] = histogram
                        # Histogram only picking values from failed jobs
                        # Operators can use it to find out quicker why a workflow/task/step is failing
                        if len(failedJobs) > 0:
                            failedJobsHistogram = MathAlgos.createHistogram(
                                numList=outputFailed[key], nBins=self.histogramBins, limit=self.histogramLimit
                            )

                            final[stepName][key]["errorsHistogram"] = failedJobsHistogram
                    else:
                        average, stdDev = MathAlgos.getAverageStdDev(numList=output[key])
                        final[stepName][key]["average"] = average
                        final[stepName][key]["stdDev"] = stdDev

                    final[stepName][key]["worstOffenders"] = [
                        {
                            "jobID": x["jobID"],
                            "value": x.get(key, 0.0),
                            "log": x.get("logArchive", None),
                            "logCollect": x.get("logCollect", None),
                        }
                        for x in offenders
                    ]