def setup(self, numTests=0, cycles=1, xargs=None, threads=0, testoutdir=u'', runner=None, **kwargs):
    super(GitHubActionsCIWriter, self).setup(numTests=numTests, cycles=cycles, xargs=xargs,
        threads=threads, testoutdir=testoutdir, runner=runner, **kwargs)

    self.__outputs = set()

    self.remainingAnnotations = int(self.maxAnnotations) - 2 # one is used up for the non-zero exit status and one is used for the summary
    if str(self.failureTestLogAnnotations).lower() != 'true': self.remainingAnnotations = 0
    self.failureTestLogAnnotations = []

    self.runner = runner
    self.runid = os.path.basename(testoutdir)

    if runner.printLogs is None:
        # if setting was not overridden by user, default for CI is
        # to only print failures since otherwise the output is too long
        # and hard to find the logs of interest
        runner.printLogs = PrintLogs.FAILURES

    self.outputGitHubCommand(u'group', u'Logs for test run: %s' % self.runid)

    # enable coloring automatically, since this CI provider supports it
    runner.project.formatters.stdout.color = True
    # in this provider, colors render more clearly with bright=False
    ColorLogFormatter.configureANSIEscapeCodes(bright=False)

    self.artifacts = {} # map of category:[paths]
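# A hedged sketch (illustrative only, not necessarily how outputGitHubCommand is implemented) of the
# GitHub Actions workflow-command format that the setup() above relies on: commands are plain
# "::command parameter=value::message" lines written to stdout, where "group"/"endgroup" delimit a
# collapsible log section and "error" creates an annotation. The helper name and the escaping below
# are assumptions, simplified from the documented workflow-command syntax.
import sys

def _output_github_command_sketch(cmd, value=u'', params=None):
    escape = lambda s: s.replace('%', '%25').replace('\r', '%0D').replace('\n', '%0A')
    paramstr = ','.join('%s=%s' % (k, escape(str(v))) for k, v in (params or {}).items())
    sys.stdout.write(u'::%s%s::%s\n' % (cmd, (' ' + paramstr) if paramstr else '', escape(value)))
    sys.stdout.flush()

# e.g. wrap the run's logs in a collapsible group, matching the call in setup() above:
#   _output_github_command_sketch(u'group', u'Logs for test run: myoutdir')
#   ... test run output ...
#   _output_github_command_sketch(u'endgroup')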
def setup(self, numTests=0, cycles=1, xargs=None, threads=0, testoutdir=u'', runner=None, **kwargs):
    self.runid = os.path.basename(testoutdir)

    if runner.printLogs is None:
        # if setting was not overridden by user, default for Travis is
        # to only print failures since otherwise the output is too long
        # and hard to find the logs of interest
        runner.printLogs = PrintLogs.FAILURES

    stdoutPrint(u'travis_fold:start:PySys-%s' % self.runid.replace(' ', '-'))

    # enable coloring automatically, since this CI provider supports it,
    # but must explicitly disable bright colors since it doesn't yet support that
    runner.project.formatters.stdout.color = True
    ColorLogFormatter.configureANSIEscapeCodes(bright=False)
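# Hedged sketch of the Travis CI log-folding convention used above: a fold opened with
# "travis_fold:start:<name>" is later closed with a matching "travis_fold:end:<name>"; the name
# may not contain spaces, hence the replace(' ', '-'). This is an illustration, not the writer's
# real cleanup code; a real writer might choose to leave the fold open after failures so the
# relevant logs stay visible.
import sys
runid = 'my outdir'
foldname = 'PySys-%s' % runid.replace(' ', '-')
sys.stdout.write(u'travis_fold:start:%s\n' % foldname)  # printed by setup()
# ... test run output ...
sys.stdout.write(u'travis_fold:end:%s\n' % foldname)    # printed later, e.g. from cleanup()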
def logForOutcome(decider):
    for cycle in self.results:
        cyclestr = ''
        if len(self.results) > 1: cyclestr = '[CYCLE %d] ' % (cycle + 1)

        for outcome in OUTCOMES:
            if not decider(outcome): continue

            # sort similar outcomes together to make the results easier to read; by reason then testDir
            self.results[cycle][outcome].sort(key=lambda test: [test[1], test[3]])

            for (id, reason, testTitle, testDir, outputdir) in self.results[cycle][outcome]:
                log(" %s%s: %s ", cyclestr, outcome, id, extra=ColorLogFormatter.tag(str(outcome).lower()))
                if showTestTitle and testTitle:
                    log(" (title: %s)", testTitle, extra=ColorLogFormatter.tag(LOG_DEBUG))
                if showOutcomeReason and reason:
                    log(" %s", reason, extra=ColorLogFormatter.tag(LOG_TEST_OUTCOMES))

                try:
                    outputdir = os.path.normpath(os.path.relpath(fromLongPathSafe(outputdir))) + os.sep
                    testDir = os.path.normpath(os.path.relpath(fromLongPathSafe(testDir))) + os.sep
                except Exception as ex:
                    # relpath can fail if on different Windows drives
                    logging.getLogger('pysys.writer').debug(
                        'Failed to generate relative paths for "%s" and "%s": %s', outputdir, testDir, ex)
                if showTestDir and not (showOutputDir and outputdir.startswith(testDir)):
                    # don't confuse things by showing the testDir unless its information
                    # is not present in the outputDir (due to --outdir)
                    log(" %s", testDir)
                if showOutputDir:
                    log(" %s", outputdir)
def processResult(self, testObj, cycle=-1, **kwargs):
    if self.numTests == 1: return
    log = logging.getLogger('pysys.resultsprogress')

    id = self.testToDisplay(testObj, cycle)
    self.inprogress.remove(id)

    outcome = testObj.getOutcome()
    self.outcomes[outcome] += 1

    executed = sum(self.outcomes.values())

    if outcome.isFailure():
        m = '%s: %s' % (outcome, id)
        if testObj.getOutcomeReason(): m += ': ' + testObj.getOutcomeReason()
        self._recentFailureReasons.append(m)
        self._recentFailureReasons = self._recentFailureReasons[-1 * self.recentFailures:] # keep last N

    # nb: no need to lock since this always executes on the main thread
    timediv = 1
    if time.time() - self.startTime > 60: timediv = 60
    log.info('Test progress: %s = %s of tests in %d %s',
        ('completed %d/%d' % (executed, self.numTests)),
        '%0.1f%%' % (100.0 * executed / self.numTests),
        int((time.time() - self.startTime) / timediv),
        'seconds' if timediv == 1 else 'minutes',
        extra=ColorLogFormatter.tag(LOG_TEST_PROGRESS, [0, 1]))
    failednumber = sum([self.outcomes[o] for o in OUTCOMES if o.isFailure()])
    passed = ', '.join(['%d %s' % (self.outcomes[o], o) for o in OUTCOMES if not o.isFailure() and self.outcomes[o] > 0])
    failed = ', '.join(['%d %s' % (self.outcomes[o], o) for o in OUTCOMES if o.isFailure() and self.outcomes[o] > 0])
    if passed:
        log.info(' %s (%0.1f%%)', passed, 100.0 * (executed - failednumber) / executed,
            extra=ColorLogFormatter.tag(LOG_PASSES))
    if failed:
        log.info(' %s', failed, extra=ColorLogFormatter.tag(LOG_FAILURES))
    if self._recentFailureReasons:
        log.info('Recent failures: ', extra=ColorLogFormatter.tag(LOG_TEST_PROGRESS))
        for f in self._recentFailureReasons:
            log.info(' ' + f, extra=ColorLogFormatter.tag(LOG_FAILURES))
    inprogress = list(self.inprogress)
    if self.threads > 1 and inprogress:
        log.info('Currently executing: %s', ', '.join(sorted(inprogress)),
            extra=ColorLogFormatter.tag(LOG_TEST_PROGRESS))
    log.info('')
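# Tiny self-contained illustration of the "keep last N" negative-slicing idiom used above for
# _recentFailureReasons (shown here with a hypothetical recentFailures setting of 3):
recent = []
for i in range(10):
    recent.append('failure %d' % i)
    recent = recent[-3:] # only the 3 most recent messages are retained
assert recent == ['failure 7', 'failure 8', 'failure 9']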
def printNonPassesSummary(self, log):
    showOutcomeReason = str(self.showOutcomeReason).lower() == 'true'
    showOutputDir = str(self.showOutputDir).lower() == 'true'

    log.critical("Summary of non passes: ")
    fails = 0
    for cycle in list(self.results.keys()):
        for outcome in list(self.results[cycle].keys()):
            if outcome in FAILS: fails = fails + len(self.results[cycle][outcome])
    if fails == 0:
        log.critical(" THERE WERE NO NON PASSES", extra=ColorLogFormatter.tag(LOG_PASSES))
    else:
        for cycle in list(self.results.keys()):
            cyclestr = ''
            if len(self.results) > 1: cyclestr = '[CYCLE %d] ' % (cycle + 1)
            for outcome in FAILS:
                for (id, reason, outputdir) in self.results[cycle][outcome]:
                    log.critical(" %s%s: %s ", cyclestr, LOOKUP[outcome], id,
                        extra=ColorLogFormatter.tag(LOOKUP[outcome].lower()))
                    if showOutputDir:
                        log.critical(" %s", os.path.normpath(os.path.relpath(outputdir)))
                    if showOutcomeReason and reason:
                        log.critical(" %s", reason, extra=ColorLogFormatter.tag(LOG_TEST_OUTCOMES))
def logForOutcome(decider):
    for cycle in self.results:
        cyclestr = ''
        if len(self.results) > 1: cyclestr = '[CYCLE %d] ' % (cycle + 1)

        for outcome in OUTCOMES:
            if not decider(outcome): continue

            # sort similar outcomes together to make the results easier to read; by reason then testDir
            self.results[cycle][outcome].sort(key=lambda test: [test[1], test[3]])

            for (id, reason, testTitle, testDir, outputdir) in self.results[cycle][outcome]:
                log(" %s%s: %s ", cyclestr, outcome, id, extra=ColorLogFormatter.tag(str(outcome).lower()))
                if showTestTitle and testTitle:
                    log(" (title: %s)", testTitle, extra=ColorLogFormatter.tag(LOG_DEBUG))
                if showOutcomeReason and reason:
                    log(" %s", reason, extra=ColorLogFormatter.tag(LOG_TEST_OUTCOMES))

                outputdir = os.path.normpath(os.path.relpath(outputdir)) + os.sep
                testDir = os.path.normpath(os.path.relpath(testDir)) + os.sep
                if showTestDir and not (showOutputDir and outputdir.startswith(testDir)):
                    # don't confuse things by showing the testDir unless its information
                    # is not present in the outputDir (due to --outdir)
                    log(" %s", testDir)
                if showOutputDir:
                    log(" %s", outputdir)
def __init__(self, root, projectFile, outdir=None):
    assert projectFile
    self.root = root

    if not outdir: outdir = DEFAULT_OUTDIR

    if not os.path.exists(os.path.join(root, projectFile)):
        raise UserError("Project file not found: %s" % os.path.normpath(os.path.join(root, projectFile)))
    try:
        parser = _XMLProjectParser(root, projectFile, outdir=outdir)
    except UserError:
        raise
    except Exception as e:
        raise Exception("Error parsing project file \"%s\": %s" % (os.path.join(root, projectFile), sys.exc_info()[1]))
    parser.checkVersions()
    self.projectFile = os.path.join(root, projectFile)

    self.startTimestamp = parser.startTimestamp

    # get the properties
    properties = parser.getProperties()
    keys = list(properties.keys())
    keys.sort()
    for key in keys:
        if not hasattr(self, key): # don't overwrite existing props; people will have to use .getProperty() to access them
            setattr(self, key, properties[key])
    self.properties = dict(properties)

    # add to the python path
    parser.addToPath()

    # get the runner if specified
    self.runnerClassname = parser.getRunnerDetails()

    # get the maker if specified
    self.makerClassname = parser.getMakerDetails()

    self.writers = parser.getWriterDetails()
    self.testPlugins = parser.getTestPlugins()
    self.runnerPlugins = parser.getRunnerPlugins()
    self._descriptorLoaderPlugins = parser.getDescriptorLoaderPlugins()

    self.perfReporterConfig = parser.getPerformanceReporterDetails()

    self.descriptorLoaderClass = parser.getDescriptorLoaderClass()

    # get the stdout and runlog formatters
    stdoutformatter, runlogformatter = parser.createFormatters()

    self.defaultFileEncodings = parser.getDefaultFileEncodings()

    self.executionOrderHints, self.executionOrderSecondaryModesHintDelta = parser.getExecutionOrderHints()

    self.collectTestOutput = parser.getCollectTestOutputDetails()

    self.projectHelp = parser.getProjectHelp()
    self.projectHelp = parser.expandProperties(self.projectHelp, default=None, name='project-help')

    # set the data attributes
    parser.unlink()

    if not stdoutformatter: stdoutformatter = ColorLogFormatter({'__formatterName': 'stdout'})
    if not runlogformatter: runlogformatter = BaseLogFormatter({'__formatterName': 'runlog'})
    PySysFormatters = collections.namedtuple('PySysFormatters', ['stdout', 'runlog'])
    self.formatters = PySysFormatters(stdoutformatter, runlogformatter)

    # for safety (test independence, and thread-safety), make it hard for people to accidentally edit project properties later
    self.properties = makeReadOnlyDict(self.properties)

    self.__frozen = True
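# Simplified, hypothetical illustration of the "don't overwrite existing props" rule above:
# a project property whose name clashes with an existing attribute (e.g. "root") is skipped,
# so it can only be read via the properties dictionary / getProperty(), never by attribute access.
class _ExampleProject(object):
    root = '/my/testroot'

_props = {'root': 'somewhere-else', 'appHome': '/opt/app'}
_p = _ExampleProject()
for _key in sorted(_props):
    if not hasattr(_p, _key):
        setattr(_p, _key, _props[_key])
assert _p.root == '/my/testroot' and _p.appHome == '/opt/app'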
def logComparisons(self, comparisonFiles, sortby=None, printmethod=stdoutPrint):
    # comparisonFiles is a list of PerformanceRunData instances
    # NB: this method implementation mutates the passed in comparisonFiles
    sortby = sortby or os.getenv('PYSYS_PERFORMANCE_SORT_BY', 'resultKey')

    files = comparisonFiles
    out = printmethod

    # we may not have the project available here if running this standalone, but can still reuse the same
    # logic for deciding if coloring is enabled
    colorFormatter = ColorLogFormatter({})

    def addColor(category, s):
        if not category: return s
        return colorFormatter.formatArg(category, s)

    # usually resultKey is itself the unique key, but for comparisons we also
    # want to include units/biggerIsBetter since if these change the
    # comparison would be invalidated
    ComparisonKey = collections.namedtuple('ComparisonKey', ['resultKey', 'unit', 'biggerIsBetter'])

    # iterate over each comparison item, stashing various aggregated information we need later, and printing summary info
    for p in files:
        p.keyedResults = {
            ComparisonKey(resultKey=r['resultKey'], unit=r['unit'], biggerIsBetter=r['biggerIsBetter']): r
            for r in p.results
        }

    commonRunDetails = {}
    for k in list(files[-1].runDetails.keys()):
        if all([k in p.runDetails and p.runDetails[k] == files[-1].runDetails[k] for p in files]):
            commonRunDetails[k] = files[-1].runDetails[k]

    def formatRunDetails(k, val):
        valsplit = val.split(';')
        if k == 'startTime' and len(valsplit) >= 3:
            val = ' .. '.join([valsplit[0].strip(), valsplit[-1].strip()])
        else:
            val = val.replace('; ', ';')
        return '%s=%s' % (k, addColor(LOG_TEST_DETAILS, val))

    #out('Comparing performance files with these common run details: %s' % ', '.join([formatRunDetails(k, commonRunDetails[k]) for k in commonRunDetails]))

    # Assign a comparison label to each; use outDirName if unique, else make something based on the filename
    if len(set(p.runDetails.get('outDirName') for p in files)) == len(files):
        for p in files: p.comparisonLabel = p.runDetails.get('outDirName', p.name)
    else:
        for p in files: p.comparisonLabel = os.path.splitext(os.path.basename(p.name))[0].replace('perf_', '')

    out('Performance comparison summary between the following runs (the differing "run details" are shown):')
    for p in files:
        out('- %s (%d result keys, %d samples/result)%s:' % (
            addColor(LOG_TEST_DETAILS, p.comparisonLabel),
            len(p.results),
            float(sum([r['samples'] for r in p.results])) / len(p.results),
            '' if p.comparisonLabel != p.runDetails.get('outDirName') else
                f' from {p.name if not os.path.isabs(p.name) else os.path.relpath(p.name)}'
        ))
        out('  %s' % ', '.join([formatRunDetails(k, p.runDetails[k]) for k in p.runDetails if k not in commonRunDetails]))

    out('')
    out('Comparison results:')
    out('  where fromRun->toRun format is: (%improvement) = (speedup ratio) = (sigmas/stdDevs where change above +/- 2.0 gives 95% probability it\'s significant; only significant results are colored)')
    out('')

    ComparisonData = collections.namedtuple('ComparisonData', [
        'comparisonPercent',  # %improvement
        'comparisonSigmas',   # improvements as a multiple of stddev
        'ratio',              # speedup ratio of from->to, how much faster we are now
        'rfrom',              # the "from" result value
        'rto',                # the "to" result value
        'relativeStdDevTo',   # relative stdDev as a % of the "to" value, or None if only one sample (or value is 0)
        'toleranceStdDevs',   # the configured tolerance specified in this test, if any
    ])

    # now compute comparison info, comparing each path to the final one
    comparisons = {} # ComparisonKey:[ComparisonInfo or string if not available, ...]
    comparetoresults = files[-1].keyedResults
    comparisonheaders = []
    for p in files[:-1]:
        if len(files) > 2:
            comparisonheaders.append('%s->%s' % (p.comparisonLabel, files[-1].comparisonLabel))

        keyedResults = p.keyedResults
        for k in comparetoresults:
            c = comparisons.setdefault(k, [])
            if k not in keyedResults:
                c.append('Compare from value is missing')
            else:
                rfrom = keyedResults[k]
                rto = comparetoresults[k]
                # avoid div by zero errors; results are nonsensical anyway if either is zero
                if rfrom['value'] == 0:
                    c.append('Compare from value is zero')
                    continue
                if rto['value'] == 0:
                    c.append('Compare to value is zero')
                    continue

                # frequently at least one of these will have only one sample so
                # not much point doing a statistically accurate stddev estimate; so just
                # take whichever is bigger (which neatly ignores any that are zero due
                # to only one sample)
                relStdDev = max(abs(rfrom['stdDev'] / rfrom['value']), abs(rto['stdDev'] / rto['value']))

                # how many times faster are we now; initially, we ignore biggerIsBetter in the ratio calculation...
                ratio = rto['value'] / rfrom['value']

                # use a + or - sign to indicate improvement vs regression in the %
                # (rather than a reciprocal which is harder to understand)
                sign = 1.0 if k.biggerIsBetter else -1.0
                comparisonPercent = 100.0 * (ratio - 1) * sign

                # assuming a normal distribution, 1.0 or more gives 68% confidence of
                # a real difference, and 2.0 or more gives 95%.
                comparisonSigmas = sign * ((ratio - 1) / relStdDev) if relStdDev else None # = (new-old)/stddev

                # but +/- isn't relevant when displaying the ratio; we want ratios >1 to always be a good thing, so use reciprocal here
                if not k.biggerIsBetter:
                    ratio = 1.0 / ratio

                c.append(ComparisonData(
                    comparisonPercent=comparisonPercent,
                    comparisonSigmas=comparisonSigmas,
                    ratio=ratio,
                    rfrom=rfrom['value'],
                    rto=rto['value'],
                    relativeStdDevTo=100.0 * rto['stdDev'] / rto['value'] if rto['samples'] > 0 and rto['value'] != 0 else None,
                    toleranceStdDevs=files[-1].keyedResults[k].get('toleranceStdDevs', 0.0),
                ))

    if comparisonheaders:
        headerpad = max([len(h) for h in comparisonheaders])
        comparisonheaders = [('%' + str(headerpad + 1) + 's') % (h + ':')
            for h in ([files[-1].comparisonLabel] + comparisonheaders)]

    def getComparisonKey(k):
        if sortby == 'resultKey': return k
        if sortby == 'testId': return (allresultinfo[k]['testId'], k)
        # sort with regressions at the bottom, so they're more prominent
        if sortby == 'comparison%':
            return [(-1 * c.comparisonPercent) if hasattr(c, 'comparisonPercent') else -10000000.0
                for c in comparisons[k]] + [k]
        raise Exception(f'Invalid sortby key "{sortby}"; valid values are: resultKey, testId, comparison%')

    sortedkeys = sorted(comparisons.keys(), key=getComparisonKey)

    for k in sortedkeys:
        out('%s from %s' % (colorFormatter.formatArg(LOG_TEST_PERFORMANCE, k.resultKey), files[-1].keyedResults[k]['testId']))
        r = files[-1].keyedResults[k]
        out('  ' + f"Mean from this run = {colorFormatter.formatArg(LOG_TEST_PERFORMANCE, self.valueToDisplayString(r['value']))} {r['unit']}"
            + ('' if r['samples'] == 1 or r['value'] == 0 else
                f" with stdDev={self.valueToDisplayString(r['stdDev'])} ({100.0 * r['stdDev'] / r['value']:0.1f}% of mean)")
            + ('' if not r.get('toleranceStdDevs') else
                f"; configured toleranceStdDevs={self.valueToDisplayString(r['toleranceStdDevs'])}"))
        i = 0
        for c in comparisons[k]:
            i += 1
            prefix = ('%s ' % comparisonheaders[i]) if comparisonheaders else ''
            if not hasattr(c, 'comparisonPercent'):
                # strings for error messages
                out('  ' + prefix + c)
                continue

            out('  ' + prefix + self.formatResultComparison(c, addColor=addColor))
    out('')
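# Worked example (with made-up numbers) of the comparison arithmetic implemented above, for a
# timing result where biggerIsBetter=False, comparing from=2.00s (stdDev 0.10) to to=1.60s (stdDev 0.08):
rfrom, rto = {'value': 2.00, 'stdDev': 0.10}, {'value': 1.60, 'stdDev': 0.08}
biggerIsBetter = False
relStdDev = max(abs(rfrom['stdDev'] / rfrom['value']), abs(rto['stdDev'] / rto['value'])) # = 0.05
ratio = rto['value'] / rfrom['value'] # = 0.8
sign = 1.0 if biggerIsBetter else -1.0
comparisonPercent = 100.0 * (ratio - 1) * sign # = ~+20, i.e. a 20% improvement
comparisonSigmas = sign * ((ratio - 1) / relStdDev) # = ~+4.0, well above the 2.0 (95%) significance threshold
if not biggerIsBetter: ratio = 1.0 / ratio # displayed speedup ratio = 1.25x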
def logSummary(self, log, showDuration=None, showOutcomeStats=None, showTestIdList=None, showFailureSummary=True, showRunDetails=None, **kwargs):
    """
    Writes a textual summary using the specified log function, with colored output if enabled.

    The keyword arguments can be used to disable sections of the output (overriding the settings)
    if needed by the caller.

    :param Callable[format,args,kwargs=] log: The function to call for each line of the summary
        (e.g. log.critical). The message is obtained with ``format % args``, and color information
        is available from the ``extra=`` keyword argument.
    """
    assert not kwargs, kwargs.keys()

    if showDuration is None: showDuration = self.showDuration and self.numTests > 1
    if showOutcomeStats is None: showOutcomeStats = self.showOutcomeStats and self.numTests > 1
    if showTestIdList is None: showTestIdList = self.showTestIdList and self.numTests > 1
    if showRunDetails is None: showRunDetails = self.showRunDetails and self.numTests > 1

    # details from showFailureSummary:
    showOutcomeReason = self.showOutcomeReason
    showOutputDir = self.showOutputDir
    showTestDir = self.showTestDir
    showTestTitle = self.showTestTitle

    showInspectSummary = self.showInspectSummary
    showNotVerifiedSummary = self.showNotVerifiedSummary

    if showDuration:
        log("Completed test run at: %s", time.strftime('%A %Y-%m-%d %H:%M:%S %Z', time.localtime(time.time())),
            extra=ColorLogFormatter.tag(LOG_DEBUG, 0))
        if self.threads > 1:
            log("Total test duration (absolute): %s", '%.2f secs' % (time.time() - self.startTime),
                extra=ColorLogFormatter.tag(LOG_DEBUG, 0))
            log("Total test duration (additive): %s", '%.2f secs' % self.duration,
                extra=ColorLogFormatter.tag(LOG_DEBUG, 0))
        else:
            log("Total test duration: %s", "%.2f secs" % (time.time() - self.startTime),
                extra=ColorLogFormatter.tag(LOG_DEBUG, 0))
        log('')

    if showRunDetails:
        log("Run details:")
        for k, v in self.runner.runDetails.items():
            log(" %23s%s", k + ': ', v, extra=ColorLogFormatter.tag(LOG_TEST_DETAILS, 1))
        log("")

    if showOutcomeStats:
        executed = sum(self.outcomes.values())
        failednumber = sum([self.outcomes[o] for o in OUTCOMES if o.isFailure()])
        passed = ', '.join(['%d %s' % (self.outcomes[o], o) for o in OUTCOMES if not o.isFailure() and self.outcomes[o] > 0])
        failed = ', '.join(['%d %s' % (self.outcomes[o], o) for o in OUTCOMES if o.isFailure() and self.outcomes[o] > 0])
        if failed: log('Failure outcomes: %s (%0.1f%%)', failed, 100.0 * (failednumber) / executed,
            extra=ColorLogFormatter.tag(str(FAILED).lower(), [0]))
        if passed: log('Success outcomes: %s', passed,
            extra=ColorLogFormatter.tag(str(PASSED).lower(), [0]))
        log('')

    def logForOutcome(decider):
        for cycle in self.results:
            cyclestr = ''
            if len(self.results) > 1: cyclestr = '[CYCLE %d] ' % (cycle + 1)

            for outcome in OUTCOMES:
                if not decider(outcome): continue

                # sort similar outcomes together to make the results easier to read; by reason then testDir
                self.results[cycle][outcome].sort(key=lambda test: [test[1], test[3]])

                for (id, reason, testTitle, testDir, outputdir) in self.results[cycle][outcome]:
                    log(" %s%s: %s ", cyclestr, outcome, id, extra=ColorLogFormatter.tag(str(outcome).lower()))
                    if showTestTitle and testTitle:
                        log(" (title: %s)", testTitle, extra=ColorLogFormatter.tag(LOG_DEBUG))
                    if showOutcomeReason and reason:
                        log(" %s", reason, extra=ColorLogFormatter.tag(LOG_TEST_OUTCOMES))

                    outputdir = os.path.normpath(os.path.relpath(outputdir)) + os.sep
                    testDir = os.path.normpath(os.path.relpath(testDir)) + os.sep
                    if showTestDir and not (showOutputDir and outputdir.startswith(testDir)):
                        # don't confuse things by showing the testDir unless its information
                        # is not present in the outputDir (due to --outdir)
                        log(" %s", testDir)
                    if showOutputDir:
                        log(" %s", outputdir)

    if showNotVerifiedSummary and self.outcomes[NOTVERIFIED] > 0:
        log("Summary of not verified outcomes:")
        logForOutcome(lambda outcome: outcome == NOTVERIFIED)
        log('')

    if showInspectSummary and self.outcomes[INSPECT] > 0:
        log("Summary of inspect outcomes: ")
        logForOutcome(lambda outcome: outcome == INSPECT)
        log('')

    if showFailureSummary:
        log("Summary of failures: ")
        fails = 0
        for cycle in self.results:
            for outcome, tests in self.results[cycle].items():
                if outcome.isFailure(): fails = fails + len(tests)
        if fails == 0:
            log(" THERE WERE NO FAILURES", extra=ColorLogFormatter.tag(LOG_PASSES))
        else:
            logForOutcome(lambda outcome: outcome.isFailure())
        log('')

    if showTestIdList:
        failedids = []
        failedidsAlreadyDone = set()
        for cycle in self.results:
            for outcome in OUTCOMES:
                if not outcome.isFailure(): continue
                for (id, reason, testTitle, testDir, outputdir) in self.results[cycle][outcome]:
                    if id in failedidsAlreadyDone: continue
                    failedidsAlreadyDone.add(id)
                    failedids.append(id)

        if len(failedids) > 1:
            # display just the ids, in a way that's easy to copy and paste into a command line;
            # for maximum usability, use the sort order given above
            failedids = failedids
            if len(failedids) > 100: # this feature is only useful for small test runs
                failedids = failedids[:100] + ['...']

            log('List of failed test ids:')
            log('%s', ' '.join(failedids))
def __init__(self, root, projectFile):
    self.root = root

    self.runnerClassname, self.runnerModule = DEFAULT_RUNNER
    self.makerClassname, self.makerModule = DEFAULT_MAKER
    self.writers = [DEFAULT_WRITER]

    self._createPerformanceReporters = lambda outdir: []

    stdoutformatter, runlogformatter = None, None

    self.projectFile = None
    if projectFile is not None and os.path.exists(os.path.join(root, projectFile)):
        from pysys.xml.project import XMLProjectParser
        try:
            parser = XMLProjectParser(root, projectFile)
        except Exception as e:
            raise Exception("Error parsing project file \"%s\": %s" % (os.path.join(root, projectFile), sys.exc_info()[1]))
        else:
            parser.checkVersions()
            self.projectFile = os.path.join(root, projectFile)

            # get the properties
            properties = parser.getProperties()
            keys = list(properties.keys())
            keys.sort()
            for key in keys: setattr(self, key, properties[key])

            # add to the python path
            parser.addToPath()

            # get the runner if specified
            self.runnerClassname, self.runnerModule = parser.getRunnerDetails()

            # get the maker if specified
            self.makerClassname, self.makerModule = parser.getMakerDetails()

            # get the loggers to use
            self.writers = parser.getWriterDetails()

            perfReporterDetails = parser.getPerformanceReporterDetails()
            self._createPerformanceReporters = lambda testoutdir: [
                perfReporterDetails[0](self, perfReporterDetails[1], testoutdir)]

            # get the stdout and runlog formatters
            stdoutformatter, runlogformatter = parser.createFormatters()

            # set the data attributes
            parser.unlink()

    if not stdoutformatter: stdoutformatter = ColorLogFormatter({})
    if not runlogformatter: runlogformatter = BaseLogFormatter({})
    PySysFormatters = collections.namedtuple('PySysFormatters', ['stdout', 'runlog'])
    self.formatters = PySysFormatters(stdoutformatter, runlogformatter)
def comparePerformanceFiles(compareList, format='text', sortby='comparison%'):
    """
    Compare one or more baseline files against the latest.

    @param compareList: a list, where each is either a CSVPerformanceFile object (with its label
        attribute optionally set) or a path specifier string (a file or directory of CSV files,
        with glob and optional @latest filter). Labels can be specified as LABEL=specifier.
    """
    assert format == 'text' # will support CSV soon

    # we don't have the project available here, but can still reuse the same
    # logic for deciding if coloring is enabled
    colorFormatter = ColorLogFormatter({
        'color:' + LOG_PERF_BETTER: 'GREEN',
        'color:' + LOG_PERF_WORSE: 'RED',
    })

    i = -1
    files = [] # list of CSVPerformanceFile instances which we'll add extra data to
    for p in compareList:
        i += 1
        if isinstance(p, CSVPerformanceFile):
            p.numberOfFiles = getattr(p, 'numberOfFiles', 1)
            p.index = i
            files.append(p)
        elif isstring(p):
            expanded = expandPathSpecifier(p)
            p = CSVPerformanceFile.aggregate(expanded['files'])
            p.index = i
            p.label = expanded['label']
            p.numberOfFiles = len(expanded['files'])
            files.append(p)
        else:
            assert False, 'unknown type: %s: %s' % (type(p), p)

    def addColor(category, s):
        if not category: return s
        return colorFormatter.formatArg(category, s)

    output = ['']
    try:
        def out(s):
            output[0] += '%s\n' % s

        # usually resultKey is itself the unique key, but for comparisons we also
        # want to include units/biggerIsBetter since if these change the
        # comparison would be invalidated
        ComparisonKey = collections.namedtuple('ComparisonKey', ['resultKey', 'unit', 'biggerIsBetter'])

        # iterate over each comparison item, stashing various aggregated information we need later, and printing summary info
        for p in files:
            p.keyedResults = {
                ComparisonKey(resultKey=r['resultKey'], unit=r['unit'], biggerIsBetter=r['biggerIsBetter']): r
                for r in p.results
            }
        # end iteration over paths

        commonRunDetails = collections.OrderedDict()
        for k in list(files[-1].runDetails.keys()):
            if all([k in p.runDetails and p.runDetails[k] == files[-1].runDetails[k] for p in files]):
                commonRunDetails[k] = files[-1].runDetails[k]

        def formatRunDetails(k, val):
            valsplit = val.split(';')
            if k == 'time' and len(valsplit) > 3:
                val = ' .. '.join([valsplit[0].strip(), valsplit[-1].strip()])
            else:
                val = val.replace('; ', ';')
            return '%s=%s' % (k, addColor(LOG_TEST_DETAILS, val))

        out('Common run details: %s' % ', '.join([formatRunDetails(k, commonRunDetails[k]) for k in commonRunDetails]))

        out('Comparing: ')
        for p in files:
            out('- %s (%d results, %d samples/result, from %d files):' % (
                addColor(LOG_TEST_DETAILS, p.label or '#%d' % (p.index + 1)),
                len(p.results),
                float(sum([r['samples'] for r in p.results])) / len(p.results),
                p.numberOfFiles,
            ))
            out('  %s' % ', '.join([formatRunDetails(k, p.runDetails[k]) for k in p.runDetails if k not in commonRunDetails]))
        out('')

        ComparisonData = collections.namedtuple('ComparisonData',
            ['comparisonPercent', 'comparisonSigmas', 'ratio', 'rfrom', 'rto'])

        # now compute comparison info, comparing each path to the final one
        comparisons = {} # ComparisonKey:[ComparisonInfo or string if not available, ...]
        comparetoresults = files[-1].keyedResults
        comparisonheaders = []
        for p in files[:-1]:
            if len(files) > 2:
                comparisonheaders.append('%s->%s' % (p.label, files[-1].label))

            keyedResults = p.keyedResults
            for k in comparetoresults:
                c = comparisons.setdefault(k, [])
                if k not in keyedResults:
                    c.append('Compare from value is missing')
                else:
                    rfrom = keyedResults[k]
                    rto = comparetoresults[k]
                    # avoid div by zero errors; results are nonsensical anyway if either is zero
                    if rfrom['value'] == 0:
                        c.append('Compare from value is zero')
                        continue
                    if rto['value'] == 0:
                        c.append('Compare to value is zero')
                        continue

                    # how many times faster are we now
                    ratio = rto['value'] / rfrom['value']
                    # use a + or - sign to indicate improvement vs regression
                    sign = 1.0 if k.biggerIsBetter else -1

                    # frequently at least one of these will have only one sample so
                    # not much point doing a statistically accurate stddev estimate; so just
                    # take whichever is bigger (which neatly ignores any that are zero due
                    # to only one sample)
                    stddevRatio = max(abs(rfrom['stdDev'] / rfrom['value']), abs(rto['stdDev'] / rto['value']))

                    comparisonPercent = 100.0 * (ratio - 1) * sign
                    # assuming a normal distribution, 1.0 or more gives 68% confidence of
                    # a real difference, and 2.0 or more gives 95%;
                    # this is effectively (new-old)/stdDev
                    comparisonSigmas = sign * ((ratio - 1) / stddevRatio) if stddevRatio else None

                    c.append(ComparisonData(
                        comparisonPercent=comparisonPercent,
                        comparisonSigmas=comparisonSigmas,
                        ratio=ratio,
                        rfrom=rfrom['value'],
                        rto=rto['value']))

        if comparisonheaders:
            headerpad = max([len(h) for h in comparisonheaders])
            comparisonheaders = [('%' + str(headerpad + 1) + 's') % (h + ':') for h in comparisonheaders]

        def getComparisonKey(k):
            if sortby == 'resultKey': return k
            if sortby == 'testId': return (allresultinfo[k]['testId'], k)
            # sort with regressions at the bottom, so they're more prominent
            if sortby == 'comparison%':
                return [(-1 * c.comparisonPercent) if hasattr(c, 'comparisonPercent') else -10000000.0
                    for c in comparisons[k]] + [k]
            assert False, sortby

        def addPlus(s):
            if not s.startswith('-'): return '+' + s
            return s

        def valueToDisplayString(value): # TODO: refactor this to avoid needing to copy
            """Pretty-print an integer or float value to a moderate number of significant figures.

            The method additionally adds a "," grouping for large numbers.

            @param value: the value to be displayed
            """
            if float(value) > 1000.0:
                return '{0:,}'.format(int(value))
            else:
                valtostr = '%0.4g' % value
                if 'e' in valtostr: valtostr = '%f' % value
                return valtostr

        sortedkeys = sorted(comparisons.keys(), key=getComparisonKey)

        for k in sortedkeys:
            out('%s from %s' % (colorFormatter.formatArg(LOG_TEST_PERFORMANCE, k.resultKey), files[-1].keyedResults[k]['testId']))
            i = 0
            for c in comparisons[k]:
                i += 1
                prefix = ('%s ' % comparisonheaders[i - 1]) if comparisonheaders else ''
                if not hasattr(c, 'comparisonPercent'):
                    # strings for error messages
                    out('  ' + prefix + c)
                    continue

                if c.comparisonSigmas != None:
                    significantresult = abs(c.comparisonSigmas) >= (files[-1].keyedResults[k].get('toleranceStdDevs', 0.0) or 2.0)
                else:
                    significantresult = abs(c.comparisonPercent) >= 10
                category = None
                if significantresult:
                    category = LOG_PERF_BETTER if c.comparisonPercent > 0 else LOG_PERF_WORSE

                if c.comparisonSigmas is None:
                    sigmas = ''
                else:
                    sigmas = ' = %s sigmas' % addColor(category, addPlus('%0.1f' % c.comparisonSigmas))

                out('  ' + prefix + '%s%s ( -> %s %s)' % (
                    addColor(category, '%6s' % (addPlus('%0.1f' % c.comparisonPercent) + '%')),
                    sigmas,
                    valueToDisplayString(c.rto),
                    files[-1].keyedResults[k]['unit']))
            out('')

        return output[0]
    except Exception as e:
        _log.info('Got exception, performance comparison output so far: \n%s' % output[0])
        raise
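# Quick sanity check of valueToDisplayString's formatting rules as written above: values above
# 1000 are shown as comma-grouped integers, smaller values to roughly 4 significant figures.
assert '{0:,}'.format(int(1234567.89)) == '1,234,567'
assert '%0.4g' % 0.0123456 == '0.01235'
assert '%0.4g' % 12.5 == '12.5'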
def logSummary(self, log, showDuration=None, showOutcomeStats=None, showOutcomeReason=None, showOutputDir=None, showTestIdList=None, **kwargs):
    """
    Writes a textual summary using the specified log function, with colored output if enabled.

    :param Callable[format,args,kwargs=] log: The function to call for each line of the summary
        (e.g. log.critical). The message is obtained with ``format % args``, and color information
        is available from the ``extra=`` keyword argument.
    """
    assert not kwargs, kwargs.keys()
    if showDuration is None: showDuration = str(self.showDuration).lower() == 'true'
    if showOutcomeStats is None: showOutcomeStats = str(self.showOutcomeStats).lower() == 'true'
    if showOutcomeReason is None: showOutcomeReason = str(self.showOutcomeReason).lower() == 'true'
    if showOutputDir is None: showOutputDir = str(self.showOutputDir).lower() == 'true'
    if showTestIdList is None: showTestIdList = str(self.showTestIdList).lower() == 'true'

    if showDuration:
        log("Completed test run at: %s", time.strftime('%A %Y-%m-%d %H:%M:%S %Z', time.localtime(time.time())),
            extra=ColorLogFormatter.tag(LOG_DEBUG, 0))
        if self.threads > 1:
            log("Total test duration (absolute): %s", '%.2f secs' % (time.time() - self.startTime),
                extra=ColorLogFormatter.tag(LOG_DEBUG, 0))
            log("Total test duration (additive): %s", '%.2f secs' % self.duration,
                extra=ColorLogFormatter.tag(LOG_DEBUG, 0))
        else:
            log("Total test duration: %s", "%.2f secs" % (time.time() - self.startTime),
                extra=ColorLogFormatter.tag(LOG_DEBUG, 0))
        log('')

    if showOutcomeStats:
        executed = sum(self.outcomes.values())
        failednumber = sum([self.outcomes[o] for o in FAILS])
        passed = ', '.join(['%d %s' % (self.outcomes[o], LOOKUP[o]) for o in PRECEDENT if o not in FAILS and self.outcomes[o] > 0])
        failed = ', '.join(['%d %s' % (self.outcomes[o], LOOKUP[o]) for o in PRECEDENT if o in FAILS and self.outcomes[o] > 0])
        if failed: log('Failure outcomes: %s (%0.1f%%)', failed, 100.0 * (failednumber) / executed,
            extra=ColorLogFormatter.tag(LOOKUP[FAILED].lower(), [0, 1]))
        if passed: log('Success outcomes: %s', passed,
            extra=ColorLogFormatter.tag(LOOKUP[PASSED].lower(), [0]))
        log('')

    log("Summary of failures: ")
    fails = 0
    for cycle in self.results:
        for outcome, tests in self.results[cycle].items():
            if outcome in FAILS: fails = fails + len(tests)
    if fails == 0:
        log(" THERE WERE NO FAILURES", extra=ColorLogFormatter.tag(LOG_PASSES))
    else:
        failedids = set()
        for cycle in self.results:
            cyclestr = ''
            if len(self.results) > 1: cyclestr = '[CYCLE %d] ' % (cycle + 1)
            for outcome in FAILS:
                for (id, reason, outputdir) in self.results[cycle][outcome]:
                    failedids.add(id)
                    log(" %s%s: %s ", cyclestr, LOOKUP[outcome], id, extra=ColorLogFormatter.tag(LOOKUP[outcome].lower()))
                    if showOutputDir:
                        log(" %s", os.path.normpath(os.path.relpath(outputdir)) + os.sep)
                    if showOutcomeReason and reason:
                        log(" %s", reason, extra=ColorLogFormatter.tag(LOG_TEST_OUTCOMES))

        if showTestIdList and len(failedids) > 1:
            # display just the ids, in a way that's easy to copy and paste into a command line
            failedids = list(failedids)
            failedids.sort()
            if len(failedids) > 20: # this feature is only useful for small test runs
                failedids = failedids[:20] + ['...']
            log('')
            log('List of failed test ids:')
            log('%s', ' '.join(failedids))