def __init__(self, *args, **kwargs):
    """
    Build a DynamicConfig, forwarding the recognised options to Config.

    Positional arguments are mapped, in order, onto
    socorro_config.namedConfigOptions. Keyword arguments are forwarded to
    Config only when their name is one of the keys of
    socorro_config.getDefaultedConfigOptions(). When the same option is
    given both ways, the positional value wins.

    Pays local attention to these keys (all read from kwargs):
      updateInterval: default: '0:0:0:0'
        format: 'dd:hh:mm:ss', leading parts optional. Must be >= 0 seconds.
      updateFunction: default: noop(). Takes self as argument.
        Behavior: Updates default values in argument
      reEvaluateFunction: default: noop(). Takes self as argument.
        Behavior: Mutates values in argument
      signalNumber: default: SIGALRM (14). If 0, then signals will not be
        handled. Instances that share the same signalNumber will all be
        update()-ed at every signal.

    self.internal.updateFunction may be set after construction if desired:
      Avoids double-work at construction
    self.internal.reEvaluateFunction may be set after construction if
      desired, but this is not recommended.

    Raises:
      ValueError: if updateInterval converts to a negative timedelta.
    """
    defaulted = socorro_config.getDefaultedConfigOptions()
    skwargs = dict((key, kwargs[key]) for key in defaulted.keys() if key in kwargs)
    # Positional values are applied last so they override same-named keywords.
    for position, value in enumerate(args):
        skwargs[socorro_config.namedConfigOptions[position]] = value
    super(DynamicConfig, self).__init__(**skwargs)

    self.internal.updateFunction = kwargs.get('updateFunction', noop)
    self.internal.reEvaluateFunction = kwargs.get('reEvaluateFunction', noop)
    self.internal.signalNumber = kwargs.get('signalNumber', 14)
    self.internal.nextUpdate = None
    updateInterval = kwargs.get('updateInterval', '0:0:0:0')
    self.internal.updateDelta = socorro_config.timeDeltaConverter(updateInterval)
    # A zero delta is falsy: nextUpdate then stays None (no timed update).
    if self.internal.updateDelta:
        if self.internal.updateDelta < datetime.timedelta(0):
            raise ValueError("updateInterval must be non-negative, but %s" % self.internal.updateDelta)
        self.internal.nextUpdate = utc_now() + self.internal.updateDelta

    # finally: make sure we are current
    if self.internal.signalNumber:
        # The previously installed handler is intentionally not kept.
        signal.signal(self.internal.signalNumber, DynamicConfig.handleAlarm)
    self.doUpdate()
    # Register the instance in the class-level table, keyed by identity.
    DynamicConfig.instances[id(self)] = self
def getProcessingWindow(configContext, tableName, productVersionRestriction, cursor, logger, **kwargs):
    """
    ProcessingWindow is a single time window over which to aggregate
    materialized view data.

    Returns (startWindow, deltaWindow, endWindow) using this heuristic:
      kwargs beats configContext which beats latest table row
      if two among startWindow, endWindow, deltaWindow in config or kwargs:
        they are used.
        if all three: startWindow + deltaWindow must equal endWindow
      Backward compatibility:
        if processingDay is present and windowXxx are not:
          startWindow = midnight of given day, deltaWindow = timedelta(days=1)
      else: try to read the last window and its size from the given table
        (via getLastWindowAndSizeFromTable); endWindow is computed only if
        the table yields a start value.

    String values for startWindow, endWindow and processingDay are converted
    with cm.dateTimeConverter; a string deltaWindow with
    cm.timeDeltaConverter. Non-string values are used as given.

    On inconsistency or failure, the exception is handed to
    lib_util.reportExceptionAndAbort(logger).

    BEWARE: You can get inconsistency by having one item in config and the
    other two in kwargs: BEWARE

    Validation failures are raised explicitly as AssertionError (rather than
    with the assert statement) so that they are still detected when Python
    runs with -O, which strips assert statements.
    """
    # Merge settings; kwargs are applied last so they override configContext.
    config = {}
    config.update(configContext)
    config.update(kwargs)

    startWindow = config.get('startWindow')
    if isinstance(startWindow, str):
        startWindow = cm.dateTimeConverter(startWindow)
    deltaWindow = config.get('deltaWindow')
    if isinstance(deltaWindow, str):
        deltaWindow = cm.timeDeltaConverter(deltaWindow)
    endWindow = config.get('endWindow')
    if isinstance(endWindow, str):
        endWindow = cm.dateTimeConverter(endWindow)
    processingDay = config.get('processingDay')
    if isinstance(processingDay, str):
        processingDay = cm.dateTimeConverter(processingDay)

    try:
        if startWindow or deltaWindow or endWindow:
            if startWindow and endWindow and deltaWindow:
                if not (startWindow + deltaWindow == endWindow):
                    raise AssertionError("inconsistent: %s + %s != %s" % (startWindow, deltaWindow, endWindow))
            elif startWindow and endWindow:
                deltaWindow = endWindow - startWindow
            elif startWindow and deltaWindow:
                endWindow = startWindow + deltaWindow
            elif deltaWindow and endWindow:
                startWindow = endWindow - deltaWindow
            else:
                # Exactly one of the three was supplied: not enough to
                # determine a window.
                raise AssertionError(
                    "insufficient: Need two of window ...Start: %s, ...Delta: %s, ...End:%s"
                    % (startWindow, deltaWindow, endWindow))
        elif processingDay:
            # timetuple() carries no microseconds, so any sub-second part of
            # processingDay is dropped before the midnight check below.
            dayt = datetime.datetime.fromtimestamp(time.mktime(processingDay.timetuple()))
            startWindow = dayt.replace(hour=0, minute=0, second=0, microsecond=0)
            if not (startWindow == dayt):
                raise AssertionError('processingDay must be some midnight, but was %s' % dayt)
            deltaWindow = datetime.timedelta(days=1)
            endWindow = startWindow + deltaWindow
        else:
            # no params: try table
            startWindow, deltaWindow = getLastWindowAndSizeFromTable(
                cursor, tableName, productVersionRestriction, logger)
            if startWindow:
                endWindow = startWindow + deltaWindow
        return (startWindow, deltaWindow, endWindow)
    except:
        # Deliberately catches everything: any failure is reported and the
        # run is aborted by the helper.
        lib_util.reportExceptionAndAbort(logger)
# Command-line/config option declarations. Each option is a cm.Option whose
# .doc is its help text, .default its default value and, where set,
# .fromStringConverter the callable applied to a string value.

# jsonFileSuffix itself is not created in this excerpt (presumably just
# above); only its help text and default are set here.
jsonFileSuffix.doc = 'the suffix used to identify a json file'
jsonFileSuffix.default = '.json'

# Suffix that identifies a dump file.
dumpFileSuffix = cm.Option()
dumpFileSuffix.doc = 'the suffix used to identify a dump file'
dumpFileSuffix.default = '.dump'

# Interval between checks for priority jobs; default is one minute, parsed
# with cm.timeDeltaConverter.
# NOTE(review): the help text contains the typo 'bewteen'; it is a runtime
# string, so it is left untouched here.
checkForPriorityFrequency = cm.Option()
checkForPriorityFrequency.doc = 'the time bewteen checks for priority jobs (HHH:MM:SS)'
checkForPriorityFrequency.default = '0:01:00'
checkForPriorityFrequency.fromStringConverter = cm.timeDeltaConverter

# Age after which a processor is considered dead; default is five minutes.
# The converter parses the value with cm.timeDeltaConverter and then turns
# the result back into a string, so the stored value is a str rather than
# the converter's return value.
processorCheckInTime = cm.Option()
processorCheckInTime.doc = 'the time after which a processor is considered dead (HH:MM:SS)'
processorCheckInTime.default = "00:05:00"
processorCheckInTime.fromStringConverter = lambda x: str(cm.timeDeltaConverter(x))

# How often a processor checks in with the monitor; default is five minutes.
# NOTE(review): the help text says "in seconds" while the default uses a
# colon-separated form -- confirm which wording is intended.
processorCheckInFrequency = cm.Option()
processorCheckInFrequency.doc = 'the frequency in seconds for the processor to check in with the monitor'
processorCheckInFrequency.default = '0:05:00'
processorCheckInFrequency.fromStringConverter = cm.timeDeltaConverter

# Number of jobs pulled per batch; no string converter is set for this one.
batchJobLimit = cm.Option()
batchJobLimit.doc = 'the number of jobs to pull in a time'
batchJobLimit.default = 10000

# Pattern for frame signatures to ignore: '@0x' followed by two or more
# hexadecimal digits. The repeated '0' in the character class is redundant
# but harmless.
irrelevantSignatureRegEx = cm.Option()
irrelevantSignatureRegEx.doc = 'a regular expression matching frame signatures that should be ignored when generating an overall signature'
irrelevantSignatureRegEx.default = '@0x[01234567890abcdefABCDEF]{2,}'

# Only created here; its help text and default are not in this excerpt.
prefixSignatureRegEx = cm.Option()
def getProcessingDates(configContext, tableName, productVersionRestriction, cursor, logger, **kwargs):
    """
    A processing interval is a time interval greater or equal to a processing
    window. Used to calculate a series of adjacent materialized view
    aggregates.

    Returns (startDate, deltaDate, endDate) using this heuristic:
      kwargs beats configContext
      if none are provided, calculates based on latest row of table, now()
      if only one is provided, logs the insufficiency and aborts
      if two among startDate, deltaDate, endDate: they are used
    Checks the table for most recent window_end
      if startDate < window_end:
        startDate = window_end
        logger.info(...that change...)
    if startDate >= endDate, or deltaDate <= 0, or three provided are
    inconsistent:
      logs the inconsistency and aborts

    Other config keys read here: 'processingDelay' (default two hours),
    'initialDeltaDate' (falls back to 'deltaDate', then to
    globalInitialDeltaDate) and 'defaultDeltaWindow' (falls back to
    'deltaWindow', then to globalDefaultDeltaWindow).

    NOTE(review): this code uses Python 2-only syntax
    ('except Exception, x' and the print statement).
    """
    # Merge settings; kwargs are applied last so they override configContext.
    config = {}
    config.update(configContext)
    config.update(kwargs)
    delta0 = datetime.timedelta(days=0)
    delay = config.get('processingDelay', datetime.timedelta(hours=2))
    # startDate/endDate are first rendered with "%s" and then parsed, so both
    # string and non-string inputs go through cm.dateTimeConverter.
    startDate = config.get('startDate')
    if startDate:
        startDate = "%s"%(startDate)
        startDate = cm.dateTimeConverter(startDate)
    # deltaDate is converted only when it is exactly a str.
    deltaDate = config.get('deltaDate')
    if type(deltaDate) is str:
        deltaDate = cm.timeDeltaConverter(deltaDate)
    endDate = config.get('endDate')
    if endDate:
        endDate = "%s"%(endDate)
        endDate = cm.dateTimeConverter(endDate)
    # These fallbacks read the raw config value of 'deltaDate'/'deltaWindow',
    # not the converted local above.
    initialDeltaDate = config.get('initialDeltaDate',config.get('deltaDate'))
    if not initialDeltaDate:
        initialDeltaDate = globalInitialDeltaDate
    defaultDeltaWindow = config.get('defaultDeltaWindow',config.get('deltaWindow'))
    if not defaultDeltaWindow:
        defaultDeltaWindow = globalDefaultDeltaWindow
    try:
        try:
            logger.debug('trying getDefaultDateInterval')
            startDateFromTable,endDateFromTable,latestWindowEnd = getDefaultDateInterval(cursor,tableName,delay,initialDeltaDate,defaultDeltaWindow,productVersionRestriction,logger)
        except Exception, x:
            # NOTE(review): the failure is only printed. The three names
            # assigned in the try body stay unbound in that case, so the
            # later reference to latestWindowEnd would raise a NameError.
            print x
        if startDate and endDate and deltaDate:
            assert startDate + deltaDate == endDate,"inconsistent: %s + %s != %s"%(startDate,deltaDate,endDate)
        elif startDate and endDate:
            assert startDate < endDate, 'inconsistent: startDate %s >= endDate %s'%(startDate,endDate)
            deltaDate = endDate - startDate
        elif startDate and deltaDate:
            assert deltaDate > delta0, 'inconsistent: deltaDate %s <= 0'%(deltaDate)
            endDate = startDate + deltaDate
        elif deltaDate and endDate:
            assert deltaDate > delta0, 'inconsistent: deltaDate %s <= 0'%(deltaDate)
            startDate = endDate - deltaDate
        else:
            # Either nothing was supplied (use the table-derived interval) or
            # exactly one value was supplied (the assertion fails).
            assert not (startDate or deltaDate or endDate), "insufficient: Need two xxxDate: start: %s, delta: %s, end:%s"%(startDate,deltaDate,endDate)
            startDate = startDateFromTable
            endDate = endDateFromTable
            deltaDate = endDate - startDate
        # Never start before the most recent window already in the table.
        if latestWindowEnd and startDate < latestWindowEnd:
            logger.info("given/calculated startDate: %s < latest row in %s. Changing to %s",startDate,tableName,latestWindowEnd)
            startDate = latestWindowEnd
            deltaDate = endDate - startDate
            assert deltaDate > delta0, 'inconsistent (after check with db table %s): deltaDate %s <= 0'%(tableName,deltaDate)
        return (startDate,deltaDate,endDate)
    # NOTE(review): the handler belonging to the outer try is not visible in
    # this excerpt; the block nesting above is reconstructed from collapsed text.
#dumpDirPrefix.doc = 'dump directory names begin with this prefix' #dumpDirPrefix.default = 'tst_' #jsonFileSuffix = cm.Option() #jsonFileSuffix.doc = 'the suffix used to identify a json file' #jsonFileSuffix.default = '.json' #dumpFileSuffix = cm.Option() #dumpFileSuffix.doc = 'the suffix used to identify a dump file' #dumpFileSuffix.default = '.dump' processorCheckInTime = cm.Option() processorCheckInTime.doc = 'the time after which a processor is considered dead (HH:MM:SS)' processorCheckInTime.default = "00:05:00" processorCheckInTime.fromStringConverter = lambda x: str( cm.timeDeltaConverter(x)) standardLoopDelay = cm.Option() standardLoopDelay.doc = 'the time between scans for jobs (HHH:MM:SS)' standardLoopDelay.default = '00:05:00' standardLoopDelay.fromStringConverter = cm.timeDeltaConverter cleanupJobsLoopDelay = cm.Option() cleanupJobsLoopDelay.doc = 'the time between runs of the job clean up routines (HHH:MM:SS)' cleanupJobsLoopDelay.default = '00:05:00' cleanupJobsLoopDelay.fromStringConverter = cm.timeDeltaConverter priorityLoopDelay = cm.Option() priorityLoopDelay.doc = 'the time between checks for priority jobs (HHH:MM:SS)' priorityLoopDelay.default = '00:01:00' priorityLoopDelay.fromStringConverter = cm.timeDeltaConverter