def single_queue_it(self, count, remaining_count=None):
    """
    Queue one generator task for this sample (single-process rater path).

    :param count: value forwarded to the generator plugin through
        ``updateCounts``.  ``-1`` is always forwarded; any other value below 1
        means nothing is queued for this call.
    :param remaining_count: accepted for interface compatibility; not read here.
    :return: None
    """
    start_time = self.sample.earliestTime()
    end_time = self.sample.latestTime()

    nothing_to_generate = count < 1 and count != -1
    if nothing_to_generate:
        logger.info(
            "There is no data to be generated in worker {0} because the count is {1}.".format(
                self.sample.config.generatorWorkers, count))
        return

    plugin = self.generatorPlugin(sample=self.sample)
    # Adjust queue for threading mode
    plugin.updateConfig(config=self.config, outqueue=self.outputQueue)
    plugin.updateCounts(count=count, start_time=start_time, end_time=end_time)
    try:
        self.generatorQueue.put(plugin)
        queued_mb = round((count / 1024.0 / 1024), 4)
        template = ("Put {0} MB of events in queue for sample '{1}'"
                    "with et '{2}' and lt '{3}'")
        logger.info(template.format(queued_mb, self.sample.name, start_time, end_time))
    except Full:
        logger.warning("Generator Queue Full. Skipping current generation.")
def flush(self, q):
    """
    Write the ``_raw`` payload of every event in ``q`` to the output file.

    Events without a ``_raw`` value are skipped and a trailing newline is added
    when missing.  When the next event would push the file past
    ``self._fileMaxBytes`` the current file is closed, the backups are shifted
    up by one (the oldest is deleted), and a new file is started with that
    event.  After the queue is processed the handle is flushed and closed.
    An empty ``q`` is a no-op.

    :param q: sequence of event mappings exposing ``.get("_raw")``.
    """
    if len(q) == 0:
        return

    logger.debug(
        "Flushing output for sample '%s' in app '%s' for queue '%s'"
        % (self._sample.name, self._app, self._sample.source))

    # Each event is written individually; rotation is only checked before a
    # write, so a single event may still take the file past the size limit.
    try:
        for event in q:
            line = event.get("_raw")
            if not line:
                continue
            if line[-1] != "\n":
                line += "\n"

            if self._fileLength + len(line) <= self._fileMaxBytes:
                self._fileHandle.write(line)
                self._fileLength += len(line)
                continue

            # Size limit reached: close the current file and rotate backups.
            self._fileHandle.flush()
            self._fileHandle.close()

            oldest_backup = self._file + "." + str(self._fileBackupFiles)
            if os.path.exists(oldest_backup):
                logger.debug("File Output: Removing file: %s" % oldest_backup)
                os.unlink(oldest_backup)

            # Walk from the highest index down so no backup is overwritten.
            for index in reversed(range(1, int(self._fileBackupFiles))):
                current_backup = self._file + "." + str(index)
                logger.debug("File Output: Checking for file: %s" % current_backup)
                if os.path.exists(current_backup):
                    next_backup = self._file + "." + str(index + 1)
                    logger.debug(
                        "File Output: Renaming file %s to %s"
                        % (current_backup, next_backup))
                    os.rename(current_backup, next_backup)

            os.rename(self._file, self._file + ".1")
            self._fileHandle = open(self._file, "w")
            self._fileHandle.write(line)
            self._fileLength = len(line)
    except IndexError:
        logger.warning(
            "IndexError when writting for app '%s' sample '%s'"
            % (self._app, self._sample.name))

    if not self._fileHandle.closed:
        self._fileHandle.flush()
    logger.debug("Queue for app '%s' sample '%s' written"
                 % (self._app, self._sample.name))

    self._fileHandle.close()
def getTSFromEvent(self, event, passed_token=None):
    """
    Extract a timestamp from ``event`` using timestamp tokens.

    Each candidate token is searched for in the event; the first match that
    parses into a ``datetime`` wins.  A token whose replacement is ``"%s"`` is
    read as an epoch value (strings of ten or more characters are scaled down
    to ten integer digits); any other replacement is used as a ``strptime``
    format.  If the parsed year is 1900 (no year in the format) it is replaced
    with the year of ``self.now()``.

    :param event: raw event text to search.
    :param passed_token: optional single token to try instead of ``self.tokens``.
        When given, a total failure is not logged (the exception is still raised).
    :return: the parsed ``datetime.datetime``.
    :raises ValueError: when no token yields a parseable timestamp.
    """
    currentTime = None
    formats = []
    # JB: 2012/11/20 - Can we optimize this by only testing tokens of type = *timestamp?
    # JB: 2012/11/20 - Alternatively, documentation should suggest putting timestamp as token.0.
    tokens = self.tokens if passed_token is None else [passed_token]

    for token in tokens:
        # Reset per token so the warning below never reports an unbound or stale value.
        timeString = None
        timeFormat = None
        try:
            formats.append(token.token)
            results = token._search(event)
            if not results:
                continue

            timeFormat = token.replacement
            # Use the first capture group when the pattern defines one.
            group = 0 if len(results.groups()) == 0 else 1
            timeString = results.group(group)

            if timeFormat == "%s":
                ts = float(timeString) if len(timeString) < 10 else float(timeString) \
                    / (10**(len(timeString) - 10))
                currentTime = datetime.datetime.fromtimestamp(ts)
            else:
                # Checking for timezone adjustment.  The length guard keeps short
                # matches (fewer than five characters) from raising IndexError.
                if len(timeString) >= 5 and timeString[-5] == "+":
                    timeString = timeString[:-5]
                # Working around Python bug with a non thread-safe strptime. Randomly get AttributeError
                # when calling strptime, so if we get that, try again
                while currentTime is None:
                    try:
                        currentTime = datetime.datetime.strptime(timeString, timeFormat)
                    except AttributeError:
                        pass

            logger.debug("Match '%s' Format '%s' result: '%s'" % (timeString, timeFormat, currentTime))
            if isinstance(currentTime, datetime.datetime):
                break
        except ValueError:
            logger.warning("Match found ('%s') but time parse failed. Timeformat '%s' Event '%s'" %
                           (timeString, timeFormat, event))

    if not isinstance(currentTime, datetime.datetime):
        # Total fail
        message = "Can't find a timestamp (using patterns '%s') in this event: '%s'." % (formats, event)
        if passed_token is None:  # If we're running for autotimestamp don't log error
            logger.warning(message)
        raise ValueError(message)

    # Check to make sure we parsed a year
    if currentTime.year == 1900:
        currentTime = currentTime.replace(year=self.now().year)
    # 11/3/14 CS: setting self.timestamp here broke replay mode, so this method
    # intentionally leaves self.timestamp untouched.
    return currentTime
def flush(self, endOfInterval=False):
    """
    Hand the buffered events to a new output plugin instance and reset the buffer.

    The plugin is either placed on ``self.outputQueue`` or run inline, depending
    on the plugin's and the config's ``useOutputQueue`` flags.

    :param endOfInterval: currently ignored; every call flushes.
    """
    # TODO: Fix interval flushing somehow with a queue, not sure I even want to support this feature anymore.
    # The former endOfInterval / maxIntervalsBeforeFlush gating is disabled, so
    # every call flushes unconditionally.
    # TODO: This is set this way just for the time being while I decide if we want this feature.
    events = self._queue
    logger.debug("Flushing queue for sample '%s' with size %d" % (self._sample.name, len(events)))
    self._queue = []

    outputer = self.outputPlugin(self._sample, self.output_counter)
    outputer.updateConfig(self.config)
    outputer.set_events(events)

    # When an outputQueue is used, it needs to run in a single threaded nature which requires to be put back
    # into the outputqueue so a single thread worker can execute it. When an outputQueue is not used, it can be
    # ran by multiple processes or threads. Therefore, no need to put the outputer back into the Queue. Just
    # execute it.
    # if outputPlugin must be used for useOutputQueue, use outputQueue regardless of user config useOutputQueue:
    use_queue = self.outputPlugin.useOutputQueue or self.config.useOutputQueue
    if use_queue:
        try:
            self.outputQueue.put(outputer)
        except Full:
            logger.warning("Output Queue full, looping again")
        return

    if self.config.splunkEmbedded:
        raw_sizes = [len(entry['_raw']) for entry in events]
        if raw_sizes:
            metrics_logger.info({
                'timestamp': datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
                'sample': self._sample.name,
                'events': len(raw_sizes),
                'bytes': sum(raw_sizes)
            })
    outputer.run()
def queue_it(self, count):
    """
    Dispatch generator tasks covering the sample's backfill window.

    ``self.sample.backfill`` is split into a sign, a number and a unit, and the
    window start is computed relative to the current real time.  One generator
    task is created per ``self.sample.interval`` seconds until the window
    reaches the time captured at entry.  Replay samples are run inline once and
    handle the whole backfill themselves; other generators are queued and the
    sample is marked ``backfilldone`` afterwards.

    Any exception is logged and swallowed.

    :param count: value forwarded to each generator plugin via ``updateCounts``.
    """
    try:
        realtime = self.sample.now(realnow=True)
        backfill_spec = self.sample.backfill

        mathsymbol = "-" if "-" in backfill_spec[0] else "+"
        # Digits form the amount; everything else except "-" forms the unit.
        backfillnumber = "".join(ch for ch in backfill_spec if ch.isdigit())
        backfillletter = "".join(
            ch for ch in backfill_spec if not ch.isdigit() and ch != "-")

        window_start = timeParserTimeMath(
            plusminus=mathsymbol,
            num=backfillnumber,
            unit=backfillletter,
            ret=realtime,
        )
        while window_start < realtime:
            et = window_start
            lt = timeParserTimeMath(plusminus="+", num=self.sample.interval, unit="s", ret=et)
            plugin = self.generatorPlugin(sample=self.sample)
            plugin.updateCounts(count=count, start_time=et, end_time=lt)
            plugin.updateConfig(config=self.config, outqueue=self.outputQueue)
            try:
                # Need to lock on replay mode since event duration is dynamic.  Interval starts counting
                # after the replay has finished.
                if self.sample.generator == "replay":
                    plugin.run()
                else:
                    self.generatorQueue.put(plugin)
            except Full:
                logger.warning("Generator Queue Full. Skipping current generation.")
            # due to replays needing to iterate in reverse, it's more efficient to process backfill
            # after the file has been parsed.  Replay mode takes care of all replays on its
            # first run, so the window is closed immediately for it.
            window_start = realtime if self.sample.generator == "replay" else lt

        if self.sample.generator != "replay":
            self.sample.backfilldone = True
    except Exception as e:
        logger.error("Failed queuing backfill, exception: {0}".format(e))
def gen(self, count, earliest, latest, samplename=None):
    """
    Emit ``count`` evenly spaced "WINDBAG" events between ``earliest`` and ``latest``.

    Event ``i`` (1-based) is stamped ``earliest + i * (latest - earliest) / count``,
    so the last event carries ``latest``.  Each message is passed to
    ``self._out.send``.

    :param count: number of events to emit.  A negative value falls back to 60;
        zero emits nothing.
    :param earliest: ``datetime`` marking the start of the window.
    :param latest: ``datetime`` marking the end of the window.
    :param samplename: unused; kept for interface compatibility.
    :return: 0
    """
    if count < 0:
        logger.warning(
            'Sample size not found for count=-1 and generator=windbag, defaulting to count=60'
        )
        count = 60
    if count == 0:
        # Nothing to emit; also avoids dividing the window by zero below.
        return 0

    time_interval = (latest - earliest).total_seconds() / count
    for position in range(1, count + 1):
        current_time_object = earliest + datetime.timedelta(
            seconds=time_interval * position)
        msg = '{0} -0700 WINDBAG Event {1} of {2}'.format(
            current_time_object, position, count)
        self._out.send(msg)
    return 0
def single_queue_it(self, count):
    """
    Queue (or, for replay samples, directly run) one generator task.

    :param count: value forwarded to the generator plugin through
        ``updateCounts``.  ``-1`` is always forwarded; any other value below 1
        means no task is created for this call.
    :return: None
    """
    et = self.sample.earliestTime()
    lt = self.sample.latestTime()
    if count < 1 and count != -1:
        logger.info(
            "There is no data to be generated in worker {0} because the count is {1}.".format(
                self.sample.config.generatorWorkers, count
            )
        )
        return

    genPlugin = self.generatorPlugin(sample=self.sample)
    # Adjust queue for threading mode
    genPlugin.updateCounts(count=count, start_time=et, end_time=lt)
    genPlugin.updateConfig(config=self.config, outqueue=self.outputQueue)
    try:
        logger.info(
            (
                "Put {0} MB of events in queue for sample '{1}'"
                + "with et '{2}' and lt '{3}'"
            ).format(
                round((count / 1024.0 / 1024), 4), self.sample.name, et, lt
            )
        )
        if self.sample.generator == "replay":
            # lock on to replay mode, this will keep the timer knowing when to continue cycles since
            # replay mode has a dynamic replay time and interval doesn't mean the same thing.
            # Default to None so a config without an ``outputCounter`` attribute
            # no longer leaves ``output_counter`` unbound.
            output_counter = getattr(self.config, "outputCounter", None)
            if output_counter:
                from splunk_eventgen.lib.outputcounter import OutputCounter

                output_counter = OutputCounter()
            genPlugin.run(output_counter=output_counter)
        else:
            self.generatorQueue.put(genPlugin)
    except Full:
        logger.warning("Generator Queue Full. Skipping current generation.")
def flush(self, endOfInterval=False):
    """
    Drain the buffered events into a new output plugin instance.

    The buffer is swapped for an empty list, the drained events are given to a
    freshly built output plugin, and that plugin is either placed on
    ``self.outputQueue`` or run inline.

    :param endOfInterval: not consulted; every call flushes.
    """
    pending = self._queue
    logger.debug("Flushing queue for sample '%s' with size %d" % (self._sample.name, len(pending)))
    self._queue = []

    outputer = self.outputPlugin(self._sample, self.output_counter)
    outputer.updateConfig(self.config)
    outputer.set_events(pending)

    # When an outputQueue is used, it needs to run in a single threaded nature which requires to be put back
    # into the outputqueue so a single thread worker can execute it. When an outputQueue is not used, it can be
    # ran by multiple processes or threads. Therefore, no need to put the outputer back into the Queue. Just
    # execute it.
    # if outputPlugin must be used for useOutputQueue, use outputQueue regardless of user config useOutputQueue:
    if not (self.outputPlugin.useOutputQueue or self.config.useOutputQueue):
        if self.config.splunkEmbedded:
            byte_counts = [len(item['_raw']) for item in pending]
            if byte_counts:
                stamp = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
                metrics_logger.info({
                    'timestamp': stamp,
                    'sample': self._sample.name,
                    'events': len(byte_counts),
                    'bytes': sum(byte_counts)
                })
        outputer.run()
    else:
        try:
            self.outputQueue.put(outputer)
        except Full:
            logger.warning("Output Queue full, looping again")
def queue_it(self, count):
    """
    Queue a generator task, carrying over volume too small for one raw event.

    The incoming ``count`` is added to ``self.previous_count_left``.  If the
    total is positive but below ``self.raweventsize`` it is stored through
    ``update_options`` for the next call and nothing is queued.  Otherwise the
    carry-over is reset to 0 and a generator plugin is queued with the total.

    :param count: volume for the current interval.
    """
    total = count + self.previous_count_left

    if 0 < total < self.raweventsize:
        logger.info(
            "current interval size is {}, which is smaller than a raw event size {}."
            .format(total, self.raweventsize) + "Wait for the next turn.")
        self.update_options(previous_count_left=total)
        return

    self.update_options(previous_count_left=0)
    window_start = self.sample.earliestTime()
    window_end = self.sample.latestTime()
    # self.generatorPlugin is the plugin factory; build the plugin instance
    # that will actually be queued.
    plugin = self.generatorPlugin(sample=self.sample)
    # Adjust queue for threading mode
    plugin.updateConfig(config=self.config, outqueue=self.outputQueue)
    plugin.updateCounts(count=total, start_time=window_start, end_time=window_end)
    try:
        self.generatorQueue.put(plugin)
    except Full:
        logger.warning("Generator Queue Full. Skipping current generation.")
def real_run(self):
    """
    Worker function of the Timer class.

    Runs the timer loop for one sample: optionally sleeps for the sample's
    delay, then repeatedly asks the rater for a count and places generator
    plugin instances on ``self.generatorQueue``.  On the first pass with a
    pending backfill, one task is queued per interval of the backfill window;
    otherwise one task per configured generator worker is queued each time the
    countdown expires.

    The loop ends when ``self.config.stopping`` or ``self.stopping`` is set,
    when ``self.end`` is 0, when the number of executions reaches ``self.end``
    (no ``self.endts`` set), or when the latest event time reaches
    ``self.endts``.  ``self.end == -1`` runs without an end condition.
    """
    if self.sample.delay > 0:
        logger.info("Sample set to delay %s, sleeping." % self.sample.delay)
        time.sleep(self.sample.delay)

    logger.debug("Timer creating plugin for '%s'" % self.sample.name)

    end = False
    # Volume carried over between intervals for the perdayvolumegenerator path.
    previous_count_left = 0
    raw_event_size = self.predict_event_size()
    if self.end:
        if int(self.end) == 0:
            logger.info(
                "End = 0, no events will be generated for sample '%s'" %
                self.sample.name)
            end = True
        elif int(self.end) == -1:
            logger.info(
                "End is set to -1. Will be running without stopping for sample %s"
                % self.sample.name)
    while not end:
        # Need to be able to stop threads by the main thread or this thread. self.config will stop all threads
        # referenced in the config object, while, self.stopping will only stop this one.
        if self.config.stopping or self.stopping:
            end = True
            continue
        count = self.rater.rate()
        # First run of the generator, see if we have any backfill work to do.
        if self.countdown <= 0:
            if self.sample.backfill and not self.sample.backfilldone:
                realtime = self.sample.now(realnow=True)
                # Split the backfill spec into sign, amount (digits) and unit (remaining characters).
                if "-" in self.sample.backfill[0]:
                    mathsymbol = "-"
                else:
                    mathsymbol = "+"
                backfillnumber = ""
                backfillletter = ""
                for char in self.sample.backfill:
                    if char.isdigit():
                        backfillnumber += char
                    elif char != "-":
                        backfillletter += char
                backfillearliest = timeParserTimeMath(plusminus=mathsymbol, num=backfillnumber, unit=backfillletter,
                                                      ret=realtime)
                while backfillearliest < realtime:
                    if self.end and self.executions == int(self.end):
                        logger.info(
                            "End executions %d reached, ending generation of sample '%s'"
                            % (int(self.end), self.sample.name))
                        break
                    et = backfillearliest
                    lt = timeParserTimeMath(plusminus="+", num=self.interval, unit="s", ret=et)
                    # Shallow-copy the sample but deep-copy its tokens for the plugin instance.
                    copy_sample = copy.copy(self.sample)
                    tokens = copy.deepcopy(self.sample.tokens)
                    copy_sample.tokens = tokens
                    genPlugin = self.generatorPlugin(sample=copy_sample)
                    # need to make sure we set the queue right if we're using multiprocessing or thread modes
                    genPlugin.updateConfig(config=self.config, outqueue=self.outputQueue)
                    genPlugin.updateCounts(count=count, start_time=et, end_time=lt)
                    try:
                        # Blocking put with a 3 second timeout; on Full the same window is retried.
                        self.generatorQueue.put(genPlugin, True, 3)
                        self.executions += 1
                        backfillearliest = lt
                    except Full:
                        logger.warning(
                            "Generator Queue Full. Reput the backfill generator task later. %d backfill generators are dispatched.",
                            self.executions)
                        backfillearliest = et
                    # Refresh "now" so the backfill keeps going until it catches up with real time.
                    realtime = self.sample.now(realnow=True)

                self.sample.backfilldone = True
            else:
                # 12/15/13 CS Moving the rating to a separate plugin architecture
                # Save previous interval count left to avoid perdayvolumegenerator drop small tasks
                # NOTE(review): rater.rate() was already called at the top of this iteration and is
                # called again here; confirm the rater tolerates two calls per interval.
                if self.sample.generator == 'perdayvolumegenerator':
                    count = self.rater.rate() + previous_count_left
                    if 0 < count < raw_event_size:
                        logger.info(
                            "current interval size is {}, which is smaller than a raw event size {}."
                            .format(count, raw_event_size) +
                            "Wait for the next turn.")
                        previous_count_left = count
                        self.countdown = self.interval
                        self.executions += 1
                        continue
                    else:
                        previous_count_left = 0
                else:
                    count = self.rater.rate()

                et = self.sample.earliestTime()
                lt = self.sample.latestTime()

                try:
                    if count < 1 and count != -1:
                        logger.info(
                            "There is no data to be generated in worker {0} because the count is {1}."
                            .format(self.sample.config.generatorWorkers,
                                    count))
                    else:
                        # Spawn workers at the beginning of job rather than wait for next interval
                        logger.info(
                            "Starting '%d' generatorWorkers for sample '%s'"
                            % (self.sample.config.generatorWorkers,
                               self.sample.name))
                        for worker_id in range(
                                self.config.generatorWorkers):
                            copy_sample = copy.copy(self.sample)
                            tokens = copy.deepcopy(self.sample.tokens)
                            copy_sample.tokens = tokens
                            genPlugin = self.generatorPlugin(
                                sample=copy_sample)
                            # Adjust queue for threading mode
                            genPlugin.updateConfig(
                                config=self.config,
                                outqueue=self.outputQueue)
                            genPlugin.updateCounts(count=count,
                                                   start_time=et,
                                                   end_time=lt)

                            try:
                                self.generatorQueue.put(genPlugin)
                                logger.debug((
                                    "Worker# {0}: Put {1} MB of events in queue for sample '{2}'"
                                    + "with et '{3}' and lt '{4}'").format(
                                        worker_id,
                                        round((count / 1024.0 / 1024), 4),
                                        self.sample.name, et, lt))
                            except Full:
                                logger.warning(
                                    "Generator Queue Full. Skipping current generation."
                                )
                    self.executions += 1
                except Exception as e:
                    # Log and keep the timer alive unless this timer was asked to stop.
                    logger.exception(str(e))
                    if self.stopping:
                        end = True
                    pass

            # Sleep until we're supposed to wake up and generate more events
            self.countdown = self.interval

            # 8/20/15 CS Adding support for ending generation at a certain time

            if self.end:
                if int(self.end) == -1:
                    time.sleep(self.time)
                    self.countdown -= self.time
                    continue
                # 3/16/16 CS Adding support for ending on a number of executions instead of time
                # Should be fine with storing state in this sample object since each sample has it's own unique
                # timer thread
                if not self.endts:
                    if self.executions >= int(self.end):
                        logger.info(
                            "End executions %d reached, ending generation of sample '%s'"
                            % (int(self.end), self.sample.name))
                        self.stopping = True
                        end = True
                # NOTE(review): lt is only assigned inside the backfill loop or the regular branch
                # above; if the backfill loop body never ran, lt appears to be unbound here - confirm.
                # NOTE(review): the comparison reads self.endts while the message reads
                # self.sample.endts - verify these always agree.
                elif lt >= self.endts:
                    logger.info(
                        "End Time '%s' reached, ending generation of sample '%s'"
                        % (self.sample.endts, self.sample.name))
                    self.stopping = True
                    end = True

        else:
            # Countdown still running: sleep one tick and decrement.
            time.sleep(self.time)
            self.countdown -= self.time