def flush(self, q):
    if len(q) > 0:
        logger.debug("Flushing output for sample '%s' in app '%s' for queue '%s'" %
                     (self._sample.name, self._app, self._sample.source))
        # Loop through all the messages, build one long string, and write once per flush.
        # This may cause the file to exceed maxFileBytes slightly, but it greatly improves performance.
        msglist = ""
        try:
            for metamsg in q:
                msg = metamsg['_raw']
                if msg[-1] != '\n':
                    msg += '\n'
                msglist += msg
            self._fileHandle.write(msglist)
            self._fileLength += len(msglist)
            # If we're at the end of the max allowable size, shift all files
            # up a number and create a new one
            if self._fileLength > self._fileMaxBytes:
                self._fileHandle.flush()
                self._fileHandle.close()
                if os.path.exists(self._file + '.' + str(self._fileBackupFiles)):
                    logger.debug('File Output: Removing file: %s' % (self._file + '.' + str(self._fileBackupFiles)))
                    os.unlink(self._file + '.' + str(self._fileBackupFiles))
                for x in range(1, self._fileBackupFiles)[::-1]:
                    logger.debug('File Output: Checking for file: %s' % (self._file + '.' + str(x)))
                    if os.path.exists(self._file + '.' + str(x)):
                        logger.debug('File Output: Renaming file %s to %s' %
                                     (self._file + '.' + str(x), self._file + '.' + str(x + 1)))
                        os.rename(self._file + '.' + str(x), self._file + '.' + str(x + 1))
                os.rename(self._file, self._file + '.1')
                self._fileHandle = open(self._file, 'w')
                self._fileLength = 0
        except IndexError:
            logger.warning("IndexError when writing for app '%s' sample '%s'" % (self._app, self._sample.name))
        if not self._fileHandle.closed:
            self._fileHandle.flush()
        logger.debug("Queue for app '%s' sample '%s' written" % (self._app, self._sample.name))
        self._fileHandle.close()
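
# A minimal standalone sketch of the numbered-backup rotation scheme used in
# flush() above, isolated for clarity. The function name and parameters are
# illustrative assumptions, not part of the plugin's API.
import os

def rotate(path, backup_files):
    # Drop the oldest backup if it exists: path.<backup_files>
    oldest = '%s.%d' % (path, backup_files)
    if os.path.exists(oldest):
        os.unlink(oldest)
    # Shift path.1 .. path.<backup_files - 1> up by one, newest last
    for x in range(backup_files - 1, 0, -1):
        src = '%s.%d' % (path, x)
        if os.path.exists(src):
            os.rename(src, '%s.%d' % (path, x + 1))
    # The current file becomes path.1; the caller reopens path for writing
    os.rename(path, path + '.1')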
def draw_text(
        img: Image,
        text: str,
        location: tuple = (0, 0),
        text_color=(0, 0, 0)) -> Image:
    draw = ImageDraw.Draw(img)
    try:
        # For Linux
        font = ImageFont.truetype("DejaVuSans.ttf", 20)
    except Exception:
        logger.warning("No font DejaVuSans; using default instead")
        # For others
        font = ImageFont.load_default()
    draw.text(location, text, font=font, fill=text_color)
    return img
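
# A minimal usage sketch for draw_text() above; the image size, text, and
# output path are illustrative assumptions, not values from the original code.
from PIL import Image

img = Image.new("RGB", (240, 60), color=(255, 255, 255))
img = draw_text(img, "hello, world", location=(10, 20))
img.save("annotated.png")  # hypothetical output file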
def gen(self, count, earliest, latest, samplename=None):
    if count < 0:
        logger.warning('Sample size not found for count=-1 and generator=windbag, defaulting to count=60')
        count = 60
    time_interval = timedelta.total_seconds(latest - earliest) / count
    for i in xrange(count):
        current_time_object = earliest + datetime.timedelta(0, time_interval * (i + 1))
        msg = '{0} -0700 WINDBAG Event {1} of {2}'.format(current_time_object, (i + 1), count)
        self._out.send(msg)
    return 0
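
# Standalone illustration of the even spacing computed in gen() above: count
# events between earliest and latest, each offset by one whole interval. The
# example times and count are assumptions chosen to make the spacing obvious.
import datetime

earliest = datetime.datetime(2020, 1, 1, 0, 0, 0)
latest = datetime.datetime(2020, 1, 1, 0, 1, 0)
count = 6
step = (latest - earliest).total_seconds() / count  # 10.0 seconds
for i in range(count):
    print(earliest + datetime.timedelta(seconds=step * (i + 1)))
# Events land at :10, :20, ..., :60 -- the earliest slot itself is skipped
# and the last event falls exactly on `latest`.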
def flush(self, endOfInterval=False):
    """
    Flushes the output buffer, unless endOfInterval is set, in which case we only flush
    if we've been called more times than the maxIntervalsBeforeFlush tunable allows.
    """
    # TODO: Fix interval flushing somehow with a queue, not sure I even want to support this feature anymore.
    '''if endOfInterval:
        logger.debugv("Sample calling flush, checking increment against maxIntervalsBeforeFlush")
        c.intervalsSinceFlush[self._sample.name].increment()
        if c.intervalsSinceFlush[self._sample.name].value() >= self._sample.maxIntervalsBeforeFlush:
            logger.debugv("Exceeded maxIntervalsBeforeFlush, flushing")
            flushing = True
            c.intervalsSinceFlush[self._sample.name].clear()
        else:
            logger.debugv("Not enough events to flush, passing flush routine.")
    else:
        logger.debugv("maxQueueLength exceeded, flushing")
        flushing = True'''

    # TODO: This is set this way just for the time being while I decide if we want this feature.
    flushing = True
    if flushing:
        q = self._queue
        logger.debug("Flushing queue for sample '%s' with size %d" % (self._sample.name, len(q)))
        self._queue = []
        outputer = self.outputPlugin(self._sample, self.output_counter)
        outputer.updateConfig(self.config)
        outputer.set_events(q)
        # When an outputQueue is used, output must run single-threaded, which requires putting the
        # outputer back into the outputQueue so a single worker thread can execute it. When an
        # outputQueue is not used, it can be run by multiple processes or threads, so there is no
        # need to put the outputer back into the queue; just execute it.
        # If the outputPlugin requires an output queue, use one regardless of the user's
        # useOutputQueue config.
        if self.outputPlugin.useOutputQueue or self.config.useOutputQueue:
            try:
                self.outputQueue.put(outputer)
            except Full:
                logger.warning("Output Queue full, looping again")
        else:
            if self.config.splunkEmbedded:
                tmp = [len(s['_raw']) for s in q]
                if len(tmp) > 0:
                    metrics_logger.info({
                        'timestamp': datetime.datetime.strftime(datetime.datetime.now(), '%Y-%m-%d %H:%M:%S'),
                        'sample': self._sample.name,
                        'events': len(tmp),
                        'bytes': sum(tmp)})
                tmp = None
            outputer.run()
def main(args):
    mydb = mysql.connector.connect(host="localhost", user=args.user, passwd=args.passwd, database="starwar")
    mycursor = mydb.cursor()

    with open(args.answer_shapes_path, 'r') as fin:
        answer_shapes = json.load(fin)

    result_shapes = {}
    shape_correct = 0
    error = 0
    with open(args.file_path, 'r') as fin:
        lines = fin.readlines()
        for i, line in enumerate(lines):
            try:
                mycursor.execute(line)
            except Exception as err:
                error += 1
                logger.error(err, exc_info=True)
            else:
                results = mycursor.fetchall()
                for x in results:
                    logger.info(x)
                result_shape = np.array(results).shape
                result_shapes[i + 1] = result_shape
                answer_shape = answer_shapes[f"{i+1}"]
                if answer_shape != list(result_shape):
                    logger.error(
                        f"Question {i+1} shape does not match: "
                        f"yours: {result_shape} / ans: {answer_shape}")
                else:
                    shape_correct += 1
                    logger.info(f"Question {i+1} shape correct")

    logger.info("-------------------------------------------------------")
    logger.info(f"Shape correct: {shape_correct} / {len(answer_shapes)}")
    logger.info(f"Error: {error} / {len(answer_shapes)}")
    logger.warning("Note that this checker only checks the shape. "
                   "Your answer may still be wrong.")
    logger.warning("The answer is not guaranteed to be correct either; "
                   "open an issue if you think the answer shape is incorrect.")
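
# Hedged sketch of the CLI wiring this checker likely expects. The flag names
# mirror the attributes read in main() (args.user, args.passwd, args.file_path,
# args.answer_shapes_path) but are otherwise assumptions.
if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser(description="Check SQL answer result shapes")
    parser.add_argument("--user", required=True, help="MySQL user")
    parser.add_argument("--passwd", required=True, help="MySQL password")
    parser.add_argument("--file_path", required=True, help="file of SQL queries, one per line")
    parser.add_argument("--answer_shapes_path", required=True, help="JSON file of expected result shapes")
    main(parser.parse_args())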
def real_run(self):
    """
    Worker function of the Timer class. Determine whether a plugin is queueable, and either
    place an item in the generator queue for that plugin or call the plugin's gen method directly.
    """
    if self.sample.delay > 0:
        logger.info("Sample set to delay %s, sleeping." % self.sample.delay)
        time.sleep(self.sample.delay)

    logger.debug("Timer creating plugin for '%s'" % self.sample.name)
    end = False
    previous_count_left = 0
    raw_event_size = self.predict_event_size()
    if self.end:
        if int(self.end) == 0:
            logger.info("End = 0, no events will be generated for sample '%s'" % self.sample.name)
            end = True
        elif int(self.end) == -1:
            logger.info("End is set to -1. Will be running without stopping for sample %s" % self.sample.name)
    while not end:
        # Need to be able to stop threads from the main thread or this thread. self.config will stop all
        # threads referenced in the config object, while self.stopping will only stop this one.
        if self.config.stopping or self.stopping:
            end = True
        count = self.rater.rate()
        # First run of the generator, see if we have any backfill work to do.
        if self.countdown <= 0:
            if self.sample.backfill and not self.sample.backfilldone:
                realtime = self.sample.now(realnow=True)
                if "-" in self.sample.backfill[0]:
                    mathsymbol = "-"
                else:
                    mathsymbol = "+"
                backfillnumber = ""
                backfillletter = ""
                for char in self.sample.backfill:
                    if char.isdigit():
                        backfillnumber += char
                    elif char != "-":
                        backfillletter += char
                backfillearliest = timeParserTimeMath(plusminus=mathsymbol, num=backfillnumber,
                                                      unit=backfillletter, ret=realtime)
                while backfillearliest < realtime:
                    if self.end and self.executions == int(self.end):
                        logger.info("End executions %d reached, ending generation of sample '%s'" %
                                    (int(self.end), self.sample.name))
                        break
                    et = backfillearliest
                    lt = timeParserTimeMath(plusminus="+", num=self.interval, unit="s", ret=et)
                    genPlugin = self.generatorPlugin(sample=self.sample)
                    # need to make sure we set the queue right if we're using multiprocessing or thread modes
                    genPlugin.updateConfig(config=self.config, outqueue=self.outputQueue)
                    genPlugin.updateCounts(count=count, start_time=et, end_time=lt)
                    try:
                        self.generatorQueue.put(genPlugin, True, 3)
                        self.executions += 1
                        backfillearliest = lt
                    except Full:
                        logger.warning("Generator Queue Full. Will re-queue the backfill generator task "
                                       "later. %d backfill generators are dispatched.", self.executions)
                        backfillearliest = et
                    realtime = self.sample.now(realnow=True)

                self.sample.backfilldone = True
            else:
                # 12/15/13 CS Moving the rating to a separate plugin architecture
                # Save the previous interval's leftover count to avoid perdayvolumegenerator dropping
                # small tasks
                if self.sample.generator == 'perdayvolumegenerator':
                    count = self.rater.rate() + previous_count_left
                    if 0 < count < raw_event_size:
                        logger.info("current interval size is {}, which is smaller than a raw event "
                                    "size {}. Wait for the next turn.".format(count, raw_event_size))
                        previous_count_left = count
                        self.countdown = self.interval
                        self.executions += 1
                        continue
                    else:
                        previous_count_left = 0
                else:
                    count = self.rater.rate()

                et = self.sample.earliestTime()
                lt = self.sample.latestTime()
                try:
                    if count < 1 and count != -1:
                        logger.info(
                            "There is no data to be generated in worker {0} because the count is {1}.".format(
                                self.sample.config.generatorWorkers, count))
                    else:
                        # Spawn workers at the beginning of the job rather than wait for the next interval
                        logger.info("Starting '%d' generatorWorkers for sample '%s'" %
                                    (self.sample.config.generatorWorkers, self.sample.name))
                        for worker_id in range(self.config.generatorWorkers):
                            genPlugin = self.generatorPlugin(sample=self.sample)
                            # Adjust queue for threading mode
                            genPlugin.updateConfig(config=self.config, outqueue=self.outputQueue)
                            genPlugin.updateCounts(count=count, start_time=et, end_time=lt)
                            try:
                                self.generatorQueue.put(genPlugin)
                                logger.debug(("Worker# {0}: Put {1} MB of events in queue for sample '{2}' "
                                              "with et '{3}' and lt '{4}'").format(
                                                  worker_id, round((count / 1024.0 / 1024), 4),
                                                  self.sample.name, et, lt))
                            except Full:
                                logger.warning("Generator Queue Full. Skipping current generation.")
                            self.executions += 1
                except Exception as e:
                    logger.exception(str(e))
                    if self.stopping:
                        end = True

            # Sleep until we're supposed to wake up and generate more events
            self.countdown = self.interval

            # 8/20/15 CS Adding support for ending generation at a certain time
            if self.end:
                if int(self.end) == -1:
                    time.sleep(self.time)
                    self.countdown -= self.time
                    continue
                # 3/16/16 CS Adding support for ending on a number of executions instead of time
                # Should be fine with storing state in this sample object since each sample has its own
                # unique timer thread
                if not self.endts:
                    if self.executions >= int(self.end):
                        logger.info("End executions %d reached, ending generation of sample '%s'" %
                                    (int(self.end), self.sample.name))
                        self.stopping = True
                        end = True
                elif lt >= self.endts:
                    logger.info("End Time '%s' reached, ending generation of sample '%s'" %
                                (self.sample.endts, self.sample.name))
                    self.stopping = True
                    end = True
        else:
            time.sleep(self.time)
            self.countdown -= self.time
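
# Standalone illustration of the backfill-string parsing in real_run() above:
# a relative offset such as "-3d" splits into a sign, a magnitude, and a unit
# that are then handed to timeParserTimeMath(). The example value is an
# assumption; other units work the same way.
backfill = "-3d"
mathsymbol = "-" if "-" in backfill[0] else "+"
backfillnumber = "".join(c for c in backfill if c.isdigit())                   # "3"
backfillletter = "".join(c for c in backfill if not c.isdigit() and c != "-")  # "d"
print(mathsymbol, backfillnumber, backfillletter)  # - 3 d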
def getTSFromEvent(self, event, passed_token=None):
    currentTime = None
    formats = []
    # JB: 2012/11/20 - Can we optimize this by only testing tokens of type = *timestamp?
    # JB: 2012/11/20 - Alternatively, documentation should suggest putting timestamp as token.0.
    if passed_token is not None:
        tokens = [passed_token]
    else:
        tokens = self.tokens
    for token in tokens:
        try:
            formats.append(token.token)
            # logger.debug("Searching for token '%s' in event '%s'" % (token.token, event))
            results = token._search(event)
            if results:
                timeFormat = token.replacement
                group = 0 if len(results.groups()) == 0 else 1
                timeString = results.group(group)
                # logger.debug("Testing '%s' as a time string against '%s'" % (timeString, timeFormat))
                if timeFormat == "%s":
                    ts = float(timeString) if len(timeString) < 10 else \
                        float(timeString) / (10**(len(timeString) - 10))
                    # logger.debug("Getting time for timestamp '%s'" % ts)
                    currentTime = datetime.datetime.fromtimestamp(ts)
                else:
                    # logger.debug("Getting time for timeFormat '%s' and timeString '%s'" %
                    #              (timeFormat, timeString))
                    # Working around a Python bug with a non-thread-safe strptime. We randomly get
                    # AttributeError when calling strptime, so if we get that, try again.
                    while currentTime is None:
                        try:
                            # Checking for timezone adjustment
                            if timeString[-5] == "+":
                                timeString = timeString[:-5]
                            currentTime = datetime.datetime.strptime(timeString, timeFormat)
                        except AttributeError:
                            pass
                logger.debug("Match '%s' Format '%s' result: '%s'" % (timeString, timeFormat, currentTime))
                if type(currentTime) == datetime.datetime:
                    break
        except ValueError:
            logger.warning("Match found ('%s') but time parse failed. Timeformat '%s' Event '%s'" %
                           (timeString, timeFormat, event))
    if type(currentTime) != datetime.datetime:
        # Total fail
        if passed_token is None:  # If we're running for autotimestamp, don't log an error
            logger.warning("Can't find a timestamp (using patterns '%s') in this event: '%s'." %
                           (formats, event))
        raise ValueError("Can't find a timestamp (using patterns '%s') in this event: '%s'." %
                         (formats, event))
    # Check to make sure we parsed a year
    if currentTime.year == 1900:
        currentTime = currentTime.replace(year=self.now().year)
    # 11/3/14 CS So, this is breaking replay mode, and getTSFromEvent is only used by replay mode,
    # but I don't remember why I added these two lines of code, so removing them might create a
    # regression. Found the change on 6/14/14 but no comments as to why I added these two lines.
    # if self.timestamp == None:
    #     self.timestamp = currentTime
    return currentTime
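
# Worked example of the "%s" (epoch) branch in getTSFromEvent() above: strings
# longer than ten digits are treated as having sub-second precision (e.g.
# milliseconds) and are scaled back down to seconds. The sample values are
# illustrative assumptions.
for timeString in ("1467024000", "1467024000123"):
    ts = float(timeString) if len(timeString) < 10 else \
        float(timeString) / (10 ** (len(timeString) - 10))
    print(timeString, "->", ts)
# "1467024000123" (13 digits) is divided by 10**3, giving 1467024000.123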
def parse(self):
    """Parse configs from the Splunk REST Handler or from files.
    We get called manually instead of in __init__ because we need to find out whether we're Splunk
    embedded before we figure out how to configure ourselves.
    """
    self.samples = []
    logger.debug("Parsing configuration files.")
    self._buildConfDict()
    # Set default config instance variables from the 'global' section.
    # This establishes defaults for other stanza settings.
    if 'global' in self._confDict:
        for key, value in self._confDict['global'].items():
            value = self._validateSetting('global', key, value)
            setattr(self, key, value)
        del self._confDict['global']
        if 'default' in self._confDict:
            del self._confDict['default']

    tempsamples = []
    tempsamples2 = []

    stanza_map = {}
    stanza_list = []

    for stanza in self._confDict.keys():
        stanza_list.append(stanza)
        stanza_map[stanza] = []

    for stanza, settings in self._confDict.iteritems():
        for stanza_item in stanza_list:
            if stanza != stanza_item and re.match(stanza, stanza_item):
                stanza_map[stanza_item].append(stanza)

    # 1/16/16 CS Trying to clean up the need to have attributes hard coded into the Config object
    # and instead go off the list of valid settings that could be set
    for setting in self._validSettings:
        if not hasattr(self, setting):
            setattr(self, setting, None)

    # Now iterate over the rest of the samples we've found.
    # We'll create Sample objects for each of them.
    for stanza, settings in self._confDict.items():
        if self.sample is not None and self.sample != stanza:
            logger.info("Skipping sample '%s' because of command line override", stanza)
            continue

        sampleexists = False
        for sample in self.samples:
            if sample.name == stanza:
                sampleexists = True

        # If we see the sample in two places, use the first and ignore the second
        if not sampleexists:
            s = Sample(stanza)
            s.splunkEmbedded = self.splunkEmbedded
            s.updateConfig(self)

            # Get the latest token number of the current stanza
            last_token_number = 0
            for key, value in settings.items():
                if 'token' in key and key[6].isdigit() and int(key[6]) > last_token_number:
                    last_token_number = int(key[6])

            # Apply global tokens to the current stanza
            kv_pair_items = settings.items()
            if stanza in stanza_map:
                for global_stanza in stanza_map[stanza]:
                    i = 0
                    # Scan for tokens first
                    while True:
                        if 'token.{}.token'.format(i) in self._confDict[global_stanza]:
                            token = self._confDict[global_stanza].get('token.{}.token'.format(i))
                            replacement = self._confDict[global_stanza].get('token.{}.replacement'.format(i))
                            replacementType = self._confDict[global_stanza].get(
                                'token.{}.replacementType'.format(i))
                            last_token_number += 1
                            if token:
                                k = 'token.{}.token'.format(last_token_number)
                                v = token
                                kv_pair_items.append((k, v))
                            if replacement:
                                k = 'token.{}.replacement'.format(last_token_number)
                                v = replacement
                                kv_pair_items.append((k, v))
                            if replacementType:
                                k = 'token.{}.replacementType'.format(last_token_number)
                                v = replacementType
                                kv_pair_items.append((k, v))
                            i += 1
                        else:
                            break

                    keys = settings.keys()
                    for k, v in self._confDict[global_stanza].items():
                        if 'token' not in k and k not in keys:
                            kv_pair_items.append((k, v))

            for key, value in kv_pair_items:
                oldvalue = value
                try:
                    value = self._validateSetting(stanza, key, value)
                except ValueError:
                    # If we're improperly formatted, skip to the next item
                    continue
                # If we're a tuple, then this must be a token
                if type(value) == tuple:
                    # Token indices could be out of order, so we must check to
                    # see whether we have enough items in the list to update the token.
                    # In general this will keep growing the list by whatever length we need.
                    if key.find("host.") > -1:
                        # logger.info("hostToken.{} = {}".format(value[1], oldvalue))
                        if not isinstance(s.hostToken, Token):
                            s.hostToken = Token(s)
                            # default hard-coded for host replacement
                            s.hostToken.replacementType = 'file'
                        setattr(s.hostToken, value[0], oldvalue)
                    else:
                        if len(s.tokens) <= value[0]:
                            x = (value[0] + 1) - len(s.tokens)
                            s.tokens.extend([None for num in xrange(0, x)])
                        if not isinstance(s.tokens[value[0]], Token):
                            s.tokens[value[0]] = Token(s)
                        # logger.info("token[{}].{} = {}".format(value[0], value[1], oldvalue))
                        setattr(s.tokens[value[0]], value[1], oldvalue)
                elif key == 'eai:acl':
                    setattr(s, 'app', value['app'])
                else:
                    setattr(s, key, value)
                    # 6/22/12 CS Need a way to show a setting was set by the original
                    # config read
                    s._lockedSettings.append(key)
                    # logger.debug("Appending '%s' to locked settings for sample '%s'" % (key, s.name))

            # Validate that all tokens are fully set up; we can't do this in _validateSettings
            # because they come over multiple lines.
            # Don't error out at this point; just log it, remove the token, and move on.
            deleteidx = []
            for i in xrange(0, len(s.tokens)):
                t = s.tokens[i]
                # If the index doesn't exist at all
                if t is None:
                    logger.error("Token at index %s invalid" % i)
                    # Can't modify the list in place while we're looping through it,
                    # so create a list to remove later
                    deleteidx.append(i)
                elif t.token is None or t.replacementType is None or t.replacement is None:
                    logger.error("Token at index %s invalid" % i)
                    deleteidx.append(i)
            newtokens = []
            for i in xrange(0, len(s.tokens)):
                if i not in deleteidx:
                    newtokens.append(s.tokens[i])
            s.tokens = newtokens

            # Must have the eai:acl key to determine the app name, which determines where actual files are
            if s.app is None:
                logger.error("App not set for sample '%s' in stanza '%s'" % (s.name, stanza))
                raise ValueError("App not set for sample '%s' in stanza '%s'" % (s.name, stanza))

            # Set defaults for items not included in the config file
            for setting in self._defaultableSettings:
                if not hasattr(s, setting) or getattr(s, setting) is None:
                    setattr(s, setting, getattr(self, setting, None))

            # Append to temporary holding list
            if not s.disabled:
                s._priority = len(tempsamples) + 1
                tempsamples.append(s)

    # 6/22/12 CS Rewriting the config matching code yet again to handle flattening better.
    # In this case, we're now going to match all the files first, create a sample for each of them,
    # and then take the match from the sample seen last in the config file and apply settings from
    # every other match to that one.
    for s in tempsamples:
        # Now we need to match this up to real files. May generate multiple copies of the sample.
        foundFiles = []

        # 1/5/14 Adding a config setting to override the sample directory, primarily so I can put
        # tests in their own directories
        if s.sampleDir is None:
            logger.debug("Sample directory not specified in config, setting based on standard")
            if self.splunkEmbedded and not STANDALONE:
                s.sampleDir = os.path.normpath(
                    os.path.join(self.grandparentdir, os.path.pardir, os.path.pardir, os.path.pardir,
                                 s.app, self.DEFAULT_SAMPLE_DIR))
            else:
                # 2/1/15 CS Adding support for looking for samples based on the config file specified
                # on the command line.
                if self.configfile:
                    base_dir = os.path.dirname(self.configfile) if os.path.isdir(self.configfile) \
                        else os.path.dirname(os.path.dirname(self.configfile))
                    s.sampleDir = os.path.join(base_dir, self.DEFAULT_SAMPLE_DIR)
                else:
                    s.sampleDir = os.path.join(os.getcwd(), self.DEFAULT_SAMPLE_DIR)
                if not os.path.exists(s.sampleDir):
                    # use the prebuilt sample dirs as the last choice
                    newSampleDir = os.path.join(self.grandparentdir, self.DEFAULT_SAMPLE_DIR)
                    logger.error("Path not found for samples '%s', trying '%s'" %
                                 (s.sampleDir, newSampleDir))
                    s.sampleDir = newSampleDir
        else:
            if not os.path.isabs(s.sampleDir):
                # relative path: use the conf file dir as the base dir
                logger.debug("Sample directory specified in config, checking for relative")
                base_path = self.configfile if os.path.isdir(self.configfile) \
                    else os.path.dirname(self.configfile)
                s.sampleDir = os.path.join(base_path, s.sampleDir)
            # do nothing when sampleDir is an absolute path

        # 2/1/15 CS Adding support for command line options, specifically running a single sample
        # from the command line
        self.run_sample = True
        if self.run_sample:
            # Name doesn't match, disable
            # if s.name != self.run_sample:
            #     logger.debug("Disabling sample '%s' because of command line override" % s.name)
            #     s.disabled = True
            # # Name matches
            # else:
            #     logger.debug("Sample '%s' selected from command line" % s.name)
            # Also, can't backfill search if we don't know how to talk to Splunk
            s.backfillSearch = None
            s.backfillSearchUrl = None
            # Since the user is running this for debug output, let's assume that they
            # always want to see output
            self.maxIntervalsBeforeFlush = 1
            s.maxIntervalsBeforeFlush = 1
            s.maxQueueLength = s.maxQueueLength or 1
            logger.debug("Sample '%s' setting maxQueueLength to '%s' from command line" %
                         (s.name, s.maxQueueLength))

            if self.override_outputter:
                logger.debug("Sample '%s' setting output to '%s' from command line" %
                             (s.name, self.override_outputter))
                s.outputMode = self.override_outputter

            if self.override_count:
                logger.debug("Overriding count to '%d' for sample '%s'" % (self.override_count, s.name))
                s.count = self.override_count
                # If we're specifying a count, turn off backfill
                s.backfill = None

            if self.override_interval:
                logger.debug("Overriding interval to '%d' for sample '%s'" %
                             (self.override_interval, s.name))
                s.interval = self.override_interval

            if self.override_backfill:
                logger.debug("Overriding backfill to '%s' for sample '%s'" %
                             (self.override_backfill, s.name))
                s.backfill = self.override_backfill.lstrip()

            if self.override_end:
                logger.debug("Overriding end to '%s' for sample '%s'" % (self.override_end, s.name))
                s.end = self.override_end.lstrip()

            if s.mode == 'replay' and not s.end:
                s.end = 1

        # Now that we know where samples will be written,
        # loop through tokens and load state for any that have an integerid replacementType
        for token in s.tokens:
            if token.replacementType == 'integerid':
                try:
                    stateFile = open(
                        os.path.join(s.sampleDir, 'state.' + urllib.pathname2url(token.token)), 'rU')
                    token.replacement = stateFile.read()
                    stateFile.close()
                except (IOError, ValueError):
                    # The file doesn't exist; keep the default value from the config
                    pass

        if os.path.exists(s.sampleDir):
            sampleFiles = os.listdir(s.sampleDir)
            for sample in sampleFiles:
                results = re.match(s.name, sample)
                if results:
                    logger.debug("Matched file {0} with sample name {1}".format(results.group(0), s.name))
                    samplePath = os.path.join(s.sampleDir, sample)
                    if os.path.isfile(samplePath):
                        logger.debug("Found sample file '%s' for app '%s' using config '%s' with "
                                     "priority '%s'; adding to list" % (sample, s.app, s.name, s._priority))
                        foundFiles.append(samplePath)

        # If we didn't find any files, log about it
        if len(foundFiles) == 0:
            logger.warning("Sample '%s' in config but no matching files" % s.name)
            # 1/23/14 Change in behavior, go ahead and add the sample even if we don't find a file
            # 9/16/15 Change bit us; now only append if we're a generator other than the two stock generators
            if not s.disabled and not (s.generator == "default" or s.generator == "replay"):
                tempsamples2.append(s)
        for f in foundFiles:
            if s.name in f:
                news = s
                news.filePath = f
                # 12/3/13 CS TODO These are hard coded but should be handled via the modular config system.
                # Maybe a generic callback for all plugins which will modify the sample based on the
                # filename found?
                # Override <SAMPLE> with real name
                if s.outputMode == 'spool' and s.spoolFile == self.spoolFile:
                    news.spoolFile = f.split(os.sep)[-1]
                if s.outputMode == 'file' and s.fileName is None:
                    if self.fileName:
                        news.fileName = self.fileName
                        logger.debug("Found a global fileName {}. Setting the sample fileName.".format(
                            self.fileName))
                    elif s.spoolFile == self.spoolFile:
                        news.fileName = os.path.join(s.spoolDir, f.split(os.sep)[-1])
                    elif s.spoolFile is not None:
                        news.fileName = os.path.join(s.spoolDir, s.spoolFile)
                # Override s.name with the file name. Usually they'll match unless we've been a regex.
                # 6/22/12 CS Save original name for later matching
                news._origName = news.name
                news.name = f.split(os.sep)[-1]
                if not news.disabled:
                    tempsamples2.append(news)
                else:
                    logger.info("Sample '%s' for app '%s' is marked disabled." % (news.name, news.app))

    # Clear tempsamples; we're going to reuse it
    tempsamples = []

    # We're now going to go through the samples and attempt to apply any matches from other stanzas.
    # This allows us to specify a wildcard at the beginning of the file and get more specific as we go on.

    # Loop through all samples, create a list of the master samples
    for s in tempsamples2:
        foundHigherPriority = False
        othermatches = []
        # If we're an exact match, don't go looking for higher priorities
        if not s.name == s._origName:
            for matchs in tempsamples2:
                if matchs.filePath == s.filePath and s._origName != matchs._origName:
                    # We have a match; now determine whether we're higher priority or not.
                    # If this is a longer pattern or our match is an exact match,
                    # then we're a higher priority match.
                    if len(matchs._origName) > len(s._origName) or matchs.name == matchs._origName:
                        # if s._priority < matchs._priority:
                        logger.debug("Found higher priority for sample '%s' with priority '%s' from "
                                     "sample '%s' with priority '%s'" %
                                     (s._origName, s._priority, matchs._origName, matchs._priority))
                        foundHigherPriority = True
                        break
                    else:
                        othermatches.append(matchs._origName)
        if not foundHigherPriority:
            logger.debug("Chose sample '%s' from samples '%s' for file '%s'" %
                         (s._origName, othermatches, s.name))
            tempsamples.append(s)

    # Now we have two lists: tempsamples, which contains only the highest priority matches, and
    # tempsamples2, which contains all matches. We now need to flatten the config in order to
    # take all the configs which might match.

    # Reversing tempsamples2 in order to look from the bottom of the file towards the top.
    # We want entries lower in the file to override entries higher in the file.
    tempsamples2.reverse()

    # Loop through all samples
    for s in tempsamples:
        # Now loop through the samples we've matched with files to see if we apply to any of them
        for overridesample in tempsamples2:
            if s.filePath == overridesample.filePath and s._origName != overridesample._origName:
                # Now we're going to loop through all valid settings and set them, assuming
                # the more specific object that we've matched doesn't already have them set
                for settingname in self._validSettings:
                    if settingname not in ['eai:acl', 'blacklist', 'disabled', 'name']:
                        # 7/16/14 CS For some reason default settings are suddenly erroring;
                        # not sure why, but let's just move on
                        try:
                            sourcesetting = getattr(overridesample, settingname)
                            destsetting = getattr(s, settingname)
                            # We want to check that the setting we're copying to hasn't been
                            # set; otherwise keep the more specific value
                            # 6/22/12 CS Added support for non-overrideable (locked) settings
                            # logger.debug("Locked settings: %s" % pprint.pformat(matchs._lockedSettings))
                            # if settingname in matchs._lockedSettings:
                            #     logger.debug("Matched setting '%s' in sample '%s' lockedSettings"
                            #                  % (settingname, matchs.name))
                            if (destsetting is None or destsetting == getattr(self, settingname)) \
                                    and sourcesetting is not None \
                                    and sourcesetting != getattr(self, settingname) \
                                    and settingname not in s._lockedSettings:
                                logger.debug("Overriding setting '%s' with value '%s' from sample '%s' "
                                             "to sample '%s' in app '%s'" %
                                             (settingname, sourcesetting, overridesample._origName,
                                              s.name, s.app))
                                setattr(s, settingname, sourcesetting)
                        except AttributeError:
                            pass

                # Now prepend all the tokens to the beginning of the list so they'll be sure to match first
                newtokens = s.tokens
                # logger.debug("Prepending tokens from sample '%s' to sample '%s' in app '%s': %s"
                #              % (overridesample._origName, s.name, s.app, pprint.pformat(newtokens)))
                newtokens.extend(overridesample.tokens)
                s.tokens = newtokens

    # We've added replay mode, so let's loop through the samples again and set the earliest and latest
    # settings for any samples that were set to replay mode
    for s in tempsamples:
        if s.perDayVolume:
            logger.info("Stanza contains per day volume, changing rater and generator to perdayvolume "
                        "instead of count")
            s.rater = 'perdayvolume'
            s.count = 1
            s.generator = 'perdayvolumegenerator'
        elif s.mode == 'replay':
            logger.debug("Setting defaults for replay samples")
            s.earliest = 'now' if not s.earliest else s.earliest
            s.latest = 'now' if not s.latest else s.latest
            s.count = 1
            s.randomizeCount = None
            s.hourOfDayRate = None
            s.dayOfWeekRate = None
            s.minuteOfHourRate = None
            s.interval = 0 if not s.interval else s.interval
            # 12/29/13 CS Moved replay generation to a new replay generator plugin
            s.generator = 'replay'

    self.samples = tempsamples
    self._confDict = None

    # 9/2/15 Try autotimestamp values, add a timestamp if we find one
    for s in self.samples:
        if s.generator == 'default':
            s.loadSample()

            if s.autotimestamp:
                at = self.autotimestamps
                line_puncts = []

                # Check for the _time field; if it exists, add a timestamp to support it
                if len(s.sampleDict) > 0:
                    if '_time' in s.sampleDict[0]:
                        logger.debug("Found _time field, checking if default timestamp exists")
                        t = Token()
                        t.token = "\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d{3}"
                        t.replacementType = "timestamp"
                        t.replacement = "%Y-%m-%dT%H:%M:%S.%f"

                        found_token = False
                        # Check to see if we're already a token
                        for st in s.tokens:
                            if st.token == t.token and st.replacement == t.replacement:
                                found_token = True
                                break
                        if not found_token:
                            logger.debug("Found _time, adding timestamp to support")
                            s.tokens.append(t)
                        else:
                            logger.debug("_time field exists and timestamp already configured")

                for e in s.sampleDict:
                    # Run punct against the line, make sure we haven't seen this same pattern.
                    # Not totally exact, but good enough for Rock'N'Roll.
                    p = self._punct(e['_raw'])
                    logger.debug("Got punct of '%s' for event '%s'" % (p, e[s.timeField]))
                    if p not in line_puncts:
                        for x in at:
                            t = Token()
                            t.token = x[0]
                            t.replacementType = "timestamp"
                            t.replacement = x[1]

                            try:
                                logger.debug("Trying regex '%s' for format '%s' on '%s'" %
                                             (x[0], x[1], e[s.timeField]))
                                ts = s.getTSFromEvent(e['_raw'], t)
                                if type(ts) == datetime.datetime:
                                    found_token = False
                                    # Check to see if we're already a token
                                    for st in s.tokens:
                                        if st.token == t.token and st.replacement == t.replacement:
                                            found_token = True
                                            break
                                    if not found_token:
                                        logger.debug("Found timestamp '%s', extending token with "
                                                     "format '%s'" % (x[0], x[1]))
                                        s.tokens.append(t)
                                        # Drop this pattern from the ones we try in the future
                                        at = [z for z in at if z[0] != x[0]]
                                    break
                            except ValueError:
                                pass
                    line_puncts.append(p)
    logger.debug("Finished parsing")
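
# The _punct() helper used for dedup above is not shown in this section. A
# plausible sketch, assuming it fingerprints a line by stripping alphanumerics
# and whitespace and keeping only punctuation, so timestamp regexes are only
# tested once per line shape. Hypothetical; the real _punct() may differ.
import re

def punct_sketch(raw):
    # Keep only punctuation characters as a cheap structural fingerprint
    return re.sub(r"[A-Za-z0-9\s]", "", raw)

# e.g. punct_sketch('2014-01-01 INFO foo=bar') -> '--='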