Example #1
0
File: file.py  Project: szsb26/eventgen
    def flush(self, q):
        """Write all queued events in *q* to the output file in one write,
        rotating backup files when the file exceeds the configured maximum.

        :param q: list of event dicts; each carries its text under '_raw'.
        """
        if len(q) > 0:
            logger.debug(
                "Flushing output for sample '%s' in app '%s' for queue '%s'" %
                (self._sample.name, self._app, self._sample.source))

            # Loop through all the messages and build the long string, write once for each flush
            # This may cause the file exceed the maxFileBytes a little bit but will greatly improve the performance
            try:
                pieces = []
                for metamsg in q:
                    msg = metamsg['_raw']
                    if msg[-1] != '\n':
                        msg += '\n'
                    pieces.append(msg)
                # ''.join is linear in total size; repeated str += is quadratic.
                msglist = ''.join(pieces)

                self._fileHandle.write(msglist)
                self._fileLength += len(msglist)

                # If we're at the end of the max allowable size, shift all files
                # up a number and create a new one
                if self._fileLength > self._fileMaxBytes:
                    self._fileHandle.flush()
                    self._fileHandle.close()
                    # Drop the oldest backup (file.N) so the rename chain below
                    # never collides with an existing file.
                    if os.path.exists(self._file + '.' +
                                      str(self._fileBackupFiles)):
                        logger.debug('File Output: Removing file: %s' %
                                     self._file + '.' +
                                     str(self._fileBackupFiles))
                        os.unlink(self._file + '.' +
                                  str(self._fileBackupFiles))
                    # Shift file.x -> file.(x+1), highest index first.
                    for x in range(1, self._fileBackupFiles)[::-1]:
                        logger.debug('File Output: Checking for file: %s' %
                                     self._file + '.' + str(x))
                        if os.path.exists(self._file + '.' + str(x)):
                            logger.debug(
                                'File Output: Renaming file %s to %s' %
                                (self._file + '.' + str(x),
                                 self._file + '.' + str(x + 1)))
                            os.rename(self._file + '.' + str(x),
                                      self._file + '.' + str(x + 1))
                    os.rename(self._file, self._file + '.1')
                    self._fileHandle = open(self._file, 'w')
                    self._fileLength = 0
            except IndexError:
                # An empty '_raw' string makes msg[-1] raise IndexError.
                logger.warning(
                    "IndexError when writing for app '%s' sample '%s'" %
                    (self._app, self._sample.name))

            if not self._fileHandle.closed:
                self._fileHandle.flush()
            logger.debug("Queue for app '%s' sample '%s' written" %
                         (self._app, self._sample.name))

            # NOTE(review): the handle is closed after every flush — presumably
            # reopened by the caller before the next flush; confirm.
            self._fileHandle.close()
Example #2
0
def draw_text(
    img: Image, text: str, location: tuple = (0, 0),
    text_color=(0, 0, 0), font_size: int = 20) -> Image:
    """Draw *text* onto *img* at *location* and return the (mutated) image.

    :param img: PIL image to draw on (modified in place).
    :param text: string to render.
    :param location: (x, y) pixel coordinates of the text origin.
    :param text_color: RGB fill color for the text.
    :param font_size: point size for the TrueType font (default 20);
        ignored when falling back to PIL's fixed-size bitmap default font.
    :return: the same image object, for call chaining.
    """
    draw = ImageDraw.Draw(img)

    try:
        # For Linux: DejaVuSans ships with most distributions.
        font = ImageFont.truetype("DejaVuSans.ttf", font_size)
    except Exception:
        logger.warning("No font DejaVuSans; use default instead")
        # For other platforms: PIL's built-in bitmap font.
        font = ImageFont.load_default()
    draw.text(location, text, font=font, fill=text_color)
    return img
Example #3
0
 def gen(self, count, earliest, latest, samplename=None):
     """Generate *count* WINDBAG events spread evenly between *earliest*
     and *latest* and send each one to the output plugin.

     :param count: number of events; any negative value defaults to 60.
     :param earliest: datetime lower bound of the generation window.
     :param latest: datetime upper bound of the generation window.
     :param samplename: unused; kept for generator-plugin interface parity.
     :return: 0 on completion.
     """
     if count < 0:
         logger.warning(
             'Sample size not found for count=-1 and generator=windbag, defaulting to count=60'
         )
         count = 60
     if count == 0:
         # Nothing to generate; also avoids a ZeroDivisionError below.
         return 0
     time_interval = timedelta.total_seconds((latest - earliest)) / count
     for i in xrange(count):
         current_time_object = earliest + datetime.timedelta(
             0, time_interval * (i + 1))
         msg = '{0} -0700 WINDBAG Event {1} of {2}'.format(
             current_time_object, (i + 1), count)
         self._out.send(msg)
     return 0
Example #4
0
    def flush(self, endOfInterval=False):
        """
        Flushes output buffer, unless endOfInterval called, and then only flush if we've been called
        more than maxIntervalsBeforeFlush tunable.
        """
        # TODO: Fix interval flushing somehow with a queue, not sure I even want to support this feature anymore.
        '''if endOfInterval:
            logger.debugv("Sample calling flush, checking increment against maxIntervalsBeforeFlush")
            c.intervalsSinceFlush[self._sample.name].increment()
            if c.intervalsSinceFlush[self._sample.name].value() >= self._sample.maxIntervalsBeforeFlush:
                logger.debugv("Exceeded maxIntervalsBeforeFlush, flushing")
                flushing = True
                c.intervalsSinceFlush[self._sample.name].clear()
            else:
                logger.debugv("Not enough events to flush, passing flush routine.")
        else:
            logger.debugv("maxQueueLength exceeded, flushing")
            flushing = True'''

        # TODO: This is set this way just for the time being while I decide if we want this feature.
        flushing = True
        if not flushing:
            return

        # Detach the current queue so new events accumulate in a fresh list.
        pending = self._queue
        logger.debug("Flushing queue for sample '%s' with size %d" % (self._sample.name, len(pending)))
        self._queue = []

        # Build and configure an output plugin instance for this batch.
        writer = self.outputPlugin(self._sample, self.output_counter)
        writer.updateConfig(self.config)
        writer.set_events(pending)

        # An outputQueue forces single-threaded execution: hand the outputer to
        # the queue so one worker thread runs it.  Without an outputQueue the
        # outputer can run in any process/thread, so just execute it here.
        # The plugin's own useOutputQueue flag wins over the user's config.
        if self.outputPlugin.useOutputQueue or self.config.useOutputQueue:
            try:
                self.outputQueue.put(writer)
            except Full:
                logger.warning("Output Queue full, looping again")
            return

        # Direct-execution path: when embedded in Splunk, report batch metrics.
        if self.config.splunkEmbedded:
            sizes = [len(event['_raw']) for event in pending]
            if sizes:
                metrics_logger.info({
                    'timestamp': datetime.datetime.strftime(datetime.datetime.now(), '%Y-%m-%d %H:%M:%S'),
                    'sample': self._sample.name,
                    'events': len(sizes),
                    'bytes': sum(sizes)})
            sizes = None
        writer.run()
Example #5
0
def main(args):
    """Run each SQL query from ``args.file_path`` against the local `starwar`
    database and compare each result's shape to the expected shapes loaded
    from ``args.answer_shapes_path`` (a JSON map of 1-based question number
    to shape list).  Logs a per-question verdict and a summary.
    """
    mydb = mysql.connector.connect(host="localhost",
                                   user=args.user,
                                   passwd=args.passwd,
                                   database="starwar")

    mycursor = mydb.cursor()

    with open(args.answer_shapes_path, 'r') as fin:
        answer_shapes = json.load(fin)

    result_shapes = {}
    shape_correct = 0
    error = 0
    with open(args.file_path, 'r') as fin:
        lines = fin.readlines()
        for i, line in enumerate(lines):
            try:
                mycursor.execute(line)
            except Exception as err:
                error += 1
                logger.error(err, exc_info=True)
            else:
                results = mycursor.fetchall()
                for x in results:
                    logger.info(x)

                # Compute the result shape once and reuse it.
                result_shape = np.array(results).shape
                result_shapes[i + 1] = result_shape
                answer_shape = answer_shapes[f"{i+1}"]
                if answer_shape != list(result_shape):
                    logger.error(
                        f"Question {i+1} shape not match: "
                        f"yours: {result_shape} / ans: {answer_shape}")
                else:
                    shape_correct += 1
                    logger.info(f"Question {i+1} shape correct")

    # Release database resources before reporting.
    mycursor.close()
    mydb.close()

    logger.info("-------------------------------------------------------")
    logger.info(f"Shape correct: {shape_correct} / {len(answer_shapes)}")
    logger.info(f"Error: {error} / {len(answer_shapes)}")
    logger.warning("Note that this checker only checks the shape. "
                   "Your answer may still be wrong.")
    logger.warning("The answer is not guaranteed to be correct as well; "
                   "open an issue if you think the answer shape is incorrect.")
Example #6
0
    def real_run(self):
        """
        Worker function of the Timer class.  Determine whether a plugin is queueable, and either
        place an item in the generator queue for that plugin or call the plugin's gen method directly.
        """
        # Honor a configured startup delay before generating anything.
        if self.sample.delay > 0:
            logger.info("Sample set to delay %s, sleeping." % self.sample.delay)
            time.sleep(self.sample.delay)

        logger.debug("Timer creating plugin for '%s'" % self.sample.name)

        end = False
        previous_count_left = 0
        raw_event_size = self.predict_event_size()
        # self.end semantics: 0 -> generate nothing, -1 -> run forever,
        # any other value -> number of executions (or end time via self.endts).
        if self.end:
            if int(self.end) == 0:
                logger.info("End = 0, no events will be generated for sample '%s'" % self.sample.name)
                end = True
            elif int(self.end) == -1:
                logger.info("End is set to -1. Will be running without stopping for sample %s" % self.sample.name)
        while not end:
            # Need to be able to stop threads by the main thread or this thread. self.config will stop all threads
            # referenced in the config object, while, self.stopping will only stop this one.
            if self.config.stopping or self.stopping:
                end = True
            # NOTE(review): rate() is called again below in the non-backfill
            # branch; if the rater is stateful this double call may matter — confirm.
            count = self.rater.rate()
            # First run of the generator, see if we have any backfill work to do.
            if self.countdown <= 0:
                if self.sample.backfill and not self.sample.backfilldone:
                    realtime = self.sample.now(realnow=True)
                    # Split the backfill spec (e.g. "-5m") into sign, digits and unit.
                    if "-" in self.sample.backfill[0]:
                        mathsymbol = "-"
                    else:
                        mathsymbol = "+"
                    backfillnumber = ""
                    backfillletter = ""
                    for char in self.sample.backfill:
                        if char.isdigit():
                            backfillnumber += char
                        elif char != "-":
                            backfillletter += char
                    backfillearliest = timeParserTimeMath(plusminus=mathsymbol, num=backfillnumber, unit=backfillletter,
                                                        ret=realtime)
                    # Dispatch one generator task per interval window until the
                    # backfill window catches up to the current (real) time.
                    while backfillearliest < realtime:
                        if self.end and self.executions == int(self.end):
                            logger.info("End executions %d reached, ending generation of sample '%s'" % (int(
                                self.end), self.sample.name))
                            break
                        et = backfillearliest
                        lt = timeParserTimeMath(plusminus="+", num=self.interval, unit="s", ret=et)
                        genPlugin = self.generatorPlugin(sample=self.sample)
                        # need to make sure we set the queue right if we're using multiprocessing or thread modes
                        genPlugin.updateConfig(config=self.config, outqueue=self.outputQueue)
                        genPlugin.updateCounts(count=count, start_time=et, end_time=lt)
                        try:
                            self.generatorQueue.put(genPlugin, True, 3)
                            self.executions += 1
                            backfillearliest = lt
                        except Full:
                            # Queue full: keep backfillearliest unchanged so the
                            # same window is retried on the next loop iteration.
                            logger.warning("Generator Queue Full. Reput the backfill generator task later. %d backfill generators are dispatched.", self.executions)
                            backfillearliest = et
                        realtime = self.sample.now(realnow=True)

                    self.sample.backfilldone = True
                else:
                    # 12/15/13 CS Moving the rating to a separate plugin architecture
                    # Save previous interval count left to avoid perdayvolumegenerator drop small tasks
                    if self.sample.generator == 'perdayvolumegenerator':
                        count = self.rater.rate() + previous_count_left
                        # Too small to hold even one raw event: carry the budget
                        # over to the next interval instead of dropping it.
                        if 0 < count < raw_event_size:
                            logger.info("current interval size is {}, which is smaller than a raw event size {}.".
                                             format(count, raw_event_size) + "Wait for the next turn.")
                            previous_count_left = count
                            self.countdown = self.interval
                            self.executions += 1
                            continue
                        else:
                            previous_count_left = 0
                    else:
                        count = self.rater.rate()

                    et = self.sample.earliestTime()
                    lt = self.sample.latestTime()

                    try:
                        if count < 1 and count != -1:
                            logger.info(
                                "There is no data to be generated in worker {0} because the count is {1}.".format(
                                    self.sample.config.generatorWorkers, count))
                        else:
                            # Spawn workers at the beginning of job rather than wait for next interval
                            logger.info("Starting '%d' generatorWorkers for sample '%s'" %
                                             (self.sample.config.generatorWorkers, self.sample.name))
                            for worker_id in range(self.config.generatorWorkers):
                                genPlugin = self.generatorPlugin(sample=self.sample)
                                # Adjust queue for threading mode
                                genPlugin.updateConfig(config=self.config, outqueue=self.outputQueue)
                                genPlugin.updateCounts(count=count, start_time=et, end_time=lt)

                                try:
                                    self.generatorQueue.put(genPlugin)
                                    logger.debug(("Worker# {0}: Put {1} MB of events in queue for sample '{2}'" +
                                                       "with et '{3}' and lt '{4}'").format(
                                                          worker_id, round((count / 1024.0 / 1024), 4),
                                                          self.sample.name, et, lt))
                                except Full:
                                    logger.warning("Generator Queue Full. Skipping current generation.")
                            self.executions += 1
                    except Exception as e:
                        logger.exception(str(e))
                        if self.stopping:
                            end = True
                        pass

                # Sleep until we're supposed to wake up and generate more events
                self.countdown = self.interval

                # 8/20/15 CS Adding support for ending generation at a certain time

                if self.end:
                    if int(self.end) == -1:
                        time.sleep(self.time)
                        self.countdown -= self.time
                        continue
                    # 3/16/16 CS Adding support for ending on a number of executions instead of time
                    # Should be fine with storing state in this sample object since each sample has it's own unique
                    # timer thread
                    if not self.endts:
                        if self.executions >= int(self.end):
                            logger.info("End executions %d reached, ending generation of sample '%s'" % (int(
                                self.end), self.sample.name))
                            self.stopping = True
                            end = True
                    elif lt >= self.endts:
                        logger.info("End Time '%s' reached, ending generation of sample '%s'" % (self.sample.endts,
                                                                                                      self.sample.name))
                        self.stopping = True
                        end = True

            else:
                # Interval not yet elapsed: sleep one tick and count down.
                time.sleep(self.time)
                self.countdown -= self.time
Example #7
0
 def getTSFromEvent(self, event, passed_token=None):
     """Extract and parse a timestamp from the raw *event* string.

     Tries each configured token (or only *passed_token* when given) until
     one both matches the event and parses as a time.  Returns a
     ``datetime.datetime``; raises ``ValueError`` when no token yields one.
     """
     currentTime = None
     formats = []
     # JB: 2012/11/20 - Can we optimize this by only testing tokens of type = *timestamp?
     # JB: 2012/11/20 - Alternatively, documentation should suggest putting timestamp as token.0.
     if passed_token is not None:
         tokens = [passed_token]
     else:
         tokens = self.tokens
     for token in tokens:
         try:
             # Record every pattern tried so the failure message below can list them.
             formats.append(token.token)
             # logger.debug("Searching for token '%s' in event '%s'" % (token.token, event))
             results = token._search(event)
             if results:
                 timeFormat = token.replacement
                 # Use capture group 1 when the pattern has groups, else the whole match.
                 group = 0 if len(results.groups()) == 0 else 1
                 timeString = results.group(group)
                 # logger.debug("Testing '%s' as a time string against '%s'" % (timeString, timeFormat))
                 if timeFormat == "%s":
                     # "%s" means epoch seconds; strings longer than 10 digits
                     # carry sub-second digits, so scale them down.
                     ts = float(timeString) if len(timeString) < 10 else float(timeString) \
                          / (10**(len(timeString) - 10))
                     # logger.debug("Getting time for timestamp '%s'" % ts)
                     currentTime = datetime.datetime.fromtimestamp(ts)
                 else:
                     # logger.debug("Getting time for timeFormat '%s' and timeString '%s'" %
                     #                   (timeFormat, timeString))
                     # Working around Python bug with a non thread-safe strptime. Randomly get AttributeError
                     # when calling strptime, so if we get that, try again
                     while currentTime is None:
                         try:
                             # Checking for timezone adjustment
                             # NOTE(review): assumes a trailing "+HHMM" offset; a time
                             # string shorter than 5 chars would raise IndexError here
                             # (not caught) — confirm token patterns prevent that.
                             if timeString[-5] == "+":
                                 timeString = timeString[:-5]
                             currentTime = datetime.datetime.strptime(
                                 timeString, timeFormat)
                         except AttributeError:
                             pass
                 logger.debug("Match '%s' Format '%s' result: '%s'" %
                              (timeString, timeFormat, currentTime))
                 if type(currentTime) == datetime.datetime:
                     break
         except ValueError:
             # Token matched but the captured string didn't parse; try the next token.
             logger.warning(
                 "Match found ('%s') but time parse failed. Timeformat '%s' Event '%s'"
                 % (timeString, timeFormat, event))
     if type(currentTime) != datetime.datetime:
         # Total fail
         if passed_token is None:  # If we're running for autotimestamp don't log error
             logger.warning(
                 "Can't find a timestamp (using patterns '%s') in this event: '%s'."
                 % (formats, event))
         raise ValueError(
             "Can't find a timestamp (using patterns '%s') in this event: '%s'."
             % (formats, event))
     # Check to make sure we parsed a year
     if currentTime.year == 1900:
         # strptime defaults the year to 1900 when the format lacks %Y/%y.
         currentTime = currentTime.replace(year=self.now().year)
     # 11/3/14 CS So, this is breaking replay mode, and getTSFromEvent is only used by replay mode
     #            but I don't remember why I added these two lines of code so it might create a regression.
     #            Found the change on 6/14/14 but no comments as to why I added these two lines.
     # if self.timestamp == None:
     #     self.timestamp = currentTime
     return currentTime
Example #8
0
    def parse(self):
        """Parse configs from Splunk REST Handler or from files.
        We get called manually instead of in __init__ because we need find out if we're Splunk embedded before
        we figure out how to configure ourselves.
        """
        self.samples = []
        logger.debug("Parsing configuration files.")
        self._buildConfDict()
        # Set defaults config instance variables to 'global' section
        # This establishes defaults for other stanza settings
        if 'global' in self._confDict:
            for key, value in self._confDict['global'].items():
                value = self._validateSetting('global', key, value)
                setattr(self, key, value)
            del self._confDict['global']
            if 'default' in self._confDict:
                del self._confDict['default']

        tempsamples = []
        tempsamples2 = []

        stanza_map = {}
        stanza_list = []
        for stanza in self._confDict.keys():
            stanza_list.append(stanza)
            stanza_map[stanza] = []

        for stanza, settings in self._confDict.iteritems():
            for stanza_item in stanza_list:
                if stanza != stanza_item and re.match(stanza, stanza_item):
                    stanza_map[stanza_item].append(stanza)

        # 1/16/16 CS Trying to clean up the need to have attributes hard coded into the Config object
        # and instead go off the list of valid settings that could be set
        for setting in self._validSettings:
            if not hasattr(self, setting):
                setattr(self, setting, None)

        # Now iterate for the rest of the samples we've found
        # We'll create Sample objects for each of them
        for stanza, settings in self._confDict.items():
            if self.sample is not None and self.sample != stanza:
                logger.info("Skipping sample '%s' because of command line override", stanza)
                continue

            sampleexists = False
            for sample in self.samples:
                if sample.name == stanza:
                    sampleexists = True

            # If we see the sample in two places, use the first and ignore the second
            if not sampleexists:
                s = Sample(stanza)
                s.splunkEmbedded = self.splunkEmbedded

                s.updateConfig(self)

                # Get the latest token number of the current stanza
                last_token_number = 0
                for key, value in settings.items():
                    if 'token' in key and key[6].isdigit() and int(key[6]) > last_token_number:
                        last_token_number = int(key[6])

                # Apply global tokens to the current stanza
                kv_pair_items = settings.items()
                if stanza in stanza_map:
                    for global_stanza in stanza_map[stanza]:
                        i = 0

                        # Scan for tokens first
                        while True:
                            if 'token.{}.token'.format(i) in self._confDict[global_stanza]:
                                token = self._confDict[global_stanza].get('token.{}.token'.format(i))
                                replacement = self._confDict[global_stanza].get('token.{}.replacement'.format(i))
                                replacementType = self._confDict[global_stanza].get(
                                    'token.{}.replacementType'.format(i))

                                last_token_number += 1
                                if token:
                                    k = 'token.{}.token'.format(last_token_number)
                                    v = token
                                    kv_pair_items.append((k, v))
                                if replacement:
                                    k = 'token.{}.replacement'.format(last_token_number)
                                    v = replacement
                                    kv_pair_items.append((k, v))
                                if replacementType:
                                    k = 'token.{}.replacementType'.format(last_token_number)
                                    v = replacementType
                                    kv_pair_items.append((k, v))

                                i += 1
                            else:
                                break

                        keys = settings.keys()
                        for k, v in self._confDict[global_stanza].items():
                            if 'token' not in k and k not in keys:
                                kv_pair_items.append((k, v))

                for key, value in kv_pair_items:
                    oldvalue = value
                    try:
                        value = self._validateSetting(stanza, key, value)
                    except ValueError:
                        # If we're improperly formatted, skip to the next item
                        continue
                    # If we're a tuple, then this must be a token
                    if type(value) == tuple:
                        # Token indices could be out of order, so we must check to
                        # see whether we have enough items in the list to update the token
                        # In general this will keep growing the list by whatever length we need
                        if (key.find("host.") > -1):
                            # logger.info("hostToken.{} = {}".format(value[1],oldvalue))
                            if not isinstance(s.hostToken, Token):
                                s.hostToken = Token(s)
                                # default hard-coded for host replacement
                                s.hostToken.replacementType = 'file'
                            setattr(s.hostToken, value[0], oldvalue)
                        else:
                            if len(s.tokens) <= value[0]:
                                x = (value[0] + 1) - len(s.tokens)
                                s.tokens.extend([None for num in xrange(0, x)])
                            if not isinstance(s.tokens[value[0]], Token):
                                s.tokens[value[0]] = Token(s)
                            # logger.info("token[{}].{} = {}".format(value[0],value[1],oldvalue))
                            setattr(s.tokens[value[0]], value[1], oldvalue)
                    elif key == 'eai:acl':
                        setattr(s, 'app', value['app'])
                    else:
                        setattr(s, key, value)
                        # 6/22/12 CS Need a way to show a setting was set by the original
                        # config read
                        s._lockedSettings.append(key)
                        # logger.debug("Appending '%s' to locked settings for sample '%s'" % (key, s.name))

                # Validate all the tokens are fully setup, can't do this in _validateSettings
                # because they come over multiple lines
                # Don't error out at this point, just log it and remove the token and move on
                deleteidx = []
                for i in xrange(0, len(s.tokens)):
                    t = s.tokens[i]
                    # If the index doesn't exist at all
                    if t is None:
                        logger.error("Token at index %s invalid" % i)
                        # Can't modify list in place while we're looping through it
                        # so create a list to remove later
                        deleteidx.append(i)
                    elif t.token is None or t.replacementType is None or t.replacement is None:
                        logger.error("Token at index %s invalid" % i)
                        deleteidx.append(i)
                newtokens = []
                for i in xrange(0, len(s.tokens)):
                    if i not in deleteidx:
                        newtokens.append(s.tokens[i])
                s.tokens = newtokens

                # Must have eai:acl key to determine app name which determines where actual files are
                if s.app is None:
                    logger.error("App not set for sample '%s' in stanza '%s'" % (s.name, stanza))
                    raise ValueError("App not set for sample '%s' in stanza '%s'" % (s.name, stanza))
                # Set defaults for items not included in the config file
                for setting in self._defaultableSettings:
                    if not hasattr(s, setting) or getattr(s, setting) is None:
                        setattr(s, setting, getattr(self, setting, None))

                # Append to temporary holding list
                if not s.disabled:
                    s._priority = len(tempsamples) + 1
                    tempsamples.append(s)

        # 6/22/12 CS Rewriting the config matching code yet again to handling flattening better.
        # In this case, we're now going to match all the files first, create a sample for each of them
        # and then take the match from the sample seen last in the config file, and apply settings from
        # every other match to that one.
        for s in tempsamples:
            # Now we need to match this up to real files.  May generate multiple copies of the sample.
            foundFiles = []

            # 1/5/14 Adding a config setting to override sample directory, primarily so I can put tests in their own
            # directories
            if s.sampleDir is None:
                logger.debug("Sample directory not specified in config, setting based on standard")
                if self.splunkEmbedded and not STANDALONE:
                    s.sampleDir = os.path.normpath(
                        os.path.join(self.grandparentdir, os.path.pardir, os.path.pardir, os.path.pardir, s.app, self.DEFAULT_SAMPLE_DIR))
                else:
                    # 2/1/15 CS  Adding support for looking for samples based on the config file specified on
                    # the command line.
                    if self.configfile:
                        base_dir = os.path.dirname(self.configfile) if os.path.isdir(self.configfile) else os.path.dirname(os.path.dirname(self.configfile))
                        s.sampleDir = os.path.join(base_dir, self.DEFAULT_SAMPLE_DIR)
                    else:
                        s.sampleDir = os.path.join(os.getcwd(), self.DEFAULT_SAMPLE_DIR)
                        if not os.path.exists(s.sampleDir):
                            # use the prebuilt sample dirs as the last choice
                            if not os.path.exists(s.sampleDir):
                                newSampleDir = os.path.join(self.grandparentdir, self.DEFAULT_SAMPLE_DIR)
                                logger.error(
                                    "Path not found for samples '%s', trying '%s'" % (s.sampleDir, newSampleDir))
                                s.sampleDir = newSampleDir
            else:
                if not os.path.isabs(s.sampleDir):
                    # relative path use the conffile dir as the base dir
                    logger.debug("Sample directory specified in config, checking for relative")
                    base_path = self.configfile if os.path.isdir(self.configfile) else os.path.dirname(self.configfile)
                    s.sampleDir = os.path.join(base_path, s.sampleDir)
                # do nothing when sampleDir is absolute path

            # 2/1/15 CS Adding support for command line options, specifically running a single sample
            # from the command line
                # NOTE(review): run_sample is forced to True here, so the guard just below is
                # always taken; the original per-sample name matching is commented out. These
                # lines are also indented under the preceding `else:` branch, so the overrides
                # below only run on that code path -- confirm both are intentional.
                self.run_sample = True
                if self.run_sample:
                    # Name doesn't match, disable
                    # if s.name != self.run_sample:
                    #     logger.debug("Disabling sample '%s' because of command line override" % s.name)
                    #     s.disabled = True
                    # # Name matches
                    # else:
                    #     logger.debug("Sample '%s' selected from command line" % s.name)
                    # Also, can't backfill search if we don't know how to talk to Splunk
                    s.backfillSearch = None
                    s.backfillSearchUrl = None
                    # Since the user is running this for debug output, lets assume that they
                    # always want to see output
                    self.maxIntervalsBeforeFlush = 1
                    s.maxIntervalsBeforeFlush = 1
                    # Keep any queue length already configured; otherwise flush per event.
                    s.maxQueueLength = s.maxQueueLength or 1
                    logger.debug(
                        "Sample '%s' setting maxQueueLength to '%s' from command line" % (s.name, s.maxQueueLength))

                    # Command-line output mode wins over the stanza's configured outputMode.
                    if self.override_outputter:
                        logger.debug(
                            "Sample '%s' setting output to '%s' from command line" % (s.name, self.override_outputter))
                        s.outputMode = self.override_outputter

                    if self.override_count:
                        logger.debug("Overriding count to '%d' for sample '%s'" % (self.override_count, s.name))
                        s.count = self.override_count
                        # If we're specifying a count, turn off backfill
                        s.backfill = None

                    if self.override_interval:
                        logger.debug(
                            "Overriding interval to '%d' for sample '%s'" % (self.override_interval, s.name))
                        s.interval = self.override_interval

                    if self.override_backfill:
                        logger.debug(
                            "Overriding backfill to '%s' for sample '%s'" % (self.override_backfill, s.name))
                        s.backfill = self.override_backfill.lstrip()

                    if self.override_end:
                        logger.debug("Overriding end to '%s' for sample '%s'" % (self.override_end, s.name))
                        s.end = self.override_end.lstrip()

                    # Replay mode needs an explicit end; default to a single pass.
                    if s.mode == 'replay' and not s.end:
                        s.end = 1

            # Now that we know where samples will be written,
            # Loop through tokens and load state for any that are integerid replacementType
            for token in s.tokens:
                if token.replacementType == 'integerid':
                    try:
                        # 'rU' mode and urllib.pathname2url indicate Python 2;
                        # pathname2url makes the token text safe to embed in a file name.
                        stateFile = open(os.path.join(s.sampleDir, 'state.' + urllib.pathname2url(token.token)), 'rU')
                        token.replacement = stateFile.read()
                        stateFile.close()
                    # The file doesn't exist, use the default value in the config
                    except (IOError, ValueError):
                        # NOTE(review): self-assignment is a deliberate no-op keeping the
                        # configured default; the handle leaks if read() (not open()) raises.
                        token.replacement = token.replacement

            if os.path.exists(s.sampleDir):
                sampleFiles = os.listdir(s.sampleDir)
                for sample in sampleFiles:
                    # The stanza name is treated as a regex anchored at the start of the file name.
                    results = re.match(s.name, sample)
                    if results:
                        logger.debug("Matched file {0} with sample name {1}".format(results.group(0), s.name))
                        samplePath = os.path.join(s.sampleDir, sample)
                        # Only regular files count; subdirectories matching the pattern are skipped.
                        if os.path.isfile(samplePath):
                            logger.debug(
                                "Found sample file '%s' for app '%s' using config '%s' with priority '%s'" %
                                (sample, s.app, s.name, s._priority) + "; adding to list")
                            foundFiles.append(samplePath)

            # If we didn't find any files, log about it
            if len(foundFiles) == 0:
                logger.warning("Sample '%s' in config but no matching files" % s.name)
                # 1/23/14 Change in behavior, go ahead and add the sample even if we don't find a file
                # 9/16/15 Change bit us, now only append if we're a generator other than the two stock generators
                if not s.disabled and not (s.generator == "default" or s.generator == "replay"):
                    tempsamples2.append(s)

            for f in foundFiles:
                if s.name in f:
                    # NOTE(review): `news = s` rebinds, it does not copy -- every file matched
                    # by this stanza mutates the same sample object, so with multiple matching
                    # files the last file's name/filePath wins for all appended entries.
                    news = s
                    news.filePath = f
                    # 12/3/13 CS TODO These are hard coded but should be handled via the modular config system
                    # Maybe a generic callback for all plugins which will modify sample based on the filename
                    # found?
                    # Override <SAMPLE> with real name
                    if s.outputMode == 'spool' and s.spoolFile == self.spoolFile:
                        news.spoolFile = f.split(os.sep)[-1]
                    if s.outputMode == 'file' and s.fileName is None:
                        # Precedence: global fileName, then spoolDir + found file, then spoolDir + spoolFile.
                        if self.fileName:
                            news.fileName = self.fileName
                            logger.debug("Found a global fileName {}. Setting the sample fileName.".format(self.fileName))
                        elif s.spoolFile == self.spoolFile:
                            news.fileName = os.path.join(s.spoolDir, f.split(os.sep)[-1])
                        elif s.spoolFile is not None:
                            news.fileName = os.path.join(s.spoolDir, s.spoolFile)
                    # Override s.name with file name.  Usually they'll match unless we've been a regex
                    # 6/22/12 CS Save original name for later matching
                    news._origName = news.name
                    news.name = f.split(os.sep)[-1]
                    if not news.disabled:
                        tempsamples2.append(news)
                    else:
                        logger.info("Sample '%s' for app '%s' is marked disabled." % (news.name, news.app))

        # Clear tempsamples, we're going to reuse it
        tempsamples = []

        # We're now going go through the samples and attempt to apply any matches from other stanzas
        # This allows us to specify a wildcard at the beginning of the file and get more specific as we go on

        # Loop through all samples, create a list of the master samples
        for s in tempsamples2:
            foundHigherPriority = False
            othermatches = []
            # If we're an exact match, don't go looking for higher priorities
            if not s.name == s._origName:
                for matchs in tempsamples2:
                    # Only compare stanzas that matched the same file but originated
                    # from a different config stanza.
                    if matchs.filePath == s.filePath and s._origName != matchs._origName:
                        # We have a match, now determine if we're higher priority or not
                        # If this is a longer pattern or our match is an exact match
                        # then we're a higher priority match
                        if len(matchs._origName) > len(s._origName) or matchs.name == matchs._origName:
                            # if s._priority < matchs._priority:
                            logger.debug("Found higher priority for sample '%s' with priority '%s' from sample " %
                                              (s._origName, s._priority) +
                                              "'%s' with priority '%s'" % (matchs._origName, matchs._priority))
                            foundHigherPriority = True
                            break
                        else:
                            othermatches.append(matchs._origName)
            if not foundHigherPriority:
                logger.debug(
                    "Chose sample '%s' from samples '%s' for file '%s'" % (s._origName, othermatches, s.name))
                tempsamples.append(s)

        # Now we have two lists, tempsamples which contains only the highest priority matches, and
        # tempsamples2 which contains all matches.  We need to now flatten the config in order to
        # take all the configs which might match.

        # Reversing tempsamples2 in order to look from the bottom of the file towards the top
        # We want entries lower in the file to override entries higher in the file

        tempsamples2.reverse()

        # Loop through all samples
        for s in tempsamples:
            # Now loop through the samples we've matched with files to see if we apply to any of them
            for overridesample in tempsamples2:
                if s.filePath == overridesample.filePath and s._origName != overridesample._origName:
                    # Now we're going to loop through all valid settings and set them assuming
                    # the more specific object that we've matched doesn't already have them set
                    for settingname in self._validSettings:
                        if settingname not in ['eai:acl', 'blacklist', 'disabled', 'name']:
                            # 7/16/14 CS For some reason default settings are suddenly erroring
                            # not sure why, but lets just move on
                            try:
                                sourcesetting = getattr(overridesample, settingname)
                                destsetting = getattr(s, settingname)
                                # We want to check that the setting we're copying to hasn't been
                                # set, otherwise keep the more specific value
                                # 6/22/12 CS Added support for non-overrideable (locked) settings
                                # logger.debug("Locked settings: %s" % pprint.pformat(matchs._lockedSettings))
                                # if settingname in matchs._lockedSettings:
                                #     logger.debug("Matched setting '%s' in sample '%s' lockedSettings"
                                #         % (settingname, matchs.name))
                                # Copy only when the destination still holds the global default
                                # and the source actually diverges from it (and isn't locked).
                                if (destsetting is None or destsetting == getattr(self, settingname)) \
                                        and sourcesetting is not None and sourcesetting != getattr(self, settingname) \
                                        and settingname not in s._lockedSettings:
                                    logger.debug("Overriding setting '%s' with value '%s' from sample '%s' to " %
                                                      (settingname, sourcesetting, overridesample._origName) +
                                                      "sample '%s' in app '%s'" % (s.name, s.app))
                                    setattr(s, settingname, sourcesetting)
                            except AttributeError:
                                pass

                    # Now prepend all the tokens to the beginning of the list so they'll be sure to match first
                    # NOTE(review): despite "prepend" above, this appends overridesample's tokens
                    # after s's own (s.tokens is mutated in place via extend) -- confirm ordering.
                    newtokens = s.tokens
                    # logger.debug("Prepending tokens from sample '%s' to sample '%s' in app '%s': %s" \
                    #             % (overridesample._origName, s.name, s.app, pprint.pformat(newtokens)))
                    newtokens.extend(overridesample.tokens)
                    s.tokens = newtokens

        # We've added replay mode, so lets loop through the samples again and set the earliest and latest
        # settings for any samples that were set to replay mode
        for s in tempsamples:
            # We've added replay mode, so lets loop through the samples again and set the earliest and latest
            # settings for any samples that were set to replay mode
            if s.perDayVolume:
                # perDayVolume takes precedence: switch to the volume-based rater/generator
                # and pin count to 1.
                logger.info(
                    "Stanza contains per day volume, changing rater and generator to perdayvolume instead of count")
                s.rater = 'perdayvolume'
                s.count = 1
                s.generator = 'perdayvolumegenerator'
            elif s.mode == 'replay':
                logger.debug("Setting defaults for replay samples")
                # Replay pins count to 1 and clears the rate-shaping settings; earliest,
                # latest and interval keep any explicitly-configured values.
                s.earliest = 'now' if not s.earliest else s.earliest
                s.latest = 'now' if not s.latest else s.latest
                s.count = 1
                s.randomizeCount = None
                s.hourOfDayRate = None
                s.dayOfWeekRate = None
                s.minuteOfHourRate = None
                s.interval = 0 if not s.interval else s.interval
                # 12/29/13 CS Moved replay generation to a new replay generator plugin
                s.generator = 'replay'

        # The flattened, highest-priority samples become the final list; drop the raw conf dict.
        self.samples = tempsamples
        self._confDict = None

        # 9/2/15 Try autotimestamp values, add a timestamp if we find one
        for s in self.samples:
            if s.generator == 'default':
                s.loadSample()

                if s.autotimestamp:
                    at = self.autotimestamps
                    # Punctuation signatures of lines already examined, so each line
                    # "shape" is only probed against the regex list once.
                    line_puncts = []

                    # Check for _time field, if it exists, add a timestamp to support it
                    if len(s.sampleDict) > 0:
                        if '_time' in s.sampleDict[0]:
                            logger.debug("Found _time field, checking if default timestamp exists")
                            t = Token()
                            # ISO8601 with milliseconds -- presumably Splunk's default _time
                            # rendering; confirm against the sample data.
                            t.token = "\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d{3}"
                            t.replacementType = "timestamp"
                            t.replacement = "%Y-%m-%dT%H:%M:%S.%f"

                            found_token = False
                            # Check to see if we're already a token
                            for st in s.tokens:
                                if st.token == t.token and st.replacement == t.replacement:
                                    found_token = True
                                    break
                            if not found_token:
                                logger.debug("Found _time adding timestamp to support")
                                s.tokens.append(t)
                            else:
                                logger.debug("_time field exists and timestamp already configured")

                    for e in s.sampleDict:
                        # Run punct against the line, make sure we haven't seen this same pattern
                        # Not totally exact but good enough for Rock'N'Roll
                        p = self._punct(e['_raw'])
                        logger.debug("Got punct of '%s' for event '%s'" % (p, e[s.timeField]))
                        if p not in line_puncts:
                            for x in at:
                                # x is a (regex, strptime-format) pair from self.autotimestamps.
                                t = Token()
                                t.token = x[0]
                                t.replacementType = "timestamp"
                                t.replacement = x[1]

                                try:
                                    logger.debug(
                                        "Trying regex '%s' for format '%s' on '%s'" % (x[0], x[1], e[s.timeField]))
                                    ts = s.getTSFromEvent(e['_raw'], t)
                                    if type(ts) == datetime.datetime:
                                        found_token = False
                                        # Check to see if we're already a token
                                        for st in s.tokens:
                                            if st.token == t.token and st.replacement == t.replacement:
                                                found_token = True
                                                break
                                        if not found_token:
                                            logger.debug(
                                                "Found timestamp '%s', extending token with format '%s'" % (x[0], x[1]))
                                            s.tokens.append(t)
                                            # Drop this pattern from ones we try in the future
                                            at = [z for z in at if z[0] != x[0]]
                                        break
                                except ValueError:
                                    pass
                        line_puncts.append(p)
        logger.debug("Finished parsing")