def __init__(self, sample, output_counter=None):
    # Override maxQueueLength to EventPerKey so that each flush
    # will generate one aws key
    if sample.awsS3EventPerKey:
        sample.maxQueueLength = sample.awsS3EventPerKey

    OutputPlugin.__init__(self, sample, output_counter)

    if not boto_imported:
        logger.error("There is no boto3 or botocore library available")
        return

    # disable any "requests" warnings
    requests.packages.urllib3.disable_warnings()

    # Bind passed in samples to the outputter.
    self.awsS3compressiontype = (sample.awsS3CompressionType
                                 if hasattr(sample, "awsS3CompressionType") and sample.awsS3CompressionType else None)
    self.awsS3eventtype = (sample.awsS3EventType
                           if hasattr(sample, "awsS3EventType") and sample.awsS3EventType else "syslog")
    self.awsS3objectprefix = (sample.awsS3ObjectPrefix
                              if hasattr(sample, "awsS3ObjectPrefix") and sample.awsS3ObjectPrefix else "")
    self.awsS3objectsuffix = (sample.awsS3ObjectSuffix
                              if hasattr(sample, "awsS3ObjectSuffix") and sample.awsS3ObjectSuffix else "")
    self.awsS3bucketname = sample.awsS3BucketName
    logger.debug("Setting up the connection pool for %s in %s" % (self._sample.name, self._app))
    self._client = None
    self._createConnections(sample)
    logger.debug("Finished init of awsS3 plugin.")
def flush(self, q):
    if len(q) > 0:
        logger.debug("Flushing output for sample '%s' in app '%s' for queue '%s'" %
                     (self._sample.name, self._app, self._sample.source))
        # Loop through all the messages, build one long string, and write once per flush.
        # This may cause the file to exceed maxFileBytes slightly, but greatly improves performance.
        try:
            for metamsg in q:
                msg = metamsg.get("_raw")
                if not msg:
                    continue
                if msg[-1] != "\n":
                    msg += "\n"

                if self._fileLength + len(msg) <= self._fileMaxBytes:
                    self._fileHandle.write(msg)
                    self._fileLength += len(msg)
                else:
                    self._fileHandle.flush()
                    self._fileHandle.close()
                    if os.path.exists(self._file + "." + str(self._fileBackupFiles)):
                        logger.debug("File Output: Removing file: %s" %
                                     (self._file + "." + str(self._fileBackupFiles)))
                        os.unlink(self._file + "." + str(self._fileBackupFiles))
                    for x in range(1, int(self._fileBackupFiles))[::-1]:
                        logger.debug("File Output: Checking for file: %s" % (self._file + "." + str(x)))
                        if os.path.exists(self._file + "." + str(x)):
                            logger.debug("File Output: Renaming file %s to %s" %
                                         (self._file + "." + str(x), self._file + "." + str(x + 1)))
                            os.rename(self._file + "." + str(x), self._file + "." + str(x + 1))
                    os.rename(self._file, self._file + ".1")
                    self._fileHandle = open(self._file, "w")
                    self._fileHandle.write(msg)
                    self._fileLength = len(msg)
        except IndexError:
            logger.warning("IndexError when writing for app '%s' sample '%s'" %
                           (self._app, self._sample.name))

        if not self._fileHandle.closed:
            self._fileHandle.flush()
        logger.debug("Queue for app '%s' sample '%s' written" % (self._app, self._sample.name))
        self._fileHandle.close()
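# Rotation trace (illustrative; file name and backup count are made up): with
# fileBackupFiles = 3 and a log file "events.log", hitting the size cap above
# deletes "events.log.3" if present, then renames "events.log.2" -> "events.log.3"
# and "events.log.1" -> "events.log.2", renames "events.log" -> "events.log.1",
# and finally opens a fresh "events.log" for the message that overflowed.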
def build_events(self, eventsDict, startTime, earliest, latest, ignore_tokens=False):
    """Ready events for output by replacing tokens and updating the output queue"""
    # Replace tokens first so that perDayVolume evaluates the correct event length
    send_objects = self.replace_tokens(eventsDict, earliest, latest, ignore_tokens=ignore_tokens)
    try:
        self._out.bulksend(send_objects)
        self._sample.timestamp = None
    except Exception as e:
        logger.exception("Exception {} happened.".format(type(e)))
        raise e
    try:
        # TODO: Change this logic so that we don't lose all events if an exception is hit (try/except/break?)
        endTime = datetime.datetime.now()
        timeDiff = endTime - startTime
        timeDiffFrac = "%d.%06d" % (timeDiff.seconds, timeDiff.microseconds)
        logger.debug("Interval complete, flushing feed")
        self._out.flush(endOfInterval=True)
        logger.debug("Generation of sample '%s' in app '%s' completed in %s seconds." %
                     (self._sample.name, self._sample.app, timeDiffFrac))
    except Exception as e:
        logger.exception("Exception {} happened.".format(type(e)))
        raise e
def __init__(self, time, sample=None, config=None, genqueue=None, outputqueue=None, loggingqueue=None):
    # Logger already setup by config, just get an instance
    # setup default options
    self.profiler = config.profiler
    self.config = config
    self.sample = sample
    self.end = getattr(self.sample, "end", -1)
    self.endts = getattr(self.sample, "endts", None)
    self.generatorQueue = genqueue
    self.outputQueue = outputqueue
    self.time = time
    self.stopping = False
    self.countdown = 0
    self.executions = 0
    self.interval = getattr(self.sample, "interval", config.interval)
    logger.debug('Initializing timer for %s' % (sample.name if sample is not None else "None"))
    # load plugins
    if self.sample is not None:
        rater_class = self.config.getPlugin('rater.' + self.sample.rater, self.sample)
        self.rater = rater_class(self.sample)
        self.generatorPlugin = self.config.getPlugin('generator.' + self.sample.generator, self.sample)
        self.outputPlugin = self.config.getPlugin('output.' + self.sample.outputMode, self.sample)
        if self.sample.timeMultiple < 0:
            logger.error("Invalid setting for timeMultiple: {}, value should be positive".format(
                self.sample.timeMultiple))
        elif self.sample.timeMultiple != 1:
            self.interval = self.sample.interval
            logger.debug("Adjusting interval {} with timeMultiple {}, new interval: {}".format(
                self.sample.interval, self.sample.timeMultiple, self.interval))
    logger.info("Start '%s' generatorWorkers for sample '%s'" %
                (self.sample.config.generatorWorkers, self.sample.name))
def __init__(self, sample):
    super(PerDayVolume, self).__init__(sample)
    # Logger already setup by config, just get an instance
    logger.debug("Starting PerDayVolumeRater for %s" % (sample.name if sample is not None else "None"))
    self.previous_count_left = 0
    self.raweventsize = 0
def getTSFromEvent(self, event, passed_token=None):
    currentTime = None
    formats = []
    # JB: 2012/11/20 - Can we optimize this by only testing tokens of type = *timestamp?
    # JB: 2012/11/20 - Alternatively, documentation should suggest putting timestamp as token.0.
    if passed_token is not None:
        tokens = [passed_token]
    else:
        tokens = self.tokens
    for token in tokens:
        try:
            formats.append(token.token)
            # logger.debug("Searching for token '%s' in event '%s'" % (token.token, event))
            results = token._search(event)
            if results:
                timeFormat = token.replacement
                group = 0 if len(results.groups()) == 0 else 1
                timeString = results.group(group)
                # logger.debug("Testing '%s' as a time string against '%s'" % (timeString, timeFormat))
                if timeFormat == "%s":
                    ts = float(timeString) if len(timeString) < 10 else \
                        float(timeString) / (10 ** (len(timeString) - 10))
                    # logger.debug("Getting time for timestamp '%s'" % ts)
                    currentTime = datetime.datetime.fromtimestamp(ts)
                else:
                    # logger.debug("Getting time for timeFormat '%s' and timeString '%s'" %
                    #              (timeFormat, timeString))
                    # Working around Python bug with a non thread-safe strptime. Randomly get AttributeError
                    # when calling strptime, so if we get that, try again
                    while currentTime is None:
                        try:
                            # Checking for timezone adjustment
                            if timeString[-5] == "+":
                                timeString = timeString[:-5]
                            currentTime = datetime.datetime.strptime(timeString, timeFormat)
                        except AttributeError:
                            pass
                logger.debug("Match '%s' Format '%s' result: '%s'" % (timeString, timeFormat, currentTime))
                if type(currentTime) == datetime.datetime:
                    break
        except ValueError:
            logger.warning("Match found ('%s') but time parse failed. Timeformat '%s' Event '%s'" %
                           (timeString, timeFormat, event))
    if type(currentTime) != datetime.datetime:
        # Total fail
        if passed_token is None:  # If we're running for autotimestamp don't log error
            logger.warning("Can't find a timestamp (using patterns '%s') in this event: '%s'." % (formats, event))
        raise ValueError("Can't find a timestamp (using patterns '%s') in this event: '%s'." % (formats, event))
    # Check to make sure we parsed a year
    if currentTime.year == 1900:
        currentTime = currentTime.replace(year=self.now().year)
    # 11/3/14 CS So, this is breaking replay mode, and getTSFromEvent is only used by replay mode
    # but I don't remember why I added these two lines of code so it might create a regression.
    # Found the change on 6/14/14 but no comments as to why I added these two lines.
    # if self.timestamp == None:
    #     self.timestamp = currentTime
    return currentTime
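# Illustrative trace of the "%s" (epoch) branch above, with made-up values:
# a 10-digit string like "1388600000" has len < 10 fail, so it is scaled by
# 10**(10 - 10) = 1 and used as seconds directly, while a 13-digit millisecond
# string like "1388600000123" is divided by 10**(13 - 10) = 1000, yielding
# 1388600000.123 before datetime.datetime.fromtimestamp() is applied.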
def __init__(self, sample):
    logger.debug("Starting ConfigRater for %s" % (sample.name if sample is not None else "None"))
    self._sample = sample
    self._generatorWorkers = self._sample.config.generatorWorkers
def __init__(self, sample, output_counter=None):
    self._app = sample.app
    self._sample = sample
    self._outputMode = sample.outputMode
    self.events = None
    logger.debug("Starting OutputPlugin for sample '%s' with output '%s'" %
                 (self._sample.name, self._sample.outputMode))
    self._queue = deque([])
    self.output_counter = output_counter
def flush(self, events):
    if not self.scsEndPoint:
        if getattr(self.config, "scsEndPoint", None):
            self.scsEndPoint = self.config.scsEndPoint
        else:
            raise NoSCSEndPoint("Please specify your REST endpoint for the SCS tenant")

    if not self.scsAccessToken:
        if getattr(self.config, "scsAccessToken", None):
            self.scsAccessToken = self.config.scsAccessToken
        else:
            raise NoSCSAccessToken("Please specify your REST endpoint access token for the SCS tenant")

    if self.scsClientId and self.scsClientSecret:
        logger.info("Both scsClientId and scsClientSecret are supplied." +
                    " We will renew the expired token using these credentials.")
        self.scsRenewToken = True
    else:
        if getattr(self.config, "scsClientId", None) and getattr(self.config, "scsClientSecret", None):
            self.scsClientId = self.config.scsClientId
            self.scsClientSecret = self.config.scsClientSecret
            logger.info("Both scsClientId and scsClientSecret are supplied." +
                        " We will renew the expired token using these credentials.")
            self.scsRenewToken = True
        else:
            self.scsRenewToken = False

    self.header = {
        "Authorization": "Bearer {0}".format(self.scsAccessToken),
        "Content-Type": "application/json",
    }

    self.accessTokenExpired = False
    self.tokenRenewEndPoint = "https://auth.scp.splunk.com/token"
    self.tokenRenewBody = {
        "client_id": self.scsClientId,
        "client_secret": self.scsClientSecret,
        "grant_type": "client_credentials",
    }

    for i in range(self.scsRetryNum + 1):
        logger.debug("Sending data to the scs endpoint. Num:{0}".format(i))
        self._sendHTTPEvents(events)

        if not self.checkResults():
            if self.accessTokenExpired and self.scsRenewToken:
                self.renewAccessToken()
            self.active_sessions = []
        else:
            break
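# A minimal sketch of the client_credentials renewal this flush path relies on
# (hedged: the real renewAccessToken() implementation may differ; the endpoint
# and body mirror tokenRenewEndPoint/tokenRenewBody above, and "access_token"
# is the conventional OAuth2 response field, not confirmed from the source):
import requests

def renew_token_sketch(client_id, client_secret):
    resp = requests.post(
        "https://auth.scp.splunk.com/token",
        data={
            "client_id": client_id,
            "client_secret": client_secret,
            "grant_type": "client_credentials",
        },
    )
    resp.raise_for_status()
    return resp.json().get("access_token")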
def flush(self, endOfInterval=False):
    """
    Flushes output buffer, unless endOfInterval called, and then only flush if we've been called
    more than maxIntervalsBeforeFlush tunable.
    """
    # TODO: Fix interval flushing somehow with a queue, not sure I even want to support this feature anymore.
    '''if endOfInterval:
        logger.debugv("Sample calling flush, checking increment against maxIntervalsBeforeFlush")
        c.intervalsSinceFlush[self._sample.name].increment()
        if c.intervalsSinceFlush[self._sample.name].value() >= self._sample.maxIntervalsBeforeFlush:
            logger.debugv("Exceeded maxIntervalsBeforeFlush, flushing")
            flushing = True
            c.intervalsSinceFlush[self._sample.name].clear()
        else:
            logger.debugv("Not enough events to flush, passing flush routine.")
    else:
        logger.debugv("maxQueueLength exceeded, flushing")
        flushing = True'''

    # TODO: This is set this way just for the time being while I decide if we want this feature.
    flushing = True
    if flushing:
        q = self._queue
        logger.debug("Flushing queue for sample '%s' with size %d" % (self._sample.name, len(q)))
        self._queue = []
        outputer = self.outputPlugin(self._sample, self.output_counter)
        outputer.updateConfig(self.config)
        outputer.set_events(q)
        # When an outputQueue is used, the outputer must run single-threaded, which requires it to be put
        # back into the outputQueue so a single worker thread can execute it. When an outputQueue is not
        # used, it can be run by multiple processes or threads, so there is no need to put the outputer
        # back into the queue. Just execute it.
        # if outputPlugin must be used for useOutputQueue, use outputQueue regardless of user config useOutputQueue:
        if self.outputPlugin.useOutputQueue or self.config.useOutputQueue:
            try:
                self.outputQueue.put(outputer)
            except Full:
                logger.warning("Output Queue full, looping again")
        else:
            if self.config.splunkEmbedded:
                tmp = [len(s['_raw']) for s in q]
                if len(tmp) > 0:
                    metrics_logger.info({
                        'timestamp': datetime.datetime.strftime(datetime.datetime.now(), '%Y-%m-%d %H:%M:%S'),
                        'sample': self._sample.name,
                        'events': len(tmp),
                        'bytes': sum(tmp)
                    })
                tmp = None
            outputer.run()
def run(self, output_counter=None):
    if output_counter is not None and hasattr(self.config, 'outputCounter') and self.config.outputCounter:
        # Use output_counter to calculate throughput
        self._out.setOutputCounter(output_counter)
    self.gen(count=self.count, earliest=self.start_time, latest=self.end_time, samplename=self._sample.name)
    # TODO: Make this somehow handle an output queue and support intervals and a master queue
    # Just double check to see if there's something in queue to flush out at the end of run
    if len(self._out._queue) > 0:
        logger.debug("Queue is not empty, flush out at the end of each run")
        self._out.flush()
def update_throughput(self, timestamp):
    # B/s, count/s
    delta_time = timestamp - self.current_time
    self.throughput_volume = self.event_size_1_min / delta_time
    self.throughput_count = self.event_count_1_min / delta_time
    self.current_time = timestamp
    self.event_count_1_min = 0
    self.event_size_1_min = 0
    logger.debug("Current throughput is {} B/s, {} count/s".format(
        self.throughput_volume, self.throughput_count))
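# Worked example (illustrative numbers): if 5 seconds elapsed since the last
# update and 1,000 events totalling 250,000 bytes were counted, the method
# reports 250000 / 5 = 50000 B/s and 1000 / 5 = 200 count/s, then zeroes the
# accumulators for the next window.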
def predict_event_size(self):
    try:
        self.sample.loadSample()
        logger.debug("File sample loaded successfully.")
    except TypeError:
        logger.debug("Error loading sample file for sample '%s'" % self.sample.name)
        return

    total_len = sum([len(e['_raw']) for e in self.sample.sampleDict])
    sample_count = len(self.sample.sampleDict)
    if sample_count == 0:
        return 0
    else:
        return total_len / sample_count
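# Example (illustrative): for a loaded sampleDict of three events whose '_raw'
# lengths are 120, 80, and 100 bytes, the predicted event size is
# (120 + 80 + 100) / 3 = 100.0 bytes.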
def createConnections(self):
    self.serverPool = []
    if self.httpeventServers:
        for server in self.httpeventServers.get("servers"):
            if not server.get("address"):
                logger.error("requested a connection to a httpevent server, but no address specified for sample %s"
                             % self._sample.name)
                raise ValueError("requested a connection to a httpevent server, but no address specified for"
                                 " sample %s" % self._sample.name)
            if not server.get("port"):
                logger.error("requested a connection to a httpevent server, but no port specified for server %s"
                             % server)
                raise ValueError("requested a connection to a httpevent server, but no port specified for server %s"
                                 % server)
            if not server.get("key"):
                logger.error("requested a connection to a httpevent server, but no key specified for server %s"
                             % server)
                raise ValueError("requested a connection to a httpevent server, but no key specified for server %s"
                                 % server)
            if not ((server.get("protocol") == "http") or (server.get("protocol") == "https")):
                logger.error("requested a connection to a httpevent server, but no protocol specified for server %s"
                             % server)
                raise ValueError("requested a connection to a httpevent server, but no protocol specified for"
                                 " server %s" % server)
            logger.debug("Validation Passed, Creating a requests object for server: %s" % server.get("address"))

            setserver = {}
            setserver["url"] = "%s://%s:%s/services/collector" % (
                server.get("protocol"),
                server.get("address"),
                server.get("port"),
            )
            setserver["header"] = "Splunk %s" % server.get("key")
            logger.debug("Adding server set to pool, server: %s" % setserver)
            self.serverPool.append(setserver)
    else:
        raise NoServers("outputMode %s but httpeventServers not specified for sample %s"
                        % (self.name, self._sample.name))
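# Example (illustrative; host, port, and token are made up) of an
# httpeventServers entry and the pool record it produces:
#
#   {"servers": [{"protocol": "https", "address": "localhost",
#                 "port": "8088", "key": "00000000-0000-0000-0000-000000000000"}]}
#
# becomes:
#
#   {"url": "https://localhost:8088/services/collector",
#    "header": "Splunk 00000000-0000-0000-0000-000000000000"}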
def checkResults(self):
    for session in self.active_sessions:
        response = session.result()
        if response.status_code == 401 and "Invalid or Expired Bearer Token" in response.text:
            logger.error("scsAccessToken is invalid or expired")
            self.accessTokenExpired = True
            return False
        elif response.status_code != 200:
            logger.error(f"Data transmission failed with {response.status_code} and {response.text}")
            return False
    logger.debug("Data transmission successful")
    return True
def multi_queue_it(self, count):
    logger.info("Entering multi-processing division of sample")
    numberOfWorkers = self.config.generatorWorkers
    logger.debug("Number of Workers: {0}".format(numberOfWorkers))
    # this is a redundant check, but will prevent some missed call to multi_queue without a valid setting
    if bool(self.sample.splitSample):
        # if splitSample = 1, divide across all generator workers; otherwise use splitSample directly
        if self.sample.splitSample == 1:
            logger.debug("SplitSample = 1, using all available workers")
            targetWorkersToUse = numberOfWorkers
        else:
            logger.debug("SplitSample != 1, using {0} workers.".format(self.sample.splitSample))
            targetWorkersToUse = self.sample.splitSample
    else:
        logger.debug("SplitSample set to disable multithreading for just this sample.")
        self.single_queue_it()
        return
    currentWorkerPrepCount = 0
    remainingCount = count
    targetLoopCount = int(count) // targetWorkersToUse
    while currentWorkerPrepCount < targetWorkersToUse:
        currentWorkerPrepCount = currentWorkerPrepCount + 1
        # check if this is the last loop; if so, queue the remainder instead of the fixed share
        if currentWorkerPrepCount < targetWorkersToUse:
            remainingCount = remainingCount - targetLoopCount
        else:
            targetLoopCount = remainingCount
        self.single_queue_it(targetLoopCount)
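# Worked example (illustrative, using the corrected loop above): with
# count = 100 and splitSample = 3, targetLoopCount = 100 // 3 = 33. The first
# two iterations queue 33 events each (remainingCount: 100 -> 67 -> 34), and
# the final iteration queues the remaining 34, so all 100 events are
# distributed across 3 workers.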
def createConnections(self):
    self.serverPool = []
    if self.httpeventServers:
        for server in self.httpeventServers.get('servers'):
            if not server.get('address'):
                logger.error('requested a connection to a httpevent server, but no address specified for sample %s'
                             % self._sample.name)
                raise ValueError('requested a connection to a httpevent server, but no address specified for'
                                 ' sample %s' % self._sample.name)
            if not server.get('port'):
                logger.error('requested a connection to a httpevent server, but no port specified for server %s'
                             % server)
                raise ValueError('requested a connection to a httpevent server, but no port specified for server %s'
                                 % server)
            if not server.get('key'):
                logger.error('requested a connection to a httpevent server, but no key specified for server %s'
                             % server)
                raise ValueError('requested a connection to a httpevent server, but no key specified for server %s'
                                 % server)
            if not ((server.get('protocol') == 'http') or (server.get('protocol') == 'https')):
                logger.error('requested a connection to a httpevent server, but no protocol specified for server %s'
                             % server)
                raise ValueError('requested a connection to a httpevent server, but no protocol specified for'
                                 ' server %s' % server)
            logger.debug("Validation Passed, Creating a requests object for server: %s" % server.get('address'))

            setserver = {}
            setserver['url'] = "%s://%s:%s/services/collector" % (
                server.get('protocol'), server.get('address'), server.get('port'))
            setserver['header'] = "Splunk %s" % server.get('key')
            logger.debug("Adding server set to pool, server: %s" % setserver)
            self.serverPool.append(setserver)
    else:
        raise NoServers('outputMode %s but httpeventServers not specified for sample %s'
                        % (self.name, self._sample.name))
def setOutputMetadata(self, event):
    if self._sample.sampletype == 'csv' and (event['index'] != self._sample.index
                                             or event['host'] != self._sample.host
                                             or event['source'] != self._sample.source
                                             or event['sourcetype'] != self._sample.sourcetype):
        self._sample.index = event['index']
        self._sample.host = event['host']
        # Allow randomizing the host:
        if self._sample.hostToken:
            self.host = self._sample.hostToken.replace(self.host)
        self._sample.source = event['source']
        self._sample.sourcetype = event['sourcetype']
        logger.debug("Setting CSV parameters. index: '%s' host: '%s' source: '%s' sourcetype: '%s'"
                     % (self._sample.index, self._sample.host, self._sample.source, self._sample.sourcetype))
def flush(self, q):
    if len(q) > 0:
        logger.debug("Flushing output for sample '%s' in app '%s' for queue '%s'" %
                     (self._sample.name, self._app, self._sample.source))
        # Keep trying to open the destination file, as it might be touched by other processes
        data = ''.join(event['_raw'] for event in q if event.get('_raw'))
        while True:
            try:
                with open(self.spoolPath, 'a') as dst:
                    dst.write(data)
                break
            except Exception as e:
                logger.error(str(e))
                time.sleep(0.1)
        logger.debug("Queue for app '%s' sample '%s' written" % (self._app, self._sample.name))
def rate(self):
    self.sample.count = int(self.sample.count)
    # Let generators handle infinite count for themselves
    if self.sample.count == -1 and self.sample.generator == "default":
        if not self.sample.sampleDict:
            logger.error("No sample found for default generator, cannot generate events")
        self.sample.count = len(self.sample.sampleDict)
    count = self.sample.count
    rateFactor = self.adjust_rate_factor()
    ret = int(round(count * rateFactor, 0))
    if rateFactor != 1.0:
        logger.debug("Original count: %s Rated count: %s Rate factor: %s" % (count, ret, rateFactor))
    return ret
def flush(self, endOfInterval=False):
    """
    Flushes output buffer, unless endOfInterval called, and then only flush if we've been called
    more than maxIntervalsBeforeFlush tunable.
    """
    flushing = True
    if flushing:
        q = self._queue
        logger.debug("Flushing queue for sample '%s' with size %d" % (self._sample.name, len(q)))
        self._queue = []
        outputer = self.outputPlugin(self._sample, self.output_counter)
        outputer.updateConfig(self.config)
        outputer.set_events(q)
        # When an outputQueue is used, the outputer must run single-threaded, which requires it to be put
        # back into the outputQueue so a single worker thread can execute it. When an outputQueue is not
        # used, it can be run by multiple processes or threads, so there is no need to put the outputer
        # back into the queue. Just execute it.
        # if outputPlugin must be used for useOutputQueue, use outputQueue regardless of user config useOutputQueue:
        if self.outputPlugin.useOutputQueue or self.config.useOutputQueue:
            try:
                self.outputQueue.put(outputer)
            except Full:
                logger.warning("Output Queue full, looping again")
        else:
            if self.config.splunkEmbedded:
                tmp = [len(s['_raw']) for s in q]
                if len(tmp) > 0:
                    metrics_logger.info({
                        'timestamp': datetime.datetime.strftime(datetime.datetime.now(), '%Y-%m-%d %H:%M:%S'),
                        'sample': self._sample.name,
                        'events': len(tmp),
                        'bytes': sum(tmp)
                    })
                tmp = None
            outputer.run()
        q = None
def __init__(self, sample, output_counter=None):
    OutputPlugin.__init__(self, sample, output_counter)

    if sample.fileName is None:
        logger.error("outputMode file but file not specified for sample %s" % self._sample.name)
        raise ValueError("outputMode file but file not specified for sample %s" % self._sample.name)

    self._file = sample.pathParser(sample.fileName)
    self._fileMaxBytes = sample.fileMaxBytes
    self._fileBackupFiles = sample.fileBackupFiles

    self._fileHandle = open(self._file, "a")
    self._fileLength = os.stat(self._file).st_size
    logger.debug("Configured to log to '%s' with maxBytes '%s' with backupCount '%s'" %
                 (self._file, self._fileMaxBytes, self._fileBackupFiles))
def processSampleLine(self, filehandler):
    """
    Due to a change in python3, utf-8 is now the default encoding when reading a file.
    To get around this we need the process loop outside of the filehandler.
    :param filehandler:
    :return:
    """
    sampleLines = []
    if self.breaker == self.config.breaker:
        logger.debug("Reading raw sample '%s' in app '%s'" % (self.name, self.app))
        sampleLines = filehandler.readlines()
    # 1/5/14 CS Moving to using only sampleDict and doing the breaking up into events at load time
    # instead of on every generation
    else:
        logger.debug("Non-default breaker '%s' detected for sample '%s' in app '%s'" %
                     (self.breaker, self.name, self.app))
        sampleData = filehandler.read()
        logger.debug("Filling array for sample '%s' in app '%s'; sampleData=%s, breaker=%s" %
                     (self.name, self.app, len(sampleData), self.breaker))
        try:
            breakerRE = re.compile(self.breaker, re.M)
        except re.error:
            logger.error("Line breaker '%s' for sample '%s' in app '%s'"
                         " could not be compiled; using default breaker",
                         self.breaker, self.name, self.app)
            self.breaker = self.config.breaker
        # Loop through data, finding matches of the regular expression and breaking them up into
        # "lines". Each match includes the breaker itself.
        extractpos = 0
        searchpos = 0
        breakerMatch = breakerRE.search(sampleData, searchpos)
        while breakerMatch:
            logger.debug("Breaker found at: %d, %d" % (breakerMatch.span()[0], breakerMatch.span()[1]))
            # Ignore matches at the beginning of the file
            if breakerMatch.span()[0] != 0:
                sampleLines.append(sampleData[extractpos:breakerMatch.span()[0]])
                extractpos = breakerMatch.span()[0]
            searchpos = breakerMatch.span()[1]
            breakerMatch = breakerRE.search(sampleData, searchpos)
        sampleLines.append(sampleData[extractpos:])
    return sampleLines
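# A small standalone sketch of the breaker loop above (hedged: the sample data
# and breaker pattern are made up). Each regex match marks the start of a new
# event, and the final append captures everything from the last match onward:
import re

sampleData = "Jan 1 ev1 line2\nJan 2 ev2\nJan 3 ev3\n"
breakerRE = re.compile(r"Jan \d+ ", re.M)

sampleLines, extractpos, searchpos = [], 0, 0
breakerMatch = breakerRE.search(sampleData, searchpos)
while breakerMatch:
    # a match at position 0 only advances the search; later matches emit an event
    if breakerMatch.span()[0] != 0:
        sampleLines.append(sampleData[extractpos:breakerMatch.span()[0]])
        extractpos = breakerMatch.span()[0]
    searchpos = breakerMatch.span()[1]
    breakerMatch = breakerRE.search(sampleData, searchpos)
sampleLines.append(sampleData[extractpos:])
# sampleLines == ["Jan 1 ev1 line2\n", "Jan 2 ev2\n", "Jan 3 ev3\n"]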
def earliestTime(self):
    # First optimization, we need only store earliest and latest
    # as an offset of now if they're relative times
    if self._earliestParsed is not None:
        earliestTime = self.now() - self._earliestParsed
        logger.debug("Using cached earliest time: %s" % earliestTime)
    else:
        if (self.earliest.strip()[0:1] == "+" or self.earliest.strip()[0:1] == "-"
                or self.earliest == "now"):
            tempearliest = timeParser(self.earliest, timezone=self.timezone)
            temptd = self.now(realnow=True) - tempearliest
            self._earliestParsed = datetime.timedelta(days=temptd.days, seconds=temptd.seconds)
            earliestTime = self.now() - self._earliestParsed
            logger.debug("Calculating earliestParsed as '%s' with earliestTime as '%s' and"
                         " self.sample.earliest as '%s'" %
                         (self._earliestParsed, earliestTime, tempearliest))
        else:
            earliestTime = timeParser(self.earliest, timezone=self.timezone)
            logger.debug("earliestTime as absolute time '%s'" % earliestTime)
    return earliestTime
def _transmitEvents(self, payloadstring):
    targetServer = []
    logger.debug("Transmission called with payloadstring: %s " % payloadstring)
    if self.httpeventoutputmode == "mirror":
        targetServer = self.serverPool
    else:
        targetServer.append(random.choice(self.serverPool))
    for server in targetServer:
        logger.debug("Selected targetServer object: %s" % targetServer)
        url = server["url"]
        headers = {}
        headers["Authorization"] = server["header"]
        headers["content-type"] = "application/json"
        try:
            payloadsize = len(payloadstring)
            self.active_sessions.append(
                self.session.post(url=url, data=payloadstring, headers=headers, verify=False))
        except Exception as e:
            logger.error("Failed for exception: %s" % e)
            logger.error("Failed sending events to url: %s sourcetype: %s size: %s" %
                         (url, self.lastsourcetype, payloadsize))
            logger.debug("Failed sending events to url: %s headers: %s payload: %s" %
                         (url, headers, payloadstring))
            raise e
def setOutputMetadata(self, event):
    if self._sample.sampletype == "csv" and (event["index"] != self._sample.index
                                             or event["host"] != self._sample.host
                                             or event["source"] != self._sample.source
                                             or event["sourcetype"] != self._sample.sourcetype):
        self._sample.index = event["index"]
        self._sample.host = event["host"]
        # Allow randomizing the host:
        if self._sample.hostToken:
            self.host = self._sample.hostToken.replace(self.host)
        self._sample.source = event["source"]
        self._sample.sourcetype = event["sourcetype"]
        logger.debug("Setting CSV parameters. index: '%s' host: '%s' source: '%s' sourcetype: '%s'" %
                     (self._sample.index, self._sample.host, self._sample.source, self._sample.sourcetype))
def send_events(self, send_objects, startTime):
    """Ready events for output by replacing tokens and updating the output queue"""
    try:
        self._out.bulksend(send_objects)
        self._sample.timestamp = None
    except Exception as e:
        logger.exception("Exception {} happened.".format(type(e)))
        raise e
    try:
        # TODO: Change this logic so that we don't lose all events if an exception is hit (try/except/break?)
        endTime = datetime.datetime.now()
        timeDiff = endTime - startTime
        timeDiffFrac = "%d.%06d" % (timeDiff.seconds, timeDiff.microseconds)
        logger.debug("Interval complete, flushing feed")
        self._out.flush(endOfInterval=True)
        logger.debug("Generation of sample '%s' in app '%s' completed in %s seconds." %
                     (self._sample.name, self._sample.app, timeDiffFrac))
    except Exception as e:
        logger.exception("Exception {} happened.".format(type(e)))
        raise e
def _createConnections(self, sample):
    try:
        if hasattr(sample, "awsKeyId") and hasattr(sample, "awsSecretKey"):
            self._client = boto3.client(
                "s3",
                region_name=sample.awsRegion,
                aws_access_key_id=sample.awsKeyId,
                aws_secret_access_key=sample.awsSecretKey,
            )
            if self._client is None:
                msg = """
                [your_eventgen_stanza]
                awsKeyId = YOUR_ACCESS_KEY
                awsSecretKey = YOUR_SECRET_KEY
                """
                logger.error("Failed to init boto3 client; you should define correct 'awsKeyId'"
                             " and 'awsSecretKey' in eventgen conf: %s" % msg)
                raise Exception(msg)
        else:
            self._client = boto3.client("s3", region_name=sample.awsRegion)
    except Exception as e:
        logger.error("Failed to init boto3 client: exception = %s" % e)
        raise e

    # Try the list bucket method to validate that the connection works
    try:
        self._client.list_buckets()
    except botocore.exceptions.NoCredentialsError:
        msg = """
        [default]
        aws_access_key_id = YOUR_ACCESS_KEY
        aws_secret_access_key = YOUR_SECRET_KEY
        """
        logger.error("Failed to init boto3 client, you should create "
                     "'~/.aws/credentials' with credential info %s" % msg)
        raise
    logger.debug("Init conn done, conn = %s" % self._client)
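# A minimal standalone sketch of the same credential check (hedged: the region
# name is made up; assumes boto3/botocore are installed and credentials come
# from ~/.aws/credentials or the environment):
import boto3
import botocore.exceptions

def check_s3_credentials(region="us-east-1"):
    client = boto3.client("s3", region_name=region)
    try:
        # cheap call that fails fast when no valid credentials are configured
        client.list_buckets()
    except botocore.exceptions.NoCredentialsError:
        return False
    return True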
def rate(self):
    perdayvolume = float(self.sample.perDayVolume)
    # Convert perdayvolume to bytes from GB
    perdayvolume = perdayvolume * 1024 * 1024 * 1024
    interval = self.sample.interval
    if self.sample.interval == 0:
        logger.debug("Running perDayVolume as if for 24hr period.")
        interval = 86400
    logger.debug("Current perDayVolume: %f, Sample interval: %s" % (perdayvolume, interval))
    intervalsperday = 86400 / interval
    perintervalvolume = perdayvolume / intervalsperday
    count = self.sample.count
    rateFactor = self.adjust_rate_factor()
    logger.debug("Size per interval: %s, rate factor to adjust by: %s" % (perintervalvolume, rateFactor))
    ret = int(round(perintervalvolume * rateFactor, 0))
    if rateFactor != 1.0:
        logger.debug("Original count: %s Rated count: %s Rate factor: %s" % (count, ret, rateFactor))
    logger.debug("Finished rating, interval: {0}s, generation rate: {1} MB/interval".format(
        interval, round((ret / 1024 / 1024), 4)))
    return ret
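# Worked example (illustrative): perDayVolume = 1 (GB) gives
# 1 * 1024**3 = 1073741824 bytes/day. With interval = 60s,
# intervalsperday = 86400 / 60 = 1440, so perintervalvolume =
# 1073741824 / 1440 ≈ 745654 bytes (~0.7111 MB) per interval before the
# rate factor is applied.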
def updateConfig(self, config):
    OutputPlugin.updateConfig(self, config)
    try:
        if hasattr(self.config, 'httpeventServers') is False:
            if hasattr(self._sample, 'httpeventServers'):
                self.config.httpeventServers = self._sample.httpeventServers
            else:
                logger.error('outputMode %s but httpeventServers not specified for sample %s' %
                             (self.name, self._sample.name))
                raise NoServers('outputMode %s but httpeventServers not specified for sample %s' %
                                (self.name, self._sample.name))
        # set default output mode to round robin
        if hasattr(self.config, 'httpeventOutputMode') and self.config.httpeventOutputMode:
            self.httpeventoutputmode = config.httpeventOutputMode
        else:
            if hasattr(self._sample, 'httpeventOutputMode') and self._sample.httpeventOutputMode:
                self.httpeventoutputmode = self._sample.httpeventOutputMode
            else:
                self.httpeventoutputmode = 'roundrobin'
        if hasattr(self.config, 'httpeventMaxPayloadSize') and self.config.httpeventMaxPayloadSize:
            self.httpeventmaxsize = self.config.httpeventMaxPayloadSize
        else:
            if hasattr(self._sample, 'httpeventMaxPayloadSize') and self._sample.httpeventMaxPayloadSize:
                self.httpeventmaxsize = self._sample.httpeventMaxPayloadSize
            else:
                self.httpeventmaxsize = 10000
        logger.debug("Current max size: %s " % self.httpeventmaxsize)
        if isinstance(config.httpeventServers, str):
            self.httpeventServers = json.loads(config.httpeventServers)
        else:
            self.httpeventServers = config.httpeventServers
        logger.debug("Setting up the connection pool for %s in %s" % (self._sample.name, self._app))
        self.createConnections()
        logger.debug("Pool created.")
        logger.debug("Finished init of %s plugin." % self.name)
    except Exception as e:
        logger.exception(str(e))