def _transmitEvents(self, payloadstring):
    targetServer = []
    logger.debug("Transmission called with payloadstring: %s " % payloadstring)
    if self.httpeventoutputmode == "mirror":
        targetServer = self.serverPool
    else:
        targetServer.append(random.choice(self.serverPool))
    for server in targetServer:
        logger.debug("Selected targetServer object: %s" % server)
        url = server["url"]
        headers = {}
        headers["Authorization"] = server["header"]
        headers["content-type"] = "application/json"
        # Compute the size up front so it is available in the error handler
        payloadsize = len(payloadstring)
        try:
            self.active_sessions.append(
                self.session.post(url=url, data=payloadstring, headers=headers, verify=False))
        except Exception as e:
            logger.error("Failed for exception: %s" % e)
            logger.error("Failed sending events to url: %s sourcetype: %s size: %s" %
                         (url, self.lastsourcetype, payloadsize))
            logger.debug("Failed sending events to url: %s headers: %s payload: %s" %
                         (url, headers, payloadstring))
            raise e
def __init__(self, time, sample=None, config=None, genqueue=None, outputqueue=None, loggingqueue=None):
    # Logger already setup by config, just get an instance
    # setup default options
    self.profiler = config.profiler
    self.config = config
    self.sample = sample
    self.end = getattr(self.sample, "end", -1)
    self.endts = getattr(self.sample, "endts", None)
    self.generatorQueue = genqueue
    self.outputQueue = outputqueue
    self.time = time
    self.stopping = False
    self.countdown = 0
    self.executions = 0
    self.interval = getattr(self.sample, "interval", config.interval)
    # Parenthesized so only the sample name, not the whole message, is conditional
    logger.debug('Initializing timer for %s' % (sample.name if sample is not None else "None"))
    # load plugins
    if self.sample is not None:
        rater_class = self.config.getPlugin('rater.' + self.sample.rater, self.sample)
        self.rater = rater_class(self.sample)
        self.generatorPlugin = self.config.getPlugin('generator.' + self.sample.generator, self.sample)
        self.outputPlugin = self.config.getPlugin('output.' + self.sample.outputMode, self.sample)
        if self.sample.timeMultiple < 0:
            logger.error("Invalid setting for timeMultiple: {}, value should be positive".format(
                self.sample.timeMultiple))
        elif self.sample.timeMultiple != 1:
            self.interval = self.sample.interval
            logger.debug("Adjusting interval {} with timeMultiple {}, new interval: {}".format(
                self.sample.interval, self.sample.timeMultiple, self.interval))
        logger.info("Start '%s' generatorWorkers for sample '%s'" %
                    (self.sample.config.generatorWorkers, self.sample.name))
def __init__(self, sample, output_counter=None):
    # Override maxQueueLength to EventPerKey so that each flush
    # will generate one aws key
    if sample.awsS3EventPerKey:
        sample.maxQueueLength = sample.awsS3EventPerKey
    OutputPlugin.__init__(self, sample, output_counter)
    if not boto_imported:
        logger.error("There is no boto3 or botocore library available")
        return
    # disable any "requests" warnings
    requests.packages.urllib3.disable_warnings()
    # Bind passed in samples to the outputter.
    self.awsS3compressiontype = (sample.awsS3CompressionType
                                 if hasattr(sample, "awsS3CompressionType") and sample.awsS3CompressionType
                                 else None)
    self.awsS3eventtype = (sample.awsS3EventType
                           if hasattr(sample, "awsS3EventType") and sample.awsS3EventType
                           else "syslog")
    self.awsS3objectprefix = (sample.awsS3ObjectPrefix
                              if hasattr(sample, "awsS3ObjectPrefix") and sample.awsS3ObjectPrefix
                              else "")
    self.awsS3objectsuffix = (sample.awsS3ObjectSuffix
                              if hasattr(sample, "awsS3ObjectSuffix") and sample.awsS3ObjectSuffix
                              else "")
    self.awsS3bucketname = sample.awsS3BucketName
    logger.debug("Setting up the connection pool for %s in %s" % (self._sample.name, self._app))
    self._client = None
    self._createConnections(sample)
    logger.debug("Finished init of awsS3 plugin.")
def __init__(self, sample, output_counter=None):
    OutputPlugin.__init__(self, sample, output_counter)
    if sample.fileName is None:
        logger.error("outputMode file but file not specified for sample %s" % self._sample.name)
        raise ValueError("outputMode file but file not specified for sample %s" % self._sample.name)
    self._file = sample.pathParser(sample.fileName)
    self._fileMaxBytes = sample.fileMaxBytes
    self._fileBackupFiles = sample.fileBackupFiles
    self._fileFiles = sample.fileFiles

    def new_name(name, i):
        split = name.rsplit(".", 1)
        if len(split) == 1:
            return "{}_{}".format(name, i)
        else:
            return "{}_{}.{}".format(split[0], i, split[1])

    self._multifiles = [new_name(self._file, i) for i in range(int(self._fileFiles))]
    self._fileHandles = [open(file, "a") for file in self._multifiles]
    self._fileLengths = [os.stat(file).st_size for file in self._multifiles]
def renewAccessToken(self):
    response = requests.post(self.tokenRenewEndPoint, data=self.tokenRenewBody, timeout=5)
    if response.status_code == 200:
        logger.info("Renewal of the access token successful")
        self.scsAccessToken = response.json()["access_token"]
        setattr(self._sample, "scsAccessToken", self.scsAccessToken)
        self.accessTokenExpired = False
    else:
        logger.error("Renewal of the access token failed")
def queue_it(self, count):
    try:
        realtime = self.sample.now(realnow=True)
        if "-" in self.sample.backfill[0]:
            mathsymbol = "-"
        else:
            mathsymbol = "+"
        backfillnumber = ""
        backfillletter = ""
        for char in self.sample.backfill:
            if char.isdigit():
                backfillnumber += char
            elif char != "-":
                backfillletter += char
        backfillearliest = timeParserTimeMath(
            plusminus=mathsymbol,
            num=backfillnumber,
            unit=backfillletter,
            ret=realtime,
        )
        while backfillearliest < realtime:
            et = backfillearliest
            lt = timeParserTimeMath(plusminus="+", num=self.sample.interval, unit="s", ret=et)
            genPlugin = self.generatorPlugin(sample=self.sample)
            genPlugin.updateCounts(count=count, start_time=et, end_time=lt)
            genPlugin.updateConfig(config=self.config, outqueue=self.outputQueue)
            try:
                # Need to lock on replay mode since event duration is dynamic. Interval starts counting
                # after the replay has finished.
                if self.sample.generator == "replay":
                    genPlugin.run()
                else:
                    self.generatorQueue.put(genPlugin)
            except Full:
                logger.warning("Generator Queue Full. Skipping current generation.")
            # Because replays need to iterate in reverse, it's more efficient to process backfill
            # after the file has been parsed. This section lets replay mode take care of all
            # replays on its first run, then sets backfilldone.
            if self.sample.generator == "replay":
                backfillearliest = realtime
            else:
                backfillearliest = lt
        if self.sample.generator != "replay":
            self.sample.backfilldone = True
    except Exception as e:
        logger.error("Failed queuing backfill, exception: {0}".format(e))
def processSampleLine(self, filehandler):
    """
    Due to a change in python3, utf-8 is now the default encoding when reading a file.
    To get around this we need the process loop outside of the filehandler.
    :param filehandler:
    :return:
    """
    sampleLines = []
    if self.breaker == self.config.breaker:
        logger.debug("Reading raw sample '%s' in app '%s'" % (self.name, self.app))
        sampleLines = filehandler.readlines()
        # 1/5/14 CS Moving to using only sampleDict and doing the breaking up into events at load time
        # instead of on every generation
    else:
        logger.debug("Non-default breaker '%s' detected for sample '%s' in app '%s'" %
                     (self.breaker, self.name, self.app))
        sampleData = filehandler.read()
        logger.debug("Filling array for sample '%s' in app '%s'; sampleData=%s, breaker=%s" %
                     (self.name, self.app, len(sampleData), self.breaker))
        try:
            breakerRE = re.compile(self.breaker, re.M)
        except re.error:
            logger.error(
                "Line breaker '%s' for sample '%s' in app '%s'"
                " could not be compiled; using default breaker",
                self.breaker, self.name, self.app,
            )
            self.breaker = self.config.breaker
            # Recompile with the default breaker so the search below has a valid regex
            breakerRE = re.compile(self.breaker, re.M)
        # Loop through data, finding matches of the regular expression and breaking them up into
        # "lines". Each match includes the breaker itself.
        extractpos = 0
        searchpos = 0
        breakerMatch = breakerRE.search(sampleData, searchpos)
        while breakerMatch:
            logger.debug("Breaker found at: %d, %d" %
                         (breakerMatch.span()[0], breakerMatch.span()[1]))
            # Ignore matches at the beginning of the file
            if breakerMatch.span()[0] != 0:
                sampleLines.append(sampleData[extractpos:breakerMatch.span()[0]])
                extractpos = breakerMatch.span()[0]
            searchpos = breakerMatch.span()[1]
            breakerMatch = breakerRE.search(sampleData, searchpos)
        sampleLines.append(sampleData[extractpos:])
    return sampleLines
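# A minimal, self-contained sketch (not part of the plugin) of how a non-default
# breaker regex carves sample data into events, mirroring the loop above. The
# date-based breaker pattern here is an illustrative assumption, not a shipped default.
import re

def split_on_breaker(sample_data, breaker=r"\d{4}-\d{2}-\d{2}"):
    breaker_re = re.compile(breaker, re.M)
    lines, extractpos, searchpos = [], 0, 0
    match = breaker_re.search(sample_data, searchpos)
    while match:
        # Matches at offset 0 are skipped so the first event keeps its breaker
        if match.span()[0] != 0:
            lines.append(sample_data[extractpos:match.span()[0]])
            extractpos = match.span()[0]
        searchpos = match.span()[1]
        match = breaker_re.search(sample_data, searchpos)
    lines.append(sample_data[extractpos:])
    return lines

# split_on_breaker("2024-01-01 a\n2024-01-02 b\n")
# -> ['2024-01-01 a\n', '2024-01-02 b\n']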
def gen(self, count, earliest, latest, samplename=None):
    # 9/8/15 CS Check to make sure we have events to replay
    self._sample.loadSample()
    self.current_time = self._sample.now()
    line_list = self.load_sample_file()
    # If backfill exists, calculate the start of the backfill time relative to the current time.
    # Otherwise, backfill time equals the current time.
    self.backfill_time = self._sample.get_backfill_time(self.current_time)
    # if we have backfill, replay the events backwards until we hit the backfill
    if self.backfill_time != self.current_time and not self._sample.backfilldone:
        backfill_count_time = self.current_time
        current_backfill_index = len(line_list) - 1
        backfill_events = []
        while backfill_count_time >= self.backfill_time:
            rpevent = line_list[current_backfill_index]
            backfill_count_time = backfill_count_time - rpevent["timediff"]
            backfill_events.append(
                self.set_time_and_tokens(rpevent, backfill_count_time, earliest, latest))
            current_backfill_index -= 1
            if current_backfill_index < 0:
                current_backfill_index = len(line_list) - 1
        backfill_events.reverse()
        self._out.bulksend(backfill_events)
        self._sample.backfilldone = True
    previous_event = None
    for index, rpevent in enumerate(line_list):
        if previous_event is None:
            current_event = self.set_time_and_tokens(rpevent, self.backfill_time, earliest, latest)
            previous_event = current_event
            previous_event_timediff = rpevent["timediff"]
            self._out.bulksend([current_event])
            continue
        try:
            time.sleep(previous_event_timediff.total_seconds())
        except ValueError:
            logger.error(
                "Can't sleep for negative time; please make sure your events are in time order. "
                "See line number {0}".format(index))
            logger.error("Event: {0}".format(rpevent))
        current_time = datetime.datetime.now()
        previous_event = rpevent
        previous_event_timediff = rpevent["timediff"]
        send_event = self.set_time_and_tokens(rpevent, current_time, earliest, latest)
        self._out.bulksend([send_event])
    self._out.flush(endOfInterval=True)
    return
def bulksend(self, msglist):
    """
    Accepts a list, msglist, and adds it to the output buffer.
    If the buffer reaches MAXQUEUELENGTH, flush it.
    """
    try:
        self._queue.extend(msglist)
        if len(self._queue) >= self.MAXQUEUELENGTH:
            self.flush()
    except Exception as e:
        # We don't want to exit if there's a single bad event
        logger.error(
            "Caught exception {} while appending/flushing output queue. There may be a "
            "faulty event or token replacement in your sample.".format(e))
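# A minimal sketch (assumed names, not the real plugin class) of the buffering
# contract bulksend() implements: events accumulate in an in-memory queue and
# are flushed as one batch once MAXQUEUELENGTH is reached.
class BufferedOut:
    MAXQUEUELENGTH = 3  # illustrative; the real value comes from plugin config

    def __init__(self):
        self._queue, self.flushed = [], []

    def flush(self):
        self.flushed.append(list(self._queue))
        self._queue.clear()

    def bulksend(self, msglist):
        self._queue.extend(msglist)
        if len(self._queue) >= self.MAXQUEUELENGTH:
            self.flush()

out = BufferedOut()
out.bulksend([{"_raw": "a"}])                  # buffered, no flush yet
out.bulksend([{"_raw": "b"}, {"_raw": "c"}])   # hits MAXQUEUELENGTH -> one batch of 3
assert len(out.flushed) == 1 and len(out.flushed[0]) == 3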
def checkResults(self):
    for session in self.active_sessions:
        response = session.result()
        if response.status_code == 401 and "Invalid or Expired Bearer Token" in response.text:
            logger.error("scsAccessToken is invalid or expired")
            self.accessTokenExpired = True
            return False
        elif response.status_code != 200:
            logger.error(
                f"Data transmission failed with {response.status_code} and {response.text}")
            return False
    logger.debug("Data transmission successful")
    return True
def __init__(self, sample, output_counter=None):
    OutputPlugin.__init__(self, sample, output_counter)

    from splunk_eventgen.lib.eventgenconfig import Config
    globals()['c'] = Config()

    self._splunkUrl, self._splunkMethod, self._splunkHost, self._splunkPort = c.getSplunkUrl(self._sample)  # noqa
    self._splunkUser = self._sample.splunkUser
    self._splunkPass = self._sample.splunkPass

    # Cancel SSL verification
    import ssl
    ssl._create_default_https_context = ssl._create_unverified_context

    if not self._sample.sessionKey:
        try:
            myhttp = httplib2.Http(disable_ssl_certificate_validation=True)
            logger.debug("Getting session key from '%s' with user '%s' and pass '%s'" %
                         (self._splunkUrl + '/services/auth/login', self._splunkUser, self._splunkPass))
            response = myhttp.request(
                self._splunkUrl + '/services/auth/login', 'POST', headers={},
                body=urllib.parse.urlencode({
                    'username': self._splunkUser,
                    'password': self._splunkPass
                }))[1]
            self._sample.sessionKey = minidom.parseString(response).getElementsByTagName(
                'sessionKey')[0].childNodes[0].nodeValue
            logger.debug("Got new session for splunkstream, sessionKey '%s'" % self._sample.sessionKey)
        except Exception:
            logger.error("Error getting session key for non-SPLUNK_EMBEDDED for sample '%s'."
                         % self._sample.name + " Credentials are missing or wrong")
            raise IOError("Error getting session key for non-SPLUNK_EMBEDDED for sample '%s'."
                          % self._sample.name + " Credentials are missing or wrong")

    logger.debug("Retrieved session key '%s' for Splunk session for sample '%s'" %
                 (self._sample.sessionKey, self._sample.name))
def get_backfill_time(self, current_time):
    if not current_time:
        current_time = self.now()
    if not self.backfill:
        return current_time
    else:
        if self.backfill[0] == '-':
            backfill_time = self.backfill[1:-1]
            time_unit = self.backfill[-1]
            if self.backfill[-2:] == 'ms':
                time_unit = 'ms'
                backfill_time = self.backfill[1:-2]
            return self.get_time_difference(current_time=current_time, different_time=backfill_time,
                                            sign='-', time_unit=time_unit)
        else:
            logger.error("Backfill time is not in the past.")
            return current_time
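# Hedged examples of the backfill strings get_backfill_time() expects: a leading
# '-' sign, a number, and a trailing unit character, with 'ms' as the only
# two-character unit. The sample values below are illustrative only.
#   backfill = "-15m"   -> backfill_time="15",  time_unit="m"
#   backfill = "-500ms" -> backfill_time="500", time_unit="ms"
#   backfill = "15m"    -> error logged ("not in the past"), current_time returned unchanged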
def flush(self, q):
    if len(q) > 0:
        logger.debug("Flushing output for sample '%s' in app '%s' for queue '%s'" %
                     (self._sample.name, self._app, self._sample.source))
        data = ''.join(event['_raw'] for event in q if event.get('_raw'))
        # Keep trying to open destination file as it might be touched by other processes
        while True:
            try:
                with open(self.spoolPath, 'a') as dst:
                    dst.write(data)
                break
            except Exception as e:
                logger.error(str(e))
                time.sleep(0.1)
        logger.debug("Queue for app '%s' sample '%s' written" % (self._app, self._sample.name))
def updateConfig(self, config):
    OutputPlugin.updateConfig(self, config)
    try:
        if not hasattr(self.config, 'httpeventServers'):
            if hasattr(self._sample, 'httpeventServers'):
                self.config.httpeventServers = self._sample.httpeventServers
            else:
                logger.error('outputMode %s but httpeventServers not specified for sample %s' %
                             (self.name, self._sample.name))
                raise NoServers('outputMode %s but httpeventServers not specified for sample %s' %
                                (self.name, self._sample.name))
        # set default output mode to round robin
        if hasattr(self.config, 'httpeventOutputMode') and self.config.httpeventOutputMode:
            self.httpeventoutputmode = config.httpeventOutputMode
        else:
            if hasattr(self._sample, 'httpeventOutputMode') and self._sample.httpeventOutputMode:
                self.httpeventoutputmode = self._sample.httpeventOutputMode
            else:
                self.httpeventoutputmode = 'roundrobin'
        if hasattr(self.config, 'httpeventMaxPayloadSize') and self.config.httpeventMaxPayloadSize:
            self.httpeventmaxsize = self.config.httpeventMaxPayloadSize
        else:
            if hasattr(self._sample, 'httpeventMaxPayloadSize') and self._sample.httpeventMaxPayloadSize:
                self.httpeventmaxsize = self._sample.httpeventMaxPayloadSize
            else:
                self.httpeventmaxsize = 10000
        logger.debug("Current max size: %s " % self.httpeventmaxsize)
        if isinstance(config.httpeventServers, str):
            self.httpeventServers = json.loads(config.httpeventServers)
        else:
            self.httpeventServers = config.httpeventServers
        logger.debug("Setting up the connection pool for %s in %s" % (self._sample.name, self._app))
        self.createConnections()
        logger.debug("Pool created.")
        logger.debug("Finished init of %s plugin." % self.name)
    except Exception as e:
        logger.exception(str(e))
def flush(self, q): logger.debug("Flush called on httpevent plugin") self._setup_REST_workers() if len(q) > 0: try: payload = [] logger.debug("Currently being called with %d events" % len(q)) for event in q: logger.debug("HTTPEvent proccessing event: %s" % event) payloadFragment = {} if event.get('_raw') is None or event['_raw'] == "\n": logger.error('failure outputting event, does not contain _raw') else: logger.debug("Event contains _raw, attempting to process...") payloadFragment['event'] = event['_raw'] if event.get('source'): logger.debug("Event contains source, adding to httpevent event") payloadFragment['source'] = event['source'] if event.get('sourcetype'): logger.debug("Event contains sourcetype, adding to httpevent event") payloadFragment['sourcetype'] = event['sourcetype'] self.lastsourcetype = event['sourcetype'] if event.get('host'): logger.debug("Event contains host, adding to httpevent event") payloadFragment['host'] = event['host'] if event.get('_time'): # make sure _time can be an epoch timestamp try: float(event.get("_time")) logger.debug("Event contains _time, adding to httpevent event") payloadFragment['time'] = event['_time'] except: logger.error("Timestamp not in epoch format, ignoring event: {0}".format(event)) if event.get('index'): logger.debug("Event contains index, adding to httpevent event") payloadFragment['index'] = event['index'] logger.debug("Full payloadFragment: %s" % json.dumps(payloadFragment)) payload.append(payloadFragment) logger.debug("Finished processing events, sending all to splunk") self._sendHTTPEvents(payload) payload = [] if self.config.httpeventWaitResponse: for session in self.active_sessions: response = session.result() if not response.raise_for_status(): logger.debug("Payload successfully sent to httpevent server.") else: logger.error("Server returned an error while trying to send, response code: %s" % response.status_code) raise BadConnection( "Server returned an error while sending, response code: %s" % response.status_code) else: logger.debug("Ignoring response from HTTP server, leaving httpevent outputter") except Exception as e: logger.error('failed indexing events, reason: %s ' % e)
def rate(self):
    self.sample.count = int(self.sample.count)
    # Let generators handle infinite count for themselves
    if self.sample.count == -1 and self.sample.generator == "default":
        if not self.sample.sampleDict:
            logger.error("No sample found for default generator, cannot generate events")
        self.sample.count = len(self.sample.sampleDict)
    count = self.sample.count
    rateFactor = self.adjust_rate_factor()
    ret = int(round(count * rateFactor, 0))
    if rateFactor != 1.0:
        logger.debug("Original count: %s Rated count: %s Rate factor: %s" % (count, ret, rateFactor))
    return ret
def __init__(self, sample, output_counter=None):
    OutputPlugin.__init__(self, sample, output_counter)
    if sample.fileName is None:
        logger.error("outputMode file but file not specified for sample %s" % self._sample.name)
        raise ValueError("outputMode file but file not specified for sample %s" % self._sample.name)
    self._file = sample.pathParser(sample.fileName)
    self._fileMaxBytes = sample.fileMaxBytes
    self._fileBackupFiles = sample.fileBackupFiles
    self._fileHandle = open(self._file, "a")
    self._fileLength = os.stat(self._file).st_size
    logger.debug("Configured to log to '%s' with maxBytes '%s' with backupCount '%s'" %
                 (self._file, self._fileMaxBytes, self._fileBackupFiles))
def createConnections(self):
    self.serverPool = []
    if self.httpeventServers:
        for server in self.httpeventServers.get("servers"):
            if not server.get("address"):
                logger.error("requested a connection to a httpevent server, but no address specified "
                             "for sample %s" % self._sample.name)
                raise ValueError("requested a connection to a httpevent server, but no address specified "
                                 "for sample %s" % self._sample.name)
            if not server.get("port"):
                logger.error("requested a connection to a httpevent server, but no port specified "
                             "for server %s" % server)
                raise ValueError("requested a connection to a httpevent server, but no port specified "
                                 "for server %s" % server)
            if not server.get("key"):
                logger.error("requested a connection to a httpevent server, but no key specified "
                             "for server %s" % server)
                raise ValueError("requested a connection to a httpevent server, but no key specified "
                                 "for server %s" % server)
            if not ((server.get("protocol") == "http") or (server.get("protocol") == "https")):
                logger.error("requested a connection to a httpevent server, but no valid protocol "
                             "specified for server %s" % server)
                raise ValueError("requested a connection to a httpevent server, but no valid protocol "
                                 "specified for server %s" % server)
            logger.debug("Validation Passed, Creating a requests object for server: %s" %
                         server.get("address"))
            setserver = {}
            setserver["url"] = "%s://%s:%s/services/collector" % (
                server.get("protocol"),
                server.get("address"),
                server.get("port"),
            )
            setserver["header"] = "Splunk %s" % server.get("key")
            logger.debug("Adding server set to pool, server: %s" % setserver)
            self.serverPool.append(setserver)
    else:
        raise NoServers("outputMode %s but httpeventServers not specified for sample %s" %
                        (self.name, self._sample.name))
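# A hedged sketch of the httpeventServers structure createConnections() validates.
# Field names match the .get() calls above; the host, port, and token values are
# placeholders, not real defaults.
EXAMPLE_HTTPEVENT_SERVERS = {
    "servers": [
        {
            "protocol": "https",     # must be "http" or "https"
            "address": "localhost",  # required
            "port": "8088",          # required
            "key": "00000000-0000-0000-0000-000000000000",  # HEC token, required
        }
    ]
}
# Each valid entry becomes
#   {"url": "https://localhost:8088/services/collector", "header": "Splunk 00000000-..."}
# in self.serverPool.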
def _transmitEvents(self, payloadstring):
    logger.debug("Transmission called with payloadstring event number: %d " % len(payloadstring))
    records = "".join(payloadstring)
    # Different key prefix for different log type
    if self.awsS3eventtype == "elbaccesslog":
        s3keyname = (self.awsS3objectprefix + datetime.datetime.utcnow().strftime("%Y%m%dT%H%MZ") +
                     "_" + str(uuid.uuid1()) + self.awsS3objectsuffix)
    elif self.awsS3eventtype == "s3accesslog":
        s3keyname = (self.awsS3objectprefix +
                     datetime.datetime.utcnow().strftime("%Y-%m-%d-%H-%M-%S") + "-" +
                     str(uuid.uuid1()).replace("-", "").upper()[0:15] + self.awsS3objectsuffix)
    else:
        s3keyname = (self.awsS3objectprefix + datetime.datetime.utcnow().isoformat() +
                     str(uuid.uuid1()) + self.awsS3objectsuffix)
    logger.debug("Uploading %d characters into s3 key: %s " % (len(records), s3keyname))
    if self.awsS3compressiontype == "gz":
        import io
        import gzip
        # gzip needs a bytes buffer under python3, so encode before compressing
        out = io.BytesIO()
        with gzip.GzipFile(fileobj=out, mode="wb") as f:
            f.write(records.encode("utf-8"))
        records = out.getvalue()
    try:
        response = self._client.put_object(Bucket=self.awsS3bucketname, Key=s3keyname, Body=records)
        logger.debug("response = %s" % response)
    except Exception as e:
        logger.error("Failed for exception: %s" % e)
        logger.debug("Failed sending events to payload: %s" % (payloadstring))
        raise e
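# Illustrative shapes of the S3 key names produced above (prefix/suffix empty,
# timestamps frozen for the example; the UUID portions are placeholders):
#   elbaccesslog: 20240101T1200Z_9f0c1c2e-...-...
#   s3accesslog:  2024-01-01-12-00-00-9F0C1C2E9D0B4A1
#   default:      2024-01-01T12:00:00.0000009f0c1c2e-...-...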
def createConnections(self):
    self.serverPool = []
    if self.httpeventServers:
        for server in self.httpeventServers.get('servers'):
            if not server.get('address'):
                logger.error('requested a connection to a httpevent server, but no address specified '
                             'for sample %s' % self._sample.name)
                raise ValueError('requested a connection to a httpevent server, but no address specified '
                                 'for sample %s' % self._sample.name)
            if not server.get('port'):
                logger.error('requested a connection to a httpevent server, but no port specified '
                             'for server %s' % server)
                raise ValueError('requested a connection to a httpevent server, but no port specified '
                                 'for server %s' % server)
            if not server.get('key'):
                logger.error('requested a connection to a httpevent server, but no key specified '
                             'for server %s' % server)
                raise ValueError('requested a connection to a httpevent server, but no key specified '
                                 'for server %s' % server)
            if not ((server.get('protocol') == 'http') or (server.get('protocol') == 'https')):
                logger.error('requested a connection to a httpevent server, but no valid protocol '
                             'specified for server %s' % server)
                raise ValueError('requested a connection to a httpevent server, but no valid protocol '
                                 'specified for server %s' % server)
            logger.debug("Validation Passed, Creating a requests object for server: %s" %
                         server.get('address'))
            setserver = {}
            setserver['url'] = "%s://%s:%s/services/collector" % (
                server.get('protocol'), server.get('address'), server.get('port'))
            setserver['header'] = "Splunk %s" % server.get('key')
            logger.debug("Adding server set to pool, server: %s" % setserver)
            self.serverPool.append(setserver)
    else:
        raise NoServers('outputMode %s but httpeventServers not specified for sample %s' %
                        (self.name, self._sample.name))
def flush(self, q): logger.debug("Flush called on awsS3 plugin with length %d" % len(q)) if len(q) > 0: try: payload = [] logger.debug("Currently being called with %d events" % len(q)) for event in q: if event.get('_raw') is None: logger.error( 'failure outputting event, does not contain _raw') else: payload.append(event['_raw']) logger.debug( "Finished processing events, sending all to AWS S3") self._sendPayloads(payload) except Exception as e: import traceback logger.error(traceback.print_exc()) logger.error('failed sending events, reason: %s ' % e)
def _createConnections(self, sample):
    try:
        if hasattr(sample, "awsKeyId") and hasattr(sample, "awsSecretKey"):
            self._client = boto3.client(
                "s3",
                region_name=sample.awsRegion,
                aws_access_key_id=sample.awsKeyId,
                aws_secret_access_key=sample.awsSecretKey,
            )
            if self._client is None:
                msg = """
[your_eventgen_stanza]
awsKeyId = YOUR_ACCESS_KEY
awsSecretKey = YOUR_SECRET_KEY
"""
                logger.error("Failed to init boto3 client; you should define correct 'awsKeyId' "
                             "and 'awsSecretKey' in eventgen conf: %s" % msg)
                raise Exception(msg)
        else:
            self._client = boto3.client("s3", region_name=sample.awsRegion)
    except Exception as e:
        logger.error("Failed to init boto3 client: exception = %s" % e)
        raise e
    # Try the list buckets method to validate that the connection works
    try:
        self._client.list_buckets()
    except botocore.exceptions.NoCredentialsError:
        msg = """
[default]
aws_access_key_id = YOUR_ACCESS_KEY
aws_secret_access_key = YOUR_SECRET_KEY
"""
        logger.error("Failed to init boto3 client, you should create "
                     "'~/.aws/credentials' with credential info %s" % msg)
        raise
    logger.debug("Init conn done, conn = %s" % self._client)
def rate(self):
    self._sample.count = int(self._sample.count)
    # Let generators handle infinite count for themselves
    if self._sample.count == -1 and self._sample.generator == 'default':
        if not self._sample.sampleDict:
            logger.error('No sample found for default generator, cannot generate events')
        self._sample.count = len(self._sample.sampleDict)
    self._generatorWorkers = int(self._generatorWorkers)
    count = self._sample.count / self._generatorWorkers
    # 5/8/12 CS We've requested not the whole file, so we should adjust count based on
    # hourOfDay, dayOfWeek and randomizeCount configs
    rateFactor = 1.0
    if self._sample.randomizeCount:
        try:
            logger.debug("randomizeCount for sample '%s' in app '%s' is %s" %
                         (self._sample.name, self._sample.app, self._sample.randomizeCount))
            # If we say we're going to be 20% variable, then that means we
            # can be 10% high or 10% low. Math below does that.
            randBound = round(self._sample.randomizeCount * 1000, 0)
            rand = random.randint(0, randBound)
            randFactor = 1 + ((-((randBound / 2) - rand)) / 1000)
            logger.debug("randFactor for sample '%s' in app '%s' is %s" %
                         (self._sample.name, self._sample.app, randFactor))
            rateFactor *= randFactor
        except Exception:
            import traceback
            stack = traceback.format_exc()
            logger.error("Randomize count failed for sample '%s'. Stacktrace %s" %
                         (self._sample.name, stack))
    if type(self._sample.hourOfDayRate) == dict:
        try:
            rate = self._sample.hourOfDayRate[str(self._sample.now().hour)]
            logger.debug("hourOfDayRate for sample '%s' in app '%s' is %s" %
                         (self._sample.name, self._sample.app, rate))
            rateFactor *= rate
        except KeyError:
            import traceback
            stack = traceback.format_exc()
            logger.error("Hour of day rate failed for sample '%s'. Stacktrace %s" %
                         (self._sample.name, stack))
    if type(self._sample.dayOfWeekRate) == dict:
        try:
            weekday = datetime.date.weekday(self._sample.now())
            if weekday == 6:
                weekday = 0
            else:
                weekday += 1
            rate = self._sample.dayOfWeekRate[str(weekday)]
            logger.debug("dayOfWeekRate for sample '%s' in app '%s' is %s" %
                         (self._sample.name, self._sample.app, rate))
            rateFactor *= rate
        except KeyError:
            import traceback
            stack = traceback.format_exc()
            logger.error("Day of week rate failed for sample '%s'. Stacktrace %s" %
                         (self._sample.name, stack))
    if type(self._sample.minuteOfHourRate) == dict:
        try:
            rate = self._sample.minuteOfHourRate[str(self._sample.now().minute)]
            logger.debug("minuteOfHourRate for sample '%s' in app '%s' is %s" %
                         (self._sample.name, self._sample.app, rate))
            rateFactor *= rate
        except KeyError:
            import traceback
            stack = traceback.format_exc()
            logger.error("Minute of hour rate failed for sample '%s'. Stacktrace %s" %
                         (self._sample.name, stack))
    if type(self._sample.dayOfMonthRate) == dict:
        try:
            rate = self._sample.dayOfMonthRate[str(self._sample.now().day)]
            logger.debug("dayOfMonthRate for sample '%s' in app '%s' is %s" %
                         (self._sample.name, self._sample.app, rate))
            rateFactor *= rate
        except KeyError:
            import traceback
            stack = traceback.format_exc()
            logger.error("Day of month rate failed for sample '%s'. Stacktrace %s" %
                         (self._sample.name, stack))
    if type(self._sample.monthOfYearRate) == dict:
        try:
            rate = self._sample.monthOfYearRate[str(self._sample.now().month)]
            logger.debug("monthOfYearRate for sample '%s' in app '%s' is %s" %
                         (self._sample.name, self._sample.app, rate))
            rateFactor *= rate
        except KeyError:
            import traceback
            stack = traceback.format_exc()
            logger.error("Month of year rate failed for sample '%s'. Stacktrace %s" %
                         (self._sample.name, stack))
    ret = int(round(count * rateFactor, 0))
    if rateFactor != 1.0:
        logger.debug("Original count: %s Rated count: %s Rate factor: %s" % (count, ret, rateFactor))
    return ret
def loadSample(self):
    """
    Load sample from disk into self._sample.sampleLines and self._sample.sampleDict,
    using cached copy if possible.
    """
    if self.sampletype == "raw":
        # 5/27/12 CS Added caching of the sample file
        if self.sampleDict is None:
            with open(self.filePath, "r") as fh:
                if self.breaker == self.config.breaker:
                    logger.debug("Reading raw sample '%s' in app '%s'" % (self.name, self.app))
                    self.sampleLines = fh.readlines()
                    # 1/5/14 CS Moving to using only sampleDict and doing the breaking up into events
                    # at load time instead of on every generation
                else:
                    logger.debug("Non-default breaker '%s' detected for sample '%s' in app '%s'" %
                                 (self.breaker, self.name, self.app))
                    sampleData = fh.read()
                    self.sampleLines = []
                    logger.debug("Filling array for sample '%s' in app '%s'; sampleData=%s, breaker=%s" %
                                 (self.name, self.app, len(sampleData), self.breaker))
                    try:
                        breakerRE = re.compile(self.breaker, re.M)
                    except re.error:
                        logger.error(
                            "Line breaker '%s' for sample '%s' in app '%s'"
                            " could not be compiled; using default breaker",
                            self.breaker, self.name, self.app,
                        )
                        self.breaker = self.config.breaker
                        # Recompile with the default breaker so the search below has a valid regex
                        breakerRE = re.compile(self.breaker, re.M)
                    # Loop through data, finding matches of the regular expression and breaking them up
                    # into "lines". Each match includes the breaker itself.
                    extractpos = 0
                    searchpos = 0
                    breakerMatch = breakerRE.search(sampleData, searchpos)
                    while breakerMatch:
                        logger.debug("Breaker found at: %d, %d" %
                                     (breakerMatch.span()[0], breakerMatch.span()[1]))
                        # Ignore matches at the beginning of the file
                        if breakerMatch.span()[0] != 0:
                            self.sampleLines.append(sampleData[extractpos:breakerMatch.span()[0]])
                            extractpos = breakerMatch.span()[0]
                        searchpos = breakerMatch.span()[1]
                        breakerMatch = breakerRE.search(sampleData, searchpos)
                    self.sampleLines.append(sampleData[extractpos:])
            self.sampleDict = []
            for line in self.sampleLines:
                if line == "\n":
                    continue
                if line and line[-1] != "\n":
                    line = line + "\n"
                self.sampleDict.append({
                    "_raw": line,
                    "index": self.index,
                    "host": self.host,
                    "source": self.source,
                    "sourcetype": self.sourcetype,
                })
            logger.debug(
                "Finished creating sampleDict & sampleLines. Len sampleLines: %d Len sampleDict: %d" %
                (len(self.sampleLines), len(self.sampleDict)))
    elif self.sampletype == "csv":
        if self.sampleDict is None:
            with open(self.filePath, "r") as fh:
                logger.debug("Reading csv sample '%s' in app '%s'" % (self.name, self.app))
                self.sampleDict = []
                self.sampleLines = []
                # Fix to load large csv files, works with python 2.5 onwards
                csv.field_size_limit(sys.maxsize)
                csvReader = csv.DictReader(fh)
                for line in csvReader:
                    if "_raw" in line:
                        # Use conf-defined values for these params instead of sample-defined ones
                        current_line_keys = list(line.keys())
                        if "host" not in current_line_keys:
                            line["host"] = self.host
                        if "hostRegex" not in current_line_keys:
                            line["hostRegex"] = self.hostRegex
                        if "source" not in current_line_keys:
                            line["source"] = self.source
                        if "sourcetype" not in current_line_keys:
                            line["sourcetype"] = self.sourcetype
                        if "index" not in current_line_keys:
                            line["index"] = self.index
                        self.sampleDict.append(line)
                        self.sampleLines.append(line["_raw"])
                    else:
                        logger.error("Missing _raw in line '%s'" % pprint.pformat(line))
            logger.debug("Finished creating sampleDict & sampleLines for sample '%s'. Len sampleDict: %d" %
                         (self.name, len(self.sampleDict)))
            for i in range(0, len(self.sampleDict)):
                if (len(self.sampleDict[i]["_raw"]) < 1
                        or self.sampleDict[i]["_raw"][-1] != "\n"):
                    self.sampleDict[i]["_raw"] += "\n"
    if self.extendIndexes:
        try:
            for index_item in self.extendIndexes.split(","):
                index_item = index_item.strip()
                if ":" in index_item:
                    extend_indexes_count = int(index_item.split(":")[-1])
                    extend_indexes_prefix = index_item.split(":")[0] + "{}"
                    self.index_list.extend(
                        [extend_indexes_prefix.format(_i) for _i in range(extend_indexes_count)])
                elif len(index_item):
                    self.index_list.append(index_item)
        except Exception:
            logger.error("Failed to parse extendIndexes, using index={} now.".format(self.index))
            self.index_list = []
        finally:
            # only read the extendIndexes configuration once
            self.extendIndexes = None
def setupBackfill(self):
    """
    Called by non-queueable plugins or by the timer to set up backfill times
    per config or based on a Splunk search.
    """
    s = self._sample
    if s.backfill is not None:
        try:
            s.backfillts = timeParser(s.backfill, timezone=s.timezone)
            logger.info("Setting up backfill of %s (%s)" % (s.backfill, s.backfillts))
        except Exception as ex:
            logger.error("Failed to parse backfill '%s': %s" % (s.backfill, ex))
            raise
    if s.backfillSearch is not None:
        if s.backfillSearchUrl is None:
            try:
                s.backfillSearchUrl = c.getSplunkUrl(s)[0]  # noqa, we update c in the globals() dict
            except ValueError:
                logger.error("Backfill Search URL not specified for sample '%s', not running backfill search"
                             % s.name)
        if not s.backfillSearch.startswith('search'):
            s.backfillSearch = 'search ' + s.backfillSearch
        s.backfillSearch += '| head 1 | table _time'
        if s.backfillSearchUrl is not None:
            logger.debug(
                "Searching Splunk URL '%s/services/search/jobs' with search '%s' with sessionKey '%s'" %
                (s.backfillSearchUrl, s.backfillSearch, s.sessionKey))
            results = httplib2.Http(disable_ssl_certificate_validation=True).request(
                s.backfillSearchUrl + '/services/search/jobs', 'POST',
                headers={'Authorization': 'Splunk %s' % s.sessionKey},
                body=urllib.parse.urlencode({
                    'search': s.backfillSearch,
                    'earliest_time': s.backfill,
                    'exec_mode': 'oneshot'
                }))[1]
            try:
                temptime = minidom.parseString(results).getElementsByTagName(
                    'text')[0].childNodes[0].nodeValue
                # logger.debug("Time returned from backfill search: %s" % temptime)
                # Results returned look like: 2013-01-16T10:59:15.411-08:00
                # But the offset in time can also be +, so make sure we strip that out first
                if len(temptime) > 0:
                    if temptime.find('+') > 0:
                        temptime = temptime.split('+')[0]
                    temptime = '-'.join(temptime.split('-')[0:3])
                s.backfillts = datetime.datetime.strptime(temptime, '%Y-%m-%dT%H:%M:%S.%f')
                logger.debug("Backfill search results: '%s' value: '%s' time: '%s'" %
                             (pprint.pformat(results), temptime, s.backfillts))
            except (ExpatError, IndexError):
                pass
    if s.end is not None:
        parsed = False
        try:
            s.end = int(s.end)
            s.endts = None
            parsed = True
        except ValueError:
            logger.debug("Failed to parse end '%s' for sample '%s', treating as end time" %
                         (s.end, s.name))
        if not parsed:
            try:
                s.endts = timeParser(s.end, timezone=s.timezone)
                logger.info("Ending generation at %s (%s)" % (s.end, s.endts))
            except Exception as ex:
                logger.error("Failed to parse end '%s' for sample '%s', treating as number of executions"
                             % (s.end, s.name))
                raise
def gen(self, count, earliest, latest, samplename=None):
    # TODO: Figure out how to gracefully tell generator plugins to exit when there is an error.
    try:
        from jinja2 import Environment, FileSystemLoader
        self.target_count = count
        # assume that if there is no "count" field, we want to run 1 time, and only one time.
        if self.target_count == -1:
            self.target_count = 1
        self.earliest = earliest
        self.latest = latest
        if hasattr(self._sample, "jinja_count_type"):
            if self._sample.jinja_count_type in ["line_count", "cycles", "perDayVolume"]:
                self.jinja_count_type = self._sample.jinja_count_type
        startTime = datetime.datetime.now()
        # if eventgen is running as Splunk app the configfile is None
        sample_dir = self._sample.sampleDir
        if self._sample.splunkEmbedded is True:
            splunk_home = os.environ["SPLUNK_HOME"]
            app_name = getattr(self._sample, 'app', 'SA-Eventgen')
            sample_dir = os.path.join(splunk_home, 'etc', 'apps', app_name, 'samples')
        if not hasattr(self._sample, "jinja_template_dir"):
            template_dir = 'templates'
        else:
            template_dir = self._sample.jinja_template_dir
        if not os.path.isabs(template_dir):
            target_template_dir = os.path.join(sample_dir, template_dir)
        else:
            target_template_dir = template_dir
        logger.info('set jinja template path to %s', target_template_dir)
        if not hasattr(self._sample, "jinja_target_template"):
            raise CantFindTemplate("Template to load not specified in eventgen conf for stanza. Skipping Stanza")
        jinja_env = Environment(
            loader=FileSystemLoader([target_template_dir], encoding='utf-8', followlinks=False),
            extensions=['jinja2.ext.do', 'jinja2.ext.with_', 'jinja2.ext.loopcontrols', JinjaTime],
            line_statement_prefix="#",
            line_comment_prefix="##")
        jinja_loaded_template = jinja_env.get_template(str(self._sample.jinja_target_template))
        if hasattr(self._sample, 'jinja_variables'):
            jinja_loaded_vars = json.loads(self._sample.jinja_variables)
        else:
            # use an empty dict so the default generator vars below can always be injected
            jinja_loaded_vars = {}
        # make the default generator vars accessible to jinja
        jinja_loaded_vars["eventgen_count"] = self.current_count
        jinja_loaded_vars["eventgen_maxcount"] = self.target_count
        jinja_loaded_vars["eventgen_earliest"] = self.earliest
        self.earliest_epoch = (self.earliest - datetime.datetime(1970, 1, 1)).total_seconds()
        jinja_loaded_vars["eventgen_earliest_epoch"] = self.earliest_epoch
        jinja_loaded_vars["eventgen_latest"] = self.latest
        self.latest_epoch = (self.latest - datetime.datetime(1970, 1, 1)).total_seconds()
        jinja_loaded_vars["eventgen_latest_epoch"] = self.latest_epoch
        while self.current_count < self.target_count:
            self.end_of_cycle = False
            jinja_loaded_vars["eventgen_count"] = self.current_count
            (jinja_loaded_vars["eventgen_target_time_earliest"],
             jinja_loaded_vars["eventgen_target_time_latest"],
             jinja_loaded_vars["eventgen_target_time_slice_size"],
             jinja_loaded_vars["eventgen_target_time_epoch"]) = JinjaTime._get_time_slice(
                self.earliest_epoch, self.latest_epoch, self.target_count, self.current_count,
                slice_type="random")
            self.jinja_stream = jinja_loaded_template.stream(jinja_loaded_vars)
            lines_out = []
            try:
                for raw_line in self.jinja_stream:
                    # trim the newline char for jinja output;
                    # it is quite normal for jinja to emit empty newlines
                    line = raw_line.strip()
                    if line:
                        # TODO: Time can be supported by self._sample.timestamp, should probably set that up here.
                        try:
                            target_line = json.loads(line)
                        except ValueError as e:
                            logger.error("Unable to parse Jinja's return. Line: {0}".format(line))
                            logger.error("Parse Failure Reason: {0}".format(e))
                            logger.error(
                                "Please note, you must meet the requirements for json.loads in python if you have "
                                "not installed ujson. Native python does not support multi-line events.")
                            continue
                        current_line_keys = list(target_line.keys())
                        if "_time" not in current_line_keys:
                            # TODO: Add a custom exception here
                            raise Exception("No _time field supplied, please add time to your jinja template.")
                        if "_raw" not in current_line_keys:
                            # TODO: Add a custom exception here
                            raise Exception("No _raw field supplied, please add _raw to your jinja template.")
                        if "host" not in current_line_keys:
                            target_line["host"] = self._sample.host
                        if "hostRegex" not in current_line_keys:
                            target_line["hostRegex"] = self._sample.hostRegex
                        if "source" not in current_line_keys:
                            target_line["source"] = self._sample.source
                        if "sourcetype" not in current_line_keys:
                            target_line["sourcetype"] = self._sample.sourcetype
                        if "index" not in current_line_keys:
                            target_line["index"] = self._sample.index
                        lines_out.append(target_line)
            except TypeError as e:
                logger.exception(str(e))
            self.end_of_cycle = True
            self._increment_count(lines_out)
            self._out.bulksend(lines_out)
        endTime = datetime.datetime.now()
        timeDiff = endTime - startTime
        timeDiffFrac = "%d.%06d" % (timeDiff.seconds, timeDiff.microseconds)
        logger.debug("Interval complete, flushing feed")
        self._out.flush(endOfInterval=True)
        logger.info("Generation of sample '%s' completed in %s seconds." %
                    (self._sample.name, timeDiffFrac))
        return 0
    except Exception as e:
        logger.exception(str(e))
        return 1
def _getReplacement(self, old=None, earliestTime=None, latestTime=None, s=None, pivot_timestamp=None):
    if self.replacementType == 'static':
        return self.replacement
    # This logic is done in replay.py
    elif self.replacementType == 'replaytimestamp':
        pass
    elif self.replacementType == 'timestamp':
        if s.earliest and s.latest:
            if earliestTime and latestTime:
                if latestTime >= earliestTime:
                    if pivot_timestamp:
                        replacementTime = pivot_timestamp
                    elif s.timestamp is None:
                        minDelta = 0
                        # Compute timeDelta as total_seconds
                        td = latestTime - earliestTime
                        if not type(td) == float:
                            maxDelta = timeDelta2secs(td)
                        else:
                            maxDelta = td
                        # Get random timeDelta
                        randomDelta = datetime.timedelta(
                            seconds=random.randint(minDelta, maxDelta),
                            microseconds=random.randint(
                                0, latestTime.microsecond if latestTime.microsecond > 0 else 999999))
                        # Compute replacementTime
                        replacementTime = latestTime - randomDelta
                        s.timestamp = replacementTime
                    else:
                        replacementTime = s.timestamp
                    # Substitute '%s' with the integer epoch timestamp before strftime handles
                    # the remaining specifiers
                    replacement = self.replacement.replace(
                        '%s', str(int(time.mktime(replacementTime.timetuple()))))
                    replacementTime = replacementTime.strftime(replacement)
                    # replacementTime == replacement for invalid strptime specifiers
                    if replacementTime != self.replacement.replace('%', ''):
                        return replacementTime
                    else:
                        logger.error("Invalid strptime specifier '%s' detected; will not replace" %
                                     (self.replacement))
                        return old
                # earliestTime/latestTime not proper
                else:
                    logger.error(
                        ("Earliest specifier '%s', value '%s' is greater than latest specifier '%s' "
                         "value '%s' for sample '%s'; will not replace") %
                        (s.earliest, earliestTime, s.latest, latestTime, s.name))
                    return old
        # earliest/latest not proper
        else:
            logger.error('Earliest or latest specifier were not set; will not replace')
            return old
    elif self.replacementType in ('random', 'rated'):
        # Validations:
        if self._integerMatch is not None:
            integerMatch = self._integerMatch
        else:
            integerRE = re.compile(r'integer\[([-]?\d+):([-]?\d+)\]', re.I)
            integerMatch = integerRE.match(self.replacement)
            self._integerMatch = integerMatch
        if self._floatMatch is not None:
            floatMatch = self._floatMatch
        else:
            floatRE = re.compile(r'float\[(-?\d+|-?\d+\.(\d+)):(-?\d+|-?\d+\.(\d+))\]', re.I)
            floatMatch = floatRE.match(self.replacement)
            self._floatMatch = floatMatch
        if self._stringMatch is not None:
            stringMatch = self._stringMatch
        else:
            stringRE = re.compile(r'string\((\d+)\)', re.I)
            stringMatch = stringRE.match(self.replacement)
            self._stringMatch = stringMatch
        if self._hexMatch is not None:
            hexMatch = self._hexMatch
        else:
            hexRE = re.compile(r'hex\((\d+)\)', re.I)
            hexMatch = hexRE.match(self.replacement)
            self._hexMatch = hexMatch
        if self._listMatch is not None:
            listMatch = self._listMatch
        else:
            listRE = re.compile(r'list(\[[^\]]+\])', re.I)
            listMatch = listRE.match(self.replacement)
            self._listMatch = listMatch
        # Valid replacements: ipv4 | ipv6 | integer[<start>:<end>] | string(<i>)
        if self.replacement.lower() == 'ipv4':
            x = 0
            replacement = ''
            while x < 4:
                replacement += str(random.randint(0, 255)) + '.'
                x += 1
            replacement = replacement.strip('.')
            return replacement
        elif self.replacement.lower() == 'ipv6':
            x = 0
            replacement = ''
            while x < 8:
                replacement += hex(random.randint(0, 65535))[2:] + ':'
                x += 1
            replacement = replacement.strip(':')
            return replacement
        elif self.replacement.lower() == 'mac':
            x = 0
            replacement = ''
            # Give me 6 blocks of 2 hex
            while x < 6:
                y = 0
                while y < 2:
                    replacement += hex(random.randint(0, 15))[2:]
                    y += 1
                replacement += ':'
                x += 1
            replacement = replacement.strip(':')
            return replacement
        elif self.replacement.lower() == 'guid':
            return str(uuid.uuid4())
        elif integerMatch:
            startInt = int(integerMatch.group(1))
            endInt = int(integerMatch.group(2))
            if endInt >= startInt:
                replacementInt = random.randint(startInt, endInt)
                if self.replacementType == 'rated':
                    rateFactor = 1.0
                    if type(s.hourOfDayRate) == dict:
                        try:
                            rateFactor *= s.hourOfDayRate[str(s.now().hour)]
                        except KeyError:
                            import traceback
                            stack = traceback.format_exc()
                            logger.error("Hour of day rate failed. Stacktrace %s" % stack)
                    if type(s.dayOfWeekRate) == dict:
                        try:
                            weekday = datetime.date.weekday(s.now())
                            if weekday == 6:
                                weekday = 0
                            else:
                                weekday += 1
                            rateFactor *= s.dayOfWeekRate[str(weekday)]
                        except KeyError:
                            import traceback
                            stack = traceback.format_exc()
                            logger.error("Day of week rate failed. Stacktrace %s" % stack)
                    replacementInt = int(round(replacementInt * rateFactor, 0))
                replacement = str(replacementInt)
                return replacement
            else:
                logger.error("Start integer %s greater than end integer %s; will not replace" %
                             (startInt, endInt))
                return old
        elif floatMatch:
            try:
                startFloat = float(floatMatch.group(1))
                endFloat = float(floatMatch.group(3))
                significance = 0
                if floatMatch.group(2) is not None:
                    significance = len(floatMatch.group(2))
                if endFloat >= startFloat:
                    floatret = round(random.uniform(startFloat, endFloat), significance)
                    if self.replacementType == 'rated':
                        rateFactor = 1.0
                        now = s.now()
                        if type(s.hourOfDayRate) == dict:
                            try:
                                rateFactor *= s.hourOfDayRate[str(now.hour)]
                            except KeyError:
                                import traceback
                                stack = traceback.format_exc()
                                logger.error("Hour of day rate failed. Stacktrace %s" % stack)
                        if type(s.dayOfWeekRate) == dict:
                            try:
                                weekday = datetime.date.weekday(now)
                                if weekday == 6:
                                    weekday = 0
                                else:
                                    weekday += 1
                                rateFactor *= s.dayOfWeekRate[str(weekday)]
                            except KeyError:
                                import traceback
                                stack = traceback.format_exc()
                                logger.error("Day of week rate failed. Stacktrace %s" % stack)
                        floatret = round(floatret * rateFactor, significance)
                    floatret = str(floatret)
                    return floatret
                else:
                    logger.error("Start float %s greater than end float %s; will not replace" %
                                 (startFloat, endFloat))
                    return old
            except ValueError:
                logger.error("Could not parse float[%s:%s]" %
                             (floatMatch.group(1), floatMatch.group(3)))
                return old
        elif stringMatch:
            strLength = int(stringMatch.group(1))
            if strLength == 0:
                return ''
            elif strLength > 0:
                replacement = ''
                while len(replacement) < strLength:
                    # Generate a random ASCII between dec 33->126
                    replacement += chr(random.randint(33, 126))
                    # Practice safe strings
                    replacement = re.sub('%[0-9a-fA-F]+', '', urllib.parse.quote(replacement))
                return replacement
            else:
                logger.error("Length specifier %s for string replacement must be greater than 0; "
                             "will not replace" % (strLength))
                return old
        elif hexMatch:
            strLength = int(hexMatch.group(1))
            replacement = ''
            hexList = ['0', '1', '2', '3', '4', '5', '6', '7',
                       '8', '9', 'A', 'B', 'C', 'D', 'E', 'F']
            while len(replacement) < strLength:
                replacement += hexList[random.randint(0, 15)]
            return replacement
        elif listMatch:
            try:
                value = json.loads(listMatch.group(1))
            except ValueError:
                logger.error("Could not parse json for '%s' in sample '%s'" %
                             (listMatch.group(1), s.name))
                return old
            return random.SystemRandom().choice(value)
        else:
            logger.error("Unknown replacement value '%s' for replacementType '%s'; will not replace" %
                         (self.replacement, self.replacementType))
            return old
    elif self.replacementType in ('file', 'mvfile', 'seqfile'):
        if self._replacementFile is not None:
            replacementFile = self._replacementFile
            replacementColumn = self._replacementColumn
        else:
            try:
                paths = self.replacement.split(':')
                if len(paths) == 1:
                    replacementColumn = 0
                else:
                    try:
                        # When it's not a mvfile, there's no number on the end:
                        replacementColumn = int(paths[-1])
                    except ValueError:
                        replacementColumn = 0
                if replacementColumn > 0:
                    # This supports having a drive-letter colon
                    replacementFile = s.pathParser(":".join(paths[0:-1]))
                else:
                    replacementFile = s.pathParser(self.replacement)
            except ValueError:
                logger.error("Replacement string '%s' improperly formatted. "
                             "Should be /path/to/file or /path/to/file:column" % self.replacement)
                return old
            self._replacementFile = replacementFile
            self._replacementColumn = replacementColumn
        # If we've seen this file before, simply return already read results.
        # This applies only if we're looking at a multivalue file and we want to
        # return the same random pick on every iteration.
        if replacementColumn > 0 and replacementFile in self.mvhash:
            if replacementColumn > len(self.mvhash[replacementFile]):
                logger.error("Index for column '%s' in replacement file '%s' is out of bounds" %
                             (replacementColumn, replacementFile))
                return old
            else:
                # logger.debug("Returning mvhash: %s" % self.mvhash[replacementFile][replacementColumn - 1])
                return self.mvhash[replacementFile][replacementColumn - 1]
        else:
            # Cache the token file to avoid reading it every iteration
            if self._tokenfile is not None:
                replacementLines = self._tokenfile
            # Otherwise, read the file and build our cached results, pick a result and return it
            else:
                # logger.debug("replacementFile: %s replacementColumn: %s" % (replacementFile, replacementColumn))
                replacementFile = os.path.abspath(replacementFile)
                logger.debug("Normalized replacement file %s" % replacementFile)
                if os.path.exists(replacementFile) and os.path.isfile(replacementFile):
                    with open(replacementFile, 'r') as replacementFH:
                        replacementLines = replacementFH.readlines()
                    if len(replacementLines) == 0:
                        logger.error("Replacement file '%s' is empty; will not replace" %
                                     (replacementFile))
                        return old
                    else:
                        self._tokenfile = replacementLines
                else:
                    logger.error("File '%s' does not exist" % (replacementFile))
                    return old
            if self.replacementType == 'seqfile':
                # pick values one by one from the replacement file
                replacement = replacementLines[self._tokenfilecounter % len(replacementLines)].strip()
                self._tokenfilecounter += 1
            else:
                # pick a value randomly from the replacement file
                replacement = replacementLines[random.randint(0, len(replacementLines) - 1)].strip()
            if replacementColumn > 0:
                self.mvhash[replacementFile] = replacement.split(',')
                if replacementColumn > len(self.mvhash[replacementFile]):
                    logger.error("Index for column '%s' in replacement file '%s' is out of bounds" %
                                 (replacementColumn, replacementFile))
                    return old
                else:
                    return self.mvhash[replacementFile][replacementColumn - 1]
            else:
                return replacement
    elif self.replacementType == 'integerid':
        temp = self.replacement
        self.replacement = str(int(self.replacement) + 1)
        return temp
    else:
        logger.error("Unknown replacementType '%s'; will not replace" % self.replacementType)
        return old
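# Hedged reference for the replacement grammar _getReplacement() recognizes,
# with illustrative outputs (the random values shown are examples only):
#   ipv4               -> "203.0.113.7"
#   ipv6               -> "fe80:1:22:333:4444:5:66:777"
#   mac                -> "0a:1b:2c:3d:4e:5f"
#   guid               -> str(uuid.uuid4())
#   integer[1:20]      -> "13"        (rated: scaled by the hour/day rate dicts)
#   float[0.00:5.75]   -> "3.14"      (precision follows the start value's decimals)
#   string(8)          -> 8 URL-safe printable ASCII chars
#   hex(6)             -> "A1B2C3"
#   list["a","b"]      -> random choice from the JSON list
#   /path/file.txt     -> random line from file (file), sequential (seqfile)
#   /path/file.txt:2   -> column 2 of a random line (mvfile, comma-separated)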
def gen(self, count, earliest, latest, samplename=None):
    # 9/8/15 CS Check to make sure we have events to replay
    self._sample.loadSample()
    previous_event = None
    previous_event_timestamp = None
    self.current_time = self._sample.now()
    # If backfill exists, calculate the start of the backfill time relative to the current time.
    # Otherwise, backfill time equals the current time.
    self.backfill_time = self._sample.get_backfill_time(self.current_time)
    if not self._sample.backfill or self._sample.backfilldone:
        self.backfill_time = EventgenTimestamp.get_random_timestamp_backfill(
            earliest, latest, self._sample.earliest, self._sample.latest)
    for line in self._sample.get_loaded_sample():
        # Add newline to a raw line if necessary
        try:
            if line['_raw'][-1] != '\n':
                line['_raw'] += '\n'
            index = line.get('index', self._sample.index)
            host = line.get('host', self._sample.host)
            hostRegex = line.get('hostRegex', self._sample.hostRegex)
            source = line.get('source', self._sample.source)
            sourcetype = line.get('sourcetype', self._sample.sourcetype)
            rpevent = {
                '_raw': line['_raw'],
                'index': index,
                'host': host,
                'hostRegex': hostRegex,
                'source': source,
                'sourcetype': sourcetype
            }
        except (TypeError, KeyError):
            # line is a plain string rather than a dict
            if line[-1] != '\n':
                line += '\n'
            rpevent = {
                '_raw': line,
                'index': self._sample.index,
                'host': self._sample.host,
                'hostRegex': self._sample.hostRegex,
                'source': self._sample.source,
                'sourcetype': self._sample.sourcetype
            }
        # If a timestamp doesn't exist, the sample file should be fixed to include a
        # timestamp for every event.
        try:
            current_event_timestamp = self._sample.getTSFromEvent(rpevent[self._sample.timeField])
        except Exception:
            try:
                current_event_timestamp = self._sample.getTSFromEvent(line[self._sample.timeField])
            except Exception:
                try:
                    logger.error("Sample timeField {} failed to locate. Trying to locate _time field."
                                 .format(self._sample.timeField))
                    current_event_timestamp = self._sample.getTSFromEvent(line["_time"])
                except Exception:
                    logger.exception("Extracting timestamp from an event failed.")
                    continue
        # Always flush the first event
        if previous_event is None:
            previous_event = rpevent
            previous_event_timestamp = current_event_timestamp
            self.set_time_and_send(rpevent, self.backfill_time, earliest, latest)
            continue
        # Refer to the last event to calculate the new backfill time
        time_difference = datetime.timedelta(
            seconds=(current_event_timestamp - previous_event_timestamp).total_seconds()
            * self._sample.timeMultiple)
        if self.backfill_time + time_difference >= self.current_time:
            sleep_time = time_difference - (self.current_time - self.backfill_time)
            if self._sample.backfill and not self._sample.backfilldone:
                time.sleep(sleep_time.seconds)
            self.current_time += sleep_time
            self.backfill_time = self.current_time
        else:
            self.backfill_time += time_difference
        previous_event = rpevent
        previous_event_timestamp = current_event_timestamp
        self.set_time_and_send(rpevent, self.backfill_time, earliest, latest)
    self._out.flush(endOfInterval=True)
    return
def load_sample_file(self):
    line_list = []
    for line in self._sample.get_loaded_sample():
        # Add newline to a raw line if necessary
        try:
            if line["_raw"][-1] != "\n":
                line["_raw"] += "\n"
            current_event_timestamp = False
            index = line.get("index", self._sample.index)
            host = line.get("host", self._sample.host)
            hostRegex = line.get("hostRegex", self._sample.hostRegex)
            source = line.get("source", self._sample.source)
            sourcetype = line.get("sourcetype", self._sample.sourcetype)
            rpevent = {
                "_raw": line["_raw"],
                "index": index,
                "host": host,
                "hostRegex": hostRegex,
                "source": source,
                "sourcetype": sourcetype,
            }
        except (TypeError, KeyError):
            # line is a plain string rather than a dict
            if line[-1] != "\n":
                line += "\n"
            rpevent = {
                "_raw": line,
                "index": self._sample.index,
                "host": self._sample.host,
                "hostRegex": self._sample.hostRegex,
                "source": self._sample.source,
                "sourcetype": self._sample.sourcetype,
            }
        try:
            current_event_timestamp = self._sample.getTSFromEvent(rpevent[self._sample.timeField])
            rpevent["base_time"] = current_event_timestamp
        except Exception:
            try:
                current_event_timestamp = self._sample.getTSFromEvent(line[self._sample.timeField])
                rpevent["base_time"] = current_event_timestamp
            except Exception:
                try:
                    logger.error("Sample timeField {} failed to locate. Trying to locate _time field."
                                 .format(self._sample.timeField))
                    current_event_timestamp = self._sample.getTSFromEvent(line["_time"])
                    rpevent["base_time"] = current_event_timestamp
                except Exception:
                    logger.exception("Extracting timestamp from an event failed.")
                    continue
        line_list.append(rpevent)
    # now iterate the list once and figure out the time delta of every event
    current_event = None
    previous_event = None
    for index, line in enumerate(line_list):
        current_event = line
        # if it's the first event, there is no previous event.
        if index == 0:
            previous_event = current_event
        else:
            previous_event = line_list[index - 1]
        # Refer to the last event to calculate the new backfill time
        time_difference = (current_event["base_time"] -
                           previous_event["base_time"]) * self._sample.timeMultiple
        current_event["timediff"] = time_difference
    return line_list
def flush(self, q):
    if len(q) > 0:
        # Store each source/sourcetype combo with its events so we can send them all together
        queues = {}
        for row in q:
            if row["source"] is None:
                row["source"] = ""
            if row["sourcetype"] is None:
                row["sourcetype"] = ""
            if not row["source"] + "_" + row["sourcetype"] in queues:
                queues[row["source"] + "_" + row["sourcetype"]] = deque([])
            queues[row["source"] + "_" + row["sourcetype"]].append(row)
        # Iterate sub-queues, each holds events for a specific source/sourcetype combo
        for k, queue in list(queues.items()):
            if len(queue) > 0:
                streamout = ""
                index = source = sourcetype = host = hostRegex = None
                metamsg = queue.popleft()
                # We need the raw string for each event, but other data will stay the same
                # within its own sub-queue
                msg = metamsg["_raw"]
                try:
                    index = metamsg["index"]
                    source = metamsg["source"]
                    sourcetype = metamsg["sourcetype"]
                    host = metamsg["host"]
                    hostRegex = metamsg["hostRegex"]
                except KeyError:
                    pass
                logger.debug("Flushing output for sample '%s' in app '%s' for queue '%s'" %
                             (self._sample.name, self._app, self._sample.source))
                try:
                    if self._splunkMethod == "https":
                        connmethod = http.client.HTTPSConnection
                    else:
                        connmethod = http.client.HTTPConnection
                    splunkhttp = connmethod(self._splunkHost, self._splunkPort)
                    splunkhttp.connect()
                    urlparams = []
                    if index:
                        urlparams.append(("index", index))
                    if source:
                        urlparams.append(("source", source))
                    if sourcetype:
                        urlparams.append(("sourcetype", sourcetype))
                    if hostRegex:
                        urlparams.append(("host_regex", hostRegex))
                    if host:
                        urlparams.append(("host", host))
                    url = "/services/receivers/simple?%s" % (urllib.parse.urlencode(urlparams))
                    headers = {"Authorization": "Splunk %s" % self._sample.sessionKey}
                    # Iterate each raw event string in its sub-queue
                    while msg:
                        if msg[-1] != "\n":
                            msg += "\n"
                        streamout += msg
                        try:
                            msg = queue.popleft()["_raw"]
                        except IndexError:
                            msg = False
                    splunkhttp.request("POST", url, streamout, headers)
                    logger.debug("POSTing to url %s on %s://%s:%s with sessionKey %s" %
                                 (url, self._splunkMethod, self._splunkHost, self._splunkPort,
                                  self._sample.sessionKey))
                except http.client.HTTPException as e:
                    logger.error('Error connecting to Splunk for logging for sample %s. '
                                 'Exception "%s" Config: %s' % (self._sample.name, e.args, self))
                    raise IOError("Error connecting to Splunk for logging for sample %s" % self._sample)
                try:
                    response = splunkhttp.getresponse()
                    data = response.read()
                    if response.status != 200:
                        logger.error("Data not written to Splunk. Splunk returned %s" % data)
                except http.client.BadStatusLine:
                    logger.error("Received bad status from Splunk for sample '%s'" % self._sample)
                logger.debug("Closing splunkhttp connection")
                if splunkhttp:
                    splunkhttp.close()