Example #1
    def on_message(self, body, message):
        # print("RECEIVED MESSAGE: %r" % (body, ))
        try:
            # default elastic search metadata for an event
            metadata = {'index': 'events', 'doc_type': 'event', 'id': None}
            # just to be safe..check what we were sent.
            if isinstance(body, dict):
                bodyDict = body
            elif isinstance(body, str) or isinstance(body, unicode):
                try:
                    bodyDict = json.loads(body)  # let's assume it's JSON
                except ValueError as e:
                    # not json..ack but log the message
                    logger.error(
                        "esworker exception: unknown body type received %r" %
                        body)
                    # message.ack()
                    return
            else:
                logger.error(
                    "esworker exception: unknown body type received %r" % body)
                # message.ack()
                return

            if 'customendpoint' in bodyDict and bodyDict['customendpoint']:
                # custom document
                # send to plugins to allow them to modify it if needed
                (normalizedDict,
                 metadata) = sendEventToPlugins(bodyDict, metadata, pluginList)
            else:
                # normalize the dict
                # to the mozdef events standard
                normalizedDict = keyMapping(bodyDict)

                # send to plugins to allow them to modify it if needed
                if isinstance(normalizedDict, dict) and normalizedDict:
                    (normalizedDict, metadata) = sendEventToPlugins(
                        normalizedDict, metadata, pluginList)

            # drop the message if a plug in set it to None
            # signaling a discard
            if normalizedDict is None:
                # message.ack()
                return

            # make a json version for posting to elastic search
            jbody = json.JSONEncoder().encode(normalizedDict)

            if isCEF(normalizedDict):
                # for CEF records, doc_type is set to the 'deviceproduct' field value
                metadata['doc_type'] = 'cef'
                details = normalizedDict.get('details', {})
                if 'deviceproduct' in details:
                    # don't create strange doc types..
                    if ' ' not in details['deviceproduct'] and '.' not in details['deviceproduct']:
                        metadata['doc_type'] = details['deviceproduct']

            try:
                bulk = False
                if options.esbulksize != 0:
                    bulk = True

                self.esConnection.save_event(index=metadata['index'],
                                             doc_id=metadata['id'],
                                             doc_type=metadata['doc_type'],
                                             body=jbody,
                                             bulk=bulk)

            except (ElasticsearchBadServer, ElasticsearchInvalidIndex) as e:
                # handle loss of server or race condition with index rotation/creation/aliasing
                try:
                    self.esConnection = esConnect()
                    # message.requeue()
                    return
                except kombu.exceptions.MessageStateError:
                    # state may be already set.
                    return
            except ElasticsearchException as e:
                # exception target for queue capacity issues reported by elastic search so catch the error, report it and retry the message
                try:
                    logger.exception(
                        'ElasticSearchException: {0} reported while indexing event'
                        .format(e))
                    # message.requeue()
                    return
                except kombu.exceptions.MessageStateError:
                    # state may be already set.
                    return

            # message.ack()
        except Exception as e:
            logger.exception(e)
            logger.error('Malformed message body: %r' % body)
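
The CEF branch above derives a per-product doc_type. A minimal standalone sketch of that selection logic (the function name is hypothetical; isCEF is assumed to exist upstream):

def cef_doc_type(event):
    # default doc_type for CEF records
    doc_type = 'cef'
    product = event.get('details', {}).get('deviceproduct')
    # don't create strange doc types: only use "clean" product tokens
    if product and ' ' not in product and '.' not in product:
        doc_type = product
    return doc_type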
Example #2
def keyMapping(aDict):
    '''map common key/fields to a normalized structure,
       explicitly typed when possible to avoid schema changes for upstream consumers
       Special accommodations made for logstash, nxlog, beaver, heka and CEF
       Some shippers attempt to conform to the logstash-style @fieldname convention.
       This strips the leading at symbol since it breaks some elastic search
       libraries like elasticutils.
    '''
    returndict = dict()

    returndict['source'] = 'cloudtrail'
    returndict['details'] = {}
    returndict['category'] = 'cloudtrail'
    returndict['processid'] = str(os.getpid())
    returndict['processname'] = sys.argv[0]
    returndict['severity'] = 'INFO'
    if 'sourceIPAddress' in aDict and 'eventName' in aDict and 'eventSource' in aDict:
        summary_str = "{0} performed {1} in {2}".format(
            aDict['sourceIPAddress'],
            aDict['eventName'],
            aDict['eventSource']
        )
        returndict['summary'] = summary_str

    if aDict.get('eventName'):
        # uppercase the first character
        aDict['eventName'] = aDict['eventName'][0].upper() + aDict['eventName'][1:]
        verbs = CLOUDTRAIL_VERB_REGEX.findall(aDict['eventName'])
        if verbs:
            returndict['details']['eventVerb'] = verbs[0]
            returndict['details']['eventReadOnly'] = (verbs[0] in ['Describe', 'Get', 'List'])
    # set the timestamp when we received it, i.e. now
    returndict['receivedtimestamp'] = toUTC(datetime.now()).isoformat()
    returndict['mozdefhostname'] = options.mozdefhostname
    try:
        for k, v in aDict.iteritems():
            k = removeAt(k).lower()

            if k == 'sourceip':
                returndict[u'details']['sourceipaddress'] = v

            elif k == 'sourceipaddress':
                returndict[u'details']['sourceipaddress'] = v

            elif k == 'facility':
                returndict[u'source'] = v

            elif k == 'eventsource':
                returndict[u'hostname'] = v

            elif k in ('message', 'summary'):
                returndict[u'summary'] = toUnicode(v)

            elif k == 'payload' and 'summary' not in aDict:
                # special case for heka: if it sends a payload but no summary,
                # promote the payload to the summary field
                returndict[u'summary'] = toUnicode(v)
            elif k == 'payload':
                # keep both when a summary is also present; payload goes to details
                returndict[u'details']['payload'] = toUnicode(v)

            elif k in ('eventtime', 'timestamp', 'utctimestamp', 'date'):
                returndict[u'utctimestamp'] = toUTC(v).isoformat()
                returndict[u'timestamp'] = toUTC(v).isoformat()

            elif k in ('hostname', 'source_host', 'host'):
                returndict[u'hostname'] = toUnicode(v)

            elif k == 'tags':
                if 'tags' not in returndict:
                    returndict[u'tags'] = []
                if isinstance(v, list):
                    returndict[u'tags'] += v
                elif len(v) > 0:
                    returndict[u'tags'].append(v)

            # nxlog keeps the severity name in syslogseverity, everyone else should use severity or level.
            elif k in ('syslogseverity', 'severity', 'severityvalue', 'level', 'priority'):
                returndict[u'severity'] = toUnicode(v).upper()

            elif k in ('facility', 'syslogfacility'):
                returndict[u'facility'] = toUnicode(v)

            elif k in ('pid', 'processid'):
                returndict[u'processid'] = toUnicode(v)

            # nxlog sets sourcename to the processname (i.e. sshd), everyone else should call it process name or pname
            elif k in ('pname', 'processname', 'sourcename', 'program'):
                returndict[u'processname'] = toUnicode(v)

            # the file, or source
            elif k in ('path', 'logger', 'file'):
                returndict[u'eventsource'] = toUnicode(v)

            elif k in ('type', 'eventtype', 'category'):
                returndict[u'category'] = toUnicode(v)

            # custom fields as a list/array
            elif k in ('fields', 'details'):
                if type(v) is not dict:
                    returndict[u'details'][u'message'] = v
                else:
                    if len(v) > 0:
                        for details_key, details_value in v.iteritems():
                            returndict[u'details'][details_key] = details_value

            # custom fields/details as a one off, not in an array
            # i.e. fields.something=value or details.something=value
            # move them to a dict for consistency in querying
            elif k.startswith('fields.') or k.startswith('details.'):
                newName = k.replace('fields.', '')
                newName = newName.lower().replace('details.', '')
                # add a dict to hold the details if it doesn't exist
                if 'details' not in returndict.keys():
                    returndict[u'details'] = dict()
                # add field with a special case for shippers that
                # don't send details
                # in an array as int/floats/strings
                # we let them dictate the data type with field_datatype
                # convention
                if newName.endswith('_int'):
                    returndict[u'details'][unicode(newName)] = int(v)
                elif newName.endswith('_float'):
                    returndict[u'details'][unicode(newName)] = float(v)
                else:
                    returndict[u'details'][unicode(newName)] = toUnicode(v)
            else:
                returndict[u'details'][k] = v

        if 'utctimestamp' not in returndict.keys():
            # default in case we don't find a reasonable timestamp
            returndict['utctimestamp'] = toUTC(datetime.now()).isoformat()
    except Exception as e:
        logger.exception(e)
        logger.error('Malformed message: %r' % aDict)

    return returndict
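
The docstring above mentions stripping the leading at symbol from logstash-style @fieldname keys. A plausible sketch of the removeAt helper this relies on (an assumption, not the verified implementation):

def removeAt(astring):
    # strip a leading logstash-style '@' if present
    if astring.startswith('@'):
        return astring[1:]
    return astring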
Example #3
    def save_documents(self, documents):
        try:
            bulk(self.es_connection, documents)
        except BulkIndexError as e:
            logger.error("Error bulk indexing: " + str(e))
Example #4
def keyMapping(aDict):
    '''map common key/fields to a normalized structure,
       explicitly typed when possible to avoid schema changes for upstream consumers
       Special accommodations made for logstash, nxlog, beaver, heka and CEF
       Some shippers attempt to conform to the logstash-style @fieldname convention.
       This strips the leading at symbol since it breaks some elastic search
       libraries like elasticutils.
    '''
    returndict = dict()

    # uncomment to save the source event for debugging, or chain of custody/forensics
    # returndict['original']=aDict

    # set the timestamp when we received it, i.e. now
    returndict['receivedtimestamp'] = toUTC(datetime.now()).isoformat()
    returndict['mozdefhostname'] = options.mozdefhostname
    returndict['details'] = {}
    try:
        for k, v in aDict.iteritems():
            k = removeAt(k).lower()

            if k in ('message', 'summary'):
                returndict[u'summary'] = toUnicode(v)

            if k == 'payload' and 'summary' not in aDict:
                # special case for heka: if it sends a payload but no summary,
                # promote the payload to the summary field
                returndict[u'summary'] = toUnicode(v)
            elif k == 'payload':
                # keep both when a summary is also present; payload goes to details
                returndict[u'details']['payload'] = toUnicode(v)

            if k in ('eventtime', 'timestamp', 'utctimestamp'):
                returndict[u'utctimestamp'] = toUTC(v).isoformat()
                returndict[u'timestamp'] = toUTC(v).isoformat()

            if k in ('hostname', 'source_host', 'host'):
                returndict[u'hostname'] = toUnicode(v)

            if k == 'tags':
                if len(v) > 0:
                    returndict[u'tags'] = v

            # nxlog keeps the severity name in syslogseverity, everyone else should use severity or level.
            if k in ('syslogseverity', 'severity', 'severityvalue', 'level'):
                returndict[u'severity'] = toUnicode(v).upper()

            if k in ('facility', 'syslogfacility'):
                returndict[u'facility'] = toUnicode(v)

            if k in ('pid', 'processid'):
                returndict[u'processid'] = toUnicode(v)

            # nxlog sets sourcename to the processname (i.e. sshd), everyone else should call it process name or pname
            if k in ('pname', 'processname', 'sourcename'):
                returndict[u'processname'] = toUnicode(v)

            # the file, or source
            if k in ('path', 'logger', 'file'):
                returndict[u'eventsource'] = toUnicode(v)

            if k in ('type', 'eventtype', 'category'):
                returndict[u'category'] = toUnicode(v)

            # custom fields as a list/array
            if k in ('fields', 'details'):
                if type(v) is not dict:
                    returndict[u'details'][u'message'] = v
                else:
                    if len(v) > 0:
                        for details_key, details_value in v.iteritems():
                            returndict[u'details'][details_key] = details_value

            # custom fields/details as a one off, not in an array
            # i.e. fields.something=value or details.something=value
            # move them to a dict for consistency in querying
            if k.startswith('fields.') or k.startswith('details.'):
                newName = k.replace('fields.', '')
                newName = newName.lower().replace('details.', '')
                # add field with a special case for shippers that
                # don't send details
                # in an array as int/floats/strings
                # we let them dictate the data type with field_datatype
                # convention
                if newName.endswith('_int'):
                    returndict[u'details'][unicode(newName)] = int(v)
                elif newName.endswith('_float'):
                    returndict[u'details'][unicode(newName)] = float(v)
                else:
                    returndict[u'details'][unicode(newName)] = toUnicode(v)

        # nxlog windows log handling
        if 'Domain' in aDict.keys() and 'SourceModuleType' in aDict.keys():
            # nxlog parses all windows event fields very well
            # copy all fields to details
            for win_key, win_value in aDict.iteritems():
                returndict[u'details'][removeAt(win_key).lower()] = win_value

        if 'utctimestamp' not in returndict.keys():
            # default in case we don't find a reasonable timestamp
            returndict['utctimestamp'] = toUTC(datetime.now()).isoformat()

    except Exception as e:
        logger.exception('Received exception while normalizing message: %r' %
                         e)
        logger.error('Malformed message: %r' % aDict)
        return None

    return returndict
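
To illustrate the field_datatype suffix convention handled above, a hedged usage sketch (assumes module globals such as options, logger and toUTC are already configured; the input keys are made up for the example):

normalized = keyMapping({
    'summary': 'example event',
    'details.bytes_int': '512',     # '_int' suffix coerces the value to int
    'details.ratio_float': '0.25',  # '_float' suffix coerces the value to float
})
# normalized['details']['bytes_int'] == 512
# normalized['details']['ratio_float'] == 0.25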
Example #5
    def on_message(self, message):
        try:
            # default elastic search metadata for an event
            metadata = {'index': 'events', 'doc_type': 'event', 'id': None}
            event = {}

            event['receivedtimestamp'] = toUTC(datetime.now()).isoformat()
            event['mozdefhostname'] = self.options.mozdefhostname

            if 'tags' in event:
                event['tags'].extend([self.options.taskexchange])
            else:
                event['tags'] = [self.options.taskexchange]

            event['severity'] = 'INFO'

            # Set defaults
            event['processid'] = ''
            event['processname'] = ''
            event['category'] = 'syslog'

            for message_key, message_value in message.iteritems():
                if 'Message' == message_key:
                    try:
                        message_json = json.loads(message_value)
                        for inside_message_key, inside_message_value in message_json.iteritems():
                            if inside_message_key in ('processid', 'pid'):
                                processid = str(inside_message_value)
                                processid = processid.replace('[', '')
                                processid = processid.replace(']', '')
                                event['processid'] = processid
                            elif inside_message_key == 'pname':
                                event['processname'] = inside_message_value
                            elif inside_message_key == 'hostname':
                                event['hostname'] = inside_message_value
                            elif inside_message_key in ('time', 'timestamp'):
                                event['timestamp'] = toUTC(
                                    inside_message_value).isoformat()
                                event['utctimestamp'] = toUTC(
                                    event['timestamp']).astimezone(
                                        pytz.utc).isoformat()
                            elif inside_message_key == 'type':
                                event['category'] = inside_message_value
                            elif inside_message_key in ('payload', 'message'):
                                event['summary'] = inside_message_value
                            else:
                                if 'details' not in event:
                                    event['details'] = {}
                                event['details'][
                                    inside_message_key] = inside_message_value
                    except ValueError:
                        event['summary'] = message_value
            (event, metadata) = sendEventToPlugins(event, metadata,
                                                   self.pluginList)
            # Drop message if plugins set to None
            if event is None:
                return
            self.save_event(event, metadata)
        except Exception as e:
            logger.exception(e)
            logger.error('Malformed message: %r' % message)
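
The handler above leans on toUTC to force every timestamp into an ISO-formatted UTC value. A plausible sketch of such a helper (an assumption; the real utility may apply a configurable default timezone rather than treating naive datetimes as UTC):

from datetime import datetime
import pytz
from dateutil.parser import parse

def toUTC(suspected_date):
    # accept strings or datetime objects
    if not isinstance(suspected_date, datetime):
        suspected_date = parse(suspected_date, fuzzy=True)
    # assume naive datetimes are already UTC (a simplification)
    if suspected_date.tzinfo is None:
        suspected_date = pytz.utc.localize(suspected_date)
    return suspected_date.astimezone(pytz.utc)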
Example #6
    def on_message(self, body):
        # print("RECEIVED MESSAGE: %r" % (body, ))
        try:
            # default elastic search metadata for an event
            metadata = {
                'index': 'events',
                'doc_type': 'cloudtrail',
                'id': None
            }
            # just to be safe..check what we were sent.
            if isinstance(body, dict):
                bodyDict = body
            elif isinstance(body, str) or isinstance(body, unicode):
                try:
                    bodyDict = json.loads(body)  # let's assume it's JSON
                except ValueError as e:
                    # not json..ack but log the message
                    logger.error("Unknown body type received %r" % body)
                    return
            else:
                logger.error("Unknown body type received %r\n" % body)
                return

            if 'customendpoint' in bodyDict and bodyDict['customendpoint']:
                # custom document
                # send to plugins to allow them to modify it if needed
                (normalizedDict,
                 metadata) = sendEventToPlugins(bodyDict, metadata, pluginList)
            else:
                # normalize the dict
                # to the mozdef events standard
                normalizedDict = keyMapping(bodyDict)

                # send to plugins to allow them to modify it if needed
                if isinstance(normalizedDict, dict) and normalizedDict:
                    (normalizedDict, metadata) = sendEventToPlugins(
                        normalizedDict, metadata, pluginList)

            # drop the message if a plug in set it to None
            # signaling a discard
            if normalizedDict is None:
                return

            # This handles the fact that cloudtrail sends
            # inconsistent data types in the iamInstanceProfile field
            if 'details' in normalizedDict:
                requestparameters = normalizedDict['details'].get(
                    'requestparameters')
                if requestparameters is not None and 'iamInstanceProfile' in requestparameters:
                    iam_instance_profile = requestparameters['iamInstanceProfile']
                    if type(iam_instance_profile) is not dict:
                        requestparameters['iamInstanceProfile'] = {
                            'name': iam_instance_profile
                        }

            # make a json version for posting to elastic search
            jbody = json.JSONEncoder().encode(normalizedDict)

            try:
                # bulk indexing is disabled for this worker regardless of
                # options.esbulksize
                bulk = False
                self.esConnection.save_event(index=metadata['index'],
                                             doc_id=metadata['id'],
                                             doc_type=metadata['doc_type'],
                                             body=jbody,
                                             bulk=bulk)

            except (ElasticsearchBadServer, ElasticsearchInvalidIndex) as e:
                # handle loss of server or race condition with index rotation/creation/aliasing
                try:
                    self.esConnection = esConnect()
                    return
                except kombu.exceptions.MessageStateError:
                    # state may be already set.
                    return
            except ElasticsearchException as e:
                # exception target for queue capacity issues reported by elastic search so catch the error, report it and retry the message
                try:
                    logger.exception(
                        'ElasticSearchException: {0} reported while indexing event'
                        .format(e))
                    return
                except kombu.exceptions.MessageStateError:
                    # state may be already set.
                    return
        except Exception as e:
            logger.exception(e)
            logger.error('Malformed message: %r' % body)
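
The iamInstanceProfile fix-up above is self-contained enough to extract. A minimal standalone sketch (the function name is hypothetical):

def normalize_iam_instance_profile(requestparameters):
    # cloudtrail sometimes sends iamInstanceProfile as a bare string
    # rather than a dict; wrap it so the field type stays consistent
    profile = requestparameters.get('iamInstanceProfile')
    if profile is not None and not isinstance(profile, dict):
        requestparameters['iamInstanceProfile'] = {'name': profile}
    return requestparameters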
Example #7
def getQueueSizes():
    logger.debug('starting')
    logger.debug(options)
    es = ElasticsearchClient(options.esservers)
    sqslist = {}
    sqslist['queue_stats'] = {}
    qcount = len(options.taskexchange)
    qcounter = qcount - 1
    try:
        # meant only to talk to SQS using boto
        # and return queue attributes

        mqConn = boto.sqs.connect_to_region(
            options.region,
            aws_access_key_id=options.accesskey,
            aws_secret_access_key=options.secretkey)

        for exchange in options.taskexchange:
            logger.debug('Looking for sqs queue stats in queue ' + exchange)
            eventTaskQueue = mqConn.get_queue(exchange)
            # get queue stats
            taskQueueStats = eventTaskQueue.get_attributes('All')
            sqslist['queue_stats'][qcounter] = taskQueueStats
            sqslist['queue_stats'][qcounter]['name'] = exchange
            qcounter -= 1
    except Exception as e:
        logger.error("Exception %r when gathering health and status " % e)

    # setup a log entry for health/status.
    sqsid = '{0}-{1}'.format(options.account, options.region)
    healthlog = dict(utctimestamp=toUTC(datetime.now()).isoformat(),
                     hostname=sqsid,
                     processid=os.getpid(),
                     processname=sys.argv[0],
                     severity='INFO',
                     summary='mozdef health/status',
                     category='mozdef',
                     source='aws-sqs',
                     tags=[],
                     details=[])
    healthlog['details'] = dict(username='******')
    healthlog['details']['queues'] = list()
    healthlog['details']['total_messages_ready'] = 0
    healthlog['details']['total_feeds'] = qcount
    healthlog['tags'] = ['mozdef', 'status', 'sqs']
    ready = 0
    for q in sqslist['queue_stats'].keys():
        queuelist = sqslist['queue_stats'][q]
        # default the counters in case an attribute is missing
        messages = inflight = delayed = 0
        name = ''
        if 'ApproximateNumberOfMessages' in queuelist:
            messages = int(queuelist['ApproximateNumberOfMessages'])
            ready += messages
            healthlog['details']['total_messages_ready'] = ready
        if 'ApproximateNumberOfMessagesNotVisible' in queuelist:
            inflight = int(queuelist['ApproximateNumberOfMessagesNotVisible'])
        if 'ApproximateNumberOfMessagesDelayed' in queuelist:
            delayed = int(queuelist['ApproximateNumberOfMessagesDelayed'])
        if 'name' in queuelist:
            name = queuelist['name']
        queueinfo = dict(queue=name,
                         messages_delayed=delayed,
                         messages_ready=messages,
                         messages_inflight=inflight)
        healthlog['details']['queues'].append(queueinfo)
    # post to elasticsearch servers directly without going through
    # message queues in case there is an availability issue
    es.save_event(index=options.index,
                  doc_type='mozdefhealth',
                  body=json.dumps(healthlog))
    # post another doc with a static docid and tag
    # for use when querying for the latest sqs status
    healthlog['tags'] = ['mozdef', 'status', 'sqs-latest']
    es.save_event(index=options.index,
                  doc_type='mozdefhealth',
                  doc_id=getDocID(sqsid),
                  body=json.dumps(healthlog))
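
A hedged usage sketch of the boto (boto2) SQS attribute calls used above; the region and queue name are hypothetical, and credentials are assumed to come from the environment:

import boto.sqs

conn = boto.sqs.connect_to_region('us-west-2')  # hypothetical region
queue = conn.get_queue('eventtask')             # hypothetical queue name
attrs = queue.get_attributes('All')
print(attrs.get('ApproximateNumberOfMessages'))
print(attrs.get('ApproximateNumberOfMessagesNotVisible'))
print(attrs.get('ApproximateNumberOfMessagesDelayed'))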
Example #8
    def run(self):
        # Boto expects base64 encoded messages - but if the writer is not boto,
        # the body is not necessarily base64 encoded, so we have to detect that
        # and decode (or not) accordingly
        self.taskQueue.set_message_class(RawMessage)
        while True:
            try:
                records = self.taskQueue.get_messages(options.prefetch)
                for msg in records:
                    # msg.id is the id,
                    # get_body() should be json

                    # pre process the message a bit
                    tmp = msg.get_body()
                    try:
                        msgbody = json.loads(tmp)
                    except ValueError:
                        # If Boto wrote to the queue, it might be base64 encoded, so let's decode that
                        try:
                            tmp = base64.b64decode(tmp)
                            msgbody = json.loads(tmp)
                        except Exception as e:
                            logger.error(
                                'Invalid message, not JSON <dropping message and continuing>: %r'
                                % msg.get_body())
                            self.taskQueue.delete_message(msg)
                            continue

                    # If this is still not a dict,
                    # let's just drop the message and move on
                    if type(msgbody) is not dict:
                        logger.debug(
                            "Message is not a dictionary, dropping message.")
                        self.taskQueue.delete_message(msg)
                        continue

                    event = msgbody

                    # Was this message sent by fluentd-sqs?
                    fluentd_sqs_specific_fields = {
                        'az', 'instance_id', '__tag'
                    }
                    if fluentd_sqs_specific_fields.issubset(set(
                            msgbody.keys())):
                        # Until we can influence fluentd-sqs to set the
                        # 'customendpoint' key before submitting to SQS, we'll
                        # need to do it here
                        # TODO : Change nubis fluentd output to include
                        # 'customendpoint'
                        event['customendpoint'] = True

                    if 'tags' in event:
                        event['tags'].extend([options.taskexchange])
                    else:
                        event['tags'] = [options.taskexchange]

                    # process message
                    self.on_message(event, msg)

                    # delete message from queue
                    self.taskQueue.delete_message(msg)
                time.sleep(.1)

            except ValueError as e:
                logger.exception('Exception while handling message: %r' % e)
                self.taskQueue.delete_message(msg)
            except (SSLEOFError, SSLError, socket.error):
                logger.info('Received network related error...reconnecting')
                time.sleep(5)
                self.connection, self.taskQueue = connect_sqs(
                    options.region, options.accesskey, options.secretkey,
                    options.taskexchange)
                self.taskQueue.set_message_class(RawMessage)
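
The decode-or-not logic at the top of run() boils down to: try plain JSON first, then fall back to base64-decoding. A minimal standalone sketch (the function name is hypothetical):

import base64
import json

def parse_sqs_body(raw):
    # plain JSON first; boto-written messages may be base64 encoded
    try:
        return json.loads(raw)
    except ValueError:
        return json.loads(base64.b64decode(raw))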