Exemplo n.º 1
0
class StrptimeFindCommand(StreamingCommand):
    """ Finds a strptime value in a string

    ##Syntax

    .. code-block::
        strptimefind fieldname=<field> pattern=<field> outputfield=<field>

    ##Description

    Searches the text in the field <fieldname> for a substring that parses
    with the strptime format string read from the field named by <pattern>.
    Substrings are tried from the leftmost start position, longest first, and
    only while they are longer than the format string itself. The first
    substring that parses is converted with time.mktime and stored in
    <outputfield>. Records without a match are passed through without
    <outputfield> being set.

    ##Example

    From the first 35 characters of this line, find a valid time.

    .. code-block::
        | makeresults | eval line="Tue Oct 30 16:58:50 EDT 2018 This i", strptime_string="%a %b %d %H:%M:%S %Z %Y" | strptimefind fieldname=line pattern=strptime_string outputfield=found_time

    """
    fieldname = Option(doc='''
        **Syntax:** **fieldname=***<fieldname>*
        **Description:** Name of the field that holds the text to be evaluated''',
                       require=True,
                       validate=validators.Fieldname())

    pattern = Option(doc='''
        **Syntax:** **pattern=***<fieldname>*
        **Description:** Name of the field that holds a valid strptime string''',
                     require=True)

    outputfield = Option(doc='''
        **Syntax:** **outputfield=***<fieldname>*
        **Description:** Name of the field that will hold the found time''',
                         require=True,
                         validate=validators.Fieldname())

    def stream(self, records):
        """Yield every record, adding <outputfield> when a time is found."""
        self.logger.debug('StrptimeFindCommand: %s', self)  # logs command line
        for record in records:
            found = self._find_time(record[self.fieldname],
                                    record[self.pattern])
            if found is not None:
                record[self.outputfield] = time.mktime(found.timetuple())
            yield record

    @staticmethod
    def _find_time(text, strptime_format):
        """Return the first datetime parsed from a substring of ``text``.

        Returns ``None`` when no substring longer than ``strptime_format``
        parses with that format.
        """
        limit = len(strptime_format)
        while len(text) > limit:
            candidate = text
            while len(candidate) > limit:
                try:
                    # Return immediately: scanning further would only waste
                    # work and could replace this match with a later one.
                    return datetime.strptime(candidate, strptime_format)
                except ValueError:
                    # strptime signals "does not match" with ValueError;
                    # drop the last character and retry.
                    candidate = candidate[:-1]
            text = text[1:]
        return None
Exemplo n.º 2
0
class DurationByHour(StreamingCommand):
    """Splits a start time plus duration into per-clock-hour pieces.

    For every event the epoch start time and the duration in seconds are read
    from the configured fields, and a list of ``"<timestamp>_<seconds>"``
    strings is written to the ``result`` field.
    """

    field_starttime = Option(require=True,
                             validate=validators.Fieldname(),
                             doc='''
    **Syntax:** **field_starttime=***<field>*
        **Description:** define column which contain starttime''')
    field_duration = Option(require=True,
                            validate=validators.Fieldname(),
                            doc='''
    **Syntax:** **field_duration=***<field>*
        **Description:** define column which contain duration''')
    result = Option(require=False,
                    validate=validators.Fieldname(),
                    default="Timestamp_Duration",
                    doc='''
    **Syntax:** **result=***<result>*
        **Description:** define column name of result column''')

    def stream(self, events):
        """Yield each event with the per-hour breakdown stored in ``result``."""
        for event in events:
            start_epoch = int(event[self.field_starttime])
            seconds = int(event[self.field_duration])
            event[self.result] = self.durationByHour(start_epoch, seconds)
            yield event

    def durationByHour(self, field_starttime, field_duration):
        """Return the list of ``"<timestamp>_<seconds>"`` pieces.

        :param field_starttime: start as an epoch value, converted with
            ``datetime.fromtimestamp`` (local time).
        :param field_duration: length in seconds.
        :return: one string per piece. The first piece is labelled with the
            hour-aligned time one hour before the first boundary; later
            pieces are labelled with their own start. Each label is followed
            by ``_`` and the piece length in whole seconds.
        """
        one_hour = datetime.timedelta(hours=1)
        begin = datetime.datetime.fromtimestamp(field_starttime)
        finish = begin + datetime.timedelta(seconds=field_duration)

        # First hour boundary strictly after the start.
        boundary = (begin + one_hour).replace(minute=0, second=0)
        first_label = str(boundary - one_hour)

        if boundary > finish:
            # The whole interval ends before the next boundary.
            seconds = int((finish - begin).total_seconds())
            return [first_label + '_' + str(seconds)]

        pieces = []
        cursor = begin
        while True:
            label = first_label if cursor == begin else str(cursor)
            seconds = int((boundary - cursor).total_seconds())
            pieces.append(label + '_' + str(seconds))
            if boundary == finish:
                break
            # Advance one hour, clamped so the last piece stops at the end.
            cursor = boundary
            boundary = min(boundary + one_hour, finish)
        return pieces
Exemplo n.º 3
0
class UpdateAlertsCommand(StreamingCommand):
    """Replaces or updates alerts through an AlertCollection.

    Per record, either the ``json`` field is parsed and passed to
    ``AlertCollection.replace``, or (when ``action``, ``status`` and ``key``
    are all given and the key field is present) ``AlertCollection.update`` is
    called. Otherwise an error is logged. Every record is passed through.
    """

    json = Option(
        doc='''
        **Syntax:** **json=***<field>*
        **Description:** Field name that contains the alert as a json string''',
        require=False, validate=validators.Fieldname())

    key = Option(
        doc='''
        **Syntax:** **key=***<field>*
        **Description:** The internal key of the alert''',
        require=False, validate=validators.Fieldname())

    status = Option(
        doc='''
        **Syntax:** **status=***<string>*
        **Description:** The new status''',
        require=False)

    action = Option(
        doc='''
        **Syntax:** **action=***<string>*
        **Description:** The action''',
        require=False)

    notes = Option(
        doc='''
        **Syntax:** **notes=***<string>*
        **Description:** Optional notes to be added to the work log''',
        require=False)

    # Created on first use in stream().
    alerts = None

    def stream(self, records):
        """Apply the requested change for each record, then yield it."""
        self.logger.info('UpdateAlertsCommand: %s', self)  # logs command line
        if not self.alerts:
            self.alerts = AlertCollection(self._metadata.searchinfo.session_key)

        for record in records:
            self._apply(record)
            yield record

    def _apply(self, record):
        """Run replace, update, or log an error for one record."""
        if self.json and self.json in record:
            self.alerts.replace(
                json.loads(record[self.json]),
                notes=self.notes,
                **self._audit_context())
            return

        if self.action and self.status and self.key and self.key in record:
            self.alerts.update(
                record[self.key],
                action=self.action,
                status=self.status,
                notes=self.notes,
                **self._audit_context())
            return

        self.logger.error('json field should be present OR the key field, action value and status value should be provided')

    def _audit_context(self):
        """Keyword arguments shared by the replace and update calls."""
        return {
            'logger': self.logger,
            'sid': self._metadata.searchinfo.sid,
            'username': self._metadata.searchinfo.username,
        }
Exemplo n.º 4
0
class FieldsToJsonCommand(StreamingCommand):
    """Serializes prefixed, type-tagged fields of a record into a JSON string.

    A field takes part when its name, with the first two characters removed,
    starts with ``prefix``. The first character of the name is a type tag
    that selects the conversion:

    * ``s`` - value kept as is
    * ``l`` / ``i`` - ``int(value)``
    * ``f`` - ``float(value)``
    * ``j`` - ``json.loads(value)``
    * ``a`` - list of ``json.loads`` results, one per element

    Fields with any other tag are ignored. The part of the name after the
    prefix becomes the JSON key, and the serialized object is stored in the
    field named by ``json``.
    """
    json = Option(
        doc='''
        **Syntax:** **json=***<field>*
        **Description:** Field name that receives the json string''',
        require=True, validate=validators.Fieldname())
    prefix = Option(
        doc='''
        **Syntax:** **prefix=***<string>*
        **Description:** Any field that is prefixed with this string is serialized.''',
        require=True)

    def stream(self, records):
        """Yield each record with the JSON string added."""
        self.logger.info('FieldsToJsonCommand: %s', self)  # logs command line
        prefix = self.prefix
        name_offset = 2 + len(prefix)
        for record in records:
            json_obj = {}
            # items() exists on Python 2 and 3; iteritems() is Python 2 only.
            for key, value in record.items():
                if not key[2:].startswith(prefix):
                    continue
                tp = key[0]
                actual_key = key[name_offset:]
                if tp == 's':
                    json_obj[actual_key] = value
                elif tp in ('l', 'i'):
                    # int() replaces the Python-2-only long(); on Python 2
                    # int() already promotes large values to long.
                    json_obj[actual_key] = int(value)
                elif tp == 'f':
                    json_obj[actual_key] = float(value)
                elif tp == 'j':
                    json_obj[actual_key] = json.loads(value)
                elif tp == 'a':
                    # A non-list value is treated as a single element rather
                    # than being iterated character by character.
                    if isinstance(value, (list, tuple)):
                        items = value
                    else:
                        items = [value]
                    json_obj[actual_key] = [json.loads(v) for v in items]
            record[self.json] = json.dumps(json_obj)
            yield record
Exemplo n.º 5
0
class FixNameCommand(StreamingCommand):
    """ Takes the first letter of each word in the field and capitalizes it

    ##Syntax

    .. code-block::
        fixname fieldname=<field>

    ##Description

    Replaces the value of <fieldname> with the result of Python's
    ``str.title()``: the first letter of each word is upper-cased and the
    remaining letters are lower-cased.

    ##Example

    Uppercase the first letter of each word in the message field in the _internal index

    .. code-block::
        index=_internal | head 20 | fixname fieldname=message

    """
    fieldname = Option(doc='''
        **Syntax:** **fieldname=***<fieldname>*
        **Description:** Name of the field that will be capitalized''',
                       require=True,
                       validate=validators.Fieldname())

    def stream(self, records):
        """Yield each record with <fieldname> title-cased in place."""
        # Lazy %-args: the command is only formatted if debug is enabled.
        self.logger.debug('FixNameCommand: %s', self)  # logs command line
        target = self.fieldname
        for record in records:
            record[target] = record[target].title()
            yield record
Exemplo n.º 6
0
class HybridAnalysisStreamingCommand(StreamingCommand):
    """Adds a ``hash_result`` field computed by ``get_threat_score``."""

    file_hash = Option(
        doc='''
        **Syntax:** **file_hash=***<file_hash>*
        **Description:** This field contains the file hash you want to search''',
        require=True,
        validate=validators.Fieldname())

    def prepare(self):
        """
        Load the HybridAnalysis API key into the module-level ``API_KEY``.

        Scans the service's stored passwords for an entry with an empty realm
        and the username ``hybridanalysis``. Calls ``error_exit`` when no
        usable key is available afterwards.
        :return: None
        Adapted from:
        https://gitlab.com/adarma_public_projects/splunk/TA-VirusTotal/-/blob/master/bin/virustotal.py
        """
        global API_KEY

        for passwd in self.service.storage_passwords:  # type: StoragePassword
            realm = passwd.realm
            realm_is_blank = realm is None or realm.strip() == ""
            if realm_is_blank and passwd.username == "hybridanalysis":
                API_KEY = passwd.clear_password

        key_missing = API_KEY is None or API_KEY == "defaults_empty"
        if key_missing:
            self.error_exit(
                None,
                "No API key found for HybridAnalysis. Re-run the app setup for the TA."
            )

    def stream(self, records):
        """Yield each record with ``hash_result`` set from its hash field."""
        hash_field = self.file_hash
        for record in records:
            score = get_threat_score(API_KEY, record[hash_field])
            record['hash_result'] = score
            yield record
Exemplo n.º 7
0
class GetAddressCommand(StreamingCommand):
    """ Converts a decimal field value into its hexadecimal representation.

    ##Syntax

    .. code-block::
        get_address field=<field-to-convert>

    ##Description

    Parses the value of <field> as a base-10 integer and stores the result of
    Python's ``hex()`` (for example ``0xff``) in a new field named
    ``<field>_raw``. The original field is left untouched.

    ##Example

    Convert, using python, raw strings for decimals representations into their hexadecimal values.

    .. code-block::
        | <base search> | get_address field=<field-to-convert>

    """
    field = Option(doc='''
        **Syntax:** **field=***<fieldname>*
        **Description:** Name of the field that holds the decimal value to convert''',
                   require=True,
                   validate=validators.Fieldname())

    def stream(self, records):
        """Yield each record with ``<field>_raw`` set to the hex string."""
        self.logger.debug('GetAddressCommand: %s', self)  # logs command line
        output_field = str(self.field + "_raw")
        for record in records:
            # hex() already returns a str, so no extra conversion is needed.
            record[output_field] = hex(int(record[self.field]))
            yield record
Exemplo n.º 8
0
class functCommand(StreamingCommand):
    """Derives a character-pattern string from the start of each listed field.

    For every field in the command's field list the first ``char_limit``
    characters are taken. If the full field value contains a non-word
    character, word characters are removed from that prefix and whitespace
    becomes ``_``. Otherwise ASCII upper-case letters become ``A``, ASCII
    lower-case letters become ``a`` and digits become ``9``. The result is
    written to ``fieldname``; with several listed fields the last one wins.
    """

    fieldname = Option(
        doc='''
        **Syntax:** **fieldname=***<fieldname>*
        **Description:** Name of the field that will hold the result''',
        require=True, validate=validators.Fieldname())

    char_limit = Option(
        doc='''
        **Syntax:** **char_limit=***<positive int>*
        **Description:** Determines how many characters in a field to process. Default is 150''',
        require=False, validate=validators.Integer(maximum=10000), default=150)

    def stream(self, records):
        """Yield each record with the derived pattern stored in ``fieldname``."""
        self.logger.debug('functCommand: %s', self)  # logs command line
        char_limit = self.char_limit
        for record in records:
            for fieldname in self.fieldnames:
                x = record[fieldname][0:char_limit]
                # The test looks at the whole value, not just the prefix.
                if re.search(r'\W{1}', record[fieldname]):
                    x = re.sub(r'\w', "", x)
                    x = re.sub(r'\s', "_", x)
                else:
                    x = re.sub(r'[B-Z]', "A", x)
                    x = re.sub(r'[b-z]', "a", x)
                    x = re.sub(r'[0-8]', "9", x)
                    # Whitespace is a non-word character, so this branch
                    # never sees any; kept to mirror the original.
                    x = re.sub(r'\s', "w", x)
                record[self.fieldname] = x
            yield record
Exemplo n.º 9
0
class DecodeHeaderCommand(StreamingCommand):
    """ Decode an smtp header using python's email.header.decode_header function.

    ##Syntax

    .. code-block::
        decodeheader inputfield=<field> outputfield=<field>

    ##Description

    If inputfield exists, its value is decoded as an encoded email header and stored in outputfield.
    If decoding fails, the error text is stored in <outputfield>_err instead.
    Event records are otherwise passed through to the next pipeline processor unmodified.

    ##Example

    Decode `subject` and store the result in `decoded_subject`.

    .. code-block::
        | decodeheader inputfield=subject outputfield=decoded_subject

    """
    inputfield = Option(doc='''
        **Syntax:** **inputfield=***<fieldname>*
        **Description:** Name of the field that holds the header value''',
                        require=True,
                        validate=validators.Fieldname())

    outputfield = Option(doc='''
        **Syntax:** **outputfield=***<fieldname>*
        **Description:** Name of the field that will hold the decoded header value''',
                         require=True,
                         validate=validators.Fieldname())

    def stream(self, records):
        """Yield every record, decoding ``inputfield`` where it is present."""
        self.logger.debug('DecodeHeaderCommand: %s', self)  # logs command line
        default_charset = 'ASCII'
        for record in records:
            if self.inputfield in record:
                try:
                    parts = header.decode_header(record[self.inputfield])
                    record[self.outputfield] = ''.join(
                        [self._to_text(chunk, charset or default_charset)
                         for chunk, charset in parts])
                except Exception as e:
                    # Best effort per record: report the failure in a
                    # sibling field rather than aborting the search.
                    record[self.outputfield + '_err'] = str(e)
            # Yield outside the ``if`` so records without the input field
            # are passed through instead of being dropped.
            yield record

    @staticmethod
    def _to_text(chunk, charset):
        """Return ``chunk`` as text, decoding byte strings with ``charset``."""
        if isinstance(chunk, bytes):
            return chunk.decode(charset)
        return chunk
Exemplo n.º 10
0
class CommunityIDStreamingCommand(StreamingCommand):
    """Adds a ``community_id`` field computed by ``generate_community_id``.

    The five options name the record fields whose values are passed, in
    order, to ``generate_community_id``.
    """

    src_ip = Option(doc='''
        **Syntax:** **src_ip=***<fieldname>*
        **Description:** Name of the field that contains the source IP address''',
                    require=True,
                    validate=validators.Fieldname())

    src_port = Option(doc='''
        **Syntax:** **src_port=***<fieldname>*
        **Description:** Name of the field that contains the source port''',
                      require=True,
                      validate=validators.Fieldname())

    dest_ip = Option(doc='''
        **Syntax:** **dest_ip=***<fieldname>*
        **Description:** Name of the field that contains the destination IP address''',
                     require=True,
                     validate=validators.Fieldname())

    dest_port = Option(doc='''
        **Syntax:** **dest_port=***<fieldname>*
        **Description:** Name of the field that contains the destination port''',
                       require=True,
                       validate=validators.Fieldname())

    protocol = Option(doc='''
        **Syntax:** **protocol=***<fieldname>*
        **Description:** Name of the field that contains the protocol''',
                      require=True,
                      validate=validators.Fieldname())

    def stream(self, records):
        """Yield each record with ``community_id`` set."""
        for record in records:
            src_ip = record[self.src_ip]
            src_port = record[self.src_port]
            dest_ip = record[self.dest_ip]
            dest_port = record[self.dest_port]
            protocol = record[self.protocol]
            record['community_id'] = generate_community_id(
                src_ip, src_port, dest_ip, dest_port, protocol)
            yield record
class StubbedReportingCommand(ReportingCommand):
    """Reporting command stub declaring one option per validator type.

    ``map`` and ``reduce`` are intentionally empty.
    """

    boolean = Option(
        doc='''
        **Syntax:** **boolean=***<value>*
        **Description:** A boolean value''',
        require=False, validate=validators.Boolean())

    duration = Option(
        doc='''
        **Syntax:** **duration=***<value>*
        **Description:** A length of time''',
        validate=validators.Duration())

    fieldname = Option(
        doc='''
        **Syntax:** **fieldname=***<value>*
        **Description:** Name of a field''',
        validate=validators.Fieldname())

    file = Option(
        doc='''
        **Syntax:** **file=***<value>*
        **Description:** Name of a file''',
        validate=validators.File(mode='r'))

    integer = Option(
        doc='''
        **Syntax:** **integer=***<value>*
        **Description:** An integer value''',
        validate=validators.Integer())

    optionname = Option(
        doc='''
        **Syntax:** **optionname=***<value>*
        **Description:** The name of an option (used internally)''',
        validate=validators.OptionName())

    regularexpression = Option(
        doc='''
        **Syntax:** **regularexpression=***<value>*
        **Description:** Regular expression pattern to match''',
        validate=validators.RegularExpression())

    set = Option(
        doc='''
        **Syntax:** **set=***<value>*
        **Description:** One of the values foo, bar, or test''',
        validate=validators.Set("foo", "bar", "test"))

    @Configuration()
    def map(self, records):
        """Stub: does nothing and returns None."""
        pass

    def reduce(self, records):
        """Stub: does nothing and returns None."""
        pass
Exemplo n.º 12
0
class blahCommand(ReportingCommand):
    """Builds an undirected graph from child/parent pairs and reports BFS data.

    ``map`` collects every (child, parent) pair into one graph and emits it
    as node-link data. ``reduce`` rebuilds the graph from that data and
    emits, for every node, the breadth-first ordering of the nodes reachable
    from it together with its length.
    """
    child = Option(
        doc='''
        **Syntax:** **child=***<fieldname>*
        **Description:** Name of the field that holds the child''',
        require=True, validate=validators.Fieldname())
    parent = Option(
        doc='''
        **Syntax:** **parent=***<fieldname>*
        **Description:** Name of the field that holds the parent''',
        require=True, validate=validators.Fieldname())
    bfs_path = Option(
        doc='''
        **Syntax:** **bfs_path=***<fieldname>*
        **Description:** Name of the field that will hold the computed bfs_path''',
        require=False, default="bfs_path", validate=validators.Fieldname())
    bfs_count = Option(
        doc='''
        **Syntax:** **bfs_count=***<fieldname>*
        **Description:** Name of the field that will hold the computed bfs count''',
        require=False, default="bfs_count", validate=validators.Fieldname())

    @Configuration()
    def map(self, records):
        """Yield a single record whose ``json_data`` is the graph's node-link data."""
        child = self.child
        parent = self.parent
        graph = nx.Graph()
        for item in records:
            graph.add_edge(item[child], item[parent])
        yield {'json_data': nx.node_link_data(graph)}

    def reduce(self, records):
        """Yield one ``{bfs_path, bfs_count}`` record per node of each graph."""
        bfs_path = self.bfs_path
        bfs_count = self.bfs_count
        for item in records:
            graph_j = json.loads(item['json_data'])
            graph = nx.Graph()
            for tup in graph_j['links']:
                graph.add_edge(tup['source'], tup['target'])
            for node in graph_j['nodes']:
                bfs = list(nx.bfs_tree(graph, node['id']))
                yield {bfs_path: bfs, bfs_count: len(bfs)}
Exemplo n.º 13
0
class nxBfsCommand(StreamingCommand):
    """Annotates each record with a BFS over the graph seen so far.

    Every record adds a child-parent edge to one undirected graph kept for
    the whole ``stream`` call. The breadth-first ordering starting at the
    record's parent is then stored in ``bfs_path`` and its length in
    ``bfs_count``. Earlier records therefore only reflect the edges that had
    been added up to that point.
    """

    child = Option(doc='''
        **Syntax:** **child=***<fieldname>*
        **Description:** Name of the field that holds the child''',
                   require=True,
                   validate=validators.Fieldname())

    parent = Option(doc='''
        **Syntax:** **parent=***<fieldname>*
        **Description:** Name of the field that holds the parent''',
                    require=True,
                    validate=validators.Fieldname())

    bfs_path = Option(doc='''
        **Syntax:** **bfs_path=***<fieldname>*
        **Description:** Name of the field that will hold the computed bfs_path''',
                      require=False,
                      default="bfs_path",
                      validate=validators.Fieldname())

    bfs_count = Option(doc='''
        **Syntax:** **bfs_count=***<fieldname>*
        **Description:** Name of the field that will hold the computed bfs count''',
                       require=False,
                       default="bfs_count",
                       validate=validators.Fieldname())

    @Configuration()
    def stream(self, records):
        """Yield each record with the BFS path and count fields added."""
        self.logger.debug('nxbfs: %s', self)
        child = self.child
        parent = self.parent
        graph = nx.Graph()

        for item in records:
            graph.add_edge(item[child], item[parent])
            reachable = list(nx.bfs_tree(graph, item[parent]))
            item[self.bfs_path] = reachable
            item[self.bfs_count] = len(reachable)
            yield item
Exemplo n.º 14
0
class SumCommand(ReportingCommand):
    """ Computes the sum of a set of fields.

    ##Syntax

    .. code-block::
        sum total=<field> <field-list>

    ##Description

    ``map`` adds up the listed fields over all records it receives and emits
    one partial sum; ``reduce`` adds up those partial sums. Both results are
    emitted in the field named by ``total``.

    ##Example

    .. code-block::
        index = _internal | head 200 | sum total=lines linecount

    This example computes the total linecount in the first 200 records in the
    :code:`_internal index`.

    """
    total = Option(
        doc='''
        **Syntax:** **total=***<fieldname>*
        **Description:** Name of the field that will hold the computed sum''',
        require=True,
        validate=validators.Fieldname())

    @Configuration()
    def map(self, records):
        """ Emits one record holding the sum of all listed fields """
        self.logger.debug('SumCommand.map')
        columns = self.fieldnames
        running = 0.0
        for row in records:
            for column in columns:
                running = running + float(row[column])
        yield {self.total: running}

    def reduce(self, records):
        """ Emits one record holding the sum of the partial sums

        Values that cannot be converted to float are logged and skipped.
        """
        self.logger.debug('SumCommand.reduce')
        column = self.total
        running = 0.0
        for row in records:
            raw = row[column]
            try:
                running = running + float(raw)
            except ValueError:
                self.logger.debug(
                    '  could not convert %s value to float: %s',
                    column, repr(raw))
        yield {self.total: running}
Exemplo n.º 15
0
class CountMatchesCommand(StreamingCommand):
    """ Counts the number of non-overlapping matches to a regular expression in
    a set of fields.

    ##Syntax

    .. code-block::
        countmatches fieldname=<field> pattern=<regular_expression> <field-list>

    ##Description

    For each record, the non-overlapping matches of `pattern` are counted
    across all listed fields and the total is written to `fieldname`,
    replacing any existing value. Records are otherwise passed through
    unmodified.

    ##Example

    Count the number of words in the `text` of each tweet in tweets.csv and
    store the result in `word_count`.

    .. code-block::
        | inputcsv tweets.csv | countmatches fieldname=word_count
        pattern="\\w+" text

    """
    fieldname = Option(
        doc='''
        **Syntax:** **fieldname=***<fieldname>*
        **Description:** Name of the field that will hold the match count''',
        require=True,
        validate=validators.Fieldname())

    pattern = Option(
        doc='''
        **Syntax:** **pattern=***<regular-expression>*
        **Description:** Regular expression pattern to match''',
        require=True,
        validate=validators.RegularExpression())

    def stream(self, records):
        """Yield each record with the match total stored in `fieldname`."""
        self.logger.debug('CountMatchesCommand: %s' %
                          self)  # logs command line
        for record in records:
            # Starts as a float, so the stored total is a float as well.
            tally = 0.0
            for name in self.fieldnames:
                text = str(record[name])
                tally += sum(1 for _ in self.pattern.finditer(text))
            record[self.fieldname] = tally
            yield record
Exemplo n.º 16
0
class Vader(StreamingCommand):
    """ Returns a sentiment score for a text field, optionally with details.

    ##Syntax

    .. code-block::
        vader textfield=<field>

    ##Description

    Sentiment analysis using Valence Aware Dictionary and sEntiment Reasoner.
    The compound score is stored in the field "sentiment". With the option
    full_output, the neutral, positive and negative scores are also stored,
    in "sentiment_neutral", "sentiment_positive" and "sentiment_negative".

    ##Example

    .. code-block::
        * | vader textfield=sentence
    """

    textfield = Option(
        require=True,
        doc='''
        **Syntax:** **textfield=***<fieldname>*
        **Description:** Name of the field that will contain the text to search against''',
        validate=validators.Fieldname())

    full_output = Option(
        default=False,
        doc='''
        **Syntax:** **full_output=***<fieldname>*
        **Description:** If true, returns full sentiment values--neutral, positive, and negative--otherwise only compound is reutrned''',
        validate=validators.Boolean())

    def stream(self, records):
        """Yield each record with the sentiment field(s) added."""
        # (output field, polarity key) pairs written when full_output is set.
        detail_fields = (
            ('sentiment_neutral', 'neu'),
            ('sentiment_negative', 'neg'),
            ('sentiment_positive', 'pos'),
        )
        analyzer = SentimentIntensityAnalyzer()
        for record in records:
            scores = analyzer.polarity_scores(record[self.textfield])
            record['sentiment'] = scores['compound']
            if self.full_output:
                for out_name, score_key in detail_fields:
                    record[out_name] = scores[score_key]

            yield record
Exemplo n.º 17
0
class SumCommand(ReportingCommand):
    """ Computes the sum of a set of fields.

    ##Syntax

    .. code-block::
        sum total=<field> <field-list>

    ##Description

    ``map`` emits the sum of the listed fields over the records it receives;
    ``reduce`` emits the sum of those partial results. Both use the field
    named by ``total``.

    ##Example

    .. code-block::
        index = _internal | head 200 | sum total=lines linecount

    This example computes the total linecount in the first 200 records in the
    :code:`_internal index`.

    """
    total = Option(
        doc='''
        **Syntax:** **total=***<fieldname>*
        **Description:** Name of the field that will hold the computed sum''',
        require=True,
        validate=validators.Fieldname())

    @Configuration(clear_required_fields=True)
    def map(self, records):
        """ Emits one record holding the sum of every listed field """
        accumulated = 0.0
        for row in records:
            for column in self.fieldnames:
                accumulated = accumulated + float(row[column])
        yield {self.total: accumulated}

    def reduce(self, records):
        """ Emits one record holding the sum of the partial sums """
        accumulated = 0.0
        for row in records:
            partial = float(row[self.total])
            accumulated = accumulated + partial
        yield {self.total: accumulated}
Exemplo n.º 18
0
class Splongo(GeneratingCommand):
    """Generates events from the documents matched by a MongoDB query.

    The first positional argument of the command is the query. Single quotes
    in it are replaced with double quotes and the result is parsed as JSON.
    Each matching document is emitted as a JSON string in ``_raw``.
    """
    hostname = Option(doc='''
        **Syntax:** **hostname=***<hostname>*
        **Description:** MongoDB host to connect to
        ''',
                      require=True,
                      validate=validators.Fieldname())
    db = Option(doc='''
        **Syntax:** **db=***<db>*
        **Description:** MongoDB database to connect to
        ''',
                require=True)
    collection = Option(doc='''
        **Syntax:** **collection=***<collection>*
        **Description:** MongoDB collection to connect to
        ''')

    def generate(self):
        """Yield one ``{'_raw': <json>}`` record per matching document.

        :raises SyntaxError: if the query argument is missing or is not
            valid JSON after quote replacement.
        """
        # Parse before creating the client so a bad query never leaves an
        # open client behind.
        try:
            query = self.fieldnames[0]
            formatted_query = query.replace(
                "'", '"')  # replace single quotes with double quotes
            json_query = json.loads(formatted_query)
        except Exception as e:
            # Same exception type as before; the cause is now in the message.
            raise SyntaxError("could not parse raw query: %s" % e)

        client = MongoClient(self.hostname)
        try:
            coll = client[self.db][self.collection]
            for doc in coll.find(json_query):
                yield {'_raw': json.dumps(self._format_result(doc))}
        finally:
            # Runs on exhaustion, on error, and when the generator is closed.
            client.close()

    def _format_result(self, doc):
        """
        Stringify ObjectId values in place so the document can be JSON
        encoded. Only top-level values are converted.
        """
        for key, value in doc.items():
            # Matched by type name, as in the original code.
            if type(value).__name__ == 'ObjectId':
                doc[key] = str(value)
        return doc
class DeleteAlertsCommand(StreamingCommand):
    """Deletes alerts through an AlertCollection, one per record.

    For each record that contains the field named by ``key``, that field's
    value is passed to ``AlertCollection.delete``. Records without the field
    are logged as errors. Every record is passed through.
    """

    key = Option(
        doc='''
        **Syntax:** **key=***<field>*
        **Description:** The internal key of the alert''',
        require=True, validate=validators.Fieldname())

    # Created on first use in stream().
    alerts = None

    def stream(self, records):
        """Delete the alert referenced by each record, then yield the record."""
        self.logger.info('DeleteAlertsCommand: %s', self)  # logs command line
        if not self.alerts:
            self.alerts = AlertCollection(self._metadata.searchinfo.session_key)

        for record in records:
            if self.key in record:
                self.alerts.delete(record[self.key], logger=self.logger)
            else:
                # This class has no ``json`` option; report the key field
                # that was actually looked up.
                self.logger.error('DeleteAlertsCommand: no key field %s', str(self.key))
            yield record
Exemplo n.º 20
0
class whoisCommand(StreamingCommand):
    """Enriches each record with the fields returned by a whois proxy.

    The value of ``lookupfield`` is appended to ``caching_whois_proxy`` and
    fetched with ``requests.get``. The response body is parsed as JSON and
    each of its keys is added to the record with a ``whois_`` prefix.
    """
    lookupfield = Option(doc='''
        **Syntax:** **lookupfield=***<fieldname>*
        **Description:** Name of the field to perform whois lookup on''',
                         require=True,
                         validate=validators.Fieldname())

    def stream(self, records):
        """Yield each record with the prefixed whois fields merged in."""
        self.logger.debug('WhoisLookup: %s', self)  # logs command line
        for record in records:
            # NOTE(review): the field value is concatenated into the URL
            # without quoting - confirm the proxy expects it verbatim.
            request = caching_whois_proxy + record[self.lookupfield]
            resp = requests.get(request)
            whois_result = json.loads(resp.text)
            if whois_result is not None:
                # items() exists on Python 2 and 3; iteritems() is Python 2
                # only.
                record.update(
                    {"whois_" + k: v for k, v in whois_result.items()})
            yield record
Exemplo n.º 21
0
class URLCheckerCommand(StreamingCommand):
    """Annotate records with IPQualityScore URL-check results.

    All incoming records are collected first; those containing ``field`` are
    sent in one batch to ``IPQualityScoreClient.url_checker_multithreaded``
    and yielded with the returned values added under the client's prefix.
    Records without the field are dropped after an error is logged.
    """

    field = Option(require=True, default=True, validate=validators.Fieldname())
    strictness = Option(require=False,
                        default=0,
                        validate=validators.Integer())

    def stream(self, records):
        """Check the URL of every record that has one and yield the result.

        :param records: iterable of event records (dict-like).
        :return: generator over the records that contained ``field``.
        :raises Exception: when no record contains ``field`` or when no
            credential with realm ``ipqualityscore_realm`` is stored.
        """
        logger = setup_logging()

        correct_records = []
        incorrect_records = []
        for record in records:
            if self.field in record:
                correct_records.append(record)
            else:
                incorrect_records.append(record)

        if incorrect_records:
            self.logger.error('url field missing from ' +
                              str(len(incorrect_records)) +
                              " events. They will be ignored.")

        if not correct_records:
            raise Exception("There are no events with url field.")

        # Must start as None: without a matching credential the name would
        # otherwise be unbound and the check below would raise
        # UnboundLocalError instead of the intended error.
        usercreds = None
        for credential in self.service.storage_passwords:
            if credential.content.get('realm') != 'ipqualityscore_realm':
                continue
            usercreds = {
                'username': credential.content.get('username'),
                'password': credential.content.get('clear_password')
            }

        if usercreds is None:
            raise Exception("No credentials have been found")

        ipqualityscoreclient = IPQualityScoreClient(
            usercreds.get('password'), logger)

        links = [record.get(self.field) for record in correct_records]
        results_dict = ipqualityscoreclient.url_checker_multithreaded(
            links, strictness=self.strictness)

        for record in correct_records:
            detection_result = results_dict.get(record[self.field])

            if detection_result is not None:
                for key, val in detection_result.items():
                    new_key = ipqualityscoreclient.get_prefix() + "_" + key
                    record[new_key] = val
                record[ipqualityscoreclient.get_prefix() +
                       "_status"] = 'api call success'
            else:
                record[ipqualityscoreclient.get_prefix() +
                       "_status"] = 'api call failed'

            yield record
Exemplo n.º 22
0
class StreamFilterWildcardCommand(StreamingCommand):
    """ Returns a field with a list of non-overlapping matches to a wildcard pattern in a set of fields.

    ##Syntax

    .. code-block::
        StreamFilterWildcardCommand fieldname=<field> pattern=<field containing wildcard pattern> <field-list>

    ##Description

    Returns the non-overlapping matches to the wildcard pattern contained in the field specified by `pattern`
    The result is stored in the field specified by `fieldname`. If `fieldname` exists, its value
    is replaced. If `fieldname` does not exist, it is created. Event records are otherwise passed through to the next
    pipeline processor unmodified.

    ##Example

    Return the wildcard pattern matches in the `text` field (field named text) of each tweet in tweets.csv and store the result in `word_count`.

    .. code-block::
        | inputlookup tweets | eval pattern="\\w+" | streamfilter fieldname=word_count pattern=pattern text

    """
    fieldname = Option(doc='''
        **Syntax:** **fieldname=***<fieldname>*
        **Description:** Name of the field that will hold the match count''',
                       require=True,
                       validate=validators.Fieldname())

    pattern = Option(doc='''
        **Syntax:** **pattern=***<fieldname>* 
        **Description:** Field name containing the wildcard pattern pattern to match''',
                     require=True,
                     validate=validators.Fieldname())

    def thefilter(self, record, pattern):
        """Collect every match of ``pattern`` across the command's field list.

        :param record: the event record being processed.
        :param pattern: compiled regular expression to apply.
        :return: the matches concatenated into one string, each preceded by
            a single space (empty string when nothing matched).
        """
        found = []
        for fieldname in self.fieldnames:
            if fieldname not in record:
                continue
            field_value = record[fieldname]
            # Multivalue fields arrive as a list; treat a single value as a
            # one-element list so both shapes share one code path.
            entries = field_value if isinstance(field_value, list) else [field_value]
            for entry in entries:
                found.extend(pattern.findall(six.ensure_str(entry)))
        return "".join(" " + match for match in found)

    def changeToWildcard(self, pattern):
        """Translate a wildcard expression into an anchored, case-insensitive regex.

        Quotes are stripped and ``*`` becomes ``.*``. A leading ``*`` is
        rewritten so the first matched character may not be an underscore.

        :param pattern: wildcard expression as a string.
        :return: regular expression source string.
        """
        pattern = pattern.replace("\"", "")
        pattern = pattern.replace("'", "")
        pattern = pattern.replace("*", ".*")
        if pattern.startswith(".*"):
            pattern = "[^_].*" + pattern[2:]
        return "(?i)^" + pattern + "$"

    def stream(self, records):
        """Apply the per-record wildcard pattern(s) and store the matches.

        The process exits with status -1 when a record lacks the pattern
        field.

        :param records: iterable of event records (dict-like).
        :return: generator over the records with ``fieldname`` populated.
        """
        self.logger.debug('StreamFilterWildcardCommand: %s',
                          self)  # logs command line
        pattern_field = self.pattern
        for record in records:
            if pattern_field not in record:
                # Pass the field name as a lazy logging argument. Formatting
                # the message eagerly and then also passing an extra
                # positional argument makes logging fail internally with
                # "not all arguments converted", losing the warning.
                self.logger.warning(
                    "StreamFilterWildcardCommand: pattern field is %s but cannot find this field",
                    pattern_field)
                sys.exit(-1)

            raw_pattern = record[pattern_field]
            wildcards = raw_pattern if isinstance(raw_pattern, list) else [raw_pattern]
            values = ""
            for wildcard in wildcards:
                compiled = re.compile(self.changeToWildcard(wildcard))
                values = values + self.thefilter(record, compiled)

            record[self.fieldname] = values
            yield record
Exemplo n.º 23
0
class MispSearchCommand(StreamingCommand):
    """ search in MISP for attributes matching the value of field.

    ##Syntax

        code-block::
        mispsearch field=<field> onlyids=y|n

    ##Description

        body =  {
                    "returnFormat": "mandatory",
                    "page": "optional",
                    "limit": "optional",
                    "value": "optional",
                    "type": "optional",
                    "category": "optional",
                    "org": "optional",
                    "tags": "optional",
                    "from": "optional",
                    "to": "optional",
                    "last": "optional",
                    "eventid": "optional",
                    "withAttachments": "optional",
                    "uuid": "optional",
                    "publish_timestamp": "optional",
                    "timestamp": "optional",
                    "enforceWarninglist": "optional",
                    "to_ids": "optional",
                    "deleted": "optional",
                    "includeEventUuid": "optional",
                    "includeEventTags": "optional",
                    "event_timestamp": "optional",
                    "threat_level_id": "optional",
                    "eventinfo": "optional"
                }

    ##Example

    Search in MISP for value of fieldname r_ip (remote IP in proxy logs).

        code-block::
         * | mispsearch field=r_ip

    """

    misp_instance = Option(doc='''
        **Syntax:** **misp_instance=instance_name*
        **Description:**MISP instance parameters as described in local/inputs.conf''',
                           require=True)
    field = Option(doc='''
        **Syntax:** **field=***<fieldname>*
        **Description:**Name of the field containing the value to search for.''',
                   require=True,
                   validate=validators.Fieldname())
    onlyids = Option(doc='''
        **Syntax:** **onlyids=***<y|n>*
        **Description:** Boolean to search only attributes with to_ids set''',
                     require=False,
                     validate=validators.Boolean())
    gettag = Option(doc='''
        **Syntax:** **gettag=***<y|n>*
        **Description:** Boolean to return attribute tags''',
                    require=False,
                    validate=validators.Boolean())
    includeEventUuid = Option(doc='''
        **Syntax:** **includeEventUuid=***y|Y|1|true|True|n|N|0|false|False*
        **Description:**Boolean to include event UUID(s) to results.''',
                              require=False,
                              validate=validators.Boolean())
    includeEventTags = Option(doc='''
        **Syntax:** **includeEventTags=***y|Y|1|true|True|n|N|0|false|False*
        **Description:**Boolean to include event tags to results.''',
                              require=False,
                              validate=validators.Boolean())
    last = Option(doc='''
        **Syntax:** **last=***<int>d|h|m*
        **Description:**publication duration in day(s), hour(s) or minute(s). **eventid**, **last** and **date_from** are mutually exclusive''',
                  require=False,
                  validate=validators.Match("last", r"^[0-9]+[hdm]$"))
    limit = Option(doc='''
        **Syntax:** **limit=***<int>*
        **Description:**define the limit for each MISP search; default 1000. 0 = no pagination.''',
                   require=False,
                   validate=validators.Match("limit", r"^[0-9]+$"))
    # The validator name is what shows up in the validation error, so it
    # has to name this option rather than "limit".
    page = Option(doc='''
        **Syntax:** **page=***<int>*
        **Description:**define the page for each MISP search; default 1.''',
                  require=False,
                  validate=validators.Match("page", r"^[0-9]+$"))
    json_request = Option(doc='''
        **Syntax:** **json_request=***valid JSON request*
        **Description:**Valid JSON request''',
                          require=False)

    @staticmethod
    def _append_unique(bucket, value):
        """Append ``str(value)`` to ``bucket`` unless it is already present."""
        text = str(value)
        if text not in bucket:
            bucket.append(text)

    def stream(self, records):
        """Query MISP ``/attributes/restSearch`` once per record and enrich it.

        For each record holding a non-None value in ``field``, the value is
        posted to MISP and the distinct attribute properties found in the
        response are stored in ``misp_*`` multivalue fields. Records are
        always yielded, enriched or not.

        :param records: iterable of event records (dict-like).
        :return: generator over the records.
        :raises requests.HTTPError: via ``raise_for_status`` when MISP
            answers with an error status.
        """
        # Generate args
        my_args = prepare_config(self)
        my_args['misp_url'] = my_args['misp_url'] + '/attributes/restSearch'
        # set proper headers
        headers = {
            'Content-type': 'application/json',
            'Authorization': my_args['misp_key'],
            'Accept': 'application/json',
        }

        fieldname = str(self.field)
        # NOTE(review): the `gettag` option is declared but never consulted;
        # tags are always returned in `misp_tag` - confirm this is intended.

        # limit=0 disables pagination; otherwise default to 1000 per page.
        pagination = True
        limit = 1000
        if self.limit is not None:
            if int(self.limit) == 0:
                pagination = False
            else:
                limit = int(self.limit)
        page = int(self.page) if self.page is not None else 1

        if self.json_request is not None:
            body_dict = json.loads(self.json_request)
            logging.info('Option "json_request" set')
            body_dict['returnFormat'] = 'json'
            body_dict['withAttachments'] = False
            if 'limit' in body_dict:
                limit = int(body_dict['limit'])
                if limit == 0:
                    pagination = False
            if 'page' in body_dict:
                # An explicit page in the request wins; leave it untouched.
                page = body_dict['page']
                pagination = False
        else:
            # build search JSON object
            body_dict = {"returnFormat": "json", "withAttachments": False}
            if self.onlyids is True:
                body_dict['to_ids'] = "True"
            if self.includeEventUuid is not None:
                body_dict['includeEventUuid'] = self.includeEventUuid
            if self.includeEventTags is not None:
                body_dict['includeEventTags'] = self.includeEventTags
            if self.last is not None:
                body_dict['last'] = self.last

        add = self._append_unique
        for record in records:
            value = record.get(fieldname, None)
            if value is None:
                # Field absent (or None): nothing to search for.
                yield record
                continue

            body_dict['value'] = str(value)
            misp_category = []
            misp_event_id = []
            misp_event_uuid = []
            misp_orgc_id = []
            misp_to_ids = []
            misp_tag = []
            misp_type = []
            misp_value = []
            misp_uuid = []
            # search
            if pagination is True:
                body_dict['page'] = page
                body_dict['limit'] = limit
            body = json.dumps(body_dict)
            logging.debug('mispsearch request body: %s', body)
            r = requests.post(my_args['misp_url'],
                              headers=headers,
                              data=body,
                              verify=my_args['misp_verifycert'],
                              cert=my_args['client_cert_full_path'],
                              proxies=my_args['proxies'])
            # check if status is anything other than 200; throw an exception if it is
            r.raise_for_status()
            # response is 200 by this point or we would have thrown an exception
            response = r.json()
            if 'response' in response and 'Attribute' in response['response']:
                for a in response['response']['Attribute']:
                    add(misp_type, a['type'])
                    add(misp_value, a['value'])
                    add(misp_to_ids, a['to_ids'])
                    add(misp_category, a['category'])
                    add(misp_uuid, a['uuid'])
                    add(misp_event_id, a['event_id'])
                    if 'Tag' in a:
                        for tag in a['Tag']:
                            add(misp_tag, tag['name'])
                    if 'Event' in a:
                        # Compare the stringified value, since that is what
                        # the lists hold; comparing the raw value would let
                        # non-string ids slip in as duplicates.
                        add(misp_event_uuid, a['Event']['uuid'])
                        add(misp_orgc_id, a['Event']['orgc_id'])
                record['misp_type'] = misp_type
                record['misp_value'] = misp_value
                record['misp_to_ids'] = misp_to_ids
                record['misp_category'] = misp_category
                record['misp_attribute_uuid'] = misp_uuid
                record['misp_event_id'] = misp_event_id
                record['misp_event_uuid'] = misp_event_uuid
                record['misp_orgc_id'] = misp_orgc_id
                record['misp_tag'] = misp_tag

            yield record
Exemplo n.º 24
0
class JsonToFieldsCommand(StreamingCommand):
    """Expand the top-level keys of a JSON string field into record fields.

    Each selected key becomes a field named ``[type prefix] + prefix + key``.
    Strings and scalars are copied as-is, mappings are re-serialised as JSON
    text, and sequences become a list of JSON-serialised items. When a field
    list is given to the command, only those keys are expanded.
    """
    json = Option(
        require=True, validate=validators.Fieldname(),
        doc='''
        **Syntax:** **json=***<field>*
        **Description:** Field name that contains the json string''')
    prefix = Option(
        require=False,
        doc='''
        **Syntax:** **prefix=***<string>*
        **Description:** Prefix to use to expand fields''')
    typeprefix = Option(
        require=False, default=False, validate=validators.Boolean(),
        doc='''
        **Syntax:** **typeprefix=***<bool>*
        **Description:** If true, prefix fields with a letter indicating the type (long, int, float, string, json, array)''')

    def stream(self, records):
        """Yield every record, expanded when its JSON field is non-empty.

        :param records: iterable of event records (dict-like).
        :return: generator over the records.
        """
        self.logger.info('JsonToFieldsCommand: %s', self)  # logs command line
        for record in records:
            json_str = record.get(self.json)
            if not json_str:
                self.logger.warn('JsonToFieldsCommand: no field named %s', self.json)
                yield record
                continue

            json_obj = json.loads(json_str)
            prefix = self.prefix if self.prefix else ""
            for key, value in json_obj.iteritems():
                # An empty field list means "expand everything".
                if self.fieldnames and key not in self.fieldnames:
                    continue

                # Classify the value: pick its type marker and the payload
                # that will be written to the record.
                if isinstance(value, basestring):
                    marker, payload = "s_", value
                elif isinstance(value, collections.Mapping):
                    marker, payload = "j_", json.dumps(value)
                elif isinstance(value, collections.Sequence):
                    marker, payload = "a_", [json.dumps(item) for item in value]
                elif isinstance(value, int):
                    marker, payload = "i_", value
                elif isinstance(value, float):
                    marker, payload = "f_", value
                elif isinstance(value, long):
                    marker, payload = "l_", value
                else:
                    marker, payload = "x_", value

                if not self.typeprefix:
                    marker = ""
                record[marker + prefix + key] = payload
            yield record
Exemplo n.º 25
0
class TestSearchCommand(SearchCommand):
    """Search command that declares one optional and one required option per validator.

    The class carries no processing logic of its own; it only declares
    options, plus a ``ConfigurationSettings`` whose ``fix_up`` does nothing.
    """

    # --- Boolean -----------------------------------------------------------
    boolean = Option(
        validate=validators.Boolean(),
        doc='''
        **Syntax:** **boolean=***<value>*
        **Description:** A boolean value''')

    required_boolean = Option(
        require=True,
        validate=validators.Boolean(),
        doc='''
        **Syntax:** **boolean=***<value>*
        **Description:** A boolean value''')

    # Exposed to the search language under the name "foo".
    aliased_required_boolean = Option(
        name='foo',
        require=True,
        validate=validators.Boolean(),
        doc='''
        **Syntax:** **boolean=***<value>*
        **Description:** A boolean value''')

    # --- Code --------------------------------------------------------------
    code = Option(
        validate=validators.Code(),
        doc='''
        **Syntax:** **code=***<value>*
        **Description:** A Python expression, if mode == "eval", or statement, if mode == "exec"''')

    required_code = Option(
        require=True,
        validate=validators.Code(),
        doc='''
        **Syntax:** **code=***<value>*
        **Description:** A Python expression, if mode == "eval", or statement, if mode == "exec"''')

    # --- Duration ----------------------------------------------------------
    duration = Option(
        validate=validators.Duration(),
        doc='''
        **Syntax:** **duration=***<value>*
        **Description:** A length of time''')

    required_duration = Option(
        require=True,
        validate=validators.Duration(),
        doc='''
        **Syntax:** **duration=***<value>*
        **Description:** A length of time''')

    # --- Fieldname ---------------------------------------------------------
    fieldname = Option(
        validate=validators.Fieldname(),
        doc='''
        **Syntax:** **fieldname=***<value>*
        **Description:** Name of a field''')

    required_fieldname = Option(
        require=True,
        validate=validators.Fieldname(),
        doc='''
        **Syntax:** **fieldname=***<value>*
        **Description:** Name of a field''')

    # --- File --------------------------------------------------------------
    file = Option(
        validate=validators.File(),
        doc='''
        **Syntax:** **file=***<value>*
        **Description:** Name of a file''')

    required_file = Option(
        require=True,
        validate=validators.File(),
        doc='''
        **Syntax:** **file=***<value>*
        **Description:** Name of a file''')

    # --- Integer -----------------------------------------------------------
    integer = Option(
        validate=validators.Integer(),
        doc='''
        **Syntax:** **integer=***<value>*
        **Description:** An integer value''')

    required_integer = Option(
        require=True,
        validate=validators.Integer(),
        doc='''
        **Syntax:** **integer=***<value>*
        **Description:** An integer value''')

    # --- Map ---------------------------------------------------------------
    map = Option(
        validate=validators.Map(foo=1, bar=2, test=3),
        doc='''
        **Syntax:** **map=***<value>*
        **Description:** A mapping from one value to another''')

    required_map = Option(
        require=True,
        validate=validators.Map(foo=1, bar=2, test=3),
        doc='''
        **Syntax:** **map=***<value>*
        **Description:** A mapping from one value to another''')

    # --- Match -------------------------------------------------------------
    match = Option(
        validate=validators.Match('social security number', r'\d{3}-\d{2}-\d{4}'),
        doc='''
        **Syntax:** **match=***<value>*
        **Description:** A value that matches a regular expression pattern''')

    required_match = Option(
        require=True,
        validate=validators.Match('social security number', r'\d{3}-\d{2}-\d{4}'),
        doc='''
        **Syntax:** **required_match=***<value>*
        **Description:** A value that matches a regular expression pattern''')

    # --- OptionName --------------------------------------------------------
    optionname = Option(
        validate=validators.OptionName(),
        doc='''
        **Syntax:** **optionname=***<value>*
        **Description:** The name of an option (used internally)''')

    required_optionname = Option(
        require=True,
        validate=validators.OptionName(),
        doc='''
        **Syntax:** **optionname=***<value>*
        **Description:** The name of an option (used internally)''')

    # --- RegularExpression -------------------------------------------------
    regularexpression = Option(
        validate=validators.RegularExpression(),
        doc='''
        **Syntax:** **regularexpression=***<value>*
        **Description:** Regular expression pattern to match''')

    required_regularexpression = Option(
        require=True,
        validate=validators.RegularExpression(),
        doc='''
        **Syntax:** **regularexpression=***<value>*
        **Description:** Regular expression pattern to match''')

    # --- Set ---------------------------------------------------------------
    set = Option(
        validate=validators.Set('foo', 'bar', 'test'),
        doc='''
        **Syntax:** **set=***<value>*
        **Description:** A member of a set''')

    required_set = Option(
        require=True,
        validate=validators.Set('foo', 'bar', 'test'),
        doc='''
        **Syntax:** **set=***<value>*
        **Description:** A member of a set''')

    class ConfigurationSettings(SearchCommand.ConfigurationSettings):
        """Configuration settings whose ``fix_up`` hook is a no-op."""

        @classmethod
        def fix_up(cls, command_class):
            """Do nothing for ``command_class``; always returns None."""
            return None
Exemplo n.º 26
0
class StreamFilterCommand(StreamingCommand):
    """ Returns a field with a list of non-overlapping matches to a regular expression in a set of fields.

    ##Syntax

    .. code-block::
        StreamFilterCommand fieldname=<field> pattern=<field containing regex pattern> <field-list>

    ##Description

    Returns the non-overlapping matches to the regular expression contained in the field specified by `pattern`
    The result is stored in the field specified by `fieldname`. If `fieldname` exists, its value
    is replaced. If `fieldname` does not exist, it is created. Event records are otherwise passed through to the next
    pipeline processor unmodified.

    ##Example

    Return the regular expression matches in the `text` field (field named text) of each tweet in tweets.csv and store the result in `word_count`.

    .. code-block::
        | inputlookup tweets | eval pattern="\\w+" | streamfilter fieldname=word_count pattern=pattern text

    """
    fieldname = Option(
        doc='''
        **Syntax:** **fieldname=***<fieldname>*
        **Description:** Name of the field that will hold the match count''',
        require=True, validate=validators.Fieldname())

    pattern = Option(
        doc='''
        **Syntax:** **pattern=***<fieldname>* 
        **Description:** Field name containign the regular expression pattern to match''',
        require=True, validate=validators.Fieldname())

    @staticmethod
    def _to_text(value):
        """Return ``value`` as text, decoding byte strings as UTF-8.

        Values that are already text are returned without a decode step, so
        this works whether the record holds byte strings or text strings.
        """
        if isinstance(value, bytes):
            value = value.decode("utf-8")
        return six.text_type(value)

    def thefilter(self, record, pattern):
        """Collect every match of ``pattern`` across the command's field list.

        :param record: the event record being processed.
        :param pattern: compiled regular expression to apply.
        :return: the matches concatenated into one string, each preceded by
            a single space (empty string when nothing matched).
        """
        found = []
        for fieldname in self.fieldnames:
            if fieldname not in record:
                # A field named on the command line may be absent from some
                # events; skip it rather than abort with a KeyError.
                continue
            field_value = record[fieldname]
            # Multivalue fields arrive as a list; treat a single value as a
            # one-element list so both shapes share one code path.
            entries = field_value if isinstance(field_value, list) else [field_value]
            for entry in entries:
                found.extend(pattern.findall(self._to_text(entry)))
        return "".join(" " + match for match in found)

    def stream(self, records):
        """Apply the per-record regex pattern(s) and store the matches.

        The process exits with status -1 when a record lacks the pattern
        field.

        :param records: iterable of event records (dict-like).
        :return: generator over the records with ``fieldname`` populated.
        """
        self.logger.debug('StreamFilterCommand: %s', self)  # logs command line
        pattern_field = self.pattern
        for record in records:
            if pattern_field not in record:
                # Pass the field name as a lazy logging argument. Formatting
                # the message eagerly and then also passing an extra
                # positional argument makes logging fail internally with
                # "not all arguments converted", losing the warning.
                self.logger.warning(
                    "StreamFilterCommand: pattern field is %s but cannot find this field",
                    pattern_field)
                sys.exit(-1)

            raw_pattern = record[pattern_field]
            expressions = raw_pattern if isinstance(raw_pattern, list) else [raw_pattern]
            values = ""
            for expression in expressions:
                values = values + self.thefilter(record, re.compile(expression))

            record[self.fieldname] = values
            yield record
Exemplo n.º 27
0
class Bs4(StreamingCommand):
    """ Thin BeautifulSoup4 wrapper for pulling html/xml tags and their text into Splunk fields.

    ##Syntax

    .. code-block::
       bs4 textfield=<field> [get_text=<bool>] [get_text_label=<string>] [parser=<string>] [find=<tag>] [find_attrs=<quoted_key:value_pairs>] [find_all=<tag>] [find_all_attrs=<quoted_key:value_pairs>] [find_child=<tag>] [find_child_attrs=<quoted_key:value_pairs>] [find_children=<tag>] [find_children_attrs=<quoted_key:value_pairs>]

    ##Description

    The text of `textfield` is parsed with the chosen parser ('lxml' by default) and narrowed
    by whichever find options are set, applied in the order
    find > find_all > find_child > find_children. Each find option has a companion
    '*_attrs' option taking inner- and outer-quoted key:value pairs for a more precise
    selection. With get_text enabled (the default) the text of the selection is written to
    the field named by get_text_label; otherwise the selection itself is written to `soup`.

    ##Example

    .. code-block::
        * | bs4 textfield=_raw find="div" get_text=t
    """

    textfield = Option(
        doc='''
        **Syntax:** **textfield=***<fieldname>*
        **Description:** Name of the field that will contain the text to search against''',
        require=True,
        validate=validators.Fieldname())

    parser = Option(
        doc='''
        **Syntax:** **parser=***<string>*
        **Description:** Corresponds to parsers listed here https://www.crummy.com/software/BeautifulSoup/bs4/doc/#installing-a-parser (currently html5lib not packaged with so not an option)''',
        default='lxml')

    find = Option(
        doc='''
        **Syntax:** **find=***<tag>*
        **Description:** Corresponds to the name attribute of BeautifulSoup's find method''',
        default=False)

    find_attrs = Option(
        doc='''
        **Syntax:** **find_attrs=***<quoted_key:value_pairs>*
        **Description:** Corresponds to the attrs attribute of BeautifulSoup's find method. Expects inner and outer quoted "'key1':'value1','key2':'value2'" pairs comma-separated but contained in outer quotes.''',
        default=None)

    find_all = Option(
        doc='''
        **Syntax:** **find_all=***<tag>*
        **Description:** Corresponds to the name attribute of BeautifulSoup's find_all method. Order of operation is find > find_all > find_child > find_children so can be used in conjunction''',
        default=False)

    find_all_attrs = Option(
        doc='''
        **Syntax:** **find_all_attrs=***<quoted_key:value_pairs>*
        **Description:** Corresponds to the attrs attribute of BeautifulSoup's find_all method. Expects inner and outer quoted "'key1':'value1','key2':'value2'" pairs comma-separated but contained in outer quotes.''',
        default=None)

    find_child = Option(
        doc='''
        **Syntax:** **find_child=***<tag>*
        **Description:** Corresponds to the name attribute of BeautifulSoup's find_child method. Order of operation is find > find_all > find_child > find_children so can be used in conjunction''',
        default=False)

    find_child_attrs = Option(
        doc='''
        **Syntax:** **find_child_attrs=***<quoted_key:value_pairs>*
        **Description:** Corresponds to the attrs attribute of BeautifulSoup's find_child method. Expects inner and outer quoted "'key1':'value1','key2':'value2'" pairs comma-separated but contained in outer quotes.''',
        default=None)

    find_children = Option(
        doc='''
        **Syntax:** **find_children=***<tag>*
        **Description:** Corresponds to the name attribute of BeautifulSoup's find_children method. Order of operation is find > find_all > find_child > find_children so can be used in conjunction''',
        default=False)

    find_children_attrs = Option(
        doc='''
        **Syntax:** **find_children_attrs=***<quoted_key:value_pairs>*
        **Description:** Corresponds to the attrs attribute of BeautifulSoup's find_children method. Expects inner and outer quoted "'key1':'value1','key2':'value2'" pairs comma-separated but contained in outer quotes.''',
        default=None)

    get_text = Option(
        doc='''
        **Syntax:** **get_text=***<bool>*
        **Description:** If true, returns text minus html/xml formatting for given selection and places in field `get_text` otherwise returns the selection in a field called `soup1`''',
        default=True,
        validate=validators.Boolean())

    get_text_label = Option(
        doc='''
        **Syntax:** **get_text_label=***<string>*
        **Description:** If get_text is true, sets the label for the return field''',
        default='get_text')

    #http://dev.splunk.com/view/logging/SP-CAAAFCN
    def setup_logging(self):
        """Build the 'splunk.foo' logger with a rotating file handler under $SPLUNK_HOME.

        The handler writes to var/log/splunk/nlp-text-analytics.log and the
        logger is then passed to ``setupSplunkLogger`` with the default and
        local log.cfg paths and the 'python' stanza.

        :return: the configured logger.
        :raises KeyError: when SPLUNK_HOME is not set in the environment.
        """
        logger = logging.getLogger('splunk.foo')
        splunk_home = os.environ['SPLUNK_HOME']

        default_cfg = os.path.join(splunk_home, 'etc', 'log.cfg')
        local_cfg = os.path.join(splunk_home, 'etc', 'log-local.cfg')
        log_file = os.path.join(
            splunk_home,
            os.path.join('var', 'log', 'splunk'),
            "nlp-text-analytics.log")

        handler = logging.handlers.RotatingFileHandler(log_file, mode='a')
        handler.setFormatter(logging.Formatter(
            "%(asctime)s %(levelname)-s\t%(module)s:%(lineno)d - %(message)s"))
        logger.addHandler(handler)
        setupSplunkLogger(logger, default_cfg, local_cfg, 'python')
        return logger

    def stream(self, records):
        """Parse ``textfield`` of each record, narrow the selection, and store it.

        :param records: iterable of event records (dict-like).
        :return: generator over the records with the text (or selection) added.
        """
        # (tag option, attrs option, BeautifulSoup method) in application order.
        steps = (
            (self.find, self.find_attrs, 'find'),
            (self.find_all, self.find_all_attrs, 'find_all'),
            (self.find_child, self.find_child_attrs, 'findChild'),
            (self.find_children, self.find_children_attrs, 'findChildren'),
        )
        # find_all / findChildren return collections, so text is extracted
        # per element in that case.
        many = self.find_all or self.find_children

        def plain_text(node):
            return node.get_text().decode('unicode_escape').encode('ascii','ignore')

        for record in records:
            soup = BeautifulSoup(record[self.textfield], self.parser)
            for tag, attrs, method_name in steps:
                if not tag:
                    continue
                finder = getattr(soup, method_name)
                if attrs is None:
                    soup = finder(tag)
                else:
                    # attrs is the inside of a dict literal; wrap and parse it.
                    soup = finder(tag, literal_eval('{'+attrs+'}'))

            if not self.get_text:
                record['soup'] = soup
            elif many:
                record[self.get_text_label] = [plain_text(i) for i in soup]
            else:
                record[self.get_text_label] = plain_text(soup)

            yield record
Exemplo n.º 28
0
class MakeAlertsCommand(StreamingCommand):
    """Streaming command that turns each incoming record into an alert.

    Every record is handed to ``AlertCollection.insert`` together with the
    command options and is then passed through to the next command. When
    ``preview`` is enabled the messages collected by the per-record
    ``SearchContext`` are exposed in a ``preview`` field.

    Unless ``interactive`` is set, the command refuses to run outside of a
    scheduled search (see ``is_scheduled``).
    """
    time = Option(doc='''
        **Syntax:** **time=***<field>*
        **Description:** Field name used to determine event time for the alert''',
                  require=False,
                  validate=validators.Fieldname(),
                  default='_time')
    entity = Option(doc='''
        **Syntax:** **entity=***<field>*
        **Description:** Field name used to determine the entity triggering the alert (account name, machine name, ...)''',
                    require=False,
                    validate=validators.Fieldname(),
                    default='entity')
    alert_type = Option(doc='''
        **Syntax:** **type=***<string>*
        **Description:** Field name used to determine the type of alert''',
                        require=True,
                        name='type')
    severity = Option(doc='''
        **Syntax:** **severity=***<field>*
        **Description:** Field name used to set severity of the alert''',
                      require=False,
                      validate=validators.Fieldname(),
                      default=None)
    idfield = Option(doc='''
        **Syntax:** **idfield=***<field>*
        **Description:** Field name used to store the alert id''',
                     require=False,
                     default=None,
                     validate=validators.Fieldname())
    combine = Option(doc='''
        **Syntax:** **combine=***"<fields>"*
        **Description:** Comma separated field names where alerts should be combined instead of creating new ones.''',
                     require=False,
                     default=None)
    combine_window = Option(doc='''
        **Syntax:** **combine_window=***<string>*
        **Description:** hours or days. ''',
                            require=False,
                            default=None)
    interactive = Option(doc='''
        **Syntax:** **interactive=***<bool>*
        **Description:** If true, makealerts can run in an interactive search, otherwise it will run only in scheduled
        search (this is to prevent alerts created accidentally when copy and pasting scheduled search text)''',
                         require=False,
                         default=False,
                         validate=validators.Boolean())
    preview = Option(doc='''
        **Syntax:** **preview=***<bool>*
        **Description:** If true, makealerts does not create alerts but instead indicates what it would do in the
        preview field''',
                     require=False,
                     default=False,
                     validate=validators.Boolean())

    # Created on first use in stream(); needs the session key of the search.
    alerts = None

    def __init__(self):
        super(MakeAlertsCommand, self).__init__()
        self.insert_stats = InsertStats()
        # Replaced in stream() by an adapter that carries the sid and the
        # alert type; until then the plain command logger is used.
        self.loggerExtra = self.logger

    def is_scheduled(self):
        """Return True when the search id carries a scheduler prefix."""
        sid = self._metadata.searchinfo.sid
        return sid.startswith(("scheduler_", "rt_scheduler_"))

    def stream(self, records):
        """Insert an alert for every record and yield the record.

        Raises:
            RuntimeError: when run outside a scheduled search without
                ``interactive`` being enabled.
        """
        searchinfo = self._metadata.searchinfo
        self.loggerExtra = CustomLogAdapter(self.logger, {
            'sid': searchinfo.sid,
            'type': self.alert_type
        })

        if not self.interactive and not self.is_scheduled():
            raise RuntimeError(
                "When testing makealerts from interactive search, provide the 'interactive=t' option."
            )

        if not self.alerts:
            self.alerts = AlertCollection(searchinfo.session_key)

        for record in records:
            # A fresh context per record keeps the preview messages of one
            # record separate from those of the next.
            search_context = SearchContext(searchinfo, self.loggerExtra)
            self.alerts.insert(record,
                               event_time=self.time,
                               entity=self.entity,
                               alert_type=self.alert_type,
                               severity=self.severity,
                               idfield=self.idfield,
                               combine=self.combine,
                               combine_window=self.combine_window,
                               preview=self.preview,
                               search_context=search_context,
                               insert_stats=self.insert_stats)
            if self.preview:
                record['preview'] = str(search_context.messages)
            yield record

    def finish(self):
        """Report insert errors, log the insert statistics and finish."""
        if self.interactive and (
                not self.is_scheduled()) and self.insert_stats.errors > 0:
            self.write_error(
                "There were {0} error(s) when trying to insert data, check logs with this search 'index=_internal MakeAlertsCommand source=*super_simple_siem.log* ERROR'",
                self.insert_stats.errors)

        if not self.preview:
            # The statistics need a placeholder: an argument without one
            # makes the logging module fail to format the message.
            self.loggerExtra.info('s3tag=stats %s', self.insert_stats)

        try:
            super(MakeAlertsCommand, self).finish()
        except Exception:
            # Finishing stays best-effort, but the failure is no longer
            # invisible and interpreter-exit signals are not swallowed.
            self.loggerExtra.debug('finish() of the base command failed',
                                   exc_info=True)
Exemplo n.º 29
0
class CleanText(StreamingCommand):
    """ Tokenizes and normalizes the text held in a field.

    ##Syntax

    .. code-block::
        cleantext textfield=<field> [keep_orig=<bool>] [default_clean=<bool>] [remove_urls=<bool>]
            [remove_stopwords=<bool>] [base_word=<bool>] [base_type=<string>] [mv=<bool>]
            [force_nltk_tokenize=<bool>] [pos_tagset=<string>]
            [custom_stopwords=<comma_separated_string_list>] [term_min_len=<int>]
            [ngram_range=<int>-<int>] [ngram_mix=<bool>]

    ##Description

    Tokenize and normalize text (remove punctuation, digits, change to base_word)
    Different options result in better and slower cleaning. base_type="lemma_pos" being the
    slowest option, base_type="lemma" assumes every word is a noun, which is faster but still
    results in decent lemmatization. Many fields have a default already set, textfield is only
    required field. By default results in a multi-valued field which is ready for used with
    stats count by.

    ##Example

    .. code-block::
        * | cleantext textfield=sentence
    """

    textfield = Option(require=True,
                       doc='''
        **Syntax:** **textfield=***<fieldname>*
        **Description:** Name of the field that will contain the text to search against''',
                       validate=validators.Fieldname())
    keep_orig = Option(default=False,
                       doc='''**Syntax:** **keep_orig=***<boolean>*
        **Description:** Maintain a copy of the original text for comparison or searching into field called
        orig_text''',
                       validate=validators.Boolean())
    default_clean = Option(default=True,
                           doc='''**Syntax:** **default_clean=***<boolean>*
        **Description:** Change text to lowercase, remove punctuation, and removed numbers, defaults to true''',
                           validate=validators.Boolean())
    remove_urls = Option(default=True,
                         doc='''**Syntax:** **remove_urls=***<boolean>*
        **Description:** Remove html links as part of text cleaning, defaults to true''',
                         validate=validators.Boolean())
    remove_stopwords = Option(
        default=True,
        doc='''**Syntax:** **remove_stopwords=***<boolean>*
        **Description:** Remove stopwords as part of text cleaning, defaults to true''',
        validate=validators.Boolean())
    base_word = Option(default=True,
                       doc='''**Syntax:** **base_word=***<boolean>*
        **Description:** Convert words to a base form as part of text cleaning, defaults to true and subject to value of base_type setting''',
                       validate=validators.Boolean())
    base_type = Option(
        default='lemma',
        doc='''**Syntax:** **base_type=***<string>*
        **Description:** Options are lemma, lemma_pos, or stem, defaults to lemma and subject to value of base_word setting being true''',
    )
    mv = Option(default=True,
                doc='''**Syntax:** **mv=***<boolean>*
        **Description:** Returns words as multivalue otherwise words are space separated, defaults to true''',
                validate=validators.Boolean())
    force_nltk_tokenize = Option(
        default=False,
        doc='''**Syntax:** **force_nltk_tokenize=***<boolean>*
        **Description:** Forces use of better NLTK word tokenizer but is slower, defaults to false''',
        validate=validators.Boolean())
    # NOTE(review): the help text says the default is "universal" but the
    # option default is None, which is what gets passed to pos_tag - confirm
    # which one is intended.
    pos_tagset = Option(
        default=None,
        doc='''**Syntax:** **pos_tagset=***<string>*
        **Description:** Options are universal, wsj, or brown; defaults to universal and subject to base_type set to "lemma_pos"''',
    )
    custom_stopwords = Option(
        doc='''**Syntax:** **custom_stopwords=***<string>*
        **Description:** comma-separated list of custom stopwords, enclose in quotes''',
    )
    term_min_len = Option(default=0,
                          doc='''**Syntax:** **term_min_len=***<int>*
        **Description:** Only terms greater than or equal to this number will be returned. Useful if data has a lot of HTML markup.''',
                          validate=validators.Integer())
    ngram_range = Option(
        default='1-1',
        doc='''**Syntax:** **ngram_range=***<int>-<int>*
        **Description:** Returns new ngram column with range of ngrams specified if max is greater than 1''',
    )
    ngram_mix = Option(default=False,
                       doc='''**Syntax:** **ngram_mix=***<boolean>*
        **Description:** Determines if ngram output is combined or separate columns. Defaults to false which results in separate columns''',
                       validate=validators.Boolean())

    #http://dev.splunk.com/view/logging/SP-CAAAFCN
    def setup_logging(self):
        """Attach a rotating file handler under $SPLUNK_HOME/var/log/splunk
        to the 'splunk.foo' logger, apply the Splunk logging configuration
        and return the logger.

        Raises KeyError when SPLUNK_HOME is not set in the environment.
        """
        logger = logging.getLogger('splunk.foo')
        SPLUNK_HOME = os.environ['SPLUNK_HOME']

        LOGGING_DEFAULT_CONFIG_FILE = os.path.join(SPLUNK_HOME, 'etc',
                                                   'log.cfg')
        LOGGING_LOCAL_CONFIG_FILE = os.path.join(SPLUNK_HOME, 'etc',
                                                 'log-local.cfg')
        LOGGING_STANZA_NAME = 'python'
        LOGGING_FILE_NAME = "nlp-text-analytics.log"
        BASE_LOG_PATH = os.path.join('var', 'log', 'splunk')
        LOGGING_FORMAT = "%(asctime)s %(levelname)-s\t%(module)s:%(lineno)d - %(message)s"
        splunk_log_handler = logging.handlers.RotatingFileHandler(os.path.join(
            SPLUNK_HOME, BASE_LOG_PATH, LOGGING_FILE_NAME),
                                                                  mode='a')
        splunk_log_handler.setFormatter(logging.Formatter(LOGGING_FORMAT))
        logger.addHandler(splunk_log_handler)
        setupSplunkLogger(logger, LOGGING_DEFAULT_CONFIG_FILE,
                          LOGGING_LOCAL_CONFIG_FILE, LOGGING_STANZA_NAME)
        return logger

    #https://stackoverflow.com/a/15590384
    def get_wordnet_pos(self, treebank_tag):
        """Map a part-of-speech tag to a WordNet POS constant by its first
        letter (J, V, N, R); any other tag is treated as a noun ('n')."""
        if treebank_tag.startswith('J'):
            return wordnet.ADJ
        elif treebank_tag.startswith('V'):
            return wordnet.VERB
        elif treebank_tag.startswith('N'):
            return wordnet.NOUN
        elif treebank_tag.startswith('R'):
            return wordnet.ADV
        else:
            return 'n'

    def f_remove_urls(self, text):
        """Return text with every http:// or https:// URL removed.

        A URL ends at the first whitespace, backspace or '<' character.
        """
        return re.sub(r'https?://[^\b\s<]+', '', text)

    def ngram(self, text, min_n, max_n):
        """Return (size, phrase) tuples for the n-grams of a token sequence.

        Sizes run from min_n up to, but not including, max_n. Unigrams are
        never returned. text must be re-iterable (a list), because it is
        walked once per size.
        """
        ngram_list = []
        for n in range(min_n, max_n):
            for gram in ngrams(text, n):
                if len(gram) > 1:
                    ngram_list.append((len(gram), ' '.join(gram)))
        return ngram_list

    def stream(self, records):
        """Clean the text field of every record and yield the record.

        The text field is replaced by a list of tokens (or by a space
        separated string when mv is false). Depending on the options the
        fields orig_text, pos_tuple, pos_tag, ngrams and ngrams_<n> are
        added as well.

        Raises ValueError when ngram_range is not of the form <int>-<int>.
        """
        logger = self.setup_logging()
        logger.info('textfield set to: %s', self.textfield)
        field = self.textfield

        # Everything below depends only on the options, so it is computed
        # once instead of once per record.
        custom_stopwords = []
        if self.custom_stopwords:
            custom_stopwords = self.custom_stopwords.replace(' ',
                                                             '').split(',')
        stopwords = None
        if self.remove_stopwords:
            stopwords = set(stop_words.words('english') + custom_stopwords)

        use_lemma_pos = self.base_word and self.base_type == 'lemma_pos'

        # Callable that reduces a single token to its base form; stays None
        # for lemma_pos (handled separately) and when base_word is off.
        base_form = None
        if self.base_word:
            if self.base_type == 'lemma':
                base_form = WordNetLemmatizer().lemmatize
            elif self.base_type == 'stem':
                base_form = PorterStemmer().stem
        pos_lemmatizer = WordNetLemmatizer() if use_lemma_pos else None

        # The bounds arrive as text; compare them as numbers so that a
        # range such as 2-10 is not rejected by string ordering.
        min_n, max_n = [int(bound) for bound in self.ngram_range.split('-')]
        make_ngrams = max_n > 1 and max_n >= min_n

        for record in records:
            if self.keep_orig:
                record['orig_text'] = record[field]
            #URL removal
            if self.remove_urls:
                record[field] = self.f_remove_urls(record[field])
            #Tokenization
            if use_lemma_pos:
                #lemma_pos - cleaning and stopword removal happen here, on
                #the (token, tag) pairs, while the tags are still attached
                # NOTE(review): the decode/encode round-trip assumes
                # Python 2 byte strings - confirm before running on Python 3.
                record['pos_tuple'] = pos_tag(
                    word_tokenize(record[field].decode('utf-8').encode(
                        'ascii', 'ignore')),
                    tagset=self.pos_tagset)
                if self.default_clean:
                    cleaned = []
                    for token, tag in record['pos_tuple']:
                        # tokens containing a non-word character are dropped
                        if re.search(r'[\W]', token):
                            continue
                        word = re.sub(r'[\W\d]', '', token).lower()
                        if self.remove_stopwords and word in stopwords:
                            continue
                        cleaned.append([word, tag])
                    record['pos_tuple'] = cleaned
            elif self.force_nltk_tokenize:
                record[field] = word_tokenize(record[field])
            elif self.default_clean or (self.base_word
                                        and self.base_type == 'lemma'):
                #https://stackoverflow.com/a/1059601
                record[field] = re.split(r'\W+', record[field])
            else:
                record[field] = record[field].split()
            #Default Clean
            if self.default_clean and not self.base_type == 'lemma_pos':
                record[field] = [
                    re.sub(r'[\W\d]', '', text).lower()
                    for text in record[field]
                ]
            #Lemmatization with POS tagging
            if use_lemma_pos:
                words = []
                tuple_list = []
                tag_list = []
                for token, tag in record['pos_tuple']:
                    keep_text = pos_lemmatizer.lemmatize(
                        token, self.get_wordnet_pos(tag)).encode(
                            'ascii', 'ignore')
                    if keep_text:
                        words.append(keep_text)
                        tuple_list.append([keep_text, tag])
                        tag_list.append(tag)
                # Assigned unconditionally so the three fields always
                # describe the same surviving tokens.
                record[field] = words
                record['pos_tag'] = tag_list
                record['pos_tuple'] = tuple_list
            #Lemmatization or Stemming, with stopword removal when enabled
            if base_form is not None:
                record[field] = [
                    base_form(text) for text in record[field]
                    if stopwords is None or text not in stopwords
                ]
            #Stopword Removal
            elif self.remove_stopwords and not self.base_word:
                record[field] = [
                    text for text in record[field] if text not in stopwords
                ]
            #Minimum term length
            if self.term_min_len > 0:
                record[field] = [
                    i for i in record[field] if len(i) >= self.term_min_len
                ]
            #ngram column creation
            if make_ngrams:
                # A real list: the tokens are walked once per n-gram size.
                tokens = [text for text in record[field] if text]
                ngram_extract = self.ngram(tokens, min_n, max_n + 1)
                if ngram_extract:
                    for size, phrase in ngram_extract:
                        if self.ngram_mix:
                            column = 'ngrams'
                        else:
                            column = 'ngrams_' + str(size)
                        if column not in record:
                            record[column] = []
                        record[column].append(phrase)
                elif self.ngram_mix:
                    if 'ngrams' not in record:
                        record['ngrams'] = []
                else:
                    # keep the columns present even when nothing was found
                    for n in range(min_n, max_n + 1):
                        if n != 1:
                            record['ngrams_' + str(n)] = []
            #Final Multi-Value Output
            if not self.mv:
                record[field] = ' '.join(record[field])
                try:
                    record['pos_tag'] = ' '.join(record['pos_tag'])
                except (KeyError, TypeError):
                    # pos_tag only exists for lemma_pos runs
                    pass

            yield record
Exemplo n.º 30
0
class MispSightCommand(StreamingCommand):
    """
    Look up MISP sightings for the value of a field.

    ##Syntax

        code-block::
        mispsight field=<field> misp_instance=<instance_name>

    ##Description

        For every record that has a value in <field>, the value is searched
        with the MISP REST API (/attributes/restSearch). For each matching
        attribute the sightings are fetched
        (/sightings/restSearch/attribute) until a false-positive sighting
        has been found.

        Fields added to the record when at least one sighting of type 0
        was seen: misp_value, misp_count, misp_first, misp_first_event_id,
        misp_last, misp_last_event_id.

        Fields added when a sighting of type 1 (false positive) was seen:
        misp_value, misp_fp, misp_fp_timestamp, misp_fp_event_id.

    ##Example

    Look up sightings for the value of field r_ip (remote IP in proxy logs).

        code-block::
         * | mispsight field=r_ip misp_instance=<instance_name>

    """

    field = Option(doc='''
        **Syntax:** **field=***<fieldname>*
        **Description:**Name of the field containing \
        the value to search for.''',
                   require=True,
                   validate=validators.Fieldname())
    misp_instance = Option(doc='''
        **Syntax:** **misp_instance=instance_name*
        **Description:**MISP instance parameters as described \
        in local/inputs.conf.''',
                           require=True)

    def _post(self, url, body, headers, my_args):
        """POST a JSON string to a MISP endpoint and return the decoded reply.

        Raises requests.HTTPError (via raise_for_status) when the status is
        not a success status.
        """
        reply = requests.post(url,
                              headers=headers,
                              data=body,
                              verify=my_args['misp_verifycert'],
                              cert=my_args['client_cert_full_path'],
                              proxies=my_args['proxies'])
        # anything other than a success status raises here
        reply.raise_for_status()
        decoded = reply.json()
        logging.info(
            "MISP REST API %s has got a response with status code 200", url)
        return decoded

    def _add_sightings(self, record, value, my_args, headers, search_url,
                       sight_url):
        """Search MISP for value and write the sighting summary into record.

        The record is left untouched when MISP returns no attribute list or
        when no sighting of type 0 or 1 is found.
        """
        search_body = json.dumps({
            "returnFormat": "json",
            "value": str(value),
            # a plain string; a trailing comma here would send a JSON list
            "withAttachments": "false"
        })
        logging.debug('mispsight request body: %s', search_body)
        response = self._post(search_url, search_body, headers, my_args)
        if 'response' not in response or \
           'Attribute' not in response['response']:
            return
        attributes = response['response']['Attribute']
        logging.debug("MISP REST API %s: response: with %s records",
                      search_url, len(attributes))

        misp_value = ''
        misp_fp = False
        misp_fp_ts = 0
        misp_fp_id = ''
        seen = False
        count = 0
        first = 0
        first_id = 0
        last = 0
        last_id = 0

        for attribute in attributes:
            if misp_value == '':
                misp_value = str(attribute['value'])
            if misp_fp:
                # once a false positive is known no further sightings are
                # requested for the remaining attributes
                continue
            sight_body = json.dumps({
                "returnFormat": "json",
                "id": str(attribute['id'])
            })
            sight = self._post(sight_url, sight_body, headers, my_args)
            logging.debug(
                "MISP REST API %s has got a response: with %s records",
                sight_url, len(sight))
            if 'response' not in sight:
                continue
            for entry in sight['response']:
                if 'Sighting' not in entry:
                    continue
                sighting = entry['Sighting']
                kind = int(sighting['type'])
                seen_at = int(sighting['date_sighting'])
                event_id = str(sighting['event_id'])
                if kind == 0:
                    # true sighting: count it and track earliest / latest
                    seen = True
                    count += 1
                    if first == 0 or first > seen_at:
                        first = seen_at
                        first_id = event_id
                    if last < seen_at:
                        last = seen_at
                        last_id = event_id
                elif kind == 1:
                    # false positive
                    misp_fp = True
                    misp_fp_ts = seen_at
                    misp_fp_id = event_id

        if misp_fp:
            record['misp_value'] = misp_value
            record['misp_fp'] = "True"
            record['misp_fp_timestamp'] = str(misp_fp_ts)
            record['misp_fp_event_id'] = str(misp_fp_id)
        if seen:
            record['misp_value'] = misp_value
            record['misp_count'] = str(count)
            record['misp_first'] = str(first)
            record['misp_first_event_id'] = str(first_id)
            record['misp_last'] = str(last)
            record['misp_last_event_id'] = str(last_id)

    def stream(self, records):
        """Enrich every record that has a value in the configured field and
        yield all records, enriched or not.

        Raises requests.HTTPError when a MISP request fails.
        """
        # Generate args
        my_args = prepare_config(self, 'misp42splunk')
        # set proper headers
        headers = {
            'Content-type': 'application/json',
            'Authorization': my_args['misp_key'],
            'Accept': 'application/json'
        }

        fieldname = str(self.field)
        search_url = my_args['misp_url'] + '/attributes/restSearch'
        sight_url = my_args['misp_url'] + \
            '/sightings/restSearch/attribute'

        for record in records:
            value = record.get(fieldname, None)
            if value is not None:
                self._add_sightings(record, value, my_args, headers,
                                    search_url, sight_url)
            yield record