Exemplo n.º 1
0
        m = self.RE_LINE.search(input)
        if not m:
            return []
        contents = m.group(5)
        tokens = []
        for elem in self.analyzer(contents):
            if hasattr(elem, "pos"):
                tokens.append((elem.text, elem.pos))
            else:
                tokens.append((elem.text, None))
        unique_tokens = list(set([text for (text, pos) in tokens]))
        return sorted(unique_tokens)

    def __repr__(self):
        """Return 'LogDatum: ' followed by the result of get_dict_representation()."""
        representation = self.get_dict_representation()
        return "LogDatum: %s" % (representation, )

    def __str__(self):
        """Return the string form of the result of get_dict_representation()."""
        representation = self.get_dict_representation()
        return "%s" % (representation, )

if __name__ == "__main__":
    # Script entry point: pass the application name, the datum class and the
    # list of field names to base_parser.main.
    logger.debug("starting")
    fields_to_index = [
        "datetime",
        "keywords",
        "failure_type",
        "failure_id",
        "contents_hash",
    ]
    try:
        base_parser.main(
            APP_NAME,
            NgmgMsMessagesParserLogDatum,
            fields_to_index,
        )
    except KeyboardInterrupt:
        # A CTRL-C is logged at debug level instead of propagating.
        logger.debug("CTRL-C")
    finally:
        # Always logged, whether main returned, was interrupted or raised.
        logger.debug("exiting")

Exemplo n.º 2
0
        I'm going to cheat and use Whoosh."""

        m = self.RE_LINE.search(input)
        if not m:
            return []
        contents = m.group(7)
        tokens = []
        for elem in self.analyzer(contents):
            if hasattr(elem, "pos"):
                tokens.append((elem.text, elem.pos))
            else:
                tokens.append((elem.text, None))
        unique_tokens = list(set([text for (text, pos) in tokens]))
        return sorted(unique_tokens)

    def __repr__(self):
        """Return 'LogDatum: ' followed by the result of get_dict_representation()."""
        representation = self.get_dict_representation()
        return "LogDatum: %s" % (representation, )

    def __str__(self):
        """Return the string form of the result of get_dict_representation()."""
        representation = self.get_dict_representation()
        return "%s" % (representation, )

if __name__ == "__main__":
    # Script entry point: pass the application name, the datum class and the
    # list of field names to base_parser.main.
    logger.debug("starting")
    try:
        fields_to_index = [
            "datetime",
            "keywords",
            "error_level",
            "error_id",
            "contents_hash",
        ]
        base_parser.main(
            APP_NAME,
            NgmgEpParserLogDatum,
            fields_to_index,
        )
    except KeyboardInterrupt:
        # A CTRL-C is logged at debug level instead of propagating.
        logger.debug("CTRL-C")
    finally:
        # Always logged, whether main returned, was interrupted or raised.
        logger.debug("exiting")
Exemplo n.º 3
0
        """Given a blob of input prepare a list of strings that is suitable
        for full-text indexing by MongoDB.

        I'm going to cheat and use Whoosh."""

        tokens = []
        for elem in self.analyzer(input):
            if hasattr(elem, "pos"):
                tokens.append((elem.text, elem.pos))
            else:
                tokens.append((elem.text, None))
        unique_tokens = list(set([text for (text, pos) in tokens]))
        return sorted(unique_tokens)

    def __repr__(self):
        """Return 'LogDatum: ' followed by the result of get_dict_representation()."""
        representation = self.get_dict_representation()
        return "LogDatum: %s" % (representation, )

    def __str__(self):
        """Return the string form of the result of get_dict_representation()."""
        representation = self.get_dict_representation()
        return "%s" % (representation, )

if __name__ == "__main__":
    # Script entry point: pass the application name, the datum class and the
    # list of field names to base_parser.main.
    logger.debug("starting")
    try:
        fields_to_index = [
            "datetime",
            "keywords",
            "contents_hash",
            "source",
            "event_type",
            "component_path",
            "sensor_num",
            "sensor_type",
        ]
        base_parser.main(
            APP_NAME,
            NgmgShmHpilistParserLogDatum,
            fields_to_index,
        )
    except KeyboardInterrupt:
        # A CTRL-C is logged at debug level instead of propagating.
        logger.debug("CTRL-C")
    finally:
        # Always logged, whether main returned, was interrupted or raised.
        logger.debug("exiting")