Exemplo n.º 1
0
def template_step(lines, template_algorithm="logcluster", *args, **kwargs):
    """
    Run the template-generation substep selected by ``template_algorithm``.

    Args:
        lines: forwarded unchanged as the first positional argument of the substep.
        template_algorithm (string): name of the substep to run; the known names are
            "logcluster" and "stringmatch".
        *args: extra positional arguments forwarded to the substep.
        **kwargs: extra keyword arguments forwarded to the substep.

    Returns:
        Whatever the selected substep returns.
    """
    substeps = dict(
        logcluster=logcluster_substep,
        stringmatch=stringmatch_substep,
    )
    chosen = substeps.get(template_algorithm)
    if not chosen:
        # Unknown algorithm name: report it through log_exc together with the
        # table of available substeps.
        # NOTE(review): log_exc is presumably expected to raise here - confirm.
        log_exc(logger, "template_algorithm must be one of: %s" % (substeps,))
    return chosen(lines, *args, **kwargs)
Exemplo n.º 2
0
def logcluster(lines, *args, **kwargs):
    """
    Cluster log lines with the logcluster tool and return the mined templates.

    This function uses the logcluster algorithm (available at http://ristov.github.io/logcluster/) to cluster
    log files and mine line patterns. See http://ristov.github.io/publications/cnsm15-logcluster-web.pdf for
    additional details on the algorithm. The current implementation writes loglines to a file then
    feeds it to the logcluster command line tool (written in perl).

    Eventually, the goal is to fully translate logcluster.pl into python to eliminate this step.

    Behavior of this function differs depending on which of lines and file_path are set:
    lines AND file_path set: write lines to file at file_path
    lines BUT NOT file_path set: write lines to temporary file
    file_path BUT NOT lines: pass file_path directly into logcluster
    NEITHER lines NOR file_path: report an error through log_exc

    "Set" means truthy: an empty list counts as not set, while a generator object
    always counts as set, even if it yields nothing.

    Args:
        lines: an iterable of LogLine named tuples
        *args: forwarded to LogCluster.run_on_file
        **kwargs: see Kwargs below

    Kwargs:
        file_path (string): path of the file handed to logcluster. When lines is also given,
            the lines are written to this path first; when omitted, a temporary file is used.
        support: required; passed to LogCluster.run_on_file. A missing or falsy value is
            reported through log_exc.
        All other kwargs are forwarded to LogCluster.run_on_file.

    Returns:
        templates: the result of LogCluster.parse_output (a list of Template named tuples)
    """
    file_path = kwargs.pop("file_path", None)
    fp = None

    # Everything that can fail after the temporary file has been created runs
    # inside the try block, so the file is always closed (and thereby deleted),
    # even when writing, validation or the external tool raises.
    try:
        if lines and file_path:
            logger.info("Writing lines to file: %s", file_path)
            LogCluster.write_file(lines, file_path)
        elif lines:
            # NOTE(review): the temporary file is handed over by name while it is
            # still open here; whether it can be reopened by name is
            # platform-dependent - confirm for the supported platforms.
            fp = tempfile.NamedTemporaryFile()
            file_path = fp.name
            logger.info("Writing lines to temporary file: %s", file_path)
            LogCluster.write_file(lines, file_path)
        elif file_path:
            logger.info("Using existing lines in file: %s", file_path)
        else:  # neither lines nor file_path
            log_exc(
                logger,
                "Must pass either argument 'lines' or keyword argument 'file_path' (or both)."
            )

        support = kwargs.pop("support", None)
        if not support:
            log_exc(logger, "Must pass kwarg 'support'.")
        output = LogCluster.run_on_file(file_path, support, *args, **kwargs)
    finally:
        if fp is not None:
            # Temporary files are deleted when closed.
            logger.info("Closing file: %s", file_path)
            fp.close()

    templates = LogCluster.parse_output(output)
    return templates
Exemplo n.º 3
0
def logcluster(lines, *args, **kwargs):
    """
    Cluster log lines with the logcluster tool and return the mined templates.

    This function uses the logcluster algorithm (available at http://ristov.github.io/logcluster/) to cluster
    log files and mine line patterns. See http://ristov.github.io/publications/cnsm15-logcluster-web.pdf for
    additional details on the algorithm. The current implementation writes loglines to a file then
    feeds it to the logcluster command line tool (written in perl).

    Eventually, the goal is to fully translate logcluster.pl into python to eliminate this step.

    Behavior of this function differs depending on which of lines and file_path are set:
    lines AND file_path set: write lines to file at file_path
    lines BUT NOT file_path set: write lines to temporary file
    file_path BUT NOT lines: pass file_path directly into logcluster
    NEITHER lines NOR file_path: report an error through log_exc

    "Set" means truthy: an empty list counts as not set, while a generator object
    always counts as set, even if it yields nothing.

    Args:
        lines: an iterable of LogLine named tuples
        *args: forwarded to LogCluster.run_on_file
        **kwargs: see Kwargs below

    Kwargs:
        file_path (string): path of the file handed to logcluster. When lines is also given,
            the lines are written to this path first; when omitted, a temporary file is used.
        support: required; passed to LogCluster.run_on_file. A missing or falsy value is
            reported through log_exc.
        All other kwargs are forwarded to LogCluster.run_on_file.

    Returns:
        templates: the result of LogCluster.parse_output (a list of Template named tuples)
    """
    file_path = kwargs.pop("file_path", None)
    fp = None

    # The try/finally guarantees the temporary file (if one is created) is
    # closed and therefore removed on every exit path, not only on success.
    try:
        if lines and file_path:
            logger.info("Writing lines to file: %s", file_path)
            LogCluster.write_file(lines, file_path)
        elif lines:
            # NOTE(review): the file is passed on by name while still open;
            # reopening an open NamedTemporaryFile by name is platform-dependent.
            fp = tempfile.NamedTemporaryFile()
            file_path = fp.name
            logger.info("Writing lines to temporary file: %s", file_path)
            LogCluster.write_file(lines, file_path)
        elif file_path:
            logger.info("Using existing lines in file: %s", file_path)
        else:  # neither lines nor file_path
            log_exc(
                logger,
                "Must pass either argument 'lines' or keyword argument 'file_path' (or both).")

        support = kwargs.pop("support", None)
        if not support:
            log_exc(logger, "Must pass kwarg 'support'.")
        output = LogCluster.run_on_file(file_path, support, *args, **kwargs)
    finally:
        if fp is not None:
            # Temporary files are deleted when closed.
            logger.info("Closing file: %s", file_path)
            fp.close()

    templates = LogCluster.parse_output(output)
    return templates
Exemplo n.º 4
0
def template_step(lines, template_algorithm="logcluster", *args, **kwargs):
    """
    Generate templates from ``lines`` using the substep named by ``template_algorithm``.

    Args:
        lines: handed to the substep as its first positional argument.
        template_algorithm (string): either "logcluster" or "stringmatch".
        *args: additional positional arguments for the substep.
        **kwargs: additional keyword arguments for the substep.

    Returns:
        The return value of the selected substep.
    """
    # Lookup table from algorithm name to the callable implementing it.
    algorithms = {}
    algorithms["logcluster"] = logcluster_substep
    algorithms["stringmatch"] = stringmatch_substep

    substep = algorithms.get(template_algorithm)
    if not substep:
        # No substep registered under that name.
        # NOTE(review): log_exc presumably raises after logging - confirm.
        message = "template_algorithm must be one of: %s" % (algorithms,)
        log_exc(logger, message)

    return substep(lines, *args, **kwargs)
Exemplo n.º 5
0
def event_step(gen_windows, event_algorithm="fp_growth", *args, **kwargs):
    """
    Run the event-generation substep selected by ``event_algorithm``, then
    optionally apply a tfidf filter to the resulting events.

    Args:
        gen_windows: forwarded unchanged as the first positional argument of the substep.
        event_algorithm (string): name of the substep to run; the known names are
            "fp_growth", "paris" and "glove".
        *args: extra positional arguments forwarded to the substep.
        **kwargs: extra keyword arguments forwarded to the substep, except
            ``tfidf_threshold`` (see below).

    Kwargs:
        tfidf_threshold: removed from kwargs before the substep is called. When it is
            not None, the substep output is passed through tfidf_filter_events with
            this threshold.

    Returns:
        The substep output, or the output of tfidf_filter_events when a threshold was given.
    """
    substeps = dict(
        fp_growth=fp_growth_substep,
        paris=paris_substep,
        glove=glove_substep,
    )
    chosen = substeps.get(event_algorithm)
    if not chosen:
        # NOTE(review): log_exc is presumably expected to raise here - confirm.
        log_exc(logger, "event_algorithm must be one of: %s" % (substeps,))

    # Pop the threshold first so it is not forwarded to the substep.
    tfidf_threshold = kwargs.pop("tfidf_threshold", None)
    events = chosen(gen_windows, *args, **kwargs)

    logger.info("==========Custom post processing for sample data==========")
    if tfidf_threshold is None:
        logger.info("Skipping tfidf filter")
    else:
        # Note that calling this will reassign random event IDs.
        logger.info(
            "Applying a tfidf filter to each event's template_ids. (threshold = %s)",
            tfidf_threshold)
        events = tfidf_filter_events(events, tfidf_threshold)
    logger.info("==========End custom post processing==========")

    return events
Exemplo n.º 6
0
def event_step(gen_windows, event_algorithm="fp_growth", *args, **kwargs):
    """
    Generate events from ``gen_windows`` with the chosen algorithm and an optional tfidf filter.

    Args:
        gen_windows: handed to the substep as its first positional argument.
        event_algorithm (string): one of "fp_growth", "paris" or "glove".
        *args: additional positional arguments for the substep.
        **kwargs: additional keyword arguments for the substep; ``tfidf_threshold``
            is consumed here and never reaches the substep.

    Kwargs:
        tfidf_threshold: when present and not None, the events produced by the substep
            are filtered through tfidf_filter_events using this value.

    Returns:
        The events returned by the substep, filtered when a threshold was supplied.
    """
    # Lookup table from algorithm name to the callable implementing it.
    algorithms = {}
    algorithms["fp_growth"] = fp_growth_substep
    algorithms["paris"] = paris_substep
    algorithms["glove"] = glove_substep

    substep = algorithms.get(event_algorithm)
    if not substep:
        # NOTE(review): log_exc presumably raises after logging - confirm.
        message = "event_algorithm must be one of: %s" % (algorithms,)
        log_exc(logger, message)

    cutoff = kwargs.pop("tfidf_threshold", None)
    apply_filter = cutoff is not None
    result = substep(gen_windows, *args, **kwargs)

    logger.info("==========Custom post processing for sample data==========")
    if apply_filter:
        # Note that calling this will reassign random event IDs.
        logger.info(
            "Applying a tfidf filter to each event's template_ids. (threshold = %s)",
            cutoff)
        result = tfidf_filter_events(result, cutoff)
    else:
        logger.info("Skipping tfidf filter")
    logger.info("==========End custom post processing==========")

    return result