示例#1
0
def filter_log_by_variants_percentage(log, variants, variants_percentage=0.0):
    """
    Build a new log holding the traces of the leading variants until the
    requested share of the original log is covered

    Variants are visited in the order returned by
    ``get_variants_sorted_by_count`` (expected to be most frequent first).
    A variant is copied when nothing has been copied yet, or when the share
    of traces copied so far is still below ``variants_percentage``.

    Parameters
    ----------
    log
        Log; only its length is used, as the denominator of the covered share
    variants
        Dictionary with variant as the key and the list of traces as the value
    variants_percentage
        Share of traces (between 0 and 1) that the kept variants should cover;
        the first variant in the ranking is kept regardless of this value

    Returns
    ----------
    filtered_log
        Newly created ``EventLog`` containing the traces of the kept variants
    """
    filtered_log = EventLog()
    total_traces = len(log)
    ranked_variants = get_variants_sorted_by_count(variants)
    kept_traces = 0

    for entry in ranked_variants:
        variant_key, occurrences = entry[0], entry[1]
        covered_share = kept_traces / total_traces
        # Once something has been kept and the target share is reached,
        # every remaining variant is skipped.
        if kept_traces != 0 and not (covered_share < variants_percentage):
            continue
        for trace in variants[variant_key]:
            filtered_log.append(trace)
        kept_traces += occurrences

    return filtered_log
示例#2
0
def filter_variants_top_k(log, k, parameters=None):
    """
    Restrict the log to its k highest-ranked variants

    The variants are obtained with ``get_variants``, ranked with
    ``get_variants_sorted_by_count``, and the first ``min(k, number of
    variants)`` of them are handed to ``apply`` together with the log.

    Parameters
    -------------
    log
        Event log
    k
        Number of variants that should be kept
    parameters
        Parameters forwarded to ``get_variants`` and ``apply``
        (an empty dictionary is used when None is given)

    Returns
    -------------
    filtered_log
        Result of ``apply`` on the log and the selected variants
    """
    parameters = {} if parameters is None else parameters

    ranked_variants = get_variants_sorted_by_count(
        get_variants(log, parameters=parameters))
    # Never ask for more entries than the ranking actually contains.
    keep_count = min(k, len(ranked_variants))
    top_variants = [entry[0] for entry in ranked_variants[:keep_count]]

    return apply(log, top_variants, parameters=parameters)
示例#3
0
def filter_variants_variants_percentage(log,
                                        variants,
                                        variants_percentage=0.0):
    """
    Keep the leading variants until the requested share of the log is covered

    Variants are visited in the order returned by
    ``get_variants_sorted_by_count`` (expected to be most frequent first).
    Each visited variant is copied into the result; as soon as the copied
    traces reach ``variants_percentage`` of the log, the count of the variant
    just copied becomes a floor, and the scan stops at the first variant whose
    count is strictly below it. Variants tied with the last one needed are
    therefore kept as well.

    Parameters
    ----------
    log
        Log; its length is the denominator of the covered share, and its
        attributes, extensions, classifiers, omni_present and properties are
        passed on to the new log
    variants
        Dictionary with variant as the key and the list of traces as the value
    variants_percentage
        Share of traces (between 0 and 1) that the kept variants should cover;
        the first variant in the ranking is always kept

    Returns
    ----------
    filtered_log
        Newly created ``EventLog`` containing the traces of the kept variants
    """
    filtered_log = EventLog([],
                            attributes=log.attributes,
                            extensions=log.extensions,
                            classifiers=log.classifiers,
                            omni_present=log.omni_present,
                            properties=log.properties)
    total_traces = len(log)
    ranked_variants = get_variants_sorted_by_count(variants)
    kept_traces = 0
    # Stays at -1 (never triggers the break) until the target share is reached.
    tie_floor = -1

    for entry in ranked_variants:
        variant_key, occurrences = entry[0], entry[1]
        if occurrences < tie_floor:
            break
        for trace in variants[variant_key]:
            filtered_log.append(trace)
        kept_traces += occurrences
        if kept_traces / total_traces >= variants_percentage:
            tie_floor = occurrences

    return filtered_log
示例#4
0
def find_auto_threshold(log, variants, decreasing_factor):
    """
    Find automatically variants filtering threshold
    based on specified decreasing factor

    Variants are visited in the order returned by
    ``get_variants_sorted_by_count`` (expected to be most frequent first).
    While nothing has been counted yet a variant is accepted unconditionally;
    afterwards a variant is accepted only if its count is strictly greater
    than ``decreasing_factor`` times the count of the previous variant. The
    scan stops at the first variant that is not accepted.

    Parameters
    ----------
    log
        Log; only its length is used, as the denominator of the result
    variants
        Dictionary with variant as the key and the list of traces as the value
    decreasing_factor
        Decreasing factor (stops the algorithm when the next variant by occurrence is below this factor
        in comparison to previous)

    Returns
    ----------
    variantsPercentage
        Share of the log's traces covered by the accepted variants
        (0.0 when the log is empty)
    """
    no_of_traces = len(log)
    if no_of_traces == 0:
        # An empty log has nothing to cover; returning early avoids the
        # division by zero that the final ratio would otherwise hit.
        return 0.0

    variant_count = get_variants_sorted_by_count(variants)
    already_added_sum = 0
    prev_var_count = -1

    for entry in variant_count:
        varcount = entry[1]
        # Stop at the first variant whose count does not exceed the allowed
        # fraction of its predecessor's count.
        if already_added_sum != 0 and not (varcount > decreasing_factor * prev_var_count):
            break
        already_added_sum = already_added_sum + varcount
        prev_var_count = varcount

    return already_added_sum / no_of_traces