def apply(trace_log, admitted_variants, parameters=None):
    """
    Build a new log with the traces whose variant is (or is not) admitted

    Parameters
    -----------
    trace_log
        Trace log object
    admitted_variants
        Collection of variants used for the selection
    parameters
        Parameters of the algorithm (forwarded to get_variants), including:
            activity_key -> Attribute identifying the activity in the log
            positive -> If true (default) keep the traces of the admitted variants,
                        otherwise keep the traces of all the other variants

    Returns
    -----------
    filtered_log
        New trace log containing only the selected traces
    """
    parameters = {} if parameters is None else parameters
    keep_admitted = parameters.get("positive", True)
    traces_by_variant = get_variants(trace_log, parameters=parameters)

    filtered_log = TraceLog()
    for variant in traces_by_variant:
        # a variant is selected when its membership agrees with the "positive" flag
        if bool(keep_admitted) == (variant in admitted_variants):
            for trace in traces_by_variant[variant]:
                filtered_log.append(trace)
    return filtered_log
def generate_log(pt, no_traces=100):
    """
    Build a trace log by executing a process tree several times

    Parameters
    ------------
    pt
        Process tree
    no_traces
        Number of traces to generate (one execution of the tree per trace)

    Returns
    ------------
    log
        Trace log object, where each trace is named after its generation index
    """
    generated_log = TraceLog()

    for trace_index in range(no_traces):
        # one execution of the tree, reduced to the sequence of its labels
        labels = pt_util.project_execution_sequence_to_labels(execute(pt))

        generated_trace = Trace()
        generated_trace.attributes[xes.DEFAULT_NAME_KEY] = str(trace_index)
        for activity_label in labels:
            new_event = Event()
            new_event[xes.DEFAULT_NAME_KEY] = activity_label
            generated_trace.append(new_event)
        generated_log.append(generated_trace)

    return generated_log
def filter_log_by_start_activities(start_activities,
                                   variants,
                                   vc,
                                   threshold,
                                   activity_key="concept:name"):
    """
    Keep only the variants whose start activity occurs at least `threshold` times
    (or is the start activity of the reference variant vc[0][0])

    Variants without a usable start activity (no traces, an empty first trace, or a
    first event lacking `activity_key`) are skipped instead of raising an error.

    Parameters
    ----------
    start_activities
        Dictionary of start activities associated with their count
    variants
        Dictionary with variant as the key and the list of traces as the value
    vc
        List of variant names along with their count; only vc[0][0] is read, as the
        reference variant whose start activity is always kept (may be empty)
    threshold
        Cutting threshold (variants whose start activity occurs fewer times are removed)
    activity_key
        (If specified) Specify the activity key in the log (default concept:name)

    Returns
    ----------
    filtered_log
        Filtered log
    """
    # sentinel distinguishing "no start activity" from any real attribute value
    no_activity = object()

    def _start_activity(variant):
        # start activity of the first trace of the variant, or the sentinel
        traces = variants[variant]
        if len(traces) == 0 or len(traces[0]) == 0:
            return no_activity
        first_event = traces[0][0]
        if activity_key not in first_event:
            return no_activity
        return first_event[activity_key]

    filtered_log = TraceLog()
    reference_activity = _start_activity(vc[0][0]) if len(vc) > 0 else no_activity
    for variant in variants:
        activity = _start_activity(variant)
        if activity is no_activity or activity not in start_activities:
            continue
        if activity == reference_activity or start_activities[activity] >= threshold:
            for trace in variants[variant]:
                filtered_log.append(trace)
    return filtered_log
def sample_tracelog(trace_log, no_traces=100):
    """
    Randomly sample a fixed number of traces from the original log

    The traces are drawn without replacement, so the sample contains exactly
    min(no_traces, number of traces in the log) distinct traces. The sampled
    traces keep the relative order they have in the original log.

    Parameters
    -----------
    trace_log
        Trace log
    no_traces
        Number of traces that the sample should have (values below zero are
        treated as zero)

    Returns
    -----------
    new_log
        New log with the attributes, extensions, globals and classifiers of the
        original log and a shallow copy of each sampled trace
    """
    new_log = TraceLog(attributes=trace_log.attributes, extensions=trace_log.extensions, globals=trace_log._omni,
                       classifiers=trace_log.classifiers)
    traces = trace_log._list
    sample_size = max(0, min(no_traces, len(traces)))
    # random.sample never repeats an index, unlike repeated randrange calls whose
    # duplicates would silently shrink the sample
    sampled_indexes = sorted(random.sample(range(len(traces)), sample_size))
    for index in sampled_indexes:
        new_log.append(copy(traces[index]))
    return new_log
def filter_log_by_variants_percentage(trace_log,
                                      variants,
                                      variants_percentage=0.0):
    """
    Keep the variants, in the order given by get_variants_sorted_by_count, until the
    kept traces cover the requested share of the log

    Parameters
    ----------
    trace_log
        Trace log (only its length is used, as the total number of traces)
    variants
        Dictionary with variant as the key and the list of traces as the value
    variants_percentage
        Share of the traces (between 0 and 1) that should be covered before stopping
        to add variants (the first variant of the ordering is always kept)

    Returns
    ----------
    filtered_log
        Filtered trace log
    """
    filtered_log = TraceLog()
    total_traces = len(trace_log)
    kept_traces = 0

    for entry in get_variants_sorted_by_count(variants):
        variant_name = entry[0]
        variant_size = entry[1]
        covered_share = kept_traces / total_traces
        # the first variant is added unconditionally, the others only while the
        # share covered so far is still below the requested one
        if kept_traces == 0 or covered_share < variants_percentage:
            for trace in variants[variant_name]:
                filtered_log.append(trace)
            kept_traces += variant_size

    return filtered_log
# Example #6
# 0
def filter_log_by_paths(trace_log,
                        paths,
                        variants,
                        vc,
                        threshold,
                        attribute_key="concept:name"):
    """
    Keep only the events belonging to paths whose number of occurrences is at least the
    threshold (or that belong to the first trace of the variant vc[0][0])

    A path is the pair of attribute values of two consecutive events, encoded as the
    string "<first value>,<second value>".

    Parameters
    ----------
    trace_log
        Trace log
    paths
        Dictionary of paths (encoded as described above) associated with their count
    variants
        Dictionary with variant as the key and the list of traces as the value
    vc
        List of variant names along with their count (only vc[0][0] is read;
        presumably the most frequent variant - verify against the caller)
    threshold
        Cutting threshold (paths which number of occurrences is below the threshold are not kept)
    attribute_key
        (If specified) Specify the attribute key to use (default concept:name)

    Returns
    ----------
    filtered_log
        Filtered log made of newly created traces (traces that end up empty are dropped)
    """
    filtered_log = TraceLog()
    # first trace of the reference variant: all of its paths are kept regardless of the threshold
    fvft = variants[vc[0][0]][0]
    fvp = set()
    for i in range(0, len(fvft) - 1):
        path = fvft[i][attribute_key] + "," + fvft[i + 1][attribute_key]
        fvp.add(path)
    for trace in trace_log:
        new_trace = Trace()
        jj = 0
        if len(trace) > 0:
            # the first event of a non-empty trace is always kept
            new_trace.append(trace[0])
            # NOTE(review): the scan starts at index 1, so the pair (trace[0], trace[1]) is
            # never checked against `paths` - confirm whether this is intended
            for j in range(1, len(trace) - 1):
                jj = j
                # NOTE(review): j is at most len(trace) - 2 here, so this break cannot trigger
                if j >= len(trace):
                    break
                if attribute_key in trace[j] and attribute_key in trace[j + 1]:
                    path = trace[j][attribute_key] + "," + trace[
                        j + 1][attribute_key]
                    if path in paths:
                        if path in fvp or paths[path] >= threshold:
                            # NOTE(review): both events of the pair are appended, so an event
                            # shared by two consecutive kept pairs is appended twice
                            new_trace.append(trace[j])
                            new_trace.append(trace[j + 1])
        # NOTE(review): jj never equals len(trace), so for traces with more than one event
        # the last event is always appended (possibly after it was already appended above)
        if len(trace) > 1 and not jj == len(trace):
            new_trace.append(trace[-1])
        if len(new_trace) > 0:
            filtered_log.append(new_trace)
    return filtered_log
# Example #7
# 0
def apply(trace_log, values, parameters=None):
    """
    Select the traces that contain (or do not contain) at least one event whose
    attribute value belongs to the provided values

    Parameters
    -----------
    trace_log
        Trace log
    values
        Collection of attribute values to look for
    parameters
        Parameters of the algorithm, including:
            PARAMETER_CONSTANT_ATTRIBUTE_KEY -> Attribute to inspect on the events
                                                (default: DEFAULT_NAME_KEY)
            positive -> If true (default) keep the traces with a matching event,
                        otherwise keep the traces without any matching event

    Returns
    -----------
    filtered_log
        Filtered log (referencing the original traces; empty traces are never included)
    """
    parameters = {} if parameters is None else parameters
    attribute_key = parameters.get(PARAMETER_CONSTANT_ATTRIBUTE_KEY, DEFAULT_NAME_KEY)
    keep_matching = parameters.get("positive", True)

    filtered_log = TraceLog()
    for trace in trace_log:
        has_match = False
        for event in trace:
            if attribute_key in event and event[attribute_key] in values:
                has_match = True

        # the trace is selected when the outcome of the search agrees with the flag
        selected = has_match == bool(keep_matching)
        if selected and len(trace) > 0:
            filtered_log.append(trace)
    return filtered_log
# Example #8
# 0
def filter_log_by_attributes_threshold(trace_log,
                                       attributes,
                                       variants,
                                       vc,
                                       threshold,
                                       attribute_key="concept:name"):
    """
    Keep only the events whose attribute value occurs at least `threshold` times
    (or appears in the first trace of the variant vc[0][0])

    Parameters
    ----------
    trace_log
        Trace log
    attributes
        Dictionary of attribute values associated with their count
    variants
        Dictionary with variant as the key and the list of traces as the value
    vc
        List of variant names along with their count (only vc[0][0] is read)
    threshold
        Cutting threshold (events whose value occurs fewer times are removed)
    attribute_key
        (If specified) Specify the attribute key in the log (default concept:name)

    Returns
    ----------
    filtered_log
        Filtered log made of newly created traces (traces left without events are dropped)
    """
    filtered_log = TraceLog()

    # values of the reference trace are kept regardless of their count
    reference_values = []
    for reference_event in variants[vc[0][0]][0]:
        if attribute_key in reference_event:
            reference_values.append(reference_event[attribute_key])

    for trace in trace_log:
        reduced_trace = Trace()
        for event in trace:
            if attribute_key not in event:
                continue
            value = event[attribute_key]
            if value not in attributes:
                continue
            if value in reference_values or attributes[value] >= threshold:
                reduced_trace.append(event)
        if len(reduced_trace) > 0:
            filtered_log.append(reduced_trace)
    return filtered_log
# Example #9
# 0
def form_fake_log(prefixes_keys, activity_key=xes_util.DEFAULT_NAME_KEY):
    """
    Form fake log for replay (each prefix becomes a separate trace to align)

    Parameters
    ----------
    prefixes_keys
        Keys of the prefixes, each one a comma-separated string of activities
        (the traces of the log follow the order of the keys)
    activity_key
        Activity key (must be provided if different from concept:name)

    Returns
    ----------
    fake_log
        Trace log with one trace per prefix and one event per activity of the prefix
    """

    def _trace_of(prefix):
        # one event per comma-separated activity, in the order of the prefix
        prefix_trace = Trace()
        for activity_name in prefix.split(","):
            new_event = Event()
            new_event[activity_key] = activity_name
            prefix_trace.append(new_event)
        return prefix_trace

    replay_log = TraceLog()
    for prefix_key in prefixes_keys:
        replay_log.append(_trace_of(prefix_key))
    return replay_log
def project_tracelog(log, allowed_activities, parameters=None):
    """
    Project a log on a given list of allowed (by the user) activities

    Events whose activity is not allowed, or that do not have the activity
    attribute at all, are left out of the projection.

    Parameters
    -------------
    log
        Trace log
    allowed_activities
        List of allowed activities
    parameters
        Possible parameters of the algorithm, including:
            PARAMETER_CONSTANT_ACTIVITY_KEY -> the activity name to use in the projection

    Returns
    ------------
    projected_log
        Projected trace log, made of deep copies of the retained events
        (traces left without events are dropped)
    """
    if parameters is None:
        parameters = {}
    activity_key = parameters.get(constants.PARAMETER_CONSTANT_ACTIVITY_KEY, xes.DEFAULT_NAME_KEY)

    projected_log = TraceLog()

    for trace in log:
        projected_trace = Trace()
        for event in trace:
            # an event without the activity attribute cannot match an allowed
            # activity: skip it instead of failing with a KeyError
            if activity_key in event and event[activity_key] in allowed_activities:
                projected_trace.append(deepcopy(event))
        if len(projected_trace) > 0:
            projected_log.append(projected_trace)

    return projected_log
# Example #11
# 0
def apply(trace_log, paths, parameters=None):
    """
    Apply a filter on traces containing / not containing a path

    A trace contains a path when two of its consecutive events carry, in order, the
    two attribute values of the path. Pairs in which one of the events does not have
    the attribute are ignored (they cannot match any path).

    Parameters
    -----------
    trace_log
        Trace log
    paths
        Paths that we are looking for (expressed as tuple of 2 strings)
    parameters
        Parameters of the algorithm, including:
            PARAMETER_CONSTANT_ATTRIBUTE_KEY -> Attribute to inspect on the events
                                                (default: xes.DEFAULT_NAME_KEY)
            positive -> If true (default) keep the traces containing one of the paths,
                        otherwise keep the traces containing none of them

    Returns
    -----------
    filtered_log
        Filtered trace log (referencing the original traces)
    """
    if parameters is None:
        parameters = {}
    attribute_key = parameters.get(PARAMETER_CONSTANT_ATTRIBUTE_KEY, xes.DEFAULT_NAME_KEY)
    positive = parameters.get("positive", True)

    filtered_log = TraceLog()
    for trace in trace_log:
        found = False
        for i in range(len(trace) - 1):
            current_event = trace[i]
            next_event = trace[i + 1]
            # skip pairs with a missing attribute instead of failing with a KeyError
            if attribute_key not in current_event or attribute_key not in next_event:
                continue
            if (current_event[attribute_key], next_event[attribute_key]) in paths:
                found = True
                break
        if (found and positive) or (not found and not positive):
            filtered_log.append(trace)
    return filtered_log