Example #1
import numpy as np
from scipy import stats
from pm4py.algo.filtering.log.start_activities import start_activities_filter


def start_activities(log):
    # count how often each start activity occurs in the log
    log_start = start_activities_filter.get_start_activities(log)

    n_unique_start_activities = len(log_start)

    # descriptive statistics over the start-activity occurrence counts
    start_activities_occurrences = list(log_start.values())
    start_activities_min = np.min(start_activities_occurrences)
    start_activities_max = np.max(start_activities_occurrences)
    start_activities_mean = np.mean(start_activities_occurrences)
    start_activities_median = np.median(start_activities_occurrences)
    start_activities_std = np.std(start_activities_occurrences)
    start_activities_variance = np.var(start_activities_occurrences)
    start_activities_q1 = np.percentile(start_activities_occurrences, 25)
    start_activities_q3 = np.percentile(start_activities_occurrences, 75)
    start_activities_iqr = stats.iqr(start_activities_occurrences)
    start_activities_skewness = stats.skew(start_activities_occurrences)
    start_activities_kurtosis = stats.kurtosis(start_activities_occurrences)

    return [
        n_unique_start_activities,
        start_activities_min,
        start_activities_max,
        start_activities_mean,
        start_activities_median,
        start_activities_std,
        start_activities_variance,
        start_activities_q1,
        start_activities_q3,
        start_activities_iqr,
        start_activities_skewness,
        start_activities_kurtosis,
    ]
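For context, a minimal usage sketch of the feature extractor above, assuming a log imported with the factory-style XES importer used elsewhere in these examples (the import path follows pm4py's 1.x factory layout; the file path is illustrative):

# minimal usage sketch; the file path is illustrative
from pm4py.objects.log.importer.xes import factory as xes_import_factory

log = xes_import_factory.apply("running-example.xes")
features = start_activities(log)
# -> [n_unique, min, max, mean, median, std, variance, q1, q3, iqr, skewness, kurtosis]
print(features)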
Example #2
    def test_17(self):
        from pm4py.algo.filtering.pandas.start_activities import start_activities_filter
        dataframe = self.load_running_example_df()
        log_start = start_activities_filter.get_start_activities(dataframe)
        df_start_activities = start_activities_filter.apply(
            dataframe, ["register request"],
            parameters={start_activities_filter.Parameters.CASE_ID_KEY: "case:concept:name",
                        start_activities_filter.Parameters.ACTIVITY_KEY: "concept:name"})
Example #3
def apply(log, parameters):
    """
    Apply the IMDF algorithm to a log, obtaining a Petri net along with an initial and final marking

    Parameters
    -----------
    log
        Log
    parameters
        Parameters of the algorithm, including:
            pmutil.constants.PARAMETER_CONSTANT_ACTIVITY_KEY -> attribute of the log to use as activity name
            (default concept:name)

    Returns
    -----------
    net
        Petri net
    initial_marking
        Initial marking
    final_marking
        Final marking
    """
    if parameters is None:
        parameters = {}
    if pmutil.constants.PARAMETER_CONSTANT_ACTIVITY_KEY not in parameters:
        parameters[pmutil.constants.PARAMETER_CONSTANT_ACTIVITY_KEY] = xes_util.DEFAULT_NAME_KEY
    if pmutil.constants.PARAMETER_CONSTANT_ATTRIBUTE_KEY not in parameters:
        parameters[pmutil.constants.PARAMETER_CONSTANT_ATTRIBUTE_KEY] = parameters[
            pmutil.constants.PARAMETER_CONSTANT_ACTIVITY_KEY]
    activity_key = parameters[pmutil.constants.PARAMETER_CONSTANT_ACTIVITY_KEY]

    # get the DFG
    dfg = [(k, v) for k, v in dfg_inst.apply(log, parameters={
        pmutil.constants.PARAMETER_CONSTANT_ACTIVITY_KEY: activity_key}).items() if v > 0]

    # get the activities in the log
    activities = attributes_filter.get_attribute_values(log, activity_key)

    # gets the start activities from the log
    start_activities = list(start_activities_filter.get_start_activities(log, parameters=parameters).keys())
    # gets the end activities from the log
    end_activities = list(end_activities_filter.get_end_activities(log, parameters=parameters).keys())

    # check if the log contains empty traces
    contains_empty_traces = False
    traces_length = [len(trace) for trace in log]
    if traces_length:
        contains_empty_traces = min(traces_length) == 0

    net, initial_marking, final_marking = apply_dfg(dfg, parameters=parameters, activities=activities,
                                                    contains_empty_traces=contains_empty_traces,
                                                    start_activities=start_activities, end_activities=end_activities)

    return net, initial_marking, final_marking
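A hedged usage sketch for apply(), pairing it with the factory-style Petri-net visualizer (pn_vis_factory) referenced in Example 13; `log` is assumed to be an already-imported event log, and the import path follows pm4py's 1.x factory layout:

# usage sketch; assumes `log` is an already-imported event log
net, initial_marking, final_marking = apply(log, parameters=None)

# render with the factory-style visualizer referenced in Example 13
from pm4py.visualization.petrinet import factory as pn_vis_factory
gviz = pn_vis_factory.apply(net, initial_marking, final_marking)
pn_vis_factory.view(gviz)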
Example #4
def apply(log, parameters=None):
    """
    Gets the performance HNet

    Parameters
    ------------
    log
        Log
    parameters
        Parameters of the algorithm

    Returns
    ------------
    base64
        Base64 of an SVG representing the model
    model
        Text representation of the model
    format
        Format of the model
    """
    if parameters is None:
        parameters = {}

    decreasingFactor = parameters.get("decreasingFactor", constants.DEFAULT_DEC_FACTOR)

    activity_key = parameters.get(pm4_constants.PARAMETER_CONSTANT_ACTIVITY_KEY, xes.DEFAULT_NAME_KEY)

    log = attributes_filter.filter_log_on_max_no_activities(log, max_no_activities=constants.MAX_NO_ACTIVITIES,
                                                            parameters=parameters)
    filtered_log = auto_filter.apply_auto_filter(log, parameters=parameters)

    activities_count = attributes_filter.get_attribute_values(filtered_log, activity_key)
    start_activities_count = start_activities_filter.get_start_activities(filtered_log, parameters=parameters)
    end_activities_count = end_activities_filter.get_end_activities(filtered_log, parameters=parameters)
    activities = list(activities_count.keys())
    start_activities = list(start_activities_count.keys())
    end_activities = list(end_activities_count.keys())

    dfg_freq = dfg_factory.apply(filtered_log, parameters=parameters)
    dfg_perf = dfg_factory.apply(filtered_log, variant="performance", parameters=parameters)

    heu_net = HeuristicsNet(dfg_freq, performance_dfg=dfg_perf, activities=activities,
                            start_activities=start_activities, end_activities=end_activities,
                            activities_occurrences=activities_count)

    heu_net.calculate(dfg_pre_cleaning_noise_thresh=constants.DEFAULT_DFG_CLEAN_MULTIPLIER * decreasingFactor)

    vis = heu_vis_factory.apply(heu_net, parameters={"format": "svg"})
    vis2 = heu_vis_factory.apply(heu_net, parameters={"format": "dot"})

    gviz_base64 = get_base64_from_file(vis2.name)

    return get_base64_from_file(vis.name), None, "", "xes", activities, start_activities, end_activities, gviz_base64, [], "heuristics", "perf", None, "", activity_key
Example #5
    def get_start_activities(self, parameters=None):
        """
        Gets the start activities from the log

        Returns
        ------------
        start_activities_dict
            Dictionary of start activities
        """
        if parameters is None:
            parameters = {}
        parameters[constants.PARAMETER_CONSTANT_ACTIVITY_KEY] = self.activity_key
        parameters[constants.PARAMETER_CONSTANT_ATTRIBUTE_KEY] = self.activity_key
        return start_activities_filter.get_start_activities(self.log, parameters=parameters)
Example #6
def apply_heu(log, parameters=None):
    """
    Discovers a Heuristics Net using the Heuristics Miner

    Parameters
    ------------
    log
        Event log
    parameters
        Possible parameters of the algorithm,
        including: activity_key, case_id_glue, timestamp_key,
        dependency_thresh, and_measure_thresh, min_act_count, min_dfg_occurrences, dfg_pre_cleaning_noise_thresh,
        loops_length_two_thresh

    Returns
    ------------
    heu
        Heuristics Net
    """
    if parameters is None:
        parameters = {}

    activity_key = parameters.get(constants.PARAMETER_CONSTANT_ACTIVITY_KEY, xes.DEFAULT_NAME_KEY)

    start_activities = log_sa_filter.get_start_activities(
        log, parameters=parameters)
    end_activities = log_ea_filter.get_end_activities(log,
                                                      parameters=parameters)
    activities_occurrences = log_attributes.get_attribute_values(
        log, activity_key, parameters=parameters)
    activities = list(activities_occurrences.keys())
    dfg = dfg_factory.apply(log, parameters=parameters)
    parameters_w2 = deepcopy(parameters)
    parameters_w2["window"] = 2
    dfg_window_2 = dfg_factory.apply(log, parameters=parameters_w2)
    freq_triples = dfg_factory.apply(log,
                                     parameters=parameters,
                                     variant="freq_triples")

    return apply_heu_dfg(dfg,
                         activities=activities,
                         activities_occurrences=activities_occurrences,
                         start_activities=start_activities,
                         end_activities=end_activities,
                         dfg_window_2=dfg_window_2,
                         freq_triples=freq_triples,
                         parameters=parameters)
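A short, hedged sketch of rendering the discovered Heuristics Net, reusing the heu_vis_factory visualizer seen in Example 4 (the import path is assumed from pm4py's 1.x factory layout):

# usage sketch; assumes `log` is an already-imported event log
heu_net = apply_heu(log)

from pm4py.visualization.heuristics_net import factory as heu_vis_factory
gviz = heu_vis_factory.apply(heu_net, parameters={"format": "svg"})
heu_vis_factory.view(gviz)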
Example #7
def apply_tree(log, parameters):
    """
    Apply the IMDF algorithm to a log, obtaining a process tree

    Parameters
    ----------
    log
        Log
    parameters
        Parameters of the algorithm, including:
            pmutil.constants.PARAMETER_CONSTANT_ACTIVITY_KEY -> attribute of the log to use as activity name
            (default concept:name)

    Returns
    ----------
    tree
        Process tree
    """
    if parameters is None:
        parameters = {}
    if pmutil.constants.PARAMETER_CONSTANT_ACTIVITY_KEY not in parameters:
        parameters[pmutil.constants.PARAMETER_CONSTANT_ACTIVITY_KEY] = xes_util.DEFAULT_NAME_KEY
    activity_key = parameters[pmutil.constants.PARAMETER_CONSTANT_ACTIVITY_KEY]

    # get the DFG
    dfg = [(k, v) for k, v in dfg_inst.apply(log, parameters={
        pmutil.constants.PARAMETER_CONSTANT_ACTIVITY_KEY: activity_key}).items() if v > 0]

    # gets the start activities from the log
    start_activities = start_activities_filter.get_start_activities(log, parameters=parameters)
    # gets the end activities from the log
    end_activities = end_activities_filter.get_end_activities(log, parameters=parameters)

    # get the activities in the log
    activities = attributes_filter.get_attribute_values(log, activity_key)

    # check if the log contains empty traces
    contains_empty_traces = False
    traces_length = [len(trace) for trace in log]
    if traces_length:
        contains_empty_traces = min(traces_length) == 0

    return apply_tree_dfg(dfg, parameters, activities=activities, contains_empty_traces=contains_empty_traces,
                          start_activities=start_activities, end_activities=end_activities)
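Similarly, a hedged sketch pairing apply_tree() with the process-tree visualizer (pt_vis_factory) referenced in Example 9; the import path is assumed from pm4py's 1.x factory layout:

# usage sketch; assumes `log` is an already-imported event log
tree = apply_tree(log, parameters={})

from pm4py.visualization.process_tree import factory as pt_vis_factory
gviz = pt_vis_factory.apply(tree, parameters={"format": "svg"})
pt_vis_factory.view(gviz)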
Example #8
def read_xes(data_dir, dataset, aggregate_type, mode="pruning"):
    prune_parameter_freq = 350
    prune_parameter_time = -1  # keep all
    # read the xes file
    if dataset == "BPIC14":
        # log = csv_importer.import_event_stream(os.path.join(data_dir, dataset + ".csv"))
        data = csv_import_adapter.import_dataframe_from_path(os.path.join(
            data_dir, dataset + ".csv"),
                                                             sep=";")
        data['case:concept:name'] = data['Incident ID']
        data['time:timestamp'] = data['DateStamp']
        data['concept:name'] = data['IncidentActivity_Type']
        log = conversion_factory.apply(data)
    elif dataset == "Unrineweginfectie":
        data = csv_import_adapter.import_dataframe_from_path(os.path.join(
            data_dir, dataset + ".csv"),
                                                             sep=",")
        data['case:concept:name'] = data['Patientnummer']
        data['time:timestamp'] = data['Starttijd']
        data['concept:name'] = data['Aciviteit']
        log = conversion_factory.apply(data)
    else:
        log = xes_import_factory.apply(os.path.join(data_dir,
                                                    dataset + ".xes"))
        data = get_dataframe_from_event_stream(log)

    # dataframe = log_converter.apply(log, variant=log_converter.Variants.TO_DATA_FRAME)
    # dfg_freq = dfg_factory.apply(log,variant="frequency")
    # dfg_time =get_dfg_time(data,aggregate_type,dataset)

    if aggregate_type == AggregateType.FREQ:
        dfg = dfg_factory.apply(log, variant="frequency")
    else:
        dfg = get_dfg_time(data, aggregate_type, dataset)
    """Getting Start and End activities"""
    # log = xes_importer.import_log(xes_file)
    log_start = start_activities_filter.get_start_activities(log)
    log_end = end_activities_filter.get_end_activities(log)
    # return dfg_freq,dfg_time
    return dfg
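A hedged call sketch for read_xes; the data directory is illustrative, and AggregateType is defined in the surrounding project rather than in this snippet:

# usage sketch; "input_data" is illustrative, AggregateType comes from the surrounding project
dfg_freq = read_xes("input_data", "BPIC14", AggregateType.FREQ)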
Example #9
def apply(log, parameters=None):
    """
    Gets the process tree using Inductive Miner Directly-Follows

    Parameters
    ------------
    log
        Log
    parameters
        Parameters of the algorithm

    Returns
    ------------
    base64
        Base64 of an SVG representing the model
    model
        Text representation of the model
    format
        Format of the model
    """
    if parameters is None:
        parameters = {}

    decreasingFactor = parameters.get("decreasingFactor", constants.DEFAULT_DEC_FACTOR)

    activity_key = parameters.get(pm4_constants.PARAMETER_CONSTANT_ACTIVITY_KEY, xes.DEFAULT_NAME_KEY)

    log = attributes_filter.filter_log_on_max_no_activities(
        log,
        max_no_activities=constants.MAX_NO_ACTIVITIES,
        parameters=parameters)
    filtered_log = auto_filter.apply_auto_filter(log, parameters=parameters)

    activities_count = attributes_filter.get_attribute_values(
        filtered_log, activity_key)
    activities = list(activities_count.keys())
    start_activities = list(
        start_activities_filter.get_start_activities(
            filtered_log, parameters=parameters).keys())
    end_activities = list(
        end_activities_filter.get_end_activities(filtered_log,
                                                 parameters=parameters).keys())

    dfg = dfg_factory.apply(filtered_log, parameters=parameters)
    dfg = clean_dfg_based_on_noise_thresh(
        dfg,
        activities,
        decreasingFactor * constants.DEFAULT_DFG_CLEAN_MULTIPLIER,
        parameters=parameters)
    tree = inductive_miner.apply_tree_dfg(dfg,
                                          parameters=parameters,
                                          activities=activities,
                                          start_activities=start_activities,
                                          end_activities=end_activities)
    parameters["format"] = "svg"
    gviz = pt_vis_factory.apply(tree, parameters=parameters)

    gviz_base64 = base64.b64encode(str(gviz).encode('utf-8'))

    return get_base64_from_gviz(gviz), None, "", "xes", activities, start_activities, end_activities, gviz_base64, [], "tree", "freq", None, "", activity_key
Example #10
def apply(log, net, marking, final_marking, parameters=None):
    """
    Get Align-ET Conformance precision

    Parameters
    ----------
    log
        Trace log
    net
        Petri net
    marking
        Initial marking
    final_marking
        Final marking
    parameters
        Parameters of the algorithm, including:
            pm4py.util.constants.PARAMETER_CONSTANT_ACTIVITY_KEY -> Activity key
    """

    if parameters is None:
        parameters = {}

    debug_level = parameters.get("debug_level", 0)

    activity_key = parameters.get(PARAM_ACTIVITY_KEY, log_lib.util.xes.DEFAULT_NAME_KEY)
    # default value for precision, when no activated transitions (not even by looking at the initial marking) are found
    precision = 1.0
    sum_ee = 0
    sum_at = 0
    unfit = 0

    if not petri.check_soundness.check_relaxed_soundness_net_in_fin_marking(
            net, marking, final_marking):
        raise Exception(
            "trying to apply Align-ETConformance on a Petri net that is not a relaxed sound net!!"
        )

    prefixes, prefix_count = precision_utils.get_log_prefixes(
        log, activity_key=activity_key)
    prefixes_keys = list(prefixes.keys())
    fake_log = precision_utils.form_fake_log(prefixes_keys,
                                             activity_key=activity_key)

    align_stop_marking = align_fake_log_stop_marking(fake_log,
                                                     net,
                                                     marking,
                                                     final_marking,
                                                     parameters=parameters)
    all_markings = transform_markings_from_sync_to_original_net(
        align_stop_marking, net, parameters=parameters)

    for i in range(len(prefixes)):
        markings = all_markings[i]

        if markings is not None:
            log_transitions = set(prefixes[prefixes_keys[i]])
            activated_transitions_labels = set()
            for m in markings:
                # add to the set of activated transitions in the model the activated transitions
                # for each prefix
                activated_transitions_labels = activated_transitions_labels.union(
                    x.label for x in utils.
                    get_visible_transitions_eventually_enabled_by_marking(
                        net, m) if x.label is not None)
            escaping_edges = activated_transitions_labels.difference(
                log_transitions)

            sum_at += len(activated_transitions_labels) * prefix_count[
                prefixes_keys[i]]
            sum_ee += len(escaping_edges) * prefix_count[prefixes_keys[i]]

            if debug_level > 1:
                print("")
                print("prefix=", prefixes_keys[i])
                print("log_transitions=", log_transitions)
                print("activated_transitions=", activated_transitions_labels)
                print("escaping_edges=", escaping_edges)
        else:
            unfit += prefix_count[prefixes_keys[i]]

    if debug_level > 0:
        print("\n")
        print("overall unfit", unfit)
        print("overall activated transitions", sum_at)
        print("overall escaping edges", sum_ee)

    # fix: also the empty prefix should be counted!
    start_activities = set(
        start_activities_filter.get_start_activities(log,
                                                     parameters=parameters))
    trans_en_ini_marking = set([
        x.label for x in get_visible_transitions_eventually_enabled_by_marking(
            net, marking)
    ])
    diff = trans_en_ini_marking.difference(start_activities)
    sum_at += len(log) * len(trans_en_ini_marking)
    sum_ee += len(log) * len(diff)
    # end fix

    if sum_at > 0:
        precision = 1 - float(sum_ee) / float(sum_at)

    return precision
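For reference, the returned value is the escaping-edges precision. Writing $AT_p$ for the transitions activated by the model after prefix $p$, $EE_p = AT_p \setminus L_p$ for the escaping edges ($L_p$ being the activities that follow $p$ in the log), and $c_p$ for the number of traces with prefix $p$ (plus the empty-prefix correction above), the code computes

$$\mathrm{precision} = 1 - \frac{\sum_p |EE_p| \cdot c_p}{\sum_p |AT_p| \cdot c_p},$$

defaulting to 1 when the denominator is zero.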
Example #11
                                                parameters={
                                                    "sort": True,
                                                    "sort_field":
                                                    "time:timestamp"
                                                })

log = conversion_factory.apply(event_stream,
                               parameters={"timestamp_sort": True})

####### Filter the event log

## Start activities

from pm4py.algo.filtering.log.start_activities import start_activities_filter

log_start = start_activities_filter.get_start_activities(log)

log = start_activities_filter.apply_auto_filter(
    log, parameters={"decreasingFactor": 0.6})
print(start_activities_filter.get_start_activities(log))

## End activities

from pm4py.algo.filtering.log.end_activities import end_activities_filter

log_end = end_activities_filter.get_end_activities(log)

log_af_ea = end_activities_filter.apply_auto_filter(
    log, parameters={"decreasingFactor": 0.6})
print(end_activities_filter.get_end_activities(log_af_ea))
Example #12
    def test_16(self):
        from pm4py.algo.filtering.log.start_activities import start_activities_filter
        log = self.load_running_example_xes()
        log_start = start_activities_filter.get_start_activities(log)
        filtered_log = start_activities_filter.apply(log, ["register request"])
Example #13
# print(rr)
# print(df)
# print(type(df['org:resource'][5]))
# df = pd.read_csv("not_so_good_data.csv")
# df['org:resource'] = df['org:resource'].astype(str)

from pm4py.objects.conversion.log import factory as log_conv_factory
conv_log = log_conv_factory.apply(df)
print(len(conv_log))
print(conv_log)
from pm4py.objects.log.util import sorting
sorted_log = sorting.sort_lambda(conv_log,
                                 lambda x: x.attributes["concept:name"],
                                 reverse=False)
from pm4py.algo.filtering.log.start_activities import start_activities_filter
log_start = start_activities_filter.get_start_activities(sorted_log)
filtered_log = start_activities_filter.apply(sorted_log, ["A"])
# print(rrr)
# from pm4py.algo.discovery.simple.model.log import factory as simple_algorithm

# net, initial_marking, final_marking = simple_algorithm.apply(conv_log, classic_output=True, parameters={"max_no_variants": 20})
# gviz = pn_vis_factory.apply(net, im, fm)
# pn_vis_factory.view(gviz)

# print(ooo)
from pm4py.algo.discovery.alpha import factory as alpha_miner
# the same exact discovery technique can be applied directly to Pandas dataframes! :)
from pm4py.algo.discovery.inductive import factory as inductive_miner
# discovers an accepting Petri net
net, im, fm = inductive_miner.apply(filtered_log)
tree = inductive_miner.apply_tree(filtered_log)
Example #14
    def detect_cut(self, second_iteration=False, parameters=None):
        if pkgutil.find_loader("networkx"):
            import networkx as nx

            if parameters is None:
                parameters = {}
            activity_key = exec_utils.get_param_value(
                Parameters.ACTIVITY_KEY, parameters,
                pmutil.xes_constants.DEFAULT_NAME_KEY)

            # check base cases:
            empty_log = base_case.empty_log(self.log)
            single_activity = base_case.single_activity(self.log, activity_key)
            if empty_log:
                self.detected_cut = 'empty_log'
            elif single_activity:
                self.detected_cut = 'single_activity'
            # if no base cases are found, search for a cut:
            else:
                conn_components = detection_utils.get_connected_components(
                    self.ingoing, self.outgoing, self.activities)
                this_nx_graph = transform_dfg_to_directed_nx_graph(
                    self.dfg, activities=self.activities)
                strongly_connected_components = [
                    list(x)
                    for x in nx.strongly_connected_components(this_nx_graph)
                ]
                xor_cut = self.detect_xor(conn_components)
                # the following part searches for a cut in the current log
                # if a cut is found, the log is split according to the cut, the resulting logs are saved in new_logs
                # recursion is used on all the logs in new_logs
                if xor_cut[0]:
                    logging.debug("xor_cut")
                    self.detected_cut = 'concurrent'
                    new_logs = split.split_xor(xor_cut[1], self.log,
                                               activity_key)
                    for i in range(len(new_logs)):
                        new_logs[
                            i] = filtering_utils.keep_one_trace_per_variant(
                                new_logs[i], parameters=parameters)
                    for l in new_logs:
                        new_dfg = [(k, v) for k, v in dfg_inst.apply(
                            l, parameters=parameters).items() if v > 0]
                        activities = attributes_filter.get_attribute_values(
                            l, activity_key)
                        start_activities = list(
                            start_activities_filter.get_start_activities(
                                l, parameters=parameters).keys())
                        end_activities = list(
                            end_activities_filter.get_end_activities(
                                l, parameters=parameters).keys())
                        self.children.append(
                            SubtreePlain(l,
                                         new_dfg,
                                         self.master_dfg,
                                         self.initial_dfg,
                                         activities,
                                         self.counts,
                                         self.rec_depth + 1,
                                         noise_threshold=self.noise_threshold,
                                         start_activities=start_activities,
                                         end_activities=end_activities,
                                         initial_start_activities=self.
                                         initial_start_activities,
                                         initial_end_activities=self.
                                         initial_end_activities,
                                         parameters=parameters))
                else:
                    sequence_cut = cut_detection.detect_sequential_cut(
                        self, self.dfg, strongly_connected_components)
                    if sequence_cut[0]:
                        logging.debug("sequence_cut")
                        new_logs = split.split_sequence(
                            sequence_cut[1], self.log, activity_key)
                        for i in range(len(new_logs)):
                            new_logs[
                                i] = filtering_utils.keep_one_trace_per_variant(
                                    new_logs[i], parameters=parameters)
                        self.detected_cut = "sequential"
                        for l in new_logs:
                            new_dfg = [(k, v) for k, v in dfg_inst.apply(
                                l, parameters=parameters).items() if v > 0]
                            activities = attributes_filter.get_attribute_values(
                                l, activity_key)
                            start_activities = list(
                                start_activities_filter.get_start_activities(
                                    l, parameters=parameters).keys())
                            end_activities = list(
                                end_activities_filter.get_end_activities(
                                    l, parameters=parameters).keys())
                            self.children.append(
                                SubtreePlain(
                                    l,
                                    new_dfg,
                                    self.master_dfg,
                                    self.initial_dfg,
                                    activities,
                                    self.counts,
                                    self.rec_depth + 1,
                                    noise_threshold=self.noise_threshold,
                                    start_activities=start_activities,
                                    end_activities=end_activities,
                                    initial_start_activities=self.
                                    initial_start_activities,
                                    initial_end_activities=self.
                                    initial_end_activities,
                                    parameters=parameters))
                    else:
                        parallel_cut = self.detect_concurrent()
                        if parallel_cut[0]:
                            logging.debug("parallel_cut")
                            new_logs = split.split_parallel(
                                parallel_cut[1], self.log, activity_key)
                            for i in range(len(new_logs)):
                                new_logs[
                                    i] = filtering_utils.keep_one_trace_per_variant(
                                        new_logs[i], parameters=parameters)
                            self.detected_cut = "parallel"
                            for l in new_logs:
                                new_dfg = [(k, v) for k, v in dfg_inst.apply(
                                    l, parameters=parameters).items() if v > 0]
                                activities = attributes_filter.get_attribute_values(
                                    l, activity_key)
                                start_activities = list(
                                    start_activities_filter.
                                    get_start_activities(
                                        l, parameters=parameters).keys())
                                end_activities = list(
                                    end_activities_filter.get_end_activities(
                                        l, parameters=parameters).keys())
                                self.children.append(
                                    SubtreePlain(
                                        l,
                                        new_dfg,
                                        self.master_dfg,
                                        self.initial_dfg,
                                        activities,
                                        self.counts,
                                        self.rec_depth + 1,
                                        noise_threshold=self.noise_threshold,
                                        start_activities=start_activities,
                                        end_activities=end_activities,
                                        initial_start_activities=self.
                                        initial_start_activities,
                                        initial_end_activities=self.
                                        initial_end_activities,
                                        parameters=parameters))
                        else:
                            loop_cut = self.detect_loop()
                            if loop_cut[0]:
                                logging.debug("loop_cut")
                                new_logs = split.split_loop(
                                    loop_cut[1], self.log, activity_key)
                                for i in range(len(new_logs)):
                                    new_logs[
                                        i] = filtering_utils.keep_one_trace_per_variant(
                                            new_logs[i], parameters=parameters)
                                self.detected_cut = "loopCut"
                                for l in new_logs:
                                    new_dfg = [
                                        (k, v) for k, v in dfg_inst.apply(
                                            l, parameters=parameters).items()
                                        if v > 0
                                    ]
                                    activities = attributes_filter.get_attribute_values(
                                        l, activity_key)
                                    start_activities = list(
                                        start_activities_filter.
                                        get_start_activities(
                                            l, parameters=parameters).keys())
                                    end_activities = list(
                                        end_activities_filter.
                                        get_end_activities(
                                            l, parameters=parameters).keys())
                                    self.children.append(
                                        SubtreePlain(
                                            l,
                                            new_dfg,
                                            self.master_dfg,
                                            self.initial_dfg,
                                            activities,
                                            self.counts,
                                            self.rec_depth + 1,
                                            noise_threshold=self.
                                            noise_threshold,
                                            start_activities=start_activities,
                                            end_activities=end_activities,
                                            initial_start_activities=self.
                                            initial_start_activities,
                                            initial_end_activities=self.
                                            initial_end_activities,
                                            parameters=parameters))

                            # if the code gets to this point, there is no base case and no cut found in the log
                            # therefore, we now apply the fall-through:
                            else:
                                self.apply_fall_through(parameters)
        else:
            msg = "networkx is not available. inductive miner cannot be used!"
            logging.error(msg)
            raise Exception(msg)
Example #15
def apply(log, parameters=None, classic_output=False):
    """
    Gets a simple model out of a log

    Parameters
    -------------
    log
        Trace log
    parameters
        Parameters of the algorithm, including:
            maximum_number_activities -> Maximum number of activities to keep
            discovery_algorithm -> Discovery algorithm to use (alpha, inductive)
            desidered_output -> Desired output of the algorithm (default: petri)
            include_filtered_log -> Include the filtered log in the output
            include_dfg_frequency -> Include the DFG of frequencies in the output
            include_dfg_performance -> Include the DFG of performance in the output
            include_filtered_dfg_frequency -> Include the filtered DFG of frequencies in the output
            include_filtered_dfg_performance -> Include the filtered DFG of performance in the output
    classic_output
        Determines whether the output directly contains the objects (e.g. net, initial_marking, final_marking)
        or a more detailed dictionary
    """
    if parameters is None:
        parameters = {}

    returned_dictionary = {}

    net = None
    initial_marking = None
    final_marking = None
    bpmn_graph = None
    dfg_frequency = None
    dfg_performance = None
    filtered_dfg_frequency = None
    filtered_dfg_performance = None

    maximum_number_activities = parameters.get("maximum_number_activities", 20)
    discovery_algorithm = parameters.get("discovery_algorithm", "alpha")
    desidered_output = parameters.get("desidered_output", "petri")
    include_filtered_log = parameters.get("include_filtered_log", True)
    include_dfg_frequency = parameters.get("include_dfg_frequency", True)
    include_dfg_performance = parameters.get("include_dfg_performance", False)
    include_filtered_dfg_frequency = parameters.get("include_filtered_dfg_frequency", True)
    include_filtered_dfg_performance = parameters.get("include_filtered_dfg_performance", False)

    if PARAMETER_CONSTANT_ATTRIBUTE_KEY in parameters:
        activity_key = parameters[PARAMETER_CONSTANT_ATTRIBUTE_KEY]
    else:
        log, activity_key = insert_classifier.search_act_class_attr(log)
        if activity_key is None:
            activity_key = DEFAULT_NAME_KEY
        parameters[PARAMETER_CONSTANT_ATTRIBUTE_KEY] = activity_key

    if PARAMETER_CONSTANT_ACTIVITY_KEY not in parameters:
        parameters[PARAMETER_CONSTANT_ACTIVITY_KEY] = parameters[PARAMETER_CONSTANT_ATTRIBUTE_KEY]

    activities_count_dictio = attributes_filter.get_attribute_values(log, activity_key)
    activities_count_list = []
    for activity in activities_count_dictio:
        activities_count_list.append([activity, activities_count_dictio[activity]])

    activities_count_list = sorted(activities_count_list, key=lambda x: x[1], reverse=True)
    activities_count_list = activities_count_list[:min(len(activities_count_list), maximum_number_activities)]
    activities_keep_list = [x[0] for x in activities_count_list]

    log = attributes_filter.apply(log, activities_keep_list, parameters=parameters)

    filtered_log = None

    if "alpha" in discovery_algorithm:
        # parameters_sa = deepcopy(parameters)
        # parameters_sa["decreasingFactor"] = 1.0
        filtered_log = start_activities_filter.apply_auto_filter(log, parameters=parameters)
        filtered_log = end_activities_filter.apply_auto_filter(filtered_log, parameters=parameters)
        filtered_log = filter_topvariants_soundmodel.apply(filtered_log, parameters=parameters)
    elif "dfg_mining" in discovery_algorithm:
        filtered_log = start_activities_filter.apply_auto_filter(log, parameters=parameters)
        filtered_log = end_activities_filter.apply_auto_filter(filtered_log, parameters=parameters)
        filtered_log = auto_filter.apply_auto_filter(filtered_log, parameters=parameters)

    if include_dfg_frequency or "dfg_mining" in discovery_algorithm:
        dfg_frequency = dfg_factory.apply(log, parameters=parameters, variant="frequency")
    if include_dfg_performance:
        dfg_performance = dfg_factory.apply(log, parameters=parameters, variant="performance")
    if include_filtered_dfg_frequency:
        filtered_dfg_frequency = dfg_factory.apply(filtered_log, parameters=parameters, variant="frequency")
    if include_filtered_dfg_performance:
        filtered_dfg_performance = dfg_factory.apply(filtered_log, parameters=parameters, variant="performance")

    if "alpha" in discovery_algorithm:
        net, initial_marking, final_marking = alpha_miner.apply(filtered_log, parameters=parameters)
    elif "dfg_mining" in discovery_algorithm:
        start_activities = start_activities_filter.get_start_activities(filtered_log, parameters=parameters)
        end_activities = end_activities_filter.get_end_activities(filtered_log, parameters=parameters)

        parameters_conv = {}
        parameters_conv["start_activities"] = start_activities
        parameters_conv["end_activities"] = end_activities

        net, initial_marking, final_marking = dfg_conv_factory.apply(dfg_frequency, parameters=parameters_conv)

    if filtered_log is not None and include_filtered_log:
        returned_dictionary["filtered_log"] = filtered_log
    if net is not None and desidered_output == "petri":
        returned_dictionary["net"] = net
    if initial_marking is not None and desidered_output == "petri":
        returned_dictionary["initial_marking"] = initial_marking
    if final_marking is not None and desidered_output == "petri":
        returned_dictionary["final_marking"] = final_marking
    if bpmn_graph is not None and desidered_output == "bpmn":
        returned_dictionary["bpmn_graph"] = bpmn_graph
    if dfg_frequency is not None and include_dfg_frequency:
        returned_dictionary["dfg_frequency"] = dfg_frequency
    if dfg_performance is not None and include_dfg_performance:
        returned_dictionary["dfg_performance"] = dfg_performance
    if filtered_dfg_frequency is not None and include_filtered_dfg_frequency:
        returned_dictionary["filtered_dfg_frequency"] = filtered_dfg_frequency
    if filtered_dfg_performance is not None and include_filtered_dfg_performance:
        returned_dictionary["filtered_dfg_performance"] = filtered_dfg_performance

    if classic_output:
        if net is not None and desidered_output == "petri":
            return net, initial_marking, final_marking

    return returned_dictionary
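A hedged usage sketch for the simple-model facade above; the classic_output call mirrors the commented invocation in Example 13:

# usage sketch; assumes `log` is an already-imported event log
net, initial_marking, final_marking = apply(log, classic_output=True)

# or request the detailed dictionary (here with the DFG-mining variant)
results = apply(log, parameters={"discovery_algorithm": "dfg_mining"})
filtered_dfg_frequency = results.get("filtered_dfg_frequency")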
Example #16
    log_tree_num = 0
    sum_tree_num = 0
    print(tree)
    tree_avg_tel = 0
    tree_avg_log = 0
    tree_avg_sum = 0
    for sam in range(1, 11):
        print(sam)

        path = os.path.join("input_data", "df_complete_logs",
                            "%d_1000_%d.xes" % (tree, sam))
        log = xes_importer.apply(path)
        tel = xes_importer.apply(path)
        xes_utils.set_enabled(tel)
        dfg_100 = dfg_factory.apply(log)
        start_act = set(get_start_activities(log).keys())
        end_act = set(get_end_activities(log).keys())

        result_norm = []
        result_tel = []

        num = len(dfg_100.keys())
        score_tel = 0
        score_log = 0
        score_sum = 0
        su_tel = 0
        su_log = 0
        su_sum = 0
        for k in range(10):
            found_tel = False
            found_log = False
Example #17
def apply_tree(log, parameters):
    """
    Apply the IM_FF algorithm to a log, obtaining a process tree

    Parameters
    ----------
    log
        Log
    parameters
        Parameters of the algorithm, including:
            Parameters.ACTIVITY_KEY -> attribute of the log to use as activity name
            (default concept:name)

    Returns
    ----------
    process_tree
        Process tree
    """
    if parameters is None:
        parameters = {}

    if isinstance(log, pd.DataFrame):
        variants = variants_get.get_variants_count(log, parameters=parameters)
        return apply_tree_variants(variants, parameters=parameters)
    else:
        activity_key = exec_utils.get_param_value(
            Parameters.ACTIVITY_KEY, parameters,
            pmutil.xes_constants.DEFAULT_NAME_KEY)

        log = converter.apply(log, parameters=parameters)
        # keep only the activity attribute (since the others are not used)
        log = filtering_utils.keep_only_one_attribute_per_event(
            log, activity_key)

        noise_threshold = exec_utils.get_param_value(
            Parameters.NOISE_THRESHOLD, parameters,
            shared_constants.NOISE_THRESHOLD_IMF)

        dfg = [(k, v)
               for k, v in dfg_inst.apply(log, parameters=parameters).items()
               if v > 0]
        c = Counts()
        activities = attributes_filter.get_attribute_values(log, activity_key)
        start_activities = list(
            start_activities_filter.get_start_activities(
                log, parameters=parameters).keys())
        end_activities = list(
            end_activities_filter.get_end_activities(
                log, parameters=parameters).keys())
        contains_empty_traces = False
        traces_length = [len(trace) for trace in log]
        if traces_length:
            contains_empty_traces = min(traces_length) == 0

        # set the threshold parameter based on f and the max value in the dfg:
        max_value = 0
        for key, value in dfg:
            if value > max_value:
                max_value = value
        threshold = noise_threshold * max_value

        recursion_depth = 0
        sub = subtree.make_tree(log,
                                dfg,
                                dfg,
                                dfg,
                                activities,
                                c,
                                recursion_depth,
                                noise_threshold,
                                threshold,
                                start_activities,
                                end_activities,
                                start_activities,
                                end_activities,
                                parameters=parameters)

        process_tree = get_tree_repr_implain.get_repr(
            sub, 0, contains_empty_traces=contains_empty_traces)
        # Ensures consistency to the parent pointers in the process tree
        tree_consistency.fix_parent_pointers(process_tree)
        # Fixes a 1 child XOR that is added when single-activities flowers are found
        tree_consistency.fix_one_child_xor_flower(process_tree)
        # folds the process tree (to simplify it in case fallthroughs/filtering is applied)
        process_tree = util.fold(process_tree)

        return process_tree
Example #18
def apply(log, net, marking, final_marking, parameters=None):
    """
    Get Align-ET Conformance precision

    Parameters
    ----------
    log
        Trace log
    net
        Petri net
    marking
        Initial marking
    final_marking
        Final marking
    parameters
        Parameters of the algorithm, including:
            pm4py.util.constants.PARAMETER_CONSTANT_ACTIVITY_KEY -> Activity key
    """

    if parameters is None:
        parameters = {}

    activity_key = parameters.get(PARAM_ACTIVITY_KEY, log_lib.util.xes.DEFAULT_NAME_KEY)
    # default value for precision, when no activated transitions (not even by looking at the initial marking) are found
    precision = 1.0
    sum_ee = 0
    sum_at = 0

    if not (petri.check_soundness.check_wfnet(net) and
            petri.check_soundness.check_relaxed_soundness_net_in_fin_marking(
                net, marking, final_marking)):
        raise Exception(
            "trying to apply Align-ETConformance on a Petri net that is not a relaxed sound workflow net!!"
        )

    prefixes, prefix_count = precision_utils.get_log_prefixes(
        log, activity_key=activity_key)
    prefixes_keys = list(prefixes.keys())
    fake_log = precision_utils.form_fake_log(prefixes_keys,
                                             activity_key=activity_key)

    align_stop_marking = align_fake_log_stop_marking(fake_log,
                                                     net,
                                                     marking,
                                                     final_marking,
                                                     parameters=parameters)
    all_markings = transform_markings_from_sync_to_original_net(
        align_stop_marking, net, parameters=parameters)

    for i in range(len(all_markings)):
        atm = all_markings[i]

        log_transitions = set(prefixes[prefixes_keys[i]])
        activated_transitions_labels = set(
            x.label for x in utils.
            get_visible_transitions_eventually_enabled_by_marking(net, atm)
            if x.label is not None)
        sum_at += len(activated_transitions_labels) * prefix_count[
            prefixes_keys[i]]
        escaping_edges = activated_transitions_labels.difference(
            log_transitions)
        sum_ee += len(escaping_edges) * prefix_count[prefixes_keys[i]]

    # fix: also the empty prefix should be counted!
    start_activities = set(
        start_activities_filter.get_start_activities(log,
                                                     parameters=parameters))
    trans_en_ini_marking = set([
        x.label for x in get_visible_transitions_eventually_enabled_by_marking(
            net, marking)
    ])
    diff = trans_en_ini_marking.difference(start_activities)
    sum_at += len(log) * len(trans_en_ini_marking)
    sum_ee += len(log) * len(diff)
    # end fix

    if sum_at > 0:
        precision = 1 - float(sum_ee) / float(sum_at)

    return precision
Example #19
def apply(log, net, marking, final_marking, parameters=None):
    """
    Get ET Conformance precision

    Parameters
    ----------
    log
        Trace log
    net
        Petri net
    marking
        Initial marking
    final_marking
        Final marking
    parameters
        Parameters of the algorithm, including:
            pm4py.util.constants.PARAMETER_CONSTANT_ACTIVITY_KEY -> Activity key
    """

    if parameters is None:
        parameters = {}

    cleaning_token_flood = parameters.get("cleaning_token_flood", False)

    activity_key = parameters.get(PARAM_ACTIVITY_KEY, log_lib.util.xes.DEFAULT_NAME_KEY)
    # default value for precision, when no activated transitions (not even by looking at the initial marking) are found
    precision = 1.0
    sum_ee = 0
    sum_at = 0

    parameters_tr = {
        "consider_remaining_in_fitness": False,
        "try_to_reach_final_marking_through_hidden": False,
        "stop_immediately_unfit": True,
        "walk_through_hidden_trans": True,
        "cleaning_token_flood": cleaning_token_flood,
        PARAM_ACTIVITY_KEY: activity_key
    }

    prefixes, prefix_count = precision_utils.get_log_prefixes(
        log, activity_key=activity_key)
    prefixes_keys = list(prefixes.keys())
    fake_log = precision_utils.form_fake_log(prefixes_keys,
                                             activity_key=activity_key)

    aligned_traces = token_replay.apply(fake_log,
                                        net,
                                        marking,
                                        final_marking,
                                        parameters=parameters_tr)

    # fix: also the empty prefix should be counted!
    start_activities = set(
        start_activities_filter.get_start_activities(log,
                                                     parameters=parameters))
    trans_en_ini_marking = set([
        x.label for x in get_visible_transitions_eventually_enabled_by_marking(
            net, marking)
    ])
    diff = trans_en_ini_marking.difference(start_activities)
    sum_at += len(log) * len(trans_en_ini_marking)
    sum_ee += len(log) * len(diff)
    # end fix

    for i in range(len(aligned_traces)):
        if aligned_traces[i]["trace_is_fit"]:
            log_transitions = set(prefixes[prefixes_keys[i]])
            activated_transitions_labels = set([
                x.label
                for x in aligned_traces[i]["enabled_transitions_in_marking"]
                if x.label is not None
            ])
            sum_at += len(activated_transitions_labels) * prefix_count[
                prefixes_keys[i]]
            escaping_edges = activated_transitions_labels.difference(
                log_transitions)
            sum_ee += len(escaping_edges) * prefix_count[prefixes_keys[i]]

    if sum_at > 0:
        precision = 1 - float(sum_ee) / float(sum_at)

    return precision
Example #20
def apply(log, parameters=None):
    """
    Gets the Petri net through Inductive Miner, decorated by frequency metric

    Parameters
    ------------
    log
        Log
    parameters
        Parameters of the algorithm

    Returns
    ------------
    base64
        Base64 of an SVG representing the model
    model
        Text representation of the model
    format
        Format of the model
    """
    if parameters is None:
        parameters = {}

    decreasingFactor = parameters.get("decreasingFactor", constants.DEFAULT_DEC_FACTOR)

    activity_key = parameters.get(pm4_constants.PARAMETER_CONSTANT_ACTIVITY_KEY, xes.DEFAULT_NAME_KEY)

    # reduce the depth of the search done by token-based replay
    token_replay.MAX_REC_DEPTH = 1
    token_replay.MAX_IT_FINAL1 = 1
    token_replay.MAX_IT_FINAL2 = 1
    token_replay.MAX_REC_DEPTH_HIDTRANSENABL = 1

    log = attributes_filter.filter_log_on_max_no_activities(
        log,
        max_no_activities=constants.MAX_NO_ACTIVITIES,
        parameters=parameters)
    filtered_log = auto_filter.apply_auto_filter(log, parameters=parameters)

    activities_count = attributes_filter.get_attribute_values(
        filtered_log, activity_key)
    activities = list(activities_count.keys())
    start_activities = list(
        start_activities_filter.get_start_activities(
            filtered_log, parameters=parameters).keys())
    end_activities = list(
        end_activities_filter.get_end_activities(filtered_log,
                                                 parameters=parameters).keys())

    dfg = dfg_factory.apply(filtered_log, parameters=parameters)
    dfg = clean_dfg_based_on_noise_thresh(
        dfg,
        activities,
        decreasingFactor * constants.DEFAULT_DFG_CLEAN_MULTIPLIER,
        parameters=parameters)
    net, im, fm = inductive_miner.apply_dfg(dfg,
                                            parameters=parameters,
                                            activities=activities,
                                            start_activities=start_activities,
                                            end_activities=end_activities)

    parameters["format"] = "svg"
    gviz = pn_vis_factory.apply(net,
                                im,
                                fm,
                                log=filtered_log,
                                variant="frequency",
                                parameters=parameters)

    svg = get_base64_from_gviz(gviz)

    gviz_base64 = base64.b64encode(str(gviz).encode('utf-8'))

    ret_graph = get_graph.get_graph_from_petri(net, im, fm)

    return svg, export_petri_as_string(
        net, im, fm
    ), ".pnml", "xes", activities, start_activities, end_activities, gviz_base64, ret_graph, "inductive", "freq", None, "", activity_key
Example #21
alpha_num = []

for tree in range(1, 11):
    tel_tree_num = 0
    log_tree_num = 0
    print(tree)
    tree_avg_tel = 0
    tree_avg_log = 0
    for sam in range(1, 11):
        print(sam)

        path = os.path.join("input_data", "df_complete_logs",
                            "%d_1000_%d.xes" % (tree, sam))
        log = xes_importer.apply(path)
        dfg_org = dfg_factory.apply(log)
        start_act = set(get_start_activities(log).keys())
        end_act = set(get_end_activities(log).keys())

        alpha_path = os.path.join("input_data", "df_complete_logs",
                                  "df_complete_alpha",
                                  "%d_alpha_%d.xes" % (tree, sam))
        alpha_log = xes_importer.apply(alpha_path)

        num = len(dfg_org.keys())
        score_tel = 0
        su_tel = 0
        for k in range(10):
            for n in range(1, 1000):
                sampled_log = sampling.sample_log(alpha_log, no_traces=n)
                dfg_log = dfg_factory.apply(sampled_log)
Example #22
    def detect_loop(self):
        # p0 is part of return value, it contains the partition of activities
        # write all start and end activities in p1
        if self.contains_empty_trace():
            return [False, []]
        start_activities = list(
            start_activities_filter.get_start_activities(
                self.log, parameters=self.parameters).keys())
        end_activities = list(
            end_activities_filter.get_end_activities(
                self.log, parameters=self.parameters).keys())
        p1 = []
        for act in start_activities:
            if act not in p1:
                p1.append(act)
        for act in end_activities:
            if act not in p1:
                p1.append(act)

        # create new dfg without the transitions to start and end activities
        new_dfg = copy(self.dfg)
        copy_dfg = copy(new_dfg)
        for ele in copy_dfg:
            if ele[0][0] in p1 or ele[0][1] in p1:
                new_dfg.remove(ele)
        # get connected components of this new dfg
        new_ingoing = get_ingoing_edges(new_dfg)
        new_outgoing = get_outgoing_edges(new_dfg)
        # it was a pain in the *** to get a working dictionary of the current_activities, as we can't iterate over the dfg
        current_activities = {}
        for element in self.activities:
            if element not in p1:
                current_activities.update({element: 1})
        p0 = detection_utils.get_connected_components(new_ingoing,
                                                      new_outgoing,
                                                      current_activities)
        p0.insert(0, p1)

        iterable_dfg = []
        for i in range(0, len(self.dfg)):
            iterable_dfg.append(self.dfg[i][0])
        # p0 is like P1,P2,...,Pn in line 3 on page 190 of the IM Thesis
        # check for subsets in p0 that have connections to an end activity or from a start activity
        p0_copy = []
        for int_el in p0:
            p0_copy.append(int_el)
        for element in p0_copy:  # for every set in p0
            removed = False
            if element in p0 and element != p0[0]:
                for act in element:  # for every activity in this set
                    for e in end_activities:  # for every end activity
                        if e not in start_activities:
                            if (act, e) in iterable_dfg:  # check if connected
                                # is there an element in dfg pointing from any act in a subset of p0 to an end activity
                                for activ in element:
                                    if activ not in p0[0]:
                                        p0[0].append(activ)
                                if element in p0:
                                    p0.remove(
                                        element
                                    )  # remove subsets that are connected to an end activity
                                removed = True
                                break
                    if removed:
                        break
                    for s in start_activities:
                        if s not in end_activities:
                            if not removed:
                                if (s, act) in iterable_dfg:
                                    for acti in element:
                                        if acti not in p0[0]:
                                            p0[0].append(acti)
                                    if element in p0:
                                        p0.remove(
                                            element
                                        )  # remove subsets that are reached from a start activity
                                    removed = True
                                    break
                            else:
                                break
                    if removed:
                        break

        iterable_dfg = []
        for i in range(0, len(self.dfg)):
            iterable_dfg.append(self.dfg[i][0])

        p0_copy = []
        for int_el in p0:
            p0_copy.append(int_el)
        for element in p0_copy:
            if element in p0 and element != p0[0]:
                for act in element:
                    for e in self.end_activities:
                        if (
                                e, act
                        ) in iterable_dfg:  # get those acts that are reached from an end activity
                            for e2 in self.end_activities:  # check whether act is reached from all end activities
                                if (e2, act) not in iterable_dfg:
                                    for acti in element:
                                        if acti not in p0[0]:
                                            p0[0].append(acti)
                                    if element in p0:
                                        p0.remove(
                                            element
                                        )  # remove subsets reached from some but not all end activities
                                    break
                    for s in self.start_activities:
                        if (
                                act, s
                        ) in iterable_dfg:  # same as above (in this case for activities connected to
                            # a start activity)
                            for s2 in self.start_activities:
                                if (act, s2) not in iterable_dfg:
                                    for acti in element:
                                        if acti not in p0[0]:
                                            p0[0].append(acti)
                                    if element in p0:
                                        p0.remove(
                                            element
                                        )  # remove subsets that point to some but not all start activities
                                    break

        if len(p0) > 1:
            return [True, p0]
        else:
            return [False, []]
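To make the partitioning in `detect_loop` concrete: removing every edge that touches a start or end activity and then taking connected components yields the candidate loop bodies. A small self-contained illustration on a toy DFG (the BFS below is a simplified stand-in for `detection_utils.get_connected_components`, not the pm4py implementation):

# Toy DFG as a list of edges; 'a' is the start activity, 'd' the end activity.
dfg = [('a', 'b'), ('b', 'c'), ('c', 'd'), ('a', 'e'), ('e', 'd')]
p1 = {'a', 'd'}

# Drop every edge touching a start or end activity.
reduced = [(s, t) for (s, t) in dfg if s not in p1 and t not in p1]

# Undirected connected components over the remaining activities.
activities = {n for edge in dfg for n in edge} - p1
adj = {n: set() for n in activities}
for s, t in reduced:
    adj[s].add(t)
    adj[t].add(s)

components, seen = [], set()
for node in activities:
    if node in seen:
        continue
    comp, stack = set(), [node]
    while stack:
        cur = stack.pop()
        if cur in seen:
            continue
        seen.add(cur)
        comp.add(cur)
        stack.extend(adj[cur] - seen)
    components.append(comp)

print(components)  # e.g. [{'b', 'c'}, {'e'}] -> candidate loop bodies around p1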
Example no. 23
0
 def apply_cut_im_plain(self, type_of_cut, cut, activity_key):
     # dfg_viz = dfg_factory.apply(self.log)
     # gviz = dfg_vis_factory.apply(dfg_viz, log=self.log, variant="frequency", parameters={"format": "PDF"})
     # dfg_vis_factory.view(gviz)
     if type_of_cut == 'concurrent':
         self.detected_cut = 'concurrent'
         new_logs = split.split_xor(cut[1], self.log, activity_key)
         for l in new_logs:
             new_dfg = [(k, v) for k, v in dfg_inst.apply(
                 l, parameters=self.parameters).items() if v > 0]
             activities = attributes_filter.get_attribute_values(
                 l, activity_key)
             start_activities = list(
                 start_activities_filter.get_start_activities(
                     l, parameters=self.parameters).keys())
             end_activities = list(
                 end_activities_filter.get_end_activities(
                     l, parameters=self.parameters).keys())
             self.children.append(
                 SubtreeInfrequent(
                     l,
                     new_dfg,
                     self.master_dfg,
                     self.initial_dfg,
                     activities,
                     self.counts,
                     self.rec_depth + 1,
                     self.f,
                     noise_threshold=self.noise_threshold,
                     start_activities=start_activities,
                     end_activities=end_activities,
                     initial_start_activities=self.initial_start_activities,
                     initial_end_activities=self.initial_end_activities,
                     parameters=self.parameters))
     elif type_of_cut == 'sequential':
         new_logs = split.split_sequence(cut[1], self.log, activity_key)
         self.detected_cut = "sequential"
         for l in new_logs:
             new_dfg = [(k, v) for k, v in dfg_inst.apply(
                 l, parameters=self.parameters).items() if v > 0]
             activities = attributes_filter.get_attribute_values(
                 l, activity_key)
             start_activities = list(
                 start_activities_filter.get_start_activities(
                     l, parameters=self.parameters).keys())
             end_activities = list(
                 end_activities_filter.get_end_activities(
                     l, parameters=self.parameters).keys())
             self.children.append(
                 SubtreeInfrequent(
                     l,
                     new_dfg,
                     self.master_dfg,
                     self.initial_dfg,
                     activities,
                     self.counts,
                     self.rec_depth + 1,
                     self.f,
                     noise_threshold=self.noise_threshold,
                     start_activities=start_activities,
                     end_activities=end_activities,
                     initial_start_activities=self.initial_start_activities,
                     initial_end_activities=self.initial_end_activities,
                     parameters=self.parameters))
     elif type_of_cut == 'parallel':
         new_logs = split.split_parallel(cut[1], self.log, activity_key)
         self.detected_cut = "parallel"
         for l in new_logs:
             new_dfg = [(k, v) for k, v in dfg_inst.apply(
                 l, parameters=self.parameters).items() if v > 0]
             activities = attributes_filter.get_attribute_values(
                 l, activity_key)
             start_activities = list(
                 start_activities_filter.get_start_activities(
                     l, parameters=self.parameters).keys())
             end_activities = list(
                 end_activities_filter.get_end_activities(
                     l, parameters=self.parameters).keys())
             self.children.append(
                 SubtreeInfrequent(
                     l,
                     new_dfg,
                     self.master_dfg,
                     self.initial_dfg,
                     activities,
                     self.counts,
                     self.rec_depth + 1,
                     self.f,
                     noise_threshold=self.noise_threshold,
                     start_activities=start_activities,
                     end_activities=end_activities,
                     initial_start_activities=self.initial_start_activities,
                     initial_end_activities=self.initial_end_activities,
                     parameters=self.parameters))
     elif type_of_cut == 'loopCut':
         new_logs = split.split_loop(cut[1], self.log, activity_key)
         self.detected_cut = "loopCut"
         for l in new_logs:
             new_dfg = [(k, v) for k, v in dfg_inst.apply(
                 l, parameters=self.parameters).items() if v > 0]
             activities = attributes_filter.get_attribute_values(
                 l, activity_key)
             start_activities = list(
                 start_activities_filter.get_start_activities(
                     l, parameters=self.parameters).keys())
             end_activities = list(
                 end_activities_filter.get_end_activities(
                     l, parameters=self.parameters).keys())
             self.children.append(
                 SubtreeInfrequent(
                     l,
                     new_dfg,
                     self.master_dfg,
                     self.initial_dfg,
                     activities,
                     self.counts,
                     self.rec_depth + 1,
                     self.f,
                     noise_threshold=self.noise_threshold,
                     start_activities=start_activities,
                     end_activities=end_activities,
                     initial_start_activities=self.initial_start_activities,
                     initial_end_activities=self.initial_end_activities,
                     parameters=self.parameters))
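The four branches of `apply_cut_im_plain` differ only in the split function used and in the value assigned to `self.detected_cut`; the child construction is duplicated verbatim. A possible table-driven refactoring, sketched under the assumption that the `split_*` functions share the signature used above and that the duplicated body is moved into a hypothetical `_append_child` helper:

# Hypothetical refactor: map each cut type to its split function.
SPLITTERS = {
    'concurrent': split.split_xor,
    'sequential': split.split_sequence,
    'parallel': split.split_parallel,
    'loopCut': split.split_loop,
}

def apply_cut_im_plain(self, type_of_cut, cut, activity_key):
    self.detected_cut = type_of_cut
    for l in SPLITTERS[type_of_cut](cut[1], self.log, activity_key):
        # _append_child would wrap the duplicated DFG/activity/start/end
        # computation and the SubtreeInfrequent construction shown above.
        self._append_child(l, activity_key)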
Example no. 24
0
    def detect_cut_if(self, second_iteration=False, parameters=None):
        # dfg_viz = dfg_factory.apply(self.log)
        # gviz = dfg_vis_factory.apply(dfg_viz, log=self.log, variant="frequency", parameters={"format": "PDF"})
        # dfg_vis_factory.view(gviz)
        if parameters is None:
            parameters = {}
        activity_key = exec_utils.get_param_value(
            Parameters.ACTIVITY_KEY, self.parameters,
            pmutil.xes_constants.DEFAULT_NAME_KEY)

        # check base cases:
        empty_log = base_case.empty_log(self.log)
        single_activity = base_case.single_activity(self.log, activity_key)
        if empty_log:
            self.detected_cut = 'empty_log'
        elif single_activity:
            self.detected_cut = 'single_activity'
        # if no base cases are found, search for a cut:
        # use the cutting and splitting functions of im_plain:
        else:
            found_plain_cut, type_of_cut, cut = self.check_cut_im_plain()

            if found_plain_cut:
                self.apply_cut_im_plain(type_of_cut, cut, activity_key)
            # if im_plain does not find a cut, we filter on our threshold and then again apply the im_cut detection
            # but this time, we have to use different splitting functions:
            else:
                self.filter_dfg_on_threshold()
                """
                dfg_viz = dfg_factory.apply(self.log)
                gviz = dfg_vis_factory.apply(dfg_viz, log=self.log, variant="frequency", parameters={"format": "PDF"})
                dfg_vis_factory.view(gviz)
                """
                found_plain_cut, type_of_cut, cut = self.check_cut_im_plain()
                if found_plain_cut:
                    if type_of_cut == 'concurrent':
                        logging.debug("concurrent_cut_if")
                        self.detected_cut = 'concurrent'
                        new_logs = splitting_infrequent.split_xor_infrequent(
                            cut[1], self.log, activity_key)
                        for l in new_logs:
                            new_dfg = [(k, v) for k, v in dfg_inst.apply(
                                l, parameters=parameters).items() if v > 0]
                            activities = attributes_filter.get_attribute_values(
                                l, activity_key)
                            start_activities = list(
                                start_activities_filter.get_start_activities(
                                    l, parameters=parameters).keys())
                            end_activities = list(
                                end_activities_filter.get_end_activities(
                                    l, parameters=parameters).keys())
                            self.children.append(
                                SubtreeInfrequent(
                                    l,
                                    new_dfg,
                                    self.master_dfg,
                                    self.initial_dfg,
                                    activities,
                                    self.counts,
                                    self.rec_depth + 1,
                                    self.f,
                                    noise_threshold=self.noise_threshold,
                                    start_activities=start_activities,
                                    end_activities=end_activities,
                                    initial_start_activities=self.
                                    initial_start_activities,
                                    initial_end_activities=self.
                                    initial_end_activities,
                                    parameters=parameters))
                    elif type_of_cut == 'sequential':
                        logging.debug("sequential_if")
                        new_logs = splitting_infrequent.split_sequence_infrequent(
                            cut[1], self.log, activity_key)
                        self.detected_cut = "sequential"
                        for l in new_logs:
                            new_dfg = [(k, v) for k, v in dfg_inst.apply(
                                l, parameters=parameters).items() if v > 0]
                            activities = attributes_filter.get_attribute_values(
                                l, activity_key)
                            start_activities = list(
                                start_activities_filter.get_start_activities(
                                    l, parameters=parameters).keys())
                            end_activities = list(
                                end_activities_filter.get_end_activities(
                                    l, parameters=parameters).keys())
                            self.children.append(
                                SubtreeInfrequent(
                                    l,
                                    new_dfg,
                                    self.master_dfg,
                                    self.initial_dfg,
                                    activities,
                                    self.counts,
                                    self.rec_depth + 1,
                                    self.f,
                                    noise_threshold=self.noise_threshold,
                                    start_activities=start_activities,
                                    end_activities=end_activities,
                                    initial_start_activities=self.
                                    initial_start_activities,
                                    initial_end_activities=self.
                                    initial_end_activities,
                                    parameters=parameters))
                    elif type_of_cut == 'parallel':
                        logging.debug("parallel_if")
                        new_logs = split.split_parallel(
                            cut[1], self.log, activity_key)
                        self.detected_cut = "parallel"
                        for l in new_logs:
                            new_dfg = [(k, v) for k, v in dfg_inst.apply(
                                l, parameters=parameters).items() if v > 0]
                            activities = attributes_filter.get_attribute_values(
                                l, activity_key)
                            start_activities = list(
                                start_activities_filter.get_start_activities(
                                    l, parameters=parameters).keys())
                            end_activities = list(
                                end_activities_filter.get_end_activities(
                                    l, parameters=parameters).keys())
                            self.children.append(
                                SubtreeInfrequent(
                                    l,
                                    new_dfg,
                                    self.master_dfg,
                                    self.initial_dfg,
                                    activities,
                                    self.counts,
                                    self.rec_depth + 1,
                                    self.f,
                                    noise_threshold=self.noise_threshold,
                                    start_activities=start_activities,
                                    end_activities=end_activities,
                                    initial_start_activities=self.
                                    initial_start_activities,
                                    initial_end_activities=self.
                                    initial_end_activities,
                                    parameters=parameters))
                    elif type_of_cut == 'loopCut':
                        logging.debug("loopCut_if")
                        new_logs = splitting_infrequent.split_loop_infrequent(
                            cut[1], self.log, activity_key)
                        self.detected_cut = "loopCut"
                        for l in new_logs:
                            new_dfg = [(k, v) for k, v in dfg_inst.apply(
                                l, parameters=parameters).items() if v > 0]
                            activities = attributes_filter.get_attribute_values(
                                l, activity_key)
                            start_activities = list(
                                start_activities_filter.get_start_activities(
                                    l, parameters=parameters).keys())
                            end_activities = list(
                                end_activities_filter.get_end_activities(
                                    l, parameters=parameters).keys())
                            self.children.append(
                                SubtreeInfrequent(
                                    l,
                                    new_dfg,
                                    self.master_dfg,
                                    self.initial_dfg,
                                    activities,
                                    self.counts,
                                    self.rec_depth + 1,
                                    self.f,
                                    noise_threshold=self.noise_threshold,
                                    start_activities=start_activities,
                                    end_activities=end_activities,
                                    initial_start_activities=self.
                                    initial_start_activities,
                                    initial_end_activities=self.
                                    initial_end_activities,
                                    parameters=parameters))

                else:
                    self.apply_fall_through_infrequent(parameters)
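For reference, the two base cases consulted at the top of `detect_cut_if` reduce to simple predicates. Simplified stand-ins for `base_case.empty_log` and `base_case.single_activity` (illustrative sketches, not the pm4py source):

# Simplified base-case predicates for the Inductive Miner recursion.
def empty_log(log):
    # No traces left: the recursion bottoms out.
    return len(log) == 0

def single_activity(log, activity_key):
    # Exactly one distinct activity remains: emit a leaf node.
    labels = {event[activity_key] for trace in log for event in trace}
    return len(labels) == 1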
Example no. 25
0
def apply_tree(log, parameters=None):
    """
    Apply the IM algorithm to a log, obtaining a process tree

    Parameters
    ----------
    log
        Log
    parameters
        Parameters of the algorithm, including:
            Parameters.ACTIVITY_KEY -> attribute of the log to use as activity name
            (default concept:name)

    Returns
    ----------
    process_tree
        Process tree
    """
    if parameters is None:
        parameters = {}

    if pkgutil.find_loader("pandas"):
        import pandas as pd
        from pm4py.statistics.variants.pandas import get as variants_get

        if type(log) is pd.DataFrame:
            variants = variants_get.get_variants_count(log, parameters=parameters)
            return apply_tree_variants(variants, parameters=parameters)

    activity_key = exec_utils.get_param_value(
        Parameters.ACTIVITY_KEY, parameters,
        pmutil.xes_constants.DEFAULT_NAME_KEY)

    log = converter.apply(log, parameters=parameters)
    # since basic IM is influenced only by the set of variants, it makes sense to keep one trace per variant
    log = filtering_utils.keep_one_trace_per_variant(log,
                                                     parameters=parameters)
    # keep only the activity attribute (since the others are not used)
    log = filtering_utils.keep_only_one_attribute_per_event(log, activity_key)

    dfg = [(k, v)
           for k, v in dfg_inst.apply(log, parameters=parameters).items()
           if v > 0]
    c = Counts()
    activities = attributes_filter.get_attribute_values(log, activity_key)
    start_activities = list(
        start_activities_filter.get_start_activities(
            log, parameters=parameters).keys())
    end_activities = list(
        end_activities_filter.get_end_activities(log,
                                                 parameters=parameters).keys())
    contains_empty_traces = False
    traces_length = [len(trace) for trace in log]
    if traces_length:
        contains_empty_traces = min(traces_length) == 0

    recursion_depth = 0
    sub = subtree.make_tree(log, dfg, dfg, dfg, activities, c, recursion_depth,
                            0.0, start_activities, end_activities,
                            start_activities, end_activities, parameters)

    process_tree = get_tree_repr_implain.get_repr(
        sub, 0, contains_empty_traces=contains_empty_traces)
    # Ensures consistency to the parent pointers in the process tree
    tree_consistency.fix_parent_pointers(process_tree)
    # Fixes a 1 child XOR that is added when single-activities flowers are found
    tree_consistency.fix_one_child_xor_flower(process_tree)
    # folds the process tree (to simplify it in case fallthroughs/filtering is applied)
    process_tree = util.fold(process_tree)

    return process_tree
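A minimal usage sketch for the tree discovery above, assuming the modern pm4py top-level API is available (the file name is a placeholder):

import pm4py

# Placeholder path to an XES event log.
log = pm4py.read_xes("running-example.xes")
tree = pm4py.discover_process_tree_inductive(log)
print(tree)  # textual representation of the discovered process tree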
Example no. 26
0
    def apply_fall_through_infrequent(self, parameters=None):
        if parameters is None:
            parameters = {}
        activity_key = exec_utils.get_param_value(
            Parameters.ACTIVITY_KEY, self.parameters,
            pmutil.xes_constants.DEFAULT_NAME_KEY)

        # set flags for fall_throughs, base case is True (enabled)
        use_empty_trace = (Parameters.EMPTY_TRACE_KEY not in parameters
                           ) or parameters[Parameters.EMPTY_TRACE_KEY]
        use_act_once_per_trace = (
            Parameters.ONCE_PER_TRACE_KEY
            not in parameters) or parameters[Parameters.ONCE_PER_TRACE_KEY]
        use_act_concurrent = (Parameters.CONCURRENT_KEY not in parameters
                              ) or parameters[Parameters.CONCURRENT_KEY]
        use_strict_tau_loop = (Parameters.STRICT_TAU_LOOP_KEY not in parameters
                               ) or parameters[Parameters.STRICT_TAU_LOOP_KEY]
        use_tau_loop = (Parameters.TAU_LOOP_KEY not in parameters
                        ) or parameters[Parameters.TAU_LOOP_KEY]

        if use_empty_trace:
            empty_traces_present, enough_traces, new_log = fall_through_infrequent.empty_trace_filtering(
                self.log, self.f)
            self.log = new_log
        else:
            empty_traces_present = False
            enough_traces = False
        # if an empty trace is found, the empty trace fallthrough applies
        if empty_traces_present and enough_traces:
            logging.debug("empty_trace_if")
            self.detected_cut = 'empty_trace'
            new_dfg = [(k, v) for k, v in dfg_inst.apply(
                new_log, parameters=self.parameters).items() if v > 0]
            activities = attributes_filter.get_attribute_values(
                new_log, activity_key)
            start_activities = list(
                start_activities_filter.get_start_activities(
                    new_log, parameters=parameters).keys())
            end_activities = list(
                end_activities_filter.get_end_activities(
                    new_log, parameters=parameters).keys())
            self.children.append(
                SubtreeInfrequent(
                    new_log,
                    new_dfg,
                    self.master_dfg,
                    self.initial_dfg,
                    activities,
                    self.counts,
                    self.rec_depth + 1,
                    self.f,
                    noise_threshold=self.noise_threshold,
                    start_activities=start_activities,
                    end_activities=end_activities,
                    initial_start_activities=self.initial_start_activities,
                    initial_end_activities=self.initial_end_activities,
                    parameters=parameters))
        elif empty_traces_present and not enough_traces:
            # no node is added to the PT; instead, we just recurse on the log without the empty traces
            self.detect_cut_if()
        else:
            if use_act_once_per_trace:
                activity_once, new_log, small_log = fall_through.act_once_per_trace(
                    self.log, self.activities, activity_key)
            else:
                activity_once = False
            if activity_once:
                self.detected_cut = 'parallel'
                # create two new dfgs, as we need to append them to self.children later
                new_dfg = [(k, v) for k, v in dfg_inst.apply(
                    new_log, parameters=parameters).items() if v > 0]
                activities = attributes_filter.get_attribute_values(
                    new_log, activity_key)
                small_dfg = [(k, v) for k, v in dfg_inst.apply(
                    small_log, parameters=parameters).items() if v > 0]
                small_activities = attributes_filter.get_attribute_values(
                    small_log, activity_key)
                start_activities = list(
                    start_activities_filter.get_start_activities(
                        new_log, parameters=parameters).keys())
                end_activities = list(
                    end_activities_filter.get_end_activities(
                        new_log, parameters=parameters).keys())
                # append the chosen activity as leaf:
                self.children.append(
                    SubtreeInfrequent(
                        small_log,
                        small_dfg,
                        self.master_dfg,
                        self.initial_dfg,
                        small_activities,
                        self.counts,
                        self.rec_depth + 1,
                        self.f,
                        noise_threshold=self.noise_threshold,
                        initial_start_activities=self.initial_start_activities,
                        initial_end_activities=self.initial_end_activities,
                        parameters=parameters))
                # continue with the recursion on the new log
                self.children.append(
                    SubtreeInfrequent(
                        new_log,
                        new_dfg,
                        self.master_dfg,
                        self.initial_dfg,
                        activities,
                        self.counts,
                        self.rec_depth + 1,
                        self.f,
                        noise_threshold=self.noise_threshold,
                        start_activities=start_activities,
                        end_activities=end_activities,
                        initial_start_activities=self.initial_start_activities,
                        initial_end_activities=self.initial_end_activities,
                        parameters=parameters))

            else:
                if use_act_concurrent:
                    activity_concurrent, new_log, small_log, key = fall_through.activity_concurrent(
                        self,
                        self.log,
                        self.activities,
                        activity_key,
                        parameters=parameters)
                else:
                    activity_concurrent = False
                if activity_concurrent:
                    self.detected_cut = 'parallel'
                    # create two new dfgs to append to self.children later
                    new_dfg = [(k, v) for k, v in dfg_inst.apply(
                        new_log, parameters=parameters).items() if v > 0]
                    activities = attributes_filter.get_attribute_values(
                        new_log, activity_key)
                    small_dfg = [(k, v) for k, v in dfg_inst.apply(
                        small_log, parameters=parameters).items() if v > 0]
                    small_activities = attributes_filter.get_attribute_values(
                        small_log, activity_key)
                    start_activities = list(
                        start_activities_filter.get_start_activities(
                            new_log, parameters=parameters).keys())
                    end_activities = list(
                        end_activities_filter.get_end_activities(
                            new_log, parameters=parameters).keys())
                    # append the concurrent activity as leaf:
                    self.children.append(
                        SubtreeInfrequent(
                            small_log,
                            small_dfg,
                            self.master_dfg,
                            self.initial_dfg,
                            small_activities,
                            self.counts,
                            self.rec_depth + 1,
                            self.f,
                            noise_threshold=self.noise_threshold,
                            initial_start_activities=self.
                            initial_start_activities,
                            initial_end_activities=self.initial_end_activities,
                            parameters=parameters))
                    # continue with the recursion on the new log:
                    self.children.append(
                        SubtreeInfrequent(
                            new_log,
                            new_dfg,
                            self.master_dfg,
                            self.initial_dfg,
                            activities,
                            self.counts,
                            self.rec_depth + 1,
                            self.f,
                            noise_threshold=self.noise_threshold,
                            start_activities=start_activities,
                            end_activities=end_activities,
                            initial_start_activities=self.
                            initial_start_activities,
                            initial_end_activities=self.initial_end_activities,
                            parameters=parameters))
                else:
                    if use_strict_tau_loop:
                        strict_tau_loop, new_log = fall_through.strict_tau_loop(
                            self.log, self.start_activities,
                            self.end_activities, activity_key)
                    else:
                        strict_tau_loop = False
                    if strict_tau_loop:
                        self.detected_cut = 'strict_tau_loop'
                        new_dfg = [(k, v) for k, v in dfg_inst.apply(
                            new_log, parameters=parameters).items() if v > 0]
                        activities = attributes_filter.get_attribute_values(
                            new_log, activity_key)
                        start_activities = list(
                            start_activities_filter.get_start_activities(
                                new_log, parameters=parameters).keys())
                        end_activities = list(
                            end_activities_filter.get_end_activities(
                                new_log, parameters=parameters).keys())
                        self.children.append(
                            SubtreeInfrequent(
                                new_log,
                                new_dfg,
                                self.master_dfg,
                                self.initial_dfg,
                                activities,
                                self.counts,
                                self.rec_depth + 1,
                                self.f,
                                noise_threshold=self.noise_threshold,
                                start_activities=start_activities,
                                end_activities=end_activities,
                                initial_start_activities=self.
                                initial_start_activities,
                                initial_end_activities=self.
                                initial_end_activities,
                                parameters=parameters))
                    else:
                        if use_tau_loop:
                            tau_loop, new_log = fall_through.tau_loop(
                                self.log, self.start_activities, activity_key)
                        else:
                            tau_loop = False
                        if tau_loop:
                            self.detected_cut = 'tau_loop'
                            new_dfg = [(k, v) for k, v in dfg_inst.apply(
                                new_log, parameters=parameters).items()
                                       if v > 0]
                            activities = attributes_filter.get_attribute_values(
                                new_log, activity_key)
                            start_activities = list(
                                start_activities_filter.get_start_activities(
                                    new_log, parameters=parameters).keys())
                            end_activities = list(
                                end_activities_filter.get_end_activities(
                                    new_log, parameters=parameters).keys())
                            self.children.append(
                                SubtreeInfrequent(
                                    new_log,
                                    new_dfg,
                                    self.master_dfg,
                                    self.initial_dfg,
                                    activities,
                                    self.counts,
                                    self.rec_depth + 1,
                                    self.f,
                                    noise_threshold=self.noise_threshold,
                                    start_activities=start_activities,
                                    end_activities=end_activities,
                                    initial_start_activities=self.
                                    initial_start_activities,
                                    initial_end_activities=self.
                                    initial_end_activities,
                                    parameters=parameters))
                        else:
                            logging.debug("flower_if")
                            self.detected_cut = 'flower'
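`apply_fall_through_infrequent` is one long cascade: each fall-through is attempted only if every previous one failed, and the flower model is the last resort. The control flow can be summarized by a small self-contained sketch (the strategy names mirror the flags above; the dummy attempts are placeholders):

# Self-contained summary of the fall-through cascade order.
def first_applicable(strategies):
    """Return the name of the first enabled strategy whose attempt succeeds."""
    for name, enabled, attempt in strategies:
        if enabled and attempt():
            return name
    return 'flower'  # last resort: flower model

cascade = [
    ('empty_trace', True, lambda: False),
    ('act_once_per_trace', True, lambda: False),
    ('act_concurrent', True, lambda: False),
    ('strict_tau_loop', True, lambda: False),
    ('tau_loop', True, lambda: False),
]
print(first_applicable(cascade))  # 'flower' when every attempt fails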
Example no. 27
0
def apply(log, parameters=None):
    """
    Gets the frequency DFG

    Parameters
    ------------
    log
        Log
    parameters
        Parameters of the algorithm

    Returns
    ------------
    base64
        Base64 of an SVG representing the model
    model
        Text representation of the model
    format
        Format of the model
    """
    if parameters is None:
        parameters = {}

    decreasingFactor = parameters[
        "decreasingFactor"] if "decreasingFactor" in parameters else constants.DEFAULT_DEC_FACTOR

    activity_key = parameters[
        pm4_constants.
        PARAMETER_CONSTANT_ACTIVITY_KEY] if pm4_constants.PARAMETER_CONSTANT_ACTIVITY_KEY in parameters else xes.DEFAULT_NAME_KEY

    log = attributes_filter.filter_log_on_max_no_activities(
        log,
        max_no_activities=constants.MAX_NO_ACTIVITIES,
        parameters=parameters)
    filtered_log = auto_filter.apply_auto_filter(log, parameters=parameters)

    activities_count = attributes_filter.get_attribute_values(
        filtered_log, activity_key)
    activities = list(activities_count.keys())
    start_activities = list(
        start_activities_filter.get_start_activities(
            filtered_log, parameters=parameters).keys())
    end_activities = list(
        end_activities_filter.get_end_activities(filtered_log,
                                                 parameters=parameters).keys())

    dfg = dfg_factory.apply(filtered_log, parameters=parameters)
    dfg = clean_dfg_based_on_noise_thresh(
        dfg,
        activities,
        decreasingFactor * constants.DEFAULT_DFG_CLEAN_MULTIPLIER,
        parameters=parameters)

    parameters["format"] = "svg"
    parameters["start_activities"] = start_activities
    parameters["end_activities"] = end_activities

    gviz = dfg_vis_factory.apply(dfg,
                                 log=filtered_log,
                                 variant="frequency",
                                 parameters=parameters)

    gviz_base64 = base64.b64encode(str(gviz).encode('utf-8'))

    ret_graph = get_graph.get_graph_from_dfg(dfg, start_activities,
                                             end_activities)

    net, im, fm = dfg_conv_factory.apply(dfg,
                                         parameters={
                                             "start_activities":
                                             start_activities,
                                             "end_activities": end_activities
                                         })

    return get_base64_from_gviz(gviz), export_petri_as_string(
        net, im, fm
    ), ".pnml", "xes", activities, start_activities, end_activities, gviz_base64, ret_graph, "dfg", "freq", None, "", activity_key
Example no. 28
0
def apply(log, parameters):
    """
    Apply the IMDF algorithm to a log obtaining a Petri net along with an initial and final marking

    Parameters
    -----------
    log
        Log
    parameters
        Parameters of the algorithm, including:
            pmutil.constants.PARAMETER_CONSTANT_ACTIVITY_KEY -> attribute of the log to use as activity name
            (default concept:name)

    Returns
    -----------
    net
        Petri net
    initial_marking
        Initial marking
    final_marking
        Final marking
    """
    if parameters is None:
        parameters = {}
    if pmutil.constants.PARAMETER_CONSTANT_ACTIVITY_KEY not in parameters:
        parameters[pmutil.constants.
                   PARAMETER_CONSTANT_ACTIVITY_KEY] = xes_util.DEFAULT_NAME_KEY
    if pmutil.constants.PARAMETER_CONSTANT_ATTRIBUTE_KEY not in parameters:
        parameters[
            pmutil.constants.PARAMETER_CONSTANT_ATTRIBUTE_KEY] = parameters[
                pmutil.constants.PARAMETER_CONSTANT_ACTIVITY_KEY]
    activity_key = parameters[pmutil.constants.PARAMETER_CONSTANT_ACTIVITY_KEY]
    # the reduction is enabled by default (note that the reduction block below is currently commented out)
    enable_reduction = parameters[
        "enable_reduction"] if "enable_reduction" in parameters else True

    # get the DFG
    if isinstance(log[0][0], tel.Event):
        dfg = [(k, v) for k, v in inductive_revise.get_dfg_graph_trans(
            log,
            parameters={
                pmutil.constants.PARAMETER_CONSTANT_ACTIVITY_KEY: activity_key
            }).items() if v > 0]
    else:
        dfg = [(k, v) for k, v in dfg_inst.apply(
            log,
            parameters={
                pmutil.constants.PARAMETER_CONSTANT_ACTIVITY_KEY: activity_key
            }).items() if v > 0]

    # get the activities in the log
    activities = attributes_filter.get_attribute_values(log, activity_key)

    # gets the start activities from the log
    start_activities = list(
        start_activities_filter.get_start_activities(
            log, parameters=parameters).keys())
    # gets the end activities from the log
    end_activities = list(
        end_activities_filter.get_end_activities(log,
                                                 parameters=parameters).keys())

    # check if the log contains empty traces
    contains_empty_traces = False
    traces_length = [len(trace) for trace in log]
    if traces_length:
        contains_empty_traces = min(traces_length) == 0

    net, initial_marking, final_marking = apply_dfg(
        dfg,
        parameters=parameters,
        activities=activities,
        contains_empty_traces=contains_empty_traces,
        start_activities=start_activities,
        end_activities=end_activities)
    """if enable_reduction:
        vis_trans = [x for x in net.transitions if x.label]
        hid_trans = [x for x in net.transitions if x.label is None]
        if vis_trans:
            ratio = len(hid_trans) / len(vis_trans)

            if ratio < 2.0:
                # avoid over-reducing complicated processes
                reduction_parameters = copy(parameters)
                if "is_reduction" not in reduction_parameters:
                    reduction_parameters["is_reduction"] = True
                if "thread_maximum_ex_time" not in reduction_parameters:
                    reduction_parameters["thread_maximum_ex_time"] = shared_constants.RED_MAX_THR_EX_TIME

                # do the replay
                aligned_traces = token_replay.apply(log, net, initial_marking, final_marking,
                                                    parameters=reduction_parameters)

                # apply petri_reduction technique in order to simplify the Petri net
                net = petri_cleaning.petri_reduction_treplay(net, parameters={"aligned_traces": aligned_traces})"""

    return net, initial_marking, final_marking
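A short usage sketch for this IMDF variant; `parameters` is positional here, so an empty dict must be passed explicitly. The visualization call assumes the classic `pn_vis_factory` API used elsewhere in these examples:

# Discover the model and render it with the classic factory API.
net, initial_marking, final_marking = apply(log, {})

gviz = pn_vis_factory.apply(net, initial_marking, final_marking,
                            parameters={"format": "svg"})
pn_vis_factory.view(gviz)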
Example no. 29
0
    def apply_fall_through(self, parameters=None):
        if parameters is None:
            parameters = {}
        activity_key = exec_utils.get_param_value(
            Parameters.ACTIVITY_KEY, parameters,
            pmutil.xes_constants.DEFAULT_NAME_KEY)

        # set flags for fall_throughs, base case is True (enabled)
        use_empty_trace = (Parameters.EMPTY_TRACE_KEY not in parameters
                           ) or parameters[Parameters.EMPTY_TRACE_KEY]
        use_act_once_per_trace = (
            Parameters.ONCE_PER_TRACE_KEY
            not in parameters) or parameters[Parameters.ONCE_PER_TRACE_KEY]
        use_act_concurrent = (Parameters.CONCURRENT_KEY not in parameters
                              ) or parameters[Parameters.CONCURRENT_KEY]
        use_strict_tau_loop = (Parameters.STRICT_TAU_LOOP_KEY not in parameters
                               ) or parameters[Parameters.STRICT_TAU_LOOP_KEY]
        use_tau_loop = (Parameters.TAU_LOOP_KEY not in parameters
                        ) or parameters[Parameters.TAU_LOOP_KEY]

        if use_empty_trace:
            empty_trace, new_log = fall_through.empty_trace(self.log)
            # if an empty trace is found, the empty-trace fall-through applies
        else:
            empty_trace = False
        if empty_trace:
            logging.debug("empty_trace")
            activities_left = []
            for trace in new_log:
                for act in trace:
                    if act[activity_key] not in activities_left:
                        activities_left.append(act[activity_key])
            self.detected_cut = 'empty_trace'
            new_dfg = [(k, v) for k, v in dfg_inst.apply(
                new_log, parameters=parameters).items() if v > 0]
            activities = attributes_filter.get_attribute_values(
                new_log, activity_key)
            start_activities = list(
                start_activities_filter.get_start_activities(
                    new_log, parameters=self.parameters).keys())
            end_activities = list(
                end_activities_filter.get_end_activities(
                    new_log, parameters=self.parameters).keys())
            self.children.append(
                SubtreePlain(
                    new_log,
                    new_dfg,
                    self.master_dfg,
                    self.initial_dfg,
                    activities,
                    self.counts,
                    self.rec_depth + 1,
                    noise_threshold=self.noise_threshold,
                    start_activities=start_activities,
                    end_activities=end_activities,
                    initial_start_activities=self.initial_start_activities,
                    initial_end_activities=self.initial_end_activities,
                    parameters=parameters))
        else:
            if use_act_once_per_trace:
                activity_once, new_log, small_log = fall_through.act_once_per_trace(
                    self.log, self.activities, activity_key)
                small_log = filtering_utils.keep_one_trace_per_variant(
                    small_log, parameters=parameters)
            else:
                activity_once = False
            if use_act_once_per_trace and activity_once:
                self.detected_cut = 'parallel'
                # create two new dfgs, as we need to append them to self.children later
                new_dfg = [(k, v) for k, v in dfg_inst.apply(
                    new_log, parameters=parameters).items() if v > 0]
                activities = attributes_filter.get_attribute_values(
                    new_log, activity_key)
                small_dfg = [(k, v) for k, v in dfg_inst.apply(
                    small_log, parameters=parameters).items() if v > 0]
                small_activities = attributes_filter.get_attribute_values(
                    small_log, activity_key)
                self.children.append(
                    SubtreePlain(
                        small_log,
                        small_dfg,
                        self.master_dfg,
                        self.initial_dfg,
                        small_activities,
                        self.counts,
                        self.rec_depth + 1,
                        noise_threshold=self.noise_threshold,
                        initial_start_activities=self.initial_start_activities,
                        initial_end_activities=self.initial_end_activities,
                        parameters=parameters))
                # continue with the recursion on the new log_skeleton
                start_activities = list(
                    start_activities_filter.get_start_activities(
                        new_log, parameters=self.parameters).keys())
                end_activities = list(
                    end_activities_filter.get_end_activities(
                        new_log, parameters=self.parameters).keys())
                self.children.append(
                    SubtreePlain(
                        new_log,
                        new_dfg,
                        self.master_dfg,
                        self.initial_dfg,
                        activities,
                        self.counts,
                        self.rec_depth + 1,
                        noise_threshold=self.noise_threshold,
                        start_activities=start_activities,
                        end_activities=end_activities,
                        initial_start_activities=self.initial_start_activities,
                        initial_end_activities=self.initial_end_activities,
                        parameters=parameters))

            else:
                if use_act_concurrent:
                    activity_concurrent, new_log, small_log, activity_left_out = fall_through.activity_concurrent(
                        self,
                        self.log,
                        self.activities,
                        activity_key,
                        parameters=parameters)
                    small_log = filtering_utils.keep_one_trace_per_variant(
                        small_log, parameters=parameters)
                else:
                    activity_concurrent = False
                if use_act_concurrent and activity_concurrent:
                    self.detected_cut = 'parallel'
                    # create two new dfgs to append to self.children later
                    new_dfg = [(k, v) for k, v in dfg_inst.apply(
                        new_log, parameters=parameters).items() if v > 0]
                    activities = attributes_filter.get_attribute_values(
                        new_log, activity_key)
                    small_dfg = [(k, v) for k, v in dfg_inst.apply(
                        small_log, parameters=parameters).items() if v > 0]
                    small_activities = attributes_filter.get_attribute_values(
                        small_log, activity_key)
                    # append the concurrent activity as leaf:
                    self.children.append(
                        SubtreePlain(
                            small_log,
                            small_dfg,
                            self.master_dfg,
                            self.initial_dfg,
                            small_activities,
                            self.counts,
                            self.rec_depth + 1,
                            noise_threshold=self.noise_threshold,
                            initial_start_activities=self.
                            initial_start_activities,
                            initial_end_activities=self.initial_end_activities,
                            parameters=parameters))
                    # continue with the recursion on the new log_skeleton:
                    start_activities = list(
                        start_activities_filter.get_start_activities(
                            new_log, parameters=self.parameters).keys())
                    end_activities = list(
                        end_activities_filter.get_end_activities(
                            new_log, parameters=self.parameters).keys())
                    self.children.append(
                        SubtreePlain(
                            new_log,
                            new_dfg,
                            self.master_dfg,
                            self.initial_dfg,
                            activities,
                            self.counts,
                            self.rec_depth + 1,
                            noise_threshold=self.noise_threshold,
                            start_activities=start_activities,
                            end_activities=end_activities,
                            initial_start_activities=self.
                            initial_start_activities,
                            initial_end_activities=self.initial_end_activities,
                            parameters=parameters))
                else:
                    if use_strict_tau_loop:
                        strict_tau_loop, new_log = fall_through.strict_tau_loop(
                            self.log, self.start_activities,
                            self.end_activities, activity_key)
                        new_log = filtering_utils.keep_one_trace_per_variant(
                            new_log, parameters=parameters)
                    else:
                        strict_tau_loop = False
                    if use_strict_tau_loop and strict_tau_loop:
                        activities_left = []
                        for trace in new_log:
                            for act in trace:
                                if act[activity_key] not in activities_left:
                                    activities_left.append(act[activity_key])
                        self.detected_cut = 'strict_tau_loop'
                        new_dfg = [(k, v) for k, v in dfg_inst.apply(
                            new_log, parameters=parameters).items() if v > 0]
                        activities = attributes_filter.get_attribute_values(
                            new_log, activity_key)
                        start_activities = list(
                            start_activities_filter.get_start_activities(
                                new_log, parameters=self.parameters).keys())
                        end_activities = list(
                            end_activities_filter.get_end_activities(
                                new_log, parameters=self.parameters).keys())
                        self.children.append(
                            SubtreePlain(new_log,
                                         new_dfg,
                                         self.master_dfg,
                                         self.initial_dfg,
                                         activities,
                                         self.counts,
                                         self.rec_depth + 1,
                                         noise_threshold=self.noise_threshold,
                                         start_activities=start_activities,
                                         end_activities=end_activities,
                                         initial_start_activities=self.
                                         initial_start_activities,
                                         initial_end_activities=self.
                                         initial_end_activities,
                                         parameters=parameters))
                    else:
                        if use_tau_loop:
                            tau_loop, new_log = fall_through.tau_loop(
                                self.log, self.start_activities, activity_key)
                            new_log = filtering_utils.keep_one_trace_per_variant(
                                new_log, parameters=parameters)
                        else:
                            tau_loop = False
                        if use_tau_loop and tau_loop:
                            activities_left = []
                            for trace in new_log:
                                for act in trace:
                                    if act[activity_key] not in activities_left:
                                        activities_left.append(
                                            act[activity_key])
                            self.detected_cut = 'tau_loop'
                            new_dfg = [(k, v) for k, v in dfg_inst.apply(
                                new_log, parameters=parameters).items()
                                       if v > 0]
                            activities = attributes_filter.get_attribute_values(
                                new_log, activity_key)
                            start_activities = list(
                                start_activities_filter.get_start_activities(
                                    new_log,
                                    parameters=self.parameters).keys())
                            end_activities = list(
                                end_activities_filter.get_end_activities(
                                    new_log,
                                    parameters=self.parameters).keys())
                            self.children.append(
                                SubtreePlain(
                                    new_log,
                                    new_dfg,
                                    self.master_dfg,
                                    self.initial_dfg,
                                    activities,
                                    self.counts,
                                    self.rec_depth + 1,
                                    noise_threshold=self.noise_threshold,
                                    start_activities=start_activities,
                                    end_activities=end_activities,
                                    initial_start_activities=self.
                                    initial_start_activities,
                                    initial_end_activities=self.
                                    initial_end_activities,
                                    parameters=parameters))
                        else:
                            logging.debug("flower model")
                            activities_left = []
                            for trace in self.log:
                                for act in trace:
                                    if act[activity_key] not in activities_left:
                                        activities_left.append(
                                            act[activity_key])
                            self.detected_cut = 'flower'
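`filtering_utils.keep_one_trace_per_variant`, invoked after almost every split above, conceptually deduplicates traces by their activity sequence. A simplified, self-contained illustration (not the pm4py implementation):

# Simplified variant deduplication: keep the first trace of each activity sequence.
def keep_one_trace_per_variant(log, activity_key="concept:name"):
    seen, result = set(), []
    for trace in log:
        variant = tuple(event[activity_key] for event in trace)
        if variant not in seen:
            seen.add(variant)
            result.append(trace)
    return result

toy_log = [[{"concept:name": "a"}, {"concept:name": "b"}],
           [{"concept:name": "a"}, {"concept:name": "b"}]]
print(len(keep_one_trace_per_variant(toy_log)))  # 1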
Example no. 30
0
result_avg = []
result_num = []

print(model)

tree = 5
tree_num = 0
tree_avg = 0
print(tree)
for sam in range(1, 11):
    print(sam)
    input_file_path = os.path.join("input_data", "df_complete_logs",
                                   "%d_1000_%d.xes" % (tree, sam))
    log = xes_importer.apply(input_file_path)
    dfg_org = dfg_factory.apply(log)
    start_act = set(get_start_activities(log).keys())
    end_act = set(get_end_activities(log).keys())
    # build a Petri net for simulation
    net, im, fm = heu_factory.apply(log)

    num = len(dfg_org)
    sim_log = sim_factory.apply(net,
                                im,
                                parameters={
                                    'maxTraceLength': 20,
                                    'noTraces': 1000
                                })
    dfg_algo = dfg_factory.apply(sim_log)
    start_act_algo = set(get_start_activities(sim_log).keys())
    end_act_algo = set(get_end_activities(sim_log).keys())
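The snippet stops after computing the simulated-log statistics. A plausible next step, sketched under the assumption that the evaluation compares the simulated behaviour against the original log, is a Jaccard similarity over the DFG edges and the start/end activity sets (the `jaccard` helper is hypothetical):

# Hypothetical comparison of the original and simulated logs.
def jaccard(a, b):
    return len(a & b) / len(a | b) if (a | b) else 1.0

edge_similarity = jaccard(set(dfg_org), set(dfg_algo))
start_similarity = jaccard(start_act, start_act_algo)
end_similarity = jaccard(end_act, end_act_algo)
print(edge_similarity, start_similarity, end_similarity)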