def apply_tree(
        event_log: Union[pd.DataFrame, EventLog, EventStream],
        parameters: Optional[Dict[Union[Parameters, str], Any]] = None) -> ProcessTree:
    """
    Discover a process tree from a log-like object with the inductive miner

    Parameters
    ----------
    event_log
        Log-like object (dataframe, event log or event stream); it is converted
        to an event log before mining
    parameters
        Parameters of the algorithm, including:
            Parameters.ACTIVITY_KEY -> attribute to use as activity name
            (default xes_constants.DEFAULT_NAME_KEY)
            Parameters.NOISE_THRESHOLD -> noise threshold (default 0.0)
            Parameters.USE_MSD_PARALLEL_CUT -> flag forwarded to the miner (default True)

    Returns
    ----------
    tree
        Process tree, after parent-pointer fixing, folding and sorting
    """
    parameters = {} if parameters is None else parameters

    log = log_converter.apply(
        event_log,
        variant=log_converter.Variants.TO_EVENT_LOG,
        parameters=parameters)

    activity_key = exec_utils.get_param_value(
        Parameters.ACTIVITY_KEY.value, parameters, xes_constants.DEFAULT_NAME_KEY)
    noise_threshold = exec_utils.get_param_value(
        Parameters.NOISE_THRESHOLD, parameters, 0.0)

    if noise_threshold == 0.0:
        # keep one trace per variant; more performant
        log = filtering_utils.keep_one_trace_per_variant(log, parameters=parameters)

    dfg = discover_dfg.apply(log, parameters=parameters)
    use_msd_parallel_cut = exec_utils.get_param_value(
        Parameters.USE_MSD_PARALLEL_CUT, parameters, True)

    mined_tree = __inductive_miner(
        log, dfg, noise_threshold, None, activity_key, use_msd_parallel_cut)

    # post-processing: consistent parent pointers, then fold, then sort
    tree_consistency.fix_parent_pointers(mined_tree)
    folded_tree = generic.fold(mined_tree)
    generic.tree_sort(folded_tree)
    return folded_tree
def apply(
        tree: ProcessTree,
        parameters: Optional[Dict[Union[str, Parameters], Any]] = None
) -> graphviz.Graph:
    """
    Obtain a Process Tree representation through GraphViz

    Parameters
    -----------
    tree
        Process tree
    parameters
        Possible parameters of the algorithm, including:
            Parameters.BGCOLOR -> background color of the graph (default "transparent")
            Parameters.FORMAT -> output image format (default "png")
            Parameters.ENABLE_DEEPCOPY -> if True, the tree is deep-copied and the
            copy is sorted before being drawn (default False)

    Returns
    -----------
    gviz
        GraphViz object
    """
    if parameters is None:
        parameters = {}

    # work on a shallow copy, so the root-node entry added below does not end
    # up in the dictionary owned by the caller
    parameters = copy(parameters)
    parameters[ROOT_NODE_PARAMETER] = tree

    # Only a unique ".gv" path is needed here. Leaving the context manager
    # closes the handle right away instead of keeping it open until the
    # object is garbage-collected.
    with tempfile.NamedTemporaryFile(suffix='.gv') as gv_file:
        gv_path = gv_file.name

    bgcolor = exec_utils.get_param_value(Parameters.BGCOLOR, parameters, "transparent")

    viz = Graph("pt", filename=gv_path, engine='dot', graph_attr={'bgcolor': bgcolor})
    viz.attr('node', shape='ellipse', fixedsize='false')

    image_format = exec_utils.get_param_value(Parameters.FORMAT, parameters, "png")
    enable_deepcopy = exec_utils.get_param_value(Parameters.ENABLE_DEEPCOPY, parameters, False)

    if enable_deepcopy:
        # since the process tree object needs to be sorted in the visualization, make a deepcopy of it before
        # proceeding
        tree = deepcopy(tree)
        generic.tree_sort(tree)

    repr_tree_2(tree, viz, parameters)

    viz.attr(overlap='false')
    viz.attr(splines='false')
    viz.format = image_format

    return viz
def apply(tree, parameters=None):
    """
    Obtain a Process Tree representation through GraphViz

    Parameters
    -----------
    tree
        Process tree
    parameters
        Possible parameters of the algorithm, including:
            Parameters.FORMAT -> output image format (default "png")
            Parameters.COLOR_MAP -> color map forwarded to the drawing routine (default {})
            Parameters.ENABLE_DEEPCOPY -> if True, the tree is deep-copied and the
            copy is sorted before being drawn (default True)

    Returns
    -----------
    gviz
        GraphViz object
    """
    if parameters is None:
        parameters = {}

    # Only a unique ".gv" path is needed here. Leaving the context manager
    # closes the handle right away instead of keeping it open until the
    # object is garbage-collected.
    with tempfile.NamedTemporaryFile(suffix='.gv') as gv_file:
        gv_path = gv_file.name

    viz = Graph("pt", filename=gv_path, engine='dot', graph_attr={'bgcolor': 'transparent'})
    viz.attr('node', shape='ellipse', fixedsize='false')

    image_format = exec_utils.get_param_value(Parameters.FORMAT, parameters, "png")
    color_map = exec_utils.get_param_value(Parameters.COLOR_MAP, parameters, {})
    enable_deepcopy = exec_utils.get_param_value(Parameters.ENABLE_DEEPCOPY, parameters, True)

    if enable_deepcopy:
        # since the process tree object needs to be sorted in the visualization, make a deepcopy of it before
        # proceeding
        tree = deepcopy(tree)
        util.tree_sort(tree)

    repr_tree(tree, viz, color_map, parameters)

    viz.attr(overlap='false')
    viz.attr(splines='false')
    viz.format = image_format

    return viz
def apply_tree_dfg(dfg: Dict[Tuple[str, str], int],
                   start_activities: Dict[str, int],
                   end_activities: Dict[str, int],
                   activities: Dict[str, int],
                   parameters=None):
    """
    Discover a process tree from a directly-follows graph

    Parameters
    ----------
    dfg
        Directly-follows graph
    start_activities
        Start activities
    end_activities
        End activities
    activities
        Activities
    parameters
        Parameters of the algorithm, including:
            Parameters.NOISE_THRESHOLD -> noise threshold (default 0.0)

    Returns
    ----------
    tree
        Process tree, after parent-pointer fixing, folding and sorting
    """
    parameters = {} if parameters is None else parameters

    # bundle the four inputs in the structure expected by the miner
    dfg_bundle = DfgSaEaActCount(dfg, start_activities, end_activities, activities)
    noise_threshold = exec_utils.get_param_value(
        Parameters.NOISE_THRESHOLD, parameters, 0.0)

    mined_tree = dfg_im.__imd(dfg_bundle, noise_threshold, None)

    tree_consistency.fix_parent_pointers(mined_tree)
    folded_tree = generic.fold(mined_tree)
    generic.tree_sort(folded_tree)
    return folded_tree
def apply_tree(event_log: Union[pd.DataFrame, EventLog, EventStream],
               parameters: Optional[Dict[str, Any]] = None) -> ProcessTree:
    """
    Discover a process tree from a log-like object with the inductive miner

    Parameters
    ----------
    event_log
        Log-like object (dataframe, event log or event stream); it is converted
        through the log converter before mining
    parameters
        Parameters of the algorithm, including:
            Parameters.ACTIVITY_KEY -> attribute to use as activity name
            (default xes_constants.DEFAULT_NAME_KEY)
            Parameters.DFG_ONLY -> if True, the miner is given only the
            directly-follows graph and no log (default False)
            Parameters.NOISE_THRESHOLD -> noise threshold (default 0.0)
            Parameters.USE_MSD_PARALLEL_CUT -> flag forwarded to the miner (default True)

    Returns
    ----------
    tree
        Process tree, after parent-pointer fixing, folding and sorting

    Raises
    ----------
    ValueError
        If the converted object is not an EventLog
    """
    if parameters is None:
        parameters = {}

    event_log = log_converter.apply(event_log, parameters=parameters)
    if type(event_log) is not EventLog:
        raise ValueError(
            'input argument log should be of type pandas.DataFrame, Event Log or Event Stream'
        )

    act_key = exec_utils.get_param_value(
        Parameters.ACTIVITY_KEY.value, parameters, xes_constants.DEFAULT_NAME_KEY)
    dfg_only = exec_utils.get_param_value(Parameters.DFG_ONLY, parameters, False)
    threshold = exec_utils.get_param_value(Parameters.NOISE_THRESHOLD, parameters, 0.0)

    if threshold == 0.0:
        # keep one trace per variant; more performant
        event_log = filtering_utils.keep_one_trace_per_variant(
            event_log, parameters=parameters)

    # The directly-follows graph has to be computed from the log itself.
    # Previously the log was replaced by None *before* this point when
    # DFG_ONLY was set, so the variant filter and the DFG discovery were both
    # handed None instead of the log.
    dfg = discover_dfg.apply(
        event_log,
        parameters={constants.PARAMETER_CONSTANT_ACTIVITY_KEY: act_key})

    use_msd_parallel_cut = exec_utils.get_param_value(
        Parameters.USE_MSD_PARALLEL_CUT, parameters, True)

    # in DFG-only mode the miner gets None as its log argument, as before
    miner_log = None if dfg_only else event_log

    tree = inductive_miner(
        miner_log, dfg, threshold, None, act_key, use_msd_parallel_cut)

    tree_consistency.fix_parent_pointers(tree)
    tree = generic.fold(tree)
    generic.tree_sort(tree)
    return tree
def apply(net, im, fm, parameters=None):
    """
    Transforms a WF-net to a process tree

    Parameters
    -------------
    net
        Petri net
    im
        Initial marking
    fm
        Final marking
    parameters
        Possible parameters of the algorithm, including:
            Parameters.DEBUG -> if True, the grouped net is shown when the
            conversion fails (default False)
            Parameters.FOLD -> if True, the resulting tree is folded (default True)

    Returns
    -------------
    tree
        Process tree

    Raises
    -------------
    ValueError
        If the grouped net does not consist of exactly one transition
    """
    if parameters is None:
        parameters = {}

    debug = exec_utils.get_param_value(Parameters.DEBUG, parameters, False)
    fold = exec_utils.get_param_value(Parameters.FOLD, parameters, True)

    grouped_net = group_blocks_in_net(net, parameters=parameters)

    # failure path first: anything other than a single transition is an error
    if len(grouped_net.transitions) != 1:
        if debug:
            from pm4py.visualization.petri_net import visualizer as pn_viz
            pn_viz.view(pn_viz.apply(grouped_net, parameters={"format": "svg"}))
        raise ValueError('Parsing of WF-net Failed')

    # the label of the only remaining transition is parsed as a process tree
    only_transition = next(iter(grouped_net.transitions))
    parsed_tree = pt_util.parse(only_transition.label)

    if fold:
        result = pt_util.fold(parsed_tree)
    else:
        result = parsed_tree

    tree_sort(result)
    return result
LOGS_FOLDER = "../compressed_input_data"

for log_name in os.listdir(LOGS_FOLDER):
    # only entries whose name contains "xes" are processed
    if "xes" not in log_name:
        continue

    # reserve a unique ".bpmn" path: the temporary file is closed as soon as
    # the context manager exits and only its name is kept
    with tempfile.NamedTemporaryFile(suffix=".bpmn") as bpmn_tmp_file:
        bpmn_output_path = bpmn_tmp_file.name

    log_path = os.path.join(LOGS_FOLDER, log_name)
    print("")
    print(log_path)

    log = pm4py.read_xes(log_path)

    # footprints of the log
    fp_log = pm4py.algo.discovery.footprints.log.variants.entire_event_log.apply(log)

    # discover a process tree and compute its footprints
    tree = pm4py.discover_process_tree_inductive(log)
    generic.tree_sort(tree)
    fp_tree = pm4py.algo.discovery.footprints.tree.variants.bottomup.apply(tree)

    # footprints-based conformance between log and tree
    fp_conf = pm4py.algo.conformance.footprints.variants.log_extensive.apply(
        fp_log, fp_tree)

    fp_evaluation = pm4py.algo.conformance.footprints.util.evaluation
    fitness0 = fp_evaluation.fp_fitness(fp_log, fp_tree, fp_conf)
    precision0 = fp_evaluation.fp_precision(fp_log, fp_tree)
    print("fitness 0 = ", fitness0)
    print("precision 0 = ", precision0)

    # process tree -> Petri net -> BPMN, then lay out the BPMN graph
    net, im, fm = pm4py.objects.conversion.process_tree.variants.to_petri_net.apply(tree)
    bpmn_graph = pm4py.objects.conversion.wf_net.variants.to_bpmn.apply(net, im, fm)
    bpmn_graph = layouter.apply(bpmn_graph)
def apply_tree_dfg(dfg, parameters=None, activities=None, contains_empty_traces=False,
                   start_activities=None, end_activities=None):
    """
    Apply the IMDF algorithm to a DFG graph obtaining a process tree

    Parameters
    ----------
    dfg
        Directly-follows graph; a dict or Counter is turned into a list of
        (key, value) pairs, any other object is used as it is
    parameters
        Parameters of the algorithm, including:
            Parameters.NOISE_THRESHOLD -> noise threshold (default 0.0)
    activities
        Activities of the process (default None)
    contains_empty_traces
        Boolean value that is True if the event log from which the DFG has been extracted contains empty traces
    start_activities
        If provided, the start activities of the log
    end_activities
        If provided, the end activities of the log

    Returns
    ----------
    tree
        Process tree
    """
    parameters = {} if parameters is None else parameters

    noise_threshold = exec_utils.get_param_value(
        Parameters.NOISE_THRESHOLD, parameters, 0.0)

    # exact-type test (subclasses other than Counter are left untouched)
    dfg_type = type(dfg)
    if dfg_type is Counter or dfg_type is dict:
        dfg = list(dfg.items())

    counts = Counts()
    subtree_obj = SubtreeDFGBased(
        dfg, dfg, dfg, activities, counts, 0,
        noise_threshold=noise_threshold,
        initial_start_activities=start_activities,
        initial_end_activities=end_activities)

    process_tree = get_tree_repr_dfg_based.get_repr(
        subtree_obj, 0, contains_empty_traces=contains_empty_traces)

    # make the parent pointers of the process tree consistent
    tree_consistency.fix_parent_pointers(process_tree)
    # remove the 1-child XOR that is added when single-activity flowers are found
    tree_consistency.fix_one_child_xor_flower(process_tree)
    # fold the process tree (simplifies it when fallthroughs/filtering are applied)
    process_tree = generic.fold(process_tree)
    # sort the process tree so that different executions give a consistent result
    tree_sort(process_tree)

    return process_tree
def apply_tree(log, parameters=None):
    """
    Apply the IM_FF algorithm to a log obtaining a process tree

    Parameters
    ----------
    log
        Log; a pandas dataframe is handled through its variants, any other
        object is passed through the log converter
    parameters
        Parameters of the algorithm, including:
            Parameters.ACTIVITY_KEY -> attribute of the log to use as activity name
            (default concept:name)
            Parameters.NOISE_THRESHOLD -> noise threshold
            (default shared_constants.NOISE_THRESHOLD_IMF)

    Returns
    ----------
    process_tree
        Process tree
    """
    # pkgutil.find_loader is deprecated; find_spec is the supported way to
    # check whether an optional module is installed without importing it
    import importlib.util

    if parameters is None:
        parameters = {}

    if importlib.util.find_spec("pandas"):
        import pandas as pd
        from pm4py.statistics.variants.pandas import get as variants_get

        if type(log) is pd.DataFrame:
            # dataframes are mined through their variants
            variants = variants_get.get_variants_count(log, parameters=parameters)
            return apply_tree_variants(variants, parameters=parameters)

    activity_key = exec_utils.get_param_value(
        Parameters.ACTIVITY_KEY, parameters, pmutil.xes_constants.DEFAULT_NAME_KEY)

    log = converter.apply(log, parameters=parameters)
    # keep only the activity attribute (since the others are not used)
    log = filtering_utils.keep_only_one_attribute_per_event(log, activity_key)

    noise_threshold = exec_utils.get_param_value(
        Parameters.NOISE_THRESHOLD, parameters, shared_constants.NOISE_THRESHOLD_IMF)

    # directly-follows pairs with a strictly positive count
    dfg = [(k, v) for k, v in dfg_inst.apply(log, parameters=parameters).items() if v > 0]
    c = Counts()
    activities = attributes_get.get_attribute_values(log, activity_key)
    start_activities = list(
        start_activities_get.get_start_activities(log, parameters=parameters).keys())
    end_activities = list(
        end_activities_get.get_end_activities(log, parameters=parameters).keys())

    # True when at least one trace has no events (False for an empty log)
    contains_empty_traces = any(len(trace) == 0 for trace in log)

    # set the threshold parameter based on f and the max value in the dfg
    # (0 when the dfg is empty)
    max_value = max((value for _, value in dfg), default=0)
    threshold = noise_threshold * max_value

    recursion_depth = 0
    sub = subtree.make_tree(log, dfg, dfg, dfg, activities, c, recursion_depth,
                            noise_threshold, threshold,
                            start_activities, end_activities,
                            start_activities, end_activities,
                            parameters=parameters)

    process_tree = get_tree_repr_implain.get_repr(
        sub, 0, contains_empty_traces=contains_empty_traces)

    # Ensures consistency to the parent pointers in the process tree
    tree_consistency.fix_parent_pointers(process_tree)
    # Fixes a 1 child XOR that is added when single-activities flowers are found
    tree_consistency.fix_one_child_xor_flower(process_tree)
    # folds the process tree (to simplify it in case fallthroughs/filtering is applied)
    process_tree = generic.fold(process_tree)
    # sorts the process tree to ensure consistency in different executions of the algorithm
    tree_sort(process_tree)

    return process_tree
def import_tree_from_xml_object(root, parameters=None):
    """
    Imports a process tree from the XML object

    Parameters
    ---------------
    root
        Root of the XML object
    parameters
        Possible parameters

    Returns
    ---------------
    tree
        Process tree

    Raises
    ---------------
    Exception
        If a node element has a tag that is not recognized
    KeyError
        If the declared root (or the source/target of an edge) does not refer
        to a node that has already been read
    """
    if parameters is None:
        parameters = {}

    # tag of an operator element -> operator of the resulting node
    operator_by_tag = {
        "and": Operator.PARALLEL,
        "sequence": Operator.SEQUENCE,
        "xor": Operator.XOR,
        "xorLoop": Operator.LOOP,
        "or": Operator.OR,
    }

    nodes = {}
    root_id = None

    for tree_elem in root:
        root_id = tree_elem.get("root")
        for child in tree_elem:
            name = child.get("name")

            if name is None:
                # element without a name: an edge between two known nodes
                source_node = nodes[child.get("sourceId")]
                target_node = nodes[child.get("targetId")]
                source_node.children.append(target_node)
                target_node.parent = source_node
                continue

            # element with a name: a node of the tree
            tag = child.tag
            if tag in operator_by_tag:
                new_node = ProcessTree(operator=operator_by_tag[tag], label=None)
            elif tag == "manualTask":
                # visible activity
                new_node = ProcessTree(operator=None, label=name)
            elif tag == "automaticTask":
                # invisible activity: neither operator nor label
                new_node = ProcessTree(operator=None, label=None)
            else:
                raise Exception("unknown tag: " + tag)
            nodes[child.get("id")] = new_node

    tree_root = nodes[root_id]

    # make sure that .PTML files having loops with 3 children are imported
    # into the PM4Py process tree structure
    # we want loops to have two children
    for node in nodes.values():
        if node.operator == Operator.LOOP and len(node.children) == 3:
            exit_child = node.children[2]
            if not (exit_child.operator is None and exit_child.label is None):
                # the third child is not invisible: keep it by wrapping the
                # loop in a sequence "loop, then third child"
                parent_node = node.parent
                wrapper = ProcessTree(operator=Operator.SEQUENCE, label=None)
                wrapper.children.append(node)
                wrapper.children.append(exit_child)
                node.parent = wrapper
                exit_child.parent = wrapper

                if parent_node is not None:
                    wrapper.parent = parent_node
                    # The wrapper takes the exact position of the loop, so the
                    # order of the parent's children is preserved. The loop is
                    # located by identity, so an equal-comparing sibling
                    # cannot be replaced by mistake.
                    for position, sibling in enumerate(parent_node.children):
                        if sibling is node:
                            parent_node.children[position] = wrapper
                            break
                elif node is tree_root:
                    # the declared root itself was wrapped: the wrapper is the
                    # new root, otherwise the third child would be lost
                    tree_root = wrapper
            # in both cases the loop itself keeps only its first two children
            del node.children[2]

    tree_sort(tree_root)
    return tree_root