def initialize_tree(self, dfg, log, initial_dfg, activities, second_iteration=False, end_call=True,
                    parameters=None):
    """
    Set up the state of this subtree and start the cut detection

    Parameters
    -----------
    dfg
        Directly follows graph of this subtree
    log
        Event log of this subtree (stored both as the working and the original log)
    initial_dfg
        Referral directly follows graph that should be taken in account adding hidden/loop transitions
    activities
        Activities of this subtree; when None they are read from dfg
    second_iteration
        Boolean that indicates if we are executing this method for the second time; when set,
        the dfg already stored on the object is cleaned with the noise threshold instead of
        copying the dfg argument
    end_call
        Not read by this method
    parameters
        Stored on the object and forwarded to detect_cut
    """
    self.second_iteration = second_iteration

    # activities: taken from the dfg unless the caller supplied them
    subtree_activities = get_activities_from_dfg(dfg) if activities is None else copy(activities)
    self.activities = subtree_activities

    # working dfg: noise-filtered version of the stored one on a second pass, a copy otherwise
    working_dfg = (
        clean_dfg_based_on_noise_thresh(self.dfg, subtree_activities, self.noise_threshold)
        if second_iteration
        else copy(dfg)
    )
    self.dfg = working_dfg
    self.initial_dfg = initial_dfg

    # edge maps of the working dfg
    self.outgoing = get_outgoing_edges(working_dfg)
    self.ingoing = get_ingoing_edges(working_dfg)
    self.self_loop_activities = get_activities_self_loop(working_dfg)

    # edge maps of the referral dfg
    self.initial_outgoing = get_outgoing_edges(initial_dfg)
    self.initial_ingoing = get_ingoing_edges(initial_dfg)

    # negated dfg and everything derived from it
    negated = negate(working_dfg)
    self.negated_dfg = negated
    self.negated_activities = get_activities_from_dfg(negated)
    self.negated_outgoing = get_outgoing_edges(negated)
    self.negated_ingoing = get_ingoing_edges(negated)

    # nothing detected yet
    self.detected_cut = None
    self.children = []
    self.log = self.original_log = log
    self.parameters = parameters

    # the detection is always started with second_iteration=False, whatever was passed in
    self.detect_cut(second_iteration=False, parameters=parameters)
def detect_concurrent(self):
    """
    Detect a concurrent (parallel) cut among the activities of this subtree

    As a side effect the inverted directly follows graph built for the
    detection is stored in self.inverted_dfg.

    Returns
    -----------
    result
        [True, conn] when a cut is found, conn being the components returned
        by parallel_cut_utils.check_par_cut; [False, []] otherwise, including
        when contains_empty_trace() holds
    """
    if self.contains_empty_trace():
        return [False, []]

    # create an inverted dfg, the connected components of this dfg are the split:
    # a and b are linked (in both directions) unless each one follows the other
    inverted_dfg = []
    # mirrors the pairs stored in inverted_dfg so the duplicate check is a set
    # lookup instead of a scan of the growing list
    linked_pairs = set()
    for a in self.activities:
        for b in self.activities:
            if a == b:
                continue
            if self.is_followed_by(self.dfg, a, b) and self.is_followed_by(self.dfg, b, a):
                continue
            if (a, b) not in linked_pairs:
                inverted_dfg.append(((a, b), 1))
                inverted_dfg.append(((b, a), 1))
                linked_pairs.add((a, b))
                linked_pairs.add((b, a))
    self.inverted_dfg = inverted_dfg

    new_ingoing = get_ingoing_edges(inverted_dfg)
    new_outgoing = get_outgoing_edges(inverted_dfg)
    conn = detection_utils.get_connected_components(new_ingoing, new_outgoing, self.activities)
    if len(conn) <= 1:
        return [False, []]

    # refine the candidate components; a cut needs more than one of them to survive
    conn = parallel_cut_utils.check_par_cut(conn, self.ingoing, self.outgoing)
    if len(conn) <= 1:
        return [False, []]

    if parallel_cut_utils.check_sa_ea_for_each_branch(conn, self.start_activities, self.end_activities):
        return [True, conn]
    return [False, []]
def check_for_cut(self, test_log, deleted_activity=None, parameters=None):
    """
    Check whether any cut (xor, sequence, concurrent, loop) can be found on a log

    Before the cuts are tried, the directly follows graph of test_log is
    computed and stored on the object together with its ingoing/outgoing
    edge maps, and test_log replaces self.log.

    Parameters
    -----------
    test_log
        Log on which the cut detection is attempted
    deleted_activity
        Optional entry that is deleted from self.activities before the check
    parameters
        Parameters forwarded to the dfg computation (an empty dict when None)

    Returns
    -----------
    found
        True as soon as one of the cuts is detected, False if none is

    Raises
    -----------
    Exception
        If networkx cannot be imported
    """
    # Import directly instead of probing with pkgutil.find_loader, which is
    # deprecated since Python 3.12 and removed in Python 3.14.
    try:
        import networkx as nx
    except ImportError as err:
        msg = "networkx is not available. inductive miner cannot be used!"
        logging.error(msg)
        raise Exception(msg) from err

    if deleted_activity is not None:
        del self.activities[deleted_activity]
    if parameters is None:
        parameters = {}

    # keep only the directly-follows relations with a positive count
    self.dfg = [(k, v) for k, v in dfg_inst.apply(test_log, parameters=parameters).items() if v > 0]
    self.outgoing = get_outgoing_edges(self.dfg)
    self.ingoing = get_ingoing_edges(self.dfg)
    self.log = test_log

    conn_components = detection_utils.get_connected_components(self.ingoing, self.outgoing, self.activities)
    this_nx_graph = transform_dfg_to_directed_nx_graph(self.dfg, activities=self.activities)
    strongly_connected_components = [list(x) for x in nx.strongly_connected_components(this_nx_graph)]

    # search for cut and return true as soon as a cut is found; later
    # detections are only run when the earlier ones fail
    if self.detect_xor(conn_components)[0]:
        return True
    if cut_detection.detect_sequential_cut(self, self.dfg, strongly_connected_components)[0]:
        return True
    if self.detect_concurrent()[0]:
        return True
    return bool(self.detect_loop()[0])
def detect_loop(self):
    """
    Detect a loop cut among the activities of this subtree

    Returns
    -----------
    result
        [True, p0] when more than one group remains in p0, where p0 is a list
        of activity groups whose first entry holds the start and end
        activities of self.log plus every group merged into it;
        [False, []] otherwise, including when contains_empty_trace() holds
    """
    # p0 is part of return value, it contains the partition of activities
    # write all start and end activities in p1
    if self.contains_empty_trace():
        return [False, []]
    start_activities = list(
        start_activities_filter.get_start_activities(
            self.log, parameters=self.parameters).keys())
    end_activities = list(
        end_activities_filter.get_end_activities(
            self.log, parameters=self.parameters).keys())
    # p1: start activities followed by end activities, without duplicates
    p1 = []
    for act in start_activities:
        if act not in p1:
            p1.append(act)
    for act in end_activities:
        if act not in p1:
            p1.append(act)
    # create new dfg without the transitions to start and end activities
    # (iterate over a second copy so that removing from new_dfg is safe)
    new_dfg = copy(self.dfg)
    copy_dfg = copy(new_dfg)
    for ele in copy_dfg:
        if ele[0][0] in p1 or ele[0][1] in p1:
            new_dfg.remove(ele)
    # get connected components of this new dfg
    new_ingoing = get_ingoing_edges(new_dfg)
    new_outgoing = get_outgoing_edges(new_dfg)
    # collect the activities that are not in p1 as the keys of a dict
    # (every value is the constant 1)
    current_activities = {}
    for element in self.activities:
        if element not in p1:
            current_activities.update({element: 1})
    p0 = detection_utils.get_connected_components(new_ingoing, new_outgoing,
                                                  current_activities)
    # the group of start/end activities always comes first
    p0.insert(0, p1)
    # list of the (source, target) pairs of the dfg, used for membership tests
    iterable_dfg = []
    for i in range(0, len(self.dfg)):
        iterable_dfg.append(self.dfg[i][0])
    # p0 is like P1,P2,...,Pn in line 3 on page 190 of the IM Thesis
    # check for subsets in p0 that have connections to an end or from a start activity
    # (work on a snapshot of p0, because p0 is modified inside the loop)
    p0_copy = []
    for int_el in p0:
        p0_copy.append(int_el)
    for element in p0_copy:  # for every set in p0
        removed = False
        if element in p0 and element != p0[0]:
            for act in element:  # for every activity in this set
                for e in end_activities:  # for every end activity
                    if e not in start_activities:
                        if (act, e) in iterable_dfg:  # check if connected
                            # is there an element in dfg pointing from any act in a subset of p0 to an end activity
                            # merge the whole subset into the first group
                            for activ in element:
                                if activ not in p0[0]:
                                    p0[0].append(activ)
                            if element in p0:
                                p0.remove(
                                    element
                                )  # remove subsets that are connected to an end activity
                            removed = True
                            break
                if removed:
                    break
                for s in start_activities:
                    if s not in end_activities:
                        if not removed:
                            if (s, act) in iterable_dfg:
                                # merge the whole subset into the first group
                                for acti in element:
                                    if acti not in p0[0]:
                                        p0[0].append(acti)
                                if element in p0:
                                    p0.remove(
                                        element
                                    )  # remove subsets that are connected from a start activity
                                removed = True
                                break
                        else:
                            # the subset has already been merged, stop scanning
                            break
                if removed:
                    break
    # second pass: same pair list and snapshot, rebuilt after the first pass;
    # this pass reads self.start_activities / self.end_activities instead of
    # the lists computed from self.log above
    iterable_dfg = []
    for i in range(0, len(self.dfg)):
        iterable_dfg.append(self.dfg[i][0])
    p0_copy = []
    for int_el in p0:
        p0_copy.append(int_el)
    for element in p0_copy:
        if element in p0 and element != p0[0]:
            for act in element:
                for e in self.end_activities:
                    if (
                            e, act
                    ) in iterable_dfg:  # get those act, that are connected from an end activity
                        for e2 in self.end_activities:  # check, if the act is connected from all end activities
                            if (e2, act) not in iterable_dfg:
                                for acti in element:
                                    if acti not in p0[0]:
                                        p0[0].append(acti)
                                if element in p0:
                                    p0.remove(
                                        element
                                    )  # remove subsets that are connected from some, but not all, end activities
                                # leaves only the e2 loop; the outer loops keep going
                                break
                for s in self.start_activities:
                    if (
                            act, s
                    ) in iterable_dfg:  # same as above (in this case for activities connected to
                        # a start activity)
                        for s2 in self.start_activities:
                            if (act, s2) not in iterable_dfg:
                                for acti in element:
                                    if acti not in p0[0]:
                                        p0[0].append(acti)
                                if element in p0:
                                    p0.remove(
                                        element
                                    )  # remove subsets that are connected to some, but not all, start activities
                                # leaves only the s2 loop; the outer loops keep going
                                break
    # a loop cut needs at least one group besides the start/end group
    if len(p0) > 1:
        return [True, p0]
    else:
        return [False, []]