def log_statistics(logpath):
    """
    Extracts log statistics such as #Events, #Cases, #Activities and #Variants,
    given the path of an event log.

    Parameters:
        logpath (str): Path of the event log

    Returns:
        events (int): Number of events
        cases (int): Number of traces
        activities (int): Number of activities
        variants (int): Number of variants
    """
    log = importer.apply(logpath)
    log_df = log_converter.apply(log, variant=log_converter.Variants.TO_DATA_FRAME)
    return len(log_df), log_df["case:concept:name"].nunique(), log_df["concept:name"].nunique(), \
        len(variants_filter.get_variants(log))

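# Usage sketch (not part of the original module): the path below is hypothetical; any XES
# file readable by the importer used above works. Prints the four statistics in order.
def _example_log_statistics():
    events, cases, activities, variants = log_statistics("logs/running-example.xes")
    print("events=%d cases=%d activities=%d variants=%d" % (events, cases, activities, variants))
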
def write_csv(log, file_path):
    """
    Exports an event log to a CSV file

    Parameters
    ---------------
    log
        Event log
    file_path
        Destination path

    Returns
    --------------
    void
    """
    from pm4py.objects.conversion.log import converter

    dataframe = converter.apply(log, variant=converter.Variants.TO_DATA_FRAME)
    dataframe.to_csv(file_path, index=False)

def __export_log_as_string(log, parameters=None):
    """
    Exports the given log to string format

    Parameters
    -----------
    log: :class:`pm4py.log.log.EventLog`
        Event log. Also, can take a log and convert it to event stream
    parameters
        Possible parameters of the algorithm

    Returns
    -----------
    string
        String representation of the log (rendered through its dataframe form)
    """
    df = converter.apply(log, variant=converter.Variants.TO_DATA_FRAME)
    return df.to_string()

def get_stream_from_dataframe(df, parameters=None):
    """
    Converts a dataframe into an event stream, sorting by timestamp and event id
    and exploding "succint" dataframes beforehand.
    """
    if parameters is None:
        parameters = {}

    df_type = df.type
    df = df.sort_values(["event_timestamp", "event_id"])

    if df_type == "succint":
        df = succint_mdl_to_exploded_mdl.apply(df)

    # keep the non-event columns plus the core event columns (activity, id, timestamp)
    columns = [
        x for x in df.columns
        if not x.startswith("event") or x == "event_activity" or x == "event_id" or x == "event_timestamp"
    ]
    df = df[columns]

    stream = converter.apply(df, variant=converter.Variants.TO_EVENT_STREAM)
    return stream

def apply(log, parameters=None, variant=CLASSIC):
    """
    Discovers a Petri net using Heuristics Miner

    Parameters
    ------------
    log
        Event log
    parameters
        Possible parameters of the algorithm, including:
        - Parameters.ACTIVITY_KEY
        - Parameters.TIMESTAMP_KEY
        - Parameters.CASE_ID_KEY
        - Parameters.DEPENDENCY_THRESH
        - Parameters.AND_MEASURE_THRESH
        - Parameters.MIN_ACT_COUNT
        - Parameters.MIN_DFG_OCCURRENCES
        - Parameters.DFG_PRE_CLEANING_NOISE_THRESH
        - Parameters.LOOP_LENGTH_TWO_THRESH
    variant
        Variant of the algorithm:
        - Variants.CLASSIC
        - Variants.PLUSPLUS

    Returns
    ------------
    net
        Petri net
    im
        Initial marking
    fm
        Final marking
    """
    if pkgutil.find_loader("pandas"):
        import pandas

        if isinstance(log, pandas.core.frame.DataFrame):
            return exec_utils.get_variant(variant).apply_pandas(log, parameters=parameters)

    return exec_utils.get_variant(variant).apply(log_conversion.apply(log, parameters=parameters),
                                                 parameters=parameters)

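# Usage sketch (not part of the original module): the XES path is hypothetical and the
# importer is pm4py's standard XES importer; discovery parameters are left at their defaults.
def _example_heuristics_miner():
    from pm4py.objects.log.importer.xes import importer as xes_importer

    log = xes_importer.apply("logs/running-example.xes")  # hypothetical log file
    net, im, fm = apply(log)  # the Heuristics Miner apply defined above
    return net, im, fm
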
def apply(obj: Union[EventLog, Trace], pt: ProcessTree, parameters: Optional[Dict[Any, Any]] = None) -> Union[
        typing.AlignmentResult, typing.ListAlignments]:
    """
    Returns alignments for a process tree

    Parameters
    --------------
    obj
        Event log or trace (a conversion is done if necessary)
    pt
        Process tree
    parameters
        Parameters of the algorithm

    Returns
    --------------
    alignments
        Alignments
    """
    if parameters is None:
        parameters = {}

    obj = log_converter.apply(obj, variant=log_converter.Variants.TO_EVENT_LOG, parameters=parameters)

    leaves = frozenset(pt_util.get_leaves_as_tuples(pt))
    activity_key = exec_utils.get_param_value(Parameters.ACTIVITY_KEY, parameters, xes_constants.DEFAULT_NAME_KEY)

    if type(obj) is Trace:
        variant = tuple(x[activity_key] for x in obj)
        return align_variant(variant, leaves, pt)
    else:
        from pm4py.statistics.variants.log import get as variants_get

        variants = variants_get.get_variants(obj, parameters=parameters)
        progress = _construct_progress_bar(len(variants), parameters)

        ret = []
        bwc = align_variant([], leaves, pt)["cost"]
        align_dict = {}

        for trace in obj:
            variant = tuple(x[activity_key] for x in trace)
            if variant not in align_dict:
                align_dict[variant] = _apply_variant(variant, pt, leaves, bwc, parameters)
                if progress is not None:
                    progress.update()
            ret.append(align_dict[variant])

        _destroy_progress_bar(progress)
        return ret

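# Usage sketch (not part of the original module): assumes `log` is an EventLog and `pt` a
# ProcessTree already discovered elsewhere (e.g., by an inductive-miner variant); one
# alignment result is returned per trace, in the order the traces appear in the log.
def _example_process_tree_alignments(log, pt):
    alignments = apply(log, pt)  # the process-tree alignment apply defined above
    for a in alignments:
        print(a)
    return alignments
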
def apply_heu(log: EventLog, parameters: Optional[Dict[Any, Any]] = None) -> HeuristicsNet:
    """
    Discovers a heuristics net using the Heuristics Miner ++ algorithm

    Implements the approach described in
    Burattin, Andrea, and Alessandro Sperduti. "Heuristics Miner for Time Intervals." ESANN. 2010.
    https://andrea.burattin.net/public-files/publications/2010-esann-slides.pdf

    Parameters
    --------------
    log
        Event log
    parameters
        Parameters of the algorithm, including:
        - Parameters.ACTIVITY_KEY
        - Parameters.START_TIMESTAMP_KEY
        - Parameters.TIMESTAMP_KEY
        - Parameters.DEPENDENCY_THRESH
        - Parameters.AND_MEASURE_THRESH
        - Parameters.MIN_ACT_COUNT
        - Parameters.MIN_DFG_OCCURRENCES
        - Parameters.HEU_NET_DECORATION

    Returns
    --------------
    heu_net
        Heuristics net
    """
    if parameters is None:
        parameters = {}

    log = log_converter.apply(log, variant=log_converter.Variants.TO_EVENT_LOG, parameters=parameters)
    log = interval_lifecycle.to_interval(log, parameters=parameters)

    start_timestamp_key = exec_utils.get_param_value(Parameters.START_TIMESTAMP_KEY, parameters, None)
    if start_timestamp_key is None:
        start_timestamp_key = xes.DEFAULT_START_TIMESTAMP_KEY
        parameters = copy(parameters)
        parameters[Parameters.START_TIMESTAMP_KEY] = start_timestamp_key

    start_activities, end_activities, activities_occurrences, dfg, performance_dfg, sojourn_time, concurrent_activities = discover_abstraction_log(
        log, parameters=parameters)

    return discover_heu_net_plus_plus(start_activities, end_activities, activities_occurrences, dfg, performance_dfg,
                                      sojourn_time, concurrent_activities, parameters=parameters)

def apply(log_or_trace: Union[Trace, EventLog], parameters: Optional[Dict[Union[str, Parameters], Any]] = None) -> float:
    """
    Computes the cycle time starting from an event log or a trace object

    The definition that has been followed is the one proposed in:
    https://www.presentationeze.com/presentations/lean-manufacturing-just-in-time/lean-manufacturing-just-in-time-full-details/process-cycle-time-analysis/calculate-cycle-time/#:~:text=Cycle%20time%20%3D%20Average%20time%20between,is%2024%20minutes%20on%20average.

    So:
    Cycle time = Average time between completion of units.

    Example taken from the website: Consider a manufacturing facility, which is producing 100 units of product
    per 40 hour week. The average throughput rate is 1 unit per 0.4 hours, which is one unit every 24 minutes.
    Therefore the cycle time is 24 minutes on average.

    Parameters
    ------------------
    log_or_trace
        Log or trace
    parameters
        Parameters of the algorithm, including:
        - Parameters.START_TIMESTAMP_KEY => the attribute acting as start timestamp
        - Parameters.TIMESTAMP_KEY => the attribute acting as timestamp

    Returns
    ------------------
    cycle_time
        Cycle time
    """
    if parameters is None:
        parameters = {}

    start_timestamp_key = exec_utils.get_param_value(Parameters.START_TIMESTAMP_KEY, parameters,
                                                     xes_constants.DEFAULT_TIMESTAMP_KEY)
    timestamp_key = exec_utils.get_param_value(Parameters.TIMESTAMP_KEY, parameters,
                                               xes_constants.DEFAULT_TIMESTAMP_KEY)

    if type(log_or_trace) is Trace:
        log = EventLog()
        log.append(log_or_trace)
    else:
        log = converter.apply(log_or_trace, variant=converter.Variants.TO_EVENT_LOG, parameters=parameters)

    events = [(x[start_timestamp_key].timestamp(), x[timestamp_key].timestamp()) for trace in log for x in trace]

    return compute.cycle_time(events, len(log))

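# Worked example (not part of the original module) of the definition quoted in the docstring
# above: 100 units produced in a 40 hour week give an average completion interval of
# 0.4 hours, i.e. 24 minutes.
def _example_cycle_time_arithmetic():
    units = 100
    hours = 40.0
    cycle_time_hours = hours / units             # 0.4 hours between completions
    cycle_time_minutes = cycle_time_hours * 60   # 24.0 minutes
    return cycle_time_minutes
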
def apply(log, list_activities, parameters=None):
    """
    Finds the performance spectrum provided a log/dataframe
    and a list of activities

    Parameters
    -------------
    log
        Event log/Dataframe
    list_activities
        List of activities interesting for the performance spectrum (at least two)
    parameters
        Parameters of the algorithm, including:
        - Parameters.ACTIVITY_KEY
        - Parameters.TIMESTAMP_KEY

    Returns
    -------------
    ps
        Performance spectrum object (dictionary)
    """
    from pm4py.objects.conversion.log import converter as log_conversion

    if parameters is None:
        parameters = {}

    sample_size = exec_utils.get_param_value(Parameters.PARAMETER_SAMPLE_SIZE, parameters, 10000)

    if len(list_activities) < 2:
        raise Exception("performance spectrum can be applied providing at least two activities!")

    points = None

    if pkgutil.find_loader("pandas"):
        import pandas as pd

        if type(log) is pd.DataFrame:
            points = exec_utils.get_variant(Variants.DATAFRAME).apply(log, list_activities, sample_size, parameters)

    if points is None:
        points = exec_utils.get_variant(Variants.LOG).apply(log_conversion.apply(log), list_activities, sample_size,
                                                            parameters)

    ps = {Outputs.LIST_ACTIVITIES.value: list_activities, Outputs.POINTS.value: points}

    return ps

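# Usage sketch (not part of the original module): the activity names and the sample size are
# hypothetical; `log` can be an EventLog or a dataframe, as handled above.
def _example_performance_spectrum(log):
    ps = apply(log, ["register request", "decide"],
               parameters={Parameters.PARAMETER_SAMPLE_SIZE: 5000})
    return ps
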
def apply(log: EventLog, parameters: Optional[Dict[Union[str, Parameters], Any]] = None) -> Tuple[Any, List[str]]:
    """
    Extracts all the features for the traces of an event log
    (each trace becomes a vector of vectors, where each event has its own vector)

    Parameters
    -----------------
    log
        Event log
    parameters
        Parameters of the algorithm, including:
        - STR_EVENT_ATTRIBUTES => string event attributes to consider in the features extraction
        - NUM_EVENT_ATTRIBUTES => numeric event attributes to consider in the features extraction
        - FEATURE_NAMES => features to consider (in the given order)

    Returns
    -------------
    data
        Data to provide for decision tree learning
    feature_names
        Names of the features, in order
    """
    if parameters is None:
        parameters = {}

    str_ev_attr = exec_utils.get_param_value(Parameters.STR_EVENT_ATTRIBUTES, parameters, None)
    num_ev_attr = exec_utils.get_param_value(Parameters.NUM_EVENT_ATTRIBUTES, parameters, None)
    feature_names = exec_utils.get_param_value(Parameters.FEATURE_NAMES, parameters, None)

    log = converter.apply(log, variant=converter.Variants.TO_EVENT_LOG, parameters=parameters)

    if feature_names is None:
        feature_names = extract_all_ev_features_names_from_log(log, str_ev_attr, num_ev_attr, parameters=parameters)

    return extract_features(log, feature_names, parameters=parameters)

def get_decision_tree(log, net, initial_marking, final_marking, decision_point=None, attributes=None, parameters=None):
    """
    Gets a decision tree classifier on a specific point of the model

    Parameters
    --------------
    log
        Event log
    net
        Petri net
    initial_marking
        Initial marking
    final_marking
        Final marking
    decision_point
        Point of the process in which a decision happens:
        - if not specified, the method crashes, but provides a list of possible decision points
        - if specified, the method goes on and produces the decision tree
    attributes
        Attributes of the log. If not specified, then an automatic attribute selection is performed.
    parameters
        Parameters of the algorithm

    Returns
    ---------------
    clf
        Decision tree
    feature_names
        The names of the features
    classes
        The classes
    """
    from sklearn import tree

    if parameters is None:
        parameters = {}

    log = log_converter.apply(log, parameters=parameters)

    X, y, targets = apply(log, net, initial_marking, final_marking, decision_point=decision_point,
                          attributes=attributes, parameters=parameters)
    dt = tree.DecisionTreeClassifier()
    dt = dt.fit(X, y)

    return dt, list(X.columns.values.tolist()), targets

def apply(log: Union[EventLog, EventStream],
          parameters: Optional[Dict[Union[str, Parameters], Any]] = None) -> List[int]:
    """
    Counts the intersections of each interval event with the other interval events of the log
    (all the events are considered, not looking at the activity)

    Parameters
    ----------------
    log
        Event log
    parameters
        Parameters of the algorithm, including:
        - Parameters.START_TIMESTAMP_KEY => the attribute to consider as start timestamp
        - Parameters.TIMESTAMP_KEY => the attribute to consider as timestamp

    Returns
    -----------------
    overlap
        For each interval event, ordered by the order of appearance in the log,
        associates the number of intersecting events.
    """
    if parameters is None:
        parameters = {}

    log = log_converter.apply(log, variant=log_converter.Variants.TO_EVENT_LOG, parameters=parameters)

    start_timestamp_key = exec_utils.get_param_value(Parameters.START_TIMESTAMP_KEY, parameters,
                                                     xes_constants.DEFAULT_TIMESTAMP_KEY)
    timestamp_key = exec_utils.get_param_value(Parameters.TIMESTAMP_KEY, parameters,
                                               xes_constants.DEFAULT_TIMESTAMP_KEY)

    points = []
    for trace in log:
        for event in trace:
            points.append((event[start_timestamp_key].timestamp(), event[timestamp_key].timestamp()))

    return compute.apply(points, parameters=parameters)

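# Usage sketch (not part of the original module): assumes `log` is an interval log in which
# each event carries both a start and a completion timestamp; the keys below follow pm4py's
# usual defaults and only need to be passed if the log uses different attribute names.
def _example_event_overlap(log):
    overlap = apply(log, parameters={Parameters.START_TIMESTAMP_KEY: "start_timestamp",
                                     Parameters.TIMESTAMP_KEY: "time:timestamp"})
    # overlap[i] is the number of events intersecting the i-th event of the log
    return overlap
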
def apply(log, net, marking, final_marking, parameters=None, variant=None):
    """
    Method to apply ET Conformance

    Parameters
    -----------
    log
        Trace log
    net
        Petri net
    marking
        Initial marking
    final_marking
        Final marking
    parameters
        Parameters of the algorithm, including:
        pm4py.util.constants.PARAMETER_CONSTANT_ACTIVITY_KEY -> Activity key
    variant
        Variant of the algorithm that should be applied:
        - Variants.ETCONFORMANCE_TOKEN
        - Variants.ALIGN_ETCONFORMANCE
    """
    warnings.warn("Use the pm4py.algo.evaluation.precision package")

    if parameters is None:
        parameters = {}

    log = log_conversion.apply(log, parameters, log_conversion.TO_EVENT_LOG)

    # execute the following part of the code when the variant is not specified by the user
    if variant is None:
        if not check_easy_soundness_net_in_fin_marking(net, marking, final_marking):
            # in the case the net is not an easy-sound workflow net, we must apply token-based replay
            variant = ETCONFORMANCE_TOKEN
        else:
            # otherwise, use the align-etconformance approach (safer, in the case the model contains duplicates)
            variant = ALIGN_ETCONFORMANCE

    return exec_utils.get_variant(variant).apply(log, net, marking, final_marking, parameters=parameters)

def __export_log(log, output_file_path, variant=Variants.ETREE, parameters=None):
    """
    Method to export a XES from a log

    Parameters
    -----------
    log
        Trace log
    output_file_path
        Output file path
    variant
        Selected variant of the algorithm
    parameters
        Parameters of the algorithm:
        Parameters.COMPRESS -> Indicates that the XES file must be compressed
    """
    parameters = dict() if parameters is None else parameters

    return exec_utils.get_variant(variant).apply(log_conversion.apply(log, parameters=parameters), output_file_path,
                                                 parameters=parameters)

def apply(log: EventLog, value: Any, parameters: Optional[Dict[str, Any]] = None) -> EventLog:
    """
    Filters the traces of the log in which the given attribute value is repeated
    (in a range of repetitions that is specified by the user)

    Parameters
    ----------------
    log
        Event log
    value
        Value that is investigated
    parameters
        Parameters of the filter, including:
        - Parameters.ATTRIBUTE_KEY => the attribute key
        - Parameters.MIN_REP => minimum number of repetitions
        - Parameters.MAX_REP => maximum number of repetitions

    Returns
    ----------------
    filtered_log
        Filtered log
    """
    if parameters is None:
        parameters = {}

    log = converter.apply(log, parameters=parameters)

    attribute_key = exec_utils.get_param_value(Parameters.ATTRIBUTE_KEY, parameters, xes_constants.DEFAULT_NAME_KEY)
    min_rep = exec_utils.get_param_value(Parameters.MIN_REP, parameters, 2)
    max_rep = exec_utils.get_param_value(Parameters.MAX_REP, parameters, sys.maxsize)

    filtered_log = EventLog(list(), attributes=log.attributes, extensions=log.extensions, classifiers=log.classifiers,
                            omni_present=log.omni_present, properties=log.properties)

    for trace in log:
        rep = 0
        for event in trace:
            if attribute_key in event and event[attribute_key] == value:
                rep += 1
        if min_rep <= rep <= max_rep:
            filtered_log.append(trace)

    return filtered_log

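# Usage sketch (not part of the original module): keeps the traces in which the value
# "check ticket" (hypothetical activity name) occurs between 2 and 3 times.
def _example_filter_value_repetitions(log):
    return apply(log, "check ticket",
                 parameters={Parameters.MIN_REP: 2, Parameters.MAX_REP: 3})
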
def export(log, output_file_path, parameters=None):
    """
    Exports the given log to CSV format

    Parameters
    ----------
    log: :class:`pm4py.log.log.EventLog`
        Event log. Also, can take a log and convert it to event stream
    output_file_path:
        Output file path
    parameters
        Possible parameters of the algorithm
    """
    if parameters is None:
        parameters = {}
    del parameters

    df = converter.apply(log, variant=converter.Variants.TO_DATA_FRAME)
    df.to_csv(output_file_path, index=False)

def apply(con, ref_type="EKKO", gjahr="2014", min_extr_date="2014-01-01 00:00:00", mandt="800", bukrs="1000",
          extra_els_query=None):
    dataframe = p2p_1d_dataframe.apply(con, gjahr=gjahr, ref_type=ref_type, min_extr_date=min_extr_date, mandt=mandt,
                                       bukrs=bukrs, extra_els_query=extra_els_query)
    log = log_converter.apply(dataframe, parameters={"stream_postprocessing": True})
    print("converted dataframe")
    log = sorting.sort_timestamp(log, "time:timestamp")

    return log

def get_tangible_reachability_from_log_net_im_fm(log, net, im, fm, parameters=None):
    """
    Gets the tangible reachability graph from a log and an accepting Petri net

    Parameters
    ---------------
    log
        Event log
    net
        Petri net
    im
        Initial marking
    fm
        Final marking

    Returns
    ------------
    reachab_graph
        Reachability graph
    tangible_reach_graph
        Tangible reachability graph
    stochastic_info
        Stochastic information
    """
    if parameters is None:
        parameters = {}

    from pm4py.algo.simulation.montecarlo.utils import replay

    stochastic_info = replay.get_map_from_log_and_net(log_converter.apply(log, parameters=parameters), net, im, fm,
                                                      parameters=parameters)
    reachability_graph, tangible_reachability_graph = get_tangible_reachability_from_net_im_sinfo(
        net, im, stochastic_info, parameters=parameters)

    return reachability_graph, tangible_reachability_graph, stochastic_info

def get_slope(start_node, end_node, log):
    # Here we filter for a transition within a given (filtered) log file
    # and we call slope_from_dateseries on it in order to get the slope
    dataframe = log_converter.apply(log, variant=log_converter.Variants.TO_DATA_FRAME)
    unique_cases = dataframe['case:concept:name'].unique()
    nunique_cases = dataframe['case:concept:name'].nunique()

    time_series = []
    for n in range(nunique_cases):
        case_df = dataframe[dataframe['case:concept:name'] == unique_cases[n]]
        case_df = case_df.sort_values(by=['time:timestamp'])
        for index, row in case_df.iterrows():
            # stop at the last event of the case (there is no successor to compare with)
            if min(case_df.index) + len(case_df.index) - 1 == index:
                break
            elif row[ACTIVITY_NAMES] == start_node and case_df.loc[index + 1, ACTIVITY_NAMES] == end_node:
                time_series.append(case_df.loc[index + 1, 'time:timestamp'])
        del case_df

    day_min = min(time_series).date()
    day_max = max(time_series).date()

    df = pd.DataFrame()
    date_series = [x.date() for x in time_series]

    daterange = pd.date_range(start=day_min, end=day_max)
    for single_date in daterange:
        data = [{
            'day': single_date.strftime("%Y-%m-%d"),
            'count': date_series.count(single_date.date())
        }]
        # note: DataFrame.append is deprecated in recent pandas versions
        df = df.append(data, ignore_index=True)

    slope = slope_from_dateseries(df)
    return slope

def apply(log: Union[EventLog, EventStream, pd.DataFrame],
          parameters: Optional[Dict[Union[str, Parameters], Any]] = None,
          variant=DEFAULT_VARIANT) -> Tuple[PetriNet, Marking, Marking]:
    """
    Apply the Alpha Miner on top of a log

    Parameters
    -----------
    log
        Log
    variant
        Variant of the algorithm to use:
        - Variants.ALPHA_VERSION_CLASSIC
        - Variants.ALPHA_VERSION_PLUS
    parameters
        Possible parameters of the algorithm, including:
        Parameters.ACTIVITY_KEY -> Name of the attribute that contains the activity

    Returns
    -----------
    net
        Petri net
    marking
        Initial marking
    final_marking
        Final marking
    """
    if parameters is None:
        parameters = {}

    case_id_glue = exec_utils.get_param_value(Parameters.CASE_ID_KEY, parameters, pmutil.constants.CASE_CONCEPT_NAME)
    activity_key = exec_utils.get_param_value(Parameters.ACTIVITY_KEY, parameters, xes_util.DEFAULT_NAME_KEY)
    start_timestamp_key = exec_utils.get_param_value(Parameters.START_TIMESTAMP_KEY, parameters, None)
    timestamp_key = exec_utils.get_param_value(Parameters.TIMESTAMP_KEY, parameters, xes_util.DEFAULT_TIMESTAMP_KEY)

    if pkgutil.find_loader("pandas"):
        import pandas

        if isinstance(log, pandas.core.frame.DataFrame) and variant == ALPHA_VERSION_CLASSIC:
            dfg = df_statistics.get_dfg_graph(log, case_id_glue=case_id_glue, activity_key=activity_key,
                                              timestamp_key=timestamp_key, start_timestamp_key=start_timestamp_key)
            return exec_utils.get_variant(variant).apply_dfg(dfg, parameters=parameters)

    return exec_utils.get_variant(variant).apply(log_conversion.apply(log, parameters, log_conversion.TO_EVENT_LOG),
                                                 parameters)

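# Usage sketch (not part of the original module): the XES path is hypothetical; the importer
# is pm4py's standard XES importer and the default (classic) Alpha variant is used.
def _example_alpha_miner():
    from pm4py.objects.log.importer.xes import importer as xes_importer

    log = xes_importer.apply("logs/running-example.xes")  # hypothetical log file
    net, initial_marking, final_marking = apply(log)  # the Alpha Miner apply defined above
    return net, initial_marking, final_marking
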
def filter_log_on_max_no_activities(log: EventLog, max_no_activities: int = 25,
                                    parameters: Optional[Dict[Union[str, Parameters], Any]] = None) -> EventLog:
    """
    Filter a log on a maximum number of activities

    Parameters
    -------------
    log
        Log
    max_no_activities
        Maximum number of activities
    parameters
        Parameters of the algorithm

    Returns
    -------------
    filtered_log
        Filtered version of the event log
    """
    if parameters is None:
        parameters = {}

    log = log_converter.apply(log, variant=log_converter.Variants.TO_EVENT_LOG, parameters=parameters)

    activity_key = parameters[
        PARAMETER_CONSTANT_ACTIVITY_KEY] if PARAMETER_CONSTANT_ACTIVITY_KEY in parameters else DEFAULT_NAME_KEY
    parameters[PARAMETER_CONSTANT_ATTRIBUTE_KEY] = activity_key

    all_activities = sorted([(x, y) for x, y in get_attribute_values(log, activity_key).items()], key=lambda x: x[1],
                            reverse=True)
    activities = all_activities[:min(len(all_activities), max_no_activities)]
    activities = [x[0] for x in activities]

    if len(activities) < len(all_activities):
        log = apply_events(log, activities, parameters=parameters)

    return log

def get_prefix_matrix(log, parameters=None):
    """
    Gets the prefix matrix from a log object

    Parameters
    --------------
    log
        Log
    parameters
        Parameters of the algorithm: activity_key

    Returns
    --------------
    prefix_matrix
        Prefix matrix
    activities
        Sorted (by name) activities of the log
    """
    if parameters is None:
        parameters = {}

    keep_unique = parameters[KEEP_UNIQUE] if KEEP_UNIQUE in parameters else False

    activity_key = parameters[
        constants.PARAMETER_CONSTANT_ACTIVITY_KEY] if constants.PARAMETER_CONSTANT_ACTIVITY_KEY in parameters else xes.DEFAULT_NAME_KEY
    parameters[constants.PARAMETER_CONSTANT_ACTIVITY_KEY] = activity_key
    parameters[constants.PARAMETER_CONSTANT_ATTRIBUTE_KEY] = activity_key

    if type(log) is EventStream:
        log = log_conversion.apply(log, parameters=parameters)

    variants_list = get_variants_list(log, parameters=parameters)
    activities = get_activities_list(log, parameters=parameters)

    if keep_unique:
        prefix_matrix, activities = get_prefix_matrix_from_variants_list(variants_list, activities,
                                                                         parameters=parameters)
    else:
        prefix_matrix, activities = get_prefix_matrix_from_event_log_not_unique(log, activities,
                                                                                parameters=parameters)

    return prefix_matrix, activities

def attr_value_different_persons(log, A, parameters=None):
    """
    Checks whether an attribute value is assumed on events done by different resources

    Parameters
    ------------
    log
        Log
    A
        Attribute value to consider
    parameters
        Parameters of the algorithm, including the attribute key and the positive parameter:
        - if True, then filters all the cases containing occurrences of A done by different resources
        - if False, then filters all the cases not containing occurrences of A done by different resources

    Returns
    -------------
    filtered_log
        Filtered log
    """
    if parameters is None:
        parameters = {}

    if not isinstance(log, EventLog):
        log = log_converter.apply(log, variant=log_converter.TO_EVENT_LOG, parameters=parameters)

    attribute_key = exec_utils.get_param_value(Parameters.ATTRIBUTE_KEY, parameters, DEFAULT_NAME_KEY)
    resource_key = exec_utils.get_param_value(Parameters.RESOURCE_KEY, parameters, DEFAULT_RESOURCE_KEY)
    positive = exec_utils.get_param_value(Parameters.POSITIVE, parameters, True)

    new_log = EventLog()
    for trace in log:
        occ_A = set([trace[i][resource_key] for i in range(len(trace)) if
                     attribute_key in trace[i] and resource_key in trace[i] and trace[i][attribute_key] == A])
        if len(occ_A) > 1:
            if positive:
                new_log.append(trace)
        elif not positive:
            new_log.append(trace)

    return new_log

def apply(log, list_activities, parameters=None):
    """
    Finds the performance spectrum provided a log/dataframe
    and a list of activities

    Parameters
    -------------
    log
        Event log/Dataframe
    list_activities
        List of activities interesting for the performance spectrum (at least two)
    parameters
        Parameters of the algorithm, including the activity key and the timestamp key

    Returns
    -------------
    ps
        Performance spectrum object (dictionary)
    """
    from pm4py.objects.conversion.log import converter as log_conversion

    if parameters is None:
        parameters = {}

    sample_size = parameters[PARAMETER_SAMPLE_SIZE] if PARAMETER_SAMPLE_SIZE in parameters else DEFAULT_SAMPLE_SIZE

    if len(list_activities) < 2:
        raise Exception("performance spectrum can be applied providing at least two activities!")

    if type(log) is pd.DataFrame:
        points = VERSIONS[DATAFRAME](log, list_activities, sample_size, parameters)
    else:
        points = VERSIONS[LOG](log_conversion.apply(log), list_activities, sample_size, parameters)

    ps = {"list_activities": list_activities, "points": points}

    return ps

def apply(log, parameters=None, variant=Variants.HANDOVER_LOG):
    """
    Calculates a SNA metric

    Parameters
    ------------
    log
        Log
    parameters
        Possible parameters of the algorithm
    variant
        Variant of the algorithm to apply. Possible values:
        - Variants.HANDOVER_LOG
        - Variants.WORKING_TOGETHER_LOG
        - Variants.SUBCONTRACTING_LOG
        - Variants.JOINTACTIVITIES_LOG
        - Variants.HANDOVER_PANDAS
        - Variants.WORKING_TOGETHER_PANDAS
        - Variants.SUBCONTRACTING_PANDAS
        - Variants.JOINTACTIVITIES_PANDAS

    Returns
    -----------
    tuple
        Tuple containing the metric matrix and the resources list
    """
    if parameters is None:
        parameters = {}

    enable_metric_normalization = exec_utils.get_param_value(Parameters.METRIC_NORMALIZATION, parameters, False)

    if variant in [Variants.HANDOVER_LOG, Variants.WORKING_TOGETHER_LOG, Variants.JOINTACTIVITIES_LOG,
                   Variants.SUBCONTRACTING_LOG]:
        log = log_conversion.apply(log, parameters=parameters)

    sna = exec_utils.get_variant(variant).apply(log, parameters=parameters)
    abs_max = np.max(np.abs(sna[0]))
    if enable_metric_normalization and abs_max > 0:
        sna[0] = sna[0] / abs_max

    return sna

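# Usage sketch (not part of the original module): computes the handover-of-work metric with
# normalization enabled; `log` is any object accepted by the conversion performed above.
def _example_sna_handover(log):
    sna_result = apply(log, parameters={Parameters.METRIC_NORMALIZATION: True},
                       variant=Variants.HANDOVER_LOG)
    # sna_result[0] is the metric matrix (see the docstring above for the full return value)
    return sna_result
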
def filter_on_ncases(log: EventLog, max_no_cases: int = 1000) -> EventLog:
    """
    Get only a specified number of traces from a log

    Parameters
    -----------
    log
        Log
    max_no_cases
        Desired number of traces from the log

    Returns
    -----------
    filtered_log
        Filtered log
    """
    log = log_converter.apply(log, variant=log_converter.Variants.TO_EVENT_LOG)
    filtered_log = EventLog(log[:min(len(log), max_no_cases)])
    return filtered_log

def get_log_obj_type(self, objtype):
    columns = [x for x in self.exploded_dataframe.columns if x.startswith("event_")] + [objtype]
    dataframe = self.exploded_dataframe[columns].dropna(how="any", subset=[objtype])
    dataframe = succint_mdl_to_exploded_mdl.apply(dataframe)
    dataframe = dataframe.rename(columns={"event_activity": "concept:name", "event_timestamp": "time:timestamp",
                                          objtype: "case:concept:name"})
    stream = EventStream(dataframe.to_dict('r'))
    log = log_conv_factory.apply(stream)
    log = sorting.sort_timestamp(log, "time:timestamp")
    exported_log = base64.b64encode(xes_exporter.export_log_as_string(log)).decode("utf-8")

    return self.name + "_" + objtype, "xes", exported_log

def import_xes():
    pd.set_option('display.max_columns', None)
    pd.options.display.width = None

    global event_log
    parameters = {constants.PARAMETER_CONSTANT_ACTIVITY_KEY: c.ACTIVITY_ATTRIBUTE_NAME}
    event_log = xes_importer.apply('input_event_logs/{}.xes'.format(c.FILE_NAME), parameters=parameters)

    df = log_converter.apply(event_log, variant=log_converter.Variants.TO_DATA_FRAME, parameters=parameters)

    attribute_list_copy = []
    for attribute in c.ATTRIBUTE_LIST:
        if attribute in df.columns:
            attribute_list_copy.append(attribute)

    if c.ORG_RESOURCE_ATTRIBUTE_NAME in df:
        df[c.ORG_RESOURCE_ATTRIBUTE_NAME].fillna('missing', inplace=True)

    return df[attribute_list_copy]

def get_start_activities(log: EventLog,
                         parameters: Optional[Dict[Union[str, Parameters], Any]] = None) -> Dict[str, int]:
    """
    Get the start activities of the log along with their count

    Parameters
    ----------
    log
        Log
    parameters
        Parameters of the algorithm, including:
        Parameters.ACTIVITY_KEY -> Attribute key (must be specified if different from concept:name)

    Returns
    ----------
    start_activities
        Dictionary of start activities associated with their count
    """
    if parameters is None:
        parameters = {}

    attribute_key = exec_utils.get_param_value(Parameters.ACTIVITY_KEY, parameters, DEFAULT_NAME_KEY)

    log = log_converter.apply(log, variant=log_converter.Variants.TO_EVENT_LOG, parameters=parameters)

    start_activities = {}

    for trace in log:
        if len(trace) > 0:
            if attribute_key in trace[0]:
                activity_first_event = trace[0][attribute_key]
                if activity_first_event not in start_activities:
                    start_activities[activity_first_event] = 0
                start_activities[activity_first_event] = start_activities[activity_first_event] + 1

    return start_activities

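# Usage sketch (not part of the original module): counts the first activity of each trace;
# the resulting dictionary maps each start activity to the number of traces starting with it.
def _example_start_activities(log):
    start_activities = get_start_activities(log)
    for activity, count in sorted(start_activities.items(), key=lambda kv: kv[1], reverse=True):
        print(activity, count)
    return start_activities
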
def __export_log_as_string(log, variant=Variants.ETREE, parameters=None):
    """
    Method to export a XES from a log as a string

    Parameters
    -----------
    log
        Trace log
    variant
        Selected variant of the algorithm
    parameters
        Parameters of the algorithm

    Returns
    -----------
    string
        String describing the XES
    """
    parameters = dict() if parameters is None else parameters

    variant = variant if isinstance(variant, Variants) else Variants.ETREE

    return variant.value.apply(log_conversion.apply(log, parameters=parameters), parameters=parameters)