def test_importExportCSVtoCSV(self):
    """Round-trip check: import a CSV log, transform it, export it to CSV,
    re-import the exported file and verify the trace count is unchanged."""
    # unittest requires instance methods; touching self silences static-method warnings
    self.dummy_variable = "dummy_value"
    source_path = os.path.join(INPUT_DATA_DIR, "running-example.csv")
    export_path = os.path.join(OUTPUT_DATA_DIR, "running-example-exported.csv")
    event_log = csv_importer.import_event_stream(source_path)
    event_log = sorting.sort_timestamp(event_log)
    event_log = sampling.sample(event_log)
    event_log = index_attribute.insert_event_index_as_event_attribute(event_log)
    log = log_conv_fact.apply(event_log)
    log = sorting.sort_timestamp(log)
    log = sampling.sample(log)
    log = index_attribute.insert_trace_index_as_event_attribute(log)
    event_log_transformed = log_conv_fact.apply(
        log, variant=log_conv_fact.TO_EVENT_STREAM)
    csv_exporter.export(event_log_transformed, export_path)
    event_log_imported_after_export = csv_importer.import_event_stream(export_path)
    log_imported_after_export = log_conv_fact.apply(event_log_imported_after_export)
    self.assertEqual(len(log), len(log_imported_after_export))
    os.remove(export_path)
def test_csv1documentation(self):
    """Replays the CSV documentation example: import a CSV as an event stream,
    convert it to a log (both via transform and via a dataframe), then export
    both representations back to CSV."""
    # unittest requires instance methods; touching self silences static-method warnings
    self.dummy_variable = "dummy_value"
    import os
    from pm4py.objects.log.importer.csv import factory as csv_importer
    event_log = csv_importer.import_event_stream(
        os.path.join("input_data", "running-example.csv"))
    event_log_length = len(event_log)
    del event_log_length
    from pm4py.objects.log import transform
    log = transform.transform_event_stream_to_event_log(
        event_log, case_glue="case:concept:name")
    del log
    from pm4py.objects.log.importer.csv.versions import pandas_df_imp
    dataframe = pandas_df_imp.import_dataframe_from_path(
        os.path.join("input_data", "running-example.csv"))
    event_log = log_conv_fact.apply(dataframe,
                                    variant=log_conv_fact.TO_EVENT_STREAM)
    log = log_conv_fact.apply(event_log)
    # the exporter only needs importing once (the original snippet imported
    # it a second time before the second export — redundant, removed)
    from pm4py.objects.log.exporter.csv import factory as csv_exporter
    csv_exporter.export(event_log, "outputFile1.csv")
    os.remove("outputFile1.csv")
    csv_exporter.export(log, "outputFile2.csv")
    os.remove("outputFile2.csv")
def execute_script():
    """Imports every XES log found in the input directory, exports it again,
    saves a classifier-decorated DFG visualization for it, and re-imports the
    exported file as a sanity check.

    Fixes over the previous version:
      * paths are built with os.path.join instead of hard-coded "\\" so the
        script also runs on POSIX systems;
      * the bare ``except:`` is narrowed to ``except Exception`` so it no
        longer swallows KeyboardInterrupt/SystemExit.
    """
    log_input_directory = "xesinput"
    all_logs_names = [name for name in os.listdir(log_input_directory)
                      if ".xe" in name]
    for log_name in all_logs_names:
        log_path = os.path.join(log_input_directory, log_name)
        log = xes_importer.import_log(log_path)
        print("\n\n")
        print("log loaded")
        print("Number of traces - ", len(log))
        event_log = log_conv_fact.apply(log,
                                        variant=log_conv_fact.TO_EVENT_STREAM)
        print("Number of events - ", len(event_log))
        print("Classifiers ", log.classifiers)
        exp_log_name = os.path.join("xescert_exportlogs", "exp_" + log_name)
        print("exporting log", exp_log_name)
        xes_exporter.export_log(log, exp_log_name)
        print("exported log", exp_log_name)
        log, classifier_attr_key = insert_classifier.search_act_class_attr(log)
        classifiers = list(log.classifiers.keys())
        if classifier_attr_key is None and classifiers:
            try:
                print(classifiers)
                log, classifier_attr_key = \
                    insert_classifier.insert_activity_classifier_attribute(
                        log, classifiers[0])
                print(classifier_attr_key)
            except Exception:
                # best-effort: a log with a malformed classifier should not
                # abort the whole batch
                print("exception in handling classifier")
        if classifier_attr_key is None:
            classifier_attr_key = "concept:name"
        if len(event_log) > 0 and classifier_attr_key in event_log[0]:
            parameters = {
                constants.PARAMETER_CONSTANT_ACTIVITY_KEY: classifier_attr_key
            }
            dfg = dfg_factory.apply(log, parameters=parameters)
            gviz = dfg_vis_factory.apply(dfg, log=log, variant="frequency",
                                         parameters=parameters)
            # dfg_vis_factory.view(gviz)
            dfg_vis_factory.save(
                gviz,
                os.path.join("xescert_images", log_name.replace("xes", "png")))
        print("Reimporting log file just exported - ", exp_log_name)
        log = xes_importer.import_log(exp_log_name)
        print("log loaded", exp_log_name)
        print("Number of traces - ", len(log))
        event_log = log_conv_fact.apply(log,
                                        variant=log_conv_fact.TO_EVENT_STREAM)
        print("Number of events - ", len(event_log))
        print("Classifiers ", log.classifiers)
def apply_events(log, dt1, dt2, parameters=None):
    """
    Get a new log containing all the events contained in the given interval

    Parameters
    -----------
    log
        Log
    dt1
        Lower bound to the interval
    dt2
        Upper bound to the interval
    parameters
        Possible parameters of the algorithm, including:
            timestamp_key -> Attribute to use as timestamp

    Returns
    ------------
    filtered_log
        Filtered log
    """
    parameters = {} if parameters is None else parameters
    timestamp_key = parameters.get(PARAMETER_CONSTANT_TIMESTAMP_KEY,
                                   DEFAULT_TIMESTAMP_KEY)
    lower_bound = get_dt_from_string(dt1)
    upper_bound = get_dt_from_string(dt2)
    stream = log_converter.apply(log, variant=log_converter.TO_EVENT_STREAM)
    # keep only events strictly inside the (lower, upper) interval;
    # timestamps are made naive before comparison
    kept_events = [event for event in stream
                   if lower_bound < event[timestamp_key].replace(tzinfo=None) < upper_bound]
    return log_converter.apply(EventStream(kept_events))
def test_importExportXEStoCSV(self):
    """Import an XES log, export it as CSV, re-import it and check the
    number of traces is preserved."""
    # unittest requires instance methods; touching self silences static-method warnings
    self.dummy_variable = "dummy_value"
    exported_path = os.path.join(OUTPUT_DATA_DIR, "running-example-exported.csv")
    log = xes_importer.import_log(
        os.path.join(INPUT_DATA_DIR, "running-example.xes"))
    event_log = log_conv_fact.apply(log, variant=log_conv_fact.TO_EVENT_STREAM)
    csv_exporter.export(event_log, exported_path)
    event_log_newimport = csv_importer.import_event_stream(exported_path)
    log_imported_after_export = log_conv_fact.apply(event_log_newimport)
    self.assertEqual(len(log), len(log_imported_after_export))
    os.remove(exported_path)
def test_prefiltering_dataframe(self):
    """Pre-filter a raw dataframe (activities, cases) before timestamp
    conversion, then convert it through a stream into a log."""
    # unittest requires instance methods; touching self silences static-method warnings
    self.dummy_variable = "dummy_value"
    source_path = os.path.join(INPUT_DATA_DIR, "running-example.csv")
    frame = csv_import_adapter.import_dataframe_from_path_wo_timeconversion(
        source_path, sep=',')
    frame = attributes_filter.filter_df_keeping_spno_activities(
        frame, activity_key="concept:name")
    frame = case_filter.filter_on_ncases(frame, case_id_glue="case:concept:name")
    frame = csv_import_adapter.convert_timestamp_columns_in_df(frame)
    frame = frame.sort_values('time:timestamp')
    event_log = log_conv_fact.apply(frame, variant=log_conv_fact.TO_EVENT_STREAM)
    log = log_conv_fact.apply(event_log)
    del log
def test_xesimp_csvexp(self):
    """Import an XES log and export each converted representation
    (log, stream, dataframe) to CSV."""
    source = xes_importer.apply(
        os.path.join("input_data", "running-example.xes"))
    as_log = log_conv_factory.apply(source, variant=log_conv_factory.TO_EVENT_LOG)
    as_stream = log_conv_factory.apply(
        source, variant=log_conv_factory.TO_EVENT_STREAM)
    as_frame = log_conv_factory.apply(source, variant=log_conv_factory.TO_DATAFRAME)
    # each export overwrites the previous one; only existence is exercised
    for representation in (as_log, as_stream, as_frame):
        csv_exporter_factory.export(representation, "ru.csv")
    os.remove('ru.csv')
def test_pdimp_xesexp(self):
    """Import a CSV as a dataframe and export each converted representation
    (log, stream, dataframe) to XES."""
    source = csv_import_adapter.import_dataframe_from_path(
        os.path.join("input_data", "running-example.csv"))
    as_log = log_conv_factory.apply(source, variant=log_conv_factory.TO_EVENT_LOG)
    as_stream = log_conv_factory.apply(
        source, variant=log_conv_factory.TO_EVENT_STREAM)
    as_frame = log_conv_factory.apply(source, variant=log_conv_factory.TO_DATAFRAME)
    # each export overwrites the previous one; only existence is exercised
    for representation in (as_log, as_stream, as_frame):
        xes_exporter_factory.apply(representation, "ru.xes")
    os.remove('ru.xes')
def apply_events(log, values, parameters=None):
    """
    Filter log by keeping only events with an attribute value that belongs to the provided values list

    Parameters
    -----------
    log
        log
    values
        Allowed attributes
    parameters
        Parameters of the algorithm, including:
            activity_key -> Attribute identifying the activity in the log
            positive -> Indicate if events should be kept/removed

    Returns
    -----------
    filtered_log
        Filtered log
    """
    if parameters is None:
        parameters = {}
    attribute_key = parameters[
        PARAMETER_CONSTANT_ATTRIBUTE_KEY] if PARAMETER_CONSTANT_ATTRIBUTE_KEY in parameters else DEFAULT_NAME_KEY
    positive = parameters["positive"] if "positive" in parameters else True
    stream = log_conv_fact.apply(log, variant=log_conv_fact.TO_EVENT_STREAM)
    # NOTE(review): events missing attribute_key raise KeyError here —
    # presumably the attribute is expected on every event; confirm with callers
    if positive:
        stream = EventStream(
            list(filter(lambda x: x[attribute_key] in values, stream)))
    else:
        stream = EventStream(
            list(filter(lambda x: x[attribute_key] not in values, stream)))
    filtered_log = log_conv_fact.apply(stream)
    # the old trace-based implementation that was kept here as commented-out
    # dead code has been removed
    return filtered_log
def apply(log, parameters=None, variant=HANDOVER):
    """
    Calculates a SNA metric

    Parameters
    ------------
    log
        Log
    parameters
        Possible parameters of the algorithm
    variant
        Variant of the algorithm to apply. Possible values: handover, working_together

    Returns
    -----------
    tuple
        Tuple containing the metric matrix and the resources list
    """
    parameters = {} if parameters is None else parameters
    # guard clause: unknown variants fail fast
    if variant not in VERSIONS_LOG:
        raise Exception("metric not implemented yet")
    converted_log = conv_factory.apply(log, parameters=parameters)
    return VERSIONS_LOG[variant](converted_log, parameters=parameters)
def apply(obj, petri_net, initial_marking, final_marking, parameters=None,
          version=VERSION_STATE_EQUATION_A_STAR):
    """Dispatch alignment computation: a single Trace is routed to
    apply_trace, anything else is converted to an event log and handled by
    apply_log. Standard activity/timestamp/case-id keys are defaulted when
    the caller does not supply them."""
    if parameters is None:
        parameters = {}
    parameters.setdefault(pmutil.constants.PARAMETER_CONSTANT_ACTIVITY_KEY,
                          xes_util.DEFAULT_NAME_KEY)
    parameters.setdefault(pmutil.constants.PARAMETER_CONSTANT_TIMESTAMP_KEY,
                          xes_util.DEFAULT_TIMESTAMP_KEY)
    parameters.setdefault(pmutil.constants.PARAMETER_CONSTANT_CASEID_KEY,
                          log_util.CASE_ATTRIBUTE_GLUE)
    if isinstance(obj, pm4py.objects.log.log.Trace):
        return apply_trace(obj, petri_net, initial_marking, final_marking,
                           parameters, version)
    converted = log_converter.apply(obj, parameters, log_converter.TO_EVENT_LOG)
    return apply_log(converted, petri_net, initial_marking, final_marking,
                     parameters, version)
def apply(log, parameters=None, variant=DEFAULT_VARIANT):
    """Computes the variant-specific result for a log or a pandas dataframe.

    Dataframes take a dedicated pandas code path (DFG computed directly with
    df_statistics); everything else is converted to an event log first.
    Standard activity/timestamp/case-id keys are defaulted when absent."""
    if parameters is None:
        parameters = {}
    parameters.setdefault(pmutil.constants.PARAMETER_CONSTANT_ACTIVITY_KEY,
                          xes_util.DEFAULT_NAME_KEY)
    parameters.setdefault(pmutil.constants.PARAMETER_CONSTANT_TIMESTAMP_KEY,
                          xes_util.DEFAULT_TIMESTAMP_KEY)
    parameters.setdefault(pmutil.constants.PARAMETER_CONSTANT_CASEID_KEY,
                          log_util.CASE_ATTRIBUTE_GLUE)
    if isinstance(log, pandas.core.frame.DataFrame):
        dfg = df_statistics.get_dfg_graph(
            log,
            case_id_glue=parameters[pmutil.constants.PARAMETER_CONSTANT_CASEID_KEY],
            activity_key=parameters[pmutil.constants.PARAMETER_CONSTANT_ACTIVITY_KEY],
            timestamp_key=parameters[pmutil.constants.PARAMETER_CONSTANT_TIMESTAMP_KEY])
        return VERSIONS_DFG[variant](dfg, parameters=parameters)
    converted = log_conversion.apply(log, parameters, log_conversion.TO_EVENT_LOG)
    return VERSIONS[variant](converted, parameters)
def apply(log, parameters=None, version=ALPHA_VERSION_CLASSIC):
    """
    Apply the Alpha Miner on top of a trace log

    Parameters
    -----------
    log
        Trace log
    version
        Variant of the algorithm to use (classic)
    parameters
        Possible parameters of the algorithm, including:
            activity key -> Name of the attribute that contains the activity

    Returns
    -----------
    net
        Petri net
    marking
        Initial marking
    final_marking
        Final marking
    """
    parameters = {} if parameters is None else parameters
    parameters.setdefault(pmutil.constants.PARAMETER_CONSTANT_ACTIVITY_KEY,
                          xes_util.DEFAULT_NAME_KEY)
    trace_log = log_conversion.apply(log, parameters,
                                     log_conversion.TO_TRACE_LOG)
    return VERSIONS[version](trace_log, parameters)
def download_csv_log(self):
    """
    Downloads the CSV log as string
    """
    frame = conversion_factory.apply(self.log,
                                     variant=conversion_factory.TO_DATAFRAME)
    return frame.to_string()
def generate_xes_from_dataframe(dataframe):
    """Converts a CSV-derived dataframe into an XES event log.

    Returns the converted log, or None (after printing a message) when the
    conversion fails.
    """
    try:
        # convert the csv imported in dataframe to xes log
        return conversion_factory.apply(dataframe)
    except Exception:
        # the previous version caught `Error`, a name that does not appear to
        # be defined/imported here and would itself raise NameError the moment
        # the handler was reached; Exception keeps the intended best-effort
        # behavior without masking the real failure with a NameError
        print("Invalid input")
def transient_analysis_from_dataframe(df, delay, parameters=None):
    """
    Gets the transient analysis from a dataframe and a delay

    Parameters
    -------------
    df
        Pandas dataframe
    delay
        Time delay
    parameters
        Parameters of the algorithm

    Returns
    -------------
    transient_result
        Transient analysis result
    """
    if parameters is None:
        parameters = {}
    # resolve the column names from parameters, falling back to XES defaults
    activity_key = parameters[
        constants.
        PARAMETER_CONSTANT_ACTIVITY_KEY] if constants.PARAMETER_CONSTANT_ACTIVITY_KEY in parameters else xes.DEFAULT_NAME_KEY
    case_id_glue = parameters[
        constants.
        PARAMETER_CONSTANT_CASEID_KEY] if constants.PARAMETER_CONSTANT_CASEID_KEY in parameters else CASE_CONCEPT_NAME
    timestamp_key = parameters[
        constants.
        PARAMETER_CONSTANT_TIMESTAMP_KEY] if constants.PARAMETER_CONSTANT_TIMESTAMP_KEY in parameters else xes.DEFAULT_TIMESTAMP_KEY
    # convert the dataframe to an event log (one-variant conversion)
    log = log_conv_factory.apply(df,
                                 variant=log_conv_factory.DF_TO_EVENT_LOG_1V,
                                 parameters=parameters)
    # gets the simple Petri net through simple miner
    net, im, fm = simple_factory.apply(log, parameters=parameters,
                                       classic_output=True)
    # per-activity occurrence counts, used to decorate the net
    activities_count = dict(df.groupby(activity_key).size())
    # mean-performance DFG computed directly on the dataframe
    dfg_performance = df_statistics.get_dfg_graph(df, measure="performance",
                                                  perf_aggregation_key="mean",
                                                  case_id_glue=case_id_glue,
                                                  activity_key=activity_key,
                                                  timestamp_key=timestamp_key)
    spaths = get_shortest_paths(net)
    # performance decorations projected onto the net via shortest paths
    aggregated_statistics = get_decorations_from_dfg_spaths_acticount(
        net, dfg_performance, spaths, activities_count, variant="performance")
    # gets the stochastic map out of the dataframe and the Petri net
    s_map = smap_builder.get_map_exponential_from_aggstatistics(
        aggregated_statistics, parameters=parameters)
    return transient_analysis_from_petri_net_and_smap(net, im, s_map, delay,
                                                      parameters=parameters)
def get_kde_date_attribute_json(log, attribute=DEFAULT_TIMESTAMP_KEY, parameters=None):
    """
    Gets the KDE estimation for the distribution of a date attribute values
    (expressed as JSON)

    Parameters
    -------------
    log
        Event stream object (if log, is converted)
    attribute
        Date attribute to analyse
    parameters
        Possible parameters of the algorithm, including:
            graph_points -> number of points to include in the graph

    Returns
    --------------
    x
        X-axis values to represent
    y
        Y-axis values to represent
    """
    # isinstance instead of an exact type check: EventLog subclasses are
    # converted too; EventStream instances are unaffected (an EventStream is
    # not an EventLog)
    if isinstance(log, EventLog):
        event_log = log_conv_fact.apply(log,
                                        variant=log_conv_fact.TO_EVENT_STREAM)
    else:
        event_log = log
    # timestamps are made naive before the KDE computation
    values = [event[attribute].replace(tzinfo=None)
              for event in event_log if attribute in event]
    return attributes_common.get_kde_date_attribute_json(values,
                                                         parameters=parameters)
def get_kde_numeric_attribute(log, attribute, parameters=None):
    """
    Gets the KDE estimation for the distribution of a numeric attribute values

    Parameters
    -------------
    log
        Event stream object (if log, is converted)
    attribute
        Numeric attribute to analyse
    parameters
        Possible parameters of the algorithm, including:
            graph_points -> number of points to include in the graph

    Returns
    --------------
    x
        X-axis values to represent
    y
        Y-axis values to represent
    """
    # isinstance instead of an exact type check: EventLog subclasses are
    # converted too; EventStream instances are unaffected (an EventStream is
    # not an EventLog)
    if isinstance(log, EventLog):
        event_log = log_conv_fact.apply(log,
                                        variant=log_conv_fact.TO_EVENT_STREAM)
    else:
        event_log = log
    values = [event[attribute] for event in event_log if attribute in event]
    return attributes_common.get_kde_numeric_attribute(values,
                                                       parameters=parameters)
def apply(log, parameters=None, variant=CLASSIC):
    """
    Discovers a Petri net using Heuristics Miner

    Parameters
    ------------
    log
        Event log
    parameters
        Possible parameters of the algorithm, including: activity_key,
        case_id_glue, timestamp_key, dependency_thresh, and_measure_thresh,
        min_act_count, min_dfg_occurrences, dfg_pre_cleaning_noise_thresh,
        loops_length_two_thresh
    variant
        Variant of the algorithm (classic)

    Returns
    ------------
    net
        Petri net
    im
        Initial marking
    fm
        Final marking
    """
    # dataframes go straight to the pandas implementation
    if isinstance(log, pandas.core.frame.DataFrame):
        return VERSIONS_PANDAS[variant](log, parameters=parameters)
    converted = conv_factory.apply(log, parameters=parameters)
    return VERSIONS[variant](converted, parameters=parameters)
def convert_to_log(frame: DataFrame) -> EventLog:
    """
    Converts a DataFrame to an EventLog.

    :param frame: the DataFrame
    :return: the resulting EventLog
    """
    converted = event_log_factory.apply(frame)
    return converted
def apply(log, parameters=None):
    """
    Gets the roles (group of different activities done by similar resources)
    out of the log

    Parameters
    -------------
    log
        Log object
    parameters
        Possible parameters of the algorithm

    Returns
    ------------
    roles
        List of different roles inside the log
    """
    parameters = {} if parameters is None else parameters
    resource_key = parameters.get(constants.PARAMETER_CONSTANT_RESOURCE_KEY,
                                  xes.DEFAULT_RESOURCE_KEY)
    activity_key = parameters.get(constants.PARAMETER_CONSTANT_ACTIVITY_KEY,
                                  xes.DEFAULT_NAME_KEY)
    stream = log_conv_factory.apply(log,
                                    variant=log_conv_factory.TO_EVENT_STREAM)
    # count how often each (resource, activity) pair occurs in the stream
    pair_counts = Counter((event[resource_key], event[activity_key])
                          for event in stream)
    return algorithm.apply(pair_counts, parameters=parameters)
def apply(log, petri_net, initial_marking, final_marking, parameters=None,
          variant="token_replay"):
    """Runs the selected conformance-checking variant on the log after
    converting it to an event log; standard activity/timestamp/case-id keys
    are defaulted when the caller does not supply them."""
    if parameters is None:
        parameters = {}
    parameters.setdefault(pmutil.constants.PARAMETER_CONSTANT_ACTIVITY_KEY,
                          xes_util.DEFAULT_NAME_KEY)
    parameters.setdefault(pmutil.constants.PARAMETER_CONSTANT_TIMESTAMP_KEY,
                          xes_util.DEFAULT_TIMESTAMP_KEY)
    parameters.setdefault(pmutil.constants.PARAMETER_CONSTANT_CASEID_KEY,
                          log_util.CASE_ATTRIBUTE_GLUE)
    converted = log_conversion.apply(log, parameters,
                                     log_conversion.TO_EVENT_LOG)
    return VERSIONS[variant](converted, petri_net, initial_marking,
                             final_marking, parameters=parameters)
def get_variants_matrix(log, parameters=None):
    """
    Gets the variants matrix from a log object

    Parameters
    -------------
    log
        Log
    parameters
        Parameters of the algorithm: activity_key

    Returns
    -------------
    variants_matrix
        Variants matrix
    activities
        Sorted (by name) activities of the log
    """
    parameters = {} if parameters is None else parameters
    activity_key = parameters.get(constants.PARAMETER_CONSTANT_ACTIVITY_KEY,
                                  xes.DEFAULT_NAME_KEY)
    # downstream helpers read both keys, so mirror the activity key into both
    parameters[constants.PARAMETER_CONSTANT_ACTIVITY_KEY] = activity_key
    parameters[constants.PARAMETER_CONSTANT_ATTRIBUTE_KEY] = activity_key
    # NOTE(review): exact type check kept on purpose — isinstance would also
    # match EventLog if it subclasses EventStream, changing which inputs get
    # converted; confirm before loosening
    if type(log) is EventStream:
        log = conversion_factory.apply(log, parameters=parameters)
    variants_list = get_variants_list(log, parameters=parameters)
    activities = get_activities_list(log, parameters=parameters)
    return get_variants_matrix_from_variants_list(variants_list, activities,
                                                  parameters=parameters)
def table_to_log(table, parameters=None):
    """
    Converts a Pyarrow table to an event log

    Parameters
    ------------
    table
        Pyarrow table
    parameters
        Possible parameters of the algorithm
    """
    parameters = {} if parameters is None else parameters
    columns = table.to_pydict()
    column_names = list(columns.keys())
    # for legacy format support: rewrite the legacy column-name separator
    # back to ":" when the legacy case column is present
    if LEGACY_PARQUET_CASECONCEPTNAME in column_names:
        for name in column_names:
            columns[name.replace(LEGACY_PARQUET_TP_REPLACER, ":")] = columns.pop(name)
    # transpose the column dict into one dict per event (row)
    events = [dict(zip(columns, row)) for row in zip(*columns.values())]
    return log_conv_factory.apply(EventStream(events), parameters=parameters)
def export_log_tree(log): """ Get XES log XML tree from a PM4Py log Parameters ----------- log PM4Py log Returns ----------- tree XML tree """ # If the log is in log_instance.EventStream, then transform it into log_instance.EventLog format if type(log) is log_instance.EventStream: log = log_converter.apply(log) root = etree.Element(xes_util.TAG_LOG) # add attributes at the log level export_attributes(log, root) # add extensions at the log level export_extensions(log, root) # add globals at the log level export_globals(log, root) # add classifiers at the log level export_classifiers(log, root) # add traces at the log level export_traces(log, root) tree = etree.ElementTree(root) return tree
def apply(log, net, initial_marking, final_marking, parameters=None,
          variant="token_replay"):
    """
    Factory method to apply token-based replay

    Parameters
    -----------
    log
        Log
    net
        Petri net
    initial_marking
        Initial marking
    final_marking
        Final marking
    parameters
        Parameters of the algorithm, including:
            pm4py.util.constants.PARAMETER_CONSTANT_ACTIVITY_KEY -> Activity key
    variant
        Variant of the algorithm to use
    """
    trace_log = log_converter.apply(log, parameters,
                                    log_converter.TO_TRACE_LOG)
    return VERSIONS[variant](trace_log, net, initial_marking, final_marking,
                             parameters=parameters)
def apply_numeric_events(log, int1, int2, parameters=None):
    """
    Apply a filter on events (numerical filter)

    Parameters
    --------------
    log
        Log
    int1
        Lower bound of the interval
    int2
        Upper bound of the interval
    parameters
        Possible parameters of the algorithm:
            PARAMETER_CONSTANT_ATTRIBUTE_KEY => indicates which attribute to filter
            positive => keep or remove traces with such events?

    Returns
    --------------
    filtered_log
        Filtered log
    """
    parameters = {} if parameters is None else parameters
    attribute_key = parameters.get(PARAMETER_CONSTANT_ATTRIBUTE_KEY,
                                   DEFAULT_NAME_KEY)
    positive = parameters.get("positive", True)
    stream = log_conv_fact.apply(log, variant=log_conv_fact.TO_EVENT_STREAM)

    def _in_interval(event):
        # events lacking the attribute never match
        return attribute_key in event and int1 <= event[attribute_key] <= int2

    def _out_of_interval(event):
        # events lacking the attribute never match
        return attribute_key in event and (event[attribute_key] < int1
                                           or event[attribute_key] > int2)

    predicate = _in_interval if positive else _out_of_interval
    filtered_stream = EventStream([event for event in stream
                                   if predicate(event)])
    return log_conv_fact.apply(filtered_stream)
def import_event_stream(path, parameters=None):
    """
    Imports a CSV file from the given path

    Parameters
    ----------
    path:
        Input CSV file path
    parameters
        Parameters of the algorithm, including
            sep -> column separator
            quotechar -> (if specified) Character that starts/end big strings in CSV
            nrows -> (if specified) Maximum number of rows to read from the CSV
            sort -> Boolean value that tells if the CSV should be ordered
            sort_field -> If sort option is enabled, then the CSV is
                automatically sorted by the specified column

    Returns
    -------
    log : :class:`pm4py.log.log.EventLog`
        An event log
    """
    parameters = {} if parameters is None else parameters
    # read the options with their defaults in one place each
    sep = parameters.get("sep", ",")
    quotechar = parameters.get("quotechar", None)
    nrows = parameters.get("nrows", None)
    sort = parameters.get("sort", False)
    sort_field = parameters.get("sort_field", "time:timestamp")
    insert_event_indexes = parameters.get("insert_event_indexes", False)
    timest_format = parameters.get("timest_format", None)
    timest_columns = parameters.get("timest_columns", None)
    df = import_dataframe_from_path(path, sep=sep, quotechar=quotechar,
                                    nrows=nrows, sort=sort,
                                    sort_field=sort_field,
                                    timest_format=timest_format,
                                    timest_columns=timest_columns)
    event_log = log_conv_fact.apply(df, variant=log_conv_fact.TO_EVENT_STREAM)
    if insert_event_indexes:
        event_log.insert_event_index_as_event_attribute()
    return event_log
def convert(self, filePath):
    """
    function that converts csv files into event logs
    :param filePath: the path to the file
    :return: event log
    """
    event_stream = csv_importer.import_event_stream(filePath)
    return conversion_factory.apply(event_stream)
def apply(log, parameters=None, variant=DFG_NATIVE):
    """
    Calculates DFG graph (frequency or performance) starting from a log

    Parameters
    ----------
    log
        Log
    parameters
        Possible parameters passed to the algorithms:
            aggregationMeasure -> performance aggregation measure (min, max, mean, median)
            activity_key -> Attribute to use as activity
            timestamp_key -> Attribute to use as timestamp
    variant
        Variant of the algorithm to use, possible values:
            native, frequency, performance, frequency_greedy, performance_greedy

    Returns
    -------
    dfg
        DFG graph
    """
    if parameters is None:
        parameters = {}
    # default the standard keys when the caller did not provide them
    if pmutil.constants.PARAMETER_CONSTANT_ACTIVITY_KEY not in parameters:
        parameters[pmutil.constants.
                   PARAMETER_CONSTANT_ACTIVITY_KEY] = xes_util.DEFAULT_NAME_KEY
    if pmutil.constants.PARAMETER_CONSTANT_TIMESTAMP_KEY not in parameters:
        parameters[
            pmutil.constants.
            PARAMETER_CONSTANT_TIMESTAMP_KEY] = xes_util.DEFAULT_TIMESTAMP_KEY
    if pmutil.constants.PARAMETER_CONSTANT_CASEID_KEY not in parameters:
        parameters[
            pmutil.constants.
            PARAMETER_CONSTANT_CASEID_KEY] = pmutil.constants.CASE_ATTRIBUTE_GLUE
    if isinstance(log, pandas.core.frame.DataFrame):
        # dataframe fast path: compute both frequency and performance DFGs
        # with pandas, then return the one matching the requested variant
        log = csv_import_adapter.convert_timestamp_columns_in_df(
            log, timest_columns=[
                parameters[pmutil.constants.PARAMETER_CONSTANT_TIMESTAMP_KEY]
            ])
        dfg_frequency, dfg_performance = df_statistics.get_dfg_graph(
            log,
            measure="both",
            activity_key=parameters[
                pmutil.constants.PARAMETER_CONSTANT_ACTIVITY_KEY],
            timestamp_key=parameters[
                pmutil.constants.PARAMETER_CONSTANT_TIMESTAMP_KEY],
            case_id_glue=parameters[
                pmutil.constants.PARAMETER_CONSTANT_CASEID_KEY])
        # substring match: 'native'/'frequency' variants select the
        # frequency DFG, everything else gets the performance DFG
        if 'native' in variant or 'frequency' in variant:
            return dfg_frequency
        else:
            return dfg_performance
    return VERSIONS[variant](log_conversion.apply(log, parameters,
                                                  log_conversion.TO_EVENT_LOG),
                             parameters=parameters)