def test_importExportCSVtoCSV(self):
     # assign an instance attribute to avoid static-method warnings
     # (unittest requires test cases to be defined as instance methods)
     self.dummy_variable = "dummy_value"
     event_log = csv_importer.import_event_stream(
         os.path.join(INPUT_DATA_DIR, "running-example.csv"))
     event_log = sorting.sort_timestamp(event_log)
     event_log = sampling.sample(event_log)
     event_log = index_attribute.insert_event_index_as_event_attribute(
         event_log)
     log = log_conv_fact.apply(event_log)
     log = sorting.sort_timestamp(log)
     log = sampling.sample(log)
     log = index_attribute.insert_trace_index_as_event_attribute(log)
     event_log_transformed = log_conv_fact.apply(
         log, variant=log_conv_fact.TO_EVENT_STREAM)
     csv_exporter.export(
         event_log_transformed,
         os.path.join(OUTPUT_DATA_DIR, "running-example-exported.csv"))
     event_log_imported_after_export = csv_importer.import_event_stream(
         os.path.join(OUTPUT_DATA_DIR, "running-example-exported.csv"))
     log_imported_after_export = log_conv_fact.apply(
         event_log_imported_after_export)
     self.assertEqual(len(log), len(log_imported_after_export))
     os.remove(os.path.join(OUTPUT_DATA_DIR,
                            "running-example-exported.csv"))
 def test_csv1documentation(self):
     # assign an instance attribute to avoid static-method warnings
     # (unittest requires test cases to be defined as instance methods)
     self.dummy_variable = "dummy_value"
     import os
     from pm4py.objects.log.importer.csv import factory as csv_importer
     event_log = csv_importer.import_event_stream(
         os.path.join("input_data", "running-example.csv"))
     event_log_length = len(event_log)
     del event_log_length
     from pm4py.objects.log import transform
     log = transform.transform_event_stream_to_event_log(
         event_log, case_glue="case:concept:name")
     del log
     from pm4py.objects.log.importer.csv.versions import pandas_df_imp
     dataframe = pandas_df_imp.import_dataframe_from_path(
         os.path.join("input_data", "running-example.csv"))
     event_log = log_conv_fact.apply(dataframe,
                                     variant=log_conv_fact.TO_EVENT_STREAM)
     log = log_conv_fact.apply(event_log)
     from pm4py.objects.log.exporter.csv import factory as csv_exporter
     csv_exporter.export(event_log, "outputFile1.csv")
     os.remove("outputFile1.csv")
     from pm4py.objects.log.exporter.csv import factory as csv_exporter
     csv_exporter.export(log, "outputFile2.csv")
     os.remove("outputFile2.csv")
Example #3
def execute_script():
    log_input_directory = "xesinput"
    all_logs_names = os.listdir(log_input_directory)
    all_logs_names = [log for log in all_logs_names if ".xe" in log]

    for logName in all_logs_names:
        # logPath = os.path.join("..", "tests", "inputData", logName)
        log_path = os.path.join(log_input_directory, logName)
        log = xes_importer.import_log(log_path)
        print("\n\n")
        print("log loaded")
        print("Number of traces - ", len(log))
        event_log = log_conv_fact.apply(log,
                                        variant=log_conv_fact.TO_EVENT_STREAM)
        print("Number of events - ", len(event_log))
        print("Classifiers ", log.classifiers)
        exp_log_name = os.path.join("xescert_exportlogs", "exp_" + logName)
        print("exporting log", exp_log_name)
        xes_exporter.export_log(log, exp_log_name)
        print("exported log", exp_log_name)

        log, classifier_attr_key = insert_classifier.search_act_class_attr(log)

        classifiers = list(log.classifiers.keys())
        if classifier_attr_key is None and classifiers:
            try:
                print(classifiers)
                log, classifier_attr_key = insert_classifier.insert_activity_classifier_attribute(
                    log, classifiers[0])
                print(classifier_attr_key)
            except Exception:
                print("exception in handling classifier")

        if classifier_attr_key is None:
            classifier_attr_key = "concept:name"

        if len(event_log) > 0 and classifier_attr_key in event_log[0]:
            parameters = {
                constants.PARAMETER_CONSTANT_ACTIVITY_KEY: classifier_attr_key
            }

            dfg = dfg_factory.apply(log, parameters=parameters)
            gviz = dfg_vis_factory.apply(dfg,
                                         log=log,
                                         variant="frequency",
                                         parameters=parameters)
            # dfg_vis_factory.view(gviz)

            dfg_vis_factory.save(
                gviz, os.path.join("xescert_images", logName.replace("xes", "png")))

        print("Reimporting log file just exported - ", exp_log_name)

        log = xes_importer.import_log(exp_log_name)
        print("log loaded", exp_log_name)
        print("Number of traces - ", len(log))
        event_log = log_conv_fact.apply(log,
                                        variant=log_conv_fact.TO_EVENT_STREAM)
        print("Number of events - ", len(event_log))
        print("Classifiers ", log.classifiers)
def apply_events(log, dt1, dt2, parameters=None):
    """
    Get a new log containing only the events that fall inside the given interval

    Parameters
    -----------
    log
        Log
    dt1
        Lower bound to the interval
    dt2
        Upper bound to the interval
    parameters
        Possible parameters of the algorithm, including:
            timestamp_key -> Attribute to use as timestamp

    Returns
    ------------
    filtered_log
        Filtered log
    """
    if parameters is None:
        parameters = {}
    timestamp_key = parameters.get(PARAMETER_CONSTANT_TIMESTAMP_KEY,
                                   DEFAULT_TIMESTAMP_KEY)
    dt1 = get_dt_from_string(dt1)
    dt2 = get_dt_from_string(dt2)

    stream = log_converter.apply(log, variant=log_converter.TO_EVENT_STREAM)
    filtered_stream = EventStream([x for x in stream if dt1 < x[timestamp_key].replace(tzinfo=None) < dt2])
    filtered_log = log_converter.apply(filtered_stream)

    return filtered_log
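
# A minimal usage sketch, assuming `log` already holds an imported EventLog
# (e.g. via the XES importer used elsewhere in these examples); the date
# strings below are in one of the formats parsed by get_dt_from_string.
events_2011 = apply_events(log, "2011-01-01 00:00:00", "2011-12-31 23:59:59")
print("events kept:", sum(len(trace) for trace in events_2011))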
 def test_importExportXEStoCSV(self):
     # assign an instance attribute to avoid static-method warnings
     # (unittest requires test cases to be defined as instance methods)
     self.dummy_variable = "dummy_value"
     log = xes_importer.import_log(os.path.join(INPUT_DATA_DIR, "running-example.xes"))
     event_log = log_conv_fact.apply(log, variant=log_conv_fact.TO_EVENT_STREAM)
     csv_exporter.export(event_log, os.path.join(OUTPUT_DATA_DIR, "running-example-exported.csv"))
     event_log_newimport = csv_importer.import_event_stream(
         os.path.join(OUTPUT_DATA_DIR, "running-example-exported.csv"))
     log_imported_after_export = log_conv_fact.apply(event_log_newimport)
     self.assertEqual(len(log), len(log_imported_after_export))
     os.remove(os.path.join(OUTPUT_DATA_DIR, "running-example-exported.csv"))
Example #6
 def test_prefiltering_dataframe(self):
     # assign an instance attribute to avoid static-method warnings
     # (unittest requires test cases to be defined as instance methods)
     self.dummy_variable = "dummy_value"
     input_log = os.path.join(INPUT_DATA_DIR, "running-example.csv")
     dataframe = csv_import_adapter.import_dataframe_from_path_wo_timeconversion(input_log, sep=',')
     dataframe = attributes_filter.filter_df_keeping_spno_activities(dataframe, activity_key="concept:name")
     dataframe = case_filter.filter_on_ncases(dataframe, case_id_glue="case:concept:name")
     dataframe = csv_import_adapter.convert_timestamp_columns_in_df(dataframe)
     dataframe = dataframe.sort_values('time:timestamp')
     event_log = log_conv_fact.apply(dataframe, variant=log_conv_fact.TO_EVENT_STREAM)
     log = log_conv_fact.apply(event_log)
     del log
Example #7
 def test_xesimp_csvexp(self):
     log0 = xes_importer.apply(
         os.path.join("input_data", "running-example.xes"))
     log = log_conv_factory.apply(log0,
                                  variant=log_conv_factory.TO_EVENT_LOG)
     stream = log_conv_factory.apply(
         log0, variant=log_conv_factory.TO_EVENT_STREAM)
     df = log_conv_factory.apply(log0,
                                 variant=log_conv_factory.TO_DATAFRAME)
     csv_exporter_factory.export(log, "ru.csv")
     csv_exporter_factory.export(stream, "ru.csv")
     csv_exporter_factory.export(df, "ru.csv")
     os.remove('ru.csv')
Example #8
 def test_pdimp_xesexp(self):
     log0 = csv_import_adapter.import_dataframe_from_path(
         os.path.join("input_data", "running-example.csv"))
     log = log_conv_factory.apply(log0,
                                  variant=log_conv_factory.TO_EVENT_LOG)
     stream = log_conv_factory.apply(
         log0, variant=log_conv_factory.TO_EVENT_STREAM)
     df = log_conv_factory.apply(log0,
                                 variant=log_conv_factory.TO_DATAFRAME)
     xes_exporter_factory.apply(log, "ru.xes")
     xes_exporter_factory.apply(stream, "ru.xes")
     xes_exporter_factory.apply(df, "ru.xes")
     os.remove('ru.xes')
def apply_events(log, values, parameters=None):
    """
    Filter the log on events, keeping or removing (depending on the "positive"
    parameter) the events whose attribute value belongs to the provided list

    Parameters
    -----------
    log
        log
    values
        Allowed attribute values
    parameters
        Parameters of the algorithm, including:
            activity_key -> Attribute identifying the activity in the log
            positive -> if True, keep the matching events; if False, remove them

    Returns
    -----------
    filtered_log
        Filtered log
    """
    if parameters is None:
        parameters = {}

    attribute_key = parameters.get(PARAMETER_CONSTANT_ATTRIBUTE_KEY,
                                   DEFAULT_NAME_KEY)
    positive = parameters.get("positive", True)

    stream = log_conv_fact.apply(log, variant=log_conv_fact.TO_EVENT_STREAM)
    if positive:
        stream = EventStream(
            list(filter(lambda x: x[attribute_key] in values, stream)))
    else:
        stream = EventStream(
            list(filter(lambda x: x[attribute_key] not in values, stream)))

    filtered_log = log_conv_fact.apply(stream)

    # filtered_log = EventLog()
    # for trace in log:
    #    new_trace = Trace()

    #    for j in range(len(trace)):
    #        if attribute_key in trace[j]:
    #            attribute_value = trace[j][attribute_key]
    #            if (positive and attribute_value in values) or (not positive and attribute_value not in values):
    #                new_trace.append(trace[j])
    #    if len(new_trace) > 0:
    #        for attr in trace.attributes:
    #            new_trace.attributes[attr] = trace.attributes[attr]
    #        filtered_log.append(new_trace)
    return filtered_log
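
# A minimal usage sketch, assuming `log` holds an imported EventLog whose
# events carry the default "concept:name" attribute:
kept = apply_events(log, ["register request", "pay compensation"],
                    parameters={"positive": True})
dropped = apply_events(log, ["register request", "pay compensation"],
                       parameters={"positive": False})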
Example #10
def apply(log, parameters=None, variant=HANDOVER):
    """
    Calculates a SNA metric

    Parameters
    ------------
    log
        Log
    parameters
        Possible parameters of the algorithm
    variant
        Variant of the algorithm to apply. Possible values:
            handover, working_together

    Returns
    -----------
    tuple
        Tuple containing the metric matrix and the resources list
    """
    if parameters is None:
        parameters = {}

    if variant in VERSIONS_LOG:
        log = conv_factory.apply(log, parameters=parameters)
        return VERSIONS_LOG[variant](log, parameters=parameters)

    raise Exception("metric not implemented yet")
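
# A minimal usage sketch, assuming `log` holds an imported EventLog; HANDOVER
# is the module-level default shown in the signature above.
hw_metric = apply(log, variant=HANDOVER)  # metric matrix and resource list, per the docstring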
Example #11
def apply(obj,
          petri_net,
          initial_marking,
          final_marking,
          parameters=None,
          version=VERSION_STATE_EQUATION_A_STAR):
    if parameters is None:
        parameters = {}
    # fill in the default activity, timestamp and case-id keys when absent
    parameters.setdefault(pmutil.constants.PARAMETER_CONSTANT_ACTIVITY_KEY,
                          xes_util.DEFAULT_NAME_KEY)
    parameters.setdefault(pmutil.constants.PARAMETER_CONSTANT_TIMESTAMP_KEY,
                          xes_util.DEFAULT_TIMESTAMP_KEY)
    parameters.setdefault(pmutil.constants.PARAMETER_CONSTANT_CASEID_KEY,
                          log_util.CASE_ATTRIBUTE_GLUE)
    if isinstance(obj, pm4py.objects.log.log.Trace):
        return apply_trace(obj, petri_net, initial_marking, final_marking,
                           parameters, version)
    else:
        return apply_log(
            log_converter.apply(obj, parameters, log_converter.TO_EVENT_LOG),
            petri_net, initial_marking, final_marking, parameters, version)
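
# A minimal usage sketch, assuming `log`, `net`, `initial_marking` and
# `final_marking` are available (e.g. from a discovery algorithm); a single
# Trace could be passed as `obj` instead of the whole log.
aligned_traces = apply(log, net, initial_marking, final_marking)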
Example #12
def apply(log, parameters=None, variant=DEFAULT_VARIANT):
    if parameters is None:
        parameters = {}
    # fill in the default activity, timestamp and case-id keys when absent
    parameters.setdefault(pmutil.constants.PARAMETER_CONSTANT_ACTIVITY_KEY,
                          xes_util.DEFAULT_NAME_KEY)
    parameters.setdefault(pmutil.constants.PARAMETER_CONSTANT_TIMESTAMP_KEY,
                          xes_util.DEFAULT_TIMESTAMP_KEY)
    parameters.setdefault(pmutil.constants.PARAMETER_CONSTANT_CASEID_KEY,
                          log_util.CASE_ATTRIBUTE_GLUE)
    if isinstance(log, pandas.core.frame.DataFrame):
        dfg = df_statistics.get_dfg_graph(
            log,
            case_id_glue=parameters[
                pmutil.constants.PARAMETER_CONSTANT_CASEID_KEY],
            activity_key=parameters[
                pmutil.constants.PARAMETER_CONSTANT_ACTIVITY_KEY],
            timestamp_key=parameters[
                pmutil.constants.PARAMETER_CONSTANT_TIMESTAMP_KEY])
        return VERSIONS_DFG[variant](dfg, parameters=parameters)
    return VERSIONS[variant](log_conversion.apply(log, parameters,
                                                  log_conversion.TO_EVENT_LOG),
                             parameters)
def apply(log, parameters=None, version=ALPHA_VERSION_CLASSIC):
    """
    Apply the Alpha Miner on top of a trace log

    Parameters
    -----------
    log
        Trace log
    version
        Variant of the algorithm to use (classic)
    parameters
        Possible parameters of the algorithm, including:
            activity key -> Name of the attribute that contains the activity

    Returns
    -----------
    net
        Petri net
    marking
        Initial marking
    final_marking
        Final marking
    """
    if parameters is None:
        parameters = {}
    parameters.setdefault(pmutil.constants.PARAMETER_CONSTANT_ACTIVITY_KEY,
                          xes_util.DEFAULT_NAME_KEY)
    return VERSIONS[version](log_conversion.apply(log, parameters,
                                                  log_conversion.TO_TRACE_LOG),
                             parameters)
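
# A minimal usage sketch, assuming `log` holds an imported log; the classic
# variant returns the net together with its initial and final markings.
net, initial_marking, final_marking = apply(log)
print(len(net.places), "places,", len(net.transitions), "transitions")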
Example #14
 def download_csv_log(self):
     """
     Returns the event log as a CSV string
     """
     dataframe = conversion_factory.apply(self.log, variant=conversion_factory.TO_DATAFRAME)
     log_string = dataframe.to_csv(index=False)
     return log_string
Example #15
def generate_xes_from_dataframe(dataframe):
    try:
        # convert the dataframe (imported from a CSV) into an event log
        xes_log = conversion_factory.apply(dataframe)
        return xes_log
    except Exception:
        print("Invalid input")
Example #16
def transient_analysis_from_dataframe(df, delay, parameters=None):
    """
    Gets the transient analysis from a dataframe and a delay

    Parameters
    -------------
    df
        Pandas dataframe
    delay
        Time delay
    parameters
        Parameters of the algorithm

    Returns
    -------------
    transient_result
        Transient analysis result
    """
    if parameters is None:
        parameters = {}

    activity_key = parameters.get(constants.PARAMETER_CONSTANT_ACTIVITY_KEY,
                                  xes.DEFAULT_NAME_KEY)
    case_id_glue = parameters.get(constants.PARAMETER_CONSTANT_CASEID_KEY,
                                  CASE_CONCEPT_NAME)
    timestamp_key = parameters.get(constants.PARAMETER_CONSTANT_TIMESTAMP_KEY,
                                   xes.DEFAULT_TIMESTAMP_KEY)

    log = log_conv_factory.apply(df,
                                 variant=log_conv_factory.DF_TO_EVENT_LOG_1V,
                                 parameters=parameters)
    # gets the simple Petri net through simple miner
    net, im, fm = simple_factory.apply(log,
                                       parameters=parameters,
                                       classic_output=True)

    activities_count = dict(df.groupby(activity_key).size())
    dfg_performance = df_statistics.get_dfg_graph(df,
                                                  measure="performance",
                                                  perf_aggregation_key="mean",
                                                  case_id_glue=case_id_glue,
                                                  activity_key=activity_key,
                                                  timestamp_key=timestamp_key)

    spaths = get_shortest_paths(net)
    aggregated_statistics = get_decorations_from_dfg_spaths_acticount(
        net, dfg_performance, spaths, activities_count, variant="performance")

    # gets the stochastic map out of the dataframe and the Petri net
    s_map = smap_builder.get_map_exponential_from_aggstatistics(
        aggregated_statistics, parameters=parameters)

    return transient_analysis_from_petri_net_and_smap(net,
                                                      im,
                                                      s_map,
                                                      delay,
                                                      parameters=parameters)
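
# A minimal usage sketch, assuming `df` holds an event dataframe imported as
# in the other examples; the delay is expressed in the time unit of the
# performance DFG (seconds, under the usual pm4py conventions).
transient_result = transient_analysis_from_dataframe(df, 86400)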
Example #17
def get_kde_date_attribute_json(log, attribute=DEFAULT_TIMESTAMP_KEY, parameters=None):
    """
    Gets the KDE estimation for the distribution of a date attribute values
    (expressed as JSON)

    Parameters
    -------------
    log
        Event stream object (if log, is converted)
    attribute
        Date attribute to analyse
    parameters
        Possible parameters of the algorithm, including:
            graph_points -> number of points to include in the graph


    Returns
    --------------
    json
        JSON representation of the (x, y) points of the KDE graph

    if type(log) is EventLog:
        event_log = log_conv_fact.apply(log, variant=log_conv_fact.TO_EVENT_STREAM)
    else:
        event_log = log

    values = [event[attribute].replace(tzinfo=None) for event in event_log if attribute in event]

    return attributes_common.get_kde_date_attribute_json(values, parameters=parameters)
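
# A minimal usage sketch, assuming `log` holds an imported log or stream; by
# default the standard XES timestamp attribute is analysed.
kde_json = get_kde_date_attribute_json(log)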
Example #18
def get_kde_numeric_attribute(log, attribute, parameters=None):
    """
    Gets the KDE estimation for the distribution of a numeric attribute values

    Parameters
    -------------
    log
        Event stream object (if log, is converted)
    attribute
        Numeric attribute to analyse
    parameters
        Possible parameters of the algorithm, including:
            graph_points -> number of points to include in the graph


    Returns
    --------------
    x
        X-axis values to represent
    y
        Y-axis values to represent
    """

    if type(log) is EventLog:
        event_log = log_conv_fact.apply(log, variant=log_conv_fact.TO_EVENT_STREAM)
    else:
        event_log = log

    values = [event[attribute] for event in event_log if attribute in event]

    return attributes_common.get_kde_numeric_attribute(values, parameters=parameters)
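
# A minimal usage sketch, assuming `log` holds an imported log or stream whose
# events carry a numeric "amount" attribute (a hypothetical attribute name):
x, y = get_kde_numeric_attribute(log, "amount")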
Example #19
def apply(log, parameters=None, variant=CLASSIC):
    """
    Discovers a Petri net using Heuristics Miner

    Parameters
    ------------
    log
        Event log
    parameters
        Possible parameters of the algorithm,
        including: activity_key, case_id_glue, timestamp_key,
        dependency_thresh, and_measure_thresh, min_act_count, min_dfg_occurrences, dfg_pre_cleaning_noise_thresh,
        loops_length_two_thresh
    variant
        Variant of the algorithm (classic)

    Returns
    ------------
    net
        Petri net
    im
        Initial marking
    fm
        Final marking
    """
    if isinstance(log, pandas.core.frame.DataFrame):
        return VERSIONS_PANDAS[variant](log, parameters=parameters)

    return VERSIONS[variant](conv_factory.apply(log, parameters=parameters),
                             parameters=parameters)
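
# A minimal usage sketch, assuming `log` holds an imported log or dataframe;
# dependency_thresh is one of the parameters listed in the docstring above.
net, im, fm = apply(log, parameters={"dependency_thresh": 0.99})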
Example #20
def convert_to_log(frame: DataFrame) -> EventLog:
    """
    Converts a DataFrame to an EventLog.
    :param frame: the DataFrame
    :return: the resulting EventLog
    """
    return event_log_factory.apply(frame)
Example #21
def apply(log, parameters=None):
    """
    Gets the roles (group of different activities done by similar resources)
    out of the log

    Parameters
    -------------
    log
        Log object
    parameters
        Possible parameters of the algorithm

    Returns
    ------------
    roles
        List of different roles inside the log
    """
    if parameters is None:
        parameters = {}

    resource_key = parameters.get(constants.PARAMETER_CONSTANT_RESOURCE_KEY,
                                  xes.DEFAULT_RESOURCE_KEY)
    activity_key = parameters.get(constants.PARAMETER_CONSTANT_ACTIVITY_KEY,
                                  xes.DEFAULT_NAME_KEY)
    stream = log_conv_factory.apply(log,
                                    variant=log_conv_factory.TO_EVENT_STREAM)

    activity_resource_couples = Counter(
        (event[resource_key], event[activity_key]) for event in stream)

    return algorithm.apply(activity_resource_couples, parameters=parameters)
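
# A minimal usage sketch, assuming `log` holds an imported log whose events
# carry the default XES resource attribute ("org:resource"):
roles = apply(log)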
Example #22
def apply(log,
          petri_net,
          initial_marking,
          final_marking,
          parameters=None,
          variant="token_replay"):
    if parameters is None:
        parameters = {}
    # fill in the default activity, timestamp and case-id keys when absent
    parameters.setdefault(pmutil.constants.PARAMETER_CONSTANT_ACTIVITY_KEY,
                          xes_util.DEFAULT_NAME_KEY)
    parameters.setdefault(pmutil.constants.PARAMETER_CONSTANT_TIMESTAMP_KEY,
                          xes_util.DEFAULT_TIMESTAMP_KEY)
    parameters.setdefault(pmutil.constants.PARAMETER_CONSTANT_CASEID_KEY,
                          log_util.CASE_ATTRIBUTE_GLUE)

    return VERSIONS[variant](log_conversion.apply(log, parameters,
                                                  log_conversion.TO_EVENT_LOG),
                             petri_net,
                             initial_marking,
                             final_marking,
                             parameters=parameters)
Example #23
def get_variants_matrix(log, parameters=None):
    """
    Gets the variants matrix from a log object

    Parameters
    -------------
    log
        Log
    parameters
        Parameters of the algorithm: activity_key

    Returns
    -------------
    variants_matrix
        Variants matrix
    activities
        Sorted (by name) activities of the log
    """
    if parameters is None:
        parameters = {}
    activity_key = parameters.get(constants.PARAMETER_CONSTANT_ACTIVITY_KEY,
                                  xes.DEFAULT_NAME_KEY)
    parameters[constants.PARAMETER_CONSTANT_ACTIVITY_KEY] = activity_key
    parameters[constants.PARAMETER_CONSTANT_ATTRIBUTE_KEY] = activity_key

    if type(log) is EventStream:
        log = conversion_factory.apply(log, parameters=parameters)
    variants_list = get_variants_list(log, parameters=parameters)
    activities = get_activities_list(log, parameters=parameters)

    return get_variants_matrix_from_variants_list(variants_list,
                                                  activities,
                                                  parameters=parameters)
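
# A minimal usage sketch, assuming `log` holds an imported log (a stream would
# be converted first); activities come back sorted by name, per the docstring.
variants_matrix, activities = get_variants_matrix(log)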
Example #24
def table_to_log(table, parameters=None):
    """
    Converts a Pyarrow table to an event log

    Parameters
    ------------
    table
        Pyarrow table
    parameters
        Possible parameters of the algorithm
    """
    if parameters is None:
        parameters = {}

    dict0 = table.to_pydict()
    keys = list(dict0.keys())
    # for legacy format support
    if LEGACY_PARQUET_CASECONCEPTNAME in keys:
        for key in keys:
            dict0[key.replace(LEGACY_PARQUET_TP_REPLACER,
                              ":")] = dict0.pop(key)

    stream = EventStream([dict(zip(dict0, i)) for i in zip(*dict0.values())])

    return log_conv_factory.apply(stream, parameters=parameters)
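
# A minimal usage sketch: build a tiny Pyarrow table in memory and convert it;
# the column names follow the XES-style keys used throughout these examples.
import pyarrow as pa

table = pa.Table.from_pydict({
    "case:concept:name": ["1", "1", "2"],
    "concept:name": ["register request", "check ticket", "register request"],
})
log = table_to_log(table)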
def export_log_tree(log):
    """
    Get XES log XML tree from a PM4Py log

    Parameters
    -----------
    log
        PM4Py log

    Returns
    -----------
    tree
        XML tree
    """
    # If the log is in log_instance.EventStream, then transform it into log_instance.EventLog format
    if type(log) is log_instance.EventStream:
        log = log_converter.apply(log)
    root = etree.Element(xes_util.TAG_LOG)

    # add attributes at the log level
    export_attributes(log, root)
    # add extensions at the log level
    export_extensions(log, root)
    # add globals at the log level
    export_globals(log, root)
    # add classifiers at the log level
    export_classifiers(log, root)
    # add traces at the log level
    export_traces(log, root)

    tree = etree.ElementTree(root)

    return tree
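
# A minimal usage sketch, assuming `log` holds an imported log: serialize the
# returned XML tree to disk with the standard ElementTree write method.
tree = export_log_tree(log)
tree.write("running-example-exported.xes")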
Example #26
def apply(log,
          net,
          initial_marking,
          final_marking,
          parameters=None,
          variant="token_replay"):
    """
    Factory method to apply token-based replay
    
    Parameters
    -----------
    log
        Log
    net
        Petri net
    initial_marking
        Initial marking
    final_marking
        Final marking
    parameters
        Parameters of the algorithm, including:
            pm4py.util.constants.PARAMETER_CONSTANT_ACTIVITY_KEY -> Activity key

    variant
        Variant of the algorithm to use
    """
    return VERSIONS[variant](log_converter.apply(log, parameters,
                                                 log_converter.TO_TRACE_LOG),
                             net,
                             initial_marking,
                             final_marking,
                             parameters=parameters)
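
# A minimal usage sketch, assuming `log`, `net`, `initial_marking` and
# `final_marking` are available (e.g. from the Alpha Miner example above):
replayed_traces = apply(log, net, initial_marking, final_marking)
print(replayed_traces[0]["trace_is_fit"])  # per-trace fitness flag of token-based replay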
def apply_numeric_events(log, int1, int2, parameters=None):
    """
    Apply a filter on events (numerical filter)

    Parameters
    --------------
    log
        Log
    int1
        Lower bound of the interval
    int2
        Upper bound of the interval
    parameters
        Possible parameters of the algorithm:
            PARAMETER_CONSTANT_ATTRIBUTE_KEY => indicates which attribute to filter
            positive => if True, keep only events inside the interval; if False, keep only those outside

    Returns
    --------------
    filtered_log
        Filtered log
    """
    if parameters is None:
        parameters = {}

    attribute_key = parameters.get(PARAMETER_CONSTANT_ATTRIBUTE_KEY,
                                   DEFAULT_NAME_KEY)
    positive = parameters.get("positive", True)

    stream = log_conv_fact.apply(log, variant=log_conv_fact.TO_EVENT_STREAM)
    if positive:
        stream = EventStream(
            list(
                filter(
                    lambda x: attribute_key in x and int1 <= x[attribute_key]
                    <= int2, stream)))
    else:
        stream = EventStream(
            list(
                filter(
                    lambda x: attribute_key in x and
                    (x[attribute_key] < int1 or x[attribute_key] > int2),
                    stream)))

    filtered_log = log_conv_fact.apply(stream)

    return filtered_log
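
# A minimal usage sketch, assuming `log` holds an imported log whose events
# carry a numeric "amount" attribute (a hypothetical name); the constant is
# the same PARAMETER_CONSTANT_ATTRIBUTE_KEY used by the function above.
in_range = apply_numeric_events(
    log, 50, 500, parameters={PARAMETER_CONSTANT_ATTRIBUTE_KEY: "amount"})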
Example #28
def import_event_stream(path, parameters=None):
    """
    Imports a CSV file from the given path

    Parameters
    ----------
    path:
        Input CSV file path
    parameters
        Parameters of the algorithm, including
            sep -> column separator
            quotechar -> (if specified) character that opens/closes quoted strings in the CSV
            nrows -> (if specified) Maximum number of rows to read from the CSV
            sort -> boolean telling whether the CSV should be sorted
            sort_field -> If sort option is enabled, then the CSV is automatically sorted by the specified column

    Returns
    -------
    stream : :class:`pm4py.objects.log.log.EventStream`
        An event stream
    """

    sep = ","
    quotechar = None
    nrows = None
    sort = False
    sort_field = "time:timestamp"
    insert_event_indexes = False
    timest_format = None
    timest_columns = None

    if parameters is None:
        parameters = {}
    if "sep" in parameters:
        sep = parameters["sep"]
    if "quotechar" in parameters:
        quotechar = parameters["quotechar"]
    if "nrows" in parameters:
        nrows = parameters["nrows"]
    if "sort" in parameters:
        sort = parameters["sort"]
    if "sort_field" in parameters:
        sort_field = parameters["sort_field"]
    if "insert_event_indexes" in parameters:
        insert_event_indexes = parameters["insert_event_indexes"]
    if "timest_format" in parameters:
        timest_format = parameters["timest_format"]
    if "timest_columns" in parameters:
        timest_columns = parameters["timest_columns"]

    df = import_dataframe_from_path(path, sep=sep, quotechar=quotechar, nrows=nrows, sort=sort, sort_field=sort_field,
                                    timest_format=timest_format, timest_columns=timest_columns)
    event_log = log_conv_fact.apply(df, variant=log_conv_fact.TO_EVENT_STREAM)

    if insert_event_indexes:
        event_log.insert_event_index_as_event_attribute()

    return event_log
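
# A minimal usage sketch: import a hypothetical semicolon-separated file
# "events.csv", sorted by timestamp, and convert the stream to an EventLog
# with the same conversion factory used above.
stream = import_event_stream("events.csv",
                             parameters={"sep": ";", "sort": True})
log = log_conv_fact.apply(stream)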
Example #29
    def convert(self, filePath):
        """
        Converts a CSV file into an event log

        :param filePath: the path to the file
        :return: event log
        """
        csvEventStream = csv_importer.import_event_stream(filePath)
        return conversion_factory.apply(csvEventStream)
Example #30
def apply(log, parameters=None, variant=DFG_NATIVE):
    """
    Calculates DFG graph (frequency or performance) starting from a log

    Parameters
    ----------
    log
        Log
    parameters
        Possible parameters passed to the algorithms:
            aggregationMeasure -> performance aggregation measure (min, max, mean, median)
            activity_key -> Attribute to use as activity
            timestamp_key -> Attribute to use as timestamp
    variant
        Variant of the algorithm to use, possible values:
            native, frequency, performance, frequency_greedy, performance_greedy

    Returns
    -------
    dfg
        DFG graph
    """
    if parameters is None:
        parameters = {}
    # fill in the default activity, timestamp and case-id keys when absent
    parameters.setdefault(pmutil.constants.PARAMETER_CONSTANT_ACTIVITY_KEY,
                          xes_util.DEFAULT_NAME_KEY)
    parameters.setdefault(pmutil.constants.PARAMETER_CONSTANT_TIMESTAMP_KEY,
                          xes_util.DEFAULT_TIMESTAMP_KEY)
    parameters.setdefault(pmutil.constants.PARAMETER_CONSTANT_CASEID_KEY,
                          pmutil.constants.CASE_ATTRIBUTE_GLUE)
    if isinstance(log, pandas.core.frame.DataFrame):
        log = csv_import_adapter.convert_timestamp_columns_in_df(
            log,
            timest_columns=[
                parameters[pmutil.constants.PARAMETER_CONSTANT_TIMESTAMP_KEY]
            ])
        dfg_frequency, dfg_performance = df_statistics.get_dfg_graph(
            log,
            measure="both",
            activity_key=parameters[
                pmutil.constants.PARAMETER_CONSTANT_ACTIVITY_KEY],
            timestamp_key=parameters[
                pmutil.constants.PARAMETER_CONSTANT_TIMESTAMP_KEY],
            case_id_glue=parameters[
                pmutil.constants.PARAMETER_CONSTANT_CASEID_KEY])
        if 'native' in variant or 'frequency' in variant:
            return dfg_frequency
        else:
            return dfg_performance
    return VERSIONS[variant](log_conversion.apply(log, parameters,
                                                  log_conversion.TO_EVENT_LOG),
                             parameters=parameters)
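
# A minimal usage sketch, assuming `log` holds an imported log or dataframe:
# mine a frequency DFG and list the three most frequent directly-follows pairs.
dfg = apply(log, variant=DFG_NATIVE)
print(sorted(dfg.items(), key=lambda item: item[1], reverse=True)[:3])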