Example #1
def getaverageduration2(log, logname, logtime):
    activities = attributes_filter.get_attribute_values(log, logname)
    time = attributes_filter.get_attribute_values(log, logtime)  # unused below
    variants = variants_filter.get_variants(log)  # unused below
    activitiesList = list(activities)

    # flatten the log into a flat list of timestamps and a list of traces
    # (each trace as a list of activity names)
    timeList = []
    for trace in log:
        for event in trace:
            timeList.append(str(event[logtime]))
    tracelist = []
    for trace in log:
        variantsList = []
        for event in trace:
            variantsList.append(event[logname])
        tracelist.append(variantsList)

    duration = []
    fmt = '%Y-%m-%d %H:%M:%S'
    for val in activitiesList:
        count = 0
        timeSum = 0
        header = 0  # offset of the current trace inside the flat timeList

        for i in range(len(tracelist)):
            for j in range(len(tracelist[i])):
                # time from this occurrence of val to the next event in the trace
                if tracelist[i][j] == val and j != len(tracelist[i]) - 1:
                    end = timeList[header + j + 1][0:19]
                    start = timeList[header + j][0:19]
                    ts = dt.datetime.strptime(end, fmt) - dt.datetime.strptime(
                        start, fmt)
                    timeSum += int(ts.total_seconds())
                    count += 1
            header = header + len(tracelist[i])

        if timeSum == 0:
            duration.append(0)
        else:
            duration.append(timeSum / count)

    return duration
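A minimal usage sketch for the function above, assuming the legacy pm4py import API that other examples on this page use; the file path and the datetime alias `dt` are assumptions:

import datetime as dt
from pm4py.objects.log.importer.xes import factory as xes_importer
from pm4py.algo.filtering.log.attributes import attributes_filter
from pm4py.algo.filtering.log.variants import variants_filter

log = xes_importer.import_log("running-example.xes")  # hypothetical path
durations = getaverageduration2(log, "concept:name", "time:timestamp")
print(durations)  # average seconds until the next event, per activity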
Example #2
def dataPreprocess(log):
    """
        Transform every trace in the log file in a way that we will have direct
        access to every event in a trace and its time. Also returns a array
        with the initial sequence of events in a trace that will be used latter
        to create the pairs
    """
    activities_all = log_attributes_filter.get_attribute_values(
        log, "concept:name")
    activities = list(activities_all.keys())
    dataVectors = []
    theIndex = []
    for trace in log:
        k = [0 for i in range(len(activities))]
        times = [[] for i in range(len(activities))]
        previousTime = trace.attributes["REG_DATE"]
        aIndex = []
        for index, event in enumerate(trace):
            indexActivity = activities.index(event["concept:name"])
            k[indexActivity] += 1
            times[indexActivity].append(event["time:timestamp"] - previousTime)
            aIndex.append([index, indexActivity, len(times[indexActivity])])
            previousTime = event["time:timestamp"]
        timesSeconds = [[i.total_seconds() for i in x] for x in times]
        dataVectors.append(timesSeconds)
        theIndex.append(aIndex)
    return dataVectors, theIndex
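A hedged usage sketch: `REG_DATE` is a trace attribute of specific logs such as BPI Challenge 2012, so this only runs on a log that carries it (the path is hypothetical):

from pm4py.objects.log.importer.xes import factory as xes_importer

log = xes_importer.import_log("BPI_Challenge_2012.xes")  # hypothetical path
dataVectors, theIndex = dataPreprocess(log)
# dataVectors[t][a] holds the waiting times (in seconds) of activity a in trace t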
Example #3
def readFromFile(log):
    """
        This functions will read the distribution evaluation from the file. 
        It will be used if we had already run the experiments, to save time.
    """
    dists = []
    with open("distributions.txt", "r") as f:
        for line in f:
            dists.append(line.split(", ")[1:-1])

    # entries with a negative value split into four parts; the loop below
    # drops the empty part and re-attaches the sign
    distributions = []
    for index, d in enumerate(dists):
        distributions.append([])
        for i in d:
            k = i.split("-")
            if len(k) == 4:
                k.remove("")
                k[2] = "-" + k[2]
            distributions[index].append(k)

    p = [[[i[0], float(i[1]), float(i[2])] for i in dist]
         for dist in distributions]
    pSorted = [[sorted(i, key=lambda x: x[2], reverse=True)] for i in p]
    oneDist = [i[0][0] for i in pSorted]
    distributionsDF = pd.DataFrame()
    activities_all = log_attributes_filter.get_attribute_values(
        log, "concept:name")
    activities = list(activities_all.keys())
    distributionsDF["Activity_Name"] = activities
    distributionsDF['Distribution'] = [i[0] for i in oneDist]
    distributionsDF['RMSE'] = [i[1] for i in oneDist]
    distributionsDF["R2"] = [i[2] for i in oneDist]
    return distributionsDF
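The split-on-"-" handling above implies a file layout roughly like the one below; this reconstruction is an assumption derived from the parsing code, not a documented format:

# One line per activity: a leading field, then "name-RMSE-R2" entries, then a trailing field.
# A negative R2 produces a fourth split part, which the loop re-attaches as "-R2".
line = "ActivityA, norm-0.0123-0.91, expon-0.0456--0.12, \n"
entries = line.split(", ")[1:-1]  # ['norm-0.0123-0.91', 'expon-0.0456--0.12']
print(entries[1].split("-"))      # ['expon', '0.0456', '', '0.12'] -> negative R2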
Example #4
def dataPreprocessPerActivity(log):
    activities_all = log_attributes_filter.get_attribute_values(
        log, "concept:name")
    activities = list(activities_all.keys())
    dataVectors = []
    for traceIndex, trace in enumerate(log):
        k = [0 for i in range(len(activities))]
        times = [[] for i in range(len(activities))]
        previousTime = trace.attributes["REG_DATE"]
        for index, event in enumerate(trace):
            indexActivity = activities.index(event["concept:name"])
            k[indexActivity] += 1
            times[indexActivity].append(
                [traceIndex, event["time:timestamp"] - previousTime])
            previousTime = event["time:timestamp"]
        # convert the collected timedeltas to seconds once per trace
        timesSeconds = [[[i[0], i[1].total_seconds()] for i in x]
                        for x in times]
        dataVectors.append(timesSeconds)
    #Transpose dataVectors
    transposedDataVectors = [[
        dataVector[index] for dataVector in dataVectors
        if dataVector[index] != []
    ] for index in range(len(dataVectors[0]))]
    return [[event for trace in dataVector for event in trace]
            for dataVector in transposedDataVectors]
Example #5
def execute_script(variant="frequency"):
    # read the log using the nonstandard importer (faster)
    log_path = os.path.join("..", "tests", "input_data", "receipt.xes")
    log = xes_importer.import_log(log_path, variant="nonstandard")
    # applies Inductive Miner on the log
    net, initial_marking, final_marking = inductive_miner.apply(log)
    # find shortest paths in the net
    spaths = get_shortest_paths(net)

    # then we start to decorate the net
    # we decide if we should decorate it with frequency or performance
    # we decide the aggregation measure (sum, min, max, mean, median, stdev)
    aggregation_measure = "mean"
    if variant == "frequency":
        aggregation_measure = "sum"
    # we find the DFG
    dfg = dfg_factory.apply(log, variant=variant)
    # we find the number of activities occurrences in the log
    activities_count = attributes_filter.get_attribute_values(log, "concept:name")
    # we calculate the statistics on the Petri net applying the greedy algorithm
    aggregated_statistics = get_decorations_from_dfg_spaths_acticount(net, dfg, spaths,
                                                                      activities_count,
                                                                      variant=variant,
                                                                      aggregation_measure=aggregation_measure)
    # we find the gviz
    gviz = pn_vis_factory.apply(net, initial_marking, final_marking, variant=variant,
                                aggregated_statistics=aggregated_statistics, parameters={"format": "svg"})
    # we show the viz on screen
    pn_vis_factory.view(gviz)
Example #6
def get_activities_list(log, parameters=None):
    """
    Gets the activities list from a log object, sorted by activity name

    Parameters
    --------------
    log
        Log
    parameters
        Possible parameters of the algorithm

    Returns
    -------------
    activities_list
        List of activities sorted by activity name
    """
    if parameters is None:
        parameters = {}
    activity_key = parameters[
        constants.PARAMETER_CONSTANT_ACTIVITY_KEY] if constants.PARAMETER_CONSTANT_ACTIVITY_KEY in parameters else xes.DEFAULT_NAME_KEY
    if type(log) is pd.DataFrame:
        activities = pd_attributes_filter.get_attribute_values(log, activity_key)
    else:
        activities = log_attributes_filter.get_attribute_values(log, activity_key)
    return sorted(list(activities.keys()))
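A hypothetical invocation; the constants module path matches the legacy pm4py releases these examples target and may differ in newer versions:

from pm4py.util import constants

acts = get_activities_list(log, parameters={
    constants.PARAMETER_CONSTANT_ACTIVITY_KEY: "concept:name"})
print(acts)  # activity names, sorted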
Example #7
def apply(dfg,
          log=None,
          parameters=None,
          activities_count=None,
          measure="frequency"):
    if parameters is None:
        parameters = {}

    activity_key = parameters[
        PARAMETER_CONSTANT_ACTIVITY_KEY] if PARAMETER_CONSTANT_ACTIVITY_KEY in parameters else xes.DEFAULT_NAME_KEY

    image_format = "png"
    max_no_of_edges_in_diagram = 75

    if "format" in parameters:
        image_format = parameters["format"]
    if "maxNoOfEdgesInDiagram" in parameters:
        max_no_of_edges_in_diagram = parameters["maxNoOfEdgesInDiagram"]

    if activities_count is None:
        activities_count = attributes_filter.get_attribute_values(
            log, activity_key, parameters=parameters)

    return graphviz_visualization(
        activities_count,
        dfg,
        image_format=image_format,
        measure=measure,
        max_no_of_edges_in_diagram=max_no_of_edges_in_diagram)
Example #8
def activities(log):
    activities = attributes_filter.get_attribute_values(log, "concept:name")
    n_unique_activities = len(activities)

    activities_occurrences = list(activities.values())
    activities_min = np.min(activities_occurrences)
    activities_max = np.max(activities_occurrences)
    activities_mean = np.mean(activities_occurrences)
    activities_median = np.median(activities_occurrences)
    activities_std = np.std(activities_occurrences)
    activities_variance = np.var(activities_occurrences)
    activities_q1 = np.percentile(activities_occurrences, 25)
    activities_q3 = np.percentile(activities_occurrences, 75)
    activities_iqr = stats.iqr(activities_occurrences)
    activities_skewness = stats.skew(activities_occurrences)
    activities_kurtosis = stats.kurtosis(activities_occurrences)

    return [
        n_unique_activities,
        activities_min,
        activities_max,
        activities_mean,
        activities_median,
        activities_std,
        activities_variance,
        activities_q1,
        activities_q3,
        activities_iqr,
        activities_skewness,
        activities_kurtosis,
    ]
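Since the function returns a bare list, pairing it with names makes the feature order explicit; the names below are illustrative, not part of the original code:

FEATURE_NAMES = [
    "n_unique_activities", "min", "max", "mean", "median", "std",
    "variance", "q1", "q3", "iqr", "skewness", "kurtosis",
]
features = dict(zip(FEATURE_NAMES, activities(log)))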
Example #9
def get_activities(data):
    """
    Filteres event log to only return attribute names.
    :param data: event log
    :return: event log activities
    """
    return list(
        sorted(
            attributes_filter.get_attribute_values(data,
                                                   "concept:name").keys()))
Example #10
def filterfile(sourceFile, outputFile, patternText, inclusive):
    log = importer.apply(sourceFile)
    activities = attributes_filter.get_attribute_values(log, CONCEPT_NAME)  # unused below
    filteredLog = attributes_filter.apply(
        log, [patternText],
        parameters={
            attributes_filter.Parameters.ATTRIBUTE_KEY: CONCEPT_NAME,
            attributes_filter.Parameters.POSITIVE: inclusive
        })
    xes_exporter.apply(filteredLog, outputFile)
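A hedged invocation sketch (paths and pattern are hypothetical); with inclusive=True the matching cases are kept, with False they are dropped:

filterfile("input.xes", "filtered.xes", "A_SUBMITTED", inclusive=True)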
Example #11
def stats(log):
    activities = list(attributes_filter.get_attribute_values(log, "concept:name"))
    times = [[0 for _ in range(len(activities))] for _ in range(len(log))]
    for index_t, trace in enumerate(log):
        previous_time = 0
        for index, event in enumerate(trace):
            if index == 0:
                previous_time = trace.attributes["REG_DATE"]
            time = event["time:timestamp"]
            duration = time - previous_time
            times[index_t][activities.index(event["concept:name"])] += duration.total_seconds()
            previous_time = time  # measure each event against its predecessor
    return times
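With the return added, a quick shape check might look like this (again assuming a log whose traces carry REG_DATE):

times = stats(log)
print(len(times), len(times[0]))  # traces x activities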
Example #12
def apply(log, parameters):
    """
    Apply the IMDF algorithm to a log obtaining a Petri net along with an initial and final marking

    Parameters
    -----------
    log
        Log
    parameters
        Parameters of the algorithm, including:
            pmutil.constants.PARAMETER_CONSTANT_ACTIVITY_KEY -> attribute of the log to use as activity name
            (default concept:name)

    Returns
    -----------
    net
        Petri net
    initial_marking
        Initial marking
    final_marking
        Final marking
    """
    if parameters is None:
        parameters = {}
    if pmutil.constants.PARAMETER_CONSTANT_ACTIVITY_KEY not in parameters:
        parameters[pmutil.constants.PARAMETER_CONSTANT_ACTIVITY_KEY] = xes_util.DEFAULT_NAME_KEY
    if pmutil.constants.PARAMETER_CONSTANT_ATTRIBUTE_KEY not in parameters:
        parameters[pmutil.constants.PARAMETER_CONSTANT_ATTRIBUTE_KEY] = parameters[
            pmutil.constants.PARAMETER_CONSTANT_ACTIVITY_KEY]
    activity_key = parameters[pmutil.constants.PARAMETER_CONSTANT_ACTIVITY_KEY]

    # get the DFG
    dfg = [(k, v) for k, v in dfg_inst.apply(log, parameters={
        pmutil.constants.PARAMETER_CONSTANT_ACTIVITY_KEY: activity_key}).items() if v > 0]

    # get the activities in the log
    activities = attributes_filter.get_attribute_values(log, activity_key)

    # gets the start activities from the log
    start_activities = list(start_activities_filter.get_start_activities(log, parameters=parameters).keys())
    # gets the end activities from the log
    end_activities = list(end_activities_filter.get_end_activities(log, parameters=parameters).keys())

    # check if the log contains empty traces
    contains_empty_traces = False
    traces_length = [len(trace) for trace in log]
    if traces_length:
        contains_empty_traces = min(traces_length) == 0

    net, initial_marking, final_marking = apply_dfg(dfg, parameters=parameters, activities=activities,
                                                    contains_empty_traces=contains_empty_traces,
                                                    start_activities=start_activities, end_activities=end_activities)

    return net, initial_marking, final_marking
Example #13
def preprocess(log):
    """
        Transform every trace in the log file, which is represented as a json,
        in a array that we will have easy access to times for every event in a trace
        and the sequence of these events. Also uses standarization to transofrm
        the time values per activity.
    """
    activities_all = log_attributes_filter.get_attribute_values(
        log, "concept:name")
    activities = list(activities_all.keys())
    dataVectors = []
    sequentialData = [[] for i in range(len(log))]
    for outerIndex, trace in enumerate(log):
        times = [[] for i in range(len(activities))]
        previousTime = trace.attributes["REG_DATE"]
        for index, event in enumerate(trace):
            indexActivity = activities.index(event["concept:name"])
            time = event["time:timestamp"] - previousTime
            times[indexActivity].append(time)
            previousTime = event["time:timestamp"]
            sequentialData[outerIndex].append(
                [indexActivity, time.total_seconds()])
        # convert the collected timedeltas to seconds once per trace
        timesSeconds = [[i.total_seconds() for i in x] for x in times]
        dataVectors.append(timesSeconds)
    # transform dataVectors to contain times per activity
    timesPerActivity = [[
        k for i in [x[index] for x in dataVectors] for k in i
    ] for index in range(len(dataVectors[0]))]
    # one StandardScaler per activity, fitted on all of that activity's times
    standarScalers = []
    for i in timesPerActivity:
        sc = StandardScaler()
        numpyArray = np.array(i).reshape(-1, 1)
        sc.fit(numpyArray)
        standarScalers.append(sc)

    #create pairwise data [traceIndex,activityA,activityB,standarizedTimeA,standarizedTimeB]
    data = []
    for traceIndex, trace in enumerate(sequentialData):
        for eventIndex, event in enumerate(trace[:-1]):
            eventNext = sequentialData[traceIndex][eventIndex + 1]
            timeA = standarScalers[event[0]].transform(
                np.array(event[1]).reshape(1, -1))
            timeB = standarScalers[eventNext[0]].transform(
                np.array(eventNext[1]).reshape(1, -1))
            data.append([
                traceIndex, event[0], eventNext[0],
                round(float(timeA), 5),
                round(float(timeB), 5), eventIndex
            ])
    return data
Example #14
def apply(log, parameters=None):
    """
    Gets the performance HNet

    Parameters
    ------------
    log
        Log
    parameters
        Parameters of the algorithm

    Returns
    ------------
    base64
        Base64 of an SVG representing the model
    model
        Text representation of the model
    format
        Format of the model
    """
    if parameters is None:
        parameters = {}

    decreasingFactor = parameters[
        "decreasingFactor"] if "decreasingFactor" in parameters else constants.DEFAULT_DEC_FACTOR

    activity_key = parameters[pm4_constants.PARAMETER_CONSTANT_ACTIVITY_KEY] if pm4_constants.PARAMETER_CONSTANT_ACTIVITY_KEY in parameters else xes.DEFAULT_NAME_KEY

    log = attributes_filter.filter_log_on_max_no_activities(log, max_no_activities=constants.MAX_NO_ACTIVITIES,
                                                            parameters=parameters)
    filtered_log = auto_filter.apply_auto_filter(log, parameters=parameters)

    activities_count = attributes_filter.get_attribute_values(filtered_log, activity_key)
    start_activities_count = start_activities_filter.get_start_activities(filtered_log, parameters=parameters)
    end_activities_count = end_activities_filter.get_end_activities(filtered_log, parameters=parameters)
    activities = list(activities_count.keys())
    start_activities = list(start_activities_count.keys())
    end_activities = list(end_activities_count.keys())

    dfg_freq = dfg_factory.apply(filtered_log, parameters=parameters)
    dfg_perf = dfg_factory.apply(filtered_log, variant="performance", parameters=parameters)

    heu_net = HeuristicsNet(dfg_freq, performance_dfg=dfg_perf, activities=activities, start_activities=start_activities, end_activities=end_activities, activities_occurrences=activities_count)

    heu_net.calculate(dfg_pre_cleaning_noise_thresh=constants.DEFAULT_DFG_CLEAN_MULTIPLIER * decreasingFactor)

    vis = heu_vis_factory.apply(heu_net, parameters={"format": "svg"})
    vis2 = heu_vis_factory.apply(heu_net, parameters={"format": "dot"})

    gviz_base64 = get_base64_from_file(vis2.name)

    return get_base64_from_file(vis.name), None, "", "xes", activities, start_activities, end_activities, gviz_base64, [], "heuristics", "perf", None, "", activity_key
Example #15
def dataSequence(log):
    activities = log_attributes_filter.get_attribute_values(
        log, "concept:name")
    letters = list("abcdefghijklmnopqrstuvwxyz")
    bag = [[] for i in range(2)]
    for index, activity in enumerate(activities):
        bag[0].append(activity)
        bag[1].append(getActivityLetter(index, letters))
    response = []
    for trace in log:
        response.append(transformAtrace(trace, bag))
    return response, bag
Example #16
    def test_dfdoc1(self):
        # to avoid static-method warnings in tests, which by construction
        # of the unittest package have to be expressed this way
        self.dummy_variable = "dummy_value"
        from pm4py.objects.log.importer.xes import factory as xes_importer
        log = xes_importer.import_log(os.path.join("input_data", "running-example.xes"))
        from pm4py.algo.discovery.dfg import factory as dfg_factory
        dfg = dfg_factory.apply(log)
        from pm4py.algo.filtering.log.attributes import attributes_filter
        activities_count = attributes_filter.get_attribute_values(log, "concept:name")

        from pm4py.visualization.dfg.versions import simple_visualize as dfg_visualize
        gviz = dfg_visualize.graphviz_visualization(activities_count, dfg)
        del gviz
Example #17
def getaverageduration3(log, logname, logtime, logstti, logcoti):
    # logtime is unused here: each event's duration comes from its own
    # start (logstti) and complete (logcoti) timestamp attributes
    timedict = {}
    durationdict = {}
    activities = attributes_filter.get_attribute_values(log, logname)
    fmt = '%Y-%m-%d %H:%M:%S'
    duration = []
    for trace in log:
        for event in trace:
            end = str(event[logcoti])[0:19]
            start = str(event[logstti])[0:19]
            ts = dt.datetime.strptime(end, fmt) - dt.datetime.strptime(
                start, fmt)
            seconds = int(ts.total_seconds())
            if event[logname] not in durationdict:
                durationdict[event[logname]] = [seconds]
            else:
                durationdict[event[logname]].append(seconds)
            if event[logname] not in timedict:
                timedict[event[logname]] = (seconds, 1)
            else:
                timedict[event[logname]] = (timedict[event[logname]][0] + seconds,
                                            timedict[event[logname]][1] + 1)
    # turn the (sum, count) pairs into averages
    for key in timedict:
        timedict[key] = timedict[key][0] / timedict[key][1]
    for ele in activities:
        duration.append(timedict[ele])
    deviationlist = []  # per-activity population standard deviation; computed but never returned
    for key in durationdict:
        literal = 0
        for ele in durationdict[key]:
            literal += pow(timedict[key] - ele, 2)
        deviation = pow(literal / len(durationdict[key]), 1 / 2)
        deviationlist.append((key, deviation))

    return duration
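A usage sketch, assuming an event log whose events carry separate start and complete timestamp attributes; the attribute names below are hypothetical:

durations = getaverageduration3(log, "concept:name", "time:timestamp",
                                "start_timestamp", "complete_timestamp")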
Example #18
def apply_heu(log, parameters=None):
    """
    Discovers an Heuristics Net using Heuristics Miner

    Parameters
    ------------
    log
        Event log
    parameters
        Possible parameters of the algorithm,
        including: activity_key, case_id_glue, timestamp_key,
        dependency_thresh, and_measure_thresh, min_act_count, min_dfg_occurrences, dfg_pre_cleaning_noise_thresh,
        loops_length_two_thresh

    Returns
    ------------
    heu
        Heuristics Net
    """
    if parameters is None:
        parameters = {}

    activity_key = parameters[
        constants.
        PARAMETER_CONSTANT_ACTIVITY_KEY] if constants.PARAMETER_CONSTANT_ACTIVITY_KEY in parameters else xes.DEFAULT_NAME_KEY

    start_activities = log_sa_filter.get_start_activities(
        log, parameters=parameters)
    end_activities = log_ea_filter.get_end_activities(log,
                                                      parameters=parameters)
    activities_occurrences = log_attributes.get_attribute_values(
        log, activity_key, parameters=parameters)
    activities = list(activities_occurrences.keys())
    dfg = dfg_factory.apply(log, parameters=parameters)
    parameters_w2 = deepcopy(parameters)
    parameters_w2["window"] = 2
    dfg_window_2 = dfg_factory.apply(log, parameters=parameters_w2)
    freq_triples = dfg_factory.apply(log,
                                     parameters=parameters,
                                     variant="freq_triples")

    return apply_heu_dfg(dfg,
                         activities=activities,
                         activities_occurrences=activities_occurrences,
                         start_activities=start_activities,
                         end_activities=end_activities,
                         dfg_window_2=dfg_window_2,
                         freq_triples=freq_triples,
                         parameters=parameters)
Example #19
def execFreq(clusters, activityKey):
    EF = []
    for i in range(len(clusters)):
        activities_count = attributes_filter.get_attribute_values(
            clusters[i], attribute_key=activityKey)
        EF.append(activities_count)
    EF_df = pd.DataFrame.from_dict(EF, orient='columns', dtype=None).T
    EF_df = EF_df.reset_index().melt(id_vars='index',
                                     var_name='cluster',
                                     value_name='activityCount')
    EF_df = EF_df.fillna(0)
    EF_df = EF_df.rename(columns={'index': 'activity'})
    ##############  Execution Frequency: case ############
    EF_EFc = execFreqCase(clusters, EF_df)
    return EF_EFc
Example #20
def transformTraces(log: EventLog) -> list:
    activities = attributes_filter.get_attribute_values(log, "concept:name")
    activity_names = [i for i in activities]
    data, data_durations = mean_value_per_Activity(log)
    log_list = []
    for n_trace, trace in enumerate(log):
        l_trace = [0 for i in range(len(activity_names))]
        times = [0 for i in range(len(activity_names))]
        for n_event, event in enumerate(trace):
            index = activity_names.index(event["concept:name"])
            l_trace[index] += data_durations[n_trace][n_event]
            times[index] += 1
        # average duration per activity within the trace
        l_trace = [x / y if y != 0 else 0 for x, y in zip(l_trace, times)]
        log_list.append(l_trace)
    means, stdevs = meanAndstdev(data, activity_names)
    # z-normalize per activity across the log
    log_list = [[(x - y) / z if z != 0 else 0 for x, y, z in zip(l, means, stdevs)]
                for l in log_list]
    return log_list
Example #21
def apply_tree(log, parameters):
    """
    Apply the IMDF algorithm to a log obtaining a process tree

    Parameters
    ----------
    log
        Log
    parameters
        Parameters of the algorithm, including:
            pmutil.constants.PARAMETER_CONSTANT_ACTIVITY_KEY -> attribute of the log to use as activity name
            (default concept:name)

    Returns
    ----------
    tree
        Process tree
    """
    if parameters is None:
        parameters = {}
    if pmutil.constants.PARAMETER_CONSTANT_ACTIVITY_KEY not in parameters:
        parameters[pmutil.constants.
                   PARAMETER_CONSTANT_ACTIVITY_KEY] = xes_util.DEFAULT_NAME_KEY
    activity_key = parameters[pmutil.constants.PARAMETER_CONSTANT_ACTIVITY_KEY]

    # get the DFG
    dfg = [(k, v) for k, v in dfg_inst.apply(
        log,
        parameters={
            pmutil.constants.PARAMETER_CONSTANT_ACTIVITY_KEY: activity_key
        }).items() if v > 0]

    # get the activities in the log
    activities = attributes_filter.get_attribute_values(log, activity_key)

    # check if the log contains empty traces
    contains_empty_traces = False
    traces_length = [len(trace) for trace in log]
    if traces_length:
        contains_empty_traces = min(traces_length) == 0

    return apply_tree_dfg(dfg,
                          parameters,
                          activities=activities,
                          contains_empty_traces=contains_empty_traces)
Example #22
    def get_attribute_values(self, attribute_key, parameters=None):
        """
        Gets the attribute values from the log

        Returns
        -------------
        attribute_values
            List of values
        """
        if parameters is None:
            parameters = {}
        parameters[constants.PARAMETER_CONSTANT_ACTIVITY_KEY] = self.activity_key
        parameters[constants.PARAMETER_CONSTANT_ATTRIBUTE_KEY] = attribute_key
        initial_dict = attributes_filter.get_attribute_values(self.log, attribute_key, parameters=parameters)
        return_dict = {}
        for key in initial_dict:
            return_dict[str(key)] = int(initial_dict[key])
        return return_dict
Example #23
def dfg_vis(dfg, log=None, parameters=None, activities_count=None, measure="frequency"):
    if parameters is None:
        parameters = {}

    activity_key = (
        parameters[PARAMETER_CONSTANT_ACTIVITY_KEY]
        if PARAMETER_CONSTANT_ACTIVITY_KEY in parameters
        else xes.DEFAULT_NAME_KEY
    )

    max_no_of_edges_in_diagram = 75

    if "maxNoOfEdgesInDiagram" in parameters:
        max_no_of_edges_in_diagram = parameters["maxNoOfEdgesInDiagram"]

    start_activities = (
        parameters["start_activities"] if "start_activities" in parameters else []
    )
    end_activities = (
        parameters["end_activities"] if "end_activities" in parameters else []
    )

    if activities_count is None:
        if log is not None:
            activities_count = attributes_filter.get_attribute_values(
                log, activity_key, parameters=parameters
            )
            activities_count["start"] = len(log)
        else:
            activities = dfg_utils.get_activities_from_dfg(dfg)
            activities_count = {key: 1 for key in activities}
            activities_count["start"] = None

    return graphviz_visualization(
        activities_count,
        dfg,
        measure=measure,
        max_no_of_edges_in_diagram=max_no_of_edges_in_diagram,
        start_activities=start_activities,
        end_activities=end_activities,
    )
Example #24
def dataPreprocess(log):
    """
        Transform the data from the log into one vector per trace.
    """
    activities_all = log_attributes_filter.get_attribute_values(log, "concept:name")
    activities = list(activities_all.keys())
    dataVectors = []
    times = [[] for i in range(len(activities))]
    for trace in log:
        activitiesCounter = [0 for i in range(len(activities))]
        timesSpend = [datetime.timedelta(0) for i in range(len(activities))]
        previousTime = trace.attributes["REG_DATE"]
        for index, event in enumerate(trace):
            indexActivity = activities.index(event["concept:name"])
            activitiesCounter[indexActivity] += 1
            timesSpend[indexActivity] += event["time:timestamp"] - previousTime
            times[indexActivity].append(event["time:timestamp"] - previousTime)
            previousTime = event["time:timestamp"]
        # mean duration per activity for this trace
        timesSpend = [(timesSpend[i] / activitiesCounter[i]).total_seconds()
                      if activitiesCounter[i] != 0 else 0
                      for i in range(len(activities))]
        dataVectors.append(activitiesCounter + timesSpend)
    return dataVectors, times, activities
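The returned vectors concatenate per-activity counts with mean durations, so they drop straight into a clustering step; a hedged sketch assuming scikit-learn is available:

from sklearn.cluster import KMeans

vectors, times, activities = dataPreprocess(log)
labels = KMeans(n_clusters=3, n_init=10).fit_predict(vectors)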
Example #25
def get_log_match_with_model(log, bpmn_graph, parameters=None):
    """
    Get log match with model

    Parameters
    ------------
    log
        Trace log
    bpmn_graph
        BPMN graph
    parameters
        Possible parameters of the algorithm

    Returns
    ------------
    model_to_log
        Correspondence between model activities and log activities
    log_to_model
        Correspondence between log activities and model activities
    """
    if parameters is None:
        parameters = {}

    activity_key = parameters[
        constants.PARAMETER_CONSTANT_ACTIVITY_KEY] if constants.PARAMETER_CONSTANT_ACTIVITY_KEY in parameters else xes.DEFAULT_NAME_KEY

    model_to_log = {}
    log_to_model = {}

    log_activities = list(attributes_filter.get_attribute_values(log, activity_key).keys())
    nodes = bpmn_graph.diagram_graph.nodes
    bpmn_activities = list([nodes[n]["node_name"] for n in nodes if "task" in nodes[n]["type"].lower()])

    for act in bpmn_activities:
        close_matches = difflib.get_close_matches(act, log_activities)
        if close_matches and close_matches[0] not in log_to_model:
            model_to_log[act] = close_matches[0]
            log_to_model[close_matches[0]] = act

    return model_to_log, log_to_model
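For reference, difflib.get_close_matches performs the fuzzy pairing here; an illustrative call with hypothetical labels (default similarity cutoff 0.6):

import difflib

print(difflib.get_close_matches("Registrate request",
                                ["register request", "check ticket"]))
# expected: ['register request']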
Example #26
def dataPreprocess2017(log):
    """
        Takes the log file and transform every trace in a way, that we will keep 
        the information for the time per event and also the original sequence
        for every event in the same trace
    """
    activities_all = log_attributes_filter.get_attribute_values(
        log, "concept:name")
    activities = list(activities_all.keys())
    times = [[] for i in range(len(activities))]
    sequence = []
    for indexTrace, trace in enumerate(log):
        previousTime = trace[0]['time:timestamp']
        sequence.append([])
        for index, event in enumerate(trace):
            indexActivity = activities.index(event["concept:name"])
            time = event["time:timestamp"] - previousTime
            times[indexActivity].append(
                [indexTrace, index, time.total_seconds()])
            previousTime = event["time:timestamp"]
            sequence[-1].append([indexActivity, time.total_seconds()])
    return times, sequence
Example #27
def form_encoding_dictio_from_log(log, parameters=None):
    """
    Forms the encoding dictionary from the current log

    Parameters
    -------------
    log
        Event log
    parameters
        Parameters of the algorithm

    Returns
    -------------
    encoding_dictio
        Encoding dictionary
    """
    if parameters is None:
        parameters = {}

    activity_key = parameters[
        PARAMETER_CONSTANT_ACTIVITY_KEY] if PARAMETER_CONSTANT_ACTIVITY_KEY in parameters else xes.DEFAULT_NAME_KEY

    from pm4py.algo.filtering.log.attributes import attributes_filter

    shared_obj = SharedObj()

    activities = attributes_filter.get_attribute_values(log,
                                                        activity_key,
                                                        parameters=parameters)

    mapping = {}

    for act in activities:
        get_new_char(act, shared_obj)
        mapping[act] = shared_obj.mapping_dictio[act]

    return mapping
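The returned mapping supports compact string encodings of traces; a hedged illustration of how it might be applied (SharedObj and get_new_char are defined elsewhere in the source project):

mapping = form_encoding_dictio_from_log(log)
encoded = ["".join(mapping[e["concept:name"]] for e in trace) for trace in log]
# each trace becomes a short string, convenient for variant comparison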
Example #28
    def apply_fall_through(self, parameters=None):
        if parameters is None:
            parameters = {}
        activity_key = exec_utils.get_param_value(
            Parameters.ACTIVITY_KEY, parameters,
            pmutil.xes_constants.DEFAULT_NAME_KEY)

        # set flags for fall_throughs, base case is True (enabled)
        use_empty_trace = (Parameters.EMPTY_TRACE_KEY not in parameters
                           ) or parameters[Parameters.EMPTY_TRACE_KEY]
        use_act_once_per_trace = (
            Parameters.ONCE_PER_TRACE_KEY
            not in parameters) or parameters[Parameters.ONCE_PER_TRACE_KEY]
        use_act_concurrent = (Parameters.CONCURRENT_KEY not in parameters
                              ) or parameters[Parameters.CONCURRENT_KEY]
        use_strict_tau_loop = (Parameters.STRICT_TAU_LOOP_KEY not in parameters
                               ) or parameters[Parameters.STRICT_TAU_LOOP_KEY]
        use_tau_loop = (Parameters.TAU_LOOP_KEY not in parameters
                        ) or parameters[Parameters.TAU_LOOP_KEY]

        if use_empty_trace:
            empty_trace, new_log = fall_through.empty_trace(self.log)
            # if an empty trace is found, the empty-trace fall-through applies
        else:
            empty_trace = False
        if empty_trace:
            logging.debug("empty_trace")
            activites_left = []
            for trace in new_log:
                for act in trace:
                    if act[activity_key] not in activites_left:
                        activites_left.append(act[activity_key])
            self.detected_cut = 'empty_trace'
            new_dfg = [(k, v) for k, v in dfg_inst.apply(
                new_log, parameters=parameters).items() if v > 0]
            activities = attributes_filter.get_attribute_values(
                new_log, activity_key)
            start_activities = list(
                start_activities_filter.get_start_activities(
                    new_log, parameters=self.parameters).keys())
            end_activities = list(
                end_activities_filter.get_end_activities(
                    new_log, parameters=self.parameters).keys())
            self.children.append(
                SubtreePlain(
                    new_log,
                    new_dfg,
                    self.master_dfg,
                    self.initial_dfg,
                    activities,
                    self.counts,
                    self.rec_depth + 1,
                    noise_threshold=self.noise_threshold,
                    start_activities=start_activities,
                    end_activities=end_activities,
                    initial_start_activities=self.initial_start_activities,
                    initial_end_activities=self.initial_end_activities,
                    parameters=parameters))
        else:
            if use_act_once_per_trace:
                activity_once, new_log, small_log = fall_through.act_once_per_trace(
                    self.log, self.activities, activity_key)
                small_log = filtering_utils.keep_one_trace_per_variant(
                    small_log, parameters=parameters)
            else:
                activity_once = False
            if use_act_once_per_trace and activity_once:
                self.detected_cut = 'parallel'
                # create two new dfgs as we need them to append to self.children later
                new_dfg = [(k, v) for k, v in dfg_inst.apply(
                    new_log, parameters=parameters).items() if v > 0]
                activities = attributes_filter.get_attribute_values(
                    new_log, activity_key)
                small_dfg = [(k, v) for k, v in dfg_inst.apply(
                    small_log, parameters=parameters).items() if v > 0]
                small_activities = attributes_filter.get_attribute_values(
                    small_log, activity_key)
                self.children.append(
                    SubtreePlain(
                        small_log,
                        small_dfg,
                        self.master_dfg,
                        self.initial_dfg,
                        small_activities,
                        self.counts,
                        self.rec_depth + 1,
                        noise_threshold=self.noise_threshold,
                        initial_start_activities=self.initial_start_activities,
                        initial_end_activities=self.initial_end_activities,
                        parameters=parameters))
                # continue with the recursion on the new log
                start_activities = list(
                    start_activities_filter.get_start_activities(
                        new_log, parameters=self.parameters).keys())
                end_activities = list(
                    end_activities_filter.get_end_activities(
                        new_log, parameters=self.parameters).keys())
                self.children.append(
                    SubtreePlain(
                        new_log,
                        new_dfg,
                        self.master_dfg,
                        self.initial_dfg,
                        activities,
                        self.counts,
                        self.rec_depth + 1,
                        noise_threshold=self.noise_threshold,
                        start_activities=start_activities,
                        end_activities=end_activities,
                        initial_start_activities=self.initial_start_activities,
                        initial_end_activities=self.initial_end_activities,
                        parameters=parameters))

            else:
                if use_act_concurrent:
                    activity_concurrent, new_log, small_log, activity_left_out = fall_through.activity_concurrent(
                        self,
                        self.log,
                        self.activities,
                        activity_key,
                        parameters=parameters)
                    small_log = filtering_utils.keep_one_trace_per_variant(
                        small_log, parameters=parameters)
                else:
                    activity_concurrent = False
                if use_act_concurrent and activity_concurrent:
                    self.detected_cut = 'parallel'
                    # create two new dfgs on to append later
                    new_dfg = [(k, v) for k, v in dfg_inst.apply(
                        new_log, parameters=parameters).items() if v > 0]
                    activities = attributes_filter.get_attribute_values(
                        new_log, activity_key)
                    small_dfg = [(k, v) for k, v in dfg_inst.apply(
                        small_log, parameters=parameters).items() if v > 0]
                    small_activities = attributes_filter.get_attribute_values(
                        small_log, activity_key)
                    # append the concurrent activity as leaf:
                    self.children.append(
                        SubtreePlain(
                            small_log,
                            small_dfg,
                            self.master_dfg,
                            self.initial_dfg,
                            small_activities,
                            self.counts,
                            self.rec_depth + 1,
                            noise_threshold=self.noise_threshold,
                            initial_start_activities=self.
                            initial_start_activities,
                            initial_end_activities=self.initial_end_activities,
                            parameters=parameters))
                    # continue with the recursion on the new log:
                    start_activities = list(
                        start_activities_filter.get_start_activities(
                            new_log, parameters=self.parameters).keys())
                    end_activities = list(
                        end_activities_filter.get_end_activities(
                            new_log, parameters=self.parameters).keys())
                    self.children.append(
                        SubtreePlain(
                            new_log,
                            new_dfg,
                            self.master_dfg,
                            self.initial_dfg,
                            activities,
                            self.counts,
                            self.rec_depth + 1,
                            noise_threshold=self.noise_threshold,
                            start_activities=start_activities,
                            end_activities=end_activities,
                            initial_start_activities=self.
                            initial_start_activities,
                            initial_end_activities=self.initial_end_activities,
                            parameters=parameters))
                else:
                    if use_strict_tau_loop:
                        strict_tau_loop, new_log = fall_through.strict_tau_loop(
                            self.log, self.start_activities,
                            self.end_activities, activity_key)
                        new_log = filtering_utils.keep_one_trace_per_variant(
                            new_log, parameters=parameters)
                    else:
                        strict_tau_loop = False
                    if use_strict_tau_loop and strict_tau_loop:
                        activites_left = []
                        for trace in new_log:
                            for act in trace:
                                if act[activity_key] not in activites_left:
                                    activites_left.append(act[activity_key])
                        self.detected_cut = 'strict_tau_loop'
                        new_dfg = [(k, v) for k, v in dfg_inst.apply(
                            new_log, parameters=parameters).items() if v > 0]
                        activities = attributes_filter.get_attribute_values(
                            new_log, activity_key)
                        start_activities = list(
                            start_activities_filter.get_start_activities(
                                new_log, parameters=self.parameters).keys())
                        end_activities = list(
                            end_activities_filter.get_end_activities(
                                new_log, parameters=self.parameters).keys())
                        self.children.append(
                            SubtreePlain(new_log,
                                         new_dfg,
                                         self.master_dfg,
                                         self.initial_dfg,
                                         activities,
                                         self.counts,
                                         self.rec_depth + 1,
                                         noise_threshold=self.noise_threshold,
                                         start_activities=start_activities,
                                         end_activities=end_activities,
                                         initial_start_activities=self.
                                         initial_start_activities,
                                         initial_end_activities=self.
                                         initial_end_activities,
                                         parameters=parameters))
                    else:
                        if use_tau_loop:
                            tau_loop, new_log = fall_through.tau_loop(
                                self.log, self.start_activities, activity_key)
                            new_log = filtering_utils.keep_one_trace_per_variant(
                                new_log, parameters=parameters)
                        else:
                            tau_loop = False
                        if use_tau_loop and tau_loop:
                            activites_left = []
                            for trace in new_log:
                                for act in trace:
                                    if act[activity_key] not in activites_left:
                                        activites_left.append(
                                            act[activity_key])
                            self.detected_cut = 'tau_loop'
                            new_dfg = [(k, v) for k, v in dfg_inst.apply(
                                new_log, parameters=parameters).items()
                                       if v > 0]
                            activities = attributes_filter.get_attribute_values(
                                new_log, activity_key)
                            start_activities = list(
                                start_activities_filter.get_start_activities(
                                    new_log,
                                    parameters=self.parameters).keys())
                            end_activities = list(
                                end_activities_filter.get_end_activities(
                                    new_log,
                                    parameters=self.parameters).keys())
                            self.children.append(
                                SubtreePlain(
                                    new_log,
                                    new_dfg,
                                    self.master_dfg,
                                    self.initial_dfg,
                                    activities,
                                    self.counts,
                                    self.rec_depth + 1,
                                    noise_threshold=self.noise_threshold,
                                    start_activities=start_activities,
                                    end_activities=end_activities,
                                    initial_start_activities=self.
                                    initial_start_activities,
                                    initial_end_activities=self.
                                    initial_end_activities,
                                    parameters=parameters))
                        else:
                            logging.debug("flower model")
                            activites_left = []
                            for trace in self.log:
                                for act in trace:
                                    if act[activity_key] not in activites_left:
                                        activites_left.append(
                                            act[activity_key])
                            self.detected_cut = 'flower'
Example #29
    def detect_cut(self, second_iteration=False, parameters=None):
        if pkgutil.find_loader("networkx"):
            import networkx as nx

            if parameters is None:
                parameters = {}
            activity_key = exec_utils.get_param_value(
                Parameters.ACTIVITY_KEY, parameters,
                pmutil.xes_constants.DEFAULT_NAME_KEY)

            # check base cases:
            empty_log = base_case.empty_log(self.log)
            single_activity = base_case.single_activity(self.log, activity_key)
            if empty_log:
                self.detected_cut = 'empty_log'
            elif single_activity:
                self.detected_cut = 'single_activity'
            # if no base cases are found, search for a cut:
            else:
                conn_components = detection_utils.get_connected_components(
                    self.ingoing, self.outgoing, self.activities)
                this_nx_graph = transform_dfg_to_directed_nx_graph(
                    self.dfg, activities=self.activities)
                strongly_connected_components = [
                    list(x)
                    for x in nx.strongly_connected_components(this_nx_graph)
                ]
                xor_cut = self.detect_xor(conn_components)
                # the following part searches for a cut in the current log;
                # if a cut is found, the log is split according to the cut and the resulting logs are saved in new_logs
                # recursion is used on all the logs in new_logs
                if xor_cut[0]:
                    logging.debug("xor_cut")
                    self.detected_cut = 'concurrent'
                    new_logs = split.split_xor(xor_cut[1], self.log,
                                               activity_key)
                    for i in range(len(new_logs)):
                        new_logs[
                            i] = filtering_utils.keep_one_trace_per_variant(
                                new_logs[i], parameters=parameters)
                    for l in new_logs:
                        new_dfg = [(k, v) for k, v in dfg_inst.apply(
                            l, parameters=parameters).items() if v > 0]
                        activities = attributes_filter.get_attribute_values(
                            l, activity_key)
                        start_activities = list(
                            start_activities_filter.get_start_activities(
                                l, parameters=parameters).keys())
                        end_activities = list(
                            end_activities_filter.get_end_activities(
                                l, parameters=parameters).keys())
                        self.children.append(
                            SubtreePlain(l,
                                         new_dfg,
                                         self.master_dfg,
                                         self.initial_dfg,
                                         activities,
                                         self.counts,
                                         self.rec_depth + 1,
                                         noise_threshold=self.noise_threshold,
                                         start_activities=start_activities,
                                         end_activities=end_activities,
                                         initial_start_activities=self.
                                         initial_start_activities,
                                         initial_end_activities=self.
                                         initial_end_activities,
                                         parameters=parameters))
                else:
                    sequence_cut = cut_detection.detect_sequential_cut(
                        self, self.dfg, strongly_connected_components)
                    if sequence_cut[0]:
                        logging.debug("sequence_cut")
                        new_logs = split.split_sequence(
                            sequence_cut[1], self.log, activity_key)
                        for i in range(len(new_logs)):
                            new_logs[
                                i] = filtering_utils.keep_one_trace_per_variant(
                                    new_logs[i], parameters=parameters)
                        self.detected_cut = "sequential"
                        for l in new_logs:
                            new_dfg = [(k, v) for k, v in dfg_inst.apply(
                                l, parameters=parameters).items() if v > 0]
                            activities = attributes_filter.get_attribute_values(
                                l, activity_key)
                            start_activities = list(
                                start_activities_filter.get_start_activities(
                                    l, parameters=parameters).keys())
                            end_activities = list(
                                end_activities_filter.get_end_activities(
                                    l, parameters=parameters).keys())
                            self.children.append(
                                SubtreePlain(
                                    l,
                                    new_dfg,
                                    self.master_dfg,
                                    self.initial_dfg,
                                    activities,
                                    self.counts,
                                    self.rec_depth + 1,
                                    noise_threshold=self.noise_threshold,
                                    start_activities=start_activities,
                                    end_activities=end_activities,
                                    initial_start_activities=self.
                                    initial_start_activities,
                                    initial_end_activities=self.
                                    initial_end_activities,
                                    parameters=parameters))
                    else:
                        parallel_cut = self.detect_concurrent()
                        if parallel_cut[0]:
                            logging.debug("parallel_cut")
                            new_logs = split.split_parallel(
                                parallel_cut[1], self.log, activity_key)
                            for i in range(len(new_logs)):
                                new_logs[
                                    i] = filtering_utils.keep_one_trace_per_variant(
                                        new_logs[i], parameters=parameters)
                            self.detected_cut = "parallel"
                            for l in new_logs:
                                new_dfg = [(k, v) for k, v in dfg_inst.apply(
                                    l, parameters=parameters).items() if v > 0]
                                activities = attributes_filter.get_attribute_values(
                                    l, activity_key)
                                start_activities = list(
                                    start_activities_filter.
                                    get_start_activities(
                                        l, parameters=parameters).keys())
                                end_activities = list(
                                    end_activities_filter.get_end_activities(
                                        l, parameters=parameters).keys())
                                self.children.append(
                                    SubtreePlain(
                                        l,
                                        new_dfg,
                                        self.master_dfg,
                                        self.initial_dfg,
                                        activities,
                                        self.counts,
                                        self.rec_depth + 1,
                                        noise_threshold=self.noise_threshold,
                                        start_activities=start_activities,
                                        end_activities=end_activities,
                                        initial_start_activities=self.
                                        initial_start_activities,
                                        initial_end_activities=self.
                                        initial_end_activities,
                                        parameters=parameters))
                        else:
                            loop_cut = self.detect_loop()
                            if loop_cut[0]:
                                logging.debug("loop_cut")
                                new_logs = split.split_loop(
                                    loop_cut[1], self.log, activity_key)
                                for i in range(len(new_logs)):
                                    new_logs[
                                        i] = filtering_utils.keep_one_trace_per_variant(
                                            new_logs[i], parameters=parameters)
                                self.detected_cut = "loopCut"
                                for l in new_logs:
                                    new_dfg = [
                                        (k, v) for k, v in dfg_inst.apply(
                                            l, parameters=parameters).items()
                                        if v > 0
                                    ]
                                    activities = attributes_filter.get_attribute_values(
                                        l, activity_key)
                                    start_activities = list(
                                        start_activities_filter.
                                        get_start_activities(
                                            l, parameters=parameters).keys())
                                    end_activities = list(
                                        end_activities_filter.
                                        get_end_activities(
                                            l, parameters=parameters).keys())
                                    self.children.append(
                                        SubtreePlain(
                                            l,
                                            new_dfg,
                                            self.master_dfg,
                                            self.initial_dfg,
                                            activities,
                                            self.counts,
                                            self.rec_depth + 1,
                                            noise_threshold=self.
                                            noise_threshold,
                                            start_activities=start_activities,
                                            end_activities=end_activities,
                                            initial_start_activities=self.
                                            initial_start_activities,
                                            initial_end_activities=self.
                                            initial_end_activities,
                                            parameters=parameters))

                            # if the code gets to this point, there is no base case and no cut found in the log
                            # therefore, we now apply a fall-through:
                            else:
                                self.apply_fall_through(parameters)
        else:
            msg = "networkx is not available. inductive miner cannot be used!"
            logging.error(msg)
            raise Exception(msg)
Example #30
def apply(log, parameters=None):
    """
    Gets the frequency DFG

    Parameters
    ------------
    log
        Log
    parameters
        Parameters of the algorithm

    Returns
    ------------
    base64
        Base64 of an SVG representing the model
    model
        Text representation of the model
    format
        Format of the model
    """
    if parameters is None:
        parameters = {}

    decreasingFactor = parameters[
        "decreasingFactor"] if "decreasingFactor" in parameters else constants.DEFAULT_DEC_FACTOR

    activity_key = parameters[
        pm4_constants.
        PARAMETER_CONSTANT_ACTIVITY_KEY] if pm4_constants.PARAMETER_CONSTANT_ACTIVITY_KEY in parameters else xes.DEFAULT_NAME_KEY

    log = attributes_filter.filter_log_on_max_no_activities(
        log,
        max_no_activities=constants.MAX_NO_ACTIVITIES,
        parameters=parameters)
    filtered_log = auto_filter.apply_auto_filter(log, parameters=parameters)

    activities_count = attributes_filter.get_attribute_values(
        filtered_log, activity_key)
    activities = list(activities_count.keys())
    start_activities = list(
        start_activities_filter.get_start_activities(
            filtered_log, parameters=parameters).keys())
    end_activities = list(
        end_activities_filter.get_end_activities(filtered_log,
                                                 parameters=parameters).keys())

    dfg = dfg_factory.apply(filtered_log, parameters=parameters)
    dfg = clean_dfg_based_on_noise_thresh(
        dfg,
        activities,
        decreasingFactor * constants.DEFAULT_DFG_CLEAN_MULTIPLIER,
        parameters=parameters)

    parameters["format"] = "svg"
    parameters["start_activities"] = start_activities
    parameters["end_activities"] = end_activities

    gviz = dfg_vis_factory.apply(dfg,
                                 log=filtered_log,
                                 variant="frequency",
                                 parameters=parameters)

    gviz_base64 = base64.b64encode(str(gviz).encode('utf-8'))

    ret_graph = get_graph.get_graph_from_dfg(dfg, start_activities,
                                             end_activities)

    net, im, fm = dfg_conv_factory.apply(dfg,
                                         parameters={
                                             "start_activities":
                                             start_activities,
                                             "end_activities": end_activities
                                         })

    return get_base64_from_gviz(gviz), export_petri_as_string(
        net, im, fm
    ), ".pnml", "xes", activities, start_activities, end_activities, gviz_base64, ret_graph, "dfg", "freq", None, "", activity_key