示例#1
0
def execute_script():
    """
    Round-trip every XES log found in ``xesinput`` and render its DFG.

    For each file whose name contains ".xe" the function:

    1. imports the log and prints trace/event counts and classifiers;
    2. exports it to ``xescert_exportlogs/exp_<name>``;
    3. looks for (or inserts) an activity classifier attribute, falling back
       to ``concept:name``;
    4. saves a frequency DFG image under ``xescert_images`` when that
       attribute is present on the first event of the event stream;
    5. re-imports the exported file and prints the same statistics again.

    Paths are relative to the current working directory.
    """
    log_input_directory = "xesinput"
    all_logs_names = [
        name for name in os.listdir(log_input_directory) if ".xe" in name
    ]

    for logName in all_logs_names:
        # os.path.join instead of a literal "\\" so the script also runs on
        # platforms whose separator is not a backslash.
        log_path = os.path.join(log_input_directory, logName)
        log = xes_importer.import_log(log_path)
        print("\n\n")
        print("log loaded")
        print("Number of traces - ", len(log))
        event_log = log_conv_fact.apply(log,
                                        variant=log_conv_fact.TO_EVENT_STREAM)
        print("Number of events - ", len(event_log))
        print("Classifiers ", log.classifiers)
        exp_log_name = os.path.join("xescert_exportlogs", "exp_" + logName)
        print("exporting log", exp_log_name)
        xes_exporter.export_log(log, exp_log_name)
        print("exported log", exp_log_name)

        log, classifier_attr_key = insert_classifier.search_act_class_attr(log)

        classifiers = list(log.classifiers.keys())
        if classifier_attr_key is None and classifiers:
            try:
                print(classifiers)
                log, classifier_attr_key = insert_classifier.insert_activity_classifier_attribute(
                    log, classifiers[0])
                print(classifier_attr_key)
            except Exception as exc:
                # Best effort: a broken classifier must not stop the run, but
                # Ctrl-C / SystemExit are no longer swallowed.
                print("exception in handling classifier", exc)

        if classifier_attr_key is None:
            classifier_attr_key = "concept:name"

        if len(event_log) > 0 and classifier_attr_key in event_log[0]:
            parameters = {
                constants.PARAMETER_CONSTANT_ACTIVITY_KEY: classifier_attr_key
            }

            dfg = dfg_factory.apply(log, parameters=parameters)
            gviz = dfg_vis_factory.apply(dfg,
                                         log=log,
                                         variant="frequency",
                                         parameters=parameters)

            # Swap only the last ".xes" for ".png".  A plain
            # str.replace("xes", "png") also rewrote "xes" inside the base
            # name ("boxes.xes" -> "bopng.png").
            head, marker, tail = logName.rpartition(".xes")
            if marker:
                image_name = head + ".png" + tail
            else:
                # No ".xes" in the name: keep the historical behaviour.
                image_name = logName.replace("xes", "png")
            dfg_vis_factory.save(gviz,
                                 os.path.join("xescert_images", image_name))

        print("Reimporting log file just exported - ", exp_log_name)

        log = xes_importer.import_log(exp_log_name)
        print("log loaded", exp_log_name)
        print("Number of traces - ", len(log))
        event_log = log_conv_fact.apply(log,
                                        variant=log_conv_fact.TO_EVENT_STREAM)
        print("Number of events - ", len(event_log))
        print("Classifiers ", log.classifiers)
示例#2
0
def bot_dfg(chat_id):
    """
    Render the frequency DFG of a chat's current log into a temporary file.

    :param chat_id: identifier passed to ``get_current_log`` to fetch the log
    :return: path of the temporary image file; the caller is responsible for
        deleting it
    """
    import os  # local import: the file's import section is outside this block

    log = get_current_log(chat_id)
    dfg = dfg_factory.apply(log)
    gviz = dfg_vis_factory.apply(dfg, log=log, variant="frequency")
    # mkstemp() does not insert a dot before the suffix, so it has to be part
    # of the suffix itself to get a real ".png" extension.
    descriptor, filename = tempfile.mkstemp(suffix=".png")
    # Only the path is needed; close the OS-level handle so it is not leaked
    # on every call.
    os.close(descriptor)
    dfg_vis_factory.save(gviz, filename)
    return filename
示例#3
0
def draw_DFG(dfg):
    """
    Render a directly-follows graph with the "performance" variant as SVG.

    The picture is written to the relative path ``dfg.svg``.

    :param dfg: directly-follows graph handed unchanged to the visualizer
    """
    svg_graph = dfg_vis_factory.apply(
        dfg,
        variant="performance",
        parameters={"format": "svg"},
    )
    dfg_vis_factory.save(svg_graph, "dfg.svg")
示例#4
0
def visualize_dfg(log, filename):
    """
    Discover the DFG of an event log and save its frequency view as SVG.

    :param log: event log the DFG is discovered from
    :param filename: path the rendered DFG is saved to
    """
    rendered = dfg_vis_factory.apply(
        dfg_factory.apply(log),
        log=log,
        parameters={"format": "svg"},
        variant='frequency',
    )
    dfg_vis_factory.save(rendered, filename)
示例#5
0
def Hueristics(file, location="/mnt/c/Users/harim/Downloads/dfg.png"):
    """
    Import an XES log and save its frequency directly-follows graph.

    Despite the name, no heuristics miner is involved: the function
    discovers a DFG and renders it with the "frequency" variant.

    :param file: path of the XES log to import
    :param location: path the image is saved to; defaults to the path that
        used to be hard-coded so existing callers are unaffected
    :return: the path the image was saved to
    """
    # Imports stay local, as in the original, so the function is
    # self-contained.
    from pm4py.objects.log.importer.xes import factory as xes_importer
    from pm4py.algo.discovery.dfg import factory as dfg_factory
    from pm4py.visualization.dfg import factory as dfg_vis_factory

    log = xes_importer.import_log(file)
    dfg = dfg_factory.apply(log)
    gviz = dfg_vis_factory.apply(dfg, log=log, variant="frequency")
    dfg_vis_factory.save(gviz, location)
    return location
示例#6
0
def export_process_model(dfgModel, log, filename):
    '''
    Description: render an already discovered DFG with the "frequency"
    variant in SVG format and save it
    Used: export a graphical process model under the provided file name
    Input: dfgModel (the DFG), log (passed to the visualizer), filename
    Output: N/A
    '''
    svg_options = {"format": "svg"}
    rendered = dfg_vis_factory.apply(
        dfgModel,
        log=log,
        variant="frequency",
        parameters=svg_options,
    )
    dfg_vis_factory.save(rendered, filename)
示例#7
0
def view_model(dfg, dir=""):
    """
    Save a frequency DFG picture as ``<dir>.svg`` and convert it to
    ``<dir>.pdf``.

    :param dfg: mapping from DFG edge to a numeric value; the values are
        rounded for the plot, the mapping itself is left untouched
    :param dir: path prefix (without extension) of the two output files
    :return: None
    """
    # Round into a copy: the original rounded the caller's mapping in place,
    # silently changing data the caller may still need unrounded.
    rounded = {edge: round(value) for edge, value in dfg.items()}

    parameters = {"format": "svg"}
    gviz = dfg_vis_factory.apply(rounded,
                                 variant="frequency",
                                 parameters=parameters)

    svg_path = dir + ".svg"
    dfg_vis_factory.save(gviz, svg_path)
    # The PDF is produced from the SVG that was just written.
    drawing = svg2rlg(svg_path)
    renderPDF.drawToFile(drawing, dir + ".pdf")
    return


# log = xes_import_factory.apply(r"C:\Gamal Elkoumy\PhD\OneDrive - Tartu Ülikool\Data\Data XES\Sepsis Cases - Event Log.xes")
# data=get_dataframe_from_event_stream(log)
# dfg_freq = dfg_factory.apply(log,variant="frequency")
#
# view_model(dfg_freq,r"C:\Gamal Elkoumy\PhD\OneDrive - Tartu Ülikool\Differential Privacy\source code\experiment_figures\temp")
示例#8
0
    def _create_image(self, gviz, img_name, verbose=False):
        """
        Create image file of the generated diagram (DFG,BPMN,Petr net)

        :param gviz: image file generated by pm4py
        :param img_name: name of the image to be saved
        :param verbose: display log while generating images
        """
        try:
            img_path = os.path.join(self.discovery_path, f'{self.filename}_{img_name}.pdf')
            if "alpha_miner" in img_name:
                vis_factory.save(gviz, img_path)
            elif "heuristic_miner" in img_name:
                hn_vis_factory.save(gviz, img_path)
            elif "petri_net" in img_name:
                pn_vis_factory.save(gviz, img_path)
            elif "DFG" in img_name:
                dfg_vis_factory.save(gviz, img_path)
            # elif "BPMN" in img_name:
            #     bpmn_vis_factory.save(gviz, img_path)

            if verbose:
                self.status_queue.put(f"[PROCESS MINING] Generated {img_name} in {img_path}")
        except PermissionError as e:
            print(f"[PROCESS MINING] Could not save image because of permission error: {e}")
            print(f"Trying to save image on desktop")
            img_path = os.path.join(utils.utils.DESKTOP, f'{self.filename}_{img_name}.pdf')
            if "alpha_miner" in img_name:
                vis_factory.save(gviz, img_path)
            elif "heuristic_miner" in img_name:
                hn_vis_factory.save(gviz, img_path)
            elif "petri_net" in img_name:
                pn_vis_factory.save(gviz, img_path)
            elif "DFG" in img_name:
                dfg_vis_factory.save(gviz, img_path)
            # elif "BPMN" in img_name:
            #     bpmn_vis_factory.save(gviz, img_path)
        except Exception as e:
            print(f"[PROCESS MINING] Could not save image: {e}")
示例#9
0
def show(model, tel, file_name, parameters):
    '''
    Show model and its quality measures
    :param model: model type (transition system, state based region, DFG miner, alpha miner);
        'ts' and 'sbr' go through the annotated automaton, 'alpha' through an
        alpha miner, every other value produces a DFG
    :param tel: input log; it must hold at least one trace with one event,
        because the type of tel[0][0] selects the code path
    :param file_name: img file name to show model; the part before its first
        '.' is used to build the output image name
    :param parameters: parameter for transition system (afreq, sfreq)
    :return: tuple (output_file_path, result, max_thresh); max_thresh is a
        dict with keys 'afreq' and 'sfreq' for 'ts'/'sbr' and None otherwise
    '''

    tel_flag = isinstance(tel[0][0], tel_event)

    # Name up to the first '.'.  The original slice used str.find(), which
    # returns -1 for a name without a dot and therefore cut off its last
    # character; split() keeps such a name intact.
    stem = file_name.split('.', 1)[0]

    if model in ['ts', 'sbr']:
        if tel_flag:
            output_file_path = os.path.join(
                "static", "images", stem + '_' + model + '_' +
                str(parameters['afreq_thresh']) + '_' +
                str(parameters['sfreq_thresh']) + ".png")
        else:
            output_file_path = os.path.join(
                "static", "images",
                "2" + "_" + stem + '_' + model + '_' +
                str(parameters[PARAM_KEY_DIRECTION]) + '_' +
                str(parameters[PARAM_KEY_WINDOW]) + "_" +
                str(parameters[PARAM_KEY_VIEW]) + ".png")
        auto = utils.discover_annotated_automaton(tel, parameters=parameters)

        # Largest annotated frequencies; they stay 0 when tel_flag is False.
        max_thresh = {}
        max_afreq = 0
        max_sfreq = 0

        if tel_flag:
            for trans in auto.transitions:
                max_afreq = max(max_afreq, trans.afreq)
            for state in auto.states:
                max_sfreq = max(max_sfreq, state.sfreq)
        max_thresh['afreq'] = max_afreq
        max_thresh['sfreq'] = max_sfreq

        if model == 'ts':
            result = {}
            gviz = vis_factory.apply(auto)
            vis_factory.save(gviz, output_file_path)
            result['num of transitions'] = len(auto.transitions)
            result['num of states'] = len(auto.states)

        else:
            net, im, fm = sb.petri_net_synthesis(auto)
            gviz = petri_vis_factory.apply(net, im, fm)
            petri_vis_factory.save(gviz, output_file_path)
            result = evaluation(net, im, fm, tel)

    else:
        if tel_flag:
            output_file_path = os.path.join(
                "static", "images", stem + '_' + model + '_' + ".png")
        else:
            output_file_path = os.path.join(
                "static", "images", "2" + stem + '_' + model + '_' + ".png")

        if model == 'alpha':
            if isinstance(tel[0][0], Event):
                net, im, fm = trans_alpha(tel)
            else:
                net, im, fm = alpha_miner.apply(tel)
            gviz = petri_vis_factory.apply(net, im, fm)
            petri_vis_factory.save(gviz, output_file_path)
            result = evaluation(net, im, fm, tel)

        else:
            dfg = dfg_factory.apply(tel)
            if tel_flag:
                dfg_tel = inductive_revise.get_dfg_graph_trans(tel)
                #dfg = dfg_tel + dfg
                dfg = dfg_tel

            gviz = dfg_vis_factory.apply(dfg, log=tel)
            dfg_vis_factory.save(gviz, output_file_path)
            # Edges ordered by descending value.
            result = dict(
                sorted(dfg.items(), key=operator.itemgetter(1), reverse=True))

        max_thresh = None

    return output_file_path, result, max_thresh
示例#10
0
 def visualize_dfg(self,
                   log,
                   save_file=False,
                   file_name="dfg",
                   variant="relevance"):
     """
     Visualises the event log as directly-follows graphs (DFG), one graph
     per label returned by ``self.aggregate_relevance_scores``.

     :param log: event log as a list of traces [list].
     :param save_file: boolean flag indicating whether to save the DFG [bool].
     :param file_name: prefix of the generated file names [str].
     :param variant: dfg version to be produced: "frequency", "time", "relevance" or "all" [str]
     :return: file_names, the files written; empty when nothing was saved [list].
     """
     parameters = {"format": "svg"}
     file_names = []
     relevance_scores = self.aggregate_relevance_scores(log)

     def add_title(gviz, label, items):
         # A group with a single trace is captioned with its case id, larger
         # groups with the size of the whole log and the label.
         if len(items['traces']) == 1:
             title = "Prediction: " + str(
                 label) + ", Case ID: " + items['traces'][0]
         else:
             title = "No of Service Orders: " + str(len(
                 log)) + ", Filter: Repair not on time (Label = " + str(
                     label) + ")"
         gviz.body.append('\t// title')
         gviz.body.append('\tfontsize = 50;')
         gviz.body.append('\tlabelloc = "t";')
         gviz.body.append('\tlabel = "' + title + '";')

     def store(gviz, tag, label):
         # Writes <file_name>_<tag>_<label>.svg when saving is requested.
         if save_file:
             filen = file_name + "_" + tag + "_" + str(label) + ".svg"
             dfg_vis_factory.save(gviz, filen)
             print("Saved DFG image to: " + filen)
             file_names.append(filen)

     if variant == "relevance" or variant == "all":
         for label, items in relevance_scores.items():
             data = filter_log_by_caseid(log, items['traces'])
             dfg = dfg_factory.apply(data)
             gviz = dfg_vis_factory.apply(dfg,
                                          activities_count=items['scores'],
                                          parameters=parameters)
             add_title(gviz, label, items)
             print("rel_sc: ", items['scores'])
             store(gviz, "rel", label)
     if variant == "frequency" or variant == "all":
         for label, items in relevance_scores.items():
             data = filter_log_by_caseid(log, items['traces'])
             dfg = dfg_factory.apply(data)
             # NOTE(review): the counts come from the whole log, not from the
             # filtered `data` - confirm that this is intended.
             activities_cnt = attributes_filter.get_attribute_values(
                 log, attribute_key="concept:name")
             gviz = dfg_vis_factory.apply(dfg,
                                          activities_count=activities_cnt,
                                          parameters=parameters)
             add_title(gviz, label, items)
             store(gviz, "freq", label)
     if variant == "time" or variant == "all":
         for label, items in relevance_scores.items():
             data = filter_log_by_caseid(log, items['traces'])
             dfg = dfg_factory.apply(data)
             parameters = {"format": "svg", "AGGREGATION_MEASURE": "mean"}
             gviz = dfg_vis_factory.apply(dfg,
                                          variant="performance",
                                          parameters=parameters)
             add_title(gviz, label, items)
             store(gviz, "time", label)
     # The return used to sit inside the "time" branch, so the "relevance"
     # and "frequency" variants returned None instead of the list.
     return file_names
示例#11
0
from pm4py.visualization.dfg import factory as dfg_vis_factory
from pm4pyspark.importer.csv import spark_df_imp as importer
from pm4pyspark.algo.discovery.dfg import factory as dfg_factory

# Visualizer options shared by both renderings below.
parameters = {"format": "svg"}

# The same CSV file is loaded twice through the pm4pyspark importer: once as
# an event stream (then transformed into an event log) and once as a Spark
# DataFrame.  The DataFrame feeds DFG discovery, the event log is passed to
# the visualizer.
event_stream_ex = importer.import_event_stream(os.path.join(
    INPUT_DATA_DIR, "running-example.csv"),
                                               parameters={"header": True})
log_ex = importer.transform_event_stream_to_event_log(event_stream_ex)
df_ex = importer.import_sparkdf_from_path(os.path.join(INPUT_DATA_DIR,
                                                       "running-example.csv"),
                                          header=True,
                                          inferSchema=True)

# DFG discovered with the factory's default variant, rendered with the
# "frequency" visualization and saved as SVG in OUTPUT_DATA_DIR.
dfg_freq = dfg_factory.apply(df_ex)
gviz_freq = dfg_vis_factory.apply(dfg_freq,
                                  log=log_ex,
                                  parameters=parameters,
                                  variant="frequency")
dfg_vis_factory.save(gviz_freq,
                     os.path.join(OUTPUT_DATA_DIR, "running-example_freq.svg"))

# Same steps with the "performance" variant for both discovery and rendering.
dfg_perf = dfg_factory.apply(df_ex, variant="performance")
gviz_perf = dfg_vis_factory.apply(dfg_perf,
                                  log=log_ex,
                                  parameters=parameters,
                                  variant="performance")
dfg_vis_factory.save(gviz_perf,
                     os.path.join(OUTPUT_DATA_DIR, "running-example_perf.svg"))
def create_graphs(without_error, log, approach):
    """
    Create and store the visualizations of a log: two directly-follows
    graphs (frequency and performance) and a heuristic net.

    The DFGs and the heuristic net are shown and saved as SVG; the heuristic
    net is additionally saved in the plain-ext, dot and xdot formats. Every
    file goes to the directory "common_path" and is named
    ``<kind>_<approach>_<no_error|with_error>.<extension>``.

    :param without_error: selects the "no_error" (truthy) or "with_error"
        part of the file names
    :param log: event log the models are discovered from
    :param approach: label embedded in the file names
    """
    path = "common_path"
    naming_error = "no_error" if without_error else "with_error"
    suffix = f"{approach}_{naming_error}"

    parameters = {
        constants.PARAMETER_CONSTANT_ACTIVITY_KEY: "concept:name",
        "format": "svg"
    }

    # (variant, log message) for the two DFG renderings, in creation order
    dfg_jobs = (
        ("frequency",
         "DFG frequency has been stored in '%s' in file '%s'"),
        ("performance",
         "DFG performance has been stored in '%s' in file '%s'"),
    )
    for variant, message in dfg_jobs:
        file = f"dfg_{variant}_{suffix}.svg"
        filename = f"{path}/{file}"
        dfg = dfg_factory.apply(log, variant=variant, parameters=parameters)
        gviz = dfg_vis_factory.apply(dfg,
                                     log=log,
                                     variant=variant,
                                     parameters=parameters)
        dfg_vis_factory.view(gviz)
        dfg_vis_factory.save(gviz, filename)
        log_info.info(message, path, file)

    # heuristic net: discovered once, rendered once per output format
    vis_type = "heuristicnet"
    threshold_key = (
        heuristics_miner.Variants.CLASSIC.value.Parameters.DEPENDENCY_THRESH)
    heu_net = heuristics_miner.apply_heu(log,
                                         parameters={threshold_key: 0.60})

    file = f"{vis_type}_{suffix}.svg"
    filename = f"{path}/{file}"
    gviz = hn_vis.apply(
        heu_net,
        parameters={hn_vis.Variants.PYDOTPLUS.value.Parameters.FORMAT: "svg"})
    hn_vis.view(gviz)
    hn_vis.save(gviz, filename)
    log_info.info("Heuristic Net has been stored in '%s' in file '%s'", path,
                  file)

    # (format, log message) for the renderings that are saved but not shown
    extra_formats = (
        ("plain-ext",
         "Heuristic Net as .plain-ext has been stored in '%s' in file '%s'"),
        ("dot",
         "Heuristic Net as .dot has been stored in '%s' in file '%s'"),
        ("xdot",
         "Heuristic Net as .xdot has been stored in '%s' in file '%s'"),
    )
    for extension, message in extra_formats:
        file = f"{vis_type}_{suffix}.{extension}"
        filename = f"{path}/{file}"
        gviz = hn_vis.apply(
            heu_net,
            parameters={
                hn_vis.Variants.PYDOTPLUS.value.Parameters.FORMAT: extension
            })
        hn_vis.save(gviz, filename)
        log_info.info(message, path, file)
示例#13
0
    def makeDFG_connector(ConnectorBasicStructure, frequency_threshold,
                          dfg_path, **keyword_param):
        """
        Build a directly-follows graph from a table of activity pairs.

        :param ConnectorBasicStructure: pandas DataFrame with the columns
            'activity' and 'prev_activity'; rows whose 'prev_activity' is
            ':Start:' are excluded from the edges and the matrix
        :param frequency_threshold: minimum share (edge count divided by the
            total count of all non-start edges) an edge needs to be drawn
        :param dfg_path: path the picture is saved to when visualizing
        :param keyword_param: must contain 'encryption' and 'visualization';
            'key' is read only when 'encryption' is truthy
        :return: tuple (ActActMatrix, activityList) - the matrix of
            directly-follows counts (rows: previous activity, columns:
            activity) and the sorted activity names indexing it
        """
        unique_activities = ConnectorBasicStructure['activity'].unique()
        unique_next_activities = ConnectorBasicStructure[
            'prev_activity'].unique()
        activityList = sorted(
            set(unique_activities) | set(unique_next_activities))
        activityList.remove(':Start:')

        #edges
        groupedbyactivityPairs = ConnectorBasicStructure.groupby(
            ['prev_activity', 'activity']).size().reset_index(name='counts')

        #just to return as matrix
        ActActMatrix = np.zeros([len(activityList), len(activityList)])
        for prev_activity, activity in zip(
                ConnectorBasicStructure['prev_activity'],
                ConnectorBasicStructure['activity']):
            if prev_activity == ":Start:":
                continue
            ActActMatrix[activityList.index(prev_activity)][activityList.index(
                activity)] += 1

        # Total count of the edges that are actually considered below.  The
        # original subtracted only the start row labelled 0, which raised a
        # KeyError when that label was not a ':Start:' row and left the other
        # start edges in the denominator when there were several.
        is_start_edge = groupedbyactivityPairs['prev_activity'] == ":Start:"
        sumFrequency = groupedbyactivityPairs.loc[~is_start_edge,
                                                  'counts'].sum()

        encrypt = keyword_param['encryption']
        edges_dict = {}
        for _, row in groupedbyactivityPairs[~is_start_edge].iterrows():
            if encrypt:
                edge_tuple = (
                    Utilities.AES_ECB_Encrypt(
                        row['prev_activity'].encode('utf-8')[0:5],
                        keyword_param['key']),
                    Utilities.AES_ECB_Encrypt(
                        row['activity'].encode('utf-8')[0:5],
                        keyword_param['key']),
                )
            else:
                edge_tuple = (row['prev_activity'], row['activity'])
            if row['counts'] / sumFrequency >= frequency_threshold:
                edges_dict[edge_tuple] = row['counts']

        #nodes
        activity_frequencyDF = ConnectorBasicStructure.groupby(
            ['activity']).size().reset_index(name='counts')
        prev_activity_frequencyDF = ConnectorBasicStructure.groupby(
            ['prev_activity']).size().reset_index(name='counts')
        prev_activity_frequencyDF = prev_activity_frequencyDF.rename(
            columns={'prev_activity': 'activity'})
        # For an activity present in both frames the count from the
        # 'activity' column wins (keep="first").
        final_activity_fequency = pd.concat([
            activity_frequencyDF, prev_activity_frequencyDF
        ]).drop_duplicates(subset='activity',
                           keep="first").reset_index(drop=True)

        nodes = final_activity_fequency.set_index('activity').T.to_dict(
            'records')
        nodes[0].pop(':Start:')

        if encrypt:
            # Encrypted labels are built only when requested; the original
            # always built them and therefore needed 'key' even with
            # encryption switched off.
            # NOTE(review): nodes keep the first 5 items of the ciphertext
            # while edges encrypt the first 5 bytes of the name, so node and
            # edge labels may not match - confirm which one is intended.
            activities_count = {
                Utilities.AES_ECB_Encrypt(name.encode('utf-8'),
                                          keyword_param['key'])[0:5]: count
                for name, count in nodes[0].items()
            }
        else:
            activities_count = nodes[0]
        gviz = dfg_vis_factory.apply(edges_dict,
                                     activities_count=activities_count,
                                     parameters={"format": "svg"})

        if keyword_param['visualization']:
            dfg_vis_factory.view(gviz)
            dfg_vis_factory.save(gviz, dfg_path)

        return ActActMatrix, activityList
示例#14
0
########################################

#### Performance DFG of conv_log, rendered as PNG #######
# Original note: "mean of number of days between each process".  The
# aggregation and the time unit are decided inside pm4py and are not visible
# here - TODO confirm before relying on "days".

from pm4py.algo.discovery.dfg import factory as dfg_factory

# Discover the DFG with the "performance" variant.
dfg = dfg_factory.apply(conv_log, variant="performance")
parameters = {"format": "png"}

from pm4py.visualization.dfg import factory as dfg_vis_factory

# Render with the matching "performance" visualization, save the picture to
# the relative path "performance_invoice.png", then open the viewer.
gviz = dfg_vis_factory.apply(dfg,
                             log=conv_log,
                             variant="performance",
                             parameters=parameters)
dfg_vis_factory.save(gviz, "performance_invoice.png")
dfg_vis_factory.view(gviz)

##################################

#### mean of frequency between each process #######
from pm4py.algo.discovery.dfg import factory as dfg_factory

from pm4py.visualization.dfg import factory as dfg_vis_factory

dfg = dfg_factory.apply(conv_log)
parameters = {"format": "png"}

gviz1 = dfg_vis_factory.apply(dfg,
                              variant="frequency",
                              log=conv_log,