def execute_script():
    """Round-trip a two-event log and its object table through the MDL I/O.

    Builds an event dataframe (tagged ``type = "succint"``) and an object
    dataframe, exports both to ``prova.mdl``, imports them back, prints the
    events plus the per-type object tables, exports again to ``prova2.mdl``
    and finally deletes both files.
    """
    first_event = {
        "event_id": "1",
        "event_activity": "A",
        "event_timestamp": parser.parse("1970-01-01 00:00:00"),
        "order": ["O1"],
    }
    second_event = {
        "event_id": "2",
        "event_activity": "B",
        "event_timestamp": parser.parse("1970-01-01 00:00:00"),
        "order": ["O1"],
        # The item list holds one string, "I1, I2", exactly as in the original.
        "item": ["I1, I2"],
    }
    df = pd.DataFrame([first_event, second_event])
    df.type = "succint"

    order_object = {
        "object_id": "O1",
        "object_type": "order",
        "object_buyer": "Alessandro",
    }
    item_object = {
        "object_id": "I1",
        "object_type": "item",
        "object_cost": 600,
    }
    obj_df = pd.DataFrame([order_object, item_object])

    mdl_exporter.apply(df, "prova.mdl", obj_df=obj_df)
    df, obj_df = mdl_importer.apply("prova.mdl", return_obj_dataframe=True)

    # Split the objects by type and drop the attribute columns that are
    # entirely empty for that type.
    is_order = obj_df["object_type"] == "order"
    is_item = obj_df["object_type"] == "item"
    orders = obj_df[is_order].dropna(how="all", axis=1)
    items = obj_df[is_item].dropna(how="all", axis=1)

    for frame in (df, orders, items):
        print(frame)

    mdl_exporter.apply(df, "prova2.mdl", obj_df=obj_df)

    for temp_path in ("prova.mdl", "prova2.mdl"):
        os.remove(temp_path)
def cli(con):
    """Interactively extract the P2P object-centric log and save it.

    The user is asked for an output path; an empty answer selects
    ``p2p.xmlocel``. A path ending in ``mdl`` is written with
    ``mdl_exporter``; one ending in ``jsonocel`` or ``xmlocel`` is written
    with ``jmd_exporter``. Any other extension is reported and nothing is
    written.

    Args:
        con: connection object, forwarded unchanged to ``apply``.
    """
    print("\n\nP2P - Object-Centric Log\n")
    dataframe = apply(con)
    # The prompt used to advertise "p2p.mdl" although the fallback below has
    # always been "p2p.xmlocel"; the text now matches the real default.
    path = input(
        "Insert the path where the log should be saved (default: p2p.xmlocel): ")
    if not path:
        path = "p2p.xmlocel"
    if path.endswith("mdl"):
        mdl_exporter.apply(dataframe, path)
    elif path.endswith(("jsonocel", "xmlocel")):
        jmd_exporter.apply(dataframe, path)
    else:
        # Previously this case fell through silently, leaving the user
        # without a file and without any explanation.
        print("Unsupported file extension for '" + path +
              "': expected mdl, jsonocel or xmlocel. Nothing was exported.")
def cli(con):
    """Interactively extract the O2C object-centric log and save it.

    Asks for the minimum extraction date, the fiscal year and the output
    path; an empty answer selects the default shown in each prompt. A path
    ending in ``mdl`` is written with ``mdl_exporter``; one ending in
    ``jsonocel`` or ``xmlocel`` is written with ``ocel_exporter``. Other
    extensions result in no export.

    Args:
        con: connection object, forwarded unchanged to ``apply``.
    """
    print("\n\nO2C Object-Centric Log Extractor\n\n")

    # ``input()`` always returns a string, so ``or`` substitutes the default
    # exactly when the answer is empty.
    min_extr_date = input(
        "Insert the minimum extraction date (default: 2020-01-01 00:00:00): "
    ) or "2020-01-01 00:00:00"
    gjahr = input("Insert the fiscal year (default: 2020):") or "2020"

    dataframe = apply(con, min_extr_date=min_extr_date, gjahr=gjahr)

    path = input(
        "Insert the path where the log should be saved (default: o2c.xmlocel): "
    ) or "o2c.xmlocel"

    if path.endswith("mdl"):
        mdl_exporter.apply(dataframe, path)
    elif path.endswith(("jsonocel", "xmlocel")):
        ocel_exporter.apply(dataframe, path)
def newExtractorPerformExtraction():
    """Extract a set of tables and export them to a freshly named file.

    Reads the ``parameters`` argument from ``request.args``, decodes it with
    ``__process_parameters``, opens a database connection, extracts the
    requested tables and exports the resulting dataframe to
    ``<uuid4>.parquet``.

    Returns:
        dict: ``file_name`` (the generated file name) and ``obj_types`` (the
        dataframe columns whose name does not start with ``event_``).

    Raises:
        KeyError: if ``tabnames``, ``key_spec`` or ``mandt`` is missing from
            the parameters (plain subscript access is used for them).
    """
    raw_parameters = request.args.get("parameters")
    parameters = __process_parameters(raw_parameters)

    # Optional connection settings; fall back to a local SQLite file.
    db_type = "sqlite"
    if "db_type" in parameters:
        db_type = parameters["db_type"]
    db_con_args = {"path": "sap.sqlite"}
    if "db_con_args" in parameters:
        db_con_args = parameters["db_con_args"]

    tabnames = parameters["tabnames"]
    # Not used below; the lookup is kept so a missing key still raises.
    key_spec = parameters["key_spec"]
    mandt = parameters["mandt"]

    c = database_factory.apply(db_type, db_con_args)

    from sapextractor.utils.generic_extractors import extract_table
    file_name = f"{uuid.uuid4()}.parquet"
    df = extract_table.apply_set_tables(c, tabnames, mandt=mandt)

    from pm4pymdl.objects.mdl.exporter import exporter
    exporter.apply(df, file_name)

    obj_types = []
    for column in df.columns:
        if not column.startswith("event_"):
            obj_types.append(column)

    return {"file_name": file_name, "obj_types": obj_types}
def execute_script():
    """Extract the AP/AR object-centric log and write it to ``ap_ar.mdl``."""
    connection = example_connection.get_con()
    object_centric_log = sapextractor.get_ap_ar_obj_centr_log(connection)
    mdl_exporter.apply(object_centric_log, "ap_ar.mdl")
if __name__ == "__main__":
    # Run every table reader in the original order, then build the events.
    # (The readers presumably fill the ``Shared`` containers used below --
    # confirm against their definitions.)
    for load_table in (
        read_bseg, read_tstct, read_eban, read_bkpf, read_ekbe, read_ekpo,
        read_mseg, read_rseg, read_mara, read_lfa1, read_ekko, read_mkpf,
        read_rbkp,
    ):
        load_table()
    write_events()

    # Events are exported in chronological order.
    Shared.events = sorted(Shared.events,
                           key=lambda event: event["event_timestamp"])
    print("written events")

    events_df = pd.DataFrame(Shared.events)
    print("got dataframe")
    events_df.type = "exploded"

    # One dataframe per object collection, stacked into one object table.
    object_frames = [
        pd.DataFrame(records)
        for records in (
            Shared.EKPO_objects,
            Shared.MSEG_objects,
            Shared.RSEG_objects,
            Shared.MARA_objects,
            Shared.LFA1_objects,
        )
    ]
    object_df = pd.concat(object_frames)

    # The same log is written twice: once as .mdl and once as .parquet.
    print("exporting")
    mdl_exporter.apply(events_df, "log_p2p.mdl", obj_df=object_df)
    print("exported")
    mdl_exporter.apply(events_df, "log_p2p.parquet", obj_df=object_df)
# Apply ``f`` to every non-event column, printing each column name as it is
# processed.
for col in succint_table.columns:
    if not col.startswith("event"):
        print(col)
        succint_table[col] = succint_table[col].apply(f)
succint_table = succint_table.rename(columns=col_mapping)

# Human-readable activity name for each transaction code found in the log.
mapping = {
    "XK01": "Create Vendor (Centrally)",
    "XK02": "Change Vendor (Centrally)",
    "FK02": "Change Vendor (Accounting)",
    "MK02": "Change Vendor (Purchasing)",
    "VD02": "Change Customer (Sales)",
    "XD01": "Create Customer (Centrally)",
    "XD02": "Change Customer (Centrally)",
    "FD02": "Change Customer (Accounting)",
    "XD07": "Change Customer Account Group",
    "FK08": "Confirm Vendor Individually (Acctng)",
}


def f1(x):
    """Return the activity name for code ``x``; unknown codes raise KeyError."""
    return mapping[x]


succint_table["event_activity"] = succint_table["event_activity"].apply(f1)
succint_table.type = "succint"
mdl_exporter.apply(succint_table, "sap_withoutTrial.mdl")

# Spell out the orient: the one-letter abbreviation 'r' was deprecated in
# pandas 1.x and is rejected by pandas 2.x, while "records" works everywhere.
stream = succint_table.to_dict("records")
tgroups = {}
for event in stream:
    # Drop attributes whose value is None; iterate over a snapshot of the
    # keys because the dictionary is modified inside the loop.
    event_keys = list(event.keys())
    for key in event_keys:
        if event[key] is None:
            del event[key]
    event_keys = list(event.keys())
    tcode = event["event_tcode"]
    if tcode not in tgroups:
        tgroups[tcode] = set()
    this_list = []
from pm4pymdl.algo.mvp.utils import succint_mdl_to_exploded_mdl
from pm4pymdl.objects.mdl.exporter import exporter as mdl_exporter
import random
import pandas as pd

# Load the running example and convert it to the exploded representation.
succint_df = mdl_importer.apply("example_logs/mdl/mdl-running-example.mdl")
df = succint_mdl_to_exploded_mdl.apply(succint_df)

# Distinct identifiers found in the two object columns.
products = df["products"].dropna().unique()
customers = df["customers"].dropna().unique()

# Attach randomly generated attributes to each object: cost and producer for
# products, age and bank account for customers. The random calls are made in
# the same order as before (all products first, then all customers).
objects = [
    {
        "object_id": product_id,
        "object_type": "products",
        "object_cost": random.randrange(100, 500),
        "object_producer": random.choice(["A", "B", "C"]),
    }
    for product_id in products
]
objects.extend(
    {
        "object_id": customer_id,
        "object_type": "customers",
        "object_age": random.randrange(30, 60),
        "object_bankaccount": random.randrange(1000, 100000),
    }
    for customer_id in customers
)
print(objects)

obj_df = pd.DataFrame(objects)
mdl_exporter.apply(df, "mdl-running-example-w-objects.mdl", obj_df=obj_df)
def execute_script():
    """Extract the O2C object-centric log and write it to ``o2c.mdl``.

    The extractor is called with ``keep_first=True`` and a minimum extraction
    date of 1990-01-01 00:00:00.
    """
    connection = example_connection.get_con()
    object_centric_log = sapextractor.get_o2c_obj_centr_log(
        connection,
        keep_first=True,
        min_extr_date="1990-01-01 00:00:00",
    )
    mdl_exporter.apply(object_centric_log, "o2c.mdl")
from pm4pymdl.visualization.mvp.gen_framework import visualizer as vis_factory
from pm4pymdl.objects.mdl.exporter import exporter as mdl_exporter

# Load the succint MDL table.
succint_table = mdl_importer.apply("../example_logs/mdl/order_management.mdl")
print(len(succint_table), succint_table.type)

# Convert it to the exploded representation.
exploded_table = succint_mdl_to_exploded_mdl.apply(succint_table)
print(len(exploded_table), exploded_table.type)

# Keep only the events related to orders with a profit >= 200. On the
# exploded table this takes three steps: select the qualifying rows, collect
# every row that shares one of their orders, then keep all rows belonging to
# the events found that way.
# NOTE(review): Series.isin() also matches missing values, so if
# f0["order"] contains NaN, rows with a missing order pass this filter too
# -- confirm this is intended.
has_min_profit = exploded_table["event_profit"] >= 200
f0 = exploded_table[has_min_profit]
shares_order = exploded_table["order"].isin(f0["order"])
f1 = exploded_table[shares_order]
belongs_to_kept_event = exploded_table["event_id"].isin(f1["event_id"])
filtered_exploded_table = exploded_table[belongs_to_kept_event]

# Additionally pull in the events of the packages related to those orders.
shares_package = exploded_table["package"].isin(
    filtered_exploded_table["package"])
f2 = exploded_table[shares_package]
in_order_selection = exploded_table["event_id"].isin(
    filtered_exploded_table["event_id"])
in_package_selection = exploded_table["event_id"].isin(f2["event_id"])
filtered_table_2 = exploded_table[in_order_selection | in_package_selection]

# Discover and display a process model for the filtered table.
model = discovery.apply(filtered_table_2)
gviz = vis_factory.apply(model)
vis_factory.view(gviz)

# Export the filtered version.
mdl_exporter.apply(filtered_table_2, "filtered.mdl")
dct["G"] = "VERKBELEG" # contract dct["W"] = "INDIP_REQ" # indipendent requisition dct["I"] = "ORD_WO_CHARGE" # order without charge dct["X"] = "HANDL_UNIT" # handling unit """ activities["A"] = "Create Inquiry" activities["T"] = "Returns Delivery" activities["D"] = "Item Proposal" activities["V"] = "Create Purchase Order" activities["N"] = "Invoice Cancellation" activities["E"] = "Scheduling Agreement" activities["O"] = "Create Credit Memo" activities["K"] = "Create Credit Memo Request" activities["B"] = "Create Quotation" activities["G"] = "Create Contract" activities["W"] = "Indipendent Requisition" activities["I"] = "Create Order without Charge" activities["X"] = "Handling Unit" activities.update(Shared.tcodes) df[Shared.activity_column] = df[Shared.activity_column].map(activities) df = df.dropna(subset=[Shared.activity_column]) df = df[[x for x in df.columns if "named:" not in x]] allowed_columns = [x for x in df.columns if not x.startswith("C_") and not x.startswith("event_")] df = df.dropna(subset=allowed_columns, how="all") df = df.sort_values(Shared.timestamp_column) print(df) df.type = "exploded" from pm4pymdl.objects.mdl.exporter import exporter as mdl_exporter mdl_exporter.apply(df, "sap.mdl")
# Further transaction-code labels, added in the same order as before.
mapping.update({
    "FOSH": "Vacancy debit position",
    "FOUA": "Calculate sales settlement",
    "MF40": "Final backflush for make-to-stock production",
    "FBA8": "Clear Vendor Down Payment",
    "MB11": "Goods Movement",
    "MBSL": "Copy Material Document",
    "MB0A": "Post Goods Receipt for PO",
    "FOB6": "Input tax distribution",
    "FB1S": "Clear G/L Account",
    "WRX": "Account determination for GR/IR clearing account",
    "GBB": "Offsetting entry for inventory posting",
    "MB1B": "Enter Transfer Posting",
})

# Replace each code with its label; a code missing from ``mapping`` raises
# KeyError, as it did before.
df["event_activity"] = df["event_activity"].apply(
    lambda code: mapping[code])

from pm4pymdl.objects.mdl.exporter import exporter as mdl_exporter
mdl_exporter.apply(df, "bkpf_bseg.mdl")

# Disabled debugging helpers:
# print(df["event_activity"].unique())
# input()
# print(df)

# Discover a model over the three listed perspectives and render it as SVG.
discovery_parameters = {
    "min_dfg_occurrences": 3,
    "performance": False,
    "decreasing_factor_sa_ea": 0.0,
    "dependency_thresh": 0.3,
    "perspectives": ["belnr", "xblnr", "hkont"],
}
model = mvp_disc_factory.apply(df, parameters=discovery_parameters)
gviz = mvp_vis_factory.apply(model, parameters={"format": "svg"})
# Read every file listed in ``logs`` from the pkdd99/ directory, normalise
# it, and collect the resulting frames in ``all_df``.
# NOTE(review): the original indentation was lost in this excerpt; the
# nesting below is a best-effort reconstruction -- confirm against the file.
for l0 in logs:
    df = pd.read_csv("pkdd99/"+l0, sep=";", quotechar="\"")
    # Parse the date-like columns when present.
    if "date" in df.columns:
        df["date"] = pd.to_datetime(df["date"], format="%y%m%d")
    if "issued" in df.columns:
        df["issued"] = pd.to_datetime(df["issued"], format="%y%m%d %H:%M:%S")
    if "birth_number" in df.columns:
        # Keep only the leading digits as a year offset from 1900
        # (presumably birth_number is encoded as YYMMDD -- confirm).
        df["birth_number"] = 1900 + df["birth_number"] // 10000
        # NOTE(review): errors="ignore" is deprecated in recent pandas
        # releases -- consider handling the failure explicitly.
        df["birth_number"] = pd.to_datetime(df["birth_number"], format="%Y", errors="ignore")
    # Identifier columns are compared as strings.
    for column in case_id_columns:
        if column in df.columns:
            df[column] = df[column].astype(str)
    df = do_column_mapping(df)
    df = set_up_activity(l0, df)
    # Only frames that carry both event columns are kept, and only their
    # rows where both values are present.
    if "event_activity" in df.columns and "event_timestamp" in df.columns:
        df = df.dropna(subset=["event_timestamp", "event_activity"], how="any")
        if len(df) > 0:
            all_df.append(df)
# Stack all frames and derive a string event_id from the new positional index.
df = pd.concat(all_df)
df = df.reset_index()
df["event_id"] = df.index
df["event_id"] = df["event_id"].astype(str)
# Order chronologically, using event_id (compared as a string) to break ties.
df = df.sort_values(["event_timestamp", "event_id"])
df = df.reset_index()
df.type = "exploded"
# The two reset_index() calls left the old indexes behind as the "index" and
# "level_0" columns; remove them before exporting.
del df["index"]
del df["level_0"]
print(df.columns)
mdl_exporter.apply(df, "pkdd99.parquet")