def createFiles(logFile, kOptions):
    """Time two outlier detectors on one event log and save the timings.

    The log is imported with ``xes_factory.apply`` and preprocessed with
    ``preprocess.dataPreprocess2012`` and ``outlierPairWise.preprocess``.
    Two CSV-like text files are then written, one ``k,seconds`` line per
    value in ``kOptions``:

    * ``tests/data/timeDistancesEvent.txt`` -- the execution time returned
      by ``outlierDistanceActivities.main``.
    * ``tests/data/timeDistancesRtree.txt`` -- the wall-clock time of one
      ``outlierPairWise.outlierScore`` call against a pre-built tree.

    Args:
        logFile: Path of the XES log handed to ``xes_factory.apply``.
        kOptions: Iterable of ``k`` values to benchmark.

    Returns:
        None. The results are only written to the two files above.
    """
    log = xes_factory.apply(logFile)
    dataVectors, seq = preprocess.dataPreprocess2012(log)
    pairWiseData = outlierPairWise.preprocess(log)

    # The tree is built once and shared by every k. Its construction time is
    # not part of the reported numbers; wrap this call with time.time() if
    # the build cost is needed.
    # Original note on createRtree: "returns values ordered".
    tree = outlierPairWise.createRtree(pairWiseData)

    with open("tests/data/timeDistancesEvent.txt", "w") as f:
        for k in kOptions:
            # Only the reported execution time is used; the detected pairs
            # are discarded.
            _, executionTime = outlierDistanceActivities.main(
                dataVectors, seq, k)
            f.write(f"{k},{executionTime}\n")

    with open('tests/data/timeDistancesRtree.txt', 'w') as f:
        for k in kOptions:
            startPairWise = time.time()
            # The scores themselves are not needed here, only how long the
            # scoring takes.
            outlierPairWise.outlierScore(k, tree, pairWiseData)
            scoreTimeEnd = time.time()
            f.write(f"{k},{scoreTimeEnd - startPairWise}\n")
"""Benchmark the distance- and distribution-based outlier detectors.

For every log in ``logFiles`` the script writes two timing files under
``tests/data``: one line per ``k`` for the distance-based detector and one
line per threshold for the distribution-based detector.
"""
import contextlib
import os

from pm4py.objects.log.importer.xes import factory as xes_factory

from algorithms import outlierDistanceActivities, preprocess, outlierPairsDistribution
import algorithms.outlierDistanceActivities as activities
import algorithms.outlierPairsDistribution as distribution

kOptions = [250, 500, 750, 1000, 1250, 1500, 1750, 2000]
thresholds = [0.0025, 0.005, 0.0075, 0.01, 0.0125, 0.0150, 0.0175, 0.02]
logFiles = ["../BPI_Challenge_2012.xes", "BPI Challenge 2017.xes"]

for logFile in logFiles:
    print("importing log")
    log = xes_factory.apply(logFile)

    # Original note: [trace,activity index,time]
    print("preprocess ...")
    if logFile == "../BPI_Challenge_2012.xes":
        dataVectors, seq = preprocess.dataPreprocess2012(log)
        identifier = "2012"
    else:
        dataVectors, seq = preprocess.dataPreprocess2017(log)
        identifier = "2017"

    # One "k,time" line per neighbourhood size.
    with open(f"tests/data/events-{identifier}-distance.txt", "w") as f:
        for k in kOptions:
            # "elapsed" rather than "time" so the stdlib module name is
            # never shadowed.
            _, elapsed = outlierDistanceActivities.main(dataVectors, seq, k)
            f.write(f"{k},{elapsed}\n")

    # One "threshold,time" line per threshold.
    with open(f"tests/data/events-{identifier}-distribution.txt", "w") as f:
        for threshold in thresholds:
            _, elapsed = outlierPairsDistribution.main(
                log, dataVectors, seq, threshold)
            f.write(f"{threshold},{elapsed}\n")

    # NOTE(review): the original nesting of this cleanup was lost when the
    # file was flattened; it is placed once per log here. Presumably
    # "distributions.txt" is a scratch file left by the distribution-based
    # run -- confirm whether it should instead be removed per threshold.
    # The guard keeps a missing file from aborting the whole benchmark.
    with contextlib.suppress(FileNotFoundError):
        os.remove("distributions.txt")

# plot the times
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
from statistics import mean
import warnings

import numpy as np
import scipy
import scipy.stats  # imported explicitly: scipy.stats is accessed by attribute below
from pm4py.algo.filtering.log.attributes import attributes_filter as log_attributes_filter
from pm4py.objects.log.importer.xes import factory as xes_factory
from sklearn.preprocessing import StandardScaler

from algorithms.outlierPairsCurveFitting import getDistributionsFitting
from algorithms import preprocess

# time: per activity [30] => [trace index, activity index, time in seconds]
# sequence: per trace [10.000] => [[activity index, time]]
logFile = "../BPM Temporal Anomalies/scala_trace_outlier/input/outliers_30_activities_10k_0.005.xes"
logFileResults = "../BPM Temporal Anomalies/scala_trace_outlier/input/results_30_activities_10k_0.005_description"

log = xes_factory.apply(logFile)
time, sequence = preprocess.dataPreprocess2012(log)

# Keep only the third field of every event, grouped as in `time`.
times = [[i[2] for i in x] for x in time]
means = [mean(i) for i in times]
distributionsDF = getDistributionsFitting(times, log)

# get the distributions in an array
thresholds = [0.020, 0.01, 0.0075, 0.005, 0.0025, 0.001]

# Installing the filter once is enough; it does not depend on the threshold.
warnings.filterwarnings("ignore")

for threshold in thresholds:
    distributions = []
    for index in range(len(distributionsDF)):
        row = distributionsDF.iloc[index]
        # Only fits with R2 >= 0.9 are re-fitted with scipy.
        if row["R2"] >= 0.9:
            # The "Distribution" column holds the name of a scipy.stats
            # distribution, looked up by attribute.
            dist = getattr(scipy.stats, row["Distribution"])
            param = dist.fit(times[index])
            # NOTE(review): `param` and `distributions` are not used in the
            # visible part of this loop -- confirm whether the loop body
            # was cut off here.