示例#1
0
    def load_xes(self):
        """Load the XES file at ``self.input_path`` into ``self._event_log``.

        The first log in the file is parsed, every event is mapped to its
        class identity under the first classifier declared by the log, and
        the distinct traces are stored as a set of tuples.

        Raises:
            IOError: if the file cannot be opened or parsed, or if the log
                declares no classifier. The underlying exception is chained
                as ``__cause__``.
        """
        try:
            with open(self.input_path) as log_file:
                log = XUniversalParser().parse(log_file)[0]

            # Names of the classifiers declared in the log; only the first
            # one is used below.
            classifiers = [str(cl) for cl in log.get_classifiers()]

            classifier = XEventAttributeClassifier("activity", [classifiers[0]])
            log_list = [
                [classifier.get_class_identity(event) for event in trace]
                for trace in log
            ]

            # Tuples are hashable, so duplicate traces collapse in the set.
            self._event_log = {tuple(trace) for trace in log_list}

        except Exception as err:
            # Catch Exception (not everything) so KeyboardInterrupt and
            # SystemExit still propagate, and keep the real cause attached.
            raise IOError('[ERROR]: Unable to import xes file') from err
示例#2
0
文件: utils.py 项目: jwllee/podspy
def read_event_log_file(log_filepath):
    """Parse the event-log file at *log_filepath* and return its first log.

    Raises:
        ValueError: if *log_filepath* is not an existing regular file.
    """
    if os.path.isfile(log_filepath):
        with open(log_filepath, 'r') as handle:
            # The parser returns a sequence of logs; only the first is kept.
            return XUniversalParser.parse(handle)[0]

    raise ValueError('{} is not a log file!'.format(log_filepath))
示例#3
0
def xlog():
    """Parse the BPIC2012 test log and return the first log in the file.

    Prints the shallow ``sys.getsizeof`` size of the parsed log and the
    wall-clock time spent opening and parsing the file.
    """
    path_parts = ('.', 'tests', 'testdata', 'BPIC2012.xes.gz')
    began = time.time()
    with open(os.path.join(*path_parts), 'r') as stream:
        parsed = XUniversalParser.parse(stream)[0]
        print('BPIC2012 XLog size: {}b'.format(sys.getsizeof(parsed)))
    elapsed = time.time() - began
    print('Took {} secs to parse BPIC2012'.format(elapsed))
    return parsed
def read_XES_log(path):
    """Parse the XES file at *path* and return the first log it contains.

    A start message and the elapsed wall-clock time are printed to stdout.
    """
    started = time()
    print("Parsing log")

    with open(path) as xes_stream:
        # Only the first log in the file is used.
        first_log = XUniversalParser().parse(xes_stream)[0]

    elapsed = time() - started
    print("Log parsed, took {} seconds..".format(elapsed))

    return first_log
示例#5
0
 def mapXes(self, lfile, header):
     """Parse the XES file at *lfile* and return the first log in it.

     *header* is accepted but not used by this method.
     """
     with open(lfile) as xes_stream:
         return XUniversalParser().parse(xes_stream)[0]
示例#6
0
# Parse the lifecycle extension identified by its xes-standard.org URL.
ext_lifecycle = XExtensionParser().parse("http://www.xes-standard.org/lifecycle.xesext")
print("[info] XExtensionParser completed the extension parsing")

# Register every extension with the extension manager before parsing the log.
# NOTE(review): meta_general, meta_concept, meta_time, ext_concept and
# ext_time are not defined in this excerpt; they must be created earlier.
print("[info] XExtensionManager starts the registration of extensions")
XExtensionManager().register(meta_general)
XExtensionManager().register(meta_concept)
XExtensionManager().register(meta_time)
XExtensionManager().register(ext_concept)
XExtensionManager().register(ext_time)
XExtensionManager().register(ext_lifecycle)
print("[info] XExtensionManager completed the registration of extensions")

# Parse the BPIC 2012 file; the parser returns a sequence of logs.
with open("./data/bpic2012.xes") as file:
    logs = XUniversalParser().parse(file)

# Only the first log in the file is used.
log = logs[0]

# Count the traces that precede the chosen case. After the loop, drift_idx is
# the index of the first trace whose concept:name is '197611', or the total
# number of traces if no trace matches.
drift_idx = 0
for trace in log:
    # The chosen case id is 197611; concept:name is compared as a string.
    if trace.get_attributes()['concept:name'].get_value() == '197611':
        print('drift_idx = %d' % drift_idx)
        break
    drift_idx+=1

# Keep traces [0, drift_idx): the training set before the drift occurs.
log = log[0:drift_idx]

# Column-oriented accumulator with one list per field.
# NOTE(review): presumably filled per case by code outside this excerpt.
log_dict = {'case_id':[], 'case_execution_time':[], 'case_execution_time_seconds':[], 'num_of_events':[]}
from opyenxes.classification.XEventAttributeClassifier import XEventAttributeClassifier
from opyenxes.data_in.XUniversalParser import XUniversalParser

# Location of the input log and of the output folder.
file_path = "/home/bartlomiej/process-mining/process-mining-algorithms/event_logs/L1.xes"
output_directory = "./output/"

# Read the file and keep only the first log it contains.
with open(file_path) as log_file:
    log = XUniversalParser().parse(log_file)[0]

# Events are classified by their "concept:name" attribute.
classifier = XEventAttributeClassifier("concept:name", ["concept:name"])

# One list per trace, holding the class identity of every event in that trace.
log_list = [
    [classifier.get_class_identity(event) for event in trace]
    for trace in log
]
示例#8
0
    # Down-sample every log found under <datadir>/<subdir>/l5000 to
    # nb_traces traces and write the result to <datadir>/<subdir>/l1000.
    factory = XFactory()

    print(os.listdir(datadir))

    for dir in os.listdir(datadir):
        # Only sub-directories of datadir are processed.
        if not os.path.isdir(os.path.join(datadir, dir)):
            continue

        indir = os.path.join(datadir, dir, 'l5000')
        outdir = os.path.join(datadir, dir, 'l1000')
        # Raises FileExistsError if the output directory already exists.
        os.makedirs(outdir)

        for xlog_filepath in os.listdir(indir):
            if '.xes.gz' not in xlog_filepath:
                continue

            print('Processing {}'.format(xlog_filepath))

            # Open the file inside the directory that was just listed; the
            # 'l5000' component used to be missing from this path, so the
            # listed files could not be found.
            with open(os.path.join(indir, xlog_filepath), 'r') as f:
                xlog = XUniversalParser().parse(f)[0]

            assert isinstance(xlog, XLog)

            # The new log reuses the source log's attributes and its first
            # classifier.
            new_xlog = factory.create_log(xlog.get_attributes())
            # Draw nb_traces traces without replacement.
            traces = np.random.choice(xlog, nb_traces, replace=False)
            new_xlog.get_classifiers().append(xlog.get_classifiers()[0])

            for t in traces:
                new_xlog.append(t)

            # The sampled log keeps the file name of its source log.
            with open(outdir + os.sep + xlog_filepath, 'w') as f:
                XesXmlGZIPSerializer().serialize(new_xlog, f)
示例#9
0
def readLogFile(file):
    """Load the log at path *file* with two different importers.

    Returns a 2-tuple: the result of ``XUniversalParser().parse`` on the
    opened file, followed by the result of ``xes_importer.apply`` on the
    path itself.
    """
    with open(file) as xes_stream:
        parsed_logs = XUniversalParser().parse(xes_stream)
        imported_log = xes_importer.apply(file)
        return parsed_logs, imported_log
示例#10
0
from opyenxes.model.XLog import XLog
from opyenxes.data_in.XUniversalParser import XUniversalParser
from opyenxes.classification.XEventAttributeClassifier import XEventAttributeClassifier
from opyenxes.out.XesXmlSerializer import XesXmlSerializer
from sklearn.cluster import KMeans
from opyenxes.factory.XFactory import XFactory

# Script entry point: turn every trace of a log into a vector of activity
# counts. NOTE(review): the excerpt ends inside the vector-building loop;
# trace_vector is never stored in log_vector in the visible code.
if __name__ == '__main__':
    path = "xes_file/log_to_cluster.xes"
    # Parse the file and keep only the first log it contains.
    with open(path) as log_file:
        log = XUniversalParser().parse(log_file)[0]  # type: XLog

    # Convert the log into one list per trace holding the class identity of
    # each event, classified by the "Activity" attribute.
    classifier = XEventAttributeClassifier("activity", ["Activity"])
    log_list = list(
        map(lambda trace: list(map(classifier.get_class_identity, trace)),
            log))

    # Collect every distinct event identity that occurs anywhere in the log.
    event_set = set()
    for trace in log_list:
        for event in trace:
            event_set.add(event)

    # Fix an ordering of the distinct events; it defines the vector columns.
    all_event = list(event_set)

    # For each trace, count how often each distinct event occurs in it.
    log_vector = []
    for trace in log_list:
        trace_vector = []
        for event in all_event:
            trace_vector.append(trace.count(event))
示例#11
0
        
        return log

    # Positional command-line arguments. All of them, including epsilon,
    # are kept as the strings found in sys.argv.
    filePath = sys.argv[1]
    epsilon = sys.argv[2]
    dbName = sys.argv[3]
    secure_token = sys.argv[4]

    #preprocess file
    #os.mkdir(secure_token)
    print("\n Starting privatize_df.py \n")
    # The output file sits next to the input, with epsilon in its name.
    outPath = filePath.replace(".xes","_%s.xes" % (epsilon))
    log = xes_import_factory.apply(filePath)

    parser = XUniversalParser()

    # Informational only: parsing is attempted whatever the answer is.
    if parser.can_parse(filePath):
        print("log can be parsed.")
    else:
        print("log can not be parsed.")

    # Parse inside a context manager so the file handle is always closed;
    # it was previously opened and never closed.
    with open(filePath) as log_file:
        log_list = parser.parse(log_file)
    #log = log_list[0]

    # The mapping is built from the xes_import_factory log, while the
    # XUniversalParser result is what gets privatized.
    event_mapping = create_event_int_mapping(log)
    privatize_df(log_list, event_mapping, epsilon, outPath)

    #write to db
    print("Writing to DB")
示例#12
0
    return merged_log


# Script entry point: read the normal and deviating x-ray logs, merge and
# label them, shuffle the result and write it to a single XES file.
if __name__ == "__main__":

    # File-name templates and the number of files in each group; the files
    # are numbered starting from 1.
    normPathTemp = "out_xraynorm{}.xes"
    devPathTemp = "out_xraydev{}.xes"
    normLogCount = 7
    devLogCount = 1

    normLogs = []
    devLogs = []

    # Same loading routine for both groups, normal logs first.
    for pathTemp, logCount, logs in (
        (normPathTemp, normLogCount, normLogs),
        (devPathTemp, devLogCount, devLogs),
    ):
        for i in range(1, logCount + 1):
            logPath = pathTemp.format(i)
            with open(logPath) as log_file:
                parsed = XUniversalParser().parse(log_file)
            # Only the first log of each file is kept.
            logs.append(parsed[0])

    merged_log = merge_and_label(normLogs, devLogs)

    shuffle(merged_log)

    with open("merged_xray.xes", "w") as f:
        XesXmlSerializer().serialize(merged_log, f)
示例#13
0
"""
Anonymize a log.
Every "Resource" and "org:resource" attribute value is replaced with "UNKNOWN":
"""

from opyenxes.data_in.XUniversalParser import XUniversalParser
from opyenxes.data_out.XesXmlSerializer import XesXmlSerializer

path = "xes_file/general_example.xes"
with open(path) as log_file:
    log = XUniversalParser().parse(log_file)[0]

# Search in global trace attribute
for attribute in log.get_global_trace_attributes():
    if attribute.get_key() in ["Resource", "org:resource"]:
        attribute.set_value("UNKNOWN")

# Search in global event attribute
for attribute in log.get_global_event_attributes():
    if attribute.get_key() in ["Resource", "org:resource"]:
        attribute.set_value("UNKNOWN")

# Search in trace
for trace in log:
    # Search in trace attribute
    for attribute in trace.get_attributes().values():
        if attribute.get_key() in ["Resource", "org:resource"]:
            attribute.set_value("UNKNOWN")
    # Search in event
    for event in trace:
        # Search in event attribute