def prod_eventlog(self, start_point, columns=None):
    """Build an ``Eventlog`` from ``self.eventlist`` with absolute timestamps.

    The records in ``self.eventlist`` are loaded into a DataFrame. The
    ``Start`` and ``Complete`` columns are interpreted as minute offsets
    and added to ``start_point`` to produce the ``StartTimestamp`` and
    ``CompleteTimestamp`` columns. Rows are then sorted by ``CASE_ID`` and
    ``StartTimestamp`` and wrapped in an ``Eventlog``.

    Args:
        start_point: Reference instant that the offsets are added to.
            Strings are parsed with the format ``'%Y-%m-%d %H:%M:%S'``.
        columns: Column names for the records. Defaults to
            ``['CASE_ID', 'Activity', 'Resource', 'Start', 'Complete']``.
            Must include ``CASE_ID``, ``Start`` and ``Complete``, which
            this method reads.

    Returns:
        The ``Eventlog`` wrapping the sorted DataFrame.

    Raises:
        ValueError: If ``start_point`` cannot be parsed as a datetime.
    """
    # Use None as the default so no mutable list is shared between calls.
    if columns is None:
        columns = ['CASE_ID', 'Activity', 'Resource', 'Start', 'Complete']

    eventlog = pd.DataFrame.from_records(self.eventlist, columns=columns)

    # Parse the reference instant once instead of broadcasting it into a
    # column and re-parsing it per row. An unparseable value fails here
    # with a clear error rather than later in the timedelta arithmetic.
    origin = pd.to_datetime(start_point, format='%Y-%m-%d %H:%M:%S')

    eventlog['StartTimestamp'] = origin + pd.to_timedelta(
        eventlog['Start'], unit='m')
    eventlog['CompleteTimestamp'] = origin + pd.to_timedelta(
        eventlog['Complete'], unit='m')

    eventlog.sort_values(['CASE_ID', 'StartTimestamp'], inplace=True)
    return Eventlog(eventlog)
Exemplo n.º 2
0
        # Handle a row whose transition is 'COMPLETE': finish the record
        # being built (or create a new one) and append it to `table`.
        if row.transition == 'COMPLETE':
            if start == True:
                # `start` appears to flag a record opened earlier (the code
                # that sets it is outside this view -- TODO confirm).
                # Overwrite the record's last slot with this row's
                # completion timestamp.
                data[-1] = row.CompleteTimestamp
            else:
                # No open record: build a fresh six-field record. The
                # fourth slot is left as an empty string; the last slot is
                # this row's completion timestamp.
                data = list()
                data += [
                    temp_caseid, row.Activity, row.Resource, '', row.Amount,
                    row.CompleteTimestamp
                ]
            # Reset the flag and store the finished record.
            start = False
            table.append(data)
    # Column names applied to the rows accumulated in `table`.
    headers = [
        'CASE_ID', 'Activity', 'Resource', 'StartTimestamp', 'Amount',
        'CompleteTimestamp'
    ]
    df = pd.DataFrame(table, columns=headers)
    eventlog = Eventlog(df)
    # Presumably parses each named column into datetimes using the given
    # format and stores it under `new_name` -- verify against
    # Eventlog.assign_timestamp.
    # NOTE(review): the two format strings use different date separators
    # ('.' for StartTimestamp, '/' for CompleteTimestamp) -- confirm the
    # source data really differs between the two columns.
    # NOTE(review): records created without a start carry '' in
    # StartTimestamp; check how that is handled under errors='raise'.
    eventlog = eventlog.assign_timestamp(name='StartTimestamp',
                                         new_name='StartTimestamp',
                                         _format='%Y.%m.%d %H:%M:%S',
                                         errors='raise')
    eventlog = eventlog.assign_timestamp(name='CompleteTimestamp',
                                         new_name='CompleteTimestamp',
                                         _format='%Y/%m/%d %H:%M:%S',
                                         errors='raise')
    # Duration is the completion-minus-start difference passed through
    # `to_minute` (a helper defined outside this view).
    eventlog['Duration'] = (eventlog['CompleteTimestamp'] -
                            eventlog['StartTimestamp']).apply(to_minute)

    # Drop rows missing a resource or either timestamp, then write the
    # result as CSV to a path relative to the working directory.
    eventlog.dropna(subset=['Resource', 'StartTimestamp', 'CompleteTimestamp'],
                    inplace=True)
    eventlog.to_csv('../sample_data/BPIC2012.csv')
Exemplo n.º 3
0
from PyProM.src.data.Eventlog import Eventlog
import pandas as pd


class Preprocessor(object):
    """Loads CSV data and wraps it as an ``Eventlog``."""

    def load_eventlog(self,
                      path,
                      case,
                      activity,
                      timestamp,
                      encoding=None,
                      clear=True):
        """Read a comma-separated file and return it as an ``Eventlog``.

        The file is read with pandas, ordered by the ``case`` and
        ``timestamp`` columns and re-indexed from zero. The frame is then
        wrapped in ``Eventlog`` and the case id, activity and timestamp
        columns are registered via the corresponding ``assign_*`` calls.

        Args:
            path: Location of the CSV file passed to ``pd.read_csv``.
            case: Name of the case identifier column.
            activity: Name of the activity column.
            timestamp: Name of the timestamp column.
            encoding: Text encoding forwarded to ``pd.read_csv``.
            clear: When equal to ``True``, ``clear_columns()`` is applied
                to the result before returning.

        Returns:
            The resulting ``Eventlog``.
        """
        frame = pd.read_csv(path, sep=',', engine='python', encoding=encoding)
        frame = frame.sort_values([case, timestamp]).reset_index(drop=True)

        log = (Eventlog(frame)
               .assign_caseid(case)
               .assign_activity(activity)
               .assign_timestamp(timestamp))

        # Equality comparison (not truthiness) is kept on purpose so the
        # set of `clear` values that trigger clearing stays the same.
        return log.clear_columns() if clear == True else log