Exemplo n.º 1
0
    def dataframe(self):
        """
        Return pandas DataFrame containing the event log in matrix format.

        Each case is framed by an artificial start event and end event, and
        one row dict is collected per event (including the two artificial
        ones).

        NOTE(review): this excerpt ends right after the row-collection loop;
        the step that turns ``frames`` into the returned DataFrame is not
        visible here.

        :return: pandas.DataFrame
        """

        # Artificial boundary events: every key in self.event_attribute_keys
        # is set to the start symbol (resp. end symbol); no timestamp.
        start_event = Event(timestamp=None,
                            **dict((a, EventLog.start_symbol)
                                   for a in self.event_attribute_keys))
        end_event = Event(timestamp=None,
                          **dict((a, EventLog.end_symbol)
                                 for a in self.event_attribute_keys))

        # One dict per event; the keys become the row's fields.
        frames = []
        for case_id, case in enumerate(self.cases):
            # Prefer the case's own id; fall back to its position in the log.
            if case.id is not None:
                case_id = case.id
            # event_position 0 is the start event, the last one the end event.
            for event_pos, event in enumerate([start_event] + case.events +
                                              [end_event]):
                frames.append({
                    'case_id':
                    case_id,
                    'event_position':
                    event_pos,
                    'name':
                    event.name,
                    'timestamp':
                    event.timestamp,
                    # Attributes whose key starts with '_' are left out.
                    **dict([
                        i for i in event.attributes.items() if not i[0].startswith('_')
                    ])
                })
Exemplo n.º 2
0
    def generate_random_event(self):
        """
        Create an event with a randomly numbered name and attribute values.

        The name is ``'Random activity <k>'`` where ``k`` is drawn with
        ``np.random.randint(1, len(self.activities))``. When
        ``self.attributes`` is set, each attribute ``a`` receives the value
        ``'Random <a.name> <k>'`` with ``k`` drawn from
        ``np.random.randint(1, len(a.values))``.

        :return: the newly created Event
        :raises RuntimeError: if ``self.activities`` is None
        """
        if self.activities is None:
            raise RuntimeError('activities has not bee set.')

        activity_number = np.random.randint(1, len(self.activities))
        random_event = Event(name=f'Random activity {activity_number}')

        # Without attribute definitions the event keeps its default attributes.
        if self.attributes is None:
            return random_event

        random_event.attributes = {
            attribute.name:
            f'Random {attribute.name} {np.random.randint(1, len(attribute.values))}'
            for attribute in self.attributes
        }
        return random_event
Exemplo n.º 3
0
 def generate_random_event_v2(self, act):
     """
     Create an event named after a randomly chosen activity not in ``act``.

     The name is picked with ``np.random.choice`` among the entries of
     ``self.activities`` that are not contained in ``act``. When
     ``self.attributes`` is set, each attribute ``a`` receives the value
     ``'Random <a.name> <k>'`` with ``k`` drawn from
     ``np.random.randint(1, 3)``.

     :param act: container of activities that must not be chosen
     :return: the newly created Event
     :raises RuntimeError: if ``self.activities`` is None
     """
     if self.activities is None:
         raise RuntimeError('activities has not bee set.')

     # Activities still eligible after removing the excluded ones.
     candidates = []
     for activity in self.activities:
         if activity not in act:
             candidates.append(activity)

     chosen = np.random.choice(candidates)
     new_event = Event(name=f'{chosen}')

     if self.attributes is None:
         return new_event

     new_event.attributes = {
         attribute.name: f'Random {attribute.name} {np.random.randint(1, 3)}'
         for attribute in self.attributes
     }
     return new_event
Exemplo n.º 4
0
    def from_sql(server, database, resource, password, schema='pm'):
        """
        Build an event log from the ``<schema>.EventLog`` table of a SQL
        Server database reached through pyodbc.

        NOTE(review): this excerpt ends right after the row loop; neither a
        return statement nor any cursor/connection cleanup is visible here.

        :param server: value placed in the SERVER field of the connection string
        :param database: value placed in the DATABASE field
        :param resource: value placed in the UID field
        :param password: value placed in the PWD field
        :param schema: schema name inserted into the query text
        """
        # Imported locally, so pyodbc is only required when this loader is used.
        import pyodbc
        # Each value is wrapped in literal braces inside the connection string.
        conn = pyodbc.connect(f'DRIVER={{ODBC Driver 17 for SQL Server}};'
                              f'SERVER={{{server}}};'
                              f'DATABASE={{{database}}};'
                              f'UID={{{resource}}};'
                              f'PWD={{{password}}}')

        cursor = conn.cursor()
        # NOTE(review): `schema` is interpolated directly into the SQL text;
        # confirm it never comes from untrusted input.
        cursor.execute(
            f'SELECT CaseId, ActivityName, Resource, Timestamp FROM {schema}.EventLog ORDER BY CaseId, Timestamp, SortKey, ActivityName'
        )

        event_log = EventLog()
        case = None
        current_case_id = None
        # Rows arrive ordered by CaseId, so a change of CaseId marks the start
        # of a new case.
        for row in cursor.fetchall():
            case_id = row[0]
            # A missing timestamp becomes ''. NOTE(review): the strftime
            # pattern itself contains double quotes, so they end up in the
            # formatted value — confirm this is intended.
            timestamp = '' if row[3] is None else row[3].strftime(
                '"%Y-%m-%d %H:%M:%S"')
            name = row[1]
            # Rebinds the `resource` parameter; its original value was only
            # used for the connection string above.
            resource = row[2]

            if case_id != current_case_id:
                case = Case(id=case_id)
                event_log.add_case(case)
                current_case_id = case_id
            case.add_event(
                Event(name=name, timestamp=timestamp, resource=resource))
Exemplo n.º 5
0
    def from_csv(file_path):
        """
        Load an event log from a CSV file.

        The file is read with ``pd.read_csv``; the columns ``case_id``,
        ``start_time``, ``end_time``, ``event`` and ``user`` are accessed.
        One case is created per distinct ``case_id`` and one event per row
        belonging to that case.

        :param file_path: path to CSV file
        :return: EventLog object
        """
        frame = pd.read_csv(file_path)
        log = EventLog()

        columns = ('start_time', 'end_time', 'event', 'user')
        for cid in np.unique(frame['case_id']):
            current_case = Case(id=cid)
            # Rows of this case only, visited in their order in the file.
            case_rows = frame[frame.case_id == cid]
            for _, record in case_rows.iterrows():
                started, ended, activity, user = (record[c] for c in columns)
                current_case.add_event(
                    Event(name=activity,
                          timestamp=started,
                          end_time=ended,
                          user=user))
            log.add_case(current_case)

        return log
Exemplo n.º 6
0
class ReworkAnomaly(Anomaly):
    """Repeat a contiguous run of a case's events further along in the same case."""

    def __init__(self, max_sequence_size=2, max_distance=0):
        """
        :param max_sequence_size: upper bound on the number of repeated events
        :param max_distance: upper bound on the number of events between the
            end of the original run and its repetition
        """
        self.max_sequence_size = max_sequence_size
        self.max_distance = max_distance
        super().__init__()

    def apply_to_case(self, case):
        """
        Insert cloned copies of a random run of events into ``case``.

        Cases with at most one event are delegated to ``NoneAnomaly``.
        Otherwise the case's events are replaced by the anomalous sequence
        and ``case.attributes['label']`` records the anomaly, the run size,
        the insertion index and the ``json`` of each inserted event.

        :param case: the case to modify in place
        :return: the same case object (or NoneAnomaly's result)
        """
        n_events = len(case)
        if n_events <= 1:
            return NoneAnomaly().apply_to_case(case)

        # Draw order matters for reproducibility: size, then start, then distance.
        longest_run = min(n_events, self.max_sequence_size)
        size = np.random.randint(2, longest_run + 1)
        start = np.random.randint(0, n_events - size + 1)
        room_after_run = n_events - (start + size)
        distance = np.random.randint(
            0, min(room_after_run, self.max_distance) + 1)
        insert_at = start + size + distance

        events = case.events
        repeated = [Event.clone(event) for event in events[start:start + size]]
        inserted = [event.json for event in repeated]

        case.events = events[:insert_at] + repeated + events[insert_at:]

        case.attributes['label'] = {
            'anomaly': str(self),
            'attr': {
                'size': int(size),
                'start': int(insert_at),
                'inserted': inserted,
            },
        }

        return case
Exemplo n.º 7
0
 def from_dict(log):
     """
     Build an event log from its dictionary representation.

     ``log['attributes']`` is passed as keyword arguments to ``EventLog``;
     each entry of ``log['cases']`` supplies ``id``, ``attributes`` and
     ``events``, and each event supplies ``name``, ``timestamp`` and
     ``attributes``.

     :param log: dictionary describing the event log
     :return: EventLog object
     """
     result = EventLog(**log['attributes'])
     for case_data in log['cases']:
         parsed_case = Case(id=case_data['id'], **case_data['attributes'])
         for event_data in case_data['events']:
             parsed_case.add_event(
                 Event(name=event_data['name'],
                       timestamp=event_data['timestamp'],
                       **event_data['attributes']))
         result.add_case(parsed_case)
     return result
Exemplo n.º 8
0
    def path_to_case(self, p, label=None):
        """
        Turn a path of node identifiers into a case.

        The path is read in chunks of ``self.event_len`` nodes: the first
        node's ``'value'`` names the event, and each remaining node in the
        chunk adds one attribute (its ``'name'`` mapped to its ``'value'``).

        NOTE(review): if ``self.graph`` is a networkx graph, ``.node`` was
        removed in networkx 2.4 in favour of ``.nodes`` — confirm the pinned
        version before upgrading.

        :param p: path of node identifiers
        :param label: is used to label the case
        :return: a case
        """
        nodes = self.graph.node
        chunk_len = self.event_len

        result = Case(label=label)
        for offset in range(0, len(p), chunk_len):
            new_event = Event(name=nodes[p[offset]]['value'])
            for step in range(1, chunk_len):
                attribute_node = nodes[p[offset + step]]
                new_event.attributes[attribute_node['name']] = attribute_node['value']
            result.add_event(new_event)

        return result
Exemplo n.º 9
0
        def parse_event(event):
            """
            Convert one event element into an Event.

            Every child of ``event`` contributes its ``key``/``value`` pair as
            an attribute. The timestamp is the ``time:timestamp`` attribute if
            present. When classifiers exist, the name is the ``'+'``-joined
            values of the first classifier's keys; otherwise it is ``''``.

            :param event: iterable of child elements exposing ``attrib``
            :return: the Event, or None if a classifier key is missing
            """
            attr = {child.attrib['key']: child.attrib['value'] for child in event}
            timestamp = attr.get('time:timestamp')

            # No classifier: the event is created with an empty name.
            if len(classifiers) == 0:
                return Event(name='', timestamp=timestamp, **attr)

            keys = classifiers[0]['keys']
            missing = [key for key in keys if key not in attr]
            if len(missing) > 0:
                print(
                    f'Classifier key(s) {", ".join(missing)} could not be found in event.'
                )
                return None

            name = '+'.join(attr[key] for key in keys)
            return Event(name=name, timestamp=timestamp, **attr)
Exemplo n.º 10
0
    def from_json(file_path):
        """
        Parse event log from JSON.

        JSON can be gzipped; compression is assumed when the path ends with
        ``gz``. The cases are read from the ``traces`` key when present (older
        format), otherwise from ``cases``.

        :param file_path: path to json file (converted with ``str`` if needed)
        :return: EventLog object
        """
        if not isinstance(file_path, str):
            file_path = str(file_path)

        # Choose the opener under its own name. Assigning to ``open`` inside
        # this function would make ``open`` a local name for the whole body,
        # so uncompressed files would fail with UnboundLocalError.
        if file_path.endswith('gz'):
            import gzip
            opener = gzip.open
        else:
            opener = open

        # Read the file
        with opener(file_path, 'rb') as f:
            log = json.loads(f.read().decode('utf-8'))

        event_log = EventLog(**log['attributes'])

        # Compatibility: Check for traces in log
        case_key = 'traces' if 'traces' in log else 'cases'

        for case in log[case_key]:
            _case = Case(id=case['id'], **case['attributes'])
            for e in case['events']:
                event = Event(name=e['name'],
                              timestamp=e['timestamp'],
                              **e['attributes'])
                _case.add_event(event)
            event_log.add_case(_case)

        return event_log
Exemplo n.º 11
0
 def clone(trace):
     """
     Return a copy of ``trace`` as a new Case.

     Each event is duplicated with ``Event.clone``; the id is reused and the
     attributes are passed on through a new top-level dict.

     :param trace: the case to copy
     :return: the new Case
     """
     copied_events = list(map(Event.clone, trace.events))
     return Case(id=trace.id, events=copied_events, **dict(trace.attributes))