def load(self, dataset_name):
    """
    Load dataset from disk. If a cached file exists, load from the cache.
    If no cache file exists, load from the event log and cache it.

    :param dataset_name: name of the dataset (event log) file to load
    :return: None
    """
    el_file = EventLogFile(dataset_name)
    self.dataset_name = el_file.name

    # Check for cache
    if el_file.cache_file.exists():
        self._load_dataset_from_cache(el_file.cache_file)

    # Else generate from event log
    elif el_file.path.exists():
        self._event_log = EventLog.load(el_file.path)
        self.from_event_log(self._event_log)
        self._cache_dataset(el_file.cache_file)
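
# A minimal standalone sketch of the cache-or-compute pattern that load()
# implements above, assuming pickle-based caching. The helper name
# load_with_cache and the parse() callback are illustrative assumptions,
# not this project's API.
import pickle
from pathlib import Path

def load_with_cache(raw_path, cache_path, parse):
    """Return parsed data, reading the cache when present, else parsing and caching."""
    raw_path, cache_path = Path(raw_path), Path(cache_path)
    if cache_path.exists():
        with cache_path.open('rb') as f:
            return pickle.load(f)   # fast path: reuse the cached result
    data = parse(raw_path)          # slow path: parse the raw event log
    with cache_path.open('wb') as f:
        pickle.dump(data, f)        # cache for subsequent loads
    return data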
    'bpic15-0.0-5.json.gz',
    'bpic12-0.0-0.json.gz',
    'bpic17-0.0-1.json.gz',
    'bpic17-0.0-2.json.gz'
]

np.random.seed(0)  # This will ensure reproducibility

ps = [0.3]
event_log_paths = [
    e.path for e in get_event_log_files(EVENTLOG_DIR)
    if 'bpic' in e.name and e.p == 0.0
]

combinations = list(itertools.product(event_log_paths, ps))
for event_log_path, p in tqdm(combinations, desc='Add anomalies'):
    event_log_file = EventLogFile(event_log_path)
    event_log = EventLog.from_json(event_log_path)

    anomalies = [
        ReplaceAnomaly(max_replacements=1),
        SkipSequenceAnomaly(max_sequence_size=2),
        ReworkAnomaly(max_distance=5, max_sequence_size=3),
        EarlyAnomaly(max_distance=5, max_sequence_size=2),
        LateAnomaly(max_distance=5, max_sequence_size=2),
        InsertAnomaly(max_inserts=2)
    ]

    # if event_log.num_event_attributes > 0:
    #     anomalies.append(AttributeAnomaly(max_events=3, max_attributes=min(2, event_log.num_activities)))

    for anomaly in anomalies:
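
# A standalone sketch of one of the anomaly types instantiated above,
# following the usual reading of SkipSequenceAnomaly (drop a contiguous run
# of up to max_sequence_size events from a trace). The function name and
# signature are illustrative assumptions; the library's implementation may differ.
import numpy as np

def skip_sequence(trace, max_sequence_size=2, rng=None):
    """Return a copy of trace with a random contiguous subsequence removed."""
    rng = rng or np.random.default_rng(0)
    size = int(rng.integers(1, max_sequence_size + 1))
    size = min(size, max(len(trace) - 1, 0))  # always keep at least one event
    if size == 0:
        return list(trace)
    start = int(rng.integers(0, len(trace) - size + 1))
    return list(trace[:start]) + list(trace[start + size:])

# Example: skip_sequence(['a', 'b', 'c', 'd']) may return ['a', 'd'].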