def setUp(self):
    """Load the example train/test logs and cache their event names and extra columns."""
    train_file = create_test_log(log_name=general_example_train_filename,
                                 log_path=general_example_train_filepath)
    self.train_log = get_log(train_file)
    self.train_event_names = unique_events(self.train_log)
    self.train_add_col = get_additional_columns(self.train_log)

    test_file = create_test_log(log_name=general_example_test_filename,
                                log_path=general_example_test_filepath_xes)
    self.test_log = get_log(test_file)
    self.test_event_names = unique_events(self.test_log)
    self.test_add_col = get_additional_columns(self.test_log)
def setUp(self):
    """Load the example train/test logs and build a boolean encoding of prefix length 2."""
    self.train_log = get_log(
        create_test_log(log_name=general_example_train_filename,
                        log_path=general_example_train_filepath))
    self.test_log = get_log(
        create_test_log(log_name=general_example_test_filename,
                        log_path=general_example_test_filepath_xes))
    self.encoding = create_test_encoding(
        value_encoding=ValueEncodings.BOOLEAN.value,
        prefix_length=2,
        add_elapsed_time=True,
        task_generation_type=TaskGenerationTypes.ONLY_THIS.value)
def get_train_test_log(split: Split):
    """Returns training_log and test_log.

    Resolves a Split into concrete (training_log, test_log, additional_columns):

    * SPLIT_SINGLE with a matching cached SPLIT_DOUBLE (and a non-random
      ordering) -> recurse on the cached double split.
    * SPLIT_SINGLE with no cached double (or a random ordering) -> split the
      original log now; for deterministic orderings, persist the result as a
      new SPLIT_DOUBLE so it can be reused.
    * otherwise (already a double split) -> load the stored train/test logs.

    :param split: the Split model instance to resolve
    :return: (training_log, test_log, additional_columns)
    :raises TypeError: if the resulting training log contains no traces
    """
    # Case 1: single split, but an equivalent double split was already cached.
    # Random splits are excluded because reusing them would freeze the randomness.
    if split.type == SplitTypes.SPLIT_SINGLE.value and Split.objects.filter(
        type=SplitTypes.SPLIT_DOUBLE.value,
        original_log=split.original_log,
        test_size=split.test_size,
        splitting_method=split.splitting_method
    ).exists() and split.splitting_method != SplitOrderingMethods.SPLIT_RANDOM.value:
        return get_train_test_log(Split.objects.filter(
            type=SplitTypes.SPLIT_DOUBLE.value,
            original_log=split.original_log,
            test_size=split.test_size,
            splitting_method=split.splitting_method
        )[0])
    # Case 2: single split that must be computed now (no cache, or random ordering).
    elif split.original_log is not None and (not Split.objects.filter(
        type=SplitTypes.SPLIT_DOUBLE.value,
        original_log=split.original_log,
        test_size=split.test_size,
        splitting_method=split.splitting_method
    ).exists() or split.splitting_method == SplitOrderingMethods.SPLIT_RANDOM.value):
        training_log, test_log = _split_single_log(split)
        additional_columns = get_additional_columns(get_log(split.original_log))
        # Persist deterministic splits as a double split for future reuse;
        # file names encode the percentage ranges, e.g. '0-80.xes' / '80-100.xes'.
        if split.splitting_method != SplitOrderingMethods.SPLIT_RANDOM.value:
            _ = Split.objects.get_or_create(
                type=SplitTypes.SPLIT_DOUBLE.value,
                original_log=split.original_log,
                test_size=split.test_size,
                splitting_method=split.splitting_method,
                train_log=create_log(EventLog(training_log),
                                     '0-' + str(100 - int(split.test_size * 100)) + '.xes'),
                test_log=create_log(EventLog(test_log),
                                    str(100 - int(split.test_size * 100)) + '-100.xes'),
                additional_columns=split.additional_columns
            )[0]
        logger.info("\t\tLoaded single log from {}".format(split.original_log.path))
    # Case 3: already a double split with stored train/test logs.
    else:
        # Have to use sklearn to convert some internal data types
        training_log = get_log(split.train_log)
        additional_columns = get_additional_columns(training_log)
        if split.additional_columns is None:
            split.additional_columns = split.train_log.name + split.test_log.name + '_ac.xes'
            split.save()
        # test_size=0 keeps all traces while letting sklearn normalize the container type.
        training_log, train_log_to_append = train_test_split(training_log, test_size=0,
                                                             shuffle=False)
        test_log, test_log_to_append = train_test_split(get_log(split.test_log),
                                                        test_size=0, shuffle=False)
        logger.info("\t\tLoaded double logs from {} and {}.".format(
            split.train_log.path, split.test_log.path))
    if len(training_log) == 0:
        raise TypeError("Training log is empty. Create a new Split with better parameters")
    return training_log, test_log, additional_columns
def test_can_find_log_file(self):
    """The stored general_example log resolves to a parsed file with six traces."""
    stored = Log.objects.get(name="general_example.xes", path=general_example_filepath)
    parsed = get_log(stored)
    self.assertEqual(6, len(parsed))
def test_global_event_attributes(self):
    """Event attributes extracted from the example log match the expected names."""
    example_log = get_log(
        create_test_log(log_name=general_example_test_filename,
                        log_path=general_example_test_filepath_xes))
    extracted = get_additional_columns(example_log)
    expected = ['Activity', 'Costs', 'Resource', 'org:resource']
    self.assertListEqual(extracted['event_attributes'], expected)
def setUp(self):
    """Encode the example train/test logs with a boolean encoding for classification."""
    test_log = get_log(
        create_test_log(log_name=general_example_test_filename,
                        log_path=general_example_test_filepath_xes))
    training_log = get_log(
        create_test_log(log_name=general_example_train_filename,
                        log_path=general_example_train_filepath))
    job = create_test_job(
        encoding=create_test_encoding(
            value_encoding=ValueEncodings.BOOLEAN.value,
            add_elapsed_time=True),
        predictive_model=create_test_predictive_model(
            predictive_model=PredictiveModels.CLASSIFICATION.value))
    self.training_df, self.test_df = encode_label_logs(training_log, test_log, job)
def test_can_find_split_original_file(self):
    """A Split's original_log resolves to the same six-trace parsed log."""
    stored = Log.objects.get(name="general_example.xes", path=general_example_filepath)
    matching_split = Split.objects.get(original_log=stored)
    parsed = get_log(matching_split.original_log)
    self.assertEqual(6, len(parsed))
def setUp(self):
    """Build a frequency encoding with remaining-time labelling over the example logs."""
    test_log = get_log(
        create_test_log(log_name=general_example_test_filename,
                        log_path=general_example_test_filepath_xes))
    training_log = get_log(
        create_test_log(log_name=general_example_train_filename,
                        log_path=general_example_train_filepath))
    self.encoding = create_test_encoding(
        value_encoding=ValueEncodings.FREQUENCY.value,
        add_elapsed_time=True,
        task_generation_type=TaskGenerationTypes.ONLY_THIS.value,
        prefix_length=1)
    self.labelling = create_test_labelling(label_type=LabelTypes.REMAINING_TIME.value)
    job = create_test_job(encoding=self.encoding, labelling=self.labelling)
    self.training_df, self.test_df = encode_label_logs(training_log, test_log, job)
def setUp(self):
    """Load the example test log and a simple-index encoding of prefix length 1."""
    created = create_test_log(log_name=general_example_test_filename,
                              log_path=general_example_test_filepath_xes)
    self.log = get_log(created)
    self.labelling = create_test_labelling(label_type=LabelTypes.REMAINING_TIME.value)
    self.encoding = create_test_encoding(
        value_encoding=ValueEncodings.SIMPLE_INDEX.value,
        task_generation_type=TaskGenerationTypes.ONLY_THIS.value,
        add_elapsed_time=True,
        prefix_length=1)
def test_trace_attributes(self):
    """trace_attributes on the financial log yields the two known attributes."""
    self.log = get_log(create_test_log(log_name=financial_log_filename,
                                       log_path=financial_log_filepath))
    extracted = trace_attributes(self.log)
    self.assertEqual(2, len(extracted))
    expected_first = {'name': 'AMOUNT_REQ', 'type': 'number', 'example': '20000'}
    expected_second = {'name': 'REG_DATE', 'type': 'string',
                       'example': '2011-10-01 00:38:44.546000+02:00'}
    self.assertDictEqual(expected_first, extracted[0])
    self.assertDictEqual(expected_second, extracted[1])
def get_log_traces_attributes(request, pk):
    """Return the trace attributes of the log identified by *pk*.

    Responds 404 when the log's backing file is missing on disk.
    """
    log = Log.objects.get(pk=pk)
    try:
        log_file = get_log(log)
    except FileNotFoundError:
        logger.error("Log id: %s, path %s not found", log.id, log.path)
        return Response({'error': 'log file not found'},
                        status=status.HTTP_404_NOT_FOUND)
    return Response(get_log_trace_attributes(log_file), status=status.HTTP_200_OK)
def test_replay_prediction(self):
    """Run a prediction task, then replay it against the runtime example log."""
    job = create_test_job(create_models=True)
    runtime_log = create_test_log(
        log_name='runtime_example.xes',
        log_path='cache/log_cache/test_logs/runtime_test.xes')
    parsed_log = get_log(runtime_log)
    prediction_task(job.id)
    job.refresh_from_db()
    replay_prediction_task(job, job, parsed_log)
def setUp(self):
    """Load example logs plus complex encodings with and without padding."""
    self.train_log = get_log(
        create_test_log(log_name=general_example_train_filename,
                        log_path=general_example_train_filepath))
    self.test_log = get_log(
        create_test_log(log_name=general_example_test_filename,
                        log_path=general_example_test_filepath_xes))
    self.add_col = get_additional_columns(self.train_log)
    self.encoding = create_test_encoding(
        value_encoding=ValueEncodings.COMPLEX.value,
        add_elapsed_time=True,
        task_generation_type=TaskGenerationTypes.ONLY_THIS.value,
        prefix_length=2)
    self.encodingPadding = create_test_encoding(
        value_encoding=ValueEncodings.COMPLEX.value,
        add_elapsed_time=True,
        task_generation_type=TaskGenerationTypes.ONLY_THIS.value,
        prefix_length=10,
        padding=True)
def setUp(self):
    """Load the example test log and a last-payload encoding of prefix length 1."""
    created = create_test_log(log_name=general_example_test_filename,
                              log_path=general_example_test_filepath_xes)
    self.log = get_log(created)
    self.event_names = unique_events(self.log)
    self.labelling = create_test_labelling(label_type=LabelTypes.REMAINING_TIME.value)
    self.add_col = get_additional_columns(self.log)
    self.encoding = create_test_encoding(
        value_encoding=ValueEncodings.LAST_PAYLOAD.value,
        add_elapsed_time=True,
        task_generation_type=TaskGenerationTypes.ONLY_THIS.value,
        prefix_length=1)
def _split_single_log(split: Split):
    """Split the original log of *split* into (train, test) per its ordering method.

    :param split: Split whose `original_log` and `splitting_method` drive the split
    :return: (training_log, test_log) pair
    :raises ValueError: when the splitting method is not a known ordering
    """
    log = get_log(split.original_log)
    logger.info("\t\tExecute single split ID {}, split_type {}, test_size {}".format(
        split.id, split.type, split.test_size))
    method = split.splitting_method
    if method == SplitOrderingMethods.SPLIT_TEMPORAL.value:
        return _temporal_split(log, split.test_size)
    if method == SplitOrderingMethods.SPLIT_STRICT_TEMPORAL.value:
        return _temporal_split_strict(log, split.test_size)
    if method == SplitOrderingMethods.SPLIT_SEQUENTIAL.value:
        return _split_log(log, split.test_size, shuffle=False)
    if method == SplitOrderingMethods.SPLIT_RANDOM.value:
        return _split_log(log, split.test_size, random_state=None)
    raise ValueError('splitting method {} not recognized'.format(method))
def test_eval(self):
    """simple_index with an all-in-one frequency encoding yields the expected frame."""
    encoding = create_test_encoding(
        value_encoding=ValueEncodings.FREQUENCY.value,
        task_generation_type=TaskGenerationTypes.ALL_IN_ONE.value,
        add_elapsed_time=True,
        prefix_length=12,
        padding=True)
    example_log = get_log(create_test_log(log_path=general_example_filepath,
                                          log_name=general_example_filename))
    labelling = create_test_labelling(label_type=LabelTypes.REMAINING_TIME.value)
    df = simple_index(example_log, labelling, encoding)

    self.assertEqual(df.shape, (41, 15))
    expected_row = ['4', 'register request', 'check ticket', 'examine thoroughly',
                    'decide', 'reject request', 0, 0, 0, 0, 0, 0, 0, 520920.0, 0.0]
    actual_row = df[df.trace_id == '4'].iloc[4]
    self.assertListEqual(expected_row, actual_row.values.tolist())
    self.assertFalse(df.isnull().values.any())
def replay_prediction(replay_job: Job, training_initial_job: Job, trace_id) -> list:
    """The function create a set with timestamps of events, then create a list of requests
    simulating the log in the time passing

    Builds an EventLog of all prefixes of the trace at *trace_id* in the split's
    train log, posts it to the runtime replay_prediction endpoint, and records
    the outcome (response or exception) as a string.

    :param trace_id: index of the trace to replay within the train log
    :param replay_job: job dictionary
    :param training_initial_job: job dictionary
    :return: List of requests
    """
    split = replay_job.split
    log = get_log(split.train_log)
    requests_list = list()

    # Copy log-level attributes onto a fresh EventLog that will hold the prefixes.
    eventlog = EventLog()
    trace = log[int(trace_id)]
    for key in log.attributes.keys():
        eventlog.attributes[key] = log.attributes[key]
    # One prefix per length 0..len(trace)-1; each prefix keeps the trace attributes.
    # NOTE(review): range(len(trace)) never includes the full trace — confirm intended.
    for index in range(len(trace)):
        new_trace = Trace(trace[0:index])
        for key in trace.attributes:
            new_trace.attributes[key] = trace.attributes[key]
        eventlog.append(new_trace)

    # Record which case is being replayed on the job before issuing the request.
    replay_job.case_id = trace_id
    replay_job.event_number = len(trace)
    replay_job.save()
    try:
        # Fixed: this is a routine progress message; replay_core logs the same
        # message at info level, so error level here was inconsistent and noisy.
        logger.info("Sending request for replay_prediction task.")
        r = requests.post(
            url="http://127.0.0.1:8000/runtime/replay_prediction/",
            data=export_log_as_string(eventlog),
            params={
                'jobId': replay_job.id,
                'training_job': training_initial_job.id
            },
            headers={
                'Content-Type': 'text/plain',
                'charset': 'UTF-8'
            })
        requests_list.append(str(r))
    except Exception as e:
        # Best-effort: failures are recorded in the result list, not raised.
        requests_list.append(str(e))
        logger.warning(str(e))
    return requests_list
def replay_core(replay_job: Job, training_initial_job: Job) -> list:
    """The function create a set with timestamps of events, then create a list of requests
    simulating the log in the time passing

    Replays the split's train log over five growing time windows, posting each
    filtered sub-log to the runtime replay_prediction endpoint, then creates a
    REPLAY_PREDICT child Job carrying the gold label values.

    :param replay_job: job dictionary
    :param training_initial_job: job dictionary
    :return: List of requests
    """
    split = replay_job.split
    log = get_log(split.train_log)
    requests_list = list()

    # Deep-ish copy of the log: log- and trace-level attributes are carried over.
    eventlog = EventLog()
    for key in log.attributes.keys():
        eventlog.attributes[key] = log.attributes[key]
    for trace in log:
        new_trace = Trace(trace)
        for key in trace.attributes:
            new_trace.attributes[key] = trace.attributes[key]
        eventlog.append(new_trace)

    # All distinct event timestamps, ordered; used to define the replay windows.
    times = sorted(
        set([event['time:timestamp'] for trace in eventlog for event in trace]))
    # Five cut points across the timeline (step = (len-2)/5, starting at index 2).
    for t in times[2::int((len(times) - 2) / 5)]:
        # Keep only events between the first timestamp and the current cut point.
        filtered_eventlog = timestamp_filter.apply_events(
            eventlog, times[0].replace(tzinfo=None), t.replace(tzinfo=None))
        trace_list = list()
        event_number = dict()
        for trace in filtered_eventlog:
            trace_list.append(trace.attributes['concept:name'])
            event_number[trace.attributes['concept:name']] = len(trace)
        # Record which cases (and how many events each) this window contains.
        replay_job.case_id = trace_list
        replay_job.event_number = event_number
        replay_job.save()
        try:  #TODO check logger usage
            logger.info("Sending request for replay_prediction task.")
            r = requests.post(
                url="http://server:8000/runtime/replay_prediction/",
                data=export_log_as_string(filtered_eventlog),
                params={
                    'jobId': replay_job.id,
                    'training_job': training_initial_job.id
                },
                headers={
                    'Content-Type': 'text/plain',
                    'charset': 'UTF-8'
                })
            requests_list.append(str(r))
        except Exception as e:
            # Best-effort: failures are stored as strings rather than raised.
            requests_list.append(str(e))
            logger.warning(str(e))

    # Compute gold label values from the encoded training data for later comparison.
    training_log, test_log, additional_columns = get_train_test_log(
        replay_job.split)
    training_df, _ = encode_label_logs(training_log, test_log, replay_job,
                                       additional_columns=additional_columns)
    gold_values = dict(zip(training_df['trace_id'], training_df['label']))
    parent_id = replay_job.id
    # final_job = duplicate_orm_row(replay_job)  #todo: replace with simple CREATE
    # Field-by-field clone of replay_job as a new child job.
    final_job = Job.objects.create(
        created_date=replay_job.created_date,
        modified_date=replay_job.modified_date,
        error=replay_job.error,
        status=replay_job.status,
        type=replay_job.type,
        create_models=replay_job.create_models,
        case_id=replay_job.case_id,
        event_number=replay_job.event_number,
        gold_value=replay_job.gold_value,
        results=replay_job.results,
        parent_job=replay_job.parent_job,
        split=replay_job.split,
        encoding=replay_job.encoding,
        labelling=replay_job.labelling,
        clustering=replay_job.clustering,
        predictive_model=replay_job.predictive_model,
        evaluation=replay_job.evaluation,
        hyperparameter_optimizer=replay_job.hyperparameter_optimizer,
        incremental_train=replay_job.incremental_train)
    final_job.parent_job = Job.objects.filter(pk=parent_id)[0]
    final_job.gold_value = gold_values
    final_job.type = JobTypes.REPLAY_PREDICT.value
    final_job.save()
    return requests_list
def setUp(self):
    """Load the general example log used by the tests in this case."""
    created = create_test_log(log_name=general_example_filename,
                              log_path=general_example_filepath)
    self.log = get_log(created)