# ---------------------------------------------------------------------------
# Data preparation
#
# Reads the dataset, splits it into train/test parts, encodes both parts,
# computes the normalization divisors, normalizes the encoded test data and
# derives the size parameters (max_len, n_activities, data_dim) from the
# encoded training data.
# ---------------------------------------------------------------------------
print('Preparing data...')
start = time.time()

dataset_manager = DatasetManager(dataset_name)
data = dataset_manager.read_dataset()

# To reproduce results of Tax et al., use 'ordered' instead of 'temporal'
# (presumably as the split type passed via `split=` below -- confirm).
train, test = dataset_manager.split_data(
    data,
    train_ratio,
    split=data_split_type,
)

dt_train = dataset_manager.encode_data_with_label_all_data(train)
dt_test = dataset_manager.encode_data_with_label_all_data(test)

# Select the data the normalization divisors are calculated from.
if normalize_over == "train":
    dataset_manager.calculate_divisors(dt_train)
elif normalize_over == "all":
    # Build the feature frame for the full dataset: timestamp features
    # first, then duration features on top of that result.
    dt_all = dataset_manager.extract_duration_features(
        dataset_manager.extract_timestamp_features(data)
    )
    dataset_manager.calculate_divisors(dt_all)
else:
    # NOTE(review): only a message is printed; calculate_divisors is not
    # called in this branch and execution continues to normalize_data.
    print("unknown normalization mode")

dt_test = dataset_manager.normalize_data(dt_test)
print("Done: %s" % (time.time() - start))

# Size parameters taken from the encoded training data.
max_len = dataset_manager.get_max_case_length(dt_train)
activity_cols = [col for col in dt_train.columns if col.startswith("act")]
n_activities = len(activity_cols)
# NOTE(review): three columns are excluded from the data dimension --
# presumably non-feature columns; confirm against the encoder.
data_dim = dt_train.shape[1] - 3

# Compile a model with the same parameters as the one that was trained,
# and load the weights of the trained model.