# encode train and validation prefixes with the chosen activity/resource embeddings
dt_train = dataset_manager.encode_data_with_label_all_data_act_res_embedding(
    train, embedding_type=embedding_type,
    embedding_dim=embedding_dim, scale_model=scale_model)
dt_val = dataset_manager.encode_data_with_label_all_data_act_res_embedding(
    val, embedding_type=embedding_type,
    embedding_dim=embedding_dim, scale_model=scale_model)

# cap the prefix length at the 95th percentile of positive case lengths
if "bpic2017" in dataset_name:
    max_len = min(20, dataset_manager.get_pos_case_length_quantile(data, 0.95))
else:
    max_len = min(40, dataset_manager.get_pos_case_length_quantile(data, 0.95))

data_dim = dt_train.shape[1] - 3  # exclude the three non-feature (id/label) columns

# build padded 3D tensors (samples x max_len x data_dim) with outcome labels
X, _, _, y_o = dataset_manager.generate_3d_data_with_label_all_data(dt_train, max_len)
X_val, _, _, y_o_val = dataset_manager.generate_3d_data_with_label_all_data(dt_val, max_len)
print(X.shape, y_o.shape, X_val.shape, y_o_val.shape)
print("Done: %s" % (time.time() - start))

print('Optimizing parameters...')
space = {
    'dropout': hp.uniform('dropout', 0, 0.3),
    'lstmsize': hp.choice('lstmsize', [str(val) for val in range(10, 151)]),  # candidate hidden sizes 10..150
    'batch_size': hp.choice('batch_size', ["8", "16", "32", "64"]),
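# --- A minimal sketch, assuming hyperopt's standard API, of how the `space`
# defined above is typically consumed. The objective `train_and_eval` and the
# budget `n_evals` are hypothetical names, not from this script; note the
# string-valued choices would need casting to int inside the objective:
#
#   from hyperopt import fmin, tpe, Trials, STATUS_OK
#
#   def train_and_eval(params):
#       lstmsize = int(params['lstmsize'])
#       batch_size = int(params['batch_size'])
#       # ... build and fit the model on (X, y_o), validate on (X_val, y_o_val) ...
#       return {'loss': val_loss, 'status': STATUS_OK}
#
#   trials = Trials()
#   best = fmin(train_and_eval, space, algo=tpe.suggest,
#               max_evals=n_evals, trials=trials)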
# compute normalization divisors over the training data only, or over all data
if normalize_over == "train":
    dataset_manager.calculate_divisors(dt_train)
elif normalize_over == "all":
    dt_all = dataset_manager.extract_timestamp_features(data)
    dt_all = dataset_manager.extract_duration_features(dt_all)
    dataset_manager.calculate_divisors(dt_all)
else:
    print("unknown normalization mode")

dt_train = dataset_manager.normalize_data(dt_train)
max_len = dataset_manager.get_max_case_length(dt_train)

# columns encoding activities are prefixed with "act"
activity_cols = [col for col in dt_train.columns if col.startswith("act")]
n_activities = len(activity_cols)
data_dim = dt_train.shape[1] - 3  # exclude the three non-feature (id/label) columns

# build padded 3D tensors with next-activity, time and outcome labels
X, y_a, y_t, y_o = dataset_manager.generate_3d_data_with_label_all_data(dt_train, max_len)
print(X.shape, y_a.shape, y_t.shape, y_o.shape)
sys.stdout.flush()
print("Done: %s" % (time.time() - start))

# build and train a two-layer LSTM with one shared layer (Keras 1.x API)
print('Training model...')
start = time.time()
main_input = Input(shape=(max_len, data_dim), name='main_input')
l1 = LSTM(lstmsize, input_shape=(max_len, data_dim), consume_less='gpu', init='glorot_uniform',
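# --- A hedged sketch (an assumption, not this script's actual continuation) of
# how such a shared-layer multi-task network is commonly completed in the
# Keras 1.x functional API: the shared LSTM returns sequences, task-specific
# LSTMs branch off it, and one output head per label (y_a, y_t, y_o) is compiled.
# Layer names and loss choices below are illustrative:
#
#   shared = LSTM(lstmsize, consume_less='gpu', init='glorot_uniform',
#                 return_sequences=True)(main_input)
#   branch_a = LSTM(lstmsize, return_sequences=False)(shared)
#   branch_t = LSTM(lstmsize, return_sequences=False)(shared)
#   branch_o = LSTM(lstmsize, return_sequences=False)(shared)
#   act_output = Dense(y_a.shape[1], activation='softmax', name='act_output')(branch_a)
#   time_output = Dense(1, name='time_output')(branch_t)
#   outcome_output = Dense(y_o.shape[1], activation='softmax', name='outcome_output')(branch_o)
#   model = Model(input=main_input, output=[act_output, time_output, outcome_output])
#   model.compile(optimizer='nadam',
#                 loss={'act_output': 'categorical_crossentropy',
#                       'time_output': 'mae',
#                       'outcome_output': 'categorical_crossentropy'})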