# NOTE(review): the line below is a multi-line script fragment whose line
# breaks were collapsed. Because it begins with '#', Python treats the whole
# line as a comment, so none of it executes as written.
# Visible contents, in order:
#   - a commented-out train_test_split(X, y, test_size=0.3, random_state=42)
#     and a commented-out pickle export of test_X / test_y under
#     `status == 'o'`;
#   - print("flag: training model");
#   - under `if contextual_info:`, calls to fg.queue_level,
#     fg.get_activities and fg.generate_context_feature, the last of which
#     is assigned to train_context_X;
#   - model = net(), then model.train(...) when task == 'next_timestamp'.
# Which statements belong inside `if contextual_info:` cannot be determined
# without the original indentation -- TODO restore it from the original file.
# NOTE(review): train_context_X is assigned only after `if contextual_info:`
# yet is passed as X_train_ctx to model.train -- confirm it is defined when
# contextual_info is falsy.
# The trailing `elif task == 'next_activity':` branch has no body in view.
# spliting train and test set # print("flag: training sets") # train_X, test_X, train_y, test_y = train_test_split(X, y, test_size=0.3, random_state=42) # print("done") # export test data if original data is used # if status == 'o': # with open("%s/%s-%s.pkl" % (testset_dir, feature_name, "X"), 'wb') as f: # pickle.dump(test_X, f) # with open("%s/%s-%s.pkl" % (testset_dir, feature_name, "y"), 'wb') as f: # pickle.dump(test_y, f) # print("flag: test set is exported") print("flag: training model") if contextual_info: train_df = fg.queue_level(train_df) activity_list = fg.get_activities(train_df) train_context_X = fg.generate_context_feature(train_df, activity_list) model = net() if task == 'next_timestamp': model.train(train_X, train_y, regression, loss, n_epochs=num_epochs, batch_size=batch_size, num_folds=num_folds, model_name=model_name, checkpoint_dir=args.checkpoint_dir, X_train_ctx=train_context_X) elif task == 'next_activity':
# NOTE(review): the line below is a multi-line script fragment whose line
# breaks were collapsed. As written, only the text before the first '#' is
# code, and it is not valid Python (three assignments on one line with no
# separator); everything from "# load data" onward is a comment.
# Visible contents, in order:
#   - regression = True; batch_size and num_folds are read from
#     args.batch_size_train and args.num_folds;
#   - FG = FeatureGenerator(); df = FG.create_initial_log(filename);
#   - the FG.train_test_split(df, 0.7, 0.3) call is commented out; instead
#     train_df = df and test_df = train_df, so at that point test_df is
#     bound to the same object as train_df;
#   - train_df is passed through FG.order_csv_time and FG.queue_level, then
#     written to './training_data.csv';
#   - state_list = FG.get_states(train_df); train_X, train_Y_Event and
#     train_Y_Time are unpacked from FG.one_hot_encode_history(train_df,
#     args.checkpoint_dir + args.data_set);
#   - under `if contextual_info:`, train_context_X is assigned from
#     FG.generate_context_feature(train_df, state_list);
#   - model = net(), then model.train(...) under `if regression:`.
# Which statements belong inside each `if` cannot be determined without the
# original indentation -- TODO restore it from the original file.
# NOTE(review): train_context_X is assigned only after `if contextual_info:`
# yet is passed positionally to model.train -- confirm it is defined when
# contextual_info is falsy.
# The model.train(...) call is cut off mid-argument list.
regression = True batch_size = args.batch_size_train num_folds = args.num_folds # load data FG = FeatureGenerator() df = FG.create_initial_log(filename) #split train and test #train_df, test_df = FG.train_test_split(df, 0.7, 0.3) train_df = df test_df = train_df #create train train_df = FG.order_csv_time(train_df) train_df = FG.queue_level(train_df) train_df.to_csv('./training_data.csv') state_list = FG.get_states(train_df) train_X, train_Y_Event, train_Y_Time = FG.one_hot_encode_history( train_df, args.checkpoint_dir + args.data_set) if contextual_info: train_context_X = FG.generate_context_feature(train_df, state_list) model = net() if regression: model.train(train_X, train_context_X, train_Y_Time, regression, loss, batch_size=batch_size, num_folds=num_folds,