Пример #1
0
    # splitting train and test set
    # print("flag:  training sets")
    # train_X, test_X, train_y, test_y = train_test_split(X, y, test_size=0.3, random_state=42)
    # print("done")

    # export test data if original data is used
    # if status == 'o':
    #     with open("%s/%s-%s.pkl" % (testset_dir, feature_name, "X"), 'wb') as f:
    #         pickle.dump(test_X, f)
    #     with open("%s/%s-%s.pkl" % (testset_dir, feature_name, "y"), 'wb') as f:
    #         pickle.dump(test_y, f)
    #     print("flag: test set is exported")

    print("flag: training model")
    if contextual_info:
        train_df = fg.queue_level(train_df)
        activity_list = fg.get_activities(train_df)
        train_context_X = fg.generate_context_feature(train_df, activity_list)
        model = net()
        if task == 'next_timestamp':
            model.train(train_X,
                        train_y,
                        regression,
                        loss,
                        n_epochs=num_epochs,
                        batch_size=batch_size,
                        num_folds=num_folds,
                        model_name=model_name,
                        checkpoint_dir=args.checkpoint_dir,
                        X_train_ctx=train_context_X)
        elif task == 'next_activity':
        regression = True

    batch_size = args.batch_size_train
    num_folds = args.num_folds

    # load data
    FG = FeatureGenerator()
    df = FG.create_initial_log(filename)

    # split train and test (split is currently disabled: the full log is used for both)
    #train_df, test_df = FG.train_test_split(df, 0.7, 0.3)
    train_df = df
    test_df = train_df
    # create the training data
    train_df = FG.order_csv_time(train_df)
    train_df = FG.queue_level(train_df)
    train_df.to_csv('./training_data.csv')
    state_list = FG.get_states(train_df)
    train_X, train_Y_Event, train_Y_Time = FG.one_hot_encode_history(
        train_df, args.checkpoint_dir + args.data_set)
    if contextual_info:
        train_context_X = FG.generate_context_feature(train_df, state_list)
        model = net()
        if regression:
            model.train(train_X,
                        train_context_X,
                        train_Y_Time,
                        regression,
                        loss,
                        batch_size=batch_size,
                        num_folds=num_folds,