from riiid.validation import merge_test
from riiid.utils import configure_console_logging
from riiid.config import INPUT_PATH, MODELS_PATH, PARAMS

configure_console_logging()

# --- Data -------------------------------------------------------------------
# Work on the first 30000 users only, then preprocess the two metadata tables.
loader = DataLoader(INPUT_PATH)
train, questions, lectures = loader.load_first_users(30000)
questions = preprocess_questions(questions)
lectures = preprocess_lectures(lectures)

# --- Validation set ---------------------------------------------------------
# The test split is merged into the training frame before feature computation.
test = loader.load_tests('tests_0.pkl')
train = merge_test(train, test)

# --- Features ---------------------------------------------------------------
# NOTE: `train` is rebound here; from this point on `train` and `valid` are
# whatever fit_transform returns and are used below to index X and y.
model = RiiidModel(questions, lectures, params=PARAMS)
X, y, train, valid = model.fit_transform(train)

data_path = os.path.join(MODELS_PATH, model.get_name('data.pkl'))
save_pkl((X, y, train, valid), path=data_path)

# --- Models -----------------------------------------------------------------
# Every base model receives the same (train, valid) split; the blender is
# then fitted on the validation part only.
for fit_base_model in (model.fit_lgbm, model.fit_catboost, model.fit_neural):
    fit_base_model(X[train], y[train], X[valid], y[valid])
model.fit_blender(X[valid], y[valid])

# --- Persist ----------------------------------------------------------------
model_path = os.path.join(MODELS_PATH, model.get_name())
model.save(model_path)
train, questions, lectures = loader.load() questions = preprocess_questions(questions) lectures = preprocess_lectures(lectures) test = loader.load_tests('tests_0.pkl') train = merge_test(train, test) del test PARAMS['question_embedding']['workers'] = 32 PARAMS['answers_embedding']['workers'] = 32 model = RiiidModel(questions, lectures, params=PARAMS) X, y, train, valid = model.fit_transform(train) bucket = S3Bucket(model.get_normalized_name()) logging.info('Saving data') for data, name in [(X, 'X'), (y, 'y'), (train, 'train'), (valid, 'valid')]: bucket.save_pickle_multiparts(data, name + '.pkl') model.fit_lgbm(X[train], y[train], X[valid], y[valid]) model.fit_catboost(X[train], y[train], X[valid], y[valid]) logging.info('Saving model') bucket.save_multiparts(model.save_with_source(), model.get_name()) except Exception as e: logging.info('Unexpected exception: ' + str(e)) finally: terminate(CONTEXT)