Пример #1
0
from riiid.validation import merge_test
from riiid.utils import configure_console_logging
from riiid.config import INPUT_PATH, MODELS_PATH, PARAMS

# Training pipeline: load data, merge in a validation set, compute features,
# fit every model and persist the results under MODELS_PATH.
configure_console_logging()

# Raw tables come from the loader (called with 30000), then the questions
# and lectures tables are preprocessed.
data_loader = DataLoader(INPUT_PATH)
train, questions, lectures = data_loader.load_first_users(30000)
questions = preprocess_questions(questions)
lectures = preprocess_lectures(lectures)

# The validation set is merged into the training data before feature
# computation.
validation = data_loader.load_tests('tests_0.pkl')
train = merge_test(train, validation)

# fit_transform returns the features, the targets and the two selectors
# used below to index X and y; `train` is rebound to the first selector.
model = RiiidModel(questions, lectures, params=PARAMS)
X, y, train, valid = model.fit_transform(train)
dataset = (X, y, train, valid)
data_path = os.path.join(MODELS_PATH, model.get_name('data.pkl'))
save_pkl(dataset, path=data_path)

# Each base learner receives the train split followed by the valid split.
for fit_method in ('fit_lgbm', 'fit_catboost', 'fit_neural'):
    fit = getattr(model, fit_method)
    fit(X[train], y[train], X[valid], y[valid])

# The blender is fit on the valid split only.
model.fit_blender(X[valid], y[valid])

# Persist the fitted model.
model_path = os.path.join(MODELS_PATH, model.get_name())
model.save(model_path)
Пример #2
0
    train, questions, lectures = loader.load()
    questions = preprocess_questions(questions)
    lectures = preprocess_lectures(lectures)

    test = loader.load_tests('tests_0.pkl')
    train = merge_test(train, test)
    del test

    PARAMS['question_embedding']['workers'] = 32
    PARAMS['answers_embedding']['workers'] = 32
    model = RiiidModel(questions, lectures, params=PARAMS)
    X, y, train, valid = model.fit_transform(train)

    bucket = S3Bucket(model.get_normalized_name())

    logging.info('Saving data')
    for data, name in [(X, 'X'), (y, 'y'), (train, 'train'), (valid, 'valid')]:
        bucket.save_pickle_multiparts(data, name + '.pkl')

    model.fit_lgbm(X[train], y[train], X[valid], y[valid])
    model.fit_catboost(X[train], y[train], X[valid], y[valid])

    logging.info('Saving model')
    bucket.save_multiparts(model.save_with_source(), model.get_name())

except Exception as e:
    logging.info('Unexpected exception: ' + str(e))

finally:
    terminate(CONTEXT)