def get_data(n=1000):
    """Load a training subset, its preprocessed metadata and a test set.

    Args:
        n: Value forwarded to ``DataLoader.load_first_users``.

    Returns:
        A ``(train, questions, lectures, test)`` tuple, where ``questions``
        and ``lectures`` have been passed through ``preprocess_questions``
        and ``preprocess_lectures`` and ``test`` is loaded from
        ``'tests_1.pkl'``.
    """
    data_loader = DataLoader(INPUT_PATH)
    train, questions, lectures = data_loader.load_first_users(n)
    questions, lectures = preprocess_questions(questions), preprocess_lectures(lectures)

    # The test set is read last, after the metadata has been preprocessed.
    return train, questions, lectures, data_loader.load_tests('tests_1.pkl')
# Example #2
# 0
def score_params(params, n_users=30000):
    """Fit the LightGBM part of a ``RiiidModel`` with ``params`` and report its score.

    Args:
        params: Parameters forwarded to the ``RiiidModel`` constructor.
        n_users: Value forwarded to ``DataLoader.load_first_users``.

    Returns:
        A ``(model.best_score, model.best_iteration)`` tuple read from the
        model after ``fit_lgbm`` has run.
    """
    data_loader = DataLoader(CONTEXT.data_path())
    train, questions, lectures = data_loader.load_first_users(n_users)
    questions = preprocess_questions(questions)
    lectures = preprocess_lectures(lectures)

    # Feed the loaded tests straight into merge_test so that no extra
    # reference to them outlives the merge.
    train = merge_test(train, data_loader.load_tests('tests_0.pkl'))

    model = RiiidModel(questions, lectures, params)
    # `train` is deliberately rebound here: the merged data is no longer
    # needed once fit_transform has returned.
    features, target, train, valid = model.fit_transform(train)
    model.fit_lgbm(features[train], target[train], features[valid], target[valid])

    result = (model.best_score, model.best_iteration)
    return result
# Example #3
# 0
import time
import logging

from riiid.core.data import DataLoader
from riiid.saint.model import SaintModel
from riiid.utils import configure_console_logging
from riiid.config import INPUT_PATH, MODELS_PATH


configure_console_logging()

# Load a previously saved SAINT model identified by MODEL_ID.
logging.info('Loading model')
MODEL_ID = 'saint_20210104_024103'
model: SaintModel = SaintModel.load(MODELS_PATH, MODEL_ID)

tests = DataLoader(INPUT_PATH).load_tests_examples()

# The timer is armed only when model.test_batch equals 1 at the start of an
# iteration. Initialise it explicitly so that an empty `tests` (or a counter
# that never reaches 1) produces a clear message instead of a NameError.
start = None
for test in tests:
    if model.test_batch == 1:
        start = time.perf_counter()

    # Feed the batch to the model, then predict on the updated batch.
    test = model.update(test)
    _, predictions = model.predict(test)

end = time.perf_counter()
if start is None:
    logging.warning('Timer was never started: no iteration began with model.test_batch == 1')
else:
    total = end - start
    logging.info('Time spent: {:.1f}s ({:.3f}s by batch)'.format(total, total / model.test_batch))
# Example #4
# 0
import os
from riiid.config import INPUT_PATH
from riiid.core.data import DataLoader, save_pkl, load_pkl
from riiid.validation import generate_test


# Build two test sets from the full training data and pickle them
# next to the input files.
loader = DataLoader(INPUT_PATH)
train, _, _ = loader.load()

# (file name, size, N) for each test set; both use seed 0.
TEST_SPECS = (
    ('tests_0.pkl', 2_500_000, 10_000),
    ('tests_1.pkl', 5_000_000, 20_000),
)

for filename, size, n_batches in TEST_SPECS:
    test = generate_test(train, size=size, N=n_batches, seed=0)
    save_pkl(test, os.path.join(INPUT_PATH, filename))
import os
from riiid.core.data import DataLoader, save_pkl
from riiid.saint.model import SaintModel
from riiid.utils import configure_console_logging
from riiid.config import INPUT_PATH, MODELS_PATH


configure_console_logging()

# --- Data -------------------------------------------------------------
data_loader = DataLoader(INPUT_PATH)
train, questions, lectures = data_loader.load_first_users(30000)

# --- Features ---------------------------------------------------------
model = SaintModel(questions, lectures)
train = model.fit_transform(train)

# --- Train / validation datasets --------------------------------------
# Each step rebinds `train` / `test` so earlier intermediates can be freed.
train, test = model.split_train_test(train)
train = model.create_features(train)
test = model.create_features(test)
X_train, y_train = model.create_dataset(train)
X_test, y_test = model.create_dataset(test)

# Keep the four arrays together: they are pickled and then used for fitting.
dataset = (X_train, y_train, X_test, y_test)
dataset_path = os.path.join(MODELS_PATH, model.get_name('data.pkl'))
save_pkl(dataset, dataset_path)

# --- Fit, evaluate and persist ----------------------------------------
model.fit(*dataset)
model.score(X_test, y_test)
model.save(MODELS_PATH)
# Example #6
# 0
import os
from riiid.core.data import DataLoader, preprocess_questions, preprocess_lectures, save_pkl
from riiid.core.model import RiiidModel
from riiid.validation import merge_test
from riiid.utils import configure_console_logging
from riiid.config import INPUT_PATH, MODELS_PATH, PARAMS

configure_console_logging()

# --- Data -------------------------------------------------------------
data_loader = DataLoader(INPUT_PATH)
train, questions, lectures = data_loader.load_first_users(30000)
questions = preprocess_questions(questions)
lectures = preprocess_lectures(lectures)

# --- Validation set: loaded from disk and merged into the training data
test = data_loader.load_tests('tests_0.pkl')
train = merge_test(train, test)

# --- Features ---------------------------------------------------------
model = RiiidModel(questions, lectures, params=PARAMS)
# After this call `train` and `valid` are used to index X and y.
X, y, train, valid = model.fit_transform(train)
data_path = os.path.join(MODELS_PATH, model.get_name('data.pkl'))
save_pkl((X, y, train, valid), path=data_path)

# --- Models -----------------------------------------------------------
# The three base learners share the same call signature; the slices are
# re-evaluated for each call, exactly as when written out one by one.
for fit_base_model in (model.fit_lgbm, model.fit_catboost, model.fit_neural):
    fit_base_model(X[train], y[train], X[valid], y[valid])

# The blender is fitted on the validation slice only.
model.fit_blender(X[valid], y[valid])
# Example #7
# 0
from doppel.aws.s3 import S3Bucket

from riiid.core.data import DataLoader, preprocess_questions, preprocess_lectures
from riiid.core.model import RiiidModel
from riiid.validation import merge_test
from riiid.config import PARAMS
from riiid import cache
from riiid.aws.cache import S3CacheManager
from riiid.aws.config import CONTEXT

# Called for its side effect only; the returned value is discarded.
# Presumably this configures logging for the AWS run — confirm in riiid.aws.config.
CONTEXT.get_logger()

# NOTE(review): the except/finally clause matching this `try` is not visible
# in this excerpt; the snippet appears to be truncated after the S3Bucket line.
try:
    # Replace the module-level cache manager with an S3-backed one.
    cache.CACHE_MANAGER = S3CacheManager('kaggle-riiid-cache')

    # Load the data from the path given by the AWS context and preprocess the metadata.
    loader = DataLoader(CONTEXT.data_path())
    train, questions, lectures = loader.load()
    questions = preprocess_questions(questions)
    lectures = preprocess_lectures(lectures)

    # Merge the stored test set into the training data, then drop the
    # now-unneeded reference to it.
    test = loader.load_tests('tests_0.pkl')
    train = merge_test(train, test)
    del test

    # Override the `workers` setting of both embedding configurations
    # (this mutates the shared PARAMS dict in place).
    PARAMS['question_embedding']['workers'] = 32
    PARAMS['answers_embedding']['workers'] = 32
    model = RiiidModel(questions, lectures, params=PARAMS)
    # `train` is rebound here: it is one of the four values returned by fit_transform.
    X, y, train, valid = model.fit_transform(train)

    # Bucket named after the model's normalized name.
    bucket = S3Bucket(model.get_normalized_name())
import logging

from doppel import terminate
from doppel.aws.s3 import S3Bucket

from riiid.core.data import DataLoader
from riiid.saint.model import SaintModel
from riiid.aws.config import CONTEXT

# Called for its side effect only; the returned value is discarded.
# Presumably this configures logging for the AWS run — confirm in riiid.aws.config.
CONTEXT.get_logger()

# NOTE(review): the except/finally clause matching this `try` lies beyond the
# end of this excerpt.
try:
    # Load the data from the path given by the AWS context.
    loader = DataLoader(CONTEXT.data_path())
    train, questions, lectures = loader.load()

    # Build the SAINT model from the metadata and transform the training data.
    model = SaintModel(questions, lectures)
    train = model.fit_transform(train)

    # Split, build features for each part, then turn each part into (X, y).
    train, test = model.split_train_test(train)
    train = model.create_features(train)
    test = model.create_features(test)
    X_train, y_train = model.create_dataset(train)
    X_test, y_test = model.create_dataset(test)

    # Bucket named after the model's normalized name; it receives both the
    # pickled model and the pickled datasets.
    bucket = S3Bucket(model.get_normalized_name())
    logging.info('Saving model')
    bucket.save_pickle(model, model.get_name(ext='pkl'))

    logging.info('Saving data')
    # The four arrays are saved together through the multipart variant.
    bucket.save_pickle_multiparts((X_train, y_train, X_test, y_test),
                                  model.get_name('data.pkl'))