Example No. 1
import importlib
import logging
from pathlib import Path

import pandas as pd

from lenskit import batch, util
from lenskit.algorithms import Predictor, Recommender

import datasets  # assumed project-local helper module providing data set definitions and ds_diff()

_log = logging.getLogger(__name__)


def main(args):
    mod_name = args.get('-m')
    split_dir = args.get('--splits')
    output = args.get('-o')
    n_recs = int(args.get('-n'))
    algo_name = args.get('ALGO')

    _log.info('importing algorithms from module %s', mod_name)
    algorithms = importlib.import_module(mod_name)

    # look up the named algorithm in the module and wrap it so it can produce top-N recommendations
    algo = getattr(algorithms, algo_name)
    algo = Recommender.adapt(algo)

    path = Path(split_dir)
    dest = Path(output)
    dest.mkdir(exist_ok=True, parents=True)

    ds_def = getattr(datasets, path.name, None)

    for file in path.glob("test-*"):
        test = pd.read_csv(file, sep=',')
        suffix = file.name[5:]
        train_file = path / f'train-{suffix}'
        timer = util.Stopwatch()

        if 'index' in test.columns:
            _log.info('setting test index')
            test = test.set_index('index')
        else:
            _log.warning('no index column found in %s', file.name)

        if train_file.exists():
            _log.info('[%s] loading training data from %s', timer, train_file)
            train = pd.read_csv(train_file, sep=',')
        elif ds_def is not None:
            _log.info('[%s] extracting training data from data set %s', timer,
                      path.name)
            train = datasets.ds_diff(ds_def.ratings, test)
            train.reset_index(drop=True, inplace=True)
        else:
            _log.error('could not find training data for %s', file.name)
            continue

        _log.info('[%s] Fitting the model', timer)
        # We train isolated to manage resource use
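        # train_isolated fits the model in a subprocess and returns a persisted handle
        # that batch.recommend and batch.predict can use directly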
        model = batch.train_isolated(algo, train)
        try:
            _log.info('[%s] generating recommendations for unique users',
                      timer)
            users = test.user.unique()
            recs = batch.recommend(model, users, n_recs)
            _log.info('[%s] writing recommendations to %s', timer, dest)
            recs.to_csv(dest / f'recs-{suffix}', index=False)

            if isinstance(algo, Predictor) and not args['--no-predict']:
                _log.info('[%s] generating predictions for user-item', timer)
                preds = batch.predict(model, test)
                preds.to_csv(dest / f'pred-{suffix}', index=False)
        finally:
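            # release the shared copy of the trained model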
            model.close()
Example No. 2
def evaluate(self, algo, train, test, **kwargs):
    _log.info('running training')
    if self.isolate:
        # train in a subprocess; the persisted model works with batch.recommend below
        algo = batch.train_isolated(algo, train)
    else:
        algo.fit(train)
    _log.info('testing %d users', test.user.nunique())
    recs = batch.recommend(algo, test.user.unique(), 100, **kwargs)
    return recs
Example No. 3
def test_batch_predict_preshared():
    from lenskit.algorithms import basic
    import lenskit.batch as lkb
    import lenskit.crossfold as xf
    import lenskit.util.test as lktu  # assumed aliases for the lkb/lktu names used below

    algo = basic.Bias()
    splits = xf.sample_users(lktu.ml_test.ratings, 1, 100, xf.SampleN(5))
    train, test = next(splits)

    # train in a subprocess; the persisted model can be passed straight to lkb.predict
    ares = lkb.train_isolated(algo, train)
    preds = lkb.predict(ares, test)
    assert len(preds) == len(test)
    assert not any(preds['prediction'].isna())
Example No. 4
# import locations below are assumed from the LensKit 0.x test suite
from lenskit.algorithms import Recommender
from lenskit.algorithms.basic import Bias, TopN
from lenskit.batch import train_isolated
from lenskit.util.test import ml_test


def test_train_isolate():
    algo = Bias()
    algo = Recommender.adapt(algo)

    saved = train_isolated(algo, ml_test.ratings)
    try:
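        # get() reloads the trained TopN model from the isolated training process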
        trained = saved.get()
        assert isinstance(trained, TopN)
        recs = trained.recommend(10, 10)
        assert len(recs) == 10
        del recs, trained
    finally:
        saved.close()
Example No. 5
def test_train_isolate_file(tmp_path):
    fn = tmp_path / 'saved.bpk'
    algo = Bias()
    algo = Recommender.adapt(algo)

    # file= writes the persisted model to the given path (checked by the saved.path assertion below)
    saved = train_isolated(algo, ml_test.ratings, file=fn)
    try:
        assert saved.path == fn
        trained = saved.get()
        assert isinstance(trained, TopN)
        recs = trained.recommend(10, 10)
        assert len(recs) == 10
        del recs, trained
    finally:
        saved.close()
Example No. 6
import logging

from lenskit import batch
from lenskit.algorithms import Recommender
from lenskit.algorithms.als import BiasedMF

_log = logging.getLogger(__name__)


# ml20m and rng are assumed to be pytest fixtures: the MovieLens 20M ratings frame and a NumPy RNG
def test_als_isolate(ml20m, rng):
    users = rng.choice(ml20m['user'].unique(), 5000, replace=False)
    algo = BiasedMF(20, iterations=10)
    algo = Recommender.adapt(algo)
    _log.info('training %s', algo)
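    # ares holds a persisted copy of the trained model, reused for both recommending and predicting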
    ares = batch.train_isolated(algo, ml20m)
    try:
        _log.info('recommending with %s', algo)
        recs = batch.recommend(ares, users, 10)
        assert recs['user'].nunique() == 5000
        _log.info('predicting with %s', algo)
        pairs = ml20m.sample(1000)
        preds = batch.predict(ares, pairs)
        assert len(preds) == len(pairs)
    finally:
        ares.close()