Пример #1
0
def do_prepare(opts):
    """Write the test split and baseline recommendations for one data set.

    The data set name is read from ``opts['-d']`` and the ratings are loaded
    through ``MovieLens(f'data/{name}')``.  The first train/test pair yielded
    by ``sample_users`` is used, and two Parquet files are written:

    * ``data/<name>-test.parquet`` -- the test frame.
    * ``data/<name>-recs.parquet`` -- the concatenated output of ``recommend``
      for a ``Popular`` model and an ``ImplicitMF`` model, with an
      ``Algorithm`` column (``'Popular'`` / ``'ALS'``) marking the source.

    Args:
        opts: Mapping of command-line options; only the ``'-d'`` entry is read.
    """
    dataset = opts['-d']
    movielens = MovieLens(f'data/{dataset}')

    # Only the first split produced by the sampler is consumed.
    # NOTE(review): presumably 1 sample of 10000 users with 5 test rows
    # each -- confirm against the sample_users / SampleN signatures.
    splits = sample_users(movielens.ratings, 1, 10000, SampleN(5))
    train, test = next(splits)

    test.to_parquet(f'data/{dataset}-test.parquet', index=False)

    _log.info('getting popular recs')
    popular = Popular()
    popular.fit(train)
    popular_recs = recommend(popular, test['user'].unique(), 100)

    _log.info('getting ALS recs')
    implicit_als = Recommender.adapt(ImplicitMF(20, iterations=10))
    # The ALS model is fitted on the training frame without its rating column.
    implicit_als.fit(train.drop(columns=['rating']))
    implicit_recs = recommend(implicit_als, test['user'].unique(), 100)

    _log.info('merging recs')
    by_algorithm = {'Popular': popular_recs, 'ALS': implicit_recs}
    merged = pd.concat(by_algorithm, names=['Algorithm'])
    # Move the concat key out of the index so it survives as a regular column
    # when the frame is written with index=False.
    merged = merged.reset_index('Algorithm')
    merged.to_parquet(f'data/{dataset}-recs.parquet', index=False)
Пример #2
0
def test_pop_recommend(ml20m, rng, n_jobs):
    """Check that batch recommendation with ``Popular`` covers every sampled user.

    Draws 10000 distinct values from the ``user`` column of ``ml20m``, fits a
    ``Popular`` model on the full frame, runs ``batch.recommend`` for the
    sampled users, and asserts that the result contains 10000 distinct users.

    Args:
        ml20m: Fixture providing a ratings frame with a ``user`` column.
        rng: Fixture providing a generator object with a ``choice`` method.
        n_jobs: Fixture value forwarded to ``batch.recommend`` as ``n_jobs``.
    """
    user_count = 10000
    candidates = ml20m['user'].unique()
    # replace=False keeps the sampled users distinct, which the final
    # assertion relies on.
    sampled = rng.choice(candidates, user_count, replace=False)

    model = Popular()
    _log.info('training %s', model)
    model.fit(ml20m)

    _log.info('recommending with %s', model)
    result = batch.recommend(model, sampled, 10, n_jobs=n_jobs)

    distinct_users = result['user'].nunique()
    assert distinct_users == user_count
Пример #3
0
def test_store_save(store_cls):
    """Round-trip a fitted ``Popular`` model through a model store.

    Puts the model into a store created by ``store_cls``, fetches it back by
    the returned key, and asserts that the fetched model is a different object
    with a different ``item_pop_`` object whose elements all compare equal to
    the original's.

    Args:
        store_cls: Fixture providing a zero-argument callable that returns a
            store usable as a context manager.
    """
    original = Popular()
    original.fit(lktu.ml_test.ratings)

    with store_cls() as store:
        key = store.put_model(original)
        restored = store.get_model(key)

        # The store must hand back new objects, not the ones it was given...
        assert restored is not original
        assert restored.item_pop_ is not original.item_pop_

        # ...while the stored data still matches element for element.
        matches = restored.item_pop_ == original.item_pop_
        assert all(matches)

        # NOTE(review): presumably releases the fetched model before the
        # store context exits -- confirm why this is required.
        del restored
Пример #4
0
def test_store_client_pickle(store_cls):
    """Fetch a stored model through a pickled-and-restored store client.

    Puts a fitted ``Popular`` model into a store, obtains a client from the
    store, passes both the client and the model key through a pickle
    round-trip, and asserts that the model fetched via the restored client is
    a different object with a different ``item_pop_`` object whose elements
    all compare equal to the original's.

    Args:
        store_cls: Fixture providing a zero-argument callable that returns a
            store usable as a context manager.
    """
    def roundtrip(obj):
        # Serialize and immediately deserialize, yielding an unpickled copy.
        return pickle.loads(pickle.dumps(obj))

    original = Popular()
    original.fit(lktu.ml_test.ratings)

    with store_cls() as store:
        key = store.put_model(original)
        # Only the unpickled client and key are used from here on.
        client = roundtrip(store.client())
        key = roundtrip(key)

        restored = client.get_model(key)

        assert restored is not original
        assert restored.item_pop_ is not original.item_pop_

        matches = restored.item_pop_ == original.item_pop_
        assert all(matches)

        # NOTE(review): presumably releases the fetched model before the
        # store context exits -- confirm why this is required.
        del restored
Пример #5
0
    def run(self, strategy_context: RecommenderAlgorithmStrategyContext
            ) -> np.ndarray:
        """Fit ``Popular`` on a user-partitioned split and return its recommendations.

        The data set named by ``strategy_context.data_set_source`` is parsed
        with a reader obtained from ``self.data_frame_reader_factory``.  The
        first partition produced by ``partition_users`` (one partition,
        ``SampleFrac(0.2)``) supplies the train and test frames; the model is
        fitted on the train frame and asked for recommendations for every
        distinct ``user`` value in the test frame.

        Args:
            strategy_context: Supplies ``data_set_source`` and
                ``number_of_recommendations``.

        Returns:
            The recommended ``item`` values, grouped by ``user`` and reshaped
            to ``number_of_recommendations`` columns.
        """
        source = strategy_context.data_set_source
        reader: DataFrameReaderStrategy = (
            self.data_frame_reader_factory.create(source))
        ratings: DataFrame = reader.parse(
            DataFrameReaderStrategyContext(source))

        # A single partition is requested, so only the first element is used.
        folds = list(
            partition_users(data=ratings,
                            partitions=1,
                            method=crossfold.SampleFrac(0.2)))
        fold = folds[0]
        test = fold.test
        train = fold.train
        n_recs = strategy_context.number_of_recommendations

        fitted = Popular().fit(train)
        recs = lenskit.batch.recommend(fitted,
                                       test['user'].unique(),
                                       n_recs)

        items_by_user = recs.groupby('user')['item'].apply(lambda x: x)
        # NOTE(review): the reshape assumes every user received exactly
        # n_recs recommendations; a shorter list for any user would raise
        # or misalign rows -- confirm this always holds.
        return items_by_user.to_numpy().reshape((-1, n_recs))