def test_bloom(compression_ratio, expected_rmse):

    interactions = movielens.get_movielens_dataset('100K')

    train, test = random_train_test_split(interactions,
                                          random_state=RANDOM_STATE)

    user_embeddings = BloomEmbedding(interactions.num_users,
                                     32,
                                     compression_ratio=compression_ratio,
                                     num_hash_functions=2)
    item_embeddings = BloomEmbedding(interactions.num_items,
                                     32,
                                     compression_ratio=compression_ratio,
                                     num_hash_functions=2)
    network = BilinearNet(interactions.num_users,
                          interactions.num_items,
                          user_embedding_layer=user_embeddings,
                          item_embedding_layer=item_embeddings)

    model = ExplicitFactorizationModel(loss='regression',
                                       n_iter=10,
                                       batch_size=1024,
                                       learning_rate=1e-2,
                                       l2=1e-5,
                                       representation=network,
                                       use_cuda=CUDA)

    model.fit(train)
    print(model)

    rmse = rmse_score(model, test)
    print(rmse)

    assert rmse - EPSILON < expected_rmse
Пример #2
0
def factorization_model(num_embeddings, bloom):

    if bloom:
        user_embeddings = BloomEmbedding(num_embeddings, EMBEDDING_DIM,
                                         num_hash_functions=NUM_HASH_FUNCTIONS)
        item_embeddings = BloomEmbedding(num_embeddings, EMBEDDING_DIM,
                                         num_hash_functions=NUM_HASH_FUNCTIONS)
    else:
        user_embeddings = ScaledEmbedding(num_embeddings, EMBEDDING_DIM)
        item_embeddings = ScaledEmbedding(num_embeddings, EMBEDDING_DIM)

    network = BilinearNet(num_embeddings,
                          num_embeddings,
                          user_embedding_layer=user_embeddings,
                          item_embedding_layer=item_embeddings)

    model = ImplicitFactorizationModel(loss='adaptive_hinge',
                                       n_iter=N_ITER,
                                       embedding_dim=EMBEDDING_DIM,
                                       batch_size=2048,
                                       learning_rate=1e-2,
                                       l2=1e-6,
                                       representation=network,
                                       use_cuda=CUDA)

    return model
Пример #3
0
def build_sequence_model(hyperparameters, train, random_state):

    h = hyperparameters

    set_seed(42, CUDA)

    if h['compression_ratio'] < 1.0:
        item_embeddings = BloomEmbedding(
            train.num_items,
            h['embedding_dim'],
            compression_ratio=h['compression_ratio'],
            num_hash_functions=4,
            padding_idx=0)
    else:
        item_embeddings = ScaledEmbedding(train.num_items,
                                          h['embedding_dim'],
                                          padding_idx=0)

    network = LSTMNet(train.num_items,
                      h['embedding_dim'],
                      item_embedding_layer=item_embeddings)

    model = ImplicitSequenceModel(loss=h['loss'],
                                  n_iter=h['n_iter'],
                                  batch_size=h['batch_size'],
                                  learning_rate=h['learning_rate'],
                                  embedding_dim=h['embedding_dim'],
                                  l2=h['l2'],
                                  representation=network,
                                  use_cuda=CUDA,
                                  random_state=np.random.RandomState(42))

    return model
Пример #4
0
def test_bloom_pooling(compression_ratio, expected_mrr):

    random_state = np.random.RandomState(RANDOM_SEED)
    train, test = _get_synthetic_data(randomness=1e-03,
                                      num_interactions=20000,
                                      random_state=random_state)

    embedding = BloomEmbedding(train.num_items,
                               32,
                               compression_ratio=compression_ratio,
                               num_hash_functions=2)

    representation = PoolNet(train.num_items,
                             embedding_dim=EMBEDDING_DIM,
                             item_embedding_layer=embedding)

    model = ImplicitSequenceModel(loss=LOSS,
                                  representation=representation,
                                  batch_size=BATCH_SIZE,
                                  learning_rate=1e-2,
                                  l2=1e-7,
                                  n_iter=NUM_EPOCHS * 5,
                                  random_state=random_state,
                                  use_cuda=CUDA)

    model.fit(train, verbose=VERBOSE)

    mrr = _evaluate(model, test)

    assert mrr.mean() > expected_mrr
Пример #5
0
def sequence_model(num_embeddings, bloom):

    if bloom:
        item_embeddings = BloomEmbedding(num_embeddings, EMBEDDING_DIM,
                                         num_hash_functions=NUM_HASH_FUNCTIONS)
    else:
        item_embeddings = ScaledEmbedding(num_embeddings, EMBEDDING_DIM)

    network = LSTMNet(num_embeddings, EMBEDDING_DIM,
                      item_embedding_layer=item_embeddings)

    model = ImplicitSequenceModel(loss='adaptive_hinge',
                                  n_iter=N_ITER,
                                  batch_size=512,
                                  learning_rate=1e-3,
                                  l2=1e-2,
                                  representation=network,
                                  use_cuda=CUDA)

    return model