import numpy as np

from spotlight.cross_validation import random_train_test_split
from spotlight.datasets import movielens
from spotlight.evaluation import rmse_score
from spotlight.factorization.explicit import ExplicitFactorizationModel
from spotlight.factorization.implicit import ImplicitFactorizationModel
from spotlight.factorization.representations import BilinearNet
from spotlight.layers import BloomEmbedding, ScaledEmbedding
from spotlight.sequence.implicit import ImplicitSequenceModel
from spotlight.sequence.representations import LSTMNet, PoolNet
from spotlight.torch_utils import set_seed

# Module-level constants (RANDOM_STATE, RANDOM_SEED, CUDA, EPSILON,
# EMBEDDING_DIM, NUM_HASH_FUNCTIONS, N_ITER, LOSS, BATCH_SIZE, NUM_EPOCHS,
# VERBOSE) and the helpers _get_synthetic_data and _evaluate are defined
# elsewhere in the surrounding modules.


def test_bloom(compression_ratio, expected_rmse):
    # compression_ratio and expected_rmse are supplied by a
    # pytest.mark.parametrize decorator in the original test module.
    interactions = movielens.get_movielens_dataset('100K')

    train, test = random_train_test_split(interactions,
                                          random_state=RANDOM_STATE)

    # Hash users and items into compressed embedding tables.
    user_embeddings = BloomEmbedding(interactions.num_users, 32,
                                     compression_ratio=compression_ratio,
                                     num_hash_functions=2)
    item_embeddings = BloomEmbedding(interactions.num_items, 32,
                                     compression_ratio=compression_ratio,
                                     num_hash_functions=2)

    network = BilinearNet(interactions.num_users,
                          interactions.num_items,
                          user_embedding_layer=user_embeddings,
                          item_embedding_layer=item_embeddings)

    model = ExplicitFactorizationModel(loss='regression',
                                       n_iter=10,
                                       batch_size=1024,
                                       learning_rate=1e-2,
                                       l2=1e-5,
                                       representation=network,
                                       use_cuda=CUDA)

    model.fit(train)
    print(model)

    rmse = rmse_score(model, test)
    print(rmse)

    # The compressed model should score no worse than the expected bound.
    assert rmse - EPSILON < expected_rmse
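A quick way to see what compression_ratio buys: a BloomEmbedding allocates a table of roughly compression_ratio * num_embeddings rows and reconstructs each id's vector from num_hash_functions hashed rows. The comparison below is illustrative and not part of the original test; the entity count is arbitrary.

full = ScaledEmbedding(100000, 32)
compressed = BloomEmbedding(100000, 32,
                            compression_ratio=0.2,
                            num_hash_functions=2)

# The full table holds 100,000 x 32 weights; the bloom table only
# roughly 20,000 x 32.
print(sum(p.numel() for p in full.parameters()))
print(sum(p.numel() for p in compressed.parameters()))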
def factorization_model(num_embeddings, bloom):
    # Build an implicit factorization model with either bloom (hashed,
    # compressed) or standard scaled embedding layers.
    if bloom:
        user_embeddings = BloomEmbedding(num_embeddings, EMBEDDING_DIM,
                                         num_hash_functions=NUM_HASH_FUNCTIONS)
        item_embeddings = BloomEmbedding(num_embeddings, EMBEDDING_DIM,
                                         num_hash_functions=NUM_HASH_FUNCTIONS)
    else:
        user_embeddings = ScaledEmbedding(num_embeddings, EMBEDDING_DIM)
        item_embeddings = ScaledEmbedding(num_embeddings, EMBEDDING_DIM)

    network = BilinearNet(num_embeddings,
                          num_embeddings,
                          user_embedding_layer=user_embeddings,
                          item_embedding_layer=item_embeddings)

    model = ImplicitFactorizationModel(loss='adaptive_hinge',
                                       n_iter=N_ITER,
                                       embedding_dim=EMBEDDING_DIM,
                                       batch_size=2048,
                                       learning_rate=1e-2,
                                       l2=1e-6,
                                       representation=network,
                                       use_cuda=CUDA)

    return model
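A hypothetical driver for factorization_model, assuming the imports above; fitting on MovieLens and reading the parameter count off Spotlight's private _net attribute are illustrative choices, not part of the original benchmark.

dataset = movielens.get_movielens_dataset('100K')
num_embeddings = max(dataset.num_users, dataset.num_items)

for bloom in (True, False):
    model = factorization_model(num_embeddings, bloom)
    model.fit(dataset, verbose=True)
    # _net holds the fitted BilinearNet; counting its weights shows the
    # size difference between the two embedding schemes.
    print(bloom, sum(p.numel() for p in model._net.parameters()))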
def build_sequence_model(hyperparameters, train, random_state):
    h = hyperparameters

    # Seeds are fixed for reproducibility; the random_state argument is
    # accepted for interface compatibility but not used in the body.
    set_seed(42, CUDA)

    # A compression ratio below 1.0 selects bloom embeddings; 1.0 or more
    # falls back to a standard embedding table.
    if h['compression_ratio'] < 1.0:
        item_embeddings = BloomEmbedding(
            train.num_items,
            h['embedding_dim'],
            compression_ratio=h['compression_ratio'],
            num_hash_functions=4,
            padding_idx=0)
    else:
        item_embeddings = ScaledEmbedding(train.num_items,
                                          h['embedding_dim'],
                                          padding_idx=0)

    network = LSTMNet(train.num_items,
                      h['embedding_dim'],
                      item_embedding_layer=item_embeddings)

    model = ImplicitSequenceModel(loss=h['loss'],
                                  n_iter=h['n_iter'],
                                  batch_size=h['batch_size'],
                                  learning_rate=h['learning_rate'],
                                  embedding_dim=h['embedding_dim'],
                                  l2=h['l2'],
                                  representation=network,
                                  use_cuda=CUDA,
                                  random_state=np.random.RandomState(42))

    return model
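build_sequence_model takes its settings as a plain dict; the keys below are exactly those the function reads. A minimal sketch of a call, assuming Spotlight's synthetic sequence generator; the specific values are placeholders.

from spotlight.datasets.synthetic import generate_sequential

random_state = np.random.RandomState(42)

interactions = generate_sequential(num_users=100,
                                   num_items=1000,
                                   num_interactions=10000,
                                   random_state=random_state)
train = interactions.to_sequence(max_sequence_length=20)

hyperparameters = {'compression_ratio': 0.5,  # < 1.0, so BloomEmbedding is used
                   'embedding_dim': 32,
                   'loss': 'adaptive_hinge',
                   'n_iter': 10,
                   'batch_size': 256,
                   'learning_rate': 1e-2,
                   'l2': 1e-6}

model = build_sequence_model(hyperparameters, train, random_state)
model.fit(train)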
def test_bloom_pooling(compression_ratio, expected_mrr):
    # compression_ratio and expected_mrr are supplied by a
    # pytest.mark.parametrize decorator in the original test module.
    random_state = np.random.RandomState(RANDOM_SEED)

    train, test = _get_synthetic_data(randomness=1e-03,
                                      num_interactions=20000,
                                      random_state=random_state)

    embedding = BloomEmbedding(train.num_items, 32,
                               compression_ratio=compression_ratio,
                               num_hash_functions=2)

    representation = PoolNet(train.num_items,
                             embedding_dim=EMBEDDING_DIM,
                             item_embedding_layer=embedding)

    model = ImplicitSequenceModel(loss=LOSS,
                                  representation=representation,
                                  batch_size=BATCH_SIZE,
                                  learning_rate=1e-2,
                                  l2=1e-7,
                                  n_iter=NUM_EPOCHS * 5,
                                  random_state=random_state,
                                  use_cuda=CUDA)

    model.fit(train, verbose=VERBOSE)

    mrr = _evaluate(model, test)

    assert mrr.mean() > expected_mrr
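The helpers _get_synthetic_data and _evaluate belong to the original test module and are not shown here. Plausible sketches under the assumption that they wrap Spotlight's synthetic generator and sequence_mrr_score; the details (split, sequence length, how randomness is passed through) are guesses, not the project's code.

from spotlight.cross_validation import user_based_train_test_split
from spotlight.datasets.synthetic import generate_sequential
from spotlight.evaluation import sequence_mrr_score


def _get_synthetic_data(randomness, num_interactions, random_state):
    # Assumed: `randomness` acts as the generator's concentration parameter.
    interactions = generate_sequential(num_interactions=num_interactions,
                                       concentration_parameter=randomness,
                                       random_state=random_state)
    train, test = user_based_train_test_split(interactions,
                                              random_state=random_state)
    return (train.to_sequence(max_sequence_length=10),
            test.to_sequence(max_sequence_length=10))


def _evaluate(model, test):
    # One MRR value per test sequence; the caller takes the mean.
    return sequence_mrr_score(model, test)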
def sequence_model(num_embeddings, bloom):
    # Sequence-model counterpart of factorization_model: an LSTM over
    # item sequences, with bloom or standard item embeddings.
    if bloom:
        item_embeddings = BloomEmbedding(num_embeddings, EMBEDDING_DIM,
                                         num_hash_functions=NUM_HASH_FUNCTIONS)
    else:
        item_embeddings = ScaledEmbedding(num_embeddings, EMBEDDING_DIM)

    network = LSTMNet(num_embeddings, EMBEDDING_DIM,
                      item_embedding_layer=item_embeddings)

    model = ImplicitSequenceModel(loss='adaptive_hinge',
                                  n_iter=N_ITER,
                                  batch_size=512,
                                  learning_rate=1e-3,
                                  l2=1e-2,
                                  representation=network,
                                  use_cuda=CUDA)

    return model
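And a hypothetical driver for sequence_model, mirroring the factorization one; the synthetic dataset sizes are arbitrary.

from spotlight.datasets.synthetic import generate_sequential

sequences = generate_sequential(num_users=1000,
                                num_items=5000,
                                num_interactions=100000,
                                random_state=np.random.RandomState(42))
train = sequences.to_sequence(max_sequence_length=50)

for bloom in (True, False):
    model = sequence_model(train.num_items, bloom)
    model.fit(train, verbose=True)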