def evaluate_cnn_model(hyperparameters, train, test, validation, random_state):
    """Fit a CNN sequence model with the given hyperparameters and return
    (test MRR, validation MRR)."""

    h = hyperparameters

    net = CNNNet(train.num_items,
                 embedding_dim=h['embedding_dim'],
                 kernel_width=h['kernel_width'],
                 dilation=h['dilation'],
                 num_layers=h['num_layers'],
                 nonlinearity=h['nonlinearity'],
                 residual_connections=h['residual'])

    model = ImplicitSequenceModel(loss=h['loss'],
                                  representation=net,
                                  batch_size=h['batch_size'],
                                  learning_rate=h['learning_rate'],
                                  l2=h['l2'],
                                  n_iter=h['n_iter'],
                                  use_cuda=CUDA,
                                  random_state=random_state)

    model.fit(train, verbose=True)

    test_mrr = sequence_mrr_score(model, test)
    val_mrr = sequence_mrr_score(model, validation)

    return test_mrr, val_mrr

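# Illustrative only: a hyperparameter dictionary with the keys that
# evaluate_cnn_model reads above. The name and the values are hypothetical
# placeholders for demonstration, not tuned or recommended settings.
EXAMPLE_CNN_HYPERPARAMETERS = {
    'embedding_dim': 32,
    'kernel_width': 3,
    'dilation': (1, 2, 4),
    'num_layers': 3,
    'nonlinearity': 'tanh',
    'residual': True,
    'loss': 'adaptive_hinge',
    'batch_size': 256,
    'learning_rate': 1e-2,
    'l2': 1e-6,
    'n_iter': 10,
}
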
def test_bloom_pooling(compression_ratio, expected_mrr):

    random_state = np.random.RandomState(RANDOM_SEED)

    train, test = _get_synthetic_data(randomness=1e-03,
                                      num_interactions=20000,
                                      random_state=random_state)

    embedding = BloomEmbedding(train.num_items, 32,
                               compression_ratio=compression_ratio,
                               num_hash_functions=2)
    representation = PoolNet(train.num_items,
                             embedding_dim=EMBEDDING_DIM,
                             item_embedding_layer=embedding)

    model = ImplicitSequenceModel(loss=LOSS,
                                  representation=representation,
                                  batch_size=BATCH_SIZE,
                                  learning_rate=1e-2,
                                  l2=1e-7,
                                  n_iter=NUM_EPOCHS * 5,
                                  random_state=random_state,
                                  use_cuda=CUDA)

    model.fit(train, verbose=VERBOSE)

    mrr = _evaluate(model, test)

    assert mrr.mean() > expected_mrr

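# Illustrative helper, not part of the original test suite: under the
# assumption that compression_ratio shrinks the embedding table relative to a
# standard num_items x embedding_dim layer, this gives a rough row count for
# the compressed Bloom embedding table.
def _approx_bloom_embedding_rows(num_items, compression_ratio):

    return max(1, int(num_items * compression_ratio))
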
def test_implicit_cnn_dilation_synthetic(num_layers, dilation, expected_mrr):

    random_state = np.random.RandomState(RANDOM_SEED)

    train, test = _get_synthetic_data(randomness=1e-03,
                                      num_interactions=20000,
                                      random_state=random_state)

    model = ImplicitSequenceModel(loss=LOSS,
                                  representation=CNNNet(
                                      train.num_items,
                                      embedding_dim=EMBEDDING_DIM,
                                      kernel_width=3,
                                      dilation=dilation,
                                      num_layers=num_layers),
                                  batch_size=BATCH_SIZE,
                                  learning_rate=1e-2,
                                  l2=0.0,
                                  n_iter=NUM_EPOCHS * 5 * num_layers,
                                  random_state=random_state,
                                  use_cuda=CUDA)

    model.fit(train, verbose=VERBOSE)

    mrr = _evaluate(model, test)

    assert mrr.mean() > expected_mrr

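# Illustrative helper, not part of the original tests: the receptive field of
# a stack of dilated 1D convolutions grows by (kernel_width - 1) * dilation
# per layer. With kernel_width=3 and dilations (1, 2, 4) this gives
# 1 + 2 + 4 + 8 = 15 positions of history per prediction.
def _dilated_receptive_field(kernel_width, dilations):

    return 1 + sum((kernel_width - 1) * d for d in dilations)
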
def data_implicit_sequence():
    """Fit a small LSTM sequence model on MovieLens 100K and return the
    train sequences, test sequences, and the fitted model."""

    max_sequence_length = 200
    min_sequence_length = 20
    step_size = 200

    interactions = movielens.get_movielens_dataset('100K')

    train, test = user_based_train_test_split(interactions,
                                              random_state=RANDOM_STATE)

    train = train.to_sequence(max_sequence_length=max_sequence_length,
                              min_sequence_length=min_sequence_length,
                              step_size=step_size)
    test = test.to_sequence(max_sequence_length=max_sequence_length,
                            min_sequence_length=min_sequence_length,
                            step_size=step_size)

    model = ImplicitSequenceModel(loss='adaptive_hinge',
                                  representation='lstm',
                                  batch_size=8,
                                  learning_rate=1e-2,
                                  l2=1e-3,
                                  n_iter=2,
                                  use_cuda=CUDA,
                                  random_state=RANDOM_STATE)

    model.fit(train, verbose=True)

    return train, test, model

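# Usage sketch (hypothetical helper, not part of the original suite): shows
# how the (train, test, model) triple returned by data_implicit_sequence
# might be evaluated. sequence_mrr_score returns one reciprocal-rank score
# per test sequence.
def _example_data_implicit_sequence_usage():

    train, test, model = data_implicit_sequence()

    mrr = sequence_mrr_score(model, test)

    return mrr.mean()
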
def test_implicit_pooling_synthetic(randomness, expected_mrr):

    random_state = np.random.RandomState(RANDOM_SEED)
    train, test = _get_synthetic_data(randomness=randomness,
                                      random_state=random_state)

    model = ImplicitSequenceModel(loss=LOSS,
                                  batch_size=BATCH_SIZE,
                                  embedding_dim=EMBEDDING_DIM,
                                  learning_rate=1e-1,
                                  l2=1e-9,
                                  n_iter=NUM_EPOCHS,
                                  random_state=random_state,
                                  use_cuda=CUDA)

    model.fit(train, verbose=VERBOSE)

    mrr = _evaluate(model, test)

    assert mrr.mean() > expected_mrr

def evaluate_pooling_model(hyperparameters, train, test, validation, random_state):
    """Fit a pooling sequence model with the given hyperparameters and return
    (test MRR, validation MRR)."""

    h = hyperparameters

    model = ImplicitSequenceModel(loss=h['loss'],
                                  representation='pooling',
                                  batch_size=h['batch_size'],
                                  learning_rate=h['learning_rate'],
                                  l2=h['l2'],
                                  n_iter=h['n_iter'],
                                  use_cuda=CUDA,
                                  random_state=random_state)

    model.fit(train, verbose=True)

    test_mrr = sequence_mrr_score(model, test)
    val_mrr = sequence_mrr_score(model, validation)

    return test_mrr, val_mrr

def test_implicit_sequence_serialization(data):

    train, test = data
    train = train.to_sequence(max_sequence_length=128)
    test = test.to_sequence(max_sequence_length=128)

    model = ImplicitSequenceModel(loss='bpr',
                                  representation=CNNNet(train.num_items,
                                                        embedding_dim=32,
                                                        kernel_width=3,
                                                        dilation=(1, ),
                                                        num_layers=1),
                                  batch_size=128,
                                  learning_rate=1e-1,
                                  l2=0.0,
                                  n_iter=5,
                                  random_state=RANDOM_STATE,
                                  use_cuda=CUDA)

    model.fit(train)

    mrr_original = sequence_mrr_score(model, test).mean()
    mrr_recovered = sequence_mrr_score(_reload(model), test).mean()

    assert mrr_original == mrr_recovered

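# The _reload helper used above is defined elsewhere in the test utilities.
# This is a minimal sketch of one way such a round-trip could work, assuming
# the fitted model is picklable; it is an illustration, not the actual helper.
def _example_reload(model):

    import pickle

    return pickle.loads(pickle.dumps(model))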