示例#1
0
def evaluate_cnn_model(hyperparameters, train, test, validation, random_state):
    """Fit a CNN-backed ``ImplicitSequenceModel`` and score it on two datasets.

    Args:
        hyperparameters: Mapping read for the keys ``embedding_dim``,
            ``kernel_width``, ``dilation``, ``num_layers``, ``nonlinearity``,
            ``residual``, ``loss``, ``batch_size``, ``learning_rate``,
            ``l2`` and ``n_iter``.
        train: Dataset the model is fitted on; its ``num_items`` attribute
            is passed to ``CNNNet``.
        test: Dataset scored for the first element of the result.
        validation: Dataset scored for the second element of the result.
        random_state: Forwarded unchanged to ``ImplicitSequenceModel``.

    Returns:
        Tuple ``(test_mrr, val_mrr)`` holding the values returned by
        ``sequence_mrr_score`` for ``test`` and ``validation``.
    """
    params = hyperparameters
    num_items = train.num_items

    # The 'residual' hyperparameter maps onto CNNNet's
    # ``residual_connections`` argument.
    cnn_options = {
        'embedding_dim': params['embedding_dim'],
        'kernel_width': params['kernel_width'],
        'dilation': params['dilation'],
        'num_layers': params['num_layers'],
        'nonlinearity': params['nonlinearity'],
        'residual_connections': params['residual'],
    }
    representation = CNNNet(num_items, **cnn_options)

    model = ImplicitSequenceModel(
        loss=params['loss'],
        representation=representation,
        batch_size=params['batch_size'],
        learning_rate=params['learning_rate'],
        l2=params['l2'],
        n_iter=params['n_iter'],
        use_cuda=CUDA,
        random_state=random_state,
    )
    model.fit(train, verbose=True)

    # Score test first, then validation, matching the returned order.
    return (sequence_mrr_score(model, test),
            sequence_mrr_score(model, validation))
示例#2
0
def test_bloom_pooling(compression_ratio, expected_mrr):
    """Check a pooling model using a ``BloomEmbedding`` item layer.

    Args:
        compression_ratio: Forwarded to ``BloomEmbedding``.
        expected_mrr: Threshold that the mean of the evaluated scores must
            strictly exceed.
    """
    rng = np.random.RandomState(RANDOM_SEED)
    train, test = _get_synthetic_data(
        randomness=1e-03,
        num_interactions=20000,
        random_state=rng,
    )

    # NOTE(review): the embedding width is the literal 32 here while PoolNet
    # receives EMBEDDING_DIM -- confirm the two are meant to be equal.
    bloom_layer = BloomEmbedding(
        train.num_items,
        32,
        compression_ratio=compression_ratio,
        num_hash_functions=2,
    )
    pooling_net = PoolNet(
        train.num_items,
        embedding_dim=EMBEDDING_DIM,
        item_embedding_layer=bloom_layer,
    )

    model = ImplicitSequenceModel(
        loss=LOSS,
        representation=pooling_net,
        batch_size=BATCH_SIZE,
        learning_rate=1e-2,
        l2=1e-7,
        n_iter=NUM_EPOCHS * 5,
        random_state=rng,
        use_cuda=CUDA,
    )
    model.fit(train, verbose=VERBOSE)

    mean_mrr = _evaluate(model, test).mean()

    assert mean_mrr > expected_mrr
示例#3
0
def test_implicit_cnn_dilation_synthetic(num_layers, dilation, expected_mrr):
    """Check a CNN sequence model for a given depth and dilation setting.

    Args:
        num_layers: Forwarded to ``CNNNet``; also multiplies the number of
            training iterations.
        dilation: Forwarded to ``CNNNet``.
        expected_mrr: Threshold that the mean of the evaluated scores must
            strictly exceed.
    """
    rng = np.random.RandomState(RANDOM_SEED)
    train, test = _get_synthetic_data(
        randomness=1e-03,
        num_interactions=20000,
        random_state=rng,
    )

    cnn = CNNNet(
        train.num_items,
        embedding_dim=EMBEDDING_DIM,
        kernel_width=3,
        dilation=dilation,
        num_layers=num_layers,
    )

    model = ImplicitSequenceModel(
        loss=LOSS,
        representation=cnn,
        batch_size=BATCH_SIZE,
        learning_rate=1e-2,
        l2=0.0,
        # The iteration count grows with the number of layers.
        n_iter=NUM_EPOCHS * 5 * num_layers,
        random_state=rng,
        use_cuda=CUDA,
    )
    model.fit(train, verbose=VERBOSE)

    mean_mrr = _evaluate(model, test).mean()

    assert mean_mrr > expected_mrr
示例#4
0
def data_implicit_sequence():
    """Build sequence train/test splits and a fitted sequence model.

    Loads the ``'100K'`` MovieLens dataset, splits it with
    ``user_based_train_test_split``, converts both splits with
    ``to_sequence`` using the same settings, and fits an
    ``ImplicitSequenceModel`` (``representation='lstm'``) on the training
    sequences.

    Returns:
        Tuple ``(train, test, model)``: the two converted splits and the
        fitted model.
    """
    # One set of conversion settings shared by both splits.
    sequence_options = {
        'max_sequence_length': 200,
        'min_sequence_length': 20,
        'step_size': 200,
    }

    interactions = movielens.get_movielens_dataset('100K')
    raw_train, raw_test = user_based_train_test_split(
        interactions,
        random_state=RANDOM_STATE,
    )

    train = raw_train.to_sequence(**sequence_options)
    test = raw_test.to_sequence(**sequence_options)

    model = ImplicitSequenceModel(
        loss='adaptive_hinge',
        representation='lstm',
        batch_size=8,
        learning_rate=1e-2,
        l2=1e-3,
        n_iter=2,
        use_cuda=CUDA,
        random_state=RANDOM_STATE,
    )
    model.fit(train, verbose=True)

    return train, test, model
示例#5
0
def build_sequence_model(hyperparameters, train, random_state):
    """Construct (without fitting) an LSTM-backed ``ImplicitSequenceModel``.

    The item embedding layer is a ``BloomEmbedding`` when
    ``hyperparameters['compression_ratio']`` is below 1.0 and a
    ``ScaledEmbedding`` otherwise.

    Args:
        hyperparameters: Mapping read for the keys ``compression_ratio``,
            ``embedding_dim``, ``loss``, ``n_iter``, ``batch_size``,
            ``learning_rate`` and ``l2``.
        train: Dataset whose ``num_items`` attribute sizes the embedding
            layer and the network.
        random_state: Accepted but not read by this function; seeding uses
            the fixed value 42 instead.
            NOTE(review): confirm that ignoring this argument is intended.

    Returns:
        The unfitted ``ImplicitSequenceModel``.
    """
    params = hyperparameters

    set_seed(42, CUDA)

    ratio = params['compression_ratio']
    dim = params['embedding_dim']
    num_items = train.num_items

    if ratio < 1.0:
        embedding_layer = BloomEmbedding(
            num_items,
            dim,
            compression_ratio=ratio,
            num_hash_functions=4,
            padding_idx=0,
        )
    else:
        embedding_layer = ScaledEmbedding(num_items, dim, padding_idx=0)

    lstm = LSTMNet(num_items, dim, item_embedding_layer=embedding_layer)

    return ImplicitSequenceModel(
        loss=params['loss'],
        n_iter=params['n_iter'],
        batch_size=params['batch_size'],
        learning_rate=params['learning_rate'],
        embedding_dim=dim,
        l2=params['l2'],
        representation=lstm,
        use_cuda=CUDA,
        random_state=np.random.RandomState(42),
    )
示例#6
0
def test_implicit_pooling_synthetic(randomness, expected_mrr):
    """Check an ``ImplicitSequenceModel`` built without an explicit representation.

    Args:
        randomness: Forwarded to ``_get_synthetic_data``.
        expected_mrr: Threshold that the mean of the evaluated scores must
            strictly exceed.
    """
    rng = np.random.RandomState(RANDOM_SEED)
    train, test = _get_synthetic_data(randomness=randomness,
                                      random_state=rng)

    # No ``representation`` argument is given, so the model's own default
    # is used.
    model_options = {
        'loss': LOSS,
        'batch_size': BATCH_SIZE,
        'embedding_dim': EMBEDDING_DIM,
        'learning_rate': 1e-1,
        'l2': 1e-9,
        'n_iter': NUM_EPOCHS,
        'random_state': rng,
        'use_cuda': CUDA,
    }
    model = ImplicitSequenceModel(**model_options)
    model.fit(train, verbose=VERBOSE)

    mean_mrr = _evaluate(model, test).mean()

    assert mean_mrr > expected_mrr
示例#7
0
def evaluate_pooling_model(hyperparameters, train, test, validation,
                           random_state):
    """Fit a ``'pooling'`` ``ImplicitSequenceModel`` and score it on two datasets.

    Args:
        hyperparameters: Mapping read for the keys ``loss``, ``batch_size``,
            ``learning_rate``, ``l2`` and ``n_iter``.
        train: Dataset the model is fitted on.
        test: Dataset scored for the first element of the result.
        validation: Dataset scored for the second element of the result.
        random_state: Forwarded unchanged to ``ImplicitSequenceModel``.

    Returns:
        Tuple ``(test_mrr, val_mrr)`` holding the values returned by
        ``sequence_mrr_score`` for ``test`` and ``validation``.
    """
    params = hyperparameters

    model_options = {
        'loss': params['loss'],
        'representation': 'pooling',
        'batch_size': params['batch_size'],
        'learning_rate': params['learning_rate'],
        'l2': params['l2'],
        'n_iter': params['n_iter'],
        'use_cuda': CUDA,
        'random_state': random_state,
    }
    model = ImplicitSequenceModel(**model_options)
    model.fit(train, verbose=True)

    # Score test first, then validation, matching the returned order.
    return (sequence_mrr_score(model, test),
            sequence_mrr_score(model, validation))
示例#8
0
def test_implicit_sequence_serialization(data):
    """Check that a model scores identically after going through ``_reload``.

    Args:
        data: Pair ``(train, test)``; each element is converted with
            ``to_sequence(max_sequence_length=128)`` before use.
    """
    raw_train, raw_test = data
    train = raw_train.to_sequence(max_sequence_length=128)
    test = raw_test.to_sequence(max_sequence_length=128)

    cnn = CNNNet(
        train.num_items,
        embedding_dim=32,
        kernel_width=3,
        dilation=(1, ),
        num_layers=1,
    )
    model = ImplicitSequenceModel(
        loss='bpr',
        representation=cnn,
        batch_size=128,
        learning_rate=1e-1,
        l2=0.0,
        n_iter=5,
        random_state=RANDOM_STATE,
        use_cuda=CUDA,
    )
    model.fit(train)

    # Score the fitted model first, then the object returned by _reload
    # (presumably a save/load round trip, going by the test name).
    mrr_original = sequence_mrr_score(model, test).mean()
    reloaded = _reload(model)
    mrr_recovered = sequence_mrr_score(reloaded, test).mean()

    # Exact equality is intended: the reloaded model must score the same.
    assert mrr_original == mrr_recovered
示例#9
0
def sequence_model(num_embeddings, bloom):
    """Construct (without fitting) an LSTM-backed ``ImplicitSequenceModel``.

    Args:
        num_embeddings: Passed as the first argument to the embedding layer
            and to ``LSTMNet``.
        bloom: When truthy the item embedding layer is a ``BloomEmbedding``
            (with ``NUM_HASH_FUNCTIONS`` hash functions); otherwise a
            ``ScaledEmbedding``.

    Returns:
        The unfitted ``ImplicitSequenceModel``.
    """
    embedding_layer = (
        BloomEmbedding(num_embeddings, EMBEDDING_DIM,
                       num_hash_functions=NUM_HASH_FUNCTIONS)
        if bloom
        else ScaledEmbedding(num_embeddings, EMBEDDING_DIM)
    )

    lstm = LSTMNet(num_embeddings, EMBEDDING_DIM,
                   item_embedding_layer=embedding_layer)

    return ImplicitSequenceModel(
        loss='adaptive_hinge',
        n_iter=N_ITER,
        batch_size=512,
        learning_rate=1e-3,
        l2=1e-2,
        representation=lstm,
        use_cuda=CUDA,
    )