Example #1
def evaluate_cnn_model(hyperparameters, train, test, validation, random_state):
    h = hyperparameters

    net = CNNNet(train.num_items,
                 kernel_width=h['kernel_width'],
                 dilation=h['dilation'],
                 num_layers=h['num_layers'],
                 nonlinearity=h['nonlinearity'],
                 residual_connections=h['residual'])

    model = ImplicitSequenceModel(loss=h['loss'],
                                  representation=net,
                                  n_iter=h['n_iter'],
                                  use_cuda=CUDA,
                                  random_state=random_state)

    model.fit(train, verbose=True)

    test_eval = {}
    test_eval['mrr'] = sequence_mrr_score(model, test).mean()

    val_eval = {}
    val_eval['mrr'] = sequence_mrr_score(model, validation).mean()

    return test_eval, val_eval
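A hedged usage sketch for the helper above: the hyperparameter keys are exactly those the function reads, but the values, and the assumption that `numpy` is imported as `np` and that `train`/`test`/`validation` are already sequence datasets, are illustrative only.

hyperparameters = {
    'kernel_width': 3,
    'dilation': 1,
    'num_layers': 2,
    'nonlinearity': 'tanh',
    'residual': True,
    'loss': 'bpr',
    'n_iter': 10,
}
test_eval, val_eval = evaluate_cnn_model(hyperparameters, train, test, validation,
                                         random_state=np.random.RandomState(42))
print(test_eval['mrr'], val_eval['mrr'])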
Example #2
def evaluate_cnn_model(hyperparameters, train, test, validation, random_state):

    h = hyperparameters

    net = CNNNet(train.num_items,
                 embedding_dim=h['embedding_dim'],
                 kernel_width=h['kernel_width'],
                 dilation=h['dilation'],
                 num_layers=h['num_layers'],
                 nonlinearity=h['nonlinearity'],
                 residual_connections=h['residual'])

    model = ImplicitSequenceModel(loss=h['loss'],
                                  representation=net,
                                  batch_size=h['batch_size'],
                                  learning_rate=h['learning_rate'],
                                  l2=h['l2'],
                                  n_iter=h['n_iter'],
                                  use_cuda=CUDA,
                                  random_state=random_state)

    model.fit(train, verbose=True)

    test_mrr = sequence_mrr_score(model, test)
    val_mrr = sequence_mrr_score(model, validation)

    return test_mrr, val_mrr
Example #3
def test_bloom_pooling(compression_ratio, expected_mrr):

    random_state = np.random.RandomState(RANDOM_SEED)
    train, test = _get_synthetic_data(randomness=1e-03,
                                      num_interactions=20000,
                                      random_state=random_state)

    embedding = BloomEmbedding(train.num_items,
                               32,
                               compression_ratio=compression_ratio,
                               num_hash_functions=2)

    representation = PoolNet(train.num_items,
                             embedding_dim=EMBEDDING_DIM,
                             item_embedding_layer=embedding)

    model = ImplicitSequenceModel(loss=LOSS,
                                  representation=representation,
                                  batch_size=BATCH_SIZE,
                                  learning_rate=1e-2,
                                  l2=1e-7,
                                  n_iter=NUM_EPOCHS * 5,
                                  random_state=random_state,
                                  use_cuda=CUDA)

    model.fit(train, verbose=VERBOSE)

    mrr = _evaluate(model, test)

    assert mrr.mean() > expected_mrr
Example #4
def test_implicit_cnn_dilation_synthetic(num_layers, dilation, expected_mrr):

    random_state = np.random.RandomState(RANDOM_SEED)
    train, test = _get_synthetic_data(randomness=1e-03,
                                      num_interactions=20000,
                                      random_state=random_state)

    model = ImplicitSequenceModel(loss=LOSS,
                                  representation=CNNNet(
                                      train.num_items,
                                      embedding_dim=EMBEDDING_DIM,
                                      kernel_width=1,
                                      dilation=dilation,
                                      num_layers=num_layers),
                                  batch_size=BATCH_SIZE,
                                  learning_rate=1e-2,
                                  l2=0.0,
                                  n_iter=NUM_EPOCHS * num_layers,
                                  random_state=random_state)

    model.fit(train, verbose=VERBOSE)

    mrr = _evaluate(model, test)

    assert mrr.mean() > expected_mrr
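The `num_layers`, `dilation`, and `expected_mrr` arguments suggest this test is driven by `pytest.mark.parametrize`; a sketch of what such a decorator could look like (the value tuples are placeholders, not the source project's real thresholds):

import pytest

@pytest.mark.parametrize('num_layers, dilation, expected_mrr',
                         [(1, 1, 0.10),   # placeholder threshold
                          (2, 2, 0.10)])  # placeholder threshold
def test_implicit_cnn_dilation_synthetic(num_layers, dilation, expected_mrr):
    ...  # body as in the example above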
Example #5
def data_implicit_sequence():

    max_sequence_length = 200
    min_sequence_length = 20
    step_size = 200

    interactions = movielens.get_movielens_dataset('100K')

    train, test = user_based_train_test_split(interactions,
                                              random_state=RANDOM_STATE)

    train = train.to_sequence(max_sequence_length=max_sequence_length,
                              min_sequence_length=min_sequence_length,
                              step_size=step_size)

    test = test.to_sequence(max_sequence_length=max_sequence_length,
                            min_sequence_length=min_sequence_length,
                            step_size=step_size)

    model = ImplicitSequenceModel(loss='adaptive_hinge',
                                  representation='lstm',
                                  batch_size=8,
                                  learning_rate=1e-2,
                                  l2=1e-3,
                                  n_iter=2,
                                  use_cuda=CUDA,
                                  random_state=RANDOM_STATE)

    model.fit(train, verbose=True)

    return train, test, model
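Since this helper returns the fitted model together with both splits, evaluating it is a one-liner (assuming `sequence_mrr_score` is imported):

train, test, model = data_implicit_sequence()
print(sequence_mrr_score(model, test).mean())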
Example #6
    def objective(hyper):

        print(hyper)

        start = time.perf_counter()

        if hyper['model']['type'] == 'lsh':
            num_hashes = int(hyper['model']['num_hash_functions'])
            num_layers = int(hyper['model']['num_layers'])
            nonlinearity = hyper['model']['nonlinearity']
            residual = hyper['model']['residual']
            embed = hyper['model']['embed']

            item_embeddings = LSHEmbedding(train.num_items,
                                           int(hyper['embedding_dim']),
                                           embed=embed,
                                           residual_connections=residual,
                                           nonlinearity=nonlinearity,
                                           num_layers=num_layers,
                                           num_hash_functions=num_hashes)
            item_embeddings.fit(train_nonsequence.tocsr().T)
        else:
            item_embeddings = ScaledEmbedding(train.num_items,
                                              int(hyper['embedding_dim']),
                                              padding_idx=0)

        network = LSTMNet(train.num_items,
                          int(hyper['embedding_dim']),
                          item_embedding_layer=item_embeddings)

        model = ImplicitSequenceModel(loss=hyper['loss'],
                                      n_iter=int(hyper['n_iter']),
                                      batch_size=int(hyper['batch_size']),
                                      learning_rate=hyper['learning_rate'],
                                      embedding_dim=int(
                                          hyper['embedding_dim']),
                                      l2=hyper['l2'],
                                      representation=network,
                                      use_cuda=CUDA,
                                      random_state=random_state)

        model.fit(train, verbose=True)

        elapsed = time.perf_counter() - start

        print(model)

        validation_mrr = sequence_mrr_score(model, validation).mean()
        test_mrr = sequence_mrr_score(model, test).mean()

        print('MRR {} {}'.format(validation_mrr, test_mrr))

        return {
            'loss': -validation_mrr,
            'status': STATUS_OK,
            'validation_mrr': validation_mrr,
            'test_mrr': test_mrr,
            'elapsed': elapsed,
            'hyper': hyper
        }
Example #7
    def objective(space):
        """Objective function for Spotlight ImplicitFactorizationModel"""

        batch_size = int(space['batch_size'])
        embedding_dim = int(space['embedding_dim'])
        l2 = space['l2']
        learn_rate = space['learn_rate']
        loss = space['loss']
        n_iter = int(space['n_iter'])
        representation = space['representation']

        model = ImplicitSequenceModel(
            loss=loss,
            embedding_dim=embedding_dim,
            batch_size=batch_size,
            representation=representation,
            learning_rate=learn_rate,
            n_iter=n_iter,
            l2=l2,
            use_cuda=CUDA)

        start = time.perf_counter()

        try:
            model.fit(train, verbose=True)
        except ValueError:
            elapsed = time.perf_counter() - start
            return {'loss': 0.0,
                    'status': STATUS_FAIL,
                    'validation_mrr': 0.0,
                    'test_mrr': 0.0,
                    'elapsed': elapsed,
                    'hyper': space}
        elapsed = time.perf_counter() - start
        print(model)

        validation_mrr = sequence_mrr_score(model, valid).mean()
        test_mrr = sequence_mrr_score(model, test).mean()

        print('MRR {} {}'.format(validation_mrr, test_mrr))

        if np.isnan(validation_mrr):
            status = STATUS_FAIL
        else:
            status = STATUS_OK

        return {'loss': -validation_mrr,
                'status': status,
                'validation_mrr': validation_mrr,
                'test_mrr': test_mrr,
                'elapsed': elapsed,
                'hyper': space}
Example #9
def build_sequence_model(hyperparameters, train, random_state):

    h = hyperparameters

    set_seed(42, CUDA)

    if h['compression_ratio'] < 1.0:
        item_embeddings = BloomEmbedding(
            train.num_items,
            h['embedding_dim'],
            compression_ratio=h['compression_ratio'],
            num_hash_functions=4,
            padding_idx=0)
    else:
        item_embeddings = ScaledEmbedding(train.num_items,
                                          h['embedding_dim'],
                                          padding_idx=0)

    network = LSTMNet(train.num_items,
                      h['embedding_dim'],
                      item_embedding_layer=item_embeddings)

    model = ImplicitSequenceModel(loss=h['loss'],
                                  n_iter=h['n_iter'],
                                  batch_size=h['batch_size'],
                                  learning_rate=h['learning_rate'],
                                  embedding_dim=h['embedding_dim'],
                                  l2=h['l2'],
                                  representation=network,
                                  use_cuda=CUDA,
                                  random_state=np.random.RandomState(42))

    return model
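A sketch of how `build_sequence_model` might be called: the keys are the ones the function reads, the values are illustrative, and `train` is assumed to be a sequence dataset already in scope.

hyperparameters = {
    'compression_ratio': 0.5,   # values below 1.0 select the BloomEmbedding branch
    'embedding_dim': 32,
    'loss': 'adaptive_hinge',
    'n_iter': 10,
    'batch_size': 256,
    'learning_rate': 1e-3,
    'l2': 1e-6,
}
model = build_sequence_model(hyperparameters, train, np.random.RandomState(42))
model.fit(train, verbose=True)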
Example #10
    def obtener_modelo_gui(self, lista_param):
        """
        Method obtener_modelo_gui. Builds the chosen model from the parameters passed in.

        This method is only used in the web interface.

        Parameters
        ----------

        lista_param: list
            list containing the parameters chosen by the user to create the model.
        """

        global modelo

        # Store the parameters in variables for readability
        loss = lista_param[0]
        embedding_dim = lista_param[1]
        n_iter = lista_param[2]
        batch_size = lista_param[3]
        l2 = lista_param[4]
        learning_rate = lista_param[5]
        representation = lista_param[6]

        # Instantiate the model according to the parameters above
        if self.opcion_modelo == 1:
            modelo = ExplicitFactorizationModel(loss=loss, embedding_dim=embedding_dim, n_iter=n_iter,
                                                batch_size=batch_size, l2=l2, learning_rate=learning_rate,
                                                use_cuda=torch.cuda.is_available())
        elif self.opcion_modelo == 2:
            modelo = ImplicitFactorizationModel(loss=loss, embedding_dim=embedding_dim, n_iter=n_iter,
                                                batch_size=batch_size, l2=l2, learning_rate=learning_rate,
                                                use_cuda=torch.cuda.is_available())
        else:
            modelo = ImplicitSequenceModel(loss=loss, representation=representation, embedding_dim=embedding_dim,
                                           n_iter=n_iter, batch_size=batch_size, l2=l2,
                                           learning_rate=learning_rate, use_cuda=torch.cuda.is_available())
Example #11
def test_implicit_pooling_synthetic(randomness, expected_mrr):

    random_state = np.random.RandomState(RANDOM_SEED)
    train, test = _get_synthetic_data(randomness=randomness,
                                      random_state=random_state)

    model = ImplicitSequenceModel(loss=LOSS,
                                  batch_size=BATCH_SIZE,
                                  embedding_dim=EMBEDDING_DIM,
                                  learning_rate=1e-1,
                                  l2=1e-9,
                                  n_iter=NUM_EPOCHS,
                                  random_state=random_state)
    model.fit(train, verbose=VERBOSE)

    mrr = _evaluate(model, test)

    assert mrr.mean() > expected_mrr
Example #13
def evaluate_pooling_model(hyperparameters, train, test, validation, random_state):

    h = hyperparameters

    model = ImplicitSequenceModel(loss=h['loss'],
                                  representation='pooling',
                                  batch_size=h['batch_size'],
                                  learning_rate=h['learning_rate'],
                                  l2=h['l2'],
                                  n_iter=h['n_iter'],
                                  use_cuda=CUDA,
                                  random_state=random_state)

    model.fit(train, verbose=True)

    test_mrr = sequence_mrr_score(model, test)
    val_mrr = sequence_mrr_score(model, validation)

    return test_mrr, val_mrr
Example #14
def evaluate_lstm_model(hyperparameters, train, test, validation,
                        random_state):
    h = hyperparameters

    model = ImplicitSequenceModel(loss=h['loss'],
                                  representation='lstm',
                                  n_iter=h['n_iter'],
                                  use_cuda=CUDA,
                                  random_state=random_state)

    model.fit(train, verbose=True)

    test_eval = {}
    test_eval['mrr'] = sequence_mrr_score(model, test).mean()

    val_eval = {}
    val_eval['mrr'] = sequence_mrr_score(model, validation).mean()

    return test_eval, val_eval
Example #15
def evaluate_pooling_model(hyperparameters, train, test, validation,
                           random_state):

    h = hyperparameters

    model = ImplicitSequenceModel(loss=h['loss'],
                                  representation='pooling',
                                  batch_size=h['batch_size'],
                                  learning_rate=h['learning_rate'],
                                  l2=h['l2'],
                                  n_iter=h['n_iter'],
                                  use_cuda=CUDA,
                                  random_state=random_state)

    model.fit(train, verbose=True)

    test_mrr = sequence_mrr_score(model, test)
    val_mrr = sequence_mrr_score(model, validation)

    return test_mrr, val_mrr
Example #16
def test_implicit_lstm_mixture_synthetic(randomness, expected_mrr):

    random_state = np.random.RandomState(RANDOM_SEED)
    train, test = _get_synthetic_data(randomness=randomness,
                                      random_state=random_state)

    model = ImplicitSequenceModel(loss=LOSS,
                                  representation='mixture',
                                  batch_size=BATCH_SIZE,
                                  embedding_dim=EMBEDDING_DIM,
                                  learning_rate=1e-2,
                                  l2=1e-7,
                                  n_iter=NUM_EPOCHS * 10,
                                  random_state=random_state,
                                  use_cuda=CUDA)

    model.fit(train, verbose=VERBOSE)

    mrr = _evaluate(model, test)

    assert mrr.mean() > expected_mrr
Example #17
def train_model(df, hyperparams):
    # Fix random_state
    seed = 42
    set_seed(seed)
    random_state = np.random.RandomState(seed)

    max_sequence_length = 15
    min_sequence_length = 2
    step_size = 1

    # create dataset using interactions dataframe and timestamps
    dataset = Interactions(user_ids=np.array(df['user_id'], dtype='int32'),
                           item_ids=np.array(df['item_id'], dtype='int32'),
                           timestamps=df['entry_at'])

    # create training and test sets using a 80/20 split
    train, test = user_based_train_test_split(dataset,
                                              test_percentage=0.2,
                                              random_state=random_state)
    # convert to sequences
    train = train.to_sequence(max_sequence_length=max_sequence_length,
                              min_sequence_length=min_sequence_length,
                              step_size=step_size)
    test = test.to_sequence(max_sequence_length=max_sequence_length,
                            min_sequence_length=min_sequence_length,
                            step_size=step_size)

    print('data: {}'.format(train))

    # initialize and train model
    model = ImplicitSequenceModel(**hyperparams,
                                  use_cuda=CUDA,
                                  random_state=random_state)
    model.fit(train, verbose=True)

    # compute mrr score on test set
    test_mrr = sequence_mrr_score(model, test).mean()
    print('MRR score on test set: {}'.format(test_mrr))

    return model
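For illustration, `train_model` expects a DataFrame with `user_id`, `item_id`, and `entry_at` columns plus a dict of `ImplicitSequenceModel` keyword arguments; a toy invocation (a sketch only: a real run needs far more users and interactions than this frame):

import pandas as pd

df = pd.DataFrame({
    'user_id':  [1, 1, 1, 2, 2, 2],
    'item_id':  [10, 11, 12, 10, 13, 14],
    'entry_at': [1, 2, 3, 1, 2, 3],
})
hyperparams = {'loss': 'adaptive_hinge', 'representation': 'lstm', 'n_iter': 5}
model = train_model(df, hyperparams)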
Example #18
    def model_implicit_sequence(
            self,
            train: Interactions,
            random_state: np.random.RandomState,
            representation: str = None,
            hyperparameters: dict = None) -> ImplicitSequenceModel:
        logger = logging.getLogger()
        if not representation:
            if hyperparameters:
                net = CNNNet(train.num_items,
                             embedding_dim=hyperparameters['embedding_dim'],
                             kernel_width=hyperparameters['kernel_width'],
                             dilation=hyperparameters['dilation'],
                             num_layers=hyperparameters['num_layers'],
                             nonlinearity=hyperparameters['nonlinearity'],
                             residual_connections=hyperparameters['residual'])
            else:
                net = CNNNet(train.num_items)

            representation = net

        out_string = 'CNN' if isinstance(representation,
                                         CNNNet) else representation.upper()
        if hyperparameters:
            logger.info(
                "Beginning fitting implicit sequence {0} model... \n Hyperparameters: \n {1}"
                .format(
                    out_string,
                    json.dumps({
                        i: hyperparameters[i]
                        for i in hyperparameters if i != 'use_cuda'
                    })))
            model = ImplicitSequenceModel(
                loss=hyperparameters['loss'],
                representation=representation,
                batch_size=hyperparameters['batch_size'],
                learning_rate=hyperparameters['learning_rate'],
                l2=hyperparameters['l2'],
                n_iter=hyperparameters['n_iter'],
                use_cuda=True,
                random_state=random_state)
        else:
            model = ImplicitSequenceModel(use_cuda=True)
            logger.info(
                "Beginning fitting implicit sequence {} model with default hyperparameters..."
                .format(out_string))

        model.fit(train, verbose=True)
        model.predict(train.sequences)
        return model
Example #19
def test_implicit_cnn_synthetic(randomness, expected_mrr):

    random_state = np.random.RandomState(RANDOM_SEED)
    train, test = _get_synthetic_data(randomness=randomness,
                                      random_state=random_state)

    model = ImplicitSequenceModel(loss=LOSS,
                                  representation=CNNNet(train.num_items,
                                                        embedding_dim=EMBEDDING_DIM,
                                                        kernel_width=5,
                                                        num_layers=1),
                                  batch_size=BATCH_SIZE,
                                  learning_rate=1e-2,
                                  l2=0.0,
                                  n_iter=NUM_EPOCHS * 5,
                                  random_state=random_state)

    model.fit(train, verbose=VERBOSE)

    mrr = _evaluate(model, test)

    assert mrr.mean() > expected_mrr
Example #20
def evaluate_lstm_model(hyperparameters, train, test, validation,
                        random_state):

    h = hyperparameters

    model = ImplicitSequenceModel(
        loss=h['loss'],
        representation='lstm',
        batch_size=h['batch_size'],
        learning_rate=h['learning_rate'],
        l2=h['l2'],
        n_iter=h['n_iter'],
        use_cuda=CUDA,
        random_state=random_state,
        num_negative_samples=h["num_negative_samples"]  ## new
    )

    model.fit(train, verbose=False)

    test_mrr = sequence_mrr_score(model, test)
    val_mrr = sequence_mrr_score(model, validation)

    return test_mrr, val_mrr
Example #21
def test_implicit_sequence_serialization(data):

    train, test = data
    train = train.to_sequence(max_sequence_length=128)
    test = test.to_sequence(max_sequence_length=128)

    model = ImplicitSequenceModel(loss='bpr',
                                  representation=CNNNet(train.num_items,
                                                        embedding_dim=32,
                                                        kernel_width=3,
                                                        dilation=(1, ),
                                                        num_layers=1),
                                  batch_size=128,
                                  learning_rate=1e-1,
                                  l2=0.0,
                                  n_iter=5,
                                  random_state=RANDOM_STATE,
                                  use_cuda=CUDA)
    model.fit(train)

    mrr_original = sequence_mrr_score(model, test).mean()
    mrr_recovered = sequence_mrr_score(_reload(model), test).mean()

    assert mrr_original == mrr_recovered
Example #22
def sequence_model(num_embeddings, bloom):

    if bloom:
        item_embeddings = BloomEmbedding(num_embeddings, EMBEDDING_DIM, num_hash_functions=NUM_HASH_FUNCTIONS)
    else:
        item_embeddings = ScaledEmbedding(num_embeddings, EMBEDDING_DIM)

    network = LSTMNet(num_embeddings, EMBEDDING_DIM, item_embedding_layer=item_embeddings)

    model = ImplicitSequenceModel(
        loss='adaptive_hinge',
        n_iter=N_ITER,
        batch_size=512,
        learning_rate=1e-3,
        l2=1e-2,
        representation=network,
        use_cuda=CUDA)

    return model
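Assuming the module-level constants (`EMBEDDING_DIM`, `NUM_HASH_FUNCTIONS`, `N_ITER`, `CUDA`) are defined and `train` is a sequence dataset in scope, usage reduces to:

model = sequence_model(num_embeddings=train.num_items, bloom=True)
model.fit(train, verbose=True)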
Example #23
    def obtener_modelos(self):
        """
        Method obtener_modelos. Builds, trains and saves the chosen model.

        This method is only used in the text interface.
        """

        global train, modelo

        # Build the chosen model, train it with default parameters and save it
        if self.opcion_modelo == 1:
            modelo = ExplicitFactorizationModel(loss='logistic', use_cuda=torch.cuda.is_available())
            modelo.fit(train, verbose=True)
            guardar_modelos_dl(modelo, 'the explicit factorization model')
        elif self.opcion_modelo == 2:
            modelo = ImplicitFactorizationModel(loss='bpr', use_cuda=torch.cuda.is_available())
            modelo.fit(train, verbose=True)
            guardar_modelos_dl(modelo, 'the implicit factorization model')
        else:
            modelo = ImplicitSequenceModel(loss='bpr', representation='pooling', use_cuda=torch.cuda.is_available())
            modelo.fit(train, verbose=True)
            guardar_modelos_dl(modelo, 'the implicit sequence model')
Example #24
train_csv = abspath("../../../resources/train_small_no_header.csv")
test_csv = abspath("../../../resources/test.csv")
subm_csv = abspath("../../../resources/myoutput.csv")

print(f"Reading {train_csv} ...")
df_train = pd.read_csv(train_csv)

# user_based_train_test_split expects a spotlight Interactions dataset, not a CSV
# path, so build one from df_train first (the Interactions import and the
# 'item_id' column name are assumptions about this project's data)
interactions = Interactions(user_ids=df_train['user_id'].values.astype('int32'),
                            item_ids=df_train['item_id'].values.astype('int32'))
train, test = user_based_train_test_split(interactions)
train = train.to_sequence()
test = test.to_sequence()

#print(f"Reading {test_csv} ...")
#df_test = pd.read_csv(test_csv)

print("Build and Fit Implicit Sequence Model")
model = ImplicitSequenceModel(n_iter=3, representation='cnn', loss='bpr')
#model.fit(df_train)

model.fit(train)

print("Calculate MRR Score")
# score against the held-out sequence set, not the raw CSV path
mrr = sequence_mrr_score(model, test)
print("MRR Result: ", mrr)

print("Calculate Recommendations")
# get data into dataframe for extracting user ids (I think we need the testset here?)
df_test = pd.read_csv(test_csv)
user_ids = df_test[['user_id']]
# call recommendation algorithm
df_out = pd.DataFrame()
df_out = recommendation(model, df_out, df_test, user_ids)
Example #25
                           num_items=int(foods_items),
                           timestamps=timeStamps)

    if name == "test":
        dataset_test = dataset
    elif name == "train":
        dataset_train = dataset

if model_mode.lower() == "ifm":
    model = ImplicitFactorizationModel(n_iter=n_iter)
if model_mode.lower() == "efm":
    model = ExplicitFactorizationModel(n_iter=n_iter)
if model_mode.lower() == "cnn":
    net = CNNNet(num_items=int(foods_items))
    model = ImplicitSequenceModel(n_iter=n_iter,
                                  use_cuda=torch.cuda.is_available(),
                                  representation=net)

model.fit(dataset_train)

with open(save_file, 'wb') as f:
    pickle.dump(model, f, pickle.HIGHEST_PROTOCOL)

if model_mode.lower() == "cnn":
    mrr = sequence_mrr_score(model, dataset_test)
else:
    mrr = mrr_score(model, dataset_test)

print("mrr = ", len(mrr))
print("mean mrr = ", sum(mrr) / len(mrr))
rank = 1 / (sum(mrr) / len(mrr))
Example #26

print("Load Data")
#train_csv = abspath("../../../resources/train.csv")
#test_csv = abspath("../../../resources/test.csv")
#subm_csv = abspath("../../../resources/myoutput.csv")
own_dataset = abspath("../../../resources/min_trivago.csv")
dataset = get_own_dataset('min_trivago')
print(dataset)

train, test = random_train_test_split(dataset,
                                      random_state=np.random.RandomState(42))
print('Split into \n {} and \n {}.'.format(train, test))

print("Build and Fit Implicit Sequence Model")
model = ImplicitSequenceModel(n_iter=3, representation='lstm', loss='bpr')
model.fit(train, verbose=True)

print("Calculate MRR Score")
mrr = sequence_mrr_score(model, test)
print("MRR Result: ", mrr)

print("Calculate Recommendations")
# get data into dataframe for extracting user ids (I think we need the testset here?)
df_test = pd.read_csv(own_dataset)
user_ids = df_test[['user_id']]
# call recommendation algorithm
df_out = pd.DataFrame()
df_out = recommendation(model, df_out, df_test, user_ids)

# write result to csv file
Example #27
sales_categorical['product_id'] = sales_categorical['product_id'] + 1
sales_categorical['timestep_id'] = sales_categorical['timestep_id'] + 1

#%%

import pandas as pd
from datetime import datetime

from spotlight.interactions import Interactions
from spotlight.sequence.implicit import ImplicitSequenceModel

implicit_interactions = Interactions(
    sales_categorical['user_id'].astype('int32').values,
    sales_categorical['product_id'].astype('int32').values,
    timestamps=sales_categorical['timestep_id'].astype('int32').values)

sequential_interaction = implicit_interactions.to_sequence()

implicit_sequence_model = ImplicitSequenceModel()

#%%
start = datetime.now()
implicit_sequence_model = ImplicitSequenceModel(embedding_dim=100,
                                                representation='lstm',
                                                n_iter=5,
                                                use_cuda=True)
implicit_sequence_model.fit(sequential_interaction)
print(datetime.now() - start)

#%%

prediction = pd.DataFrame(implicit_sequence_model.predict([1337],
                                                          item_ids=None),
                          columns=['probability'])
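`predict` with `item_ids=None` scores every item, so the column above holds raw ranking scores rather than calibrated probabilities; a small sketch of turning them into a top-N list:

import numpy as np

scores = implicit_sequence_model.predict([1337])  # one score per item
top_10 = np.argsort(-scores)[:10]                 # item ids, best first
print(top_10)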
Example #28
class SequenceEmbeddingRecommender(BaseDFSparseRecommender):

    default_model_params = dict(
        loss='adaptive_hinge',  # 'pointwise', 'bpr', 'hinge', 'adaptive_hinge'
        representation='lstm',  # 'pooling', 'cnn', 'lstm', 'mixture'
        embedding_dim=32,
        n_iter=4,
        batch_size=64,
        l2=0.0,
        learning_rate=2e-3,
        num_negative_samples=25)

    default_fit_params = dict(max_sequence_length=200,
                              timestamp_col='first_timestamp')

    def _interactions_sequence_from_obs(self,
                                        obs,
                                        timestamp_col='first_timestamp',
                                        max_sequence_length=10,
                                        min_sequence_length=None,
                                        step_size=None,
                                        **kwargs):

        obs.timestamp_col = timestamp_col

        return Interactions(
            user_ids=self.sparse_mat_builder.uid_encoder.
                transform(obs.user_ids.astype(str)).astype('int32'),
            item_ids=self.sparse_mat_builder.iid_encoder.
                transform(obs.item_ids.astype(str)).astype('int32') + 1,
            ratings=obs.ratings,
            timestamps=obs.timestamps
        ). \
            to_sequence(
            max_sequence_length=max_sequence_length,
            min_sequence_length=min_sequence_length,
            step_size=step_size
        )

    def _prep_for_fit(self, train_obs, **fit_params):
        # self.toggle_mkl_blas_1_thread(False)
        self._set_data(train_obs)
        self._set_fit_params(fit_params)
        self.sequence_interactions = \
            self._interactions_sequence_from_obs(train_obs, **self.fit_params)

    def fit(self, train_obs, **fit_params):
        self._prep_for_fit(train_obs, **fit_params)
        self.model = ImplicitSequenceModel(**self.model_params)
        self.model.fit(self.sequence_interactions)

    def _get_recommendations_flat(self,
                                  user_ids,
                                  n_rec,
                                  item_ids=None,
                                  exclusions=True,
                                  **kwargs):

        return self._get_recommendations_exact(user_ids=user_ids,
                                               item_ids=item_ids,
                                               n_rec=n_rec,
                                               exclusions=exclusions,
                                               results_format='flat')

    def _predict_on_inds_dense(self, user_inds, item_inds):
        sequences = self.sequence_interactions.sequences

        pred_mat = np.zeros((len(user_inds), len(item_inds)))

        # TODO: very SLOW, try multiproc (batched from caller)

        item_inds_spot = item_inds.reshape(-1, 1) + 1

        for i_row, user_ind in enumerate(user_inds):
            pred_mat[i_row, :] = self.model.predict(sequences[user_ind],
                                                    item_ids=item_inds_spot)

        return pred_mat
Example #29
    def fit(self, train_obs, **fit_params):
        self._prep_for_fit(train_obs, **fit_params)
        self.model = ImplicitSequenceModel(**self.model_params)
        self.model.fit(self.sequence_interactions)
Example #30
    def fit(self, train_obs, **fit_params):
        self._prep_for_fit(train_obs, **fit_params)
        self.model = ImplicitSequenceModel(**self.model_params)
        self.model.fit(self.sequence_interactions,
                       verbose=self.fit_params.get('verbose', False))
Example #31
train = train.to_sequence(max_sequence_length=max_sequence_length,
                          min_sequence_length=min_sequence_length,
                          step_size=step_size)
test = test.to_sequence(max_sequence_length=max_sequence_length,
                        min_sequence_length=min_sequence_length,
                        step_size=step_size)
validation = validation.to_sequence(max_sequence_length=max_sequence_length,
                                    min_sequence_length=min_sequence_length,
                                    step_size=step_size)

net = LSTMNet(len(set(item2idx)),
              embedding_dim=32,
              item_embedding_layer=None,
              sparse=False)
model = ImplicitSequenceModel(loss='adaptive_hinge',
                              representation=net,
                              batch_size=32,
                              learning_rate=0.01,
                              l2=10e-6,
                              n_iter=10,
                              use_cuda=False,
                              random_state=random_state)
model.fit(train, verbose=True)

test_mrr = sequence_mrr_score(model, test)
val_mrr = sequence_mrr_score(model, validation)
train_mrr = sequence_mrr_score(model, train)

print(test_mrr.mean(), val_mrr.mean(), train_mrr.mean())

for (split, split_name) in ((train, "train"), (validation, "validation"),
                            (test, "test")):
    for k in (5, 10, 50, 100):
        precision, recall = sequence_precision_recall_score(
Example #32
random_state = np.random.RandomState(100)

net = CNNNet(num_items,
             embedding_dim=h['emb_dim'],
             kernel_width=h['kernel'],
             dilation=1,
             num_layers=h['layers'],
             nonlinearity=h['nonlin'],
             residual_connections=True)

model = ImplicitSequenceModel(loss=h['loss'],
                              representation=net,
                              batch_size=h['batch'],
                              learning_rate=h['lr'],
                              l2=h['l2'],
                              n_iter=h['n_iter'],
                              embedding_dim=h['emb_dim'],
                              use_cuda=use_cuda,
                              random_state=random_state,
                              notify_loss_completion=notify_loss_completion,
                              notify_batch_eval_completion=notify_batch_eval_completion,
                              notify_epoch_completion=notify_epoch_completion,
                              log_loss_interval=5000,
                              log_eval_interval=20000,
                              amsgrad=h['amsgrad'],
                              adamw=h['adamw'],
                              betas=betas,
                              num_negative_samples=h['neg'])

logger.info("Model is initialized, now fitting..")
model.fit(train_seq)
Example #33
    def objective(space):
        batch_size = int(space['batch_size'])
        learn_rate = space['learn_rate']
        loss = space['loss']
        n_iter = int(space['n_iter'])
        embedding_dim = int(space['embedding_dim'])
        l2 = space['l2']

        if space['type'] == 'mlstm':
            representation = mLSTMNet(
                train.num_items,
                embedding_dim=embedding_dim)
            model = ImplicitSequenceModel(
                loss=loss,
                batch_size=batch_size,
                representation=representation,
                learning_rate=learn_rate,
                n_iter=n_iter,
                l2=l2,
                use_cuda=CUDA,
                random_state=random_state)
        elif space['type'] == 'lstm':
            representation = space['representation']
            model = ImplicitSequenceModel(
                loss=loss,
                embedding_dim=embedding_dim,
                batch_size=batch_size,
                representation=representation,
                learning_rate=learn_rate,
                n_iter=n_iter,
                l2=l2,
                use_cuda=CUDA,
                random_state=random_state)
        else:
            raise ValueError('Unknown model type {}'.format(space.get('type', 'NA')))

        start = time.perf_counter()
        try:
            model.fit(train, verbose=True)
        except ValueError:
            elapsed = time.perf_counter() - start
            return {'loss': 0.0,
                    'status': STATUS_FAIL,
                    'validation_mrr': 0.0,
                    'test_mrr': 0.0,
                    'elapsed': elapsed,
                    'hyper': space}
        elapsed = time.perf_counter() - start
        print(model)

        validation_mrr = sequence_mrr_score(
            model,
            valid,
            exclude_preceding=True
        ).mean()
        test_mrr = sequence_mrr_score(
            model,
            test,
            exclude_preceding=True
        ).mean()

        print('MRR {} {}'.format(validation_mrr, test_mrr))

        if np.isnan(validation_mrr):
            status = STATUS_FAIL
        else:
            status = STATUS_OK

        return {'loss': -validation_mrr,
                'status': status,
                'validation_mrr': validation_mrr,
                'test_mrr': test_mrr,
                'elapsed': elapsed,
                'hyper': space}
Example #34
from spotlight.cross_validation import random_train_test_split
from spotlight.datasets.movielens import get_movielens_dataset
from spotlight.evaluation import rmse_score, sequence_mrr_score
from spotlight.factorization.explicit import ExplicitFactorizationModel
from spotlight.factorization.implicit import ImplicitFactorizationModel
from spotlight.sequence.implicit import ImplicitSequenceModel

dataset = get_movielens_dataset(variant='100K')
train, test = random_train_test_split(dataset)


def train_and_test(model, train, test, score):
    print('Train and test {}'.format(model))
    model.fit(train, verbose=True)

    _score = score(model, test)
    print('score({}): {}'.format(score, _score))


explicit_model = ExplicitFactorizationModel(n_iter=1)
train_and_test(explicit_model, train, test, rmse_score)

implicit_model = ImplicitFactorizationModel(n_iter=3, loss='bpr')
train_and_test(implicit_model, train, test, rmse_score)

train = train.to_sequence()
test = test.to_sequence()

implicit_cnn_model = ImplicitSequenceModel(n_iter=3,
                                           representation='cnn',
                                           loss='bpr')
train_and_test(implicit_cnn_model, train, test, sequence_mrr_score)