Exemplo n.º 1
0
    def __init__(
            self,
            loss='regression',
            embedding_dim=32,
            n_iter=10,
            batch_size=256,
            l2=0.0,
            learning_rate=1e-2,
            optimizer_func=None,
            use_cuda=False,
            representation=None,
            sparse=False,
            random_state=None):
        """
        Record the model configuration and seed the random generators.

        Parameters
        ----------
        loss: str
            Must be one of 'regression', 'poisson' or 'logistic';
            any other value fails the assertion.
        random_state: object exposing ``randint``, optional
            Source of the seed passed to ``set_seed``. When a falsy
            value is given, a fresh ``np.random.RandomState()`` is used.

        Every other argument is stored unchanged on the underscore-prefixed
        attribute of the same name.
        """
        allowed_losses = ('regression', 'poisson', 'logistic')
        assert loss in allowed_losses

        # These attributes are not populated here; they start out as None.
        self._num_users = None
        self._num_items = None
        self._net = None
        self._optimizer = None
        self._loss_func = None

        # Plain configuration values.
        self._loss = loss
        self._representation = representation
        self._embedding_dim = embedding_dim
        self._n_iter = n_iter
        self._batch_size = batch_size
        self._learning_rate = learning_rate
        self._l2 = l2
        self._optimizer_func = optimizer_func
        self._sparse = sparse
        self._use_cuda = use_cuda

        if not random_state:
            random_state = np.random.RandomState()
        self._random_state = random_state

        # Draw a seed from the random state and hand it to set_seed.
        seed = self._random_state.randint(-10**8, 10**8)
        set_seed(seed, cuda=self._use_cuda)
Exemplo n.º 2
0
def main(args):
    """
    Run the hyperparameter search for the model and dataset given in *args*.

    The raw arguments are parsed with ``parse_args``; the chosen dataset is
    split by user into train (80%), test (10%) and validation (10%) parts,
    each part is converted to sequences, and ``optimize`` is then called
    once per trial with a growing ``max_evals``. A summary of the trials is
    printed after every call.
    """
    print("CUDA is {}!".format('available' if CUDA else 'not available'))
    args = parse_args(args)

    # Fix random_state
    seed = 72
    set_seed(seed)
    random_state = np.random.RandomState(seed)

    # The amazon dataset uses shorter sequences than the other datasets.
    if args.dataset == 'amazon':
        max_sequence_length, min_sequence_length = 50, 5
        dataset = get_amazon_dataset()
    else:
        max_sequence_length, min_sequence_length = 100, 20
        if args.dataset == 'goodbooks':
            dataset = get_goodbooks_dataset()
        else:
            dataset = get_movielens_dataset(args.dataset.upper())

    # The step size always equals the maximum sequence length.
    sequence_kwargs = dict(max_sequence_length=max_sequence_length,
                           min_sequence_length=min_sequence_length,
                           step_size=max_sequence_length)

    args.variant = args.dataset

    # 80% train; the remaining 20% is halved into test and validation.
    train, rest = user_based_train_test_split(dataset,
                                              test_percentage=0.2,
                                              random_state=random_state)
    test, valid = user_based_train_test_split(rest,
                                              test_percentage=0.5,
                                              random_state=random_state)

    train = train.to_sequence(**sequence_kwargs)
    test = test.to_sequence(**sequence_kwargs)
    valid = valid.to_sequence(**sequence_kwargs)

    print('model: {}, data: {}'.format(args.model, train))

    trials_file = 'experiment_{}_{}.pickle'.format(args.model, args.dataset)
    objective = get_objective(train, valid, test, random_state)
    space = hyperparameter_space(args.model)

    for trial_number in range(args.num_trials):
        print('Iteration {}'.format(trial_number))
        # max_evals grows by one on every pass through the loop.
        trials = optimize(objective,
                          space,
                          trials_fname=trials_file,
                          max_evals=trial_number + 1)

        summarize_trials(trials)
Exemplo n.º 3
0
    def __init__(
            self,
            loss='pointwise',
            embedding_dim=32,
            n_iter=10,
            batch_size=256,
            l2=0.0,
            learning_rate=1e-2,
            optimizer_func=None,
            use_cuda=False,
            sparse=False,
            random_state=None):
        """
        Record the model configuration and seed the random generators.

        Parameters
        ----------
        loss: str
            Must be one of 'pointwise', 'bpr', 'hinge' or
            'adaptive_hinge'; any other value fails the assertion.
        random_state: object exposing ``randint``, optional
            Source of the seed passed to ``set_seed``. When a falsy
            value is given, a fresh ``np.random.RandomState()`` is used.

        Every other argument is stored unchanged on the underscore-prefixed
        attribute of the same name.
        """
        allowed_losses = ('pointwise', 'bpr', 'hinge', 'adaptive_hinge')
        assert loss in allowed_losses

        # These attributes are not populated here; they start out as None.
        self._num_users = None
        self._num_items = None
        self._net = None
        self._optimizer = None

        # Plain configuration values.
        self._loss = loss
        self._embedding_dim = embedding_dim
        self._n_iter = n_iter
        self._batch_size = batch_size
        self._learning_rate = learning_rate
        self._l2 = l2
        self._optimizer_func = optimizer_func
        self._sparse = sparse
        self._use_cuda = use_cuda

        if not random_state:
            random_state = np.random.RandomState()
        self._random_state = random_state

        # Draw a seed from the random state and hand it to set_seed.
        seed = self._random_state.randint(-10**8, 10**8)
        set_seed(seed, cuda=self._use_cuda)
Exemplo n.º 4
0
def build_sequence_model(hyperparameters, train, random_state):
    """
    Build an ``ImplicitSequenceModel`` on top of an ``LSTMNet``.

    Parameters
    ----------
    hyperparameters: mapping
        Must provide 'compression_ratio', 'embedding_dim', 'loss',
        'n_iter', 'batch_size', 'learning_rate' and 'l2'.
    train: object with a ``num_items`` attribute
        Supplies the number of items for the embedding layer and network.
    random_state:
        Accepted but not used: the function always seeds with 42.

    Returns
    -------
    The constructed model; this function does not fit it.
    """
    set_seed(42, CUDA)

    compression_ratio = hyperparameters['compression_ratio']
    embedding_dim = hyperparameters['embedding_dim']

    # A compression ratio below 1.0 selects the Bloom embedding layer;
    # otherwise the plain scaled embedding layer is used.
    if compression_ratio < 1.0:
        item_embeddings = BloomEmbedding(train.num_items,
                                         embedding_dim,
                                         compression_ratio=compression_ratio,
                                         num_hash_functions=4,
                                         padding_idx=0)
    else:
        item_embeddings = ScaledEmbedding(train.num_items,
                                          embedding_dim,
                                          padding_idx=0)

    network = LSTMNet(train.num_items,
                      embedding_dim,
                      item_embedding_layer=item_embeddings)

    return ImplicitSequenceModel(
        loss=hyperparameters['loss'],
        n_iter=hyperparameters['n_iter'],
        batch_size=hyperparameters['batch_size'],
        learning_rate=hyperparameters['learning_rate'],
        embedding_dim=hyperparameters['embedding_dim'],
        l2=hyperparameters['l2'],
        representation=network,
        use_cuda=CUDA,
        random_state=np.random.RandomState(42))
Exemplo n.º 5
0
    def __init__(
            self,
            loss='regression',
            embedding_dim=32,
            n_iter=10,
            batch_size=256,
            l2=0.0,
            learning_rate=1e-2,
            optimizer_func=None,
            use_cuda=False,
            sparse=False,
            random_state=None):
        """
        Record the model configuration and seed the random generators.

        Parameters
        ----------
        loss: str
            Must be 'regression' or 'poisson'; any other value fails
            the assertion.
        random_state: object exposing ``randint``, optional
            Source of the seed passed to ``set_seed``. When a falsy
            value is given, a fresh ``np.random.RandomState()`` is used.

        Every other argument is stored unchanged on the underscore-prefixed
        attribute of the same name.
        """
        allowed_losses = ('regression', 'poisson')
        assert loss in allowed_losses

        # These attributes are not populated here; they start out as None.
        self._num_users = None
        self._num_items = None
        self._net = None
        self._optimizer = None

        # Plain configuration values.
        self._loss = loss
        self._embedding_dim = embedding_dim
        self._n_iter = n_iter
        self._batch_size = batch_size
        self._learning_rate = learning_rate
        self._l2 = l2
        self._optimizer_func = optimizer_func
        self._sparse = sparse
        self._use_cuda = use_cuda

        if not random_state:
            random_state = np.random.RandomState()
        self._random_state = random_state

        # Draw a seed from the random state and hand it to set_seed.
        seed = self._random_state.randint(-10**8, 10**8)
        set_seed(seed, cuda=self._use_cuda)
Exemplo n.º 6
0
    def __init__(self,
                 loss='bce',
                 embedding_dim=32,
                 n_iter=10,
                 batch_size=256,
                 l2=0.0,
                 learning_rate=1e-2,
                 optimizer_func=None,
                 use_cuda=False,
                 representation=None,
                 sparse=False,
                 random_state=None,
                 num_negative_samples=5,
                 k_sample=-1,
                 inputSample=-1):
        """
        Record the model configuration, precompute the batch index pairs
        and seed the random generators.

        Parameters
        ----------
        loss: str
            Must be one of 'pointwise', 'bpr', 'hinge', 'adaptive_hinge'
            or 'bce'; any other value fails the assertion.
        batch_size: int
            Stored as ``_batch_size`` and also used to size ``combPos``.
        random_state: object exposing ``randint``, optional
            Source of the seed passed to ``set_seed``. When a falsy
            value is given, a fresh ``np.random.RandomState()`` is used.
        k_sample, inputSample:
            Stored unchanged on the public attributes of the same name.

        Every other argument is stored unchanged on the underscore-prefixed
        attribute of the same name.

        Attributes
        ----------
        combPos: torch.Tensor
            Shape ``(batch_size * (batch_size - 1) / 2, 2)`` in the default
            floating dtype. Row ``k`` holds the ``k``-th pair ``(i, j)``
            with ``0 <= i < j < batch_size``, ordered by ``i`` then ``j``.
        """

        assert loss in ('pointwise',
                        'bpr',
                        'hinge',
                        'adaptive_hinge',
                        'bce')

        self._loss = loss
        self._embedding_dim = embedding_dim
        self._n_iter = n_iter
        self._learning_rate = learning_rate
        self._batch_size = batch_size
        self._l2 = l2
        self._use_cuda = use_cuda
        self._representation = representation
        self._sparse = sparse
        self._optimizer_func = optimizer_func
        self._random_state = random_state or np.random.RandomState()
        self._num_negative_samples = num_negative_samples

        # These attributes are not populated here; they start out as None.
        self._num_users = None
        self._num_items = None
        self._net = None
        self._optimizer = None
        self._loss_func = None
        self.k_sample = k_sample
        self.inputSample = inputSample

        # All index pairs (i, j) with i < j < batch_size.
        pairs = [(first, second)
                 for first in range(self._batch_size)
                 for second in range(first + 1, self._batch_size)]
        # Build the tensor in a single call rather than writing it element
        # by element. The explicit reshape keeps the (0, 2) shape when
        # there are no pairs (batch_size < 2).
        self.combPos = torch.tensor(
            pairs, dtype=torch.get_default_dtype()).reshape(len(pairs), 2)

        set_seed(self._random_state.randint(-10**8, 10**8),
                 cuda=self._use_cuda)
Exemplo n.º 7
0
    def __init__(
            self,
            loss='pointwise',
            embedding_dim=32,
            n_iter=10,
            batch_size=256,
            l2=0.0,
            learning_rate=1e-2,
            optimizer_func=None,
            use_cuda=False,
            representation=None,
            sparse=False,
            random_state=None,
            num_negative_samples=5,
            betas=(0.9, 0.999),
            log_loss_interval=500,
            log_eval_interval=5000,
            notify_loss_completion=None,
            notify_batch_eval_completion=None,
            notify_epoch_completion=None,
            amsgrad=False,
            adamw=None):
        """
        Record the model configuration and seed the random generators.

        Parameters
        ----------
        loss: str
            Must be one of 'pointwise', 'bpr', 'hinge', 'adaptive_hinge'
            or 'adaptive_bpr'; any other value fails the assertion.
        random_state: object exposing ``randint``, optional
            Source of the seed passed to ``set_seed``. When a falsy
            value is given, a fresh ``np.random.RandomState()`` is used.

        Every other argument is stored unchanged on the underscore-prefixed
        attribute of the same name.
        """
        allowed_losses = ('pointwise', 'bpr', 'hinge', 'adaptive_hinge',
                          'adaptive_bpr')
        assert loss in allowed_losses

        # These attributes are not populated here; they start out as None.
        self._num_users = None
        self._num_items = None
        self._net = None
        self._optimizer = None
        self._loss_func = None

        # Model and training configuration.
        self._loss = loss
        self._representation = representation
        self._embedding_dim = embedding_dim
        self._n_iter = n_iter
        self._batch_size = batch_size
        self._learning_rate = learning_rate
        self._l2 = l2
        self._num_negative_samples = num_negative_samples
        self._sparse = sparse
        self._use_cuda = use_cuda

        # Optimizer-related settings.
        self._optimizer_func = optimizer_func
        self._betas = betas
        self._amsgrad = amsgrad
        self._adamw = adamw

        # Logging intervals and notification hooks.
        self._log_loss_interval = log_loss_interval
        self._log_eval_interval = log_eval_interval
        self._notify_loss_completion = notify_loss_completion
        self._notify_batch_eval_completion = notify_batch_eval_completion
        self._notify_epoch_completion = notify_epoch_completion

        if not random_state:
            random_state = np.random.RandomState()
        self._random_state = random_state

        # Draw a seed from the random state and hand it to set_seed.
        seed = self._random_state.randint(-10**8, 10**8)
        set_seed(seed, cuda=self._use_cuda)
Exemplo n.º 8
0
    def __init__(
            self,
            loss='pointwise',
            representation='pooling',
            embedding_dim=32,
            n_iter=10,
            batch_size=256,
            l2=0.0,
            learning_rate=1e-2,
            optimizer_func=None,
            use_cuda=False,
            sparse=False,
            random_state=None,
            num_negative_samples=5):
        """
        Record the model configuration and seed the random generators.

        Parameters
        ----------
        loss: str
            Must be one of 'pointwise', 'bpr', 'hinge' or
            'adaptive_hinge'; any other value fails the assertion.
        representation: str or other object
            When a string, it must be 'pooling', 'cnn' or 'lstm'.
            Non-string values are stored without any check.
        random_state: object exposing ``randint``, optional
            Source of the seed passed to ``set_seed``. When a falsy
            value is given, a fresh ``np.random.RandomState()`` is used.

        Every other argument is stored unchanged on the underscore-prefixed
        attribute of the same name.
        """
        allowed_losses = ('pointwise', 'bpr', 'hinge', 'adaptive_hinge')
        assert loss in allowed_losses

        # Only string representations are validated.
        if isinstance(representation, str):
            named_representations = ('pooling', 'cnn', 'lstm')
            assert representation in named_representations

        # These attributes are not populated here; they start out as None.
        self._num_items = None
        self._net = None
        self._optimizer = None
        self._loss_func = None

        # Plain configuration values.
        self._loss = loss
        self._representation = representation
        self._embedding_dim = embedding_dim
        self._n_iter = n_iter
        self._batch_size = batch_size
        self._learning_rate = learning_rate
        self._l2 = l2
        self._num_negative_samples = num_negative_samples
        self._optimizer_func = optimizer_func
        self._sparse = sparse
        self._use_cuda = use_cuda

        if not random_state:
            random_state = np.random.RandomState()
        self._random_state = random_state

        # Draw a seed from the random state and hand it to set_seed.
        seed = self._random_state.randint(-10**8, 10**8)
        set_seed(seed, cuda=self._use_cuda)
Exemplo n.º 9
0
    def __init__(
            self,
            loss='pointwise',
            embedding_dim=32,
            memory_dim=10,
            n_iter=10,
            batch_size=256,
            l2=0.0,
            learning_rate=1e-2,
            optimizer_func=None,
            use_cuda=False,
            representation=None,
            sparse=False,
            random_state=None,
            num_negative_samples=5,
            margin=None,
            cov_reg=None):
        """
        Record the model configuration and seed the random generators.

        Parameters
        ----------
        loss: str
            Must be one of 'pointwise', 'bpr', 'hinge', 'adaptive_hinge'
            or 'warp'; any other value fails the assertion.
        random_state: object exposing ``randint``, optional
            Source of the seed passed to ``set_seed``. When a falsy
            value is given, a fresh ``np.random.RandomState()`` is used.

        Every other argument is stored unchanged on the underscore-prefixed
        attribute of the same name.
        """
        allowed_losses = ('pointwise', 'bpr', 'hinge', 'adaptive_hinge',
                          'warp')
        assert loss in allowed_losses

        # These attributes are not populated here; they start out as None.
        self._num_users = None
        self._num_items = None
        self._net = None
        self._optimizer = None
        self._loss_func = None

        # Plain configuration values.
        self._loss = loss
        self._representation = representation
        self._embedding_dim = embedding_dim
        self._memory_dim = memory_dim
        self._n_iter = n_iter
        self._batch_size = batch_size
        self._learning_rate = learning_rate
        self._l2 = l2
        self._num_negative_samples = num_negative_samples
        self._margin = margin
        self._cov_reg = cov_reg
        self._optimizer_func = optimizer_func
        self._sparse = sparse
        self._use_cuda = use_cuda

        if not random_state:
            random_state = np.random.RandomState()
        self._random_state = random_state

        # Draw a seed from the random state and hand it to set_seed.
        seed = self._random_state.randint(-10**8, 10**8)
        set_seed(seed, cuda=self._use_cuda)
Exemplo n.º 10
0
    def __init__(
            self,
            loss='pointwise',
            representation='pooling',
            embedding_dim=32,
            n_iter=10,
            batch_size=256,
            l2=0.0,
            learning_rate=1e-2,
            optimizer_func=None,
            use_cuda=False,
            sparse=False,
            random_state=None):
        """
        Record the model configuration and seed the random generators.

        Parameters
        ----------
        loss: str
            Must be one of 'pointwise', 'bpr', 'hinge' or
            'adaptive_hinge'; any other value fails the assertion.
        representation: str or other object
            When a string, it must be 'pooling', 'cnn' or 'lstm'.
            Non-string values are stored without any check.
        random_state: object exposing ``randint``, optional
            Source of the seed passed to ``set_seed``. When a falsy
            value is given, a fresh ``np.random.RandomState()`` is used.

        Every other argument is stored unchanged on the underscore-prefixed
        attribute of the same name.
        """
        allowed_losses = ('pointwise', 'bpr', 'hinge', 'adaptive_hinge')
        assert loss in allowed_losses

        # Only string representations are validated.
        if isinstance(representation, str):
            named_representations = ('pooling', 'cnn', 'lstm')
            assert representation in named_representations

        # These attributes are not populated here; they start out as None.
        self._num_items = None
        self._net = None
        self._optimizer = None

        # Plain configuration values.
        self._loss = loss
        self._representation = representation
        self._embedding_dim = embedding_dim
        self._n_iter = n_iter
        self._batch_size = batch_size
        self._learning_rate = learning_rate
        self._l2 = l2
        self._optimizer_func = optimizer_func
        self._sparse = sparse
        self._use_cuda = use_cuda

        if not random_state:
            random_state = np.random.RandomState()
        self._random_state = random_state

        # Draw a seed from the random state and hand it to set_seed.
        seed = self._random_state.randint(-10**8, 10**8)
        set_seed(seed, cuda=self._use_cuda)
def train_model(df, hyperparams):
    """
    Fit an ``ImplicitSequenceModel`` on the interactions in *df*.

    Parameters
    ----------
    df: table-like
        Must provide 'user_id', 'item_id' and 'entry_at' columns.
    hyperparams: mapping
        Keyword arguments forwarded to ``ImplicitSequenceModel``.

    Returns
    -------
    The fitted model. The mean MRR on the held-out 20% of users is
    printed but not returned.
    """
    # Fix random_state
    seed = 42
    set_seed(seed)
    random_state = np.random.RandomState(seed)

    sequence_kwargs = dict(max_sequence_length=15,
                           min_sequence_length=2,
                           step_size=1)

    # create dataset using interactions dataframe and timestamps
    user_ids = np.array(df['user_id'], dtype='int32')
    item_ids = np.array(df['item_id'], dtype='int32')
    dataset = Interactions(user_ids=user_ids,
                           item_ids=item_ids,
                           timestamps=df['entry_at'])

    # create training and test sets using a 80/20 split
    train, test = user_based_train_test_split(dataset,
                                              test_percentage=0.2,
                                              random_state=random_state)

    # convert to sequences
    train = train.to_sequence(**sequence_kwargs)
    test = test.to_sequence(**sequence_kwargs)

    print('data: {}'.format(train))

    # initialize and train model
    model = ImplicitSequenceModel(**hyperparams,
                                  use_cuda=CUDA,
                                  random_state=random_state)
    model.fit(train, verbose=True)

    # compute mrr score on test set
    mean_test_mrr = sequence_mrr_score(model, test).mean()
    print('MRR score on test set: {}'.format(mean_test_mrr))

    return model
def main(max_evals):
    """
    Run a hyperparameter search on the interactions stored at ``FILE_PATH``.

    The CSV is loaded, ordered by its 'entry_at' column, and its
    'subscriber_id' / 'ddi_id' values are mapped to integer user and item
    ids. The interactions are split by user into train (80%), test (10%)
    and validation (10%) parts, converted to sequences, and passed to
    ``optimize``.

    Parameters
    ----------
    max_evals: int
        Forwarded to ``optimize`` as ``max_evals``.

    Returns
    -------
    The trials object returned by ``optimize``.
    """
    status = 'available' if CUDA else 'not available'
    print("CUDA is {}!".format(status))

    # Fix random_state
    seed = 42
    set_seed(seed)
    random_state = np.random.RandomState(seed)

    sequence_kwargs = dict(max_sequence_length=15,
                           min_sequence_length=2,
                           step_size=1)

    df = pd.read_csv(FILE_PATH)
    # Each group of unused columns is dropped when its first column is
    # present. errors='ignore' keeps a file that lacks one of the sibling
    # columns from raising a KeyError.
    if 'time_of_day' in df.columns:
        df = df.drop(
            columns=['time_of_day', 'time_of_year', 'is_content_block'],
            errors='ignore')
    if 'Unnamed: 0' in df.columns:
        df = df.drop(columns=['Unnamed: 0', 'js_key'], errors='ignore')

    sub_col = 'subscriber_id'
    block_col = 'ddi_id'
    time_col = 'entry_at'

    # preprocess dataframe: order rows by time and renumber them from 0.
    # reset_index(drop=True) discards the old index directly, so no
    # temporary 'index' column is created and then removed.
    df[time_col] = pd.to_datetime(df[time_col])
    df = df.sort_values(by=time_col).reset_index(drop=True)

    # create idx mapping compatible with spotlight, map users and items.
    # User ids start at 0 and item ids start at 1
    # (presumably 0 is reserved for padding -- TODO confirm).
    sub_mapping = {k: v for v, k in enumerate(df[sub_col].unique())}
    block_mapping = {k: v for v, k in enumerate(df[block_col].unique(), 1)}
    df['user_id'] = df[sub_col].map(sub_mapping)
    df['item_id'] = df[block_col].map(block_mapping)

    # create dataset using interactions and timestamps
    dataset = Interactions(user_ids=np.array(df['user_id'], dtype='int32'),
                           item_ids=np.array(df['item_id'], dtype='int32'),
                           timestamps=df[time_col])

    # create training, validation and test sets using a 80/10/10 split
    train, rest = user_based_train_test_split(
        dataset,
        test_percentage=0.2,
        random_state=random_state)
    test, valid = user_based_train_test_split(
        rest,
        test_percentage=0.5,
        random_state=random_state)

    # convert to sequences
    train = train.to_sequence(**sequence_kwargs)
    test = test.to_sequence(**sequence_kwargs)
    valid = valid.to_sequence(**sequence_kwargs)

    print('data: {}'.format(train))

    # The trials file name carries a timestamp, so each run writes to its
    # own file.
    dtime = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
    fname = './experiment_{}.pickle'.format(dtime)
    objective = get_objective(train, valid, test, random_state)
    space = hyperparameter_space()

    trials = optimize(objective,
                      space,
                      trials_fname=fname,
                      max_evals=max_evals)

    summarize_trials(trials)

    return trials