def recommend_k_fastai(model, test, train, top_k=DEFAULT_K, remove_seen=True):
    with Timer() as t:
        total_users, total_items = model.data.train_ds.x.classes.values()
        # Drop the first class, which fastai reserves for unknown categories
        total_items = total_items[1:]
        total_users = total_users[1:]
        test_users = test[DEFAULT_USER_COL].unique()
        test_users = np.intersect1d(test_users, total_users)
        users_items = cartesian_product(test_users, total_items)
        users_items = pd.DataFrame(
            users_items, columns=[DEFAULT_USER_COL, DEFAULT_ITEM_COL]
        )
        # Remove seen items: left-merge against train (cast to str to match the
        # string-typed fastai classes) and keep pairs with no training rating
        training_removed = pd.merge(
            users_items,
            train.astype(str),
            on=[DEFAULT_USER_COL, DEFAULT_ITEM_COL],
            how="left",
        )
        training_removed = training_removed[
            training_removed[DEFAULT_RATING_COL].isna()
        ][[DEFAULT_USER_COL, DEFAULT_ITEM_COL]]
        topk_scores = score(
            model,
            test_df=training_removed,
            user_col=DEFAULT_USER_COL,
            item_col=DEFAULT_ITEM_COL,
            prediction_col=DEFAULT_PREDICTION_COL,
            top_k=top_k,
        )
    return topk_scores, t

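# The function above removes seen items with a left-merge "anti-join": build
# all candidate user-item pairs, merge against the training pairs, and keep
# rows whose rating came back NaN. A minimal, self-contained sketch of that
# pattern (the helper name and toy data below are illustrative assumptions,
# not part of this module):
def _demo_remove_seen_pandas():
    """Illustrative sketch only: the anti-join used in recommend_k_fastai."""
    import pandas as pd

    train_toy = pd.DataFrame(
        {"user": ["a", "a", "b"], "item": ["x", "y", "x"], "rating": [5, 3, 4]}
    )
    # All candidate user-item pairs (the cartesian product)
    pairs = pd.MultiIndex.from_product(
        [train_toy["user"].unique(), train_toy["item"].unique()],
        names=["user", "item"],
    ).to_frame(index=False)
    # Left-merge, then keep only pairs that never appeared in training
    merged = pairs.merge(train_toy, on=["user", "item"], how="left")
    return merged[merged["rating"].isna()][["user", "item"]]
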
def train_fastai(params, data):
    model = collab_learner(
        data,
        n_factors=params["n_factors"],
        y_range=params["y_range"],
        wd=params["wd"],
    )
    with Timer() as t:
        model.fit_one_cycle(cyc_len=params["epochs"], max_lr=params["max_lr"])
    return model, t

def predict_fastai(model, test):
    with Timer() as t:
        preds = score(
            model,
            test_df=test,
            user_col=DEFAULT_USER_COL,
            item_col=DEFAULT_ITEM_COL,
            prediction_col=DEFAULT_PREDICTION_COL,
        )
    return preds, t

def predict_svd(model, test):
    with Timer() as t:
        preds = predict(
            model,
            test,
            usercol=DEFAULT_USER_COL,
            itemcol=DEFAULT_ITEM_COL,
            predcol=DEFAULT_PREDICTION_COL,
        )
    return preds, t

def recommend_k_lightgcn(model, test, train, top_k=DEFAULT_K, remove_seen=True):
    with Timer() as t:
        topk_scores = model.recommend_k_items(
            test, top_k=top_k, remove_seen=remove_seen
        )
    return topk_scores, t

def recommend_k_cornac(model, test, train, top_k=DEFAULT_K, remove_seen=True):
    with Timer() as t:
        topk_scores = predict_ranking(
            model,
            train,
            usercol=DEFAULT_USER_COL,
            itemcol=DEFAULT_ITEM_COL,
            predcol=DEFAULT_PREDICTION_COL,
            remove_seen=remove_seen,
        )
    return topk_scores, t

def test_timer(t):
    t.start()
    assert t.running is True
    time.sleep(1)
    t.stop()
    assert t.running is False
    assert t.interval == pytest.approx(1, abs=TOL)

    with Timer() as t2:
        assert t2.running is True
        time.sleep(1)
    assert t2.interval == pytest.approx(1, abs=TOL)
    assert t2.running is False

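# test_timer exercises a timer with start()/stop(), a running flag, an
# interval in seconds, and context-manager support that stops on exit. A
# minimal sketch of a class satisfying that interface, assuming those
# semantics (the real Timer implementation may differ):
class _TimerSketch:
    """Illustrative only: one way to satisfy the interface tested above."""

    def __init__(self):
        self.running = False
        self.interval = None
        self._start = None

    def start(self):
        self._start = time.perf_counter()
        self.running = True

    def stop(self):
        self.interval = time.perf_counter() - self._start
        self.running = False

    def __enter__(self):
        self.start()
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.stop()
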
def recommend_k_ncf(model, test, train, top_k=DEFAULT_K, remove_seen=True):
    with Timer() as t:
        users, items, preds = [], [], []
        item = list(train[DEFAULT_ITEM_COL].unique())
        # Score every training user against every training item
        for user in train[DEFAULT_USER_COL].unique():
            user = [user] * len(item)
            users.extend(user)
            items.extend(item)
            preds.extend(list(model.predict(user, item, is_list=True)))
        topk_scores = pd.DataFrame(
            data={
                DEFAULT_USER_COL: users,
                DEFAULT_ITEM_COL: items,
                DEFAULT_PREDICTION_COL: preds,
            }
        )
        # Remove seen items: after the outer merge, only pairs absent from the
        # training set have a null rating
        merged = pd.merge(
            train, topk_scores, on=[DEFAULT_USER_COL, DEFAULT_ITEM_COL], how="outer"
        )
        topk_scores = merged[merged[DEFAULT_RATING_COL].isnull()].drop(
            DEFAULT_RATING_COL, axis=1
        )
    return topk_scores, t

def recommend_k_als(model, test, train, top_k=DEFAULT_K, remove_seen=True):
    with Timer() as t:
        # Get the cross join of all user-item pairs and score them.
        users = train.select(DEFAULT_USER_COL).distinct()
        items = train.select(DEFAULT_ITEM_COL).distinct()
        user_item = users.crossJoin(items)
        dfs_pred = model.transform(user_item)

        # Remove seen items
        dfs_pred_exclude_train = dfs_pred.alias("pred").join(
            train.alias("train"),
            (dfs_pred[DEFAULT_USER_COL] == train[DEFAULT_USER_COL])
            & (dfs_pred[DEFAULT_ITEM_COL] == train[DEFAULT_ITEM_COL]),
            how="outer",
        )
        topk_scores = dfs_pred_exclude_train.filter(
            dfs_pred_exclude_train["train." + DEFAULT_RATING_COL].isNull()
        ).select(
            "pred." + DEFAULT_USER_COL,
            "pred." + DEFAULT_ITEM_COL,
            "pred." + DEFAULT_PREDICTION_COL,
        )
    return topk_scores, t

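# The outer join above keeps a prediction row only when its training rating is
# null, i.e. the pair was never seen in training. Spark can express the same
# filtering directly as a left anti join; a sketch of that equivalent step,
# assuming the same DataFrames and column constants (illustrative only, not
# part of this module):
def _demo_remove_seen_spark(dfs_pred, train):
    """Illustrative only: left_anti join as an alternative 'remove seen' step."""
    return dfs_pred.join(
        train, on=[DEFAULT_USER_COL, DEFAULT_ITEM_COL], how="left_anti"
    )
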
def predict_als(model, test):
    with Timer() as t:
        preds = model.transform(test)
    return preds, t

def train_als(params, data):
    symbol = ALS(**params)
    with Timer() as t:
        model = symbol.fit(data)
    return model, t

def train_ncf(params, data):
    model = NCF(n_users=data.n_users, n_items=data.n_items, **params)
    with Timer() as t:
        model.fit(data)
    return model, t

def train_lightgcn(params, data):
    hparams = prepare_hparams(**params)
    model = LightGCN(hparams, data)
    with Timer() as t:
        model.fit()
    return model, t

def train_sar(params, data):
    model = SARSingleNode(**params)
    model.set_index(data)
    with Timer() as t:
        model.fit(data)
    return model, t

def train_bivae(params, data):
    model = cornac.models.BiVAECF(**params)
    with Timer() as t:
        model.fit(data)
    return model, t

def train_svd(params, data):
    model = surprise.SVD(**params)
    with Timer() as t:
        model.fit(data)
    return model, t

@pytest.fixture
def t():
    return Timer()

def train_bpr(params, data):
    model = cornac.models.BPR(**params)
    with Timer() as t:
        model.fit(data)
    return model, t

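# The train_* helpers above share one contract: construct the model outside
# the timer, time only the fit call, and return (model, Timer). A hypothetical
# sketch of a benchmark loop consuming that contract (all names here are
# illustrative assumptions, not part of this module):
def _demo_benchmark_fit_times(trainers, params, data):
    """Illustrative only: collect fit durations from (model, Timer) pairs."""
    results = {}
    for name, train_fn in trainers.items():
        model, t = train_fn(params[name], data)
        # Timer stops when its context exits, so interval is the fit time
        results[name] = {"model": model, "fit_seconds": t.interval}
    return results
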
def train(self, dataset, sampler, **kwargs):
    """High-level function for model training, with periodic evaluation on
    the validation and test datasets.
    """
    num_epochs = kwargs.get("num_epochs", 10)
    batch_size = kwargs.get("batch_size", 128)
    lr = kwargs.get("learning_rate", 0.001)
    val_epoch = kwargs.get("val_epoch", 5)

    num_steps = int(len(dataset.user_train) / batch_size)

    optimizer = tf.keras.optimizers.Adam(
        learning_rate=lr, beta_1=0.9, beta_2=0.999, epsilon=1e-7
    )

    loss_function = self.loss_function

    train_loss = tf.keras.metrics.Mean(name="train_loss")

    train_step_signature = [
        {
            "users": tf.TensorSpec(shape=(None, 1), dtype=tf.int64),
            "input_seq": tf.TensorSpec(
                shape=(None, self.seq_max_len), dtype=tf.int64
            ),
            "positive": tf.TensorSpec(
                shape=(None, self.seq_max_len), dtype=tf.int64
            ),
            "negative": tf.TensorSpec(
                shape=(None, self.seq_max_len), dtype=tf.int64
            ),
        },
        tf.TensorSpec(shape=(None, 1), dtype=tf.int64),
    ]

    @tf.function(input_signature=train_step_signature)
    def train_step(inp, tar):
        with tf.GradientTape() as tape:
            pos_logits, neg_logits, loss_mask = self(inp, training=True)
            loss = loss_function(pos_logits, neg_logits, loss_mask)

        gradients = tape.gradient(loss, self.trainable_variables)
        optimizer.apply_gradients(zip(gradients, self.trainable_variables))

        train_loss(loss)
        return loss

    T = 0.0
    t0 = Timer()
    t0.start()

    for epoch in range(1, num_epochs + 1):
        step_loss = []
        train_loss.reset_states()
        for step in tqdm(
            range(num_steps), total=num_steps, ncols=70, leave=False, unit="b"
        ):
            u, seq, pos, neg = sampler.next_batch()
            inputs, target = self.create_combined_dataset(u, seq, pos, neg)

            loss = train_step(inputs, target)
            step_loss.append(loss)

        if epoch % val_epoch == 0:
            t0.stop()
            t1 = t0.interval
            T += t1
            print("Evaluating...")
            t_test = self.evaluate(dataset)
            t_valid = self.evaluate_valid(dataset)
            print(
                f"\nepoch: {epoch}, time: {T}, valid (NDCG@10: {t_valid[0]}, HR@10: {t_valid[1]})"
            )
            print(
                f"epoch: {epoch}, time: {T}, test (NDCG@10: {t_test[0]}, HR@10: {t_test[1]})"
            )
            t0.start()

    t_test = self.evaluate(dataset)
    print(f"\nepoch: {epoch}, test (NDCG@10: {t_test[0]}, HR@10: {t_test[1]})")

    return t_test
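# Hypothetical usage sketch for the method above. `model` stands for an
# instance of the class defining train; the keyword values are illustrative
# assumptions, and the returned tuple is indexed as (NDCG@10, HR@10),
# matching the prints above:
#
#     ndcg10, hr10 = model.train(
#         dataset,
#         sampler,
#         num_epochs=20,
#         batch_size=128,
#         learning_rate=0.001,
#         val_epoch=5,
#     )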