def test_known_output_step_1():
    """to_sequence with step_size=1 must emit one left-padded window per interaction."""
    data = Interactions(np.zeros(5), np.arange(5) + 1, timestamps=np.arange(5))
    result = data.to_sequence(max_sequence_length=5, step_size=1).sequences
    # Most recent window first; each following row drops one item and pads with 0.
    want = np.array([
        [1, 2, 3, 4, 5],
        [0, 1, 2, 3, 4],
        [0, 0, 1, 2, 3],
        [0, 0, 0, 1, 2],
        [0, 0, 0, 0, 1],
    ])
    assert (result == want).all()
def test_known_output_step_2():
    """to_sequence with step_size=2 must emit every second left-padded window."""
    data = Interactions(np.zeros(5), np.arange(5) + 1, timestamps=np.arange(5))
    result = data.to_sequence(max_sequence_length=5, step_size=2).sequences
    # Same windows as step_size=1, but advancing two interactions at a time.
    want = np.array([
        [1, 2, 3, 4, 5],
        [0, 0, 1, 2, 3],
        [0, 0, 0, 0, 1],
    ])
    assert (result == want).all()
def individual_predictions(df, model):
    """Return a (num_users, num_items + 1) matrix of model scores, one row per user.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain 'user_id', 'item_id' and 'entry_at' columns.
        Assumes user ids are 0..num_users-1 — TODO confirm against caller;
        an id outside that range would index past the predictions array.
    model : fitted sequence model
        Must expose ``predict(sequence) -> array of item scores``.

    Each user's row holds the scores for the first sequence that
    ``to_sequence`` emits for that user.
    """
    num_users = len(df['user_id'].unique())
    num_items = len(df['item_id'].unique())
    # num_items + 1 columns: presumably index 0 is the padding item — verify.
    predictions = np.zeros(shape=(num_users, num_items + 1))
    dataset = Interactions(user_ids=np.array(df['user_id'], dtype='int32'),
                           item_ids=np.array(df['item_id'], dtype='int32'),
                           timestamps=df['entry_at'])
    sequences = dataset.to_sequence(max_sequence_length=15)
    # Take the first sequence seen for each user. The previous running-counter
    # approach (`if user == user_id: ...; user_id += 1`) silently produced
    # all-zero rows for every user after the first gap — e.g. when a user had
    # no sequence or ids were not visited in exact 0,1,2,... order. Tracking
    # seen users yields identical output for well-formed input and correct
    # output otherwise.
    seen = set()
    for user, sequence in zip(sequences.user_ids, sequences.sequences):
        if user not in seen:
            predictions[user] = model.predict(sequence)
            seen.add(user)
    return predictions
def interactions_to_sequence(f_train: Interactions, f_test: Interactions):
    """Convert the train and test Interactions to sequence datasets.

    Returns a (train_sequences, test_sequences) tuple, using the default
    ``to_sequence`` parameters for both splits.
    """
    return f_train.to_sequence(), f_test.to_sequence()
interactions = Interactions(train_data["uindex"].to_numpy(), train_data["vindex"].to_numpy(), train_data["pct_cvt"].to_numpy(), train_data["latest_watch_time"].to_numpy(), num_users=len(original_train_data["uindex"].unique()), num_items=num_items) # if "1500K" in suffix: # logger.info("Increasing step size and max_sequence_length") # step_size = 2 # min_sequence_length = 2 # max_sequence_length = 50 train_seq = interactions.to_sequence(max_sequence_length=max_sequence_length, min_sequence_length=min_sequence_length, step_size=step_size) logger.info("Data is loaded and converted to sequences..") writer = SummaryWriter(log_dir='{}/{}'.format(tensorboard_base_dir, model_alias)) writer.add_text('alias', model_alias, 0) writer.add_text('hyperparameters', str(h), 0) def notify_loss_completion(epoch_id, batch_id, loss, net, model): # print("notify_loss_completion") writer.add_scalar("Batch/loss", loss, batch_id) logging.info('[Epoch {}] Batch {}, Loss {}'.format(epoch_id, batch_id, loss))
# Shift all categorical ids up by one so that id 0 is free — Spotlight
# reserves 0 as the padding item in sequence datasets.
sales_categorical['user_id'] = sales_categorical['user_id'] + 1
sales_categorical['product_id'] = sales_categorical['product_id'] + 1
sales_categorical['timestep_id'] = sales_categorical['timestep_id'] + 1
#%%
from spotlight.interactions import Interactions
from spotlight.sequence.implicit import ImplicitSequenceModel

# Build the implicit-feedback dataset; timestep_id orders the interactions.
implicit_interactions = Interactions(
    sales_categorical['user_id'].astype('int32').values,
    sales_categorical['product_id'].astype('int32').values,
    timestamps=sales_categorical['timestep_id'].astype('int32').values)
sequential_interaction = implicit_interactions.to_sequence()
implicit_sequence_model = ImplicitSequenceModel()
#%%
# Train an LSTM-based sequence model on GPU and time the fit.
# NOTE(review): this rebinding discards the default model created above.
start = datetime.now()
implicit_sequence_model = ImplicitSequenceModel(embedding_dim=100,
                                                representation='lstm',
                                                n_iter=5,
                                                use_cuda=True)
implicit_sequence_model.fit(sequential_interaction)
print(datetime.now() - start)
#%%
# NOTE(review): the statement below is truncated in this chunk — the
# predict(...) call is never closed; the remainder presumably follows
# in the next chunk of the file.
prediction = pd.DataFrame(implicit_sequence_model.predict([1337],
# create train models from spotlight.sequence.implicit import ImplicitSequenceModel model = ImplicitSequenceModel(embedding_dim=128) preserving_25_percent_model = ImplicitSequenceModel(embedding_dim=128) preserving_50_percent_model = ImplicitSequenceModel(embedding_dim=128) preserving_75_percent_model = ImplicitSequenceModel(embedding_dim=128) # fit models model.fit(train.to_sequence(), verbose=True) preserving_25_percent_model.fit(preserving_25_percent_train.to_sequence(), verbose=True) preserving_50_percent_model.fit(preserving_50_percent_train.to_sequence(), verbose=True) preserving_75_percent_model.fit(preserving_75_percent_train.to_sequence(), verbose=True) import torch torch.save(preserving_25_percent_model, './preserving_25_percent_model.model') torch.save(preserving_50_percent_model, './preserving_50_percent_model.model') torch.save(preserving_75_percent_model, './preserving_75_percent_model.model') # result evaluation from spotlight.evaluation import mrr_score train_mrrs = mrr_score(model, train) preserving_25_train_mrrs = mrr_score(preserving_25_percent_model, preserving_25_percent_train) preserving_50_train_mrrs = mrr_score(preserving_50_percent_model, preserving_50_percent_train) preserving_75_train_mrrs = mrr_score(preserving_75_percent_model, preserving_75_percent_train)
def main(args):
    """Train an ImplicitSequenceModel on the rating data and run prediction.

    Parameters
    ----------
    args : namespace with seq_len, min_seq_len, step_size, mode, epoch, loss,
        representation, embedding_dim, num_negative_samples, calc_map,
        neg_mode, save_model and model_path attributes.
    """
    df_rate, user_ids, neg_prob = get_data(args)
    load_model = False  # flip to True to restore a saved model after training
    print("seq_len {}, epoch {}".format(args.seq_len, args.epoch))
    # Implicit-feedback dataset; weights carry the normalized counts.
    # (Backslash continuations removed — the parentheses already continue lines.)
    dataset = Interactions(df_rate.mapped_userid.values.astype("int32"),
                           df_rate.foodid.values.astype("int32"),
                           timestamps=df_rate.timestamp.values.astype("int32"),
                           weights=df_rate.norm_cnt.values.astype("float32"))
    train, test = timestamp_based_train_test_split(dataset, test_percentage=0.2)
    # All three splits use identical sequence parameters.
    seq_kwargs = dict(max_sequence_length=args.seq_len,
                      min_sequence_length=args.min_seq_len,
                      step_size=args.step_size,
                      mode=args.mode)
    train = train.to_sequence(**seq_kwargs)
    test = test.to_sequence(**seq_kwargs)
    dataset = dataset.to_sequence(**seq_kwargs)
    model = ImplicitSequenceModel(
        loss=args.loss,
        representation=args.representation,
        embedding_dim=args.embedding_dim,
        n_iter=args.epoch,
        batch_size=256,
        l2=0.0,
        learning_rate=0.001,
        optimizer_func=None,
        use_cuda=True,
        sparse=False,
        random_state=None,
        num_negative_samples=args.num_negative_samples,
        test_data=test,
        neg_prob=neg_prob)
    print("train.shape", train.sequences.shape)
    print("test.shape", test.sequences.shape)
    print("Fitting model")
    for _ in range(5):  # loop index was unused
        # NOTE(review): fitting on the FULL dataset and then on the TEST
        # split leaks evaluation data into the model. Preserved as-is to
        # keep behavior, but this looks unintentional — confirm.
        model.fit(dataset, verbose=True, calc_map=args.calc_map, neg_mode=args.neg_mode)
        model.fit(test, verbose=True, calc_map=args.calc_map, neg_mode=args.neg_mode)
    if args.save_model:
        torch.save(model, args.model_path)
    if load_model:  # idiom fix: was `load_model == True`
        model = torch.load(args.model_path)
    if not args.calc_map:  # idiom fix: was `args.calc_map == False`
        ap = NewAveragePrecision(model, test, k=20)
        print("map: ", ap.mean())
    predict(args, model, df_rate, neg_prob)