from openrec import ImplicitModelTrainer
from openrec.utils import ImplicitDataset
from openrec.recommenders import VisualCML
from openrec.utils.evaluators import AUC, Recall
from openrec.utils.samplers import PairwiseSampler
from config import sess_config
import dataloader

raw_data = dataloader.load_amazon_book()
batch_size = 1000
test_batch_size = 100
item_serving_size = 1000
display_itr = 10000

# Wrap the train/validation/test splits as implicit-feedback datasets.
train_dataset = ImplicitDataset(raw_data['train_data'], raw_data['max_user'],
                                raw_data['max_item'], name='Train')
val_dataset = ImplicitDataset(raw_data['val_data'], raw_data['max_user'],
                              raw_data['max_item'], name='Val')
test_dataset = ImplicitDataset(raw_data['test_data'], raw_data['max_user'],
                               raw_data['max_item'], name='Test')

# NOTE: the source breaks off inside this call; the arguments after l2_reg_mlp
# are assumptions filled in from the other OpenRec examples and may need
# adjusting to the actual VisualCML signature.
model = VisualCML(batch_size=batch_size,
                  max_user=raw_data['max_user'],
                  max_item=raw_data['max_item'],
                  l2_reg=0.001,
                  l2_reg_mlp=0.001,
                  dim_embed=50,                             # assumed
                  item_f_source=raw_data['item_features'],  # assumed visual feature source
                  item_serving_size=item_serving_size,      # assumed
                  opt='Adam',                               # assumed
                  sess_config=sess_config)                  # assumed
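# A minimal sketch of how this example would typically finish, following the
# same pattern as the WCML script below. The sampler settings, Recall cutoffs,
# and iteration count here are assumptions, not values from the source.
sampler = PairwiseSampler(batch_size=batch_size, dataset=train_dataset, num_process=5)
model_trainer = ImplicitModelTrainer(batch_size=batch_size, test_batch_size=test_batch_size,
                                     train_dataset=train_dataset, model=model, sampler=sampler)
auc_evaluator = AUC()
recall_evaluator = Recall(recall_at=[100, 200, 300, 400, 500])  # assumed cutoffs
model_trainer.train(num_itr=int(1e5), display_itr=display_itr,
                    eval_datasets=[val_dataset, test_dataset],
                    evaluators=[auc_evaluator, recall_evaluator])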
import numpy as np
import pandas as pd

from openrec.utils import ImplicitDataset
from openrec.recommenders import BPR

if __name__ == "__main__":
    # We'll need to do a lot more wrangling/cleaning up, but this seems to at
    # least work with different data.
    max_users = 10000
    max_items = 200000

    # Load the merged ratings CSV as a structured array.
    csv = np.genfromtxt('Movies_ratings_small_merged_larger2.csv',
                        delimiter=",",
                        dtype=(int, int, float, int, int, float, int, int, float),
                        names=True,
                        encoding=None)
    print(csv)

    # For now the same interactions are reused for all three splits.
    train_dataset = ImplicitDataset(raw_data=csv, max_user=max_users, max_item=max_items, name='Train')
    val_dataset = ImplicitDataset(raw_data=csv, max_user=max_users, max_item=max_items, name='Val')
    test_dataset = ImplicitDataset(raw_data=csv, max_user=max_users, max_item=max_items, name='Test')

    bpr_model = BPR(batch_size=1000,
                    max_user=train_dataset.max_user(),
                    max_item=train_dataset.max_item(),
                    dim_embed=20,
                    opt='Adam')
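    # A minimal sketch of how this BPR model could then be trained, mirroring the
    # other OpenRec examples in this section. Batch sizes, process counts, and
    # iteration counts are assumptions; imports are shown inline here for
    # self-containment but would normally live at the top of the file.
    from openrec import ImplicitModelTrainer
    from openrec.utils.samplers import PairwiseSampler
    from openrec.utils.evaluators import AUC

    sampler = PairwiseSampler(batch_size=1000, dataset=train_dataset, num_process=5)
    model_trainer = ImplicitModelTrainer(batch_size=1000, test_batch_size=100,
                                         train_dataset=train_dataset, model=bpr_model,
                                         sampler=sampler)
    model_trainer.train(num_itr=int(1e4), display_itr=1000,
                        eval_datasets=[val_dataset], evaluators=[AUC()],
                        num_negatives=200)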
import os
import sys
sys.path.append(os.getcwd())

from openrec import ImplicitModelTrainer
from openrec.utils import ImplicitDataset
from openrec.recommenders import WCML
from openrec.utils.evaluators import AUC
from openrec.utils.samplers import NPairwiseSampler
import dataloader

raw_data = dataloader.load_citeulike()
batch_size = 2000
test_batch_size = 100
display_itr = 500

train_dataset = ImplicitDataset(raw_data['train_data'], raw_data['max_user'],
                                raw_data['max_item'], name='Train')
val_dataset = ImplicitDataset(raw_data['val_data'], raw_data['max_user'],
                              raw_data['max_item'], name='Val')
test_dataset = ImplicitDataset(raw_data['test_data'], raw_data['max_user'],
                               raw_data['max_item'], name='Test')

model = WCML(batch_size=batch_size,
             max_user=train_dataset.max_user(),
             max_item=train_dataset.max_item(),
             dim_embed=20,
             neg_num=5,
             l2_reg=None,
             opt='Adam',
             sess_config=None)
sampler = NPairwiseSampler(batch_size=batch_size, dataset=train_dataset,
                           negativenum=5, num_process=5)
model_trainer = ImplicitModelTrainer(batch_size=batch_size, test_batch_size=test_batch_size,
                                     train_dataset=train_dataset, model=model, sampler=sampler)
auc_evaluator = AUC()

model_trainer.train(num_itr=int(1e5), display_itr=display_itr,
                    eval_datasets=[val_dataset], evaluators=[auc_evaluator],
                    num_negatives=200)
""" def get_focused(dataset, focus_bound, interactions_per_item): focused_indices = [] for i, interaction in enumerate(dataset): user_id, item_id = interaction if interactions_per_item[item_id] < focus_bound[ 1] and interactions_per_item[item_id] >= focus_bound[0]: focused_indices.append(i) return np.take(dataset, focused_indices) raw_data = get_raw_data(sys.argv[1]) val_dataset = ImplicitDataset(raw_data['val_data'], raw_data['max_user'], raw_data['max_item'], name='Val') test_dataset = ImplicitDataset(raw_data['test_data'], raw_data['max_user'], raw_data['max_item'], name='Test') #Getting Focused sets full_dataset = np.concatenate( (raw_data['train_data'], raw_data['val_data'], raw_data['test_data']), axis=0) interactions_per_item = get_item_interaction_dict(full_dataset) #focus_bound, percent_focused = get_focus_bound(full_dataset, interactions_per_item) focused_val_set = get_focused(raw_data['val_data'], focus_bound, interactions_per_item) focused_test_set = get_focused(raw_data['test_data'], focus_bound,