def main():
    """Train NARM with a precomputed item-transition matrix and evaluate
    on the test split after every epoch.

    Relies on module-level ``args``, ``device`` and helpers
    (``load_data``, ``collate_fn``, ``trainForEpoch``, ``validate``)
    defined elsewhere in this file.
    """
    print('Loading data...')
    # valid_portion=0: the validation split is empty; valid_loader below
    # is built for symmetry but never consumed.
    train, valid, test = load_data(args.dataset_path, valid_portion=0)
    train_data = RecSysDataset(train)
    valid_data = RecSysDataset(valid)
    test_data = RecSysDataset(test)
    train_loader = DataLoader(train_data, batch_size=args.batch_size,
                              shuffle=True, collate_fn=collate_fn)
    valid_loader = DataLoader(valid_data, batch_size=args.batch_size,
                              shuffle=False, collate_fn=collate_fn)
    test_loader = DataLoader(test_data, batch_size=args.batch_size,
                             shuffle=False, collate_fn=collate_fn)

    ## Load Transition Matrix
    # Drop the CSV's leading index column and mirror the column labels onto
    # the rows so the matrix is square and label-aligned.
    M2 = pd.read_csv('datasets/transition/final2_transition_gowalla_narm.csv')
    M2 = M2.T[1:].T
    M2.index = M2.columns

    n_items = 38575  # gowalla; other datasets use 3271 / 8487

    model = NARM(n_items, M2, args.hidden_size, args.embed_dim,
                 args.batch_size).to(device)

    optimizer = optim.Adam(model.parameters(), args.lr)
    criterion = nn.CrossEntropyLoss()
    scheduler = StepLR(optimizer, step_size=args.lr_dc_step, gamma=args.lr_dc)

    for epoch in tqdm(range(args.epoch)):
        # Set this epoch's LR up front (legacy explicit-epoch StepLR API),
        # then train for one epoch.
        scheduler.step(epoch=epoch)
        trainForEpoch(train_loader, model, optimizer, epoch, args.epoch,
                      criterion, log_aggr=512)

        # NOTE(review): this evaluates on the *test* loader every epoch even
        # though the message says "validation" — confirm that is intended.
        recall10, mrr10, recall20, mrr20, recall50, mrr50 = validate(
            test_loader, model)
        print(
            'Epoch {} validation: Recall@{}: {:.4f}, MRR@{}: {:.4f}, '
            'Recall@{}: {:.4f}, MRR@{}: {:.4f}, Recall@{}: {:.4f}, '
            'MRR@{}: {:.4f} \n'
            .format(epoch, 10, recall10, 10, mrr10, 20, recall20, 20, mrr20,
                    50, recall50, 50, mrr50))

        # store best loss and save a model checkpoint
        ckpt_dict = {
            'epoch': epoch + 1,
            'state_dict': model.state_dict(),
            'optimizer': optimizer.state_dict()
        }
        # Fix: also save on the final epoch — previously only every 10th
        # epoch was persisted, so the last trained state could be lost.
        if epoch % 10 == 0 or epoch == args.epoch - 1:
            torch.save(ckpt_dict, 'latest_checkpoint.pth.tar')
def main():
    """Train or evaluate DPAN.

    In ``--test`` mode the latest checkpoint is loaded and Recall/MRR are
    reported on the test split; otherwise the model is trained and a
    checkpoint is written after every epoch.
    """
    print('Loading data...')
    train_set, valid_set, test_set = load_data(
        args.dataset_path, valid_portion=args.valid_portion)

    # Build the three loaders uniformly; only the training split is shuffled.
    loaders = {}
    for split, data, shuffle in (('train', train_set, True),
                                 ('valid', valid_set, False),
                                 ('test', test_set, False)):
        loaders[split] = DataLoader(RecSysDataset(data),
                                    batch_size=args.batch_size,
                                    shuffle=shuffle,
                                    collate_fn=collate_fn)
    train_loader = loaders['train']
    valid_loader = loaders['valid']
    test_loader = loaders['test']

    # Item-vocabulary size is fixed per dataset directory.
    dataset_dir = args.dataset_path.split('/')[-2]
    if dataset_dir == 'diginetica':
        n_items = 43098
    elif dataset_dir in ['yoochoose1_64', 'yoochoose1_4']:
        n_items = 37484
    else:
        raise Exception('Unknown Dataset!')

    model = DPAN(n_items, args.hidden_size, args.embed_dim, args.batch_size,
                 args.alpha_pool, args.beta_pool).to(device)

    if args.test:
        ckpt = torch.load('latest_checkpoint.pth.tar')
        model.load_state_dict(ckpt['state_dict'])
        recall, mrr = validate(test_loader, model)
        print("Test: Recall@{}: {:.4f}, MRR@{}: {:.4f}".format(
            args.topk, recall, args.topk, mrr))
        return

    optimizer = optim.Adam(model.parameters(), args.lr)
    criterion = nn.CrossEntropyLoss()
    scheduler = StepLR(optimizer, step_size=args.lr_dc_step, gamma=args.lr_dc)

    for epoch in tqdm(range(args.epoch)):
        # train for one epoch, timing the pass
        scheduler.step(epoch=epoch)
        t0 = time.time()
        trainForEpoch(train_loader, model, optimizer, epoch, args.epoch,
                      criterion, log_aggr=200)
        print('Epoch {} 运行时间:{:.4f}s\n'.format(epoch, time.time() - t0))

        recall, mrr = validate(valid_loader, model)
        print('Epoch {} validation: Recall@{}: {:.4f}, MRR@{}: {:.4f} \n'.format(
            epoch, args.topk, recall, args.topk, mrr))

        # store best loss and save a model checkpoint
        torch.save({
            'epoch': epoch + 1,
            'state_dict': model.state_dict(),
            'optimizer': optimizer.state_dict()
        }, 'latest_checkpoint.pth.tar')
def run_bb_examples_per_step_exp():
    """Black-box single-user attack with a fixed number of steps, sweeping
    the number of examples per step, on Clothing_Shoes_and_Jewelry / VBPR.

    Results are written under ``exp_ex2step/``. Depends on module-level
    ``default_args``, ``load_model``, ``Experimentation`` and ``Args``.
    """
    dataset_name = 'Clothing_Shoes_and_Jewelry'
    algo_name = 'vbpr'
    dataset = RecSysDataset(dataset_name)
    model = load_model(dataset_name, algo_name, dataset)
    exp = Experimentation(model, dataset_name)

    seed = 0
    random.seed(seed)
    args = {
        'seed': seed,
        'experiment': 'single_user',
        'dataset_name': dataset_name,
        'epsilon': 1 / 255,
        'gamma': 7,
        'blackbox': 1,
        'by_rank': 1,
        'steps': 20,
    }
    args.update(default_args)

    exp_folder = f"exp_ex2step"
    if not os.path.exists(exp_folder):
        os.makedirs(exp_folder)

    repeat = 100
    # Kept for reference: an earlier variant swept (steps, examples) pairs
    # under this fixed query budget; the dead code for it was removed.
    budget = 4096

    random_users = random.sample(range(exp.model.n_users), k=repeat)
    random_items = random.sample(range(exp.model.n_items), k=repeat)
    for u, i in zip(random_users, random_items):
        args['user'] = u
        args['item'] = i
        for examples in [2048, 1024, 512, 256, 128, 64, 32, 16, 8, 4, 2]:
            steps = args['steps']
            args['examples'] = examples
            name = f"main[{examples}, {steps}, {u}, {i}]"
            # NOTE(review): unlike the other experiments, the returned image
            # is not saved as JPEG here — confirm that is intentional.
            im = exp.run(f"{exp_folder}/{name}", Args(args))
def run_main_table_exp():
    """Main-table experiment on Electronics / DeepStyle.

    For each of 30 random (user, item) pairs, runs the white-box attack
    once and the black-box attack in both by-rank modes, saving every
    perturbed image as JPEG under the experiment folder.
    """
    dataset_name = 'Electronics'
    algo_name = 'deepstyle'
    dataset = RecSysDataset(dataset_name)
    model = load_model(dataset_name, algo_name, dataset)
    exp = Experimentation(model, dataset_name)

    seed = 0
    random.seed(seed)
    args = {
        'seed': seed,
        'experiment': 'single_user',
        'dataset_name': dataset_name,
        'steps': 20,
        'epsilon': 1 / 255,
        'gamma': 7,
        'examples': 32,
    }
    args.update(default_args)

    exp_folder = f"exp_{dataset_name}_{algo_name}"
    if not os.path.exists(exp_folder):
        os.makedirs(exp_folder)

    repeat = 30
    users = random.sample(range(exp.model.n_users), k=repeat)
    items = random.sample(range(exp.model.n_items), k=repeat)
    for u, i in zip(users, items):
        args['user'], args['item'] = u, i
        for blackbox in [0, 1]:
            args['blackbox'] = blackbox
            # Black-box runs are repeated with and without rank feedback;
            # the white-box run uses the current by_rank setting as-is.
            variants = [0, 1] if blackbox else [None]
            for by_rank in variants:
                if by_rank is None:
                    name = f"main[{blackbox}, {u}, {i}]"
                else:
                    args['by_rank'] = by_rank
                    name = f"main[{blackbox}:{by_rank}, {u}, {i}]"
                im = exp.run(f"{exp_folder}/{name}", Args(args))
                im.save(f"{exp_folder}/{name}.jpeg", "JPEG")
def run_from_rank_exp():
    """Attack items drawn from successive popularity-rank buckets.

    For each bucket a single rank is sampled, then both white-box and
    black-box attacks are run for a fixed pool of 50 random users.
    """
    dataset_name = 'Clothing_Shoes_and_Jewelry'
    algo_name = 'vbpr'
    dataset = RecSysDataset(dataset_name)
    model = load_model(dataset_name, algo_name, dataset)
    exp = Experimentation(model, dataset_name)

    seed = 0
    random.seed(seed)
    args = {
        'seed': seed,
        'experiment': 'single_user',
        'dataset_name': dataset_name,
        'steps': 20,
        'epsilon': 1 / 255,
        'gamma': 7,
        'by_rank': 1,
        'examples': 32,
    }
    args.update(default_args)

    exp_folder = f"exp_from_rank_range"
    if not os.path.exists(exp_folder):
        os.makedirs(exp_folder)

    random_users = random.sample(range(exp.model.n_users), k=50)

    # Consecutive boundaries define the rank buckets to sample from.
    rank_ranges = [1, 1000, 10000, 100000, 455412]
    for lo, hi in zip(rank_ranges, rank_ranges[1:]):
        from_rank = random.randint(lo, hi)
        args['from_rank'] = from_rank
        for blackbox in [0, 1]:
            args['blackbox'] = blackbox
            for user in random_users:
                args['user'] = user
                name = f"from_rank[{from_rank}, {blackbox}, {user}]"
                exp.run(f"{exp_folder}/{name}", Args(args))
def run_general_pop_exp():
    """General (all-user) popularity attack on 30 random items.

    Each item is attacked in both white-box and black-box modes, and the
    model is reloaded from disk after every run so successive experiments
    start from the same clean weights.
    """
    dataset_name = 'Clothing_Shoes_and_Jewelry'
    algo_name = 'vbpr'
    dataset = RecSysDataset(dataset_name)
    model = load_model(dataset_name, algo_name, dataset)
    exp = Experimentation(model, dataset_name)

    seed = 0
    # NOTE(review): args['seed'] is None here, unlike the sibling
    # experiments — presumably the global seeding below replaces it;
    # confirm that is intentional.
    args = {
        'seed': None,
        'experiment': 'general',
        'dataset_name': dataset_name,
        'epsilon': 1 / 255,
        'gamma': 7,
        'by_rank': 1,
        "examples": 32,
        "steps": 30,
    }
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    args.update(default_args)

    exp_folder = f"exp_genpop_pre25_{dataset_name}_{algo_name}"
    if not os.path.exists(exp_folder):
        os.makedirs(exp_folder)

    for item in random.sample(range(exp.model.n_items), k=30):
        args['item'] = item
        for blackbox in (0, 1):
            args['blackbox'] = blackbox
            name = f"[{item, blackbox}]"
            exp.run(f"{exp_folder}/{name}", Args(args))
            # Restore clean weights before the next run.
            exp.model = load_model(dataset_name, algo_name, dataset)
def single_user_log():
    """Log black-box single-user attacks for 150 random (user, item) pairs
    on Clothing_Shoes_and_Jewelry / VBPR."""
    dataset_name = 'Clothing_Shoes_and_Jewelry'
    algo_name = 'vbpr'
    dataset = RecSysDataset(dataset_name)
    model = load_model(dataset_name, algo_name, dataset)
    exp = Experimentation(model, dataset_name)

    seed = 0
    random.seed(seed)
    args = {
        'seed': seed,
        'experiment': 'single_user',
        'dataset_name': dataset_name,
        'steps': 30,
        'epsilon': 1 / 255,
        'gamma': 7,
        'by_rank': 1,
        'blackbox': 1,
        'examples': 64,
    }
    args.update(default_args)

    # NOTE(review): folder name matches run_from_rank_exp's — possibly a
    # copy-paste leftover; confirm results should share that directory.
    exp_folder = f"exp_from_rank_range"
    if not os.path.exists(exp_folder):
        os.makedirs(exp_folder)

    pairs = zip(random.sample(range(exp.model.n_users), k=150),
                random.sample(range(exp.model.n_items), k=150))
    for u, i in pairs:
        args['user'] = u
        args['item'] = i
        name = f"[{u, i}]"
        exp.run(f"{exp_folder}/{name}", Args(args))
from dataset import RecSysDataset
from train import Trainer
from models import VBPR
import torch

if __name__ == '__main__':
    # Hyperparameters for this VBPR training run.
    k, k2 = 10, 20                 # latent / extra factor sizes passed to VBPR
    batch_size, n_epochs = 128, 20

    dataset = RecSysDataset()
    model = VBPR(dataset.n_users,
                 dataset.n_items,
                 dataset.corpus.image_features,
                 k,
                 k2)
    Trainer(model, dataset).train(n_epochs, batch_size)
    # Persists the whole model object (not just a state_dict) — downstream
    # loaders expect this format.
    torch.save(model, 'vbpr_resnet50_v1.pth')
# CLI options (the parser object is constructed earlier in the file,
# outside this view; --seed, --k, --k2 are presumably added there).
parser.add_argument('--patience', type=int, default=2)
parser.add_argument('--lambda_w', type=float, default=0.01)
parser.add_argument('--lambda_b', type=float, default=0.01)
parser.add_argument('--lambda_e', type=float, default=0.0001)
parser.add_argument('--algorithm', type=str, default='deepstyle')  # vbpr, deepstyle
parser.add_argument('--dataset', type=str, default='Electronics')
args = parser.parse_args()
print(args)

# Seed numpy and torch so training runs are reproducible.
np.random.seed(args.seed)
torch.manual_seed(args.seed)

dataset = RecSysDataset(args.dataset)

# Instantiate the requested recommender; all variants take (n_users, n_items)
# plus algorithm-specific regularization weights. Note: an unknown
# --algorithm value leaves `model` undefined (no else branch).
if args.algorithm == "vbpr":
    model = VBPR(dataset.n_users, dataset.n_items,
                 dataset.corpus.image_features, args.k, args.k2,
                 args.lambda_w, args.lambda_b, args.lambda_e)
elif args.algorithm == "deepstyle":
    model = DeepStyle(dataset.n_users, dataset.n_items, dataset.n_categories,
                      dataset.corpus.image_features,
                      dataset.corpus.item_category,
                      args.k, args.lambda_w, args.lambda_e)
elif args.algorithm == "bpr":
    model = BPR(dataset.n_users, dataset.n_items, args.k,
                args.lambda_w, args.lambda_b)
def run_bb_single_user_restrict_exp():
    """Black-box single-user attack on a fixed, pre-selected list of
    (user, item, i_tag) triples, on Clothing_Shoes_and_Jewelry / VBPR.

    Results are written under ``exp_restrict/``. Depends on module-level
    ``default_args``, ``load_model``, ``Experimentation`` and ``Args``.
    """
    dataset_name = 'Clothing_Shoes_and_Jewelry'
    algo_name = 'vbpr'
    dataset = RecSysDataset(dataset_name)
    model = load_model(dataset_name, algo_name, dataset)
    exp = Experimentation(model, dataset_name)

    seed = 0
    random.seed(seed)
    args = {
        'seed': seed,
        'experiment': 'single_user_restrict',
        'dataset_name': dataset_name,
        'epsilon': 1 / 255,
        'gamma': 7,
        'blackbox': 1,
        'by_rank': 1,
        'steps': 20,
        'examples': 32,
    }
    args.update(default_args)

    exp_folder = f"exp_restrict"
    if not os.path.exists(exp_folder):
        os.makedirs(exp_folder)

    # One "user, item, i_tag" triple per line.
    user_items = """110680, 358983, 198980
50494, 181343, 78779
99346, 103652, 178736
116686, 105072, 175653
55125, 193068, 182942
5306, 316952, 79752
33936, 68717, 218461
126545, 446829, 40044
67013, 282366, 129428
63691, 427098, 177546
53075, 234852, 323772
120354, 230606, 408602
102734, 142737, 62278
108770, 442049, 284216
39755, 82429, 311327
126851, 199249, 121946
62468, 211997, 29955
46930, 17209, 65192
76465, 239979, 218461
116871, 266575, 41210
119028, 262485, 125305
28631, 416912, 112862
66150, 71677, 443957
18254, 206620, 247487
36941, 169961, 408602
18316, 249120, 294815
99064, 176886, 115981
12429, 26161, 132444
81050, 290802, 319654
104779, 67934, 171678
32834, 217176, 323772
119242, 165728, 129428
69804, 21000, 84504
92428, 57480, 48070
106196, 92620, 284216
78892, 353985, 443957
118248, 271611, 31783
19262, 369153, 276431
40651, 448284, 354434
12945, 403923, 319030
95660, 26658, 364632
9665, 337449, 2359
117812, 267384, 408602
111473, 379750, 71288
89651, 407907, 302410
43279, 43924, 302410
61884, 55078, 198980
73375, 452746, 1270
13199, 199837, 364632
46372, 201706, 98940
56907, 72103, 64782
41444, 418994, 354066
80070, 48648, 284216
83941, 361106, 282048
119670, 30279, 94791
26801, 167055, 291390
126695, 421024, 253221
72420, 312971, 101043
62522, 136923, 424684
58024, 8549, 132548
113417, 87558, 302410
68334, 50863, 190043
34143, 20684, 263813
8163, 169657, 136482
105516, 342821, 198980
120385, 251989, 155477
71919, 106883, 263813
120061, 290530, 431949
1840, 45189, 182933
12225, 78503, 307606
94333, 410342, 302410
110112, 279824, 111106
52274, 154682, 129428
93094, 173811, 358612
108114, 436320, 189303
102897, 443663, 182933
87576, 152149, 130704
81954, 354735, 129428
149, 60954, 52300
80202, 256969, 302410
64694, 304643, 52300
108535, 256138, 253697
113718, 257299, 129428
43664, 249473, 146615
31969, 174893, 104513
95719, 158561, 361643
42625, 56205, 107749
92227, 107229, 194319
114094, 43563, 447975
8255, 450688, 333316
25043, 227106, 96823
120195, 196420, 360
74384, 304817, 245385
29059, 356727, 245385
31275, 80284, 167168
105296, 89162, 243781
126817, 94328, 183185
18677, 183543, 198980
105274, 18042, 358612
71170, 257054, 44373"""

    for u_i_tag in user_items.split('\n'):
        # Fix: parse the integers explicitly instead of eval()-ing data —
        # same result for these comma-separated triples, no code execution.
        u, i, i_tag = (int(tok) for tok in u_i_tag.split(','))
        args['user'], args['item'], args['i_tag'] = u, i, i_tag
        name = f"main[{u}, {i}, {i_tag}]"
        im = exp.run(f"{exp_folder}/{name}", Args(args))
# CLI options (the parser object is constructed earlier in the file,
# outside this view).
parser.add_argument('--by-rank', type=int, default=1)
parser.add_argument('--n-components', type=int, default=150)
parser.add_argument('--rank-distribution', type=str, default='uniform')
parser.add_argument('--examples', type=int, default=8)
parser.add_argument('--k', type=int, default=10)
parser.add_argument('--k2', type=int, default=10)
parser.add_argument('--algorithm', type=str, default='vbpr')  # vbpr, deepstyle
# NOTE(review): 'exp_defualt' looks like a typo for 'exp_default'; kept
# as-is since existing result folders may already use this spelling.
parser.add_argument('--experiment_name', type=str, default='exp_defualt')
args = parser.parse_args()

dataset_name = "Clothing_Shoes_and_Jewelry"
dataset = RecSysDataset(dataset_name)

# Build the selected model, then load its pretrained weights from the
# dataset's models directory. An unknown --algorithm value leaves `model`
# undefined (no else branch).
if args.algorithm == "vbpr":
    model = VBPR(dataset.n_users, dataset.n_items,
                 dataset.corpus.image_features, args.k, args.k2)
elif args.algorithm == "deepstyle":
    model = DeepStyle(dataset.n_users, dataset.n_items, dataset.n_categories,
                      dataset.corpus.image_features,
                      dataset.corpus.item_category, args.k)
model.load(
    f'../data/dataset/{dataset_name}/models/{args.algorithm}_resnet50.pth')
print(args)
def main():
    """Train NARM on yoochoose (n_items = 37484) and report ranking metrics.

    Relies on module-level ``args``, ``device``, ``metric`` and helpers
    (``load_data``, ``collate_fn``, ``trainForEpoch``, ``validate``).

    NOTE(review): the original indentation of this function was lost; the
    block nesting below — in particular whether the final training section
    also runs in --test mode — is a best-effort reconstruction. Confirm
    against the upstream script before relying on it.
    """
    print('Loading data...')
    train, valid, test = load_data(args.dataset_path, valid_portion=args.valid_portion)
    train_data = RecSysDataset(train)
    valid_data = RecSysDataset(valid)
    test_data = RecSysDataset(test)
    train_loader = DataLoader(train_data, batch_size = args.batch_size, shuffle = True, collate_fn = collate_fn)
    valid_loader = DataLoader(valid_data, batch_size = args.batch_size, shuffle = False, collate_fn = collate_fn)
    test_loader = DataLoader(test_data, batch_size = args.batch_size, shuffle = False, collate_fn = collate_fn)

    n_items = 37484  # yoochoose item-vocabulary size

    if args.test:
        # Train 5 models from scratch; for each, log Recall/MRR/NDCG at
        # k = 5, 10 and args.topk (presumably 20 — confirm) to a text file.
        for i in range(5):
            # results[row][col]: rows = cutoff (5, 10, topk);
            # cols = (recall, mrr, ndcg)
            results = np.zeros((3, 3))
            model = NARM(n_items, args.hidden_size, args.embed_dim, args.batch_size).to(device)
            optimizer = optim.Adam(model.parameters(), args.lr)
            criterion = nn.CrossEntropyLoss()
            scheduler = StepLR(optimizer, step_size=args.lr_dc_step, gamma=args.lr_dc)
            for epoch in tqdm(range(args.epoch)):
                # train for one epoch
                scheduler.step(epoch=epoch)
                trainForEpoch(train_loader, model, optimizer, epoch, args.epoch, criterion, log_aggr=1000)
            model.eval()
            recalls5, recalls10, recalls20 = [], [], []
            mrrs5, mrrs10, mrrs20 = [], [], []
            ndcgs5, ndcgs10, ndcgs20 = [], [], []
            # NOTE(review): evaluation runs on valid_loader although the
            # output file is named "test_performances" — confirm intended.
            with torch.no_grad():
                for seq, target, lens in tqdm(valid_loader):
                    seq = seq.to(device)
                    target = target.to(device)
                    outputs = model(seq, lens)
                    logits = F.softmax(outputs, dim=1)
                    recall5, mrr5, ndcg5 = metric.evaluate(logits, target, k=5)
                    recall10, mrr10, ndcg10 = metric.evaluate(logits, target, k=10)
                    recall20, mrr20, ndcg20 = metric.evaluate(logits, target, k=args.topk)
                    recalls5.append(recall5)
                    mrrs5.append(mrr5)
                    ndcgs5.append(ndcg5)
                    recalls10.append(recall10)
                    mrrs10.append(mrr10)
                    ndcgs10.append(ndcg10)
                    recalls20.append(recall20)
                    mrrs20.append(mrr20)
                    ndcgs20.append(ndcg20)
            results[0, 0] = np.mean(recalls5)
            results[0, 1] = np.mean(mrrs5)
            results[0, 2] = np.mean(ndcgs5)
            results[1, 0] = np.mean(recalls10)
            results[1, 1] = np.mean(mrrs10)
            results[1, 2] = np.mean(ndcgs10)
            results[2, 0] = np.mean(recalls20)
            results[2, 1] = np.mean(mrrs20)
            results[2, 2] = np.mean(ndcgs20)
            # Append this run's metric table to the log file.
            with open('recsys19/test_performances_on.txt', 'a') as f:
                f.write(str(results) + '\n')

    # Standard single training run, evaluated on the test loader each epoch.
    model = NARM(n_items, args.hidden_size, args.embed_dim, args.batch_size).to(device)
    optimizer = optim.Adam(model.parameters(), args.lr)
    criterion = nn.CrossEntropyLoss()
    scheduler = StepLR(optimizer, step_size = args.lr_dc_step, gamma = args.lr_dc)
    for epoch in tqdm(range(args.epoch)):
        # train for one epoch
        scheduler.step(epoch = epoch)
        trainForEpoch(train_loader, model, optimizer, epoch, args.epoch, criterion, log_aggr = 1000)
        recall, mrr, ndcg = validate(test_loader, model)
        print('Epoch {} validation: Recall@{}: {:.4f}, MRR@{}: {:.4f} \n'.format(epoch, args.topk, recall, args.topk, mrr))
        # store best loss and save a model checkpoint
        ckpt_dict = {
            'epoch': epoch + 1,
            'state_dict': model.state_dict(),
            'optimizer': optimizer.state_dict()
        }
        torch.save(ckpt_dict, 'latest_checkpoint.pth.tar')