def __init__(self, num_recs, dataset_args, model_args, train_args):
    """
    Load the MovieLens data and create the PGATNetEx model on the target device.

    :param num_recs: value stored as ``self.num_recs``
    :param dataset_args: keyword arguments forwarded to ``MovieLens``; its
        'debug' entry decides whether stored weights are loaded, dict
    :param model_args: extra keyword arguments forwarded to ``PGATNetEx``, dict
    :param train_args: stored as ``self.train_args``; its 'device' entry is
        the device the data and the model are moved to, dict
    """
    self.num_recs = num_recs
    self.train_args = train_args

    dataset = MovieLens(**dataset_args)
    self.data = dataset.data.to(train_args['device'])

    node_count = self.data.num_nodes[0]
    relation_count = self.data.num_relations[0]
    net = PGATNetEx(node_count, relation_count, **model_args)

    if not dataset_args['debug']:
        # NOTE(review): torch.load receives the whole train_args dict here,
        # not a file path -- looks unintended; confirm which entry holds the
        # weights file before relying on the non-debug path.
        state = torch.load(train_args)
        net.load_state_dict(state)

    self.model = net.to(train_args['device'])
def __init__(self, num_recs, dataset_args, model_args, device_args):
    """
    Load the MovieLens data, build the PAGATNet model and try to restore
    its weights from disk.

    The weights file is looked up as ``weights<suffix>.pkl`` inside
    ``model_args['model_path']``, where ``<suffix>`` comes from
    ``self.dataset.build_suffix()``. If loading fails for any reason the
    randomly initialised model is kept and the reason is printed.

    :param num_recs: value stored as ``self.num_recs``
    :param dataset_args: keyword arguments forwarded to ``MovieLens``, dict
    :param model_args: keyword arguments forwarded to ``PAGATNet``; must
        also contain 'model_path' (folder of the weights file). The dict is
        not modified, dict
    :param device_args: must contain 'device', the device the data and the
        model are moved to, dict
    """
    self.num_recs = num_recs
    self.device_args = device_args
    device = device_args['device']

    self.dataset = MovieLens(**dataset_args)
    self.data = self.dataset.data.to(device)

    # Work on a copy: removing 'model_path' from the caller's dict would make
    # a second construction with the same arguments fail with KeyError.
    model_args = dict(model_args)
    model_path = os.path.join(
        model_args.pop('model_path'),
        'weights{}.pkl'.format(self.dataset.build_suffix()))

    self.model = PAGATNet(num_nodes=self.data.num_nodes[0],
                          **model_args).to(device)
    self.model.eval()

    try:
        state_dict = torch.load(model_path,
                                map_location=torch.device('cpu'))
        self.model.load_state_dict(state_dict)
    except Exception as err:
        # Best-effort fallback, but no longer swallows KeyboardInterrupt /
        # SystemExit, and the real cause (missing file, shape mismatch, ...)
        # is reported instead of being hidden.
        print("No weights found in {}! Try use random initialized model.".
              format(model_path))
        print("Reason: {!r}".format(err))
    else:
        print("Model from {} successfully loaded!".format(model_path))

    # Item ids already handed out as recommendations.
    self.recommended = []
class PGATRecSys(object):
    """
    Recommender that scores MovieLens items for a newly built user with a
    PAGATNet model and attaches a path-based explanation to every item.

    Typical call order: ``build_user`` first (it creates the new user's
    edges and embedding), then ``get_recommendations``.
    """

    # Explanation types in the order their quotas are filled.
    _EXPL_TYPES = ('IUI', 'UIU', 'IUDD', 'UICC')

    def __init__(self, num_recs, dataset_args, model_args, device_args):
        """
        Load the MovieLens data, build the PAGATNet model and try to restore
        its weights from disk.

        The weights file is looked up as ``weights<suffix>.pkl`` inside
        ``model_args['model_path']``, where ``<suffix>`` comes from
        ``self.dataset.build_suffix()``. If loading fails for any reason the
        randomly initialised model is kept and the reason is printed.

        :param num_recs: value stored as ``self.num_recs``
        :param dataset_args: keyword arguments forwarded to ``MovieLens``, dict
        :param model_args: keyword arguments forwarded to ``PAGATNet``; must
            also contain 'model_path' (folder of the weights file). The dict
            is not modified, dict
        :param device_args: must contain 'device', the device the data and
            the model are moved to, dict
        """
        self.num_recs = num_recs
        self.device_args = device_args
        device = device_args['device']

        self.dataset = MovieLens(**dataset_args)
        self.data = self.dataset.data.to(device)

        # Work on a copy: removing 'model_path' from the caller's dict would
        # make a second construction with the same arguments fail.
        model_args = dict(model_args)
        model_path = os.path.join(
            model_args.pop('model_path'),
            'weights{}.pkl'.format(self.dataset.build_suffix()))

        self.model = PAGATNet(num_nodes=self.data.num_nodes[0],
                              **model_args).to(device)
        self.model.eval()

        try:
            state_dict = torch.load(model_path,
                                    map_location=torch.device('cpu'))
            self.model.load_state_dict(state_dict)
        except Exception as err:
            # Best-effort fallback, but no longer swallows KeyboardInterrupt /
            # SystemExit, and the real cause is reported.
            print("No weights found in {}! Try use random initialized model.".
                  format(model_path))
            print("Reason: {!r}".format(err))
        else:
            print("Model from {} successfully loaded!".format(model_path))

        # Item ids already handed out as recommendations.
        self.recommended = []

    def get_top_n_popular_items(self, n=10):
        """
        Get the details of the n most popular items.

        Ratings are sorted by 'movie_count' (descending) and de-duplicated on
        'iid'; the first n item ids are then looked up in the item table.
        The returned rows keep the item table's own order, not popularity
        order.

        :param n: the number of items, int
        :return: df: popular item dataframe, df
        """
        ratings_df = self.data.ratings[0][['iid', 'movie_count']]
        ratings_df = ratings_df.sort_values(by='movie_count', ascending=False)
        ratings_df = ratings_df.drop_duplicates(subset=['iid'])
        popular_iids = ratings_df.iid.iloc[:n]

        item_df = self.data.items[0]
        return item_df[item_df.iid.isin(popular_iids)]

    def build_user(self, iids, demographic_info):
        """
        Build the profile of a new user from the items they selected.

        Creates edges from the selected items and the user's gender /
        occupation nodes to a new user node, derives the paths for those
        edges, and computes the new user's propagated embedding.

        :param iids: user selected item ids, list
        :param demographic_info: (gender, occupation), tuple
        :return: None
        """
        self.base_iids = iids
        self.demographic_info = demographic_info
        device = self.device_args['device']
        e2nid = self.data.e2nid[0]

        # The new user gets the first node id after the existing embeddings.
        self.new_user_nid = self.model.node_emb.weight.shape[0]
        gender_nid = e2nid['gender'][demographic_info[0]]
        occ_nid = e2nid['occ'][int(demographic_info[1])]
        item_nids = [e2nid['iid'][iid] for iid in iids]

        # Every source node (items + demographics) points at the new user.
        row = item_nids + [gender_nid, occ_nid]
        col = [self.new_user_nid] * len(row)
        self.new_edge_index = torch.from_numpy(
            np.array([row, col])).long().to(device)

        new_path_np = utils.path.join(self.data.edge_index,
                                      self.new_edge_index)
        self.new_path = torch.from_numpy(new_path_np).long().to(device)

        # The embedding must live on the same device as the model weights it
        # is concatenated with below.
        self.new_user_emb = torch.nn.Embedding(
            1, self.model.node_emb.weight.shape[1],
            max_norm=1, norm_type=2.0).to(device)
        new_node_emb = torch.cat(
            (self.model.node_emb.weight, self.new_user_emb.weight), dim=0)
        # The new user is the last row of the concatenated embeddings;
        # presumably the model output keeps that row order.
        self.propagated_new_user_emb = self.model(
            new_node_emb, self.new_path)[0][-1, :]
        print('user building done...')

    def get_recommendations(self, rs_proportion):
        """
        Recommend items for the user created by ``build_user``.

        Candidates are the 200 most popular items minus the ones the user
        selected or that were recommended before. Each candidate is scored
        by the dot product of its propagated embedding with the new user's
        embedding, explained via ``get_explanation``, and then selected by
        explanation-type quota (see ``_merge_by_explanation_type``).

        :param rs_proportion: number of items wanted per explanation type,
            keyed by 'IUI', 'UIU', 'IUDD' and 'UICC', dict
        :return: (rec_item_df, final_exp): details of the recommended items
            and the explanation strings, (df, list)
        """
        device = self.device_args['device']
        item_df = self.data.items[0]

        excluded = set(self.recommended) | set(self.base_iids)
        rec_iids = [iid for iid in self.get_top_n_popular_items(200).iid
                    if iid not in excluded]
        if not rec_iids:
            # Nothing left to recommend; avoid running the model on an
            # empty path set.
            return item_df.iloc[0:0], []

        rec_nids = [self.data.e2nid[0]['iid'][iid] for iid in rec_iids]
        path_np = self.data.path_np[0]
        # Keep only the paths whose last node is a candidate item.
        mask = np.isin(path_np[-1, :], rec_nids)
        full_path_index = torch.from_numpy(path_np[:, mask]).to(device)
        propagated_node_emb = self.model(self.model.node_emb.weight,
                                         full_path_index)[0]
        rec_item_emb = propagated_node_emb[rec_nids, :]
        est_feedback = torch.sum(
            self.propagated_new_user_emb * rec_item_emb,
            dim=1).reshape(-1).cpu().detach().numpy()

        # NOTE(review): np.argsort is ascending, so the lowest estimated
        # feedback comes first -- confirm this ranking direction is intended.
        rec_iids = [rec_iids[idx] for idx in np.argsort(est_feedback)]

        explained = [self.get_explanation(iid) for iid in rec_iids]
        exps = [expl for expl, _ in explained]
        expl_types = [expl_type for _, expl_type in explained]

        # NOTE(review): the list length is fixed at 10; self.num_recs is not
        # used here -- confirm.
        final_rec_iids, final_exp = self._merge_by_explanation_type(
            rec_iids, exps, expl_types, rs_proportion)

        self.recommended += final_rec_iids
        # NOTE(review): isin() keeps item_df's own row order, so the rows are
        # not positionally aligned with final_exp.
        rec_item_df = item_df[item_df.iid.isin(final_rec_iids)]
        return rec_item_df, final_exp

    @staticmethod
    def _merge_by_explanation_type(rec_iids, exps, expl_types, rs_proportion,
                                   limit=10):
        """
        Pick items by explanation-type quota and pad with the remaining ones.

        For each type in 'IUI', 'UIU', 'IUDD', 'UICC' (in that order) the
        first ``rs_proportion[type]`` items of that type are taken; all items
        not taken are appended afterwards in their given order. Item ids and
        explanations are selected through the same positions, so the two
        returned lists always correspond element by element.

        :param rec_iids: candidate item ids in ranked order, list
        :param exps: explanation string of each candidate, list
        :param expl_types: explanation type of each candidate, list
        :param rs_proportion: quota per explanation type, dict
        :param limit: maximum number of items returned, int
        :return: (item ids, explanations), (list, list)
        """
        chosen = []
        for wanted in PGATRecSys._EXPL_TYPES:
            matching = [idx for idx, expl_type in enumerate(expl_types)
                        if expl_type == wanted]
            chosen.extend(matching[:rs_proportion[wanted]])

        chosen_iids = {rec_iids[idx] for idx in chosen}
        padding = [idx for idx, iid in enumerate(rec_iids)
                   if iid not in chosen_iids]

        order = (chosen + padding)[:limit]
        return ([rec_iids[idx] for idx in order],
                [exps[idx] for idx in order])

    def get_explanation(self, iid):
        """
        Explain why an item is recommended to the new user.

        Adds edges between the item and the new user to the graph, runs the
        model on the resulting paths and describes the path with the highest
        attention by the entity at its first node.

        :param iid: item id to explain, same kind of id as in ``build_user``
        :return: (expl, expl_type): explanation string and one of
            'UIU', 'IUI', 'IUDD', 'UICC', tuple
        """
        device = self.device_args['device']
        movie_nid = self.data.e2nid[0]['iid'][iid]

        # Connect the item and the new user in both directions.
        row = [movie_nid, self.new_user_nid]
        col = [self.new_user_nid, movie_nid]
        expl_edge_index = torch.from_numpy(
            np.array([row, col])).long().to(device)
        exist_edge_index = torch.cat(
            (self.data.edge_index, self.new_edge_index), dim=1)
        new_path_np = utils.path.join(exist_edge_index, expl_edge_index)
        new_path = torch.from_numpy(new_path_np).long().to(device)

        new_node_emb = torch.cat(
            (self.model.node_emb.weight, self.new_user_emb.weight), dim=0)
        att = self.model(new_node_emb, new_path)[1]
        # Move to host memory first: Tensor.numpy() only works on CPU tensors.
        opt_path = new_path[:, torch.argmax(att)].cpu().numpy()

        e = self.data.nid2e[0][opt_path[0]]
        if e[0] == 'uid':
            expl = 'Uid0--Iid{}--Uid{}'.format(iid, e[1])
            expl_type = 'UIU'
        elif e[0] == 'iid':
            expl = 'Iid{}--Uid0--Iid{}'.format(iid, e[1])
            expl_type = 'IUI'
        elif e[0] == 'gender' or e[0] == 'occ':
            expl = 'Iid{}--Uid0--DFType{}--DFValue{}'.format(iid, e[0], e[1])
            expl_type = 'IUDD'
        else:
            expl = 'Uid0--Iid{}--CFType{}--CFValue{}'.format(iid, e[0], e[1])
            expl_type = 'UICC'
        return expl, expl_type
'batch_size': args.batch_size, 'weight_decay': args.weight_decay, 'lr': args.lr, 'device': device, 'weights_folder': weights_folder, 'logger_folder': logger_folder } rec_args = {'num_recs': args.num_recs} print('dataset params: {}'.format(dataset_args)) print('task params: {}'.format(model_args)) print('train params: {}'.format(train_args)) print('rec params: {}'.format(rec_args)) if __name__ == '__main__': randomizer = random.Random(2019) dataset = MovieLens(**dataset_args) dataset.data = dataset.data.to(train_args['device']) data = dataset.data train_pos_unid_inid_map, test_pos_unid_inid_map, neg_unid_inid_map = \ data.train_pos_unid_inid_map[0], data.test_pos_unid_inid_map[0], data.neg_unid_inid_map[0] model = PAGATNet(num_nodes=dataset.data.num_nodes[0], **model_args).to(train_args['device']) optimizer = Adam(model.parameters(), lr=train_args['lr'], weight_decay=train_args['weight_decay']) if torch.cuda.is_available(): torch.cuda.synchronize() t_start = time.perf_counter()
from torch.nn import Linear import torch_geometric.transforms as T from torch_geometric.datasets import MovieLens from torch_geometric.nn import SAGEConv, to_hetero parser = argparse.ArgumentParser() parser.add_argument('--use_weighted_loss', action='store_true', help='Whether to use weighted MSE loss.') args = parser.parse_args() device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') path = osp.join(osp.dirname(osp.realpath(__file__)), '../../data/MovieLens') dataset = MovieLens(path, model_name='all-MiniLM-L6-v2') data = dataset[0].to(device) # Add user node features for message passing: data['user'].x = torch.eye(data['user'].num_nodes, device=device) del data['user'].num_nodes # Add a reverse ('movie', 'rev_rates', 'user') relation for message passing: data = T.ToUndirected()(data) del data['movie', 'rev_rates', 'user'].edge_label # Remove "reverse" label. # Perform a link-level split into training, validation, and test edges: train_data, val_data, test_data = T.RandomLinkSplit( num_val=0.1, num_test=0.1, neg_sampling_ratio=0.0,
import json import os import pandas as pd from torch_geometric.datasets import MovieLens apikey = '' key1 = 'e760129c' key2 = 'e44e5305' key3 = '8403a97b' key4 = '192c6b0e' root = osp.join('.', 'tmp', 'ml') data = MovieLens(root=root, name='1m', num_core=10).data movies = data.items[0] director_list = [] actor_list = [] for i, (title, year) in enumerate(zip(movies.title, movies.year)): if i in range(0, 1000): apikey = key1 if i in range(1000, 2000): apikey = key2 if i in range(2000, 3000): apikey = key3 if i in range(3000, 4000): apikey = key4