def _generate_enc_graph(self, rating_pairs, rating_values, add_support=False):
    """Build the encoder heterograph with one edge type per rating value.

    For every value in ``self.possible_rating_values`` a ``user -> movie``
    relation and a mirrored ``movie -> user`` relation (prefixed ``rev-``)
    are created from the pairs that carry that rating.

    Parameters
    ----------
    rating_pairs : tuple of np.ndarray
        ``(user_ids, movie_ids)`` index arrays, one entry per rating.
    rating_values : np.ndarray
        Rating of each pair, aligned with ``rating_pairs``.
    add_support : bool, optional
        If True, attach the ``ci`` / ``cj`` degree-normalization tensors to
        the ``user`` and ``movie`` nodes.

    Returns
    -------
    DGLHeteroGraph
        Graph with ``self._num_user`` user nodes and ``self._num_movie``
        movie nodes.

    Raises
    ------
    AssertionError
        If the number of forward edges differs from the number of pairs.
    """
    num_nodes_dict = {'user': self._num_user, 'movie': self._num_movie}
    rating_row, rating_col = rating_pairs

    # No dense (num_user x num_movie) rating matrix is materialized here:
    # only the per-rating edge lists are needed to build the graph.
    data_dict = dict()
    for rating in self.possible_rating_values:
        ridx = np.where(rating_values == rating)
        rrow = rating_row[ridx]
        rcol = rating_col[ridx]
        etype = str(to_etype_name(rating))
        data_dict[('user', etype, 'movie')] = (rrow, rcol)
        data_dict[('movie', 'rev-%s' % etype, 'user')] = (rcol, rrow)
    graph = dgl.heterograph(data_dict, num_nodes_dict=num_nodes_dict)

    # sanity check: every pair appears once forward and once reversed
    assert len(rating_pairs[0]) == sum(
        [graph.number_of_edges(et) for et in graph.etypes]) // 2

    if add_support:
        def _calc_norm(x):
            # 1 / sqrt(degree); zero degrees map to inf first so that the
            # resulting coefficient is 0 instead of a division by zero.
            x = x.numpy().astype('float32')
            x[x == 0.] = np.inf
            x = th.FloatTensor(1. / np.sqrt(x))
            return x.unsqueeze(1)

        user_ci = []
        user_cj = []
        movie_ci = []
        movie_cj = []
        for r in self.possible_rating_values:
            r = to_etype_name(r)
            user_ci.append(graph['rev-%s' % r].in_degrees())
            movie_ci.append(graph[r].in_degrees())
            if self._symm:
                user_cj.append(graph[r].out_degrees())
                movie_cj.append(graph['rev-%s' % r].out_degrees())
        user_ci = _calc_norm(sum(user_ci))
        movie_ci = _calc_norm(sum(movie_ci))
        if self._symm:
            user_cj = _calc_norm(sum(user_cj))
            movie_cj = _calc_norm(sum(movie_cj))
        else:
            # Without symmetric normalization the source-side factor is 1.
            user_cj = th.ones(self.num_user, )
            movie_cj = th.ones(self.num_movie, )
        graph.nodes['user'].data.update({'ci': user_ci, 'cj': user_cj})
        graph.nodes['movie'].data.update({'ci': movie_ci, 'cj': movie_cj})

    return graph
def flatten_etypes(pair_graph, dataset, segment):
    """Merge the per-rating edge types of ``pair_graph`` into one relation.

    Parameters
    ----------
    pair_graph : DGLHeteroGraph
        Graph with ``user`` and ``movie`` nodes and one edge type per value
        in ``dataset.possible_rating_values``.
    dataset : object
        Only ``possible_rating_values`` is read.
    segment : str
        Not used by this function.

    Returns
    -------
    DGLHeteroGraph
        Graph with a single ``('user', 'rate', 'movie')`` relation whose
        edges carry ``rating`` (the rating value) and ``label`` (the position
        of that value in ``possible_rating_values`` per ``np.searchsorted``).
    """
    num_nodes = {
        'user': pair_graph.number_of_nodes('user'),
        'movie': pair_graph.number_of_nodes('movie'),
    }
    rating_values = dataset.possible_rating_values

    # One (src, dst) pair per rating value, edges in edge-id order.
    edges_by_rating = [
        pair_graph.edges(order='eid', etype=to_etype_name(value))
        for value in rating_values
    ]

    src = th.cat([u for u, _ in edges_by_rating])
    dst = th.cat([v for _, v in edges_by_rating])
    ratings = th.cat([
        th.LongTensor(np.full_like(u, value))
        for value, (u, _) in zip(rating_values, edges_by_rating)
    ])
    labels = th.cat([
        th.LongTensor(np.full_like(u, np.searchsorted(rating_values, value)))
        for value, (u, _) in zip(rating_values, edges_by_rating)
    ])

    flattened_pair_graph = dgl.heterograph(
        {('user', 'rate', 'movie'): (src, dst)}, num_nodes_dict=num_nodes)
    flattened_pair_graph.edata['rating'] = ratings
    flattened_pair_graph.edata['label'] = labels
    return flattened_pair_graph
def forward(self, graph, ufeat=None, ifeat=None):
    """Run the per-rating convolution and the dense output layer.

    Parameters
    ----------
    graph : DGLHeteroGraph
        Graph with ``user`` and ``item`` node types, handed to ``self.conv``.
    ufeat, ifeat : optional
        User / item input features; forwarded unchanged to ``self.conv``.

    Returns
    -------
    tuple
        ``(user_out, item_out)`` after aggregation activation, dropout, the
        ``ufc`` / ``ifc`` layers and the output activation.
    """
    shared = self.W_r
    mod_args = {}
    for rating in self.rating_vals:
        etype = to_etype_name(rating)
        # Each relation and its reverse receive their own weight, or None
        # when no shared weights are held.
        for name in (etype, 'rev-%s' % etype):
            mod_args[name] = (None if shared is None else shared[name], )

    out_feats = self.conv(graph, {'user': ufeat, 'item': ifeat},
                          mod_args=mod_args)
    ufeat, ifeat = out_feats['user'], out_feats['item']

    # Flatten everything after the node dimension.
    ufeat = ufeat.view(ufeat.shape[0], -1)
    ifeat = ifeat.view(ifeat.shape[0], -1)

    # fc and non-linear (user first, then item, at every stage)
    ufeat, ifeat = self.agg_act(ufeat), self.agg_act(ifeat)
    ufeat, ifeat = self.dropout(ufeat), self.dropout(ifeat)
    ufeat, ifeat = self.ufc(ufeat), self.ifc(ifeat)
    return self.out_act(ufeat), self.out_act(ifeat)
def forward(self, graph, ufeat=None, ifeat=None, Two_Stage=False):
    """Forward function

    Parameters
    ----------
    graph : DGLHeteroGraph
        User-movie rating graph with node types "user" and "movie" and one
        edge type (plus its "rev-" counterpart) per rating value.
    ufeat : torch.Tensor, optional
        User features. Note that the shape check below reads
        ``ufeat.shape``, so passing None fails at that point.
    ifeat : torch.Tensor, optional
        Movie features.
    Two_Stage : bool, optional
        Forwarded to every sub-convolution as the second module argument.

    Returns
    -------
    new_ufeat : torch.Tensor
        New user features
    new_ifeat : torch.Tensor
        New movie features
    """
    in_feats = {'user': ufeat, 'movie': ifeat}

    # Per-rating weights: product of ``att`` with the flattened ``basis``,
    # reshaped to (-1, user_in_units, msg_units) and cached on the module.
    self.W = th.matmul(
        self.att, self.basis.view(self.basis_units, -1)
    ).view(-1, self.user_in_units, self.msg_units)

    use_weight = self.W_r is not None
    mod_args = {}
    for idx, rating_val in enumerate(self.rating_vals):
        etype = to_etype_name(rating_val)
        weight = self.W[idx, :, :] if use_weight else None
        mod_args[etype] = (weight, Two_Stage)
        mod_args['rev-%s' % etype] = (weight, Two_Stage)

    out_feats = self.conv(graph, in_feats, mod_args=mod_args)
    ufeat, ifeat = out_feats['user'], out_feats['movie']

    # NOTE(review): both reshapes are assumed to sit under this shape guard;
    # confirm against the original indentation.
    if in_feats['user'].shape == ufeat.shape:
        ufeat = ufeat.view(ufeat.shape[0], -1)
        ifeat = ifeat.view(ifeat.shape[0], -1)

    # fc and non-linear (user first, then movie, at every stage)
    ufeat, ifeat = self.agg_act(ufeat), self.agg_act(ifeat)
    ufeat, ifeat = self.dropout(ufeat), self.dropout(ifeat)
    ufeat, ifeat = self.ufc(ufeat), self.ifc(ifeat)
    return ufeat, ifeat
def forward(self, enc_graph, dec_graph, ufeat, ifeat, Two_Stage=False):
    """Encode with the stacked encoder layers, decode, and compute the
    weight-smoothness regularizer.

    Parameters
    ----------
    enc_graph : DGLHeteroGraph
        Graph given to every encoder layer.
    dec_graph : DGLHeteroGraph
        Graph given to the decoder.
    ufeat, ifeat : torch.Tensor
        Input user / movie features of the first encoder layer.
    Two_Stage : bool, optional
        Forwarded to every encoder layer.

    Returns
    -------
    tuple
        ``(pred_ratings, reg_loss, user_out, movie_out, W)`` where
        ``user_out`` / ``movie_out`` are the weighted sums of the layer
        outputs (layer ``i`` weighted by ``1 / (i + 1)``) and ``W`` holds the
        per-rating weights of the first encoder layer.
    """
    for i in range(0, args.layers):
        user_o, movie_o = self.encoder[i](enc_graph, ufeat, ifeat, Two_Stage)
        if i == 0:
            user_out = user_o
            movie_out = movie_o
        else:
            # Out-of-place on purpose: ``user_out`` starts as the very tensor
            # that layer 1 received as input, and an in-place ``+=`` would
            # modify it after it was used in the autograd graph.
            user_out = user_out + user_o / float(i + 1)
            movie_out = movie_out + movie_o / float(i + 1)
        ufeat = user_o
        ifeat = movie_o

    pred_ratings = self.decoder(dec_graph, user_out, movie_out)

    # Per-rating weights of the first encoder layer.
    first = self.encoder[0]
    W = th.matmul(first.att, first.basis.view(first.basis_units, -1))
    W = W.view(len(self.rating_vals), self.src_in_units, -1)

    # Reward similarity between weights of consecutive rating values:
    # minus the summed row-wise cosine similarity.
    reg_loss = 0.0
    for i in range(1, len(self.rating_vals)):
        reg_loss = reg_loss - th.sum(
            th.cosine_similarity(W[i, :, :], W[i - 1, :, :], dim=1))
    return pred_ratings, reg_loss, user_out, movie_out, W
def forward(self, graph, ufeat=None, ifeat=None):
    """Apply the per-rating convolution and flatten its outputs.

    Parameters
    ----------
    graph : DGLHeteroGraph
        Graph with ``user`` and ``item`` node types, handed to ``self.conv``.
    ufeat, ifeat : optional
        User / item input features; forwarded unchanged to ``self.conv``.

    Returns
    -------
    tuple
        ``(user_out, item_out)``, each viewed as ``(num_nodes, -1)``.
    """
    def _weight_for(etype):
        # 1-tuple of positional module args for one relation.
        return (self.W_r[etype] if self.W_r is not None else None, )

    mod_args = {}
    for rating in self.rating_vals:
        fwd = to_etype_name(rating)
        bwd = 'rev-%s' % fwd
        mod_args[fwd] = _weight_for(fwd)
        mod_args[bwd] = _weight_for(bwd)

    out_feats = self.conv(graph, {'user': ufeat, 'item': ifeat},
                          mod_args=mod_args)
    user_out = out_feats['user']
    item_out = out_feats['item']
    return (user_out.view(user_out.shape[0], -1),
            item_out.view(item_out.shape[0], -1))
def forward(self, graph, ufeat=None, ifeat=None):
    """Forward function

    Parameters
    ----------
    graph : DGLHeteroGraph
        User-item rating graph with node types "user" and "item" and one
        edge type (plus its "rev-" counterpart) per rating value.
    ufeat : torch.Tensor, optional
        User features, passed through to the wrapped convolution.
    ifeat : torch.Tensor, optional
        Item features, passed through to the wrapped convolution.

    Returns
    -------
    new_ufeat : torch.Tensor
        New user features
    new_ifeat : torch.Tensor
        New item features
    """
    has_weights = self.W_r is not None
    mod_args = {}
    for rating_val in self.rating_vals:
        etype = to_etype_name(rating_val)
        rev_etype = 'rev-%s' % etype
        mod_args[etype] = (self.W_r[etype] if has_weights else None, )
        mod_args[rev_etype] = (self.W_r[rev_etype] if has_weights else None, )

    in_feats = {'user': ufeat, 'item': ifeat}
    out_feats = self.conv(graph, in_feats, mod_args=mod_args)

    # Flatten everything after the node dimension.
    user_h = out_feats['user']
    item_h = out_feats['item']
    user_h = user_h.view(user_h.shape[0], -1)
    item_h = item_h.view(item_h.shape[0], -1)

    # fc and non-linear; the user branch is evaluated before the item
    # branch at each stage.
    user_h, item_h = self.agg_act(user_h), self.agg_act(item_h)
    user_h, item_h = self.dropout(user_h), self.dropout(item_h)
    user_h, item_h = self.ufc(user_h), self.ifc(item_h)
    return self.out_act(user_h), self.out_act(item_h)
def _npairs(graph):
    """Return the number of edges of ``graph`` summed over the forward
    (non ``rev-``) rating edge types.

    ``self`` is not a parameter; it is taken from the enclosing scope.
    """
    return sum(
        graph.number_of_edges(str(to_etype_name(value)))
        for value in self.possible_rating_values)
def run(proc_id, n_gpus, args, devices, dataset):
    """Train the network in one worker process and evaluate it periodically.

    Parameters
    ----------
    proc_id : int
        Index of this worker, used to select its entry in ``devices``. Only
        worker 0 builds the validation/test loaders, evaluates and reports.
    n_gpus : int
        Number of GPUs. Values above 1 initialize ``torch.distributed`` with
        the NCCL backend and wrap the network in ``DistributedDataParallel``.
    args : object
        Run configuration. Read here: ``minibatch_size``, ``train_lr``,
        ``train_optimizer``, ``train_max_epoch``, ``train_grad_clip``,
        ``train_valid_interval``, ``train_early_stopping_patience``,
        ``train_min_lr``, ``train_decay_patience``, ``train_lr_decay_factor``.
    devices : sequence
        Device ids, indexed by ``proc_id``.
    dataset : object
        Supplies ``possible_rating_values`` and the train/valid/test encoder
        and decoder graphs.
    """
    dev_id = devices[proc_id]
    if n_gpus > 1:
        # Fixed local rendezvous address; the rank is the device id.
        dist_init_method = 'tcp://{master_ip}:{master_port}'.format(
            master_ip='127.0.0.1', master_port='12345')
        world_size = n_gpus
        th.distributed.init_process_group(backend="nccl",
                                          init_method=dist_init_method,
                                          world_size=world_size,
                                          rank=dev_id)
    if n_gpus > 0:
        th.cuda.set_device(dev_id)

    # NOTE(review): train_labels, num_edges and reverse_types are computed
    # but not used anywhere below.
    train_labels = dataset.train_labels
    train_truths = dataset.train_truths
    num_edges = train_truths.shape[0]

    # Two-way mapping between each rating edge type and its 'rev-' twin.
    reverse_types = {
        to_etype_name(k): 'rev-' + to_etype_name(k)
        for k in dataset.possible_rating_values
    }
    reverse_types.update({v: k for k, v in reverse_types.items()})
    # Single sampling layer; presumably [None] means "all neighbors" — TODO
    # confirm against the DGL version in use.
    sampler = dgl.dataloading.MultiLayerNeighborSampler([None],
                                                        return_eids=True)
    # Training batches iterate over every forward rating edge of the
    # training encoder graph.
    dataloader = dgl.dataloading.EdgeDataLoader(dataset.train_enc_graph, {
        to_etype_name(k): th.arange(
            dataset.train_enc_graph.number_of_edges(etype=to_etype_name(k)))
        for k in dataset.possible_rating_values
    },
                                                sampler,
                                                use_ddp=n_gpus > 1,
                                                batch_size=args.minibatch_size,
                                                shuffle=True,
                                                drop_last=False)

    if proc_id == 0:
        # Evaluation iterates decoder-graph edges while sampling neighbors
        # from the matching encoder graph.
        valid_dataloader = dgl.dataloading.EdgeDataLoader(
            dataset.valid_dec_graph,
            th.arange(dataset.valid_dec_graph.number_of_edges()),
            sampler,
            g_sampling=dataset.valid_enc_graph,
            batch_size=args.minibatch_size,
            shuffle=False,
            drop_last=False)
        test_dataloader = dgl.dataloading.EdgeDataLoader(
            dataset.test_dec_graph,
            th.arange(dataset.test_dec_graph.number_of_edges()),
            sampler,
            g_sampling=dataset.test_enc_graph,
            batch_size=args.minibatch_size,
            shuffle=False,
            drop_last=False)

    # Rating values as a float tensor on the device, used to turn class
    # probabilities into an expected rating.
    nd_possible_rating_values = \
        th.FloatTensor(dataset.possible_rating_values)
    nd_possible_rating_values = nd_possible_rating_values.to(dev_id)

    net = Net(args=args, dev_id=dev_id)
    net = net.to(dev_id)
    if n_gpus > 1:
        net = DistributedDataParallel(net,
                                      device_ids=[dev_id],
                                      output_device=dev_id)
    rating_loss_net = nn.CrossEntropyLoss()
    learning_rate = args.train_lr
    optimizer = get_optimizer(args.train_optimizer)(net.parameters(),
                                                    lr=learning_rate)
    print("Loading network finished ...\n")

    ### declare the loss information
    best_valid_rmse = np.inf
    no_better_valid = 0
    best_epoch = -1
    # Running totals over the whole run (not reset per epoch).
    count_rmse = 0
    count_num = 0
    count_loss = 0
    print("Start training ...")
    dur = []
    iter_idx = 1

    # Epochs are numbered from 1; the range stops before train_max_epoch.
    for epoch in range(1, args.train_max_epoch):
        if n_gpus > 1:
            dataloader.set_epoch(epoch)
        # The first epoch is not timed.
        if epoch > 1:
            t0 = time.time()
        net.train()
        with tqdm.tqdm(dataloader) as tq:
            for step, (input_nodes, pair_graph, blocks) in enumerate(tq):
                head_feat, tail_feat, blocks = load_subtensor(
                    input_nodes, pair_graph, blocks, dataset,
                    dataset.train_enc_graph)
                frontier = blocks[0]
                compact_g = flatten_etypes(pair_graph, dataset,
                                           'train').to(dev_id)
                true_relation_labels = compact_g.edata['label']
                true_relation_ratings = compact_g.edata['rating']

                head_feat = head_feat.to(dev_id)
                tail_feat = tail_feat.to(dev_id)
                frontier = frontier.to(dev_id)

                pred_ratings = net(compact_g, frontier, head_feat, tail_feat,
                                   dataset.possible_rating_values)
                loss = rating_loss_net(pred_ratings,
                                       true_relation_labels.to(dev_id)).mean()
                count_loss += loss.item()
                optimizer.zero_grad()
                loss.backward()
                nn.utils.clip_grad_norm_(net.parameters(),
                                         args.train_grad_clip)
                optimizer.step()

                if proc_id == 0 and iter_idx == 1:
                    print("Total #Param of net: %d" %
                          (torch_total_param_num(net)))

                # Expected rating under the predicted class distribution.
                real_pred_ratings = (
                    th.softmax(pred_ratings, dim=1) *
                    nd_possible_rating_values.view(1, -1)).sum(dim=1)
                # Sum of squared errors for this batch; the progress bar
                # shows the running mean squared error under the key 'rmse'.
                rmse = ((real_pred_ratings -
                         true_relation_ratings.to(dev_id))**2).sum()
                count_rmse += rmse.item()
                count_num += pred_ratings.shape[0]

                tq.set_postfix(
                    {
                        'loss': '{:.4f}'.format(count_loss / iter_idx),
                        'rmse': '{:.4f}'.format(count_rmse / count_num)
                    },
                    refresh=False)

                iter_idx += 1

        if epoch > 1:
            epoch_time = time.time() - t0
            print("Epoch {} time {}".format(epoch, epoch_time))

        if epoch % args.train_valid_interval == 0:
            if n_gpus > 1:
                th.distributed.barrier()
            if proc_id == 0:
                valid_rmse = evaluate(args=args,
                                      dev_id=dev_id,
                                      net=net,
                                      dataset=dataset,
                                      dataloader=valid_dataloader,
                                      segment='valid')
                logging_str = 'Val RMSE={:.4f}'.format(valid_rmse)

                if valid_rmse < best_valid_rmse:
                    # New best: remember it and score the test split.
                    best_valid_rmse = valid_rmse
                    no_better_valid = 0
                    best_epoch = epoch
                    test_rmse = evaluate(args=args,
                                         dev_id=dev_id,
                                         net=net,
                                         dataset=dataset,
                                         dataloader=test_dataloader,
                                         segment='test')
                    best_test_rmse = test_rmse
                    logging_str += ', Test RMSE={:.4f}'.format(test_rmse)
                else:
                    no_better_valid += 1
                    # Stop only once the LR has already reached its floor.
                    # NOTE(review): this break runs on worker 0 only; other
                    # workers would keep waiting at the barriers — confirm.
                    if no_better_valid > args.train_early_stopping_patience\
                        and learning_rate <= args.train_min_lr:
                        logging.info(
                            "Early stopping threshold reached. Stop training.")
                        break
                    if no_better_valid > args.train_decay_patience:
                        new_lr = max(
                            learning_rate * args.train_lr_decay_factor,
                            args.train_min_lr)
                        if new_lr < learning_rate:
                            logging.info("\tChange the LR to %g" % new_lr)
                            learning_rate = new_lr
                            for p in optimizer.param_groups:
                                p['lr'] = learning_rate
                            no_better_valid = 0
                            print("Change the LR to %g" % new_lr)
            # sync on evaluation
            if n_gpus > 1:
                th.distributed.barrier()

            if proc_id == 0:
                print(logging_str)
    if proc_id == 0:
        # NOTE(review): best_test_rmse is only bound after a validation pass
        # improved on the initial best; otherwise this line raises.
        print(
            'Best epoch Idx={}, Best Valid RMSE={:.4f}, Best Test RMSE={:.4f}'.
            format(best_epoch, best_valid_rmse, best_test_rmse))
def __init__(
        self,
        rating_vals,
        user_in_units,
        movie_in_units,
        msg_units,
        out_units,
        dropout_rate=0.0,
        agg='stack',  # or 'sum'
        agg_act=None,
        out_act=None,
        share_user_item_param=False,
        device=None):
    """Build one graph-convolution layer with a sub-module per rating.

    Parameters
    ----------
    rating_vals : sequence
        Rating values; each yields a relation and its ``rev-`` counterpart.
    user_in_units : int
        Input feature size on the user side.
    movie_in_units : int
        Input feature size on the movie side.
    msg_units : int
        Message size. With ``agg='stack'`` it is divided by the number of
        ratings for the sub-convolutions so that the stacked result keeps
        this size; it must then be divisible by ``len(rating_vals)``.
    out_units : int
        Output size of the ``ufc`` / ``ifc`` linear layers.
    dropout_rate : float, optional
        Rate for ``self.dropout`` and for each sub-convolution.
    agg : str, optional
        Aggregation passed to ``HeteroGraphConv`` ('stack' or 'sum').
    agg_act, out_act : optional
        Activation specs resolved through ``get_activation``.
    share_user_item_param : bool, optional
        If True, users and items share the output linear layer; if the two
        input sizes also match, each rating and its reverse share one weight
        stored in ``self.W_r``.
    device : optional
        Forwarded to the sub-convolutions and stored on the layer.

    Raises
    ------
    AssertionError
        If ``agg == 'stack'`` and ``msg_units`` is not divisible by the
        number of rating values.
    """
    super(GCMCLayer, self).__init__()
    self.rating_vals = rating_vals
    self.agg = agg
    self.share_user_item_param = share_user_item_param
    # The output layers see the full (pre-division) message size.
    self.ufc = nn.Linear(msg_units, out_units)
    if share_user_item_param:
        self.ifc = self.ufc
    else:
        self.ifc = nn.Linear(msg_units, out_units)
    if agg == 'stack':
        # divide the original msg unit size by number of ratings to keep
        # the dimensionality
        assert msg_units % len(rating_vals) == 0
        msg_units = msg_units // len(rating_vals)
    self.dropout = nn.Dropout(dropout_rate)
    self.W_r = nn.ParameterDict()
    subConv = {}
    for rating in rating_vals:
        # PyTorch parameter name can't contain "."
        rating = to_etype_name(rating)
        rev_rating = 'rev-%s' % rating
        if share_user_item_param and user_in_units == movie_in_units:
            # One weight per rating, shared by both directions and held by
            # this layer; the sub-modules are built with weight=False.
            self.W_r[rating] = nn.Parameter(
                th.randn(user_in_units, msg_units))
            self.W_r[rev_rating] = self.W_r[rating]
            subConv[rating] = GCMCGraphGAT(user_in_units,
                                           msg_units,
                                           weight=False,
                                           device=device,
                                           dropout_rate=dropout_rate)
            subConv[rev_rating] = GCMCGraphGAT(user_in_units,
                                               msg_units,
                                               weight=False,
                                               device=device,
                                               dropout_rate=dropout_rate)
        else:
            # Sub-modules are built with weight=True; no shared dict needed.
            self.W_r = None
            subConv[rating] = GCMCGraphConv(user_in_units,
                                            msg_units,
                                            weight=True,
                                            device=device,
                                            dropout_rate=dropout_rate)
            subConv[rev_rating] = GCMCGraphConv(movie_in_units,
                                                msg_units,
                                                weight=True,
                                                device=device,
                                                dropout_rate=dropout_rate)
    self.conv = dglnn.HeteroGraphConv(subConv, aggregate=agg)
    self.agg_act = get_activation(agg_act)
    self.out_act = get_activation(out_act)
    self.device = device
    self.reset_parameters()
def _generate_enc_graph(self, rating_pairs, rating_values, add_support=False):
    """Build the encoder heterograph from one bipartite graph per rating.

    Each value in ``self.possible_rating_values`` contributes a
    ``user -> movie`` bipartite graph and its ``rev-`` mirror; all of them
    are combined with ``dgl.hetero_from_relations``.

    Parameters
    ----------
    rating_pairs : tuple of np.ndarray
        ``(user_ids, movie_ids)`` index arrays, one entry per rating.
    rating_values : np.ndarray
        Rating of each pair, aligned with ``rating_pairs``.
    add_support : bool, optional
        If True, attach the ``ci`` / ``cj`` degree-normalization tensors to
        the ``user`` and ``movie`` nodes.

    Returns
    -------
    DGLHeteroGraph
        The combined graph.

    Raises
    ------
    AssertionError
        If the number of forward edges differs from the number of pairs.
    """
    # NOTE(review): dgl.bipartite / dgl.hetero_from_relations appear to be
    # deprecated in newer DGL releases in favour of dgl.heterograph —
    # confirm against the DGL version this file targets.
    rating_row, rating_col = rating_pairs
    user_count, movie_count = self._num_user, self._num_movie

    # The graphs are built straight from the edge lists; no dense
    # (num_user x num_movie) rating matrix is needed.
    rating_graphs = []
    for rating in self.possible_rating_values:
        ridx = np.where(rating_values == rating)
        rrow = rating_row[ridx]
        rcol = rating_col[ridx]
        rating = to_etype_name(rating)
        rating_graphs.append(
            dgl.bipartite((rrow, rcol), 'user', rating, 'movie',
                          num_nodes=(user_count, movie_count)))
        rating_graphs.append(
            dgl.bipartite((rcol, rrow), 'movie', 'rev-%s' % rating, 'user',
                          num_nodes=(movie_count, user_count)))
    graph = dgl.hetero_from_relations(rating_graphs)

    # sanity check: every pair appears once forward and once reversed
    assert len(rating_pairs[0]) == sum(
        [graph.number_of_edges(et) for et in graph.etypes]) // 2

    if add_support:
        def _calc_norm(x):
            # 1 / sqrt(degree); zero degrees become inf first so that the
            # coefficient ends up 0 rather than dividing by zero.
            x = x.numpy().astype('float32')
            x[x == 0.] = np.inf
            x = th.FloatTensor(1. / np.sqrt(x))
            return x.unsqueeze(1)

        user_ci, movie_ci = [], []
        user_cj, movie_cj = [], []
        for r in self.possible_rating_values:
            r = to_etype_name(r)
            user_ci.append(graph['rev-%s' % r].in_degrees())
            movie_ci.append(graph[r].in_degrees())
            if self._symm:
                user_cj.append(graph[r].out_degrees())
                movie_cj.append(graph['rev-%s' % r].out_degrees())
        user_ci = _calc_norm(sum(user_ci))
        movie_ci = _calc_norm(sum(movie_ci))
        if self._symm:
            user_cj = _calc_norm(sum(user_cj))
            movie_cj = _calc_norm(sum(movie_cj))
        else:
            # Without symmetric normalization the source-side factor is 1.
            user_cj = th.ones(self.num_user, )
            movie_cj = th.ones(self.num_movie, )
        graph.nodes['user'].data.update({'ci': user_ci, 'cj': user_cj})
        graph.nodes['movie'].data.update({'ci': movie_ci, 'cj': movie_cj})

    return graph
def forward(self, compact_g, frontier, ufeat, ifeat, possible_rating_values, Two_Stage=False):
    """Mini-batch forward pass: encode over the sampled blocks, decode on the
    compact pair graph, and compute the weight-smoothness regularizer.

    Parameters
    ----------
    compact_g : DGLHeteroGraph
        Graph given to the decoder.
    frontier : sequence
        One graph per encoder layer; layer ``i`` receives ``frontier[i]``.
    ufeat, ifeat : torch.Tensor
        Input user / movie features of the first encoder layer.
    possible_rating_values : sequence
        Not used by this method.
    Two_Stage : bool, optional
        Not used by this method.

    Returns
    -------
    tuple
        ``(pred_ratings, reg_loss)``.
    """
    user_out = []
    movie_out = []
    for i in range(0, args.layers):
        user_o, movie_o = self.encoder[i](frontier[i], ufeat, ifeat)
        ufeat = user_o
        ifeat = movie_o
        user_out.append(user_o)
        movie_out.append(movie_o)

    # Row counts of the last layer's outputs; earlier layers are cropped to
    # these sizes before being combined.
    u_size = user_o.shape[0]
    m_size = movie_o.shape[0]
    for i in range(0, args.layers):
        if i == 0:
            user_o = user_out[i][:u_size, :]
            movie_o = movie_out[i][:m_size, :]
        else:
            # Out-of-place on purpose: ``user_o`` starts as a slice *view* of
            # layer 0's output, which layer 1 consumed; an in-place ``+=``
            # would write through to that tensor inside the autograd graph.
            user_o = user_o + user_out[i][:u_size, :] / float(i + 1)
            movie_o = movie_o + movie_out[i][:m_size, :] / float(i + 1)

    pred_ratings = self.decoder(compact_g, user_o, movie_o)

    # Per-rating weights of the first encoder layer.
    first = self.encoder[0]
    W = th.matmul(first.att, first.basis.view(first.basis_units, -1))
    W = W.view(len(self.rating_vals), self.src_in_units, -1)

    # Reward similarity between weights of consecutive rating values:
    # minus the summed row-wise cosine similarity.
    reg_loss = 0.0
    for i in range(1, len(self.rating_vals)):
        reg_loss = reg_loss - th.sum(
            th.cosine_similarity(W[i, :, :], W[i - 1, :, :], dim=1))
    return pred_ratings, reg_loss