def evaluate_agent(agent, epoch, batch_size, recom_length, validSample, testSample,
                   device, eval_type='valid', final_eval=False):
    correct = 0.
    correctk = 0.
    sample = validSample if eval_type == 'valid' else testSample
    print('\nVALIDATION : Epoch {0}'.format(epoch))
    for i in range(0, sample.length(), batch_size):
        # prepare batch
        embed_batch, length, tgt_batch, reward_batch, action_batch = getBatch_pred(
            i, i + batch_size, sample, None, recom_length)
        embed_batch, tgt_batch, action_batch, reward_batch = (
            Variable(embed_batch.to(device)), Variable(tgt_batch.to(device)),
            Variable(action_batch.to(device)), Variable(reward_batch.to(device)))
        k = embed_batch.size(0)  # actual batch size
        # agent forward
        agent.eval()
        probs, _, _ = agent((embed_batch, length), True)
        # keep only the scores of the recommended items (the slate), then pick the best one
        mask = torch.zeros(k, probs.size(1)).to(device)
        mask.scatter_(1, action_batch, 1.)
        outputk = probs * mask
        output_click = outputk.data.max(1)[1]
        correct += output_click.long().eq(tgt_batch.data.long()).cpu().numpy().sum()
        all_prob_output = outputk.data.cpu().numpy()
        for j in range(len(all_prob_output)):
            # rank of the true click under descending scores
            pos = int(np.argwhere(
                np.argsort(-all_prob_output[j]) ==
                tgt_batch.data.long().cpu().numpy()[j])[0] + 1)
            # precision@10
            if pos <= 10:
                correctk += 1
    eval_acc = np.round(100 * correct / sample.length(), 2)
    eval_prek = np.round(100 * correctk / sample.length(), 2)
    if final_eval:
        print('finalgrep : accuracy {0} : {1}, precision@k {0} : {2}'.format(
            eval_type, eval_acc, eval_prek))
    else:
        print('togrep : results : epoch {0} ; accuracy {1} : {2}, precision@10 {1} : {3}'
              .format(epoch, eval_type, eval_acc, eval_prek))
    return eval_acc, eval_prek
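
# Illustrative sketch (hypothetical helper with toy tensors, not part of the original
# module): how the scatter_-based mask in evaluate_agent restricts the predicted
# click to the recommended slate before taking the arg-max.
def _example_slate_mask():
    import torch
    probs = torch.tensor([[0.1, 0.4, 0.2, 0.3]])           # scores over 4 items
    action = torch.tensor([[1, 3]])                         # ids of the recommended items
    mask = torch.zeros_like(probs).scatter_(1, action, 1.)  # 1 on the slate, 0 elsewhere
    masked = probs * mask                                   # off-slate scores become 0
    return masked.max(1)[1]                                 # tensor([1]): best on-slate item
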
def evaluate_interaction(model, epoch, batch_size, recom_length, validSample, testSample,
                         loss_fn_target, loss_fn_reward, device, eval_type='valid',
                         final_eval=False):
    correct = 0.
    correct_reward = 0.
    mapeach = 0.
    all_costs = []
    sample = validSample if eval_type == 'valid' else testSample
    print('\nVALIDATION : Epoch {0}'.format(epoch))
    for i in range(0, sample.length(), batch_size):
        # prepare batch
        embed_batch, length, tgt_batch, reward_batch, action_batch = getBatch_pred(
            i, i + batch_size, sample, None, recom_length)
        embed_batch, tgt_batch, reward_batch, action_batch = (
            Variable(embed_batch.to(device)), Variable(tgt_batch.to(device)),
            Variable(reward_batch.to(device)), Variable(action_batch.to(device)))
        k = embed_batch.size(0)  # actual batch size
        # generator and agent forward
        generator, agent = model
        generator.eval()
        agent.eval()
        enc_out, h = generator((embed_batch, length))
        _, action, _ = agent((embed_batch, length), True)
        # next-click prediction conditioned on the agent's recommendation
        output = generator.next_click(enc_out[:, -1, :], action, len(embed_batch))
        reward, reward_logit = generator.get_reward(tgt_batch.view(-1, 1),
                                                    enc_out[:, -1, :].unsqueeze(0))
        # reward correctness
        pred_reward = torch.round(reward.data)
        correct_reward += pred_reward.long().eq(reward_batch.data.long()).cpu().numpy().sum()
        # click correctness
        pred = output.data.max(1)[1]
        correct += pred.long().eq(tgt_batch.data.long()).cpu().numpy().sum()
        all_prob = output.data.cpu().numpy()
        for j in range(len(output)):
            # reciprocal rank of the true click under descending scores
            mapeach += 1 / int(np.argwhere(
                np.argsort(-all_prob[j]) ==
                tgt_batch.data.long().cpu().numpy()[j])[0] + 1)
        # loss
        with torch.no_grad():
            loss_pred = loss_fn_target(output, tgt_batch)
            loss_reward = loss_fn_reward(reward_logit, reward_batch)
            loss = loss_pred + loss_reward
        all_costs.append(loss.data.cpu().numpy())
    eval_acc = np.round(100 * correct / sample.length(), 2)
    eval_map = np.round(100 * mapeach / sample.length(), 2)
    eval_acc_reward = np.round(100 * correct_reward / sample.length(), 2)
    if final_eval:
        print('finalgrep : accuracy {0} : {1}, map {0} : {2}, accuracy reward {0} : {3}'
              .format(eval_type, eval_acc, eval_map, eval_acc_reward))
    else:
        print('togrep : results : epoch {0} ; mean accuracy pred {1} : {2}, '
              'map pred {1} : {3}; mean accuracy reward {1} : {4}'
              .format(epoch, eval_type, eval_acc, eval_map, eval_acc_reward))
    return eval_acc, eval_map, eval_acc_reward, np.mean(all_costs)
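
# Illustrative sketch (hypothetical helper with toy values, not part of the original
# module): the "map" statistic in evaluate_interaction accumulates the reciprocal rank
# of the target item under the descending score order, computed the same way as above.
def _example_reciprocal_rank():
    import numpy as np
    scores = np.array([0.1, 0.5, 0.2, 0.2])
    target = 2
    rank = int(np.argwhere(np.argsort(-scores) == target)[0, 0]) + 1  # item 2 ranks 2nd
    return 1.0 / rank                                                 # 0.5
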
def evaluate_user(generator, epoch, batch_size, recom_length, validSample, testSample,
                  loss_fn_target, loss_fn_reward, device, eval_type='valid',
                  model_type='recommend', final_eval=False):
    correct = 0.
    correctk = 0.
    correct_reward = 0.
    all_costs = []
    sample = validSample if eval_type == 'valid' else testSample
    print('\nVALIDATION : Epoch {0}'.format(epoch))
    with torch.no_grad():
        # the passed-in losses are replaced by freshly built ones for evaluation
        loss_fn_target = nn.CrossEntropyLoss()
        loss_fn_reward = nn.BCEWithLogitsLoss()
        loss_fn_target.size_average = True
        loss_fn_target.to(device)
        loss_fn_reward.size_average = True
        loss_fn_reward.to(device)
        for i in range(0, sample.length(), batch_size):
            # prepare batch
            embed_batch, length, tgt_batch, reward_batch, action_batch = getBatch_pred(
                i, i + batch_size, sample, None, recom_length)
            embed_batch, tgt_batch, reward_batch, action_batch = (
                Variable(embed_batch.to(device)), Variable(tgt_batch.to(device)),
                Variable(reward_batch.to(device)), Variable(action_batch.to(device)))
            k = embed_batch.size(0)  # actual batch size
            # model (user simulator) forward
            generator.eval()
            enc_out, h = generator((embed_batch, length))
            if model_type == 'recommend':
                # next click conditioned on the recommended slate
                output = generator.next_click(enc_out[:, -1, :], action_batch,
                                              len(embed_batch))
            else:
                output = generator.next_simple(enc_out[:, -1, :])
            output_click = output.data.max(1)[1]
            correct += output_click.long().eq(tgt_batch.data.long()).cpu().numpy().sum()
            all_prob_output = output.data.cpu().numpy()
            # reward correctness
            reward, reward_logit = generator.get_reward(tgt_batch.view(-1, 1),
                                                        enc_out[:, -1, :].unsqueeze(0))
            pred_reward = torch.round(reward)
            correct_reward += pred_reward.long().eq(reward_batch.data.long()).cpu().numpy().sum()
            for j in range(len(all_prob_output)):
                # rank of the true click under descending scores
                pos = int(np.argwhere(
                    np.argsort(-all_prob_output[j]) ==
                    tgt_batch.data.long().cpu().numpy()[j])[0] + 1)
                # precision@10
                if pos <= 10:
                    correctk += 1
            # loss
            loss_pred = loss_fn_target(output, tgt_batch)
            loss_reward = loss_fn_reward(reward_logit, reward_batch)
            loss = loss_pred + loss_reward
            all_costs.append(loss.data.cpu().numpy())
    eval_acc = np.round(100 * correct / sample.length(), 2)
    eval_prek = np.round(100 * correctk / sample.length(), 2)
    eval_acc_rewd = np.round(100 * correct_reward / sample.length(), 2)
    if final_eval:
        print('finalgrep : accuracy {0} : {1}, precision@k {0} : {2}'.format(
            eval_type, eval_acc, eval_prek))
    else:
        print('togrep : results : epoch {0} ; accuracy {1} : {2}, precision@10 {1} : {3}, '
              'reward_accuracy {1} {4}'
              .format(epoch, eval_type, eval_acc, eval_prek, eval_acc_rewd))
    return eval_acc, eval_prek, eval_acc_rewd, np.mean(all_costs)
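
# Illustrative sketch (hypothetical helper with toy values, not part of the original
# module): the reward accuracy in evaluate_user thresholds the predicted reward at 0.5
# via torch.round and compares it to the 0/1 reward label, assuming get_reward returns
# probabilities in [0, 1].
def _example_reward_accuracy():
    import torch
    reward_prob = torch.tensor([0.2, 0.7, 0.9])    # toy predicted reward probabilities
    reward_label = torch.tensor([0., 1., 0.])      # toy binary reward labels
    return torch.round(reward_prob).long().eq(reward_label.long()).sum().item()  # 2
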
def train_pred_each(generator, epoch, trainSample, optimizer, batch_size, embed_dim,
                    recom_length, loss_fn_target, loss_fn_reward, device,
                    generator_only=True, action_given=True, only_rewards=False,
                    agent=None):  # agent is needed only when generator_only is False
    print('\nGENERATOR TRAINING : Epoch ' + str(epoch))
    generator.train()
    all_costs = []
    logs = []
    decay = 0.95
    max_norm = 5
    # loss_fn.size_average = False
    all_num = 0
    last_time = time.time()
    correct = 0.
    correct_reward = 0.
    # mapeach = 0.
    correctk = 0.
    # adjust the learning rate
    if epoch > 1:
        optimizer.param_groups[0]['lr'] = optimizer.param_groups[0]['lr'] * decay
    print('Learning rate : {0}'.format(optimizer.param_groups[0]['lr']))
    for stidx in range(0, trainSample.length(), batch_size):
        # prepare batch
        embed_batch, length, tgt_batch, reward_batch, action_batch = getBatch_pred(
            stidx, stidx + batch_size, trainSample, embed_dim, recom_length)
        embed_batch, tgt_batch, reward_batch, action_batch = (
            Variable(embed_batch.to(device)), Variable(tgt_batch.to(device)),
            Variable(reward_batch.to(device)), Variable(action_batch.to(device)))
        k = embed_batch.size(0)  # actual batch size
        # model forward
        enc_out, h = generator((embed_batch, length))
        if generator_only:
            if action_given:
                output = generator.next_click(enc_out[:, -1, :], action_batch,
                                              len(embed_batch))
            else:
                output = generator.next_simple(enc_out[:, -1, :])
        else:
            # get next click conditioned on the agent's recommendation
            _, action, _ = agent((embed_batch, length))
            output = generator.next_click(enc_out[:, -1, :], action, len(embed_batch))
        # predicted reward for the ground-truth next click
        reward, reward_logit = generator.get_reward(tgt_batch.view(-1, 1),
                                                    enc_out[:, -1, :].unsqueeze(0))
        all_prob_output = output.data.cpu().numpy()
        # reward correctness
        pred_reward = torch.round(reward.data)
        correct_reward += pred_reward.long().eq(reward_batch.data.long()).cpu().numpy().sum()
        for i in range(len(all_prob_output)):
            # rank of the true click under descending scores
            pos = int(np.argwhere(
                np.argsort(-all_prob_output[i]) ==
                tgt_batch.data.long().cpu().numpy()[i])[0] + 1)
            # mapeach += 1 / pos
            # p@1 and p@10
            if pos <= 1:
                correct += 1
            if pos <= 10:
                correctk += 1
        # loss
        loss_pred = loss_fn_target(output, tgt_batch)
        # weight_loss = (reward_batch + 1)  # ** 5.3
        weight_loss = torch.FloatTensor(k).fill_(1).to(device)
        loss_fn_reward = nn.BCEWithLogitsLoss(weight_loss)
        loss_fn_target.size_average = True
        loss_reward = loss_fn_reward(reward_logit, reward_batch)
        if not only_rewards:
            loss = loss_pred + loss_reward
        else:
            loss = loss_reward
        # disable updates of the RNN model (embedding, encoder and enc2out layers)
        for name, param in generator.named_parameters():
            if 'embedding' in name or 'encoder' in name or 'enc2out' in name:
                param.requires_grad = False
        all_costs.append(loss.data.cpu().numpy())
        # backward
        optimizer.zero_grad()
        loss.backward()
        # gradient clipping
        clip_grad_norm_(filter(lambda p: p.requires_grad, generator.parameters()), max_norm)
        # clip_grad_value_(filter(lambda p: p.requires_grad, generator.parameters()), 1)
        # optimizer step
        optimizer.step()
    train_acc = np.round(100 * correct / trainSample.length(), 2)
    # train_map = np.round(100 * mapeach / trainSample.length(), 2)
    train_preck = np.round(100 * correctk / trainSample.length(), 2)
    train_reward_acc = np.round(100 * correct_reward / trainSample.length(), 2)
    print('results : epoch {0} ; mean accuracy pred : {1}; mean P@10 pred: {2}; '
          'mean accuracy reward: {3}'.format(epoch, train_acc, train_preck,
                                             train_reward_acc))
    return train_acc, train_preck, np.mean(all_costs)
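
# Illustrative wiring sketch (hypothetical helper, not part of the original module):
# one possible way to combine train_pred_each and evaluate_user per epoch. All
# arguments are assumed to be constructed elsewhere (generator, optimizer, data
# samples, loss functions); the default hyperparameters below are placeholders.
def _example_pretrain_loop(generator, optimizer, trainSample, validSample, testSample,
                           loss_fn_target, loss_fn_reward, device,
                           n_epochs=5, batch_size=64, embed_dim=64, recom_length=20):
    best_prek = 0.
    for epoch in range(1, n_epochs + 1):
        # one epoch of generator pre-training with the logged recommendations given
        train_pred_each(generator, epoch, trainSample, optimizer, batch_size,
                        embed_dim, recom_length, loss_fn_target, loss_fn_reward,
                        device, generator_only=True, action_given=True)
        # validate the user model with the recommendation-conditioned click head
        acc, prek, reward_acc, val_loss = evaluate_user(
            generator, epoch, batch_size, recom_length, validSample, testSample,
            loss_fn_target, loss_fn_reward, device, eval_type='valid',
            model_type='recommend')
        # track the best precision@10 seen so far (checkpointing omitted)
        best_prek = max(best_prek, prek)
    return best_prek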