class TrainLoop_fusion_gen():
    """Training loop for the response-generation stage of the fusion model.

    Builds a ``CrossModel`` over the ReDial training data, optimizes only the
    generation loss, evaluates after each pass, and checkpoints the model
    whenever the distinct-4 diversity metric improves.
    """

    def __init__(self, opt, is_finetune):
        """
        :param opt: dict of hyper-parameters and runtime options
            (``batch_size``, ``epoch``, ``use_cuda``, ``load_dict``, ...)
        :param is_finetune: accepted for interface compatibility; the model is
            always built with ``is_finetune=True`` in the generation stage
        """
        self.opt = opt
        self.train_dataset = dataset('data/train_data.jsonl', opt)

        # vocabulary maps shared with the model
        self.dict = self.train_dataset.word2index
        self.index2word = {self.dict[key]: key for key in self.dict}

        self.batch_size = self.opt['batch_size']
        self.epoch = self.opt['epoch']
        self.use_cuda = opt['use_cuda']
        # Fix: identity comparison with None (was ``!= None``).
        self.load_data = opt['load_dict'] is not None
        self.is_finetune = False

        # Fix: close the pickle file handle (the original leaked it).
        with open("data/movie_ids.pkl", "rb") as f:
            self.movie_ids = pkl.load(f)

        # Note: we cannot change the type of metrics ahead of time, so you
        # should correctly initialize to floats or ints here
        self.metrics_rec = {"recall@1": 0, "recall@10": 0, "recall@50": 0,
                            "loss": 0, "count": 0}
        self.metrics_gen = {"dist1": 0, "dist2": 0, "dist3": 0, "dist4": 0,
                            "bleu1": 0, "bleu2": 0, "bleu3": 0, "bleu4": 0,
                            "count": 0}

        self.build_model(is_finetune=True)

        if opt['load_dict'] is not None:
            # load model parameters if available
            print('[ Loading existing model params from {} ]'
                  ''.format(opt['load_dict']))
            states = self.model.load(opt['load_dict'])
        else:
            states = {}

        self.init_optim(
            [p for p in self.model.parameters() if p.requires_grad],
            optim_states=states.get('optimizer'),
            saved_optim_type=states.get('optimizer_type')
        )

    def build_model(self, is_finetune):
        """Instantiate the ``CrossModel`` and move it to GPU if requested."""
        self.model = CrossModel(self.opt, self.dict, is_finetune)
        if self.opt['embedding_type'] != 'random':
            # non-random embeddings are loaded elsewhere; nothing to do here
            pass
        if self.use_cuda:
            self.model.cuda()

    def train(self):
        """Run ``3 * epoch`` passes over the training data on the generation
        loss only, validating after each pass and saving the checkpoint with
        the best (highest) distinct-4 score."""
        self.model.load_model()
        losses = []
        # Fix: track the *maximum* dist4 (distinct-4 is higher-is-better);
        # the original comparison saved the model on non-improving values.
        best_val_gen = 0.0
        for _ in range(self.epoch * 3):
            train_set = CRSdataset(self.train_dataset.data_process(True),
                                   self.opt['n_entity'], self.opt['n_concept'])
            train_dataset_loader = torch.utils.data.DataLoader(
                dataset=train_set, batch_size=self.batch_size, shuffle=False)
            num = 0
            for context, c_lengths, response, r_length, mask_response, mask_r_length, entity, entity_vector, movie, concept_mask, dbpedia_mask, concept_vec, db_vec, rec in tqdm(train_dataset_loader):
                # per-sample lists of entity ids mentioned in the context
                seed_sets = []
                batch_size = context.shape[0]
                for b in range(batch_size):
                    seed_set = entity[b].nonzero().view(-1).tolist()
                    seed_sets.append(seed_set)

                self.model.train()
                self.zero_grad()

                scores, preds, rec_scores, rec_loss, gen_loss, mask_loss, info_db_loss, info_con_loss = self.model(
                    context.cuda(), response.cuda(), mask_response.cuda(),
                    concept_mask, dbpedia_mask, seed_sets, movie,
                    concept_vec, db_vec, entity_vector.cuda(), rec, test=False)

                # generation stage optimizes the generation loss alone
                joint_loss = gen_loss
                losses.append([gen_loss])
                self.backward(joint_loss)
                self.update_params()

                if num % 50 == 0:
                    print('gen loss is %f' % (sum([l[0] for l in losses]) / len(losses)))
                    losses = []
                num += 1

            # NOTE(review): this evaluates on the *test* split every pass
            # (``val(True)``), mirroring the original pipeline — confirm.
            output_metrics_gen = self.val(True)
            if output_metrics_gen["dist4"] > best_val_gen:
                best_val_gen = output_metrics_gen["dist4"]
                self.model.save_model()
                print("generator model saved once------------------------------------------------")

        _ = self.val(is_test=True)

    def val(self, is_test=False):
        """Evaluate generation quality on the validation (or test) split.

        Runs two forward passes per batch: a teacher-forced one for the
        generation loss, and a free-running one for the predictions used by
        the BLEU/distinct metrics. Also dumps contexts and generations to
        ``context_test.txt`` / ``output_test.txt``.

        :param is_test: evaluate on the test split instead of validation
        :returns: dict of generation metrics (bleu* averaged per sentence)
        """
        self.metrics_gen = {"ppl": 0, "dist1": 0, "dist2": 0, "dist3": 0, "dist4": 0,
                            "bleu1": 0, "bleu2": 0, "bleu3": 0, "bleu4": 0, "count": 0}
        self.metrics_rec = {"recall@1": 0, "recall@10": 0, "recall@50": 0,
                            "loss": 0, "gate": 0, "count": 0, 'gate_count': 0}
        self.model.eval()

        if is_test:
            val_dataset = dataset('data/test_data.jsonl', self.opt)
        else:
            val_dataset = dataset('data/valid_data.jsonl', self.opt)
        val_set = CRSdataset(val_dataset.data_process(True),
                             self.opt['n_entity'], self.opt['n_concept'])
        val_dataset_loader = torch.utils.data.DataLoader(
            dataset=val_set, batch_size=self.batch_size, shuffle=False)

        inference_sum = []
        golden_sum = []
        context_sum = []
        losses = []
        recs = []
        for context, c_lengths, response, r_length, mask_response, mask_r_length, entity, entity_vector, movie, concept_mask, dbpedia_mask, concept_vec, db_vec, rec in tqdm(val_dataset_loader):
            with torch.no_grad():
                seed_sets = []
                batch_size = context.shape[0]
                for b in range(batch_size):
                    seed_set = entity[b].nonzero().view(-1).tolist()
                    seed_sets.append(seed_set)

                # teacher-forced pass: yields the generation loss
                _, _, _, _, gen_loss, mask_loss, info_db_loss, info_con_loss = self.model(
                    context.cuda(), response.cuda(), mask_response.cuda(),
                    concept_mask, dbpedia_mask, seed_sets, movie,
                    concept_vec, db_vec, entity_vector.cuda(), rec, test=False)
                # free-running pass: yields the actual predictions
                scores, preds, rec_scores, rec_loss, _, mask_loss, info_db_loss, info_con_loss = self.model(
                    context.cuda(), response.cuda(), mask_response.cuda(),
                    concept_mask, dbpedia_mask, seed_sets, movie,
                    concept_vec, db_vec, entity_vector.cuda(), rec, test=True,
                    maxlen=20, bsz=batch_size)

            golden_sum.extend(self.vector2sentence(response.cpu()))
            inference_sum.extend(self.vector2sentence(preds.cpu()))
            context_sum.extend(self.vector2sentence(context.cpu()))
            recs.extend(rec.cpu())
            losses.append(torch.mean(gen_loss))

        self.metrics_cal_gen(losses, inference_sum, golden_sum, recs)

        output_dict_gen = {}
        for key in self.metrics_gen:
            if 'bleu' in key:
                # BLEU is accumulated per sentence; normalize by count
                output_dict_gen[key] = self.metrics_gen[key] / self.metrics_gen['count']
            else:
                output_dict_gen[key] = self.metrics_gen[key]
        print(output_dict_gen)

        # Fix: use context managers so files are closed even on error
        # (the original opened/closed by hand).
        with open('context_test.txt', 'w', encoding='utf-8') as f:
            f.writelines([' '.join(sen) + '\n' for sen in context_sum])
        with open('output_test.txt', 'w', encoding='utf-8') as f:
            f.writelines([' '.join(sen) + '\n' for sen in inference_sum])

        return output_dict_gen

    def metrics_cal_gen(self, rec_loss, preds, responses, recs):
        """Accumulate BLEU-1..4 and distinct-1..4 into ``self.metrics_gen``.

        :param rec_loss: per-batch generation losses (unused; kept for
            interface compatibility — perplexity computation is disabled)
        :param preds: list of generated token lists
        :param responses: list of gold token lists
        :param recs: recommendation flags, iterated in lockstep (unused per item)
        """
        def bleu_cal(sen1, tar1):
            # sentence-level BLEU for each individual n-gram order
            bleu1 = sentence_bleu([tar1], sen1, weights=(1, 0, 0, 0))
            bleu2 = sentence_bleu([tar1], sen1, weights=(0, 1, 0, 0))
            bleu3 = sentence_bleu([tar1], sen1, weights=(0, 0, 1, 0))
            bleu4 = sentence_bleu([tar1], sen1, weights=(0, 0, 0, 1))
            return bleu1, bleu2, bleu3, bleu4

        def distinct_metrics(outs):
            # outs is a list of sentences, each a list of words.
            # NOTE: ratios are normalized by the number of *sentences*,
            # not by total n-gram count (the classic distinct-n denominator).
            unigram_count = 0
            bigram_count = 0
            trigram_count = 0
            quagram_count = 0
            unigram_set = set()
            bigram_set = set()
            trigram_set = set()
            quagram_set = set()
            for sen in outs:
                for word in sen:
                    unigram_count += 1
                    unigram_set.add(word)
                for start in range(len(sen) - 1):
                    bg = str(sen[start]) + ' ' + str(sen[start + 1])
                    bigram_count += 1
                    bigram_set.add(bg)
                for start in range(len(sen) - 2):
                    trg = str(sen[start]) + ' ' + str(sen[start + 1]) + ' ' + str(sen[start + 2])
                    trigram_count += 1
                    trigram_set.add(trg)
                for start in range(len(sen) - 3):
                    quag = str(sen[start]) + ' ' + str(sen[start + 1]) + ' ' + str(sen[start + 2]) + ' ' + str(sen[start + 3])
                    quagram_count += 1
                    quagram_set.add(quag)
            dis1 = len(unigram_set) / len(outs)  # unigram_count
            dis2 = len(bigram_set) / len(outs)   # bigram_count
            dis3 = len(trigram_set) / len(outs)  # trigram_count
            dis4 = len(quagram_set) / len(outs)  # quagram_count
            return dis1, dis2, dis3, dis4

        predict_s = preds
        golden_s = responses
        generated = []
        for out, tar, rec in zip(predict_s, golden_s, recs):
            bleu1, bleu2, bleu3, bleu4 = bleu_cal(out, tar)
            generated.append(out)
            self.metrics_gen['bleu1'] += bleu1
            self.metrics_gen['bleu2'] += bleu2
            self.metrics_gen['bleu3'] += bleu3
            self.metrics_gen['bleu4'] += bleu4
            self.metrics_gen['count'] += 1

        dis1, dis2, dis3, dis4 = distinct_metrics(generated)
        self.metrics_gen['dist1'] = dis1
        self.metrics_gen['dist2'] = dis2
        self.metrics_gen['dist3'] = dis3
        self.metrics_gen['dist4'] = dis4

    def vector2sentence(self, batch_sen):
        """Convert a batch of token-id tensors to lists of word strings.

        Ids <= 2 (padding / specials) are dropped; id 3 becomes ``_UNK_``.
        """
        sentences = []
        for sen in batch_sen.numpy().tolist():
            sentence = []
            for word in sen:
                if word > 3:
                    sentence.append(self.index2word[word])
                elif word == 3:
                    sentence.append('_UNK_')
            sentences.append(sentence)
        return sentences

    @classmethod
    def optim_opts(self):
        """
        Fetch optimizer selection.

        By default, collects everything in torch.optim, as well as importing:
        - qhm / qhmadam if installed from github.com/facebookresearch/qhoptim

        Override this (and probably call super()) to add your own optimizers.
        """
        # first pull torch.optim in (upper-cased names are the classes)
        optims = {k.lower(): v for k, v in optim.__dict__.items()
                  if not k.startswith('__') and k[0].isupper()}
        try:
            import apex.optimizers.fused_adam as fused_adam
            optims['fused_adam'] = fused_adam.FusedAdam
        except ImportError:
            pass
        try:
            # https://openreview.net/pdf?id=S1fUpoR5FQ
            from qhoptim.pyt import QHM, QHAdam
            optims['qhm'] = QHM
            optims['qhadam'] = QHAdam
        except ImportError:
            # no QHM installed
            pass
        return optims

    def init_optim(self, params, optim_states=None, saved_optim_type=None):
        """
        Initialize optimizer with model parameters.

        :param params: parameters from the model
        :param optim_states: optional argument providing states of optimizer
            to load
        :param saved_optim_type: type of optimizer being loaded, if changed
            will skip loading optimizer states
        """
        opt = self.opt
        # set up optimizer args
        lr = opt['learningrate']
        kwargs = {'lr': lr}
        kwargs['amsgrad'] = True
        kwargs['betas'] = (0.9, 0.999)

        optim_class = self.optim_opts()[opt['optimizer']]
        self.optimizer = optim_class(params, **kwargs)

    def backward(self, loss):
        """
        Perform a backward pass.

        It is recommended you use this instead of loss.backward(), for
        integration with distributed training and FP16 training.
        """
        loss.backward()

    def update_params(self):
        """
        Perform step of optimization, clipping gradients and adjusting LR
        schedule if needed. Gradient accumulation is also performed if agent
        is called with --update-freq.

        It is recommended (but not forced) that you call this in train_step.
        """
        update_freq = 1
        if update_freq > 1:
            # we're doing gradient accumulation, so we don't only want to step
            # every N updates instead.
            # Fix: lazily initialize the accumulation counter (the original
            # dereferenced an attribute that was never set).
            self._number_grad_accum = (getattr(self, '_number_grad_accum', 0) + 1) % update_freq
            if self._number_grad_accum != 0:
                return

        if self.opt['gradient_clip'] > 0:
            torch.nn.utils.clip_grad_norm_(
                self.model.parameters(), self.opt['gradient_clip']
            )

        self.optimizer.step()

    def zero_grad(self):
        """
        Zero out optimizer.

        It is recommended you call this in train_step. It automatically
        handles gradient accumulation if agent is called with --update-freq.
        """
        self.optimizer.zero_grad()
class IConversationalRecommender():
    """Interactive console front-end for the conversational recommender.

    Loads the vocabulary, movie-name and DBpedia entity mappings, wraps a
    ``CrossModel``, and runs a read-eval-print loop (``start``) that turns
    user utterances into model responses.
    """

    def __init__(self, opt):
        """
        :param opt: dict of runtime options (must contain ``model_path``)
        """
        # tensorboard
        self.writer = SummaryWriter()

        # word from corpus trained by gensim
        self.word2index = json.load(
            open('data/word2index_redial.json', encoding='utf-8'))
        self.index2word = {
            self.word2index[key]: key
            for key in self.word2index
        }

        # mapping from movie ids to names
        self.id2moviename = pkl.load(open('data/movie_id2name.pkl', 'rb'))

        # dbpedia
        self.entity2entityId = pkl.load(open('data/entity2entityId.pkl', 'rb'))
        self.entityId2entity = dict([
            (val, key) for key, val in self.entity2entityId.items()
        ])

        # model
        self.opt = opt
        self.model = CrossModel(opt, self.word2index, is_finetune=True).cuda()
        self.processor = Processor(opt)

        # conversation logs
        self.logs = []

    def visualize_model(self):
        """Trace the model on a sample sentence and write the graph to
        tensorboard."""
        sample_sen = 'Sample sentence for model visualization'
        context, length, concept_mask, concept_bitmask, dbpedia_mask, dbpedia_bitmask, \
            entities, entity_bitmask = self.to_batch_tensor(
                *(self.processor.data_process([sample_sen])))
        seed_sets = [123]
        self.writer.add_graph(
            self.model,
            (context.cuda(), concept_mask, dbpedia_mask, concept_bitmask,
             dbpedia_bitmask, seed_sets, entities.cuda(), 3))

    def prompt(self):
        """Read one user utterance from stdin and append it to the logs."""
        self.input = input('KGSF> ').strip()
        self.logs.append(self.input)
        # TODO add popup of movie items for selection

    def to_batch_tensor(self, *args):
        """Convert numpy arrays to tensors with a leading batch dim of 1."""
        tensors = [torch.from_numpy(arg) for arg in args]
        return [torch.unsqueeze(t, 0) for t in tensors]

    def vector2sentence(self, sen: List[int]):
        """Render token ids as two strings: one with movie names substituted
        for display, and one keeping raw ``@id`` tokens for the logs.

        Ids <= 2 (specials) are dropped; id 3 becomes ``_UNK_``.
        :returns: ``(display_sentence, log_sentence)``
        """
        sentence_id = []
        sentence_name = []
        for idx in sen:
            # Fix: catch only the vocabulary miss (was a bare ``except:``
            # that also swallowed KeyboardInterrupt/SystemExit).
            try:
                if idx >= 3:
                    # without replace movie id with name
                    word = '_UNK_' if idx == 3 else self.index2word[idx]
                    sentence_id.append(word)
                    # replace id with name; startswith is safe on any string
                    word = self.convert_id_to_name(word) if word.startswith('@') else word
                    sentence_name.append(word)
            except KeyError:
                print("OOV", idx)
        return ' '.join(w for w in sentence_name), ' '.join(w for w in sentence_id)

    def convert_id_to_name(self, movieId: str) -> str:
        """Map an ``@<id>`` token to its movie name; fall back to the raw
        token when the id is unknown."""
        # Fix: narrowed from a bare ``except:`` — only the dict lookup
        # can legitimately fail here.
        try:
            return self.id2moviename[movieId[1:]]
        except KeyError:
            return movieId

    def start(self):
        """Run the interactive loop: empty input resets the conversation,
        ``exit()`` quits, anything else is answered by the model."""
        self.model.load_model(model_path=self.opt['model_path'])
        while True:
            self.prompt()
            if self.input == '':
                print('End of conversation ...')
                self.logs = []
                continue
            elif self.input == 'exit()':
                break

            # get model input from logs
            context, length, concept_mask, concept_bitmask, dbpedia_mask, dbpedia_bitmask, \
                entities, entity_bitmask = self.processor.data_process(self.logs)
            seed_sets = [entities.nonzero()[0].tolist()]

            # inference
            self.model.eval()
            with torch.no_grad():
                context, concept_mask, concept_bitmask, dbpedia_mask, dbpedia_bitmask, entities, entity_bitmask = \
                    self.to_batch_tensor(context, concept_mask, concept_bitmask,
                                         dbpedia_mask, dbpedia_bitmask,
                                         entities, entity_bitmask)
                scores, preds, rec_scores, _, _, _, _, _ = self.model(
                    context.cuda(), concept_mask, dbpedia_mask,
                    concept_bitmask, dbpedia_bitmask, seed_sets,
                    entities.cuda(), TrainType.INFER, maxlen=20, bsz=1)

            response_display, response_log = self.vector2sentence(
                preds.squeeze().detach().cpu().numpy().tolist())
            self.logs.append(response_log)
            print("Response> ", response_display)