class SUMBTTracker(DST):
    """Transferable multi-domain dialogue state tracker, adapted from
    https://github.com/SKTBrain/SUMBT
    """

    # Adapt the data provider: unzip mt.zip, then zip each of
    # train.json / val.json / test.json individually.
    @staticmethod
    def init_data():
        if not os.path.exists(os.path.join(DATA_PATH, 'train.json.zip')):
            with zipfile.ZipFile(os.path.join(DATA_PATH, 'mt.zip')) as f:
                f.extractall(DATA_PATH)
            for split in ['train', 'test', 'val']:
                with zipfile.ZipFile(os.path.join(DATA_PATH, f'{split}.json.zip'), 'w') as f:
                    f.write(os.path.join(DATA_PATH, f'{split}.json'), f'{split}.json')

    def __init__(self, data_dir=DATA_PATH, eval_slots=multiwoz_zh_slot_list):
        DST.__init__(self)
        self.init_data()
        processor = Processor(args)
        self.processor = processor
        label_list = processor.get_labels()
        num_labels = [len(labels) for labels in label_list]  # number of slot-values in each slot-type

        # tokenizer
        self.tokenizer = BertTokenizer.from_pretrained(args.bert_model_name,
                                                       cache_dir=args.bert_model_cache_dir)
        random.seed(args.seed)
        np.random.seed(args.seed)
        torch.manual_seed(args.seed)

        self.device = torch.device("cuda" if USE_CUDA else "cpu")

        self.sumbt_model = BeliefTracker(args, num_labels, self.device)
        if USE_CUDA and N_GPU > 1:
            self.sumbt_model = torch.nn.DataParallel(self.sumbt_model)
        if args.fp16:
            self.sumbt_model.half()
        self.sumbt_model.to(self.device)

        ## Get slot-value embeddings
        self.label_token_ids, self.label_len = [], []
        for labels in label_list:
            token_ids, lens = get_label_embedding(labels, args.max_label_length,
                                                  self.tokenizer, self.device)
            self.label_token_ids.append(token_ids)
            self.label_len.append(lens)
        self.label_map = [{label: i for i, label in enumerate(labels)} for labels in label_list]
        self.label_map_inv = [{i: label for i, label in enumerate(labels)} for labels in label_list]
        self.label_list = label_list
        self.target_slot = processor.target_slot

        ## Get domain-slot-type embeddings
        self.slot_token_ids, self.slot_len = \
            get_label_embedding(processor.target_slot, args.max_label_length,
                                self.tokenizer, self.device)

        self.args = args
        self.state = default_state()
        self.param_restored = False
        if USE_CUDA and N_GPU == 1:
            self.sumbt_model.initialize_slot_value_lookup(self.label_token_ids, self.slot_token_ids)
        elif USE_CUDA and N_GPU > 1:
            self.sumbt_model.module.initialize_slot_value_lookup(self.label_token_ids, self.slot_token_ids)

        self.cached_res = {}
        convert_to_glue_format(DATA_PATH, SUMBT_PATH)
        if not os.path.isdir(os.path.join(SUMBT_PATH, args.output_dir)):
            os.makedirs(os.path.join(SUMBT_PATH, args.output_dir))
        self.train_examples = processor.get_train_examples(
            os.path.join(SUMBT_PATH, args.tmp_data_dir), accumulation=False)
        self.dev_examples = processor.get_dev_examples(
            os.path.join(SUMBT_PATH, args.tmp_data_dir), accumulation=False)
        self.test_examples = processor.get_test_examples(
            os.path.join(SUMBT_PATH, args.tmp_data_dir), accumulation=False)
        self.eval_slots = eval_slots

    def load_weights(self, model_path=None):
        if model_path is None:
            model_ckpt = os.path.join(SUMBT_PATH, 'pre-trained/pytorch_model.bin')
        else:
            model_ckpt = model_path
        model = self.sumbt_model
        # in case slots and values differ between training and evaluation
        if not USE_CUDA:
            ptr_model = torch.load(model_ckpt, map_location=torch.device('cpu'))
        else:
            ptr_model = torch.load(model_ckpt)
        print('loading pretrained weights')

        if not USE_CUDA or N_GPU == 1:
            state = model.state_dict()
            state.update(ptr_model)
            model.load_state_dict(state)
        else:
            # DataParallel: load into the wrapped module
            model.module.load_state_dict(ptr_model)
        if USE_CUDA:
            model.to("cuda")

    def train(self, load_model=False, model_path=None):
        if load_model:
            if model_path is not None:
                self.load_weights(model_path)

        ## Training utterances
        all_input_ids, all_input_len, all_label_ids = convert_examples_to_features(
            self.train_examples, self.label_list, args.max_seq_length,
            self.tokenizer, args.max_turn_length)

        num_train_batches = all_input_ids.size(0)
        num_train_steps = int(num_train_batches / args.train_batch_size /
                              args.gradient_accumulation_steps * args.num_train_epochs)

        logger.info("***** training *****")
        logger.info("  Num examples = %d", len(self.train_examples))
        logger.info("  Batch size = %d", args.train_batch_size)
        logger.info("  Num steps = %d", num_train_steps)

        all_input_ids, all_input_len, all_label_ids = \
            all_input_ids.to(DEVICE), all_input_len.to(DEVICE), all_label_ids.to(DEVICE)

        train_data = TensorDataset(all_input_ids, all_input_len, all_label_ids)
        train_sampler = RandomSampler(train_data)
        train_dataloader = DataLoader(train_data, sampler=train_sampler,
                                      batch_size=args.train_batch_size)

        ## Validation utterances
        all_input_ids_dev, all_input_len_dev, all_label_ids_dev = convert_examples_to_features(
            self.dev_examples, self.label_list, args.max_seq_length,
            self.tokenizer, args.max_turn_length)

        logger.info("***** validation *****")
        logger.info("  Num examples = %d", len(self.dev_examples))
        logger.info("  Batch size = %d", args.dev_batch_size)

        all_input_ids_dev, all_input_len_dev, all_label_ids_dev = \
            all_input_ids_dev.to(DEVICE), all_input_len_dev.to(DEVICE), all_label_ids_dev.to(DEVICE)

        dev_data = TensorDataset(all_input_ids_dev, all_input_len_dev, all_label_ids_dev)
        dev_sampler = SequentialSampler(dev_data)
        dev_dataloader = DataLoader(dev_data, sampler=dev_sampler,
                                    batch_size=args.dev_batch_size)

        logger.info("Loaded data!")

        if args.fp16:
            self.sumbt_model.half()
        self.sumbt_model.to(DEVICE)

        ## Get domain-slot-type embeddings
        slot_token_ids, slot_len = \
            get_label_embedding(self.processor.target_slot, args.max_label_length,
                                self.tokenizer, DEVICE)
        # for slot_idx, slot_str in zip(slot_token_ids, self.processor.target_slot):
        #     self.idx2slot[slot_idx] = slot_str

        ## Get slot-value embeddings
        label_token_ids, label_len = [], []
        for slot_idx, labels in zip(slot_token_ids, self.label_list):
            # self.idx2value[slot_idx] = {}
            token_ids, lens = get_label_embedding(labels, args.max_label_length,
                                                  self.tokenizer, DEVICE)
            label_token_ids.append(token_ids)
            label_len.append(lens)
            # for label, token_id in zip(labels, token_ids):
            #     self.idx2value[slot_idx][token_id] = label

        logger.info('embeddings prepared')

        if USE_CUDA and N_GPU > 1:
            self.sumbt_model.module.initialize_slot_value_lookup(label_token_ids, slot_token_ids)
        else:
            self.sumbt_model.initialize_slot_value_lookup(label_token_ids, slot_token_ids)

        def get_optimizer_grouped_parameters(model):
            param_optimizer = [(n, p) for n, p in model.named_parameters() if p.requires_grad]
            no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
            optimizer_grouped_parameters = [
                {'params': [p for n, p in param_optimizer
                            if not any(nd in n for nd in no_decay)],
                 'weight_decay': 0.01, 'lr': args.learning_rate},
                {'params': [p for n, p in param_optimizer
                            if any(nd in n for nd in no_decay)],
                 'weight_decay': 0.0, 'lr': args.learning_rate},
            ]
            return optimizer_grouped_parameters

        if not USE_CUDA or N_GPU == 1:
            optimizer_grouped_parameters = get_optimizer_grouped_parameters(self.sumbt_model)
        else:
            optimizer_grouped_parameters = get_optimizer_grouped_parameters(self.sumbt_model.module)

        t_total = num_train_steps
        scheduler = None
        if args.fp16:
            try:
                from apex.optimizers import FP16_Optimizer
                from apex.optimizers import FusedAdam
            except ImportError:
                raise ImportError(
                    "Please install apex from https://www.github.com/nvidia/apex "
                    "to use distributed and fp16 training.")

            optimizer = FusedAdam(optimizer_grouped_parameters,
                                  lr=args.learning_rate,
                                  bias_correction=False,
                                  max_grad_norm=1.0)
            if args.fp16_loss_scale == 0:
                optimizer = FP16_Optimizer(optimizer, dynamic_loss_scale=True)
            else:
                optimizer = FP16_Optimizer(optimizer, static_loss_scale=args.fp16_loss_scale)
        else:
            optimizer = AdamW(optimizer_grouped_parameters,
                              lr=args.learning_rate,
                              correct_bias=False)
            scheduler = get_linear_schedule_with_warmup(
                optimizer,
                num_warmup_steps=args.warmup_proportion * t_total,
                num_training_steps=t_total)
        logger.info(optimizer)

        # Training code
        ###############################################################################
        logger.info("Training...")

        global_step = 0
        last_update = None
        best_loss = None

        model = self.sumbt_model

        # Create a SummaryWriter unless tensorboard logging is explicitly disabled
        if not args.do_not_use_tensorboard:
            summary_writer = SummaryWriter("./tensorboard_summary/logs_1214/")
        else:
            summary_writer = None

        for epoch in trange(int(args.num_train_epochs), desc="Epoch"):
            # Train
            model.train()
            tr_loss = 0
            nb_tr_examples = 0
            nb_tr_steps = 0

            for step, batch in enumerate(tqdm(train_dataloader)):
                batch = tuple(t.to(DEVICE) for t in batch)
                input_ids, input_len, label_ids = batch

                # Forward
                if N_GPU == 1:
                    loss, loss_slot, acc, acc_slot, _ = model(input_ids, input_len, label_ids, N_GPU)
                else:
                    loss, _, acc, acc_slot, _ = model(input_ids, input_len, label_ids, N_GPU)

                    # average over multiple GPUs
                    loss = loss.mean()
                    acc = acc.mean()
                    acc_slot = acc_slot.mean(0)

                if args.gradient_accumulation_steps > 1:
                    loss = loss / args.gradient_accumulation_steps

                # Backward
                if args.fp16:
                    optimizer.backward(loss)
                else:
                    loss.backward()

                # tensorboard logging
                if summary_writer is not None:
                    summary_writer.add_scalar("Epoch", epoch, global_step)
                    summary_writer.add_scalar("Train/Loss", loss, global_step)
                    summary_writer.add_scalar("Train/JointAcc", acc, global_step)
                    if N_GPU == 1:
                        for i, slot in enumerate(self.processor.target_slot):
                            summary_writer.add_scalar("Train/Loss_%s" % slot.replace(' ', '_'),
                                                      loss_slot[i], global_step)
                            summary_writer.add_scalar("Train/Acc_%s" % slot.replace(' ', '_'),
                                                      acc_slot[i], global_step)

                tr_loss += loss.item()
                nb_tr_examples += input_ids.size(0)
                nb_tr_steps += 1
                if (step + 1) % args.gradient_accumulation_steps == 0:
                    # modify learning rate with the special warm-up BERT uses
                    lr_this_step = args.learning_rate * warmup_linear(
                        global_step / t_total, args.warmup_proportion)
                    if summary_writer is not None:
                        summary_writer.add_scalar("Train/LearningRate", lr_this_step, global_step)
                    for param_group in optimizer.param_groups:
                        param_group['lr'] = lr_this_step
                    if scheduler is not None:
                        # clip the gradients of the actual parameter tensors
                        torch.nn.utils.clip_grad_norm_(
                            [p for group in optimizer_grouped_parameters for p in group['params']], 1.0)
                    optimizer.step()
                    if scheduler is not None:
                        scheduler.step()
                    optimizer.zero_grad()
                    global_step += 1

            # Perform evaluation on the validation dataset
            model.eval()
            dev_loss = 0
            dev_acc = 0
            dev_loss_slot, dev_acc_slot = None, None
            nb_dev_examples, nb_dev_steps = 0, 0

            for step, batch in enumerate(tqdm(dev_dataloader, desc="Validation")):
                batch = tuple(t.to(DEVICE) for t in batch)
                input_ids, input_len, label_ids = batch
                if input_ids.dim() == 2:
                    input_ids = input_ids.unsqueeze(0)
                    input_len = input_len.unsqueeze(0)
                    label_ids = label_ids.unsqueeze(0)
                with torch.no_grad():
                    if N_GPU == 1:
                        loss, loss_slot, acc, acc_slot, _ = model(input_ids, input_len, label_ids, N_GPU)
                    else:
                        loss, _, acc, acc_slot, _ = model(input_ids, input_len, label_ids, N_GPU)

                        # average over multiple GPUs
                        loss = loss.mean()
                        acc = acc.mean()
                        acc_slot = acc_slot.mean(0)

                num_valid_turn = torch.sum(label_ids[:, :, 0].view(-1) > -1, 0).item()
                dev_loss += loss.item() * num_valid_turn
                dev_acc += acc.item() * num_valid_turn

                if N_GPU == 1:
                    if dev_loss_slot is None:
                        dev_loss_slot = [l * num_valid_turn for l in loss_slot]
                        dev_acc_slot = acc_slot * num_valid_turn
                    else:
                        for i, l in enumerate(loss_slot):
                            dev_loss_slot[i] = dev_loss_slot[i] + l * num_valid_turn
                        dev_acc_slot += acc_slot * num_valid_turn

                nb_dev_examples += num_valid_turn

            dev_loss = dev_loss / nb_dev_examples
            dev_acc = dev_acc / nb_dev_examples

            if N_GPU == 1:
                dev_acc_slot = dev_acc_slot / nb_dev_examples

            # tensorboard logging
            if summary_writer is not None:
                summary_writer.add_scalar("Validate/Loss", dev_loss, global_step)
                summary_writer.add_scalar("Validate/Acc", dev_acc, global_step)
                if N_GPU == 1:
                    for i, slot in enumerate(self.processor.target_slot):
                        summary_writer.add_scalar("Validate/Loss_%s" % slot.replace(' ', '_'),
                                                  dev_loss_slot[i] / nb_dev_examples, global_step)
                        summary_writer.add_scalar("Validate/Acc_%s" % slot.replace(' ', '_'),
                                                  dev_acc_slot[i], global_step)

            dev_loss = round(dev_loss, 6)

            output_model_file = os.path.join(os.path.join(SUMBT_PATH, args.output_dir),
                                             "pytorch_model.bin")

            if last_update is None or dev_loss < best_loss:
                # Save the model that achieves the lowest validation loss so far
                if not USE_CUDA or N_GPU == 1:
                    torch.save(model.state_dict(), output_model_file)
                else:
                    torch.save(model.module.state_dict(), output_model_file)

                last_update = epoch
                best_loss = dev_loss
                best_acc = dev_acc

                logger.info(
                    "*** Model Updated: Epoch=%d, Validation Loss=%.6f, Validation Acc=%.6f, global_step=%d ***"
                    % (last_update, best_loss, best_acc, global_step))
            else:
                logger.info(
                    "*** Model NOT Updated: Epoch=%d, Validation Loss=%.6f, Validation Acc=%.6f, global_step=%d ***"
                    % (epoch, dev_loss, dev_acc, global_step))

            # Early stopping
            if last_update + args.patience <= epoch:
                break

    def test(self, mode='dev', model_path=None):
        """Evaluate the tracker on the dev or test split."""
        # Evaluation
        self.load_weights(model_path)

        if mode == 'test':
            eval_examples = self.test_examples
        elif mode == 'dev':
            eval_examples = self.dev_examples

        all_input_ids, all_input_len, all_label_ids = convert_examples_to_features(
            eval_examples, self.label_list, args.max_seq_length,
            self.tokenizer, args.max_turn_length)
        all_input_ids, all_input_len, all_label_ids = \
            all_input_ids.to(DEVICE), all_input_len.to(DEVICE), all_label_ids.to(DEVICE)

        logger.info("***** Running evaluation *****")
        logger.info("  Num examples = %d", len(eval_examples))
        logger.info("  Batch size = %d", args.eval_batch_size)

        eval_data = TensorDataset(all_input_ids, all_input_len, all_label_ids)

        # Run prediction for the full data
        eval_sampler = SequentialSampler(eval_data)
        eval_dataloader = DataLoader(eval_data, sampler=eval_sampler,
                                     batch_size=args.dev_batch_size)

        model = self.sumbt_model
        eval_loss, eval_accuracy = 0, 0
        eval_loss_slot, eval_acc_slot = None, None
        nb_eval_steps, nb_eval_examples = 0, 0

        accuracies = {'joint7': 0, 'slot7': 0, 'joint5': 0, 'slot5': 0,
                      'joint_rest': 0, 'slot_rest': 0,
                      'num_turn': 0, 'num_slot7': 0, 'num_slot5': 0, 'num_slot_rest': 0}
        for input_ids, input_len, label_ids in tqdm(eval_dataloader, desc="Evaluating"):
            # if input_ids.dim() == 2:
            #     input_ids = input_ids.unsqueeze(0)
            #     input_len = input_len.unsqueeze(0)
            #     label_ids = label_ids.unsqueeze(0)
            with torch.no_grad():
                if not USE_CUDA or N_GPU == 1:
                    loss, loss_slot, acc, acc_slot, pred_slot = model(input_ids, input_len, label_ids, 1)
                else:
                    loss, _, acc, acc_slot, pred_slot = model(input_ids, input_len, label_ids, N_GPU)
                    nbatch = label_ids.size(0)
                    nslot = pred_slot.size(3)
                    pred_slot = pred_slot.view(nbatch, -1, nslot)

            accuracies = eval_all_accs(pred_slot, label_ids, accuracies)

            nb_eval_ex = (label_ids[:, :, 0].view(-1) != -1).sum().item()
            nb_eval_examples += nb_eval_ex
            nb_eval_steps += 1

            if not USE_CUDA or N_GPU == 1:
                eval_loss += loss.item() * nb_eval_ex
                eval_accuracy += acc.item() * nb_eval_ex
                if eval_loss_slot is None:
                    eval_loss_slot = [l * nb_eval_ex for l in loss_slot]
                    eval_acc_slot = acc_slot * nb_eval_ex
                else:
                    for i, l in enumerate(loss_slot):
                        eval_loss_slot[i] = eval_loss_slot[i] + l * nb_eval_ex
                    eval_acc_slot += acc_slot * nb_eval_ex
            else:
                eval_loss += sum(loss) * nb_eval_ex
                eval_accuracy += sum(acc) * nb_eval_ex

        eval_loss = eval_loss / nb_eval_examples
        eval_accuracy = eval_accuracy / nb_eval_examples
        if not USE_CUDA or N_GPU == 1:
            eval_acc_slot = eval_acc_slot / nb_eval_examples

        loss = None

        if not USE_CUDA or N_GPU == 1:
            result = {
                'eval_loss': eval_loss,
                'eval_accuracy': eval_accuracy,
                'loss': loss,
                'eval_loss_slot': '\t'.join(
                    [str(val / nb_eval_examples) for val in eval_loss_slot]),
                'eval_acc_slot': '\t'.join(
                    [str(val.item()) for val in eval_acc_slot]),
            }
        else:
            result = {
                'eval_loss': eval_loss,
                'eval_accuracy': eval_accuracy,
                'loss': loss,
            }

        out_file_name = 'eval_results'
        # if TARGET_SLOT == 'all':
        #     out_file_name += '_all'
        output_eval_file = os.path.join(os.path.join(SUMBT_PATH, args.output_dir),
                                        "%s.txt" % out_file_name)

        if not USE_CUDA or N_GPU == 1:
            with open(output_eval_file, "w") as writer:
                logger.info("***** Eval results *****")
                for key in sorted(result.keys()):
                    logger.info("  %s = %s", key, str(result[key]))
                    writer.write("%s = %s\n" % (key, str(result[key])))

        out_file_name = 'eval_all_accuracies'
        with open(os.path.join(os.path.join(SUMBT_PATH, args.output_dir),
                               "%s.txt" % out_file_name), 'w') as f:
            s = '{:^22s}:{:^22s}:{:^22s}:{:^22s}:{:^22s}:{:^22s}'.format(
                'joint acc (7 domain)', 'slot acc (7 domain)', 'joint acc (5 domain)',
                'slot acc (5 domain)', 'joint restaurant', 'slot acc restaurant')
            f.write(s + '\n')
            print(s)
            s = '{:^22.5f}:{:^22.5f}:{:^22.5f}:{:^22.5f}:{:^22.5f}:{:^22.5f}'.format(
                (accuracies['joint7'] / accuracies['num_turn']).item(),
                (accuracies['slot7'] / accuracies['num_slot7']).item(),
                (accuracies['joint5'] / accuracies['num_turn']).item(),
                (accuracies['slot5'] / accuracies['num_slot5']).item(),
                (accuracies['joint_rest'] / accuracies['num_turn']).item(),
                (accuracies['slot_rest'] / accuracies['num_slot_rest']).item())
            f.write(s + '\n')
            print(s)

    def init_session(self):
        self.state = default_state()
        if not self.param_restored:
            if os.path.isfile(os.path.join(DOWNLOAD_DIRECTORY, 'pytorch_model.bin')):
                print('loading weights from downloaded model')
                self.load_weights(model_path=os.path.join(DOWNLOAD_DIRECTORY, 'pytorch_model.bin'))
            elif os.path.isfile(os.path.join(SUMBT_PATH, args.output_dir, 'pytorch_model.bin')):
                print('loading weights from trained model')
                self.load_weights(model_path=os.path.join(SUMBT_PATH, args.output_dir, 'pytorch_model.bin'))
            else:
                raise ValueError('no available weights found.')
            self.param_restored = True

    def update(self, user_act=None):
        """Update the dialogue state with the slot values predicted for the current turn."""
        if not isinstance(user_act, str):
            raise Exception('Expected user_act to be str but found {}'.format(type(user_act)))
        prev_state = self.state

        actual_history = copy.deepcopy(prev_state['history'])
        query = self.construct_query(actual_history)
        pred_states = self.predict(query)
        new_belief_state = copy.deepcopy(prev_state['belief_state'])
        for state in pred_states:
            domain, slot, value = state.split('-', 2)
            # slot = REF_SYS_DA[domain.capitalize()].get(slot, slot)
            assert 'semi' in new_belief_state[domain]
            assert 'book' in new_belief_state[domain]
            domain_dic = new_belief_state[domain]
            if '预订' in slot:
                assert slot.startswith('预订')
                slot = slot[2:]
                assert slot in domain_dic['book']
            if slot in domain_dic['semi']:
                new_belief_state[domain]['semi'][slot] = value
                # normalize_value(self.value_dict, domain, slot, value)
            elif slot in domain_dic['book']:
                new_belief_state[domain]['book'][slot] = value
            else:
                with open('trade_tracker_unknown_slot.log', 'a+') as f:
                    f.write('unknown slot name <{}> with value <{}> of domain <{}>\nitem: {}\n\n'
                            .format(slot, value, domain, state))

        # new_request_state = copy.deepcopy(prev_state['request_state'])
        # # update request_state
        # user_request_slot = self.detect_requestable_slots(user_act)
        # for domain in user_request_slot:
        #     for key in user_request_slot[domain]:
        #         if domain not in new_request_state:
        #             new_request_state[domain] = {}
        #         if key not in new_request_state[domain]:
        #             new_request_state[domain][key] = user_request_slot[domain][key]

        new_state = copy.deepcopy(dict(prev_state))
        new_state['belief_state'] = new_belief_state
        # new_state['request_state'] = new_request_state
        self.state = new_state
        # print((pred_states, query))
        return self.state

    def predict(self, query):
        cache_query_key = ''.join(str(list(chain.from_iterable(query[0]))))
        if cache_query_key in self.cached_res.keys():
            return self.cached_res[cache_query_key]

        input_ids, input_len = query
        input_ids = torch.tensor(input_ids).to(self.device).unsqueeze(0)
        input_len = torch.tensor(input_len).to(self.device).unsqueeze(0)
        labels = None
        _, pred_slot = self.sumbt_model(input_ids, input_len, labels)
        pred_slot_t = pred_slot[0][-1].tolist()
        predict_belief = []
        for idx, i in enumerate(pred_slot_t):
            predict_belief.append('{}-{}'.format(self.target_slot[idx],
                                                 self.label_map_inv[idx][i]))
        self.cached_res[cache_query_key] = predict_belief
        return predict_belief

    def construct_query(self, context):
        """Construct the model query from the dialogue context."""
        ids = []
        lens = []
        context_len = len(context)
        if context[0][0] != 'sys':
            context = [['sys', '']] + context
        for i in range(0, context_len, 2):
            utt_sys = context[i][1]
            utt_user = context[i + 1][1]

            tokens_user = [x if x != '#' else '[SEP]'
                           for x in self.tokenizer.tokenize(utt_user)]
            tokens_sys = [x if x != '#' else '[SEP]'
                          for x in self.tokenizer.tokenize(utt_sys)]

            _truncate_seq_pair(tokens_user, tokens_sys, self.args.max_seq_length - 3)
            tokens = ["[CLS]"] + tokens_user + ["[SEP]"] + tokens_sys + ["[SEP]"]
            input_len = [len(tokens_user) + 2, len(tokens_sys) + 1]

            input_ids = self.tokenizer.convert_tokens_to_ids(tokens)
            padding = [0] * (self.args.max_seq_length - len(input_ids))
            input_ids += padding
            assert len(input_ids) == self.args.max_seq_length

            ids.append(input_ids)
            lens.append(input_len)

        return (ids, lens)

    def detect_requestable_slots(self, observation):
        result = {}
        observation = observation.lower()
        _observation = ' {} '.format(observation)
        for value in self.det_dic.keys():
            _value = ' {} '.format(value.strip())
            if _value in _observation:
                key, domain = self.det_dic[value].split('-')
                if domain not in result:
                    result[domain] = {}
                result[domain][key] = 0
        return result
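# ---------------------------------------------------------------------------
# Minimal usage sketch (not part of the original module). It assumes the
# module-level imports above resolve and that pre-trained weights exist under
# DOWNLOAD_DIRECTORY or SUMBT_PATH/<output_dir>; otherwise init_session()
# raises ValueError. The example utterance and history are made up.
# ---------------------------------------------------------------------------
if __name__ == '__main__':
    tracker = SUMBTTracker()
    tracker.init_session()  # restores weights on the first call
    # construct_query() expects alternating [sys, user] turns in the history
    tracker.state['history'] = [
        ['sys', ''],
        ['user', '我想找一家便宜的酒店。'],  # hypothetical user utterance
    ]
    state = tracker.update('我想找一家便宜的酒店。')
    print(state['belief_state'])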
def __init__(self, data_dir=DATA_PATH):
    DST.__init__(self)
    # if not os.path.exists(data_dir):
    #     if model_file == '':
    #         raise Exception('Please provide remote model file path in config')
    #     resp = urllib.request.urlretrieve(model_file)[0]
    #     temp_file = tarfile.open(resp)
    #     temp_file.extractall('data')
    #     assert os.path.exists(data_dir)

    processor = Processor(args)
    self.processor = processor
    # values of each slot, e.g. values_list
    label_list = processor.get_labels()
    num_labels = [len(labels) for labels in label_list]  # number of slot-values in each slot-type

    # tokenizer
    self.tokenizer = BertTokenizer.from_pretrained(args.bert_model_name,
                                                   cache_dir=args.bert_model_cache_dir)
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)

    self.device = torch.device("cuda" if USE_CUDA else "cpu")

    self.sumbt_model = BeliefTracker(args, num_labels, self.device)
    if USE_CUDA and N_GPU > 1:
        self.sumbt_model = torch.nn.DataParallel(self.sumbt_model)
    if args.fp16:
        self.sumbt_model.half()
    self.sumbt_model.to(self.device)

    ## Get slot-value embeddings
    self.label_token_ids, self.label_len = [], []
    for labels in label_list:
        # encode the candidate values of each slot
        token_ids, lens = get_label_embedding(labels, args.max_label_length,
                                              self.tokenizer, self.device)
        self.label_token_ids.append(token_ids)
        self.label_len.append(lens)
    self.label_map = [{label: i for i, label in enumerate(labels)} for labels in label_list]
    self.label_map_inv = [{i: label for i, label in enumerate(labels)} for labels in label_list]
    self.label_list = label_list
    self.target_slot = processor.target_slot

    ## Get domain-slot-type embeddings
    self.slot_token_ids, self.slot_len = \
        get_label_embedding(processor.target_slot, args.max_label_length,
                            self.tokenizer, self.device)

    self.args = args
    self.state = default_state()
    self.param_restored = False
    if USE_CUDA and N_GPU == 1:
        self.sumbt_model.initialize_slot_value_lookup(self.label_token_ids, self.slot_token_ids)
    elif USE_CUDA and N_GPU > 1:
        self.sumbt_model.module.initialize_slot_value_lookup(self.label_token_ids, self.slot_token_ids)

    self.cached_res = {}
    convert_to_glue_format(DATA_PATH, SUMBT_PATH)
    if not os.path.isdir(os.path.join(SUMBT_PATH, args.output_dir)):
        os.makedirs(os.path.join(SUMBT_PATH, args.output_dir))
    self.train_examples = processor.get_train_examples(
        os.path.join(SUMBT_PATH, args.tmp_data_dir), accumulation=False)
    self.dev_examples = processor.get_dev_examples(
        os.path.join(SUMBT_PATH, args.tmp_data_dir), accumulation=False)
    self.test_examples = processor.get_test_examples(
        os.path.join(SUMBT_PATH, args.tmp_data_dir), accumulation=False)
def __init__(
        self,
        model_file='https://convlab.blob.core.windows.net/convlab-2/sumbt.tar.gz',
        arg_path=os.path.join(SUMBT_PATH, 'config.json'),
        eval_slots=multiwoz_slot_list_en):
    DST.__init__(self)
    # if not os.path.exists(data_dir):
    #     if model_file == '':
    #         raise Exception('Please provide remote model file path in config')
    #     resp = urllib.request.urlretrieve(model_file)[0]
    #     temp_file = tarfile.open(resp)
    #     temp_file.extractall('data')
    #     assert os.path.exists(data_dir)

    args = json.load(open(arg_path))
    args = SimpleNamespace(**args)
    self.args = args
    data_dir = os.path.join(ROOT_PATH, args.data_dir)

    # Select the language-specific helpers
    if args.lang == 'zh':
        convert_to_glue_format = convert_to_glue_format_zh
        default_state = default_state_zh
        processor = ProcessorZh(args)
        eval_slots = multiwoz_slot_list_zh
    else:
        convert_to_glue_format = convert_to_glue_format_en
        default_state = default_state_en
        processor = ProcessorEn(args)
        eval_slots = multiwoz_slot_list_en

    self.processor = processor
    label_list = processor.get_labels()
    num_labels = [len(labels) for labels in label_list]  # number of slot-values in each slot-type

    # tokenizer
    self.tokenizer = AutoTokenizer.from_pretrained(args.bert_model_name,
                                                   cache_dir=args.bert_model_cache_dir)
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)

    self.device = torch.device("cuda" if USE_CUDA else "cpu")

    self.sumbt_model = BeliefTracker(args, num_labels, self.device)
    if USE_CUDA and N_GPU > 1:
        self.sumbt_model = torch.nn.DataParallel(self.sumbt_model)
    if args.fp16:
        self.sumbt_model.half()
    self.sumbt_model.to(self.device)

    ## Get slot-value embeddings
    self.label_token_ids, self.label_len = [], []
    for labels in label_list:
        token_ids, lens = get_label_embedding(labels, args.max_label_length,
                                              self.tokenizer, self.device)
        self.label_token_ids.append(token_ids)
        self.label_len.append(lens)
    self.label_map = [{label: i for i, label in enumerate(labels)} for labels in label_list]
    self.label_map_inv = [{i: label for i, label in enumerate(labels)} for labels in label_list]
    self.label_list = label_list
    self.target_slot = processor.target_slot

    ## Get domain-slot-type embeddings
    self.slot_token_ids, self.slot_len = \
        get_label_embedding(processor.target_slot, args.max_label_length,
                            self.tokenizer, self.device)

    self.args = args
    self.state = default_state()
    self.param_restored = False
    if USE_CUDA and N_GPU == 1:
        self.sumbt_model.initialize_slot_value_lookup(self.label_token_ids, self.slot_token_ids)
    elif USE_CUDA and N_GPU > 1:
        self.sumbt_model.module.initialize_slot_value_lookup(self.label_token_ids, self.slot_token_ids)

    # Keyword dictionary used to detect requestable slots in user utterances
    self.det_dic = {}
    for domain, dic in REF_USR_DA.items():
        for key, value in dic.items():
            assert '-' not in key
            self.det_dic[key.lower()] = key + '-' + domain
            self.det_dic[value.lower()] = key + '-' + domain

    self.cached_res = {}
    convert_to_glue_format(os.path.join(ROOT_PATH, args.data_dir), SUMBT_PATH, args)
    if not os.path.isdir(os.path.join(SUMBT_PATH, args.output_dir)):
        os.makedirs(os.path.join(SUMBT_PATH, args.output_dir))
    self.train_examples = processor.get_train_examples(
        os.path.join(SUMBT_PATH, args.tmp_data_dir), accumulation=False)
    self.dev_examples = processor.get_dev_examples(
        os.path.join(SUMBT_PATH, args.tmp_data_dir), accumulation=False)
    self.test_examples = processor.get_test_examples(
        os.path.join(SUMBT_PATH, args.tmp_data_dir), accumulation=False)
    self.eval_slots = eval_slots
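# ---------------------------------------------------------------------------
# Hedged sketch of the configuration this constructor reads from config.json
# (arg_path). The field names below mirror the attribute accesses in the code
# above; the example values are placeholders, not the shipped configuration.
# ---------------------------------------------------------------------------
EXAMPLE_CONFIG = {
    "lang": "en",                    # "zh" selects the Chinese processor and data format
    "data_dir": "data/multiwoz",     # joined with ROOT_PATH
    "bert_model_name": "bert-base-uncased",
    "bert_model_cache_dir": None,
    "seed": 42,
    "fp16": False,
    "max_label_length": 32,
    "output_dir": "output",
    "tmp_data_dir": "tmp_data",
}
# args = SimpleNamespace(**EXAMPLE_CONFIG)  # mirrors json.load(open(arg_path)) above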