def train(data):
    """Train a ``SeqLabel`` model on ``data.train_Ids`` and evaluate after every epoch.

    Before training, the data settings are saved to ``<data.model_dir>.dset``.
    After each epoch the model is evaluated on the "dev" and "test" splits;
    whenever the dev score improves, the model state dict is written to
    ``<data.model_dir>.<epoch>.model``. The dev score is the F-score when
    ``data.seg`` is true and the accuracy otherwise.

    Args:
        data: configuration/dataset object. This function reads its ``HP_*``
            hyper-parameters, ``optimizer``, ``model_dir``, ``seg`` and
            ``train_Ids`` attributes. ``train_Ids`` is shuffled in place.

    Exits the process with status 1 if ``data.optimizer`` does not name a
    supported optimizer (sgd, adagrad, adadelta, rmsprop, adam; any case).
    """
    print("Training model...")
    data.show_data_summary()
    save_data_name = data.model_dir + ".dset"
    data.save(save_data_name)
    model = SeqLabel(data)

    # Normalise once so optimizer selection and the LR-decay switch below
    # agree on the spelling ("SGD" and "sgd" are treated alike).
    optimizer_name = data.optimizer.lower()
    if optimizer_name == "sgd":
        optimizer = optim.SGD(model.parameters(),
                              lr=data.HP_lr,
                              momentum=data.HP_momentum,
                              weight_decay=data.HP_l2)
    elif optimizer_name == "adagrad":
        optimizer = optim.Adagrad(model.parameters(),
                                  lr=data.HP_lr,
                                  weight_decay=data.HP_l2)
    elif optimizer_name == "adadelta":
        optimizer = optim.Adadelta(model.parameters(),
                                   lr=data.HP_lr,
                                   weight_decay=data.HP_l2)
    elif optimizer_name == "rmsprop":
        optimizer = optim.RMSprop(model.parameters(),
                                  lr=data.HP_lr,
                                  weight_decay=data.HP_l2)
    elif optimizer_name == "adam":
        optimizer = optim.Adam(model.parameters(),
                               lr=data.HP_lr,
                               weight_decay=data.HP_l2)
    else:
        print("Optimizer illegal: %s" % (data.optimizer))
        # A bad configuration is an error, so report a non-zero status.
        exit(1)

    best_dev = -10
    for idx in range(data.HP_iteration):
        epoch_start = time.time()
        temp_start = epoch_start
        print("Epoch: %s/%s" % (idx, data.HP_iteration))
        if optimizer_name == "sgd":
            optimizer = lr_decay(optimizer, idx, data.HP_lr_decay, data.HP_lr)
        sample_loss = 0  # loss accumulated since the last progress line
        total_loss = 0  # loss accumulated over the whole epoch
        right_token = 0
        whole_token = 0
        random.shuffle(data.train_Ids)
        # Put the model in training mode and start from clean gradients.
        model.train()
        model.zero_grad()
        batch_size = data.HP_batch_size
        train_num = len(data.train_Ids)
        # "+ 1" covers the final partial batch; when train_num is an exact
        # multiple of batch_size the extra slice is empty and is skipped.
        total_batch = train_num // batch_size + 1
        for batch_id in range(total_batch):
            start = batch_id * batch_size
            end = min((batch_id + 1) * batch_size, train_num)
            instance = data.train_Ids[start:end]
            if not instance:
                continue
            (batch_word, batch_features, batch_wordlen, batch_wordrecover,
             batch_char, batch_charlen, batch_charrecover, batch_label,
             mask) = batchify_with_label(instance, data.HP_gpu)
            loss, tag_seq = model.neg_log_likelihood_loss(
                batch_word, batch_features, batch_wordlen, batch_char,
                batch_charlen, batch_charrecover, batch_label, mask)
            right, whole = predict_check(tag_seq, batch_label, mask)
            right_token += right
            whole_token += whole
            # ``loss.data[0]`` fails on 0-dim tensors in current PyTorch;
            # ``item()`` is the supported way to read the scalar value.
            batch_loss = loss.item()
            sample_loss += batch_loss
            total_loss += batch_loss
            if end % 500 == 0:
                temp_time = time.time()
                temp_cost = temp_time - temp_start
                temp_start = temp_time
                print(
                    " Instance: %s; Time: %.2fs; loss: %.4f; acc: %s/%s=%.4f"
                    % (end, temp_cost, sample_loss, right_token, whole_token,
                       (right_token + 0.) / whole_token))
                sys.stdout.flush()
                sample_loss = 0
            loss.backward()
            optimizer.step()
            model.zero_grad()
        temp_time = time.time()
        temp_cost = temp_time - temp_start
        print(" Instance: %s; Time: %.2fs; loss: %.4f; acc: %s/%s=%.4f" %
              (end, temp_cost, sample_loss, right_token, whole_token,
               (right_token + 0.) / whole_token))
        epoch_finish = time.time()
        epoch_cost = epoch_finish - epoch_start
        print(
            "Epoch: %s training finished. Time: %.2fs, speed: %.2fst/s, total loss: %s"
            % (idx, epoch_cost, train_num / epoch_cost, total_loss))

        # Evaluate on dev and keep a checkpoint whenever the score improves.
        speed, acc, p, r, f, _, _ = evaluate(data, model, "dev")
        dev_finish = time.time()
        dev_cost = dev_finish - epoch_finish
        if data.seg:
            current_score = f
            print(
                "Dev: time: %.2fs, speed: %.2fst/s; acc: %.4f, p: %.4f, r: %.4f, f: %.4f"
                % (dev_cost, speed, acc, p, r, f))
        else:
            current_score = acc
            print("Dev: time: %.2fs speed: %.2fst/s; acc: %.4f" %
                  (dev_cost, speed, acc))
        if current_score > best_dev:
            if data.seg:
                print("Exceed previous best f score:", best_dev)
            else:
                print("Exceed previous best acc score:", best_dev)
            model_name = data.model_dir + '.' + str(idx) + ".model"
            print("Save current best model in file:", model_name)
            torch.save(model.state_dict(), model_name)
            best_dev = current_score

        # Decode the test split every epoch, regardless of the dev result.
        speed, acc, p, r, f, _, _ = evaluate(data, model, "test")
        test_finish = time.time()
        test_cost = test_finish - dev_finish
        if data.seg:
            print(
                "Test: time: %.2fs, speed: %.2fst/s; acc: %.4f, p: %.4f, r: %.4f, f: %.4f"
                % (test_cost, speed, acc, p, r, f))
        else:
            print("Test: time: %.2fs, speed: %.2fst/s; acc: %.4f" %
                  (test_cost, speed, acc))
        gc.collect()
def train():
    """Train a ``SeqLabel`` model with SGD and evaluate after every epoch.

    Takes no arguments; it relies on the module-level names ``data``
    (dataset, ``data.train_ids`` is shuffled in place), ``config``
    (hyper-parameters ``lr``, ``momentum``, ``l2``, ``lr_decay``, ``epoch``,
    ``batch_size``, ``model_path``), ``gpu`` (truthy to train on CUDA) and
    ``logger``.

    After each epoch the model is evaluated on "dev" and "test". When the
    dev F-score improves, the state dict is saved to
    ``<config.model_path>.<epoch>.model``.

    Exits the process with status 1 if the running or the epoch loss exceeds
    1e8 or becomes NaN.
    """
    model = SeqLabel(data)
    # Move the model to the GPU *before* building the optimizer, as the
    # torch.optim documentation advises, so the optimizer is constructed
    # over the parameters that are actually trained.
    if gpu:
        model = model.cuda()
    optimizer = optim.SGD(model.parameters(),
                          lr=config.lr,
                          momentum=config.momentum,
                          weight_decay=config.l2)
    best_dev = -10
    for idx in range(config.epoch):
        epoch_start = time.time()
        temp_start = epoch_start
        print("Epoch: %s/%s" % (idx, config.epoch))
        optimizer = lr_decay(optimizer, idx, config.lr_decay, config.lr)
        sample_loss = 0  # running loss; reset whenever a progress line is printed
        total_loss = 0  # full loss of the current epoch
        right_token = 0  # correctly predicted tokens in the current epoch
        whole_token = 0
        random.shuffle(data.train_ids)
        print("Shuffle: first input word list:", data.train_ids[0][1])
        model.train()
        model.zero_grad()
        batch_size = config.batch_size
        train_num = len(data.train_ids)
        print('batch_size:', batch_size, 'train_num:', train_num)
        # "+ 1" covers the final partial batch; an exact multiple yields one
        # empty slice, which is skipped below.
        total_batch = train_num // batch_size + 1
        for batch_id in range(total_batch):
            start = batch_id * batch_size
            end = min((batch_id + 1) * batch_size, train_num)
            # Original author's note on the item layout: [char, word, feat, label]
            instance = data.train_ids[start:end]
            if not instance:
                continue
            (batch_word, batch_features, batch_wordlen, batch_wordrecover,
             batch_char, batch_charlen, batch_charrecover, batch_label,
             mask) = batchify_sequence_labeling_with_label(instance,
                                                           gpu,
                                                           if_train=True)
            loss, tag_seq = model.calculate_loss(batch_word, batch_features,
                                                 batch_wordlen, batch_char,
                                                 batch_charlen,
                                                 batch_charrecover,
                                                 batch_label, mask)
            right, whole = predict_check(tag_seq, batch_label, mask)
            right_token += right
            whole_token += whole
            batch_loss = loss.item()
            sample_loss += batch_loss
            total_loss += batch_loss
            if end % 6400 == 0:
                temp_time = time.time()
                temp_cost = temp_time - temp_start
                temp_start = temp_time
                print(
                    " Instance: %s; Time: %.2fs; loss: %.4f; acc: %s/%s=%.4f"
                    % (end, temp_cost, sample_loss, right_token, whole_token,
                       (right_token + 0.) / whole_token))
                if sample_loss > 1e8 or str(sample_loss) == "nan":
                    print(
                        "ERROR: LOSS EXPLOSION (>1e8) ! PLEASE SET PROPER PARAMETERS AND STRUCTURE! EXIT...."
                    )
                    exit(1)
                sample_loss = 0
            loss.backward()
            optimizer.step()
            model.zero_grad()
        temp_time = time.time()
        temp_cost = temp_time - temp_start
        print(" Instance: %s; Time: %.2fs; loss: %.4f; acc: %s/%s=%.4f" %
              (end, temp_cost, sample_loss, right_token, whole_token,
               (right_token + 0.) / whole_token))
        epoch_finish = time.time()
        epoch_cost = epoch_finish - epoch_start
        print(
            "Epoch: %s training finished. Time: %.2fs, speed: %.2fst/s, total loss: %s"
            % (idx, epoch_cost, train_num / epoch_cost, total_loss))
        if total_loss > 1e8 or str(total_loss) == "nan":
            print(
                "ERROR: LOSS EXPLOSION (>1e8) ! PLEASE SET PROPER PARAMETERS AND STRUCTURE! EXIT...."
            )
            exit(1)
        logger.info("Epoch: %s, Total loss: %s" % (idx, total_loss))

        # Dev evaluation; the F-score decides whether to checkpoint.
        speed, acc, p, r, f, _, _ = evaluate(data, model, "dev")
        dev_finish = time.time()
        dev_cost = dev_finish - epoch_finish
        current_score = f
        print(
            "Dev: time: %.2fs, speed: %.2fst/s; acc: %.4f, p: %.4f, r: %.4f, f: %.4f"
            % (dev_cost, speed, acc, p, r, f))
        logger.info(
            "Epoch: %s, Loss: %s, Dev: time: %.2fs, speed: %.2fst/s; acc: %.4f, p: %.4f, r: %.4f, f: %.4f"
            % (idx, total_loss, dev_cost, speed, acc, p, r, f))
        if current_score > best_dev:
            model_name = config.model_path + '.' + str(idx) + '.model'
            torch.save(model.state_dict(), model_name)
            best_dev = current_score

        # Test evaluation is logged every epoch, independent of the dev result.
        speed, acc, p, r, f, _, _ = evaluate(data, model, "test")
        test_finish = time.time()
        test_cost = test_finish - dev_finish
        logger.info(
            "Epoch: %s, Loss: %s, Test: time: %.2fs, speed: %.2fst/s; acc: %.4f, p: %.4f, r: %.4f, f: %.4f"
            % (idx, total_loss, test_cost, speed, acc, p, r, f))
def train(data):
    """Fine-tune a ``SeqLabel`` model from a pretrained checkpoint with SGD.

    The data settings are saved to ``<data.model_dir>.dset``, then the model
    is initialised from the module-level ``model_path`` (loaded with the
    module-level ``map_location``). After each epoch the model is evaluated
    on the "dev", "test" and "raw" splits. The state dict is written to
    ``<data.model_dir>.<epoch>.model`` when the dev F-score improves and on
    every 50th epoch (including epoch 0).

    Args:
        data: configuration/dataset object. This function reads its ``HP_*``
            hyper-parameters, ``optimizer``, ``model_dir``, ``seg``,
            ``sentence_classification`` and ``train_Ids`` attributes.
            ``train_Ids`` is shuffled in place.

    Exits the process with status 1 if ``data.optimizer`` is not SGD (only
    SGD is supported here), or if the running or the epoch loss exceeds 1e8
    or becomes NaN.
    """
    save_data_name = data.model_dir + ".dset"
    data.save(save_data_name)
    model = SeqLabel(data)
    # Load the pretrained weights.
    print('loading model %s' % model_path)
    model.load_state_dict(torch.load(model_path, map_location=map_location))
    print('data.seg:', data.seg)
    # Only SGD is implemented in this variant. Fail here with a clear message
    # instead of crashing later on ``optimizer.step()`` of a placeholder.
    if data.optimizer.lower() != "sgd":
        print("Optimizer illegal: %s" % (data.optimizer))
        exit(1)
    optimizer = optim.SGD(model.parameters(),
                          lr=data.HP_lr,
                          momentum=data.HP_momentum,
                          weight_decay=data.HP_l2)
    best_dev = -10
    print('data.HP_gpu:', data.HP_gpu)
    for idx in range(data.HP_iteration):
        epoch_start = time.time()
        temp_start = epoch_start
        print("Epoch: %s/%s" % (idx, data.HP_iteration))
        # The optimizer is always SGD at this point, so decay always applies
        # (previously it was skipped unless the name was spelled "SGD").
        optimizer = lr_decay(optimizer, idx, data.HP_lr_decay, data.HP_lr)
        sample_loss = 0  # running loss; reset whenever a progress line is printed
        total_loss = 0  # full loss of the current epoch
        right_token = 0
        whole_token = 0
        random.shuffle(data.train_Ids)
        print("Shuffle: first input word list:", data.train_Ids[0][0])
        model.train()
        model.zero_grad()
        batch_size = data.HP_batch_size
        train_num = len(data.train_Ids)
        print('train_num:', train_num)  # number of training samples
        # "+ 1" covers the final partial batch; an exact multiple yields one
        # empty slice, which is skipped below.
        total_batch = train_num // batch_size + 1
        print('total_batch:', total_batch)
        for batch_id in range(total_batch):
            start = batch_id * batch_size
            end = min((batch_id + 1) * batch_size, train_num)
            instance = data.train_Ids[start:end]
            if not instance:
                continue
            (batch_word, batch_features, batch_wordlen, batch_wordrecover,
             batch_char, batch_charlen, batch_charrecover, batch_label,
             mask) = batchify_with_label(instance, data.HP_gpu, True,
                                         data.sentence_classification)
            loss, tag_seq = model.calculate_loss(batch_word, batch_features,
                                                 batch_wordlen, batch_char,
                                                 batch_charlen,
                                                 batch_charrecover,
                                                 batch_label, mask)
            # Compare predictions against the gold labels.
            right, whole = predict_check(tag_seq, batch_label, mask,
                                         data.sentence_classification)
            right_token += right
            whole_token += whole
            batch_loss = loss.item()
            sample_loss += batch_loss
            total_loss += batch_loss
            if end % 500 == 0:
                temp_time = time.time()
                temp_cost = temp_time - temp_start
                temp_start = temp_time
                print(
                    " Instance: %s; Time: %.2fs; loss: %.4f; acc: %s/%s=%.4f"
                    % (end, temp_cost, sample_loss, right_token, whole_token,
                       (right_token + 0.) / whole_token))
                if sample_loss > 1e8 or str(sample_loss) == "nan":
                    print(
                        "ERROR: LOSS EXPLOSION (>1e8) ! PLEASE SET PROPER PARAMETERS AND STRUCTURE! EXIT...."
                    )
                    exit(1)
                sys.stdout.flush()
                sample_loss = 0
            loss.backward()
            optimizer.step()
            model.zero_grad()
        temp_time = time.time()
        temp_cost = temp_time - temp_start
        print(" Instance: %s; Time: %.2fs; loss: %.4f; acc: %s/%s=%.4f" %
              (end, temp_cost, sample_loss, right_token, whole_token,
               (right_token + 0.) / whole_token))
        epoch_finish = time.time()
        epoch_cost = epoch_finish - epoch_start
        print(
            "Epoch: %s training finished. Time: %.2fs, speed: %.2fst/s, total loss: %s"
            % (idx, epoch_cost, train_num / epoch_cost, total_loss))
        print("total_loss:", total_loss)
        if total_loss > 1e8 or str(total_loss) == "nan":
            print(
                "ERROR: LOSS EXPLOSION (>1e8) ! PLEASE SET PROPER PARAMETERS AND STRUCTURE! EXIT...."
            )
            exit(1)

        speed, acc, p, r, f, _, _ = evaluate(data, model, "dev")
        dev_finish = time.time()
        dev_cost = dev_finish - epoch_finish
        current_score = f
        print(
            "Dev: time: %.2fs, speed: %.2fst/s; acc: %.4f, p: %.4f, r: %.4f, f: %.4f"
            % (dev_cost, speed, acc, p, r, f))

        # Both the "new best" and the periodic checkpoint use the same file
        # name, so the file is written at most once per epoch.
        model_name = data.model_dir + '.' + str(idx) + ".model"
        saved_this_epoch = False
        if current_score > best_dev:
            print("Exceed previous best f score:", best_dev)
            print("Save current best torch_model in file:", model_name)
            # Save the model as it stands at the end of this epoch.
            torch.save(model.state_dict(), model_name)
            saved_this_epoch = True
            best_dev = current_score
        # Periodic checkpoint every 50 epochs.
        if idx % 50 == 0:
            print('Save every 50 epoch in file: %s' % model_name)
            if not saved_this_epoch:
                torch.save(model.state_dict(), model_name)

        speed, acc, p, r, f, _, _ = evaluate(data, model, "test")
        test_finish = time.time()
        test_cost = test_finish - dev_finish
        print(
            "Test: time: %.2fs, speed: %.2fst/s; acc: %.4f, p: %.4f, r: %.4f, f: %.4f"
            % (test_cost, speed, acc, p, r, f))

        # Also evaluate on the "raw" split (the samples added by the author).
        speed, acc, p, r, f, _, _ = evaluate(data, model, "raw")
        raw_finish = time.time()
        raw_cost = raw_finish - test_finish
        print(
            "Raw: time: %.2fs, speed: %.2fst/s; acc: %.4f, p: %.4f, r: %.4f, f: %.4f"
            % (raw_cost, speed, acc, p, r, f))
        gc.collect()
class NCRF:
    """Convenience wrapper bundling a ``Data`` configuration, a model and its optimizer.

    Typical use, as far as this class shows: configure ``self.data`` (from a
    config file or a dict), call ``initialization`` to build alphabets, model
    and optimizer, then ``train`` and/or one of the ``decode*`` methods.
    """

    def __init__(self):
        self.data = Data()
        self.data.HP_gpu = torch.cuda.is_available()
        if self.data.HP_gpu:
            self.data.device = 'cuda'
        self.optimizer = None
        self.model = None

    def read_data_config_file(self, config_dir):
        """Load settings into ``self.data`` from the config file ``config_dir``."""
        self.data.read_config(config_dir)

    def manual_data_setting(self, setting_dict):
        """Configure ``self.data`` from a dict; all values should be strings."""
        self.data.manual_config(setting_dict)

    def initialize_model_and_optimizer(self):
        """Build ``self.model`` and ``self.optimizer`` from the settings in ``self.data``.

        The model is a ``SentClassifier`` when ``data.sentence_classification``
        is true, else a ``SeqLabel``. Exits the process with status 1 when
        ``data.optimizer`` does not name a supported optimizer.
        """
        if self.data.sentence_classification:
            self.model = SentClassifier(self.data)
        else:
            self.model = SeqLabel(self.data)

        optimizer_name = self.data.optimizer.lower()
        # Optimizers that share the (params, lr, weight_decay) signature.
        plain_optimizers = {
            "adagrad": optim.Adagrad,
            "adadelta": optim.Adadelta,
            "rmsprop": optim.RMSprop,
            "adam": optim.Adam,
        }
        if optimizer_name == "sgd":
            self.optimizer = optim.SGD(self.model.parameters(),
                                       lr=self.data.HP_lr,
                                       momentum=self.data.HP_momentum,
                                       weight_decay=self.data.HP_l2)
        elif optimizer_name in plain_optimizers:
            self.optimizer = plain_optimizers[optimizer_name](
                self.model.parameters(),
                lr=self.data.HP_lr,
                weight_decay=self.data.HP_l2)
        else:
            print("Optimizer illegal: %s" % (self.data.optimizer))
            exit(1)

    def initialize_data(self, input_list=None):
        """Build the alphabets and, if word embeddings are enabled, the pretrained embeddings."""
        self.data.initial_alphabets(input_list)
        if self.data.use_word_emb and self.data.use_word_seq:
            self.data.build_pretrain_emb()

    def initialization(self, input_list=None):
        """Initialise the data first, then the model and optimizer.

        The order matters: alphabet sizes and pretrained embeddings must
        exist before the model is constructed.

        input_list: [train_list, dev_list, test_list]
            train_list/dev_list/test_list: [sent_list, label_list, feature_list]
                sent_list: list of list [[word1, word2,...],...,[wordx, wordy]...]
                label_list:
                    if sentence_classification:
                        list of labels [label1, label2,...labelx, labely,...]
                    else:
                        list of list [[label1, label2,...],...,[labelx, labely,...]]
                feature_list:
                    if sentence_classification:
                        list of labels [[feat1, feat2,..],...,[feat1, feat2,..]],
                        len(feature_list) = sentence_num
                    else:
                        list of list [[[feat1, feat2,..],...,[feat1, feat2,..]],...,
                        [[feat1, feat2,..],...,[feat1, feat2,..]]],
                        len(feature_list) = sentence_num
        """
        self.initialize_data(input_list)
        self.initialize_model_and_optimizer()

    def self_generate_instances(self):
        """Generate the 'train', 'dev' and 'test' instances inside ``self.data``."""
        self.data.generate_instance('train')
        self.data.generate_instance('dev')
        self.data.generate_instance('test')

    def generate_instances_from_list(self, input_list, name):
        """Return the instances ``self.data`` generates from ``input_list`` for split ``name``."""
        return self.data.generate_instance_from_list(input_list, name)

    def save(self, model_dir="ncrf.model"):
        """Save data settings, model weights and optimizer state to the file ``model_dir``."""
        the_dict = {
            'data': self.data,
            'state_dict': self.model.state_dict(),
            'optimizer': self.optimizer.state_dict()
        }
        torch.save(the_dict, model_dir)

    def load(self, model_dir="ncrf.model"):
        """Restore data settings, model weights and optimizer state written by ``save``."""
        # NOTE(review): torch.load unpickles arbitrary objects (the saved
        # ``data`` is a pickled Python object) - only load trusted files.
        the_dict = torch.load(model_dir)
        self.data = the_dict['data']
        self.data.silence = True
        # The model and optimizer must exist before their state is loaded.
        self.initialize_model_and_optimizer()
        self.model.load_state_dict(the_dict['state_dict'])
        self.optimizer.load_state_dict(the_dict['optimizer'])

    def train(self, train_Ids=None, save_model_dir=None):
        '''
        Train ``self.model`` for ``data.HP_iteration`` epochs, evaluating on
        "dev" after each epoch and on "test" whenever the dev score improves.

        train_Ids: list of words, chars and labels, various length.
            [[words, features, chars, labels],[words, features, chars, labels],...]
            words: word ids for one sentence. (batch_size, sent_len)
            features: features ids for one sentence. (batch_size, sent_len, feature_num)
            chars: char ids for one sentence, various length. (batch_size, sent_len, each_word_length)
            labels: label ids for one sentence. (batch_size, sent_len)
            When given (and non-empty) it replaces ``self.data.train_Ids``,
            which is shuffled in place every epoch.
        save_model_dir: model name to be saved.
            NOTE: checkpoint saving is currently disabled in this method, so
            this argument is accepted but not used.

        Exits the process with status 1 if the running or the epoch loss
        exceeds 1e8 or becomes NaN.
        '''
        if train_Ids:
            self.data.train_Ids = train_Ids
        print('-----begin train------')
        best_dev = -10
        # Match the case-insensitive optimizer selection done in
        # initialize_model_and_optimizer, so "sgd" also gets LR decay.
        use_lr_decay = self.data.optimizer.lower() == "sgd"
        for idx in range(self.data.HP_iteration):
            epoch_start = time.time()
            temp_start = epoch_start
            if use_lr_decay:
                self.optimizer = lr_decay(self.optimizer, idx,
                                          self.data.HP_lr_decay,
                                          self.data.HP_lr)
            sample_loss = 0  # loss accumulated since the last progress line
            total_loss = 0  # loss accumulated over the whole epoch
            right_token = 0
            whole_token = 0
            random.shuffle(self.data.train_Ids)
            self.model.train()
            batch_size = self.data.HP_batch_size
            train_num = len(self.data.train_Ids)
            # "+ 1" covers the final partial batch; an exact multiple yields
            # one empty slice, which is skipped below.
            total_batch = train_num // batch_size + 1
            for batch_id in range(total_batch):
                self.optimizer.zero_grad()
                start = batch_id * batch_size
                end = min((batch_id + 1) * batch_size, train_num)
                instance = self.data.train_Ids[start:end]
                if not instance:
                    continue
                (batch_word, batch_features, batch_wordlen, batch_wordrecover,
                 batch_char, batch_charlen, batch_charrecover,
                 batch_word_text, batch_label, mask) = batchify_with_label(
                     instance, self.data.HP_gpu, True,
                     self.data.sentence_classification)
                loss, tag_seq = self.model.calculate_loss(
                    batch_word, batch_features, batch_wordlen, batch_char,
                    batch_charlen, batch_charrecover, batch_word_text,
                    batch_label, mask)
                right, whole = predict_check(
                    tag_seq, batch_label, mask,
                    self.data.sentence_classification)
                right_token += right
                whole_token += whole
                batch_loss = loss.item()
                sample_loss += batch_loss
                total_loss += batch_loss
                if end % 300000 == 0:
                    temp_time = time.time()
                    temp_cost = temp_time - temp_start
                    temp_start = temp_time
                    print(
                        " Instance: %s; Time: %.2fs; loss: %.4f; acc: %s/%s=%.4f"
                        % (end, temp_cost, sample_loss, right_token,
                           whole_token, (right_token + 0.) / whole_token))
                    if sample_loss > 1e8 or str(sample_loss) == "nan":
                        print(
                            "ERROR: LOSS EXPLOSION (>1e8) ! PLEASE SET PROPER PARAMETERS AND STRUCTURE! EXIT...."
                        )
                        exit(1)
                    sys.stdout.flush()
                    sample_loss = 0
                loss.backward()
                self.optimizer.step()
            epoch_finish = time.time()
            epoch_cost = epoch_finish - epoch_start
            print(
                "Epoch: %s training finished. Time: %.2fs, speed: %.2fst/s, total loss: %s"
                % (idx, epoch_cost, train_num / epoch_cost, total_loss))
            if total_loss > 1e8 or str(total_loss) == "nan":
                print(
                    "ERROR: LOSS EXPLOSION (>1e8) ! PLEASE SET PROPER PARAMETERS AND STRUCTURE! EXIT...."
                )
                exit(1)
            speed, f = evaluate(self.data, self.model, "dev")
            dev_finish = time.time()
            dev_cost = dev_finish - epoch_finish
            current_score = f
            print("Dev: time: %.2fs, speed: %.2fst/s; f: %.4f" %
                  (dev_cost, speed, f))
            if current_score > best_dev:
                print("Exceed previous best f score:", best_dev)
                # The test split is only scored when dev improves.
                _, f = evaluate(self.data, self.model, "test")
                print("Test: f: %.4f" % (f))
                best_dev = current_score
            gc.collect()

    def _iter_batches(self, instances):
        """Yield ``(start, batch)`` slices of ``instances``, at most ``HP_batch_size`` items each.

        Progress is shown with tqdm. When ``len(instances)`` is an exact
        multiple of the batch size the final slice is empty; callers skip it.
        """
        batch_size = self.data.HP_batch_size
        instance_num = len(instances)
        total_batch = instance_num // batch_size + 1
        for batch_id in tqdm(range(total_batch)):
            start = batch_id * batch_size
            end = min((batch_id + 1) * batch_size, instance_num)
            yield start, instances[start:end]

    def decode(self, raw_Ids):
        '''
        Run the model over ``raw_Ids`` and return the predicted label ids.

        raw_Ids: list of words, chars and labels, various length.
            [[words, features, chars, labels],[words, features, chars, labels],...]
            words: word ids for one sentence. (batch_size, sent_len)
            features: features ids for one sentence. (batch_size, sent_len, feature_num)
            chars: char ids for one sentence, various length. (batch_size, sent_len, each_word_length)
            labels: label ids for one sentence. (batch_size, sent_len)
            ## label should be padded in raw input

        Returns a list with one entry per input instance, in input order
        (each batch's output is indexed by ``batch_wordrecover`` first).
        '''
        # Evaluation mode; no gradients are needed for decoding.
        self.model.eval()
        decode_label = []
        with torch.no_grad():
            for _, instance in self._iter_batches(raw_Ids):
                if not instance:
                    continue
                (batch_word, batch_features, batch_wordlen, batch_wordrecover,
                 batch_char, batch_charlen, batch_charrecover,
                 batch_word_text, batch_label, mask) = batchify_with_label(
                     instance, self.data.HP_gpu, False,
                     self.data.sentence_classification)
                tag_seq = self.model(batch_word, batch_features,
                                     batch_wordlen, batch_char, batch_charlen,
                                     batch_charrecover, batch_word_text, None,
                                     mask)
                # Presumably restores the pre-batchify instance order - confirm
                # against batchify_with_label.
                tag_seq = tag_seq[batch_wordrecover.cpu()]
                decode_label += tag_seq.cpu().data.numpy().tolist()
        return decode_label

    def decode_prob(self, raw_Ids):
        '''
        Return the target probabilities for ``raw_Ids`` (sentence classification only).

        raw_Ids: list of words, chars and labels, various length.
            [[words, features, chars, labels],[words, features, chars, labels],...]
            words: word ids for one sentence. (batch_size, sent_len)
            features: features ids for one sentence. (batch_size, sent_len, feature_num)
            chars: char ids for one sentence, various length. (batch_size, sent_len, each_word_length)
            labels: label ids for one sentence. (batch_size, sent_len)
            ## label should be padded in raw input

        Returns the per-batch probabilities concatenated along axis 0.
        Exits the process with status 1 when the task is not sentence
        classification. ``np.concatenate`` raises ValueError when no batch
        was decoded (empty ``raw_Ids``).
        '''
        if not self.data.sentence_classification:
            print(
                "decode probability is only valid in sentence classification task. Exit."
            )
            # Misuse is an error, so report a non-zero status.
            exit(1)
        target_probability_list = []
        self.model.eval()
        with torch.no_grad():
            for start, instance in self._iter_batches(raw_Ids):
                if start % 10000 == 0:
                    print("Decode: ", start)
                if not instance:
                    continue
                (batch_word, batch_features, batch_wordlen, batch_wordrecover,
                 batch_char, batch_charlen, batch_charrecover,
                 batch_word_text, batch_label, mask) = batchify_with_label(
                     instance, self.data.HP_gpu, False,
                     self.data.sentence_classification)
                target_probability, _ = self.model.get_target_probability(
                    batch_word, batch_features, batch_wordlen, batch_char,
                    batch_charlen, batch_charrecover, batch_word_text, None,
                    mask)
                target_probability = target_probability[
                    batch_wordrecover.cpu()]
                target_probability_list.append(target_probability)
        target_probabilities = np.concatenate(target_probability_list, axis=0)
        return target_probabilities

    def decode_prob_and_attention_weights(self, raw_Ids):
        '''
        Return target probabilities and attention weights for ``raw_Ids``.

        Only valid for sentence classification with an attention-based
        sentence representation ("ATTENTION" or "ATT", any case).

        raw_Ids: list of words, chars and labels, various length.
            [[words, features, chars, labels],[words, features, chars, labels],...]
            words: word ids for one sentence. (batch_size, sent_len)
            features: features ids for one sentence. (batch_size, sent_len, feature_num)
            chars: char ids for one sentence, various length. (batch_size, sent_len, each_word_length)
            labels: label ids for one sentence. (batch_size, sent_len)
            ## label should be padded in raw input

        Returns ``(target_probabilities, sequence_attention_weight_list)``:
        the probabilities concatenated along axis 0, and a list of weight
        rows whose lengths vary by batch and contain padded zeros.
        Exits the process with status 1 when either precondition fails.
        '''
        if not self.data.sentence_classification:
            print(
                "decode probability is only valid in sentence classification task. Exit."
            )
            exit(1)
        if self.data.words2sent_representation.upper() not in ("ATTENTION",
                                                               "ATT"):
            print(
                "attention weights are only valid in attention model. Current: %s, Exit."
                % (self.data.words2sent_representation))
            exit(1)
        target_probability_list = []
        sequence_attention_weight_list = []
        self.model.eval()
        with torch.no_grad():
            for start, instance in self._iter_batches(raw_Ids):
                if start % 10000 == 0:
                    print("Decode: ", start)
                if not instance:
                    continue
                (batch_word, batch_features, batch_wordlen, batch_wordrecover,
                 batch_char, batch_charlen, batch_charrecover,
                 batch_word_text, batch_label, mask) = batchify_with_label(
                     instance, self.data.HP_gpu, False,
                     self.data.sentence_classification)
                # Per the original author's note, both results are numpy arrays.
                target_probability, weights = self.model.get_target_probability(
                    batch_word, batch_features, batch_wordlen, batch_char,
                    batch_charlen, batch_charrecover, batch_word_text, None,
                    mask)
                target_probability = target_probability[
                    batch_wordrecover.cpu()]
                weights = weights[batch_wordrecover.cpu()]
                target_probability_list.append(target_probability)
                sequence_attention_weight_list += weights.tolist()
        target_probabilities = np.concatenate(target_probability_list, axis=0)
        print(len(sequence_attention_weight_list))
        return target_probabilities, sequence_attention_weight_list