def gradient_clip(gradients, max_gradient_norm, safe_clip):
    """Clip a list of gradients and build the matching scalar summaries.

    Args:
        gradients: list of gradient tensors.
        max_gradient_norm: clipping threshold. Used as the element-wise
            bound when ``safe_clip`` is truthy, and as the global-norm bound
            otherwise.
        safe_clip: truthy selects ``tf.clip_by_value``; falsy selects
            ``tf.clip_by_global_norm``.

    Returns:
        A tuple ``(clipped_gradients, gradient_norm_summary, gradient_norm)``
        where ``gradient_norm_summary`` is a list of two scalar summaries
        ("grad_norm" and "clipped_gradient").
    """
    if not safe_clip:
        clipped, gradient_norm = tf.clip_by_global_norm(
            gradients, max_gradient_norm)
        summaries = [
            tf.summary.scalar("grad_norm", gradient_norm),
            tf.summary.scalar("clipped_gradient", tf.global_norm(clipped)),
        ]
        return clipped, summaries, gradient_norm

    utils.print_out('Enable Safe Clip')
    bound = max_gradient_norm
    clipped = [tf.clip_by_value(grad, -bound, bound) for grad in gradients]
    # NOTE(review): in this mode the reported "norm" is the mean of the first
    # clipped gradient, not a global norm, and both summaries log that same
    # tensor -- confirm this is intended.
    gradient_norm = tf.reduce_mean(clipped[0])
    summaries = [
        tf.summary.scalar("grad_norm", gradient_norm),
        tf.summary.scalar("clipped_gradient", gradient_norm),
    ]
    return clipped, summaries, gradient_norm
def eval_std_metrics(hparams, ref_tgt_file, ref_src_file, generated_file):
    """Evaluate a generated file on the standard metrics, one worker per metric.

    Every metric is submitted to a pool as a separate
    ``evaluation_utils.evaluate`` call; the results are then collected in
    metric order, logged, and returned as strings.

    Args:
        hparams: mapping providing 'pre_embed_file', 'tgt_vocab_size' and
            'pre_embed_dim'.
        ref_tgt_file: first positional argument forwarded to ``evaluate``.
        ref_src_file: second positional argument forwarded to ``evaluate``.
        generated_file: third positional argument forwarded to ``evaluate``.

    Returns:
        dict mapping metric name to its score rendered as a string; list or
        tuple scores are joined with '-'.
    """
    metrics = 'embed,rouge,bleu-1,bleu-2,bleu-3,bleu-4,distinct-1,distinct-2,distinct_c-1,distinct_c-2,accuracy,len,entropy'.split(
        ',')
    res = dict()
    # The context manager guarantees the workers are released even when a
    # submission or a result lookup raises.
    with Pool(len(metrics)) as pool:
        jobs = [
            pool.apply_async(
                evaluation_utils.evaluate,
                (ref_tgt_file, ref_src_file, generated_file,
                 hparams['pre_embed_file'], metric,
                 hparams['tgt_vocab_size'], None, None,
                 hparams['pre_embed_dim']))
            for metric in metrics
        ]
        pool.close()
        pool.join()
        for metric, job in zip(metrics, jobs):
            score = job.get()
            if isinstance(score, (list, tuple)):
                score = '-'.join([str(x) for x in score])
            else:
                score = str(score)
            utils.print_out('%s->%s\n' % (metric, score))
            res[metric] = score
    return res
def print_variables_in_ckpt(ckpt_path):
    """Log every variable stored in a checkpoint with its shape, sorted by name."""
    utils.print_out("# Variables in ckpt %s" % ckpt_path)
    shape_by_name = tf.train.NewCheckpointReader(
        ckpt_path).get_variable_to_shape_map()
    for var_name, var_shape in sorted(shape_by_name.items()):
        utils.print_out(" %s: %s" % (var_name, var_shape))
def eval_std_metrics_st(hparams, ref_tgt_file, ref_src_file, generated_file):
    """Evaluate a generated file on the standard metrics in the current process.

    All metrics except entropy are evaluated against ``ref_tgt_file`` /
    ``ref_src_file``; entropy is evaluated against ``hparams['tgt_file']`` /
    ``hparams['src_file']`` with ``vocab_size=hparams['tgt_vocab_size']``.

    Args:
        hparams: mapping providing 'pre_embed_file', 'pre_embed_dim',
            'tgt_file', 'src_file' and 'tgt_vocab_size'.
        ref_tgt_file: reference target file forwarded to ``evaluate``.
        ref_src_file: reference source file forwarded to ``evaluate``.
        generated_file: generated output file forwarded to ``evaluate``.

    Returns:
        list of score strings in evaluation order; a list or tuple score
        contributes one entry per element. (The list used to be built and
        then dropped, so the function returned ``None``.)
    """
    metrics = 'embed,rouge,bleu-1,bleu-2,bleu-3,bleu-4,distinct-1,distinct-2,distinct_c-1,distinct_c-2,accuracy,len'.split(
        ',')
    scores = []

    def _record(metric, score):
        # Log the raw score, then store it flattened to strings.
        utils.print_out(('%s->%s\n') % (metric, score))
        if isinstance(score, (list, tuple)):
            scores.extend(str(x) for x in score)
        else:
            scores.append(str(score))

    for metric in metrics:
        _record(metric,
                evaluation_utils.evaluate(ref_tgt_file, ref_src_file,
                                          generated_file,
                                          hparams['pre_embed_file'], metric,
                                          dim=hparams['pre_embed_dim']))

    for metric in ['entropy']:
        _record(metric,
                evaluation_utils.evaluate(hparams['tgt_file'],
                                          hparams['src_file'],
                                          generated_file,
                                          hparams['pre_embed_file'], metric,
                                          vocab_size=hparams['tgt_vocab_size']))
    return scores
def check_vocab(vocab_file, out_dir, check_special_token=True, sos=None,
                eos=None, unk=None):
    """Validate a vocab file, prepending the special tokens when missing.

    When ``check_special_token`` is set and the first three entries are not
    ``[unk, sos, eos]``, the three tokens are prepended and the result is
    written to ``out_dir`` under the original file's base name.

    Args:
        vocab_file: path of the vocabulary file; must exist.
        out_dir: directory receiving the rewritten vocabulary, if one is needed.
        check_special_token: whether to verify the leading special tokens.
        sos, eos, unk: token overrides; falsy values fall back to the
            module-level SOS / EOS / UNK.

    Returns:
        (vocab_size, vocab_file) -- the final vocabulary length and the path
        of the file that should be used.

    Raises:
        ValueError: if ``vocab_file`` does not exist.
    """
    if not tf.gfile.Exists(vocab_file):
        raise ValueError("vocab_file '%s' does not exist." % vocab_file)

    utils.print_out("# Vocab file %s exists" % vocab_file)
    vocab, _ = load_vocab(vocab_file)
    if check_special_token:
        unk = unk or UNK
        sos = sos or SOS
        eos = eos or EOS
        assert len(vocab) >= 3
        starts_with_specials = (
            vocab[0] == unk and vocab[1] == sos and vocab[2] == eos)
        if not starts_with_specials:
            utils.print_out("The first 3 vocab words [%s, %s, %s]"
                            " are not [%s, %s, %s]" %
                            (vocab[0], vocab[1], vocab[2], unk, sos, eos))
            vocab = [unk, sos, eos] + vocab
            new_vocab_file = os.path.join(out_dir,
                                          os.path.basename(vocab_file))
            writer_factory = codecs.getwriter("utf-8")
            with writer_factory(tf.gfile.GFile(new_vocab_file, "wb")) as f:
                for word in vocab:
                    f.write("%s\n" % word)
            vocab_file = new_vocab_file

    return len(vocab), vocab_file
def create_encoder(self, seq_inputs, entity_inputs, lengths, name='encoder'):
    """Build the bidirectional RNN encoder over sequence + entity inputs.

    :param seq_inputs: sequence input tensor; concatenated with
        ``entity_inputs`` on the last axis (presumably [batch, time,
        dimension], since the RNN runs with ``time_major=False`` -- TODO confirm)
    :param entity_inputs: entity input tensor, concatenated after ``seq_inputs``
    :param lengths: passed as ``sequence_length`` to the RNN (presumably [batch])
    :param name: name of the enclosing variable scope
    :return: (encoder_outputs, encoder_state) -- the forward/backward outputs
        concatenated on the last axis, and a list with one entry per
        direction holding that direction's state concatenated on the last axis
    """
    hparams = self.hparams
    mode = self.mode
    num_layers = hparams['encoder_num_layers']
    cell_type = hparams['cell_type']
    num_units = hparams['num_units']
    forget_bias = hparams['forget_bias']
    embed_dim = hparams['embed_dim']  # not used in this method
    dropout = self.dropout
    with tf.variable_scope(name) as scope:
        inputs_for_std = seq_inputs
        inputs_for_fact = entity_inputs
        inputs = tf.concat([inputs_for_std, inputs_for_fact], axis=-1)

        # Create KEFU Encoder RNN Cells
        def create_kefu_cell(name):
            # NOTE(review): ``name`` is ignored and the stack depth is fixed
            # at 2 here, while the log line below reports ``num_layers`` --
            # confirm the two are meant to agree.
            cell_list = [
                model_helper.create_cell(cell_type, num_units, forget_bias, dropout, mode) for x in range(2)
            ]
            cell_fw = tf.contrib.rnn.MultiRNNCell(cell_list)
            return cell_fw

        with tf.variable_scope('Knowledge_RNN'):
            cell_fw = create_kefu_cell('KEFU_FW')
            cell_bw = create_kefu_cell('KEFU_BW')
            utils.print_out(
                'Creating bi_directional RNN Encoder, num_layers=%s, cell_type=%s, num_units=%d' %
                (num_layers, cell_type, num_units))
            bi_encoder_outputs, bi_encoder_state = tf.nn.bidirectional_dynamic_rnn(
                cell_fw, cell_bw, inputs,
                dtype=tf.float32,
                sequence_length=lengths,
                time_major=False,
                swap_memory=True)
        # Concatenate the forward and backward outputs on the feature axis.
        encoder_outputs = tf.concat(bi_encoder_outputs, -1)  # concatenate the last layer
        # One entry per direction: that direction's state concatenated on the last axis.
        encoder_state = [tf.concat(x, -1) for x in bi_encoder_state]
    return encoder_outputs, encoder_state
def create_or_restore_a_model(out_dir, model, sess):
    """Restore the latest checkpoint in ``out_dir`` or initialise fresh variables.

    Args:
        out_dir: directory searched with ``tf.train.latest_checkpoint``.
        model: object exposing ``saver``, ``global_step`` and ``epoch_step``.
        sess: session used for restoring / initialising.

    Returns:
        (step, epoch): the values read from the restored model, or (0, 0)
        when no checkpoint was found.

    Raises:
        tf.errors.NotFoundError: re-raised after logging the checkpoint's
            variables when the restore fails.
    """
    latest_ckpt = tf.train.latest_checkpoint(out_dir)

    if not latest_ckpt:
        init_op = tf.random_uniform_initializer(-0.08, 0.08)
        tf.get_variable_scope().set_initializer(init_op)
        sess.run(tf.global_variables_initializer())
        sess.run(tf.tables_initializer())
        utils.print_out(" created model with fresh parameters")
        return 0, 0

    try:
        print('Try to load from %s' % latest_ckpt)
        model.saver.restore(sess, latest_ckpt)
    except tf.errors.NotFoundError as e:
        utils.print_out("Can't load checkpoint")
        print_variables_in_ckpt(latest_ckpt)
        utils.print_out("%s" % str(e))
        raise
    sess.run(tf.tables_initializer())
    utils.print_out(" loaded model parameters from %s" % (latest_ckpt))
    step, epoch = sess.run([model.global_step, model.epoch_step])
    return step, epoch
def get_learning_rate_decay(learning_rate, global_step, hparams):
    """Return the learning rate with staircase exponential decay applied.

    The rate is left unchanged while ``global_step < start_decay_step``;
    afterwards it decays by ``decay_factor`` every ``decay_steps`` steps,
    counted from ``start_decay_step``. The three decay settings come from
    ``get_decay_info(hparams)``.
    """
    decay_scheme = hparams['decay_scheme']
    start_decay_step, decay_steps, decay_factor = get_decay_info(hparams)
    utils.print_out(
        " decay_scheme=%s, start_decay_step=%d, decay_steps %d, "
        "decay_factor %g" %
        (decay_scheme, start_decay_step, decay_steps, decay_factor))

    def _before_decay():
        return learning_rate

    def _decayed():
        return tf.compat.v1.train.exponential_decay(
            learning_rate,
            (global_step - start_decay_step),
            decay_steps,
            decay_factor,
            staircase=True)

    return tf.cond(global_step < start_decay_step,
                   _before_decay,
                   _decayed,
                   name="learning_rate_decay_cond")
def load_and_restore_config(config_path, verbose=False):
    """Load a config and merge it with the one saved in the model directory.

    The config at ``config_path`` is loaded first and its 'model_path' is
    created if missing. ``utils.default_path`` is pointed at ``log.txt``
    inside that directory. If the directory already holds a ``config.json``,
    that saved config wins: it is returned, extended only with keys that
    exist in the new config but not in the saved one.

    Args:
        config_path: path of the JSON config to load.
        verbose: forwarded to ``load_config`` (previously ignored; every
            load was verbose regardless of this flag).

    Returns:
        The resulting hparams dict.
    """
    hparams = load_config(config_path, verbose=verbose)
    out_dir = hparams['model_path']
    utils.default_path = os.path.join(out_dir, 'log.txt')
    model_config_path = os.path.join(out_dir, 'config.json')
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(out_dir, exist_ok=True)

    if os.path.exists(model_config_path):
        utils.print_out('reload the parameters from the %s' % model_config_path)
        loaded_hparams = load_config(model_config_path, verbose=verbose)
        for key in hparams:
            if key not in loaded_hparams:
                utils.print_out('ADD HParam Key : %s' % key)
                loaded_hparams[key] = hparams[key]
        hparams = loaded_hparams
    return hparams
def load_config(config_path, verbose=False):
    """Load a JSON config file and make sure the history keys exist.

    :param config_path: path of the UTF-8 encoded JSON file
    :param verbose: pretty-print the loaded config when true
    :return: hparams -- the parsed config. When 'loss' is absent, 'loss',
        'loss_r', 'loss_c' and 'epochs' are all reset to empty lists;
        'loss_c' is additionally created when it alone is missing.
    """
    utils.print_out('load json config file from %s' % config_path)
    with open(config_path, encoding='utf-8') as fin:
        config = json.load(fin)
    if verbose:
        pprint.pprint(config)

    if 'loss' not in config:
        for history_key in ('loss', 'loss_r', 'loss_c', 'epochs'):
            config[history_key] = []
    if 'loss_c' not in config:
        config['loss_c'] = []
    return config
def load_embed_txt(embed_file):
    """Load a text embedding file into a python dictionary.

    The file should be a Glove/word2vec formatted txt file: one word per
    line followed by its space-separated vector components. For the word2vec
    format the first line is a header ``<num_words> <emb_size>``; a first
    line with exactly two tokens is treated as such a header.

    Args:
        embed_file: file path to the embedding file.

    Returns:
        a dictionary that maps word to vector, and the size of embedding
        dimensions. Words whose vector length differs from the established
        size are dropped from the dictionary.
    """
    emb_dict = dict()
    emb_size = None

    with codecs.getreader("utf-8")(tf.gfile.GFile(embed_file, "rb")) as f:
        for line_no, line in enumerate(f):
            tokens = line.rstrip().split(" ")
            if line_no == 0 and len(tokens) == 2:  # header line
                emb_size = int(tokens[1])
                continue
            word, components = tokens[0], tokens[1:]
            vec = [float(value) for value in components]
            if not emb_size:
                # The first vector fixes the size when no header gave one.
                emb_size = len(vec)
                emb_dict[word] = vec
            elif emb_size == len(vec):
                emb_dict[word] = vec
            else:
                utils.print_out(
                    "Ignoring %s since embeding size is inconsistent." % word)
                # Also discards any earlier, valid entry for the same word.
                emb_dict.pop(word, None)
    return emb_dict, emb_size
def prepare_copynet_vocab(vocab_file, out_dir, src_len, pattern='<src_#>',
                          sos=None, eos=None, unk=None):
    """Write a CopyNet vocabulary next to ``vocab_file`` and return it.

    The vocabulary is loaded, the special tokens ``[unk, sos, eos]`` are
    prepended when they are not already its first three entries, and
    ``src_len`` position tokens are appended, each produced by replacing
    '#' in ``pattern`` with the position index. The result is written to
    ``vocab_file + '.copy'``.

    Args:
        vocab_file: path of the base vocabulary; must exist.
        out_dir: not used by this function.
        src_len: number of position tokens to append.
        pattern: template for the position tokens.
        sos, eos, unk: token overrides; falsy values fall back to the
            module-level SOS / EOS / UNK.

    Returns:
        (vocab_size, vocab_file) -- size and path of the CopyNet vocabulary.

    Raises:
        ValueError: if ``vocab_file`` does not exist.
    """
    if not tf.gfile.Exists(vocab_file):
        raise ValueError("vocab_file '%s' does not exist." % vocab_file)

    utils.print_out("# Vocab file %s exists" % vocab_file)
    vocab, _ = load_vocab(vocab_file)

    unk = unk or UNK
    sos = sos or SOS
    eos = eos or EOS
    assert len(vocab) >= 3
    starts_with_specials = (
        vocab[0] == unk and vocab[1] == sos and vocab[2] == eos)
    if not starts_with_specials:
        utils.print_out("The first 3 vocab words [%s, %s, %s]"
                        " are not [%s, %s, %s]" %
                        (vocab[0], vocab[1], vocab[2], unk, sos, eos))
        vocab = [unk, sos, eos] + vocab

    for position in range(src_len):
        vocab.append(pattern.replace('#', str(position)))

    new_vocab_file = vocab_file + '.copy'
    print('Output CopyNet Vocab -> %s' % new_vocab_file)
    writer_factory = codecs.getwriter("utf-8")
    with writer_factory(tf.gfile.GFile(new_vocab_file, "wb")) as f:
        for word in vocab:
            f.write("%s\n" % word)

    return len(vocab), new_vocab_file
def get_learning_rate_warmup(learning_rate, global_step, hparams):
    """Return the learning rate with inverse-decay warmup applied.

    Only the "t2t" scheme is supported. While ``global_step < warmup_steps``
    the rate is multiplied by ``warmup_factor ** (warmup_steps - step)`` with
    ``warmup_factor = 0.01 ** (1 / warmup_steps)``, i.e. training starts with
    a rate 100 times smaller. Inspired by
    https://arxiv.org/pdf/1706.03762.pdf (Section 5.3).

    Raises:
        ValueError: for any other warmup scheme.
    """
    warmup_steps = hparams['warmup_steps']
    warmup_scheme = hparams['warmup_scheme']
    utils.print_out(" learning_rate=%g, warmup_steps=%d, warmup_scheme=%s" %
                    (hparams['learning_rate'], warmup_steps, warmup_scheme))

    if warmup_scheme != "t2t":
        raise ValueError("Unknown warmup scheme %s" % warmup_scheme)

    # 0.01^(1/warmup_steps): we start with a lr, 100 times smaller
    warmup_factor = tf.exp(tf.math.log(0.01) / warmup_steps)
    steps_left = tf.cast(warmup_steps - global_step, tf.float32)
    inv_decay = warmup_factor ** steps_left

    def _warming_up():
        return inv_decay * learning_rate

    def _warmed_up():
        return learning_rate

    return tf.cond(global_step < warmup_steps,
                   _warming_up,
                   _warmed_up,
                   name="learning_rate_warump_cond")
def add_record(self, report_dict, step, epoch):
    """Accumulate one step's values and log their averages periodically.

    Values in ``report_dict`` are added to ``self.value_dict``. Once at least
    ``self.report_per_steps`` steps have passed since the last report, the
    per-step averages of the keys in ``report_dict`` are logged together
    with the elapsed time per step, and the accumulators are reset.

    Args:
        report_dict: mapping from metric name to this step's value.
        step: current global step.
        epoch: current epoch, only used in the log line.
    """
    is_first_record = self.start_time == -1
    if is_first_record:
        self.start_time = time.time()
        self.last_report_time = time.time()
        # Makes the very first record count as one elapsed step.
        self.last_report_step = step - 1
    self.current_time = time.time()
    self.current_step = step

    # update
    for key, value in report_dict.items():
        self.value_dict[key] += value

    num_steps = self.current_step - self.last_report_step
    if num_steps < self.report_per_steps:
        return

    step_time = (self.current_time - self.last_report_time) / num_steps
    summary = []
    for key in report_dict:
        average = self.value_dict[key] / num_steps
        # Learning rates need more than two decimals to be readable.
        template = '%s=%f' if key in ('lr', 'learning_rate') else '%s=%.2f'
        summary.append(template % (key, average))
    utils.print_out(
        '#[E%d/Step%d] Training Summary: interval steps: %d, step_per_time=%.2f'
        % (epoch, self.current_step, num_steps, step_time))
    utils.print_out('\t'.join(summary))
    self.last_report_step = step
    self.last_report_time = time.time()
    self.value_dict = defaultdict(float)
def create_cell(unit_type, num_units, forget_bias, dropout, mode,
                residual_connection=False, device_str=None, residual_fn=None):
    """Create a single RNN cell wrapped with dropout and optional extras.

    Args:
        unit_type: one of "lstm", "gru", "layer_norm_lstm" or "nas".
        num_units: number of units of the cell.
        forget_bias: forget-gate bias for the LSTM variants.
        dropout: dropout rate (= 1 - keep_prob); forced to 0 unless ``mode``
            is ``tf.contrib.learn.ModeKeys.TRAIN``.
        mode: current run mode.
        residual_connection: wrap the cell in a ``ResidualWrapper`` when true.
        device_str: wrap the cell in a ``DeviceWrapper`` when given.
        residual_fn: forwarded to the ``ResidualWrapper``.

    Returns:
        The wrapped cell.

    Raises:
        ValueError: for an unknown ``unit_type``.
    """
    # dropout (= 1 - keep_prob) is set to 0 during eval and infer
    if mode != tf.contrib.learn.ModeKeys.TRAIN:
        dropout = 0.0

    def _base_cell():
        if unit_type == "lstm":
            return tf.contrib.rnn.BasicLSTMCell(num_units,
                                                forget_bias=forget_bias)
        if unit_type == "gru":
            return tf.contrib.rnn.GRUCell(num_units)
        if unit_type == "layer_norm_lstm":
            utils.print_out(" Layer Normalized LSTM, forget_bias=%g" %
                            forget_bias)
            return tf.contrib.rnn.LayerNormBasicLSTMCell(
                num_units, forget_bias=forget_bias, layer_norm=True)
        if unit_type == "nas":
            return tf.contrib.rnn.NASCell(num_units)
        raise ValueError("Unknown unit type %s!" % unit_type)

    # The dropout wrapper is always applied, even with a keep probability of 1.
    cell = tf.contrib.rnn.DropoutWrapper(cell=_base_cell(),
                                         input_keep_prob=(1.0 - dropout))

    if residual_connection:
        cell = tf.contrib.rnn.ResidualWrapper(cell, residual_fn=residual_fn)
        utils.print_out(" %s" % type(cell).__name__)

    if device_str:
        cell = tf.contrib.rnn.DeviceWrapper(cell, device_str)
        utils.print_out(" %s, device=%s" % (type(cell).__name__, device_str))

    return cell
def restore_a_model(out_dir, model, sess):
    """Restore the latest checkpoint found in ``out_dir`` into ``sess``.

    Args:
        out_dir: directory searched with ``tf.train.latest_checkpoint``.
        model: object exposing ``saver``, ``global_step`` and ``epoch_step``.
        sess: session the variables are restored into.

    Returns:
        (step, epoch) as read from the restored model.

    Raises:
        ValueError: if ``out_dir`` holds no checkpoint. (Previously a bare
            ``Exception()`` without a message; ``ValueError`` is still caught
            by ``except Exception``.)
        tf.errors.NotFoundError: re-raised after logging the checkpoint's
            variables when the restore fails.
    """
    latest_ckpt = tf.train.latest_checkpoint(out_dir)
    if not latest_ckpt:
        raise ValueError("No checkpoint found in '%s'." % out_dir)

    try:
        print('Try to load from %s' % latest_ckpt)
        model.saver.restore(sess, latest_ckpt)
    except tf.errors.NotFoundError as e:
        utils.print_out("Can't load checkpoint")
        print_variables_in_ckpt(latest_ckpt)
        utils.print_out("%s" % str(e))
        # Bare raise keeps the original traceback intact.
        raise
    sess.run(tf.tables_initializer())
    utils.print_out(" loaded model parameters from %s" % (latest_ckpt))
    step, epoch = sess.run([model.global_step, model.epoch_step])
    return step, epoch
def test():
    """Run fact ranking on the test set, dump the predictions, log the metrics.

    Reads the configuration named by the module-level ``args``, restores the
    model from ``<model_path>/min_ppl`` (or initialises it when that
    directory has no checkpoint), then iterates the test dataset until it is
    exhausted. For every case the top-1 / top-k fact indices and the
    corresponding fact strings are written under ``<model_path>/decoded``.
    Finally MR, MRR and hit@1/5/10/20 are averaged over all cases and logged.
    """
    hparams = config_parser.load_and_restore_config(args.config_path, verbose=True)
    if args.beam != -1:
        hparams['beam_width'] = args.beam
        utils.print_out("Reset beam_width to %d" % args.beam)
        if args.beam > 10:
            # Shrink the batch as the beam grows.
            hparams['batch_size'] = hparams['batch_size'] * 30 // args.beam
    hparams['length_penalty_weight'] = args.length_penalty_weight
    hparams['diverse_decoding_rate'] = args.diverse_decoding_rate
    hparams['coverage_penalty_weight'] = args.coverage_penalty_weight

    # Dataset
    dataset = dataset_utils.create_flexka3_iterator(hparams, is_eval=True)
    if hparams.get('rank_based', False):
        model = RModel(dataset, hparams, model_helper.INFER)
    else:
        model = Model(dataset, hparams, model_helper.INFER)
    dropout = dataset['dropout']

    # Each fact line becomes "P:<first>,E:<second>[,<rest>...]".
    fact_vocab = []
    with open(hparams['fact_path'], encoding='utf-8') as fin:
        for line in fin:
            items = line.strip('\n').split()  # entity_in_post, ent
            items[0] = 'P:' + items[0]
            items[1] = 'E:' + items[1]
            fact_vocab.append(','.join(items))

    out_dir = os.path.join(hparams['model_path'], 'min_ppl')
    decoded_dir = os.path.join(hparams['model_path'], 'decoded')
    os.makedirs(decoded_dir, exist_ok=True)

    top1_position_path = os.path.join(decoded_dir, 'test.predicted_golden_fact_position_top1.txt')
    topk_position_path = os.path.join(decoded_dir, 'test.predicted_golden_fact_position_top10.txt')
    top1_output_path = os.path.join(decoded_dir, 'predicted_top1.fh0')
    top10_output_path = os.path.join(decoded_dir, 'predicted_top10.fh0')
    meta_output_path = os.path.join(decoded_dir, 'fact_prediction.txt')

    # The queries / responses are not used below; they are still loaded, but
    # read-only ('r' instead of 'r+') so write-protected test data works.
    test_query_file = hparams['test_src_file']
    test_response_file = hparams['test_tgt_file']
    with open(test_query_file, 'r', encoding='utf-8') as fin:
        queries = [x.strip('\n') for x in fin]
    with open(test_response_file, 'r', encoding='utf-8') as fin:
        responses = [x.strip('\n') for x in fin]

    with tf.Session(config=model_helper.create_tensorflow_config()) as sess:
        step, epoch = model_helper.create_or_restore_a_model(out_dir, model, sess)
        dataset['init_fn'](sess, 'test_')

        MRs = []
        MRRs = []
        hit1s = []
        hit5s = []
        hit10s = []
        hit20s = []
        utils.print_out('Current Epoch,Step : %s/%s, Max Epoch,Step : %s/%s' % (epoch, step, hparams['num_train_epochs'], hparams['num_train_steps']))

        # The meta file is only created / truncated; nothing is written to it.
        with open(meta_output_path, 'w+', encoding='utf-8'), \
                open(top1_position_path, 'w+', encoding='utf-8') as ftop1, \
                open(topk_position_path, 'w+', encoding='utf-8') as ftopk, \
                open(top1_output_path, 'w+', encoding='utf-8') as fout1, \
                open(top10_output_path, 'w+', encoding='utf-8') as foutk:
            while True:
                try:
                    cue_fact, facts, probs = sess.run([
                        dataset['cue_fact'],
                        dataset['inputs_for_facts'],
                        model.classifier_scores,
                    ], feed_dict={dropout: 0.0})
                except tf.errors.OutOfRangeError:
                    # The dataset iterator is exhausted.
                    break

                topk_index, topk_labels = batch_top_k(probs, facts)
                ranks, reversed_ranks, hits = batch_rank_eval(cue_fact, probs, hitAT=(1, 5, 10, 20))
                MRs.extend(ranks)
                MRRs.extend(reversed_ranks)
                hit1s.extend(hits[0])
                hit5s.extend(hits[1])
                hit10s.extend(hits[2])
                hit20s.extend(hits[3])
                for my_index, my_label in zip(topk_index, topk_labels):
                    ftop1.write('%s\n' % my_index[0])
                    # Second field of the fact string with its "E:" prefix removed.
                    fout1.write('%s\n' % fact_vocab[my_label[0]].split(',')[1][2:])
                    for index in my_index:
                        ftopk.write('%s\n' % index)
                        foutk.write('%s\n' % fact_vocab[index])

        MR = np.average(MRs)
        MRR = np.average(MRRs)
        hit1 = np.average(hit1s) * 100
        hit5 = np.average(hit5s) * 100
        hit10 = np.average(hit10s) * 100
        hit20 = np.average(hit20s) * 100
        utils.print_out('MR=%.2f,MRR=%.2f,hit1=%.2f,hit5=%.2f,hit10=%.2f,hit20=%.2f' %
                        (MR, MRR, hit1, hit5, hit10, hit20))
def main(args):
    """Score a decoded top-1 file on entity usage and on the standard metrics.

    The decoded file is located from the decoding settings in ``args``
    (beam width, penalties, rerank, binary). Entity statistics are written
    to ``<config_id>_top1.eres``: one line per sample listing the matched
    target entities, followed by one summary line of five averages. Unless
    ``args.only_entity`` is set, the embedding, rouge, bleu, distinct and
    entropy scores are written tab-separated to ``<config_id>_top1.res``
    (``.ores`` when ``args.pre_embed_file`` overrides the config).

    Args:
        args: parsed command-line arguments; the attributes read here are
            config_path, beam, pre_embed_file, pre_embed_dim, rerank,
            length_penalty_weight, diverse_decoding_rate,
            coverage_penalty_weight, binary, only_entity and thread.
    """

    def _load_tokens(path):
        # Read a file as token lists with the marker strings stripped.
        rows = []
        with open(path, 'r', encoding='utf-8') as fin:
            for line in fin:
                for marker in ('#', '$C:', '$R:', '$E:'):
                    line = line.replace(marker, '')
                rows.append(line.strip('\n').split())
        return rows

    def _first_half(complex_score):
        # Keep the first half of a raw score; unwrap it when one value remains.
        score = complex_score[0:len(complex_score) // 2]
        if len(score) == 1:
            score = score[0]
        return score

    def _as_strings(score):
        # Flatten a score into a list of strings.
        if isinstance(score, (list, tuple)):
            return [str(x) for x in score]
        return [str(score)]

    hparams = config_parser.load_config(args.config_path, verbose=True)
    if args.beam != -1:
        hparams['beam_width'] = args.beam
        utils.print_out("Reset beam_width to %d" % args.beam)

    res_suffix = 'res'
    if args.pre_embed_file != '':
        hparams['pre_embed_file'] = args.pre_embed_file
        utils.print_out("Reset pre_embed_file to %s" % args.pre_embed_file)
        res_suffix = 'ores'
    if args.pre_embed_dim != -1:
        hparams['pre_embed_dim'] = args.pre_embed_dim
        # Log the dimension itself (the file name used to be printed here).
        utils.print_out("Reset pre_embed_dim to %s" % args.pre_embed_dim)

    if args.rerank == 0:
        config_id = 'B%s_L%.1f_D%.1f_C%.1f' % (
            hparams['beam_width'], args.length_penalty_weight,
            args.diverse_decoding_rate, args.coverage_penalty_weight)
    else:
        config_id = 'R%s_B%s_L%.1f_D%.1f_C%.1f' % (
            args.rerank, hparams['beam_width'], args.length_penalty_weight,
            args.diverse_decoding_rate, args.coverage_penalty_weight)

    decoded_dir = os.path.join(hparams['model_path'], 'decoded')
    os.makedirs(decoded_dir, exist_ok=True)

    binary_suffix = '.bi' if args.binary else ''
    top1_out_file_path = os.path.join(
        decoded_dir, '%s_top1.txt' % config_id + binary_suffix)
    topk_out_file_path = os.path.join(
        decoded_dir, '%s_topk.txt' % config_id + binary_suffix)
    if args.rerank > 0:
        top1_out_file_path += '.mmi'
        topk_out_file_path += '.mmi'

    # Evaluation
    score_file_path = os.path.join(
        decoded_dir, '%s_top1.%s' % (config_id, 'eres') + binary_suffix)

    # check: the entity file is read, but its contents are not used further
    # in this function.
    entity_list = []
    entity_dict = dict()
    entity_dict_path = hparams['entity_path']
    with open(entity_dict_path, encoding='utf-8') as f:
        for i, line in enumerate(f):
            e = line.strip()
            entity_list.append(e)
            entity_dict[e] = i

    # load generations
    generations = _load_tokens(top1_out_file_path)
    # load refs
    refs = _load_tokens(hparams['test_tgt_file'])

    # load facts
    facts = []
    with open(hparams['fact_path'], 'r', encoding='utf-8') as fin:
        for line in fin:
            facts.append([x.replace('#', '') for x in line.strip('\n').split()])

    # load fact idx
    fact_idx = []
    with open(hparams['test_fact_file'], 'r', encoding='utf-8') as fin:
        for line in fin:
            fact_idx.append([int(x) for x in line.strip('\n').split()])

    entity_scores = []
    entity_rates = []
    entity_recalls = []
    entity_precisions = []
    entity_distincts = []
    entity_targets = []
    with open(score_file_path, 'w+', encoding='utf-8') as fout:
        for generation, ref, idx in zip(generations, refs, fact_idx):
            entity_set = set()
            target_entity_set = set()
            for i in idx:
                fact = facts[i]
                if len(fact) == 4:
                    if fact[0] not in stopwords:
                        target_entity_set.add(fact[0])
                    if fact[1] not in stopwords:
                        entity_set.add(fact[1])
                    if fact[3] not in stopwords:
                        entity_set.add(fact[3])
                elif len(fact) == 5:
                    if fact[1] not in stopwords:
                        target_entity_set.add(fact[1])
                        entity_set.add(fact[1])
                    if fact[0] not in stopwords:
                        entity_set.add(fact[0])

            entity_score = 0.0
            generation_entities = set()
            matched_entity = set()
            for word in generation:
                if word in entity_set:
                    generation_entities.add(word)
                    entity_score += 1
                if word in target_entity_set:
                    matched_entity.add(word)
            target_entity_score = len(matched_entity)

            entity_distincts.append(len(generation_entities))
            if len(generation) != 0:
                entity_rate = entity_score / len(generation)
            else:
                entity_rate = 0
            entity_scores.append(entity_score)
            entity_rates.append(entity_rate)
            entity_targets.append(target_entity_score)
            fout.write('%s\n' % ' '.join(matched_entity))

            ref_entities = {word for word in ref if word in entity_set}
            shared = len(ref_entities & generation_entities)
            if len(ref_entities) != 0:
                entity_recalls.append(shared / (0.0 + len(ref_entities)))
            else:
                entity_recalls.append(1.0)

            if len(generation_entities) != 0:
                entity_precisions.append(
                    shared / (0.0 + len(generation_entities)))
            elif len(ref_entities) != 0:
                entity_precisions.append(0.0)
            else:
                entity_precisions.append(1.0)

        # Averages are taken over the number of generations; the guard keeps
        # an empty decoded file from raising ZeroDivisionError.
        total = max(len(generations), 1)
        fout.write('%.4f\t%.4f\t%.4f\t%.4f\t%.4f\n' % (
            sum(entity_targets) / total,  # Matched Entity Score
            sum(entity_distincts) / total,  # Used Entity Score
            sum(entity_rates) / total,  # Used Entity ratio
            sum(entity_recalls) / total,  # Recall
            sum(entity_precisions) / total,  # Precision
        ))

    if args.only_entity is False:
        score_file_path = os.path.join(
            decoded_dir,
            '%s_top1.%s' % (config_id, res_suffix) + binary_suffix)

        scores = []
        metrics = 'rouge,bleu-1,bleu-2,distinct-1,distinct-2'.split(',')
        thread_pool = Pool(args.thread)
        jobs = []
        for metric in metrics:
            job = thread_pool.apply_async(evaluation_utils.evaluate, (
                hparams['test_tgt_file'],
                hparams['test_src_file'],
                top1_out_file_path,
                hparams['pre_embed_file'],
                metric,
                hparams['pre_embed_dim'],
                None,
                None,
                hparams['beam_width']))
            jobs.append(job)

        # entropy
        metrics.append('entropy')
        job = thread_pool.apply_async(evaluation_utils.evaluate, (
            hparams['tgt_file'],
            hparams['src_file'],
            top1_out_file_path,
            hparams['pre_embed_file'],
            'entropy',
            hparams['pre_embed_dim'],
            hparams['tgt_vocab_size']))
        jobs.append(job)

        thread_pool.close()
        thread_pool.join()

        # Embedding-based; evaluated in this process after the pool is done.
        score = _first_half(evaluation_utils.evaluate(
            hparams['test_tgt_file'],
            hparams['test_src_file'],
            top1_out_file_path,
            hparams['pre_embed_file'],
            'embed',
            dim=hparams['pre_embed_dim']))
        utils.print_out(('%s->%s\n') % ('embed', score))
        scores.extend(_as_strings(score))

        for job, metric in zip(jobs, metrics):
            score = _first_half(job.get())
            utils.print_out(('%s->%s\n') % (metric, score))
            scores.extend(_as_strings(score))

        with open(score_file_path, 'w+', encoding='utf-8') as fout:
            fout.write('\t'.join(scores))
def create_decoder(self, encoder_outputs, encoder_states, name='decoder'):
    """Build the attention decoder for training/eval or for inference.

    NOTE(review): this part of the file lost its indentation; the nesting
    below was reconstructed from the statement order. Please confirm it
    against the original, in particular which statements belong inside the
    'std_rnn' and 'cue_rnn' variable scopes (that affects variable names).

    Args:
        encoder_outputs: Encoder outputs. Used as the attention memory and,
            flattened to 2-D, as the lookup table for copy embeddings.
        encoder_states: Sequence of encoder final states. Either forwarded
            layer by layer or concatenated on the last axis to initialise
            the decoder.
        name: Name of the variable scope wrapping the decoder.

    Returns:
        A ``(logits, sampled_id, scores)`` tuple. When the mode is not
        INFER, ``sampled_id`` is ``None`` and ``scores`` is ``tf.no_op()``.
        With beam search, ``logits`` is ``tf.no_op()`` and ``sampled_id`` /
        ``scores`` are transposed with ``[2, 0, 1]``.

    Side effects:
        Depending on the branch taken, assigns ``self.word_bow_loss``,
        ``self._cue_fact_loss``, ``self.selector_logits``,
        ``self.mode_selector``, ``self.fact_alignments``,
        ``self.k_openness``, ``self.copy_alignments``,
        ``self.fact_memory_alignments`` or ``self.debug``.
    """
    hparams = self.hparams
    mode = self.mode
    sim_dim = self.hparams.get("sim_dim", 64)
    lengths = self._lengths_for_decoder
    # The None assignment is immediately overwritten by the Dense layer.
    copy_embedding_transform_fn = None
    copy_embedding_transform_fn = tf.layers.Dense(
        units=hparams['embed_dim'], name='copy_embedding_transformation')
    # Remember the scope that is current here; the inference embedding
    # function re-enters it before calling the Dense layer.
    copy_fn_var_scope = tf.get_variable_scope()
    if self.mode == model_helper.TRAIN and hparams.get(
            "multi_decoder_input", False):
        # Decoder inputs are the concatenation of:
        # common-word embedding [+ copy embedding] [+ entity embedding].
        # Common Words
        embedding_list = []
        common_word_embedding = self._input_embeddings_for_decoder
        embedding_list.append(common_word_embedding)
        if hparams.get("copy_predict_mode", False):
            decoder_input_idx = self._inputs_for_decoder
            # A token is a copy token when its id lies in
            # [tgt_vocab_size, tgt_vocab_size + copy_token_nums).
            not_common_words = tf.greater_equal(decoder_input_idx,
                                                hparams['tgt_vocab_size'])
            not_entity_words = tf.less(
                decoder_input_idx,
                hparams['tgt_vocab_size'] + hparams['copy_token_nums'])
            is_copy_words = not_common_words & not_entity_words
            is_copy_mask = tf.cast(is_copy_words, tf.float32)
            # Source position addressed by the copy token, clamped into
            # [0, copy_token_nums - 1] so the lookup below stays in range
            # for non-copy tokens too (their rows are masked out later).
            copy_idx = decoder_input_idx - hparams['tgt_vocab_size']
            copy_idx = tf.maximum(copy_idx, 0)
            copy_idx = tf.minimum(copy_idx, hparams['copy_token_nums'] - 1)
            src_idx = self._inputs_for_encoder
            batch_size = tf.shape(src_idx)[0]
            max_src_len = tf.shape(src_idx)[1]
            max_tgt_len = tf.shape(copy_idx)[1]
            # Per-example offset into the flattened encoder outputs.
            offset = tf.range(batch_size) * max_src_len
            offset = tf.expand_dims(offset, -1)
            offset = tf.tile(offset, [1, max_tgt_len])
            offset_copy_idx = copy_idx + offset
            flatten_encoder_outputs = tf.reshape(
                encoder_outputs, [-1, tf.shape(encoder_outputs)[-1]])
            copy_embedding = tf.nn.embedding_lookup(
                flatten_encoder_outputs, offset_copy_idx)
            # NOTE(review): the last dimension is fixed to 2 * num_units,
            # presumably because the encoder is bidirectional - confirm.
            copy_embedding = tf.reshape(
                copy_embedding,
                [batch_size, max_tgt_len,
                 hparams.get("num_units") * 2])
            copy_embedding = copy_embedding_transform_fn(copy_embedding)
            # common_word_idx = tf.where(is_copy_mask, copy_to_word_idx, common_word_idx)
            # Zero the copy embedding wherever the input is not a copy token.
            is_copy_mask = tf.expand_dims(is_copy_mask, -1)
            copy_embedding = copy_embedding * is_copy_mask
            embedding_list.append(copy_embedding)
        if hparams.get("entity_predict_mode", False):
            embedding_list.append(
                self._input_entity_embeddings_for_decoder)
        targets_in_embedding = tf.concat(embedding_list, -1)
    else:
        targets_in_embedding = self._input_embeddings_for_decoder
    with tf.variable_scope(name) as scope:
        num_layers = hparams['decoder_num_layers']
        cell_type = hparams['cell_type']
        num_units = hparams['num_units']
        forget_bias = hparams['forget_bias']
        dropout = self.dropout
        # Inference decodes at most twice the longest source length.
        maximum_iterations = tf.reduce_max(self._lengths_for_encoder) * 2
        # Create RNN Cell
        with tf.variable_scope('std_rnn'):
            cell_list = [
                model_helper.create_cell(
                    unit_type=cell_type,
                    num_units=num_units,
                    forget_bias=forget_bias,
                    dropout=dropout,
                    mode=mode,
                ) for x in range(num_layers)
            ]
            if num_layers > 1:
                cell_std = tf.contrib.rnn.MultiRNNCell(cell_list)
            else:
                cell_std = cell_list[0]
        # Decoder initial state: either the raw encoder states (only when
        # encoder and decoder have the same depth and the option is set) or
        # a learned tanh projection of the concatenated encoder states.
        if hparams.get("decoder_num_layers") == hparams.get(
                "encoder_num_layers") and hparams.get(
                    "pass_raw_encoder_state", False):
            # NOTE(review): self.word_bow_loss is not assigned on this path.
            decoder_initial_state = []
            for i in range(num_layers):
                decoder_initial_state.append(encoder_states[i])
            if num_layers > 1:
                decoder_initial_state = tuple(decoder_initial_state)
            else:
                decoder_initial_state = decoder_initial_state[0]
        else:
            decoder_initial_state = []
            # Optionally append the fused knowledge vector before dropout.
            if self.knowledge_fusion is None:
                concatenated_encoder_states = tf.nn.dropout(
                    tf.concat(encoder_states, -1), keep_prob=1.0 - dropout)
            else:
                concatenated_encoder_states = tf.nn.dropout(
                    tf.concat(encoder_states + [self.knowledge_fusion], -1),
                    keep_prob=1.0 - dropout)
            if self.hparams.get('word_bow_loss', 0.0) > 0.0:
                # Auxiliary bag-of-words loss over the target vocabulary.

                def safe_log(y):
                    # log() with the argument clipped from below at 1e-9.
                    return tf.log(
                        tf.clip_by_value(y, 1e-9, tf.reduce_max(y)))

                if self.hparams.get('word_bow_loss_type_2', False) is False:
                    common_word_inputs = tf.layers.dense(
                        concatenated_encoder_states,
                        self.hparams.get("mid_projection_dim"),
                        tf.nn.elu,
                        name='word_bow_predictor_1')
                    word_logits = tf.layers.dense(
                        common_word_inputs,
                        self.hparams.get("tgt_vocab_size"),
                        use_bias=False,
                        name='word_bow_predictor_2')
                else:
                    # Type 2 predicts directly from the knowledge fusion.
                    word_logits = tf.layers.dense(
                        self.knowledge_fusion,
                        self.hparams.get("tgt_vocab_size"),
                        use_bias=False,
                        name='word_bow_predictor_2')
                word_probs = tf.nn.softmax(word_logits)
                # Cross entropy against the golden bag of words, divided by
                # the bag size (at least 1), then averaged over the batch.
                word_bow_loss = -tf.reduce_sum(
                    self._golden_word_bow * safe_log(word_probs),
                    -1) / tf.maximum(
                        tf.reduce_sum(self._golden_word_bow, -1), 1)
                self.word_bow_loss = tf.reduce_sum(
                    word_bow_loss) / self.batch_size
            else:
                self.word_bow_loss = tf.constant(0.0)
            # One projection per decoder layer.
            for i in range(num_layers):
                init_out = tf.layers.dense(concatenated_encoder_states,
                                           num_units,
                                           activation=tf.nn.tanh,
                                           use_bias=False,
                                           name='std_transformer_%d' % i)
                decoder_initial_state.append(init_out)
            if num_layers > 1:
                decoder_initial_state = tuple(decoder_initial_state)
            else:
                decoder_initial_state = decoder_initial_state[0]
        with tf.variable_scope('cue_rnn'):
            cell_list = [
                model_helper.create_cell(
                    unit_type=cell_type,
                    num_units=num_units,
                    forget_bias=forget_bias,
                    dropout=dropout,
                    mode=mode,
                ) for x in range(num_layers)
            ]
            # _batch_size and _fact_num are not used again in this method.
            _batch_size = tf.shape(self._fact_candidate)[0]
            _fact_num = tf.shape(self._fact_candidate)[1]
            fact_projection = self.fact_projection
            if num_layers > 1:
                cell_cue = tf.contrib.rnn.MultiRNNCell(cell_list)
            else:
                cell_cue = cell_list[0]
        # Attention
        assert hparams['attention'] is not None
        memory = encoder_outputs
        if (self.mode == model_helper.INFER
                and hparams['infer_mode'] == "beam_search"):
            # Beam search: every per-example tensor is repeated beam_width
            # times.
            memory, source_sequence_length, decoder_initial_state, batch_size = (
                self._prepare_beam_search_decoder_inputs(
                    hparams["beam_width"], memory,
                    self._lengths_for_encoder, decoder_initial_state))
            # NOTE(review): the fact/cue tensors are only bound when
            # 'kefu_decoder' is set; without it, `fact_entity_idx` used as a
            # default argument further down would be undefined.
            if hparams.get('kefu_decoder', False):
                _lengths_for_fact_candidate = tf.contrib.seq2seq.tile_batch(
                    self._lengths_for_fact_candidate,
                    multiplier=hparams['beam_width'])
                _fact_candidate_embedding = tf.contrib.seq2seq.tile_batch(
                    fact_projection, multiplier=hparams['beam_width'])
                _cue_input_embedding = tf.contrib.seq2seq.tile_batch(
                    self._cue_fact_embedding,
                    multiplier=hparams['beam_width'])
                fact_entity_idx = tf.contrib.seq2seq.tile_batch(
                    self._fact_candidate, multiplier=hparams['beam_width'])
                encoder_memory = tf.contrib.seq2seq.tile_batch(
                    encoder_outputs, multiplier=hparams['beam_width'])
                encoder_memory_len = tf.contrib.seq2seq.tile_batch(
                    self._lengths_for_encoder,
                    multiplier=hparams['beam_width'])
                if self.knowledge_distribution is not None:
                    knowledge_distribution = tf.contrib.seq2seq.tile_batch(
                        self.knowledge_distribution,
                        multiplier=hparams['beam_width'])
                else:
                    knowledge_distribution = self.knowledge_distribution
        else:
            # No beam search: use the tensors as they are.
            fact_entity_idx = self._fact_candidate
            _lengths_for_fact_candidate = self._lengths_for_fact_candidate
            _fact_candidate_embedding = fact_projection
            source_sequence_length = self._lengths_for_encoder
            batch_size = self.batch_size
            _cue_input_embedding = self._cue_fact_embedding
            encoder_memory = encoder_outputs
            encoder_memory_len = self._lengths_for_encoder
            knowledge_distribution = self.knowledge_distribution
        attention_mechanism = self.create_attention_mechanism(
            hparams["attention"], num_units, memory,
            source_sequence_length, self.mode)
        # When True the cell itself receives the vocabulary projection and
        # its output is used as logits without a further projection.
        # NOTE(review): 'kefu_decoder' defaults to True here but to False
        # in the other lookups of this method - confirm.
        generate_probs_in_cell = hparams.get(
            'kefu_decoder',
            True) and (hparams.get("entity_predict_mode", False)
                       or hparams.get("copy_predict_mode", False))
        if generate_probs_in_cell:
            common_word_projection = self._projection_layer
        else:
            common_word_projection = None
        # Only generate alignment in greedy INFER mode.
        alignment_history = (self.mode == model_helper.INFER
                             and hparams["infer_mode"] != "beam_search")
        k_openness_history = self.mode == model_helper.INFER
        if hparams.get('kefu_decoder', False):
            if hparams.get("use_dynamic_knowledge_distribution",
                           True) is False:
                knowledge_distribution = None
            cell_fw = KEFUAttentionWrapper2.AttentionWrapper(
                cell_std,
                cell_cue,
                _cue_input_embedding,
                _fact_candidate_embedding,
                _lengths_for_fact_candidate,
                knowledge_distribution,
                attention_mechanism,
                mid_projection_dim=hparams.get(
                    "mid_projection_dim_for_commonword",
                    hparams.get("mid_projection_dim", 1280)),
                cue_fact_mode=hparams.get("cue_fact", False),
                cue_fact_mask=self.mode == model_helper.INFER,
                encoder_memory=encoder_memory,
                encoder_memory_len=encoder_memory_len,
                balance_gate=hparams.get("balance_gate", True),
                entity_predict_mode=hparams.get('entity_predict_mode',
                                                False),
                copy_predict_mode=hparams.get('copy_predict_mode', False),
                vocab_sizes=(hparams['tgt_vocab_size'],
                             hparams['copy_token_nums'],
                             hparams['entity_token_nums']),
                common_word_projection=common_word_projection,
                attention_layer_size=num_units,
                alignment_history=alignment_history,
                k_openness_history=k_openness_history,
                output_attention=hparams["output_attention"],
                sim_dim=sim_dim,
                name="attention")
        else:
            cell_fw = tf.contrib.seq2seq.AttentionWrapper(
                cell_std,
                attention_mechanism,
                attention_layer_size=num_units,
                alignment_history=alignment_history,
                output_attention=hparams["output_attention"],
                name="attention")
        batch_size = tf.to_int32(batch_size)
        # Start from the wrapper's zero state and swap in the computed
        # cell state.
        decoder_initial_state = cell_fw.zero_state(
            batch_size, tf.float32).clone(cell_state=decoder_initial_state)
        # Train or Eval
        if mode != tf.contrib.learn.ModeKeys.INFER:
            utils.print_out(
                'Creating Training RNN Decoder, num_layers=%s, cell_type=%s, num_units=%d'
                % (num_layers, cell_type, num_units))
            # Helper
            helper = tf.contrib.seq2seq.TrainingHelper(
                targets_in_embedding, lengths, time_major=False)
            # Decoder
            my_decoder = tf.contrib.seq2seq.BasicDecoder(
                cell_fw, helper, decoder_initial_state)
            # Dynamic decoding
            outputs, final_context_state, _ = tf.contrib.seq2seq.dynamic_decode(
                my_decoder,
                output_time_major=False,
                swap_memory=True,
                scope=scope)
            rnn_outputs = outputs.rnn_output
            if generate_probs_in_cell:
                logits = rnn_outputs
            else:
                logits = self._projection_layer(rnn_outputs)
            if hparams.get("cue_fact", False):
                self._cue_fact_loss = final_context_state.cue_fact_openness
            else:
                self._cue_fact_loss = tf.constant(0.0)
            sampled_id = None
            scores = tf.no_op()
            # Stack the per-step selector outputs and move the second axis
            # of the stacked tensor to the front.
            self.selector_logits = tf.transpose(
                final_context_state.model_selector_openness.stack(),
                [1, 0, 2])
        else:
            utils.print_out(
                'Creating Infer RNN Decoder, num_layers=%s, cell_type=%s, num_units=%d'
                % (num_layers, cell_type, num_units))
            infer_mode = hparams["infer_mode"]
            utils.print_out('Infer mode : %s' % infer_mode)
            start_token = tf.cast(
                self.tgt_vocab_table.lookup(tf.constant(vocab_utils.SOS)),
                tf.int32)
            end_token = tf.cast(
                self.tgt_vocab_table.lookup(tf.constant(vocab_utils.EOS)),
                tf.int32)
            start_tokens = tf.fill([tf.shape(self._inputs_for_encoder)[0]],
                                   start_token)

            def embedding_fn_multi(
                    input_idx,
                    fact_entity_idx=fact_entity_idx,
                    copy_embedding_transform_fn=copy_embedding_transform_fn
            ):
                # Map predicted ids to the concatenated multi-part input
                # embedding. Parts are appended in reverse order and the
                # list is reversed before concatenation.
                common_word_idx = input_idx
                embedding_list = []
                # Reverse
                # Common Copy Entity
                if hparams.get("entity_predict_mode", False):
                    # entity mode
                    relative_entity_idx = input_idx - hparams.get(
                        'src_vocab_size') - hparams.get('copy_token_nums')
                    is_entity = tf.greater_equal(relative_entity_idx, 0)
                    # is_entity_mask is not used below.
                    is_entity_mask = tf.cast(is_entity, tf.float32)
                    relative_entity_idx = tf.maximum(
                        0, relative_entity_idx)
                    # [batch, fact_len]
                    fact_entity_idx = fact_entity_idx
                    batch_size = tf.shape(input_idx)[0]
                    max_fact_num = tf.shape(fact_entity_idx)[1]
                    flatten_fact_idx = tf.reshape(fact_entity_idx, [-1])
                    # Per-example offset into the flattened fact ids.
                    offset = tf.expand_dims(tf.range(batch_size),
                                            -1) * max_fact_num
                    relative_entity_idx = relative_entity_idx + offset
                    # fact id -> entity id -> word id
                    fact_idx = tf.nn.embedding_lookup(
                        flatten_fact_idx, relative_entity_idx)
                    entity_idx = tf.nn.embedding_lookup(
                        self._fact_entity_in_response, fact_idx)
                    entity2word_idx = tf.nn.embedding_lookup(
                        self._entity2word, entity_idx)
                    common_word_idx = tf.where(is_entity, entity2word_idx,
                                               common_word_idx)
                    tmp_common_word_idx = common_word_idx
                    # With copy mode on, the entity embedding is appended
                    # inside the copy branch below instead.
                    if hparams.get("copy_predict_mode", False) is False:
                        common_word_idx_to_entity_idx = tf.nn.embedding_lookup(
                            self._word2entity, tmp_common_word_idx)
                        entity_embedding = tf.nn.embedding_lookup(
                            self._embedding_entity,
                            common_word_idx_to_entity_idx)
                        embedding_list.append(entity_embedding)
                if hparams.get("copy_predict_mode", False):
                    src_idx = self._inputs_for_encoder
                    max_src_len = tf.shape(src_idx)[1]
                    batch_size = tf.shape(input_idx)[0]
                    # NOTE(review): the copy range is bounded by the source
                    # length here, but by hparams['copy_token_nums'] on the
                    # training path - confirm this is intended.
                    isnot_common_words = tf.greater_equal(
                        input_idx, hparams['tgt_vocab_size'])
                    isnot_entity_words = tf.less(
                        input_idx, hparams['tgt_vocab_size'] + max_src_len)
                    is_copy_words = isnot_common_words & isnot_entity_words
                    is_copy_mask = tf.cast(is_copy_words, tf.float32)
                    copy_idx = input_idx - hparams['tgt_vocab_size']
                    copy_idx = tf.maximum(copy_idx, 0)
                    copy_idx = tf.minimum(copy_idx, max_src_len - 1)
                    max_tgt_len = tf.shape(copy_idx)[1]
                    offset = tf.range(batch_size) * max_src_len
                    offset = tf.expand_dims(offset, -1)
                    offset = tf.tile(offset, [1, max_tgt_len])
                    offset_copy_idx = copy_idx + offset
                    flatten_src_idx = tf.reshape(src_idx, [-1])
                    flatten_encoder_outputs = tf.reshape(
                        encoder_outputs,
                        [-1, tf.shape(encoder_outputs)[-1]])
                    # Source word id and encoder output at the copied
                    # position.
                    copy_to_word_idx = tf.nn.embedding_lookup(
                        flatten_src_idx, offset_copy_idx)
                    copy_embedding = tf.nn.embedding_lookup(
                        flatten_encoder_outputs, offset_copy_idx)
                    copy_embedding = tf.reshape(copy_embedding, [
                        batch_size, max_tgt_len,
                        hparams.get("num_units") * 2
                    ])
                    # Re-enter the scope captured at the top of the method
                    # before applying the shared Dense layer.
                    with tf.variable_scope(copy_fn_var_scope):
                        copy_embedding = copy_embedding_transform_fn(
                            copy_embedding)
                    common_word_idx = tf.where(is_copy_words,
                                               copy_to_word_idx,
                                               common_word_idx)
                    tmp_common_word_idx = common_word_idx
                    if hparams.get("entity_predict_mode", False):
                        common_word_idx_to_entity_idx = tf.nn.embedding_lookup(
                            self._word2entity, tmp_common_word_idx)
                        entity_embedding = tf.nn.embedding_lookup(
                            self._embedding_entity,
                            common_word_idx_to_entity_idx)
                        embedding_list.append(entity_embedding)
                    is_copy_mask = tf.expand_dims(is_copy_mask, -1)
                    copy_embedding = copy_embedding * is_copy_mask
                    embedding_list.append(copy_embedding)
                embedding_list.append(
                    tf.nn.embedding_lookup(self._embedding_vocab,
                                           common_word_idx))
                if hparams.get('add_token_type_feature', False):
                    embedding_list.append(
                        tf.nn.embedding_lookup(self._embedding_id2type,
                                               input_idx))
                # Must
                # (the parts were appended last-to-first, so reverse them)
                embedding_list.reverse()
                return tf.concat(embedding_list, -1)

            def embedding_fn(x, fact_entity_idx=fact_entity_idx):
                # Single-embedding variant: entity ids are mapped back to
                # word ids, then looked up in the word embedding table.
                if hparams.get('entity_predict_mode', False):
                    # > 0 is_entity else is word or copy token [0,500]
                    relative_entity_idx = x - hparams.get(
                        'src_vocab_size') - hparams.get('copy_token_nums')
                    # NOTE(review): strict tf.greater here, whereas
                    # embedding_fn_multi uses tf.greater_equal - confirm
                    # which boundary is intended.
                    is_entity = tf.greater(relative_entity_idx, 0)
                    relative_entity_idx = tf.maximum(
                        0, relative_entity_idx)
                    # [batch, fact_len]
                    fact_entity_idx = fact_entity_idx
                    # Cast relative idx to right idx
                    batch_size = tf.shape(fact_entity_idx)[0]
                    max_fact_num = tf.shape(fact_entity_idx)[1]
                    fact_entity_idx = tf.reshape(fact_entity_idx, [-1, 1])
                    # batch_range
                    offset = tf.expand_dims(tf.range(batch_size),
                                            -1) * max_fact_num
                    relative_entity_idx = relative_entity_idx + offset
                    relative_entity_idx = tf.reshape(
                        relative_entity_idx, tf.shape(x))
                    entity_idx = tf.nn.embedding_lookup(
                        fact_entity_idx, relative_entity_idx)
                    entity_idx = tf.squeeze(entity_idx, -1)
                    entity_idx = tf.nn.embedding_lookup(
                        self._fact_entity_in_response, entity_idx)
                    entity2word_idx = tf.nn.embedding_lookup(
                        self._entity2word, entity_idx)
                    x = tf.where(is_entity, entity2word_idx, x)
                return tf.nn.embedding_lookup(self._embedding_vocab, x)

            # NOTE(review): the second test is a plain `if`, not `elif`, so
            # its `else` also fires for infer_mode == "greedy" and raises.
            # The greedy-only statements below (BasicDecoder using
            # `projection_layer`, which is bound only in the beam-search
            # branch) therefore look unreachable as written - confirm.
            if infer_mode == "greedy":
                helper = tf.contrib.seq2seq.GreedyEmbeddingHelper(
                    embedding_fn, start_tokens, end_token)
            if infer_mode == "beam_search":
                beam_width = hparams["beam_width"]
                beam_decoder_fn = BeamSearchDecoder
                # No output layer when the cell already emits probabilities.
                if generate_probs_in_cell:
                    projection_layer = None
                else:
                    projection_layer = self._projection_layer
                if hparams.get("multi_decoder_input", False):
                    my_embedding_fn = embedding_fn_multi
                else:
                    my_embedding_fn = embedding_fn
                my_decoder = beam_decoder_fn(
                    cell=cell_fw,
                    embedding=my_embedding_fn,
                    start_tokens=start_tokens,
                    end_token=end_token,
                    initial_state=decoder_initial_state,
                    beam_width=beam_width,
                    output_layer=projection_layer,
                    coverage_penalty_weight=hparams.get(
                        'coverage_penalty_weight', 0),
                    diverse_decoding_rate=hparams.get(
                        'diverse_decoding_rate', 0),
                    length_penalty_weight=hparams.get(
                        'length_penalty_weight', 0))
            else:
                # NOTE(review): infer_mode is passed as a second exception
                # argument, not interpolated into the message.
                raise ValueError("Unknown infer_mode '%s'", infer_mode)
            if infer_mode != 'beam_search':
                my_decoder = tf.contrib.seq2seq.BasicDecoder(
                    cell_fw,
                    helper,
                    decoder_initial_state,
                    output_layer=projection_layer  # applied per timestep
                )
            # Dynamic decoding
            outputs, final_context_state, _ = tf.contrib.seq2seq.dynamic_decode(
                my_decoder,
                maximum_iterations=maximum_iterations,
                output_time_major=False,
                swap_memory=True,
                scope=scope)
            if infer_mode == "beam_search":
                # sampled_id [batch_id,length,beam_id]
                sampled_id = outputs.predicted_ids
                logits = tf.no_op()
                scores = outputs.beam_search_decoder_output.scores
                # first dim is set to the beam_id
                sampled_id = tf.transpose(sampled_id, [2, 0, 1])
                #mapped_sampled_id = sampled_id
                scores = tf.transpose(scores, [2, 0, 1])
                if hparams.get('kefu_decoder', False):
                    # Expose the wrapper's diagnostic state on the model.
                    self.mode_selector = final_context_state.cell_state.model_selector_openness
                    self.fact_alignments = final_context_state.cell_state.fact_alignments
                    if hparams.get('cue_fact', False):
                        self.k_openness = final_context_state.cell_state.k_openness
                    else:
                        self.k_openness = tf.constant(0.0)
                    self.copy_alignments = final_context_state.cell_state.copy_alignments
                    # fact_alignments is assigned a second time, same value.
                    self.fact_alignments = final_context_state.cell_state.fact_alignments
                    if hparams.get("fact_memory_read", False):
                        self.fact_memory_alignments = final_context_state.cell_state.fact_memory_alignments
                    else:
                        self.fact_memory_alignments = tf.no_op()
                else:
                    self.debug = tf.no_op()
            else:
                logits = outputs.rnn_output
                sampled_id = outputs.sample_id
                scores = outputs.scores
                # Add a leading axis of size 1.
                sampled_id = tf.expand_dims(sampled_id, 0)
                scores = tf.expand_dims(scores, 0)
    return logits, sampled_id, scores
def create_model(self, name='flexka'):
    """Assemble the graph: encoder, fact selection, decoder and losses.

    When ``self._fact_distribution`` is set, a prior distribution over the
    fact candidates is computed from the encoder states and a posterior one
    from the encoder states plus the encoded decoder inputs. In TRAIN mode
    the posterior drives the decoder and a KL term and a bag-of-facts term
    are built; otherwise the prior drives the decoder and the posterior is
    only stored on ``self.post_knowledge_distribution``.

    After decoding, every mode except INFER builds the training losses and
    combines them into ``self._train_update_loss``; INFER maps the sampled
    ids back to strings in ``self.sampled_id``.

    Args:
        name: Name of the variable scope wrapping the whole model.
    """
    hparams = self.hparams

    def _floored_log(values):
        # log() with the argument clipped from below at 1e-9.
        return tf.log(tf.clip_by_value(values, 1e-9, tf.reduce_max(values)))

    def _candidate_scores(query, candidates, candidate_count, layer_name):
        # Project the query to sim_dim, repeat it once per candidate and
        # take the dot product with every candidate projection.
        projected = tf.layers.dense(query,
                                    units=hparams.get("sim_dim", 64),
                                    activation=tf.nn.tanh,
                                    use_bias=True,
                                    name=layer_name)
        projected = tf.expand_dims(projected, 1)
        projected = tf.tile(projected, [1, candidate_count, 1])
        return tf.reduce_sum(projected * candidates, -1)

    with tf.variable_scope(name):
        encoder_outputs, encoder_states = self.create_encoder(
            self._input_embeddings_for_encoder,
            self._input_entity_embeddings_for_encoder,
            self._lengths_for_encoder,
        )

        if self._fact_distribution:
            self.knowledge_fusion = None
            candidate_count = tf.shape(self._fact_candidate_embedding)[1]
            candidates = self.fact_projection

            # Prior: conditioned on the encoder states only.
            prior_scores = _candidate_scores(tf.concat(encoder_states, -1),
                                             candidates, candidate_count,
                                             'prior_distribution')

            # Additive mask: -1e10 on padded candidate slots and on the
            # first slot of every row.
            padding_free = tf.sequence_mask(self._lengths_for_fact_candidate,
                                            dtype=tf.float32)
            first_slot = tf.sequence_mask(
                tf.ones_like(self._lengths_for_fact_candidate),
                maxlen=candidate_count,
                dtype=tf.float32)
            fact_mask = (1.0 - padding_free) * -1e10 + first_slot * -1e10
            prior_distribution = tf.nn.softmax(prior_scores + fact_mask)

            # Posterior: additionally conditioned on the decoder inputs,
            # encoded with the (reused) encoder variables.
            with tf.variable_scope(tf.get_variable_scope(), reuse=True):
                _, response_states = self.create_encoder(
                    self._input_embeddings_for_decoder,
                    self._input_entity_embeddings_for_decoder,
                    self._lengths_for_decoder)
            posterior_scores = _candidate_scores(
                tf.concat(response_states + encoder_states, -1), candidates,
                candidate_count, 'post_distribution')
            post_distribution = tf.nn.softmax(posterior_scores + fact_mask)

            is_training = self.mode == model_helper.TRAIN
            if is_training:
                self.knowledge_distribution = post_distribution
            else:
                self.post_knowledge_distribution = post_distribution
                self.knowledge_distribution = prior_distribution

            if hparams.get('knowledge_fusion', "none") == 'initDecoder':
                # Expected candidate embedding under the chosen distribution.
                self.knowledge_fusion = tf.reduce_sum(
                    self._fact_candidate_embedding *
                    tf.expand_dims(self.knowledge_distribution, -1), 1)

            if is_training:
                # post * log(post / prior), averaged over candidates, then
                # summed over the batch and divided by the batch size.
                prior_floor = tf.clip_by_value(prior_distribution, 1e-9, 1.0)
                kld = post_distribution * _floored_log(
                    post_distribution / prior_floor)
                kld = tf.reduce_mean(kld, -1)
                self.kld_loss = tf.reduce_sum(kld) / self.batch_size

                # Bag-of-facts loss against the golden facts, divided by
                # the number of golden facts (at least 1).
                bow_numerator = -tf.reduce_sum(
                    self._golden_fact_bow *
                    _floored_log(self.knowledge_distribution), -1)
                bow_denominator = tf.maximum(
                    tf.reduce_sum(self._golden_fact_bow, -1), 1)
                self.knowledge_bow_loss = tf.reduce_sum(
                    bow_numerator / bow_denominator) / self.batch_size
            # NOTE(review): outside TRAIN mode this branch assigns neither
            # kld_loss nor knowledge_bow_loss, yet both are read below when
            # the mode is not INFER - confirm they are set elsewhere.

            if self.knowledge_fusion is not None:
                self.knowledge_fusion = tf.nn.dropout(
                    self.knowledge_fusion, keep_prob=1.0 - self.dropout)
        else:
            # Fact selection disabled: neutral placeholders.
            self.kld_loss = tf.constant(0.0)
            self.knowledge_distribution = None
            self.knowledge_fusion = None
            self.knowledge_bow_loss = tf.constant(0.0)

        logits, sampled_id, scores = self.create_decoder(
            encoder_outputs,
            encoder_states,
        )
        self.logits = logits
        self.scores = scores

        if self.mode == model_helper.INFER:
            self.sampled_id = self.reverse_target_vocab_table.lookup(
                tf.to_int64(sampled_id))
        else:
            token_loss = self.compute_loss(logits,
                                           self._outputs_for_decoder,
                                           self._lengths_for_decoder,
                                           unk_helper=hparams.get(
                                               "unk_helper", True))
            self.train_loss = tf.reduce_sum(token_loss) / self.batch_size
            self._train_update_loss = self.train_loss

            # Supervision of the token-type selector.
            selector_loss = self.compute_loss(self.selector_logits,
                                              self._outputs_type_for_decoder,
                                              self._lengths_for_decoder,
                                              unk_helper=False)
            self.teach_force_loss = tf.reduce_sum(
                selector_loss) / self.batch_size

            # Optional terms are added on top of the token loss.
            if hparams.get("teach_force", False):
                self._train_update_loss += self.teach_force_loss * hparams.get(
                    "teach_force_rate", 0.5)
            if self._fact_distribution:
                self._train_update_loss += self.kld_loss
            if hparams.get("knowledge_bow_loss", False):
                self._train_update_loss += self.knowledge_bow_loss * hparams.get(
                    "knowledge_bow_loss")
            if self.hparams.get('word_bow_loss', 0.0) > 0.0:
                self._train_update_loss += self.word_bow_loss * self.hparams.get(
                    'word_bow_loss')
            self._cue_fact_loss = tf.constant(0.0)

    # Print vars
    utils.print_out('-------------Trainable Variables------------------')
    for variable in tf.trainable_variables():
        utils.print_out(variable)
def _load_symbol_index(path):
    """Read one symbol per line; return (symbols, {symbol: line index})."""
    symbols = []
    index_of = {}
    with open(path, encoding='utf-8') as f:
        for i, line in enumerate(f):
            symbol = line.strip()
            symbols.append(symbol)
            index_of[symbol] = i
    return symbols, index_of


def _load_embedding_rows(path):
    """Parse a text file of vectors, one tab- or space-separated per line."""
    rows = []
    # Read-only access is sufficient; the file is never written.
    with open(path, 'r', encoding='utf-8') as f:
        for line in f:
            separator = '\t' if '\t' in line else ' '
            rows.append([float(x) for x in line.strip().split(separator)])
    return rows


def load_knolwedge_graph(hparams):
    """Load the knowledge-graph resources: vocabularies, embeddings, facts.

    Reads the following ``hparams`` keys: ``entity_path``,
    ``relation_path``, ``entity_embedding_path``,
    ``relation_embedding_path``, ``entity_dim`` and ``fact_path``.

    The entity and relation dictionary files must start with the reserved
    padding symbols in the expected order; this is checked with ``assert``.
    Each fact line is split on whitespace into four fields. Fields 0, 1 and
    3 are looked up in the entity dictionary (unknown symbols map to 0) and
    field 2 in the relation dictionary. Field 0 becomes the fact's target;
    fields 1-3 become the fact triple.

    :param hparams: dict-like configuration holding the keys listed above.
    :return: tuple ``(tf_entity_embed, tf_entity_fact_embedding,
        tf_entity_target, entity_vocab, reverse_entity_vocab)``:
        the joint entity+relation embedding table, the per-fact embedding of
        shape ``[-1, 3 * entity_dim]``, the int32 target entity id of every
        fact, and the string->id / id->string entity lookup tables.
    """
    entity_dict_path = hparams['entity_path']
    relation_dict_path = hparams['relation_path']
    utils.print_out("load entity dict from %s" % entity_dict_path)
    utils.print_out("load relation dict from %s" % relation_dict_path)
    entity_embed_path = hparams['entity_embedding_path']
    relation_embed_path = hparams['relation_embedding_path']
    embed_dim = hparams['entity_dim']

    entity_vocab = lookup_ops.index_table_from_file(entity_dict_path,
                                                    default_value=0)
    reverse_entity_vocab = lookup_ops.index_to_string_table_from_file(
        entity_dict_path, default_value=UNK_ENTITY)

    padding_entity_list = [
        UNK_ENTITY, NONE_ENTITY, PAD_ENTITY, NOT_HEAD_ENTITY, NOT_TAIL_ENTITY
    ]
    padding_relation_list = [NONE_RELATION, PAD_RELATION, NOT_TBD]

    # Make sure the reserved symbols sit at the expected positions.
    entity_list, entity_dict = _load_symbol_index(entity_dict_path)
    assert entity_list[:len(padding_entity_list)] == padding_entity_list, \
        'entity dictionary must start with the reserved padding entities'
    relation_list, relation_dict = _load_symbol_index(relation_dict_path)
    assert relation_list[:len(padding_relation_list)] == padding_relation_list, \
        'relation dictionary must start with the reserved padding relations'

    print("Loading entity vectors...")
    entity_embed = _load_embedding_rows(entity_embed_path)
    print("Loading relation vectors...")
    relation_embed = _load_embedding_rows(relation_embed_path)

    entity_embed = np.array(entity_embed, dtype=np.float32)
    relation_embed = np.array(relation_embed, dtype=np.float32)
    # The loaded vectors are frozen.
    entity_embed = tf.get_variable('entity_embed',
                                   dtype=tf.float32,
                                   initializer=entity_embed,
                                   trainable=False)
    relation_embed = tf.get_variable('relation_embed',
                                     dtype=tf.float32,
                                     initializer=relation_embed,
                                     trainable=False)
    entity_embed = tf.reshape(entity_embed, [-1, embed_dim])
    relation_embed = tf.reshape(relation_embed, [-1, embed_dim])

    # Zero-initialised rows for the reserved symbols are put in front of
    # the loaded vectors.
    # NOTE(review): presumably the vector files contain no rows for the
    # reserved symbols, so table rows line up with dictionary indices -
    # confirm.
    padding_entity_embedding = tf.get_variable(
        'entity_padding_embed', [len(padding_entity_list), embed_dim],
        dtype=tf.float32,
        initializer=tf.zeros_initializer())
    padding_relation_embedding = tf.get_variable(
        'relation_padding_embed', [len(padding_relation_list), embed_dim],
        dtype=tf.float32,
        initializer=tf.zeros_initializer())
    tf_entity_embed = tf.concat([padding_entity_embedding, entity_embed],
                                axis=0)
    tf_relation_embed = tf.concat([padding_relation_embedding, relation_embed],
                                  axis=0)

    # Bias-free linear maps applied on top of the tables.
    tf_entity_embed = tf.layers.dense(tf_entity_embed,
                                      embed_dim,
                                      use_bias=False,
                                      name='entity_embedding')
    tf_relation_embed = tf.layers.dense(tf_relation_embed,
                                        embed_dim,
                                        use_bias=False,
                                        name='relation_embedding')
    # Entities and relations share one table; relation rows come last.
    tf_entity_embed = tf.concat([tf_entity_embed, tf_relation_embed], axis=0)

    # Facts
    utils.print_out('Loading facts')
    fact_dict_path = hparams['fact_path']
    with open(fact_dict_path, encoding='utf-8') as fin:
        lines = fin.readlines()
    utils.print_out('Total Entity-Fact : %d' % len(lines))

    entity_fact = []
    entity_target = []
    # Relations and entities share one list, so relation ids are shifted by
    # the number of entries in the entity dictionary.
    relation_offset = len(entity_dict)
    for line in lines:
        items = line.strip('\n').split()
        for i in (0, 1, 3):
            items[i] = int(entity_dict.get(items[i], 0))
        # An unknown relation yields None here and int(None) raises
        # TypeError.
        items[2] = int(relation_dict.get(items[2])) + relation_offset
        entity_fact.append(items[1:])
        entity_target.append(items[0])

    # uni ids
    entity_fact = np.array(entity_fact, dtype=np.int32)
    entity_target = np.array(entity_target, dtype=np.int32)
    entity_fact = np.reshape(entity_fact, [len(lines), 3])
    entity_target = np.reshape(entity_target, [len(lines)])
    tf_entity_fact = tf.constant(value=entity_fact, dtype=np.int32)
    tf_entity_target = tf.constant(value=entity_target, dtype=np.int32)

    # Concatenate the three embeddings of every fact triple into one row.
    tf_entity_fact_embedding = tf.nn.embedding_lookup(tf_entity_embed,
                                                      tf_entity_fact)
    tf_entity_fact_embedding = tf.reshape(tf_entity_fact_embedding,
                                          [-1, 3 * embed_dim])
    return tf_entity_embed, tf_entity_fact_embedding, tf_entity_target, entity_vocab, reverse_entity_vocab
def _run_rank_evaluation(sess, model, dataset, dropout):
    """Evaluate the currently initialised split until it is exhausted.

    Dropout is fed as 0.0. Returns a dict with the mean batch loss
    (``loss``), the mean batch KLD scaled by 1e6 (``KLD``), the mean rank
    (``MR``), the mean reciprocal rank (``MRR``) and the hit rates at
    1/5/10/20 in percent (``hit1``, ``hit5``, ``hit10``, ``hit20``).

    Raises:
        ValueError: if the split yields no batch at all.
    """
    batch_losses = []
    batch_klds = []
    all_ranks = []
    all_reciprocal_ranks = []
    hit_buckets = [[], [], [], []]
    while True:
        try:
            loss, cue_fact, probs, kld_loss = sess.run(
                [
                    model._knowledge_bow_loss, dataset['cue_fact'],
                    model.classifier_scores, model.kld_loss
                ],
                feed_dict={dropout: 0.0})
        except tf.errors.OutOfRangeError:
            # The split is exhausted.
            break
        batch_losses.append(loss)
        batch_klds.append(kld_loss)
        ranks, reversed_ranks, hits = batch_rank_eval(cue_fact,
                                                      probs,
                                                      hitAT=(1, 5, 10, 20))
        all_ranks.extend(ranks)
        all_reciprocal_ranks.extend(reversed_ranks)
        for bucket, batch_hits in zip(hit_buckets, hits):
            bucket.extend(batch_hits)

    if not batch_losses:
        raise ValueError('evaluation split produced no batches')

    return {
        'loss': sum(batch_losses) / len(batch_losses),
        # Averaged over all batches, like the loss, instead of reporting
        # only the value of the last batch.
        'KLD': sum(batch_klds) / len(batch_klds) * 1000000,
        'MR': np.average(all_ranks),
        'MRR': np.average(all_reciprocal_ranks),
        'hit1': np.average(hit_buckets[0]) * 100,
        'hit5': np.average(hit_buckets[1]) * 100,
        'hit10': np.average(hit_buckets[2]) * 100,
        'hit20': np.average(hit_buckets[3]) * 100,
    }


def train():
    """Train the fact-ranking model, evaluating after every epoch.

    The configuration is loaded from ``args.config_path``. Training steps
    run until ``utils.should_stop`` no longer returns ``False``. When the
    training iterator is exhausted the epoch is closed: a checkpoint is
    saved, the 'valid_' and 'test_' splits are evaluated and logged, the
    best-on-dev checkpoint is stored under ``min_ppl``, and the learning
    rate is multiplied by ``hparams['learning_halve']`` whenever the dev
    loss is higher than in the previous epoch.
    """
    # Load config
    hparams = config_parser.load_and_restore_config(args.config_path,
                                                    verbose=True)
    out_dir = hparams['model_path']
    eval_file = os.path.join(out_dir, 'eval_out.txt')
    status_counter = Status(hparams['status_per_steps'])

    # Dataset
    dataset = dataset_utils.create_flexka3_iterator(hparams)
    if hparams.get('rank_based', False):
        model = RModel(dataset, hparams, model_helper.TRAIN)
    else:
        model = Model(dataset, hparams, model_helper.TRAIN)
    dropout = dataset['dropout']

    rank_report = ('KLD=%.2f,MR=%.2f,MRR=%.2f,hit1=%.2f,hit5=%.2f,'
                   'hit10=%.2f,hit20=%.2f')

    with tf.Session(config=model_helper.create_tensorflow_config()) as sess:
        step, epoch = model_helper.create_or_restore_a_model(
            out_dir, model, sess)
        dataset['init_fn'](sess)
        epoch_start_time = time.time()

        while utils.should_stop(epoch, step, hparams) is False:
            try:
                (gradient, lr, _, loss, _regulation_loss, step, epoch,
                 _batch_size, cue_fact, probs, kld_loss) = sess.run(
                     [
                         model.grad_norm, model.learning_rate, model.update,
                         model._knowledge_bow_loss, model.regulation_loss,
                         model.global_step, model.epoch_step,
                         model.batch_size, dataset['cue_fact'],
                         model.classifier_scores, model.kld_loss
                     ],
                     feed_dict={
                         dropout: hparams['dropout'],
                         model.learning_rate: hparams['learning_rate']
                     })
            except tf.errors.InvalidArgumentError:
                print('Found Inf or NaN global norm')
                raise
            except tf.errors.OutOfRangeError:
                # The training iterator is exhausted: close the epoch.
                utils.print_out('epoch %d is finished,  step %d' %
                                (epoch, step))
                sess.run([model.next_epoch])
                # Save Epoch
                model.saver.save(sess,
                                 os.path.join(out_dir, "seq2seq.ckpt"),
                                 global_step=model.global_step)
                utils.print_out('Saved model to -> %s' % out_dir)

                # EVAL on Dev/Test Set:
                for prefix in ['valid_', 'test_']:
                    dataset['init_fn'](sess, prefix)
                    result = _run_rank_evaluation(sess, model, dataset,
                                                  dropout)
                    loss = result['loss']
                    rank_values = (result['KLD'], result['MR'],
                                   result['MRR'], result['hit1'],
                                   result['hit5'], result['hit10'],
                                   result['hit20'])

                    if prefix == 'valid_':
                        utils.print_out('Eval on Dev: EVAL LOSS: %.4f' %
                                        (loss))
                        utils.eval_print(
                            eval_file,
                            'Eval on Dev: Epoch %d Step %d EVAL LOSS: %.4f' %
                            (epoch, step, loss))
                        utils.print_out(('Eval on Dev ' + rank_report) %
                                        rank_values)
                        utils.eval_print(eval_file,
                                         ('Eval on Dev ' + rank_report) %
                                         rank_values)
                        hparams['loss'].append(float(loss))
                        hparams['epochs'].append(int(step))
                        config_parser.save_config(hparams)

                        # The current loss was just appended, so this holds
                        # exactly when it is the best dev loss so far.
                        if min(hparams['loss']) - loss >= 0:
                            model.ppl_saver.save(
                                sess,
                                os.path.join(out_dir, 'min_ppl',
                                             "seq2seq.ckpt"),
                                global_step=model.global_step)
                            utils.print_out(
                                'Saved min_ppl model to -> %s' % out_dir)

                        # Decay the learning rate when the dev loss rose.
                        if len(hparams['loss']) > 1:
                            if hparams['loss'][-1] > hparams['loss'][-2]:
                                hparams['learning_rate'] = hparams[
                                    'learning_rate'] * hparams[
                                        'learning_halve']
                                utils.eval_print(
                                    eval_file,
                                    'Halved the learning rate to %f' %
                                    hparams['learning_rate'])
                                config_parser.save_config(hparams)
                    else:
                        utils.print_out('Eval on Test: EVAL PPL: %.4f' %
                                        (loss))
                        utils.print_out(('Eval on Test ' + rank_report) %
                                        rank_values)
                        utils.eval_print(eval_file,
                                         ('Eval on Test ' + rank_report) %
                                         rank_values)
                        utils.eval_print(
                            eval_file,
                            'Eval on Test: Epoch %d Step %d EVAL PPL: %.4f' %
                            (epoch, step, loss))

                # NEXT EPOCH
                epoch_time = time.time() - epoch_start_time
                utils.print_time(epoch_time, 'Epoch Time:')
                # Estimate: last epoch's duration times the epochs left.
                remaining_time = epoch_time * (hparams['num_train_epochs'] -
                                               epoch - 1)
                utils.print_time(remaining_time, 'Reaming Time:')
                epoch_start_time = time.time()
                dataset['init_fn'](sess)
                continue

            # Per-step ranking statistics of the training batch.
            ranks, reversed_ranks, hits = batch_rank_eval(cue_fact,
                                                          probs,
                                                          hitAT=(1, 5, 10,
                                                                 20))
            # print(sess.run(model.debug))
            status_counter.add_record(
                {
                    'gradient': gradient,
                    'loss': loss,
                    'kld': kld_loss * 1000000,
                    'lr': lr,
                    'MR': np.average(ranks),
                    'MRR': np.average(reversed_ranks),
                    'hit1': np.average(hits[0]) * 100,
                    'hit5': np.average(hits[1]) * 100,
                    'hit10': np.average(hits[2]) * 100,
                    'hit20': np.average(hits[3]) * 100
                }, step, epoch)

        utils.print_out('model has been fully trained !')
def create_model(self, name='flexka'):
    """Build the fact-candidate classifier and its losses under scope ``name``.

    The scoring network is selected by ``hparams['flexka_classifier_mode']``
    (default ``'dot'``):

    * ``dot`` / ``attention`` / ``mlp``: candidates are scored from the
      encoder side only.
    * ``posterior_dot``: candidates are scored from the encoder states
      concatenated with the states obtained by re-running the encoder on
      the decoder inputs.
    * ``prior_posterior_dot`` / ``lazy_prior_posterior_dot`` /
      ``prior_posterior_attention``: both a prior (encoder only) and a
      posterior (encoder + decoder inputs) distribution are built and
      their KL divergence is stored in ``self.kld_loss``.

    Sets ``self.classifier_scores`` (the softmax probabilities),
    ``self.kld_loss``, ``self.regulation_loss`` and, in TRAIN mode,
    ``self._knowledge_bow_loss`` and ``self._train_update_loss``.

    Raises:
        ValueError: if the classifier mode is not one of the above.
    """
    prior_posterior_modes = {'prior_posterior_dot',
                             'prior_posterior_attention',
                             'lazy_prior_posterior_dot'}

    def safe_log(y):
        # Clip from below so the log never sees an exact zero.
        return tf.log(tf.clip_by_value(y, 1e-9, tf.reduce_max(y)))

    def drop(x):
        return tf.nn.dropout(x, keep_prob=1.0 - self.dropout)

    hparams = self.hparams
    classifier_mode = hparams.get("flexka_classifier_mode", 'dot')
    regulation_weight = hparams.get("flexka_classifier_regulation_loss", 0.0)

    with tf.variable_scope(name):
        encoder_outputs, encoder_states = self.create_encoder(
            self._input_embeddings_for_encoder,
            self._input_entity_embeddings_for_encoder,
            self._lengths_for_encoder,
        )
        self.kld_loss = tf.constant(0.0)
        self.knowledge_distribution = None
        self.knowledge_fusion = None
        self.knowledge_bow_loss = tf.constant(0.0)

        maximium_candidate_num = tf.shape(self._fact_candidate_embedding)[1]
        fact_seq_mask = tf.sequence_mask(self._lengths_for_fact_candidate,
                                         dtype=tf.float32)
        # A length-1 sequence mask selects candidate position 0 of every row.
        unk_mask = tf.sequence_mask(
            tf.ones_like(self._lengths_for_fact_candidate),
            maxlen=maximium_candidate_num, dtype=tf.float32)
        # Large negative offset on padded positions and on position 0 so
        # that the softmax gives them (almost) no probability.
        fact_mask = (1.0 - fact_seq_mask) * -1e10 + unk_mask * -1e10
        fact_embedding_projection = self.fact_projection

        def tiled_projection(states, layer_name):
            """Project concatenated states and repeat them per candidate."""
            inputs = drop(tf.concat(states, -1))
            projection = tf.layers.dense(inputs, units=300,
                                         activation=tf.nn.tanh,
                                         use_bias=True, name=layer_name)
            projection = tf.expand_dims(projection, 1)
            return tf.tile(projection, [1, maximium_candidate_num, 1])

        def dot_scores(states, layer_name, facts):
            """Masked dot-product score between projected states and facts."""
            projection = tiled_projection(states, layer_name)
            return tf.reduce_sum(projection * facts, -1) + fact_mask

        def attention_scores(facts):
            """Masked score where each fact attends over the encoder outputs."""
            # [batch, fact_len, dim]
            fact_query = self.fact_projection
            concated_encoder_states = drop(tf.concat(encoder_outputs, -1))
            # [batch, encoder_len, dim]
            encoder_key = tf.layers.dense(concated_encoder_states, units=300,
                                          activation=tf.nn.tanh,
                                          name='encoder_keys')
            # [batch, encoder_len, dim]
            encoder_value = tf.layers.dense(concated_encoder_states,
                                            units=300,
                                            activation=tf.nn.tanh,
                                            name='encoder_values')
            # [batch, fact_len, encoder_len]
            fact_encoder_logits = tf.matmul(
                fact_query, tf.transpose(encoder_key, [0, 2, 1]))
            fact_encoder_probs = tf.nn.softmax(fact_encoder_logits, -1)
            # [batch, fact_len, dim]
            fact_encoder = tf.matmul(fact_encoder_probs, encoder_value)
            return tf.reduce_sum(fact_encoder * facts, -1) + fact_mask

        def encode_decoder_inputs():
            """Re-run the encoder, with shared weights, on the decoder inputs."""
            with tf.variable_scope(tf.get_variable_scope(), reuse=True):
                _, states = self.create_encoder(
                    self._input_embeddings_for_decoder,
                    self._input_entity_embeddings_for_decoder,
                    self._lengths_for_decoder)
            return states

        def kl_divergence(posterior, prior):
            """Batch-averaged KL(posterior || prior)."""
            ratio = posterior / tf.clip_by_value(prior, 1e-9, 1.0)
            return tf.reduce_sum(posterior * safe_log(ratio)) / self.batch_size

        if classifier_mode == 'dot':
            # Student network
            fact_embedding_projection = drop(fact_embedding_projection)
            classifier_scores = dot_scores(encoder_states,
                                           'classifier_inputs',
                                           fact_embedding_projection)
            classifier_probs = tf.nn.softmax(classifier_scores)
        elif classifier_mode == 'attention':
            # Student network
            classifier_scores = attention_scores(fact_embedding_projection)
            classifier_probs = tf.nn.softmax(classifier_scores)
        elif classifier_mode == 'prior_posterior_attention':
            # Student (prior) network
            classifier_scores = attention_scores(fact_embedding_projection)
            prior_classifier_probs = tf.nn.softmax(classifier_scores)
            # Teacher (posterior) network
            decoder_encoder_states = encode_decoder_inputs()
            posterior_classifier_scores = dot_scores(
                encoder_states + decoder_encoder_states,
                'posterior_classifier_inputs', fact_embedding_projection)
            posterior_classifier_probs = tf.nn.softmax(
                posterior_classifier_scores)
            posterior_classifier_probs_for_kld = tf.nn.softmax(
                posterior_classifier_scores / hparams.get("kld_temp", 1.0))
            self.kld_loss = kl_divergence(posterior_classifier_probs_for_kld,
                                          prior_classifier_probs)
            classifier_probs = prior_classifier_probs
        elif classifier_mode == 'posterior_dot':
            # Teacher network
            decoder_encoder_states = encode_decoder_inputs()
            fact_embedding_projection = drop(fact_embedding_projection)
            posterior_classifier_scores = dot_scores(
                encoder_states + decoder_encoder_states,
                'posterior_classifier_inputs', fact_embedding_projection)
            posterior_classifier_probs = tf.nn.softmax(
                posterior_classifier_scores)
            classifier_probs = posterior_classifier_probs
        elif classifier_mode in {'prior_posterior_dot',
                                 'lazy_prior_posterior_dot'}:
            # Teacher (posterior) network
            decoder_encoder_states = encode_decoder_inputs()
            fact_embedding_projection = drop(fact_embedding_projection)
            posterior_classifier_scores = dot_scores(
                encoder_states + decoder_encoder_states,
                'posterior_classifier_inputs', fact_embedding_projection)
            posterior_classifier_probs = tf.nn.softmax(
                posterior_classifier_scores)
            posterior_classifier_probs_for_kld = tf.nn.softmax(
                posterior_classifier_scores / hparams.get("kld_temp", 1.0))
            # Student (prior) network
            classifier_scores = dot_scores(encoder_states,
                                           'classifier_inputs',
                                           fact_embedding_projection)
            prior_classifier_probs = tf.nn.softmax(classifier_scores)
            self.kld_loss = kl_divergence(posterior_classifier_probs_for_kld,
                                          prior_classifier_probs)
            classifier_probs = prior_classifier_probs
        elif classifier_mode == 'mlp':
            classifier_projection = tiled_projection(encoder_states,
                                                     'classifier_inputs')
            score_input = tf.concat(
                [classifier_projection, fact_embedding_projection], -1)
            score_input = drop(score_input)
            classifier_scores = tf.layers.dense(score_input, units=1,
                                                activation=tf.nn.tanh,
                                                name='score_estimator')
            # Drop only the trailing unit axis. A bare squeeze would also
            # remove the batch or candidate axis whenever its size is 1.
            classifier_scores = tf.squeeze(classifier_scores, axis=-1)
            classifier_scores += fact_mask
            classifier_probs = tf.nn.softmax(classifier_scores)
        else:
            raise ValueError()

        # Despite the attribute name, this stores the softmax probabilities.
        self.classifier_scores = classifier_probs

        if self.mode == model_helper.TRAIN:
            knowledge_bow_loss = -tf.reduce_sum(
                self._golden_fact_bow * safe_log(classifier_probs), -1)
            self._knowledge_bow_loss = (tf.reduce_sum(knowledge_bow_loss)
                                        / self.batch_size)
            self._train_update_loss = self._knowledge_bow_loss
            if classifier_mode in prior_posterior_modes:
                posterior_knowledge_bow_loss = -tf.reduce_sum(
                    self._golden_fact_bow
                    * safe_log(posterior_classifier_probs), -1)
                posterior_knowledge_bow_loss = (
                    tf.reduce_sum(posterior_knowledge_bow_loss)
                    / self.batch_size)
                self._train_update_loss += posterior_knowledge_bow_loss

        regulation_loss = (tf.reduce_sum(
            (1.0 - classifier_probs * classifier_probs) * fact_seq_mask)
            / self.batch_size)
        self.regulation_loss = regulation_loss

        if self.mode == model_helper.TRAIN and regulation_weight > 0.0:
            # NOTE(review): this assignment replaces the update loss, so the
            # posterior term added above is dropped whenever regularisation
            # is enabled. Confirm that this is intended.
            self._train_update_loss = (self._knowledge_bow_loss
                                       + regulation_loss * regulation_weight)

    # Print vars
    utils.print_out('-------------Trainable Variables------------------')
    for var in tf.trainable_variables():
        utils.print_out(var)
def train():
    """Train the model, checkpointing and evaluating after every epoch.

    Configuration is loaded from ``args.config_path``. Training steps run
    until ``utils.should_stop`` reports completion. Whenever the training
    iterator is exhausted the epoch counter is advanced, a checkpoint is
    saved, and perplexity is computed on the validation and test splits.
    The validation perplexity is appended to ``hparams['loss']``, a
    ``min_ppl`` checkpoint is written when it is the lowest seen so far, and
    the learning rate is multiplied by ``hparams['learning_halve']`` when it
    got worse than the previous epoch.
    """
    # Load config
    hparams = config_parser.load_and_restore_config(args.config_path,
                                                    verbose=True)
    out_dir = hparams['model_path']
    eval_file = os.path.join(out_dir, 'eval_out.txt')
    status_counter = Status(hparams['status_per_steps'])

    # dataset iterator
    dataset = dataset_utils.create_flexka2_iterator(hparams, is_eval=False)
    model = Model(dataset, hparams, model_helper.TRAIN)
    dropout = dataset['dropout']

    train_fetches = [
        model.teach_force_loss, model.kld_loss, model.knowledge_bow_loss,
        model.word_bow_loss, model.learning_rate, model.update,
        model.train_loss, model.global_step, model.epoch_step,
        model.predict_count, model.batch_size,
    ]
    eval_fetches = [
        model.train_loss, model.predict_count, model.batch_size,
        model.batch_size,
    ]

    with tf.Session(config=model_helper.create_tensorflow_config()) as sess:
        # Initialize or restore a model
        step, epoch = model_helper.create_or_restore_a_model(
            out_dir, model, sess)
        dataset['init_fn'](sess)
        epoch_start_time = time.time()

        while utils.should_stop(epoch, step, hparams) is False:
            try:
                (teach_force_loss, kld_loss, knowledge_bow_loss,
                 word_bow_loss, lr, _, loss, step, epoch, predict_count,
                 batch_size) = sess.run(
                    train_fetches,
                    feed_dict={
                        dropout: hparams['dropout'],
                        model.learning_rate: hparams['learning_rate'],
                    })
                ppl = utils.safe_exp(loss * batch_size / predict_count)
                record = {
                    'ppl': ppl,
                    'loss': loss,
                    'mode_loss': teach_force_loss,
                    'word_bow_loss': word_bow_loss,
                    'knowledge_bow_loss': knowledge_bow_loss,
                    'kld_loss': kld_loss * 1000000,
                    'lr': lr,
                    'count': predict_count,
                }
                status_counter.add_record(record, step, epoch)
            except tf.errors.InvalidArgumentError:
                print('Found Inf or NaN global norm')
                raise
            except tf.errors.OutOfRangeError:
                # The training iterator is exhausted: one epoch is done.
                utils.print_out('epoch %d is finished, step %d' %
                                (epoch, step))
                sess.run([model.next_epoch])

                # Save Epoch
                model.saver.save(sess,
                                 os.path.join(out_dir, "seq2seq.ckpt"),
                                 global_step=model.global_step)
                utils.print_out('Saved model to -> %s' % out_dir)

                # EVAL on Dev/Test Set:
                for prefix in ['valid_', 'test_']:
                    dataset['init_fn'](sess, prefix)
                    eval_loss, eval_count, eval_batch = [], [], []
                    while True:
                        try:
                            batch_loss, batch_count, batch_len, _ = sess.run(
                                eval_fetches, feed_dict={dropout: 0.0})
                        except tf.errors.OutOfRangeError:
                            break
                        eval_loss.append(batch_loss)
                        eval_count.append(batch_count)
                        eval_batch.append(batch_len)

                    eval_ppl = utils.safe_exp(
                        sum(eval_loss) * sum(eval_batch) / len(eval_batch) /
                        sum(eval_count))

                    if prefix != 'valid_':
                        utils.print_out('Eval on Test: EVAL PPL: %.4f' %
                                        (eval_ppl))
                        utils.eval_print(
                            eval_file,
                            'Eval on Test: Epoch %d Step %d EVAL PPL: %.4f' %
                            (epoch, step, eval_ppl))
                        continue

                    utils.print_out('Eval on Dev: EVAL PPL: %.4f' %
                                    (eval_ppl))
                    utils.eval_print(
                        eval_file,
                        'Eval on Dev: Epoch %d Step %d EVAL PPL: %.4f' %
                        (epoch, step, eval_ppl))
                    history = hparams['loss']
                    history.append(float(eval_ppl))
                    hparams['epochs'].append(int(step))
                    config_parser.save_config(hparams)

                    # The new value was just appended, so this is true
                    # exactly when it is the lowest perplexity on record.
                    if min(history) - eval_ppl >= 0:
                        model.ppl_saver.save(
                            sess,
                            os.path.join(out_dir, 'min_ppl', "seq2seq.ckpt"),
                            global_step=model.global_step)
                        utils.print_out('Saved min_ppl model to -> %s' %
                                        out_dir)

                    if len(history) > 1 and history[-1] > history[-2]:
                        hparams['learning_rate'] = (
                            hparams['learning_rate'] *
                            hparams['learning_halve'])
                        utils.eval_print(
                            eval_file, 'Halved the learning rate to %f' %
                            hparams['learning_rate'])
                    config_parser.save_config(hparams)

                # NEXT EPOCH
                elapsed = time.time() - epoch_start_time
                utils.print_time(elapsed, 'Epoch Time:')
                remaining = time.time() - epoch_start_time
                remaining *= (hparams['num_train_epochs'] - epoch - 1)
                utils.print_time(remaining, 'Remaining Time:')
                epoch_start_time = time.time()
                dataset['init_fn'](sess)

        utils.print_out('model has been fully trained !')
def _read_embedding_rows(path):
    """Parse a text embedding file into a list of float vectors.

    Every line holds one vector. Values are split on tabs when the line
    contains a tab and on single spaces otherwise.
    """
    rows = []
    # Read-only access is enough; the file is never written back.
    with open(path, 'r', encoding='utf-8') as fin:
        for line in fin:
            separator = '\t' if '\t' in line else ' '
            rows.append([float(x) for x in line.strip().split(separator)])
    return rows


def load_entity_vocab(hparams):
    """Load the entity/relation vocabularies, embeddings and fact table.

    Currently the same as GenDS.knowledge_utils.load_entity_vocab.

    :param hparams: dict providing the paths ``word2entity_dict_path``,
        ``entity2word_dict_path``, ``entity_path``, ``relation_path``,
        ``entity_embedding_path``, ``relation_embedding_path`` and
        ``fact_path``, the embedding size ``entity_dim`` and optionally
        ``flexka_inv_relation``.
    :return: tuple ``(tf_entity_embed, tf_entity_fact_embedding,
        tf_entity_source, tf_entity_target, entity_vocab,
        reverse_entity_vocab, word2entities, entiy2words, tf_fact_idf)``.
    :raises ValueError: if a fact refers to a relation that is not listed
        in the relation dictionary.
    """
    word2entity_dict_path = hparams['word2entity_dict_path']
    entity2word_dict_path = hparams['entity2word_dict_path']
    entity_dict_path = hparams['entity_path']
    relation_dict_path = hparams['relation_path']
    entity_embed_path = hparams['entity_embedding_path']
    relation_embed_path = hparams['relation_embedding_path']
    embed_dim = hparams['entity_dim']
    utils.print_out("load entity dict from %s" % entity_dict_path)
    use_inv_relation = hparams.get('flexka_inv_relation', False)

    entity_vocab = lookup_ops.index_table_from_file(entity_dict_path,
                                                    default_value=0)
    reverse_entity_vocab = lookup_ops.index_to_string_table_from_file(
        entity_dict_path, default_value=UNK_ENTITY)

    padding_entity_list = [
        UNK_ENTITY, NONE_ENTITY, PAD_ENTITY, NOT_HEAD_ENTITY, NOT_TAIL_ENTITY
    ]
    padding_relation_list = [NONE_RELATION, PAD_RELATION, NOT_TBD]

    # word2entity
    with open(word2entity_dict_path, encoding='utf-8') as fin:
        word2entities = np.array([int(x.strip('\n')) for x in fin],
                                 dtype=np.int32)
    word2entities = tf.get_variable('word2entities', dtype=tf.int32,
                                    initializer=word2entities,
                                    trainable=False)

    # entity2word
    with open(entity2word_dict_path, encoding='utf-8') as fin:
        entiy2words = np.array([int(x.strip('\n')) for x in fin],
                               dtype=np.int32)
    entiy2words = tf.get_variable('entity2words', dtype=tf.int32,
                                  initializer=entiy2words,
                                  trainable=False)

    # Check that both dictionaries start with the reserved padding symbols.
    entity_list = []
    entity_dict = dict()
    with open(entity_dict_path, encoding='utf-8') as f:
        for i, line in enumerate(f):
            e = line.strip()
            entity_list.append(e)
            entity_dict[e] = i
    for i in range(len(padding_entity_list)):
        assert padding_entity_list[i] == entity_list[i]

    relation_list = []
    relation_dict = dict()
    with open(relation_dict_path, encoding='utf-8') as f:
        for i, line in enumerate(f):
            e = line.strip()
            relation_list.append(e)
            relation_dict[e] = i
    for i in range(len(padding_relation_list)):
        assert padding_relation_list[i] == relation_list[i]

    print("Loading entity vectors...")
    entity_embed = _read_embedding_rows(entity_embed_path)
    print("Loading relation vectors...")
    relation_embed = _read_embedding_rows(relation_embed_path)

    entity_embed = np.array(entity_embed, dtype=np.float32)
    relation_embed = np.array(relation_embed, dtype=np.float32)
    entity_embed = tf.get_variable('entity_embed', dtype=tf.float32,
                                   initializer=entity_embed,
                                   trainable=False)
    relation_embed = tf.get_variable('relation_embed', dtype=tf.float32,
                                     initializer=relation_embed,
                                     trainable=False)
    entity_embed = tf.reshape(entity_embed, [-1, embed_dim])
    relation_embed = tf.reshape(relation_embed, [-1, embed_dim])

    # Zero-initialised rows for the padding symbols go in front of the
    # pre-trained vectors.
    padding_entity_embedding = tf.get_variable(
        'entity_padding_embed', [len(padding_entity_list), embed_dim],
        dtype=tf.float32, initializer=tf.zeros_initializer())
    padding_relation_embedding = tf.get_variable(
        'relation_padding_embed', [len(padding_relation_list), embed_dim],
        dtype=tf.float32, initializer=tf.zeros_initializer())
    tf_entity_embed = tf.concat([padding_entity_embedding, entity_embed],
                                axis=0)
    tf_relation_embed = tf.concat(
        [padding_relation_embedding, relation_embed], axis=0)
    tf_entity_embed = tf.layers.dense(tf_entity_embed,
                                      hparams['entity_dim'],
                                      use_bias=False,
                                      name='entity_embedding_transformer')
    tf_relation_embed = tf.layers.dense(
        tf_relation_embed, hparams['entity_dim'], use_bias=False,
        name='relation_embedding_transformer')
    if use_inv_relation:
        print('inv_relation')
        inv_relation_embed = tf.layers.dense(
            relation_embed, hparams['entity_dim'], use_bias=False,
            name='inv_relation_embedding_transformer')
        tf_relation_embed = tf.concat(
            [tf_relation_embed, inv_relation_embed], axis=0)
    # Relations are appended after the entities in one shared table.
    tf_entity_embed = tf.concat([tf_entity_embed, tf_relation_embed], axis=0)

    # Facts
    utils.print_out('Loading facts')
    fact_dict_path = hparams['fact_path']
    entity_fact = []
    entity_source = []
    entity_target = []
    fact_idf = []
    with open(fact_dict_path, encoding='utf-8') as fin:
        lines = fin.readlines()
    utils.print_out('Total Entity-Fact : %d' % len(lines))
    print(lines[0].strip('\n').split())
    for line in lines:
        items = line.strip('\n').split()
        # 0: entity_in_post, 1: entity_in_response, 2: head, 3: relation,
        # 4: tail, 5/6/7: scores
        for i in [0, 1, 2, 4]:
            items[i] = int(entity_dict.get(items[i], 0))
        if items[3] not in relation_dict:
            raise ValueError(
                'Unknown relation %r in fact file %s; known relations: %s' %
                (items[3], fact_dict_path, sorted(relation_dict)))
        # Relations and entities share one index list, so relation ids are
        # shifted past the entity ids.
        items[3] = int(relation_dict[items[3]]) + len(entity_dict)
        entity_fact.append(items[2:5])
        entity_source.append(items[0])
        entity_target.append(items[1])
        # uni ids
        if len(items) > 5:
            idf = [float(items[5]), float(items[6]), float(items[7])]
        else:
            idf = [0.0, 0.0, 0.0]
        fact_idf.append(idf)

    fact_idf = np.array(fact_idf, dtype=np.float32)
    entity_fact = np.array(entity_fact, dtype=np.int32)
    entity_source = np.array(entity_source, dtype=np.int32)
    entity_target = np.array(entity_target, dtype=np.int32)
    entity_fact = np.reshape(entity_fact, [len(lines), 3])
    entity_source = np.reshape(entity_source, [len(lines)])
    entity_target = np.reshape(entity_target, [len(lines)])

    tf_fact_idf = tf.constant(value=fact_idf, dtype=np.float32)
    tf_entity_fact = tf.constant(value=entity_fact, dtype=np.int32)
    tf_entity_source = tf.constant(value=entity_source, dtype=np.int32)
    tf_entity_target = tf.constant(value=entity_target, dtype=np.int32)

    # Concatenate the head, relation and tail embeddings of every fact.
    tf_entity_fact_embedding = tf.nn.embedding_lookup(tf_entity_embed,
                                                      tf_entity_fact)
    tf_entity_fact_embedding = tf.reshape(tf_entity_fact_embedding,
                                          [-1, 3 * hparams['entity_dim']])

    return (tf_entity_embed, tf_entity_fact_embedding, tf_entity_source,
            tf_entity_target, entity_vocab, reverse_entity_vocab,
            word2entities, entiy2words, tf_fact_idf)
def save_config(config, config_path=None):
    """Write ``config`` as JSON to ``config_path``.

    When ``config_path`` is omitted the file is ``config.json`` inside the
    directory named by ``config['model_path']``.
    """
    target_path = (config_path if config_path is not None
                   else config['model_path'] + '/config.json')
    utils.print_out('save json config file to %s' % target_path)
    with open(target_path, 'w+', encoding='utf-8') as handle:
        json.dump(config, handle)
def _restore_placeholders(translation, fact_row, entity_word_vocab,
                          source_tokens):
    """Replace ``$ENT_k`` / ``$CP_k`` placeholder tokens in a decoded sentence.

    ``$ENT_k`` becomes ``$`` followed by the second comma-separated field of
    the fact whose id is ``fact_row[k]``. ``$CP_k`` becomes ``$C:`` followed
    by the k-th source token. Other tokens are kept unchanged.
    """
    restored = []
    for token in translation.split():
        if token.startswith('$ENT_'):
            relative_fact_id = int(token[len('$ENT_'):])
            fact = entity_word_vocab[fact_row[relative_fact_id]]
            entity_in_response = fact.split(',')[1]
            restored.append('$' + entity_in_response)
        elif token.startswith('$CP_'):
            position = int(token[len('$CP_'):])
            restored.append('$C:' + source_tokens[position])
        else:
            restored.append(token)
    return ' '.join(restored)


def test():
    """Decode the test split with the ``min_ppl`` checkpoint and save results.

    Decoding options (beam width, length penalty, diverse decoding rate and
    coverage penalty) are taken from ``args``. Three files named after these
    options are written to ``<model_path>/decoded``: ``<id>.txt`` with every
    beam of every case, ``<id>_top1.txt`` with the first beam only, and
    ``<id>_topk.txt`` with all beams one per line; the latter two have the
    ``#``, ``$R:``, ``$C:`` and ``$E:`` markers stripped.
    """
    hparams = config_parser.load_and_restore_config(args.config_path,
                                                    verbose=True)
    if args.beam != -1:
        hparams['beam_width'] = args.beam
        utils.print_out("Reset beam_width to %d" % args.beam)
        if args.beam > 10:
            # Shrink the batch for wide beams.
            hparams['batch_size'] = hparams['batch_size'] * 30 // args.beam
    hparams['length_penalty_weight'] = args.length_penalty_weight
    hparams['diverse_decoding_rate'] = args.diverse_decoding_rate
    hparams['coverage_penalty_weight'] = args.coverage_penalty_weight

    # Dataset
    dataset = dataset_utils.create_flexka2_iterator(hparams, is_eval=True)
    model = Model(dataset, hparams, model_helper.INFER)
    dropout = dataset['dropout']

    # One comma-joined string per fact, with the first two fields tagged.
    entity_word_vocab = []
    with open(hparams['fact_path'], encoding='utf-8') as fin:
        for line in fin:
            items = line.strip('\n').split()
            items[0] = 'P:' + items[0]
            items[1] = 'E:' + items[1]
            entity_word_vocab.append(','.join(items))

    model_path = hparams['model_path']
    out_dir = os.path.join(model_path, 'min_ppl')
    decoded_dir = os.path.join(model_path, 'decoded')
    # Creates 'decoded' as well; no error if the directories already exist.
    os.makedirs(os.path.join(decoded_dir, 'fact_attention'), exist_ok=True)

    config_id = 'B%s_L%.1f_D%.1f_C%.1f' % \
                (hparams['beam_width'], args.length_penalty_weight,
                 args.diverse_decoding_rate, args.coverage_penalty_weight)
    beam_out_file_path = os.path.join(decoded_dir, '%s.txt' % config_id)
    top1_out_file_path = os.path.join(decoded_dir, '%s_top1.txt' % config_id)
    topk_out_file_path = os.path.join(decoded_dir, '%s_topk.txt' % config_id)

    # The inputs are only read, so plain read mode is sufficient.
    with open(hparams['test_src_file'], 'r', encoding='utf-8') as fin:
        queries = [x.strip('\n') for x in fin]
    with open(hparams['test_tgt_file'], 'r', encoding='utf-8') as fin:
        responses = [x.strip('\n') for x in fin]

    with tf.Session(config=model_helper.create_tensorflow_config()) as sess:
        step, epoch = model_helper.create_or_restore_a_model(
            out_dir, model, sess)
        dataset['init_fn'](sess, 'test_')
        utils.print_out('Current Epoch,Step : %s/%s, Max Epoch,Step : %s/%s' %
                        (epoch, step, hparams['num_train_epochs'],
                         hparams['num_train_steps']))
        case_id = 0
        with open(beam_out_file_path, 'w', encoding='utf-8') as fout, \
                open(top1_out_file_path, 'w', encoding='utf-8') as ftop1, \
                open(topk_out_file_path, 'w', encoding='utf-8') as ftopk:
            while True:
                try:
                    facts, sample_ids, scores = sess.run(
                        [dataset['inputs_for_facts'], model.sampled_id,
                         model.scores],
                        feed_dict={dropout: 0.0})
                except tf.errors.OutOfRangeError:
                    # The test iterator is exhausted.
                    break

                num_responses_per_query = sample_ids.shape[0]
                num_cases = sample_ids.shape[1]
                for sent_id in range(num_cases):
                    fout.write('#Case : %d\n' % case_id)
                    fout.write('\tquery:\t%s\n' % queries[case_id])
                    fout.write('\tresponse:\t%s\n' % responses[case_id])
                    source_tokens = queries[case_id].split()
                    for beam_id in range(num_responses_per_query):
                        translations, score = model_helper.get_translation(
                            sample_ids[beam_id], scores[beam_id], sent_id,
                            '</s>')
                        translations = _restore_placeholders(
                            translations, facts[sent_id], entity_word_vocab,
                            source_tokens)
                        fout.write('\tBeam%d\t%.4f\t%s\n' %
                                   (beam_id, score, translations))
                        plain = (translations.replace('#', '')
                                 .replace('$R:', '')
                                 .replace('$C:', '')
                                 .replace('$E:', ''))
                        if beam_id == 0:
                            ftop1.write('%s\n' % plain)
                        ftopk.write('%s\n' % plain)
                    case_id += 1
def create_model(self, name='flexka'):
    """Build the MLP fact-candidate scorer and its ranking loss.

    The concatenated encoder states are projected, repeated once per fact
    candidate, concatenated with ``self.fact_projection`` and passed through
    a single sigmoid unit. The resulting scores are stored in
    ``self.classifier_scores``.

    In TRAIN mode a hinge loss with margin 0.3 between the summed scores of
    the golden facts and of the negative facts is built. The unscaled loss
    becomes ``self._train_update_loss``, while ``self._knowledge_bow_loss``
    holds the same value multiplied by 100.
    """
    with tf.variable_scope(name):
        _, encoder_states = self.create_encoder(
            self._input_embeddings_for_encoder,
            self._input_entity_embeddings_for_encoder,
            self._lengths_for_encoder,
        )
        self.kld_loss = tf.constant(0.0)
        self.knowledge_distribution = None
        self.knowledge_fusion = None
        self.knowledge_bow_loss = tf.constant(0.0)

        maximium_candidate_num = tf.shape(self._fact_candidate_embedding)[1]
        fact_embedding_projection = self.fact_projection

        classifier_inputs = tf.concat(encoder_states, -1)
        classifier_inputs = tf.nn.dropout(classifier_inputs,
                                          keep_prob=1.0 - self.dropout)
        classifier_projection = tf.layers.dense(classifier_inputs,
                                                units=300,
                                                activation=tf.nn.elu,
                                                use_bias=True,
                                                name='classifier_inputs')
        # Repeat the projected state once per fact candidate.
        classifier_projection = tf.expand_dims(classifier_projection, 1)
        classifier_projection = tf.tile(classifier_projection,
                                        [1, maximium_candidate_num, 1])

        score_input = tf.concat(
            [classifier_projection, fact_embedding_projection], -1)
        score_input = tf.nn.dropout(score_input,
                                    keep_prob=1.0 - self.dropout)
        classifier_scores = tf.layers.dense(score_input, units=1,
                                            activation=tf.nn.sigmoid,
                                            name='score_estimator')
        # Drop only the trailing unit axis. A bare squeeze would also remove
        # the batch or candidate axis whenever its size is 1.
        classifier_scores = tf.squeeze(classifier_scores, axis=-1)
        self.classifier_scores = classifier_scores

        if self.mode == model_helper.TRAIN:
            pos_scores = tf.reduce_sum(
                self._golden_fact_bow * classifier_scores, -1)
            neg_scores = tf.reduce_sum(
                self._neg_fact_bow * classifier_scores, -1)
            knowledge_bow_loss = tf.maximum(0.0,
                                            0.3 - pos_scores + neg_scores)
            self._knowledge_bow_loss = (tf.reduce_sum(knowledge_bow_loss)
                                        / self.batch_size)
            # The optimiser uses the unscaled loss; only the stored
            # knowledge loss is multiplied by 100.
            self._train_update_loss = self._knowledge_bow_loss
            self._knowledge_bow_loss *= 100
        self.regulation_loss = tf.constant(0.0)

    # Print vars
    utils.print_out('-------------Trainable Variables------------------')
    for var in tf.trainable_variables():
        utils.print_out(var)