def check_vocab(vocab_file, out_dir, check_special_token=True,
                sos=None, eos=None, unk=None):
    """Check that vocab_file exists; optionally ensure it starts with unk/sos/eos."""
    if tf.gfile.Exists(vocab_file):
        utils.print_out("# Vocab file %s exists" % vocab_file)
        vocab, vocab_size = load_vocab(vocab_file)
        if check_special_token:
            # Verify that the vocab starts with unk, sos, eos.
            # If not, prepend those tokens & generate a new vocab file.
            if not unk: unk = UNK
            if not sos: sos = SOS
            if not eos: eos = EOS
            assert len(vocab) >= 3
            if vocab[0] != unk or vocab[1] != sos or vocab[2] != eos:
                utils.print_out("The first 3 vocab words [%s, %s, %s]"
                                " are not [%s, %s, %s]" %
                                (vocab[0], vocab[1], vocab[2], unk, sos, eos))
                vocab = [unk, sos, eos] + vocab
                vocab_size += 3
                new_vocab_file = os.path.join(out_dir,
                                              os.path.basename(vocab_file))
                with codecs.getwriter("utf-8")(
                        tf.gfile.GFile(new_vocab_file, "wb")) as f:
                    for word in vocab:
                        f.write("%s\n" % word)
                vocab_file = new_vocab_file
    else:
        raise ValueError("vocab_file '%s' does not exist." % vocab_file)

    vocab_size = len(vocab)
    return vocab_size, vocab_file

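# A minimal usage sketch (hypothetical paths): check_vocab verifies that the
# file exists, prepends <unk>/<s>/</s> when they are missing, and returns the
# adjusted size together with the (possibly rewritten) vocab file under out_dir.
#
#   vocab_size, vocab_file = check_vocab("data/vocab.txt", out_dir="out",
#                                        unk="<unk>", sos="<s>", eos="</s>")
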
def main():
    args = get_args()
    if args.train:
        train(args.model_name, args.restore)
    else:
        import_lib()
        dataset = Dataset.Dataset()
        model = PHVM.PHVM(len(dataset.vocab.id2featCate),
                          len(dataset.vocab.id2featVal),
                          len(dataset.vocab.id2word),
                          len(dataset.vocab.id2category),
                          key_wordvec=None,
                          val_wordvec=None,
                          tgt_wordvec=dataset.vocab.id2vec,
                          type_vocab_size=len(dataset.vocab.id2type))
        best_checkpoint_dir = (config.checkpoint_dir + "/" + args.model_name +
                               config.best_model_dir)
        tmp_checkpoint_dir = (config.checkpoint_dir + "/" + args.model_name +
                              config.tmp_model_dir)
        model_utils.restore_model(model, best_checkpoint_dir, tmp_checkpoint_dir)
        dataset.prepare_dataset()
        texts = infer(model, dataset, dataset.test)
        dump(texts, config.result_dir + "/{}.json".format(args.model_name))
        utils.print_out("finished inference on test file")

def save(self):
    hparams_file = os.path.join(
        self.model_dir, "{}_config.yml".format(file_name(self.config)))
    print_out("  saving config to %s" % hparams_file)
    to_dump_dict = dict(self.__dict__)
    if to_dump_dict['train_data']:
        to_dump_dict['train_data'] = os.path.abspath(to_dump_dict['train_data'])
    if to_dump_dict['test_data']:
        to_dump_dict['test_data'] = os.path.abspath(to_dump_dict['test_data'])
    if to_dump_dict['dev_data']:
        to_dump_dict['dev_data'] = os.path.abspath(to_dump_dict['dev_data'])
    if to_dump_dict['pretrain_data']:
        to_dump_dict['pretrain_data'] = os.path.abspath(
            to_dump_dict['pretrain_data'])
    else:
        to_dump_dict.pop('pretrain_data')
    if to_dump_dict['vocab_file']:
        to_dump_dict['vocab_file'] = os.path.abspath(to_dump_dict['vocab_file'])
    with codecs.getwriter("utf-8")(open(hparams_file, "wb")) as f:
        yaml.dump(to_dump_dict, f, default_flow_style=False)

def _cell_list(unit_type, num_units, num_layers, forget_bias, dropout, mode,
               num_gpus, base_gpu=0, single_cell_fn=None):
    """Create a list of RNN cells."""
    if not single_cell_fn:
        single_cell_fn = _single_cell

    # Multi-GPU
    cell_list = []
    for i in range(num_layers):
        utils.print_out("  cell %d" % i, new_line=False)
        single_cell = single_cell_fn(
            unit_type=unit_type,
            num_units=num_units,
            forget_bias=forget_bias,
            dropout=dropout,
            mode=mode,
            device_str=get_device_str(i + base_gpu, num_gpus))
        utils.print_out("")
        cell_list.append(single_cell)

    return cell_list

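# A minimal sketch of the get_device_str helper assumed above: round-robin
# placement of cell i across the available GPUs, with a CPU fallback. This
# mirrors the usual NMT-style helper; adjust if your codebase's version differs.
def get_device_str(device_id, num_gpus):
    """Return a device string for multi-GPU round-robin placement."""
    if num_gpus == 0:
        return "/cpu:0"
    return "/gpu:%d" % (device_id % num_gpus)
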
def print_step_info(prefix, global_step, info, result_summary, log_f):
    """Print all info at the current global step."""
    utils.print_out(
        "%sstep %d lr %g step-time %.2fs wps %.2fK gN %.2f %s, %s" %
        (prefix, global_step, info["learning_rate"], info["avg_step_time"],
         info["speed"], info["avg_grad_norm"], result_summary, time.ctime()),
        log_f)

def before_train(loaded_train_model, train_model, train_sess, global_step,
                 hparams, log_f):
    """Misc tasks to do before training."""
    stats = init_stats()
    info = {
        "train_ppl": 0.0,
        "speed": 0.0,
        "avg_step_time": 0.0,
        "avg_grad_norm": 0.0,
        "avg_sequence_count": 0.0,
        "learning_rate": loaded_train_model.learning_rate.eval(
            session=train_sess)
    }
    start_train_time = time.time()
    utils.print_out(
        "# Start step %d, lr %g, %s" %
        (global_step, info["learning_rate"], time.ctime()), log_f)

    # Initialize all of the iterators, skipping any examples already
    # consumed in the current epoch.
    skip_count = hparams.batch_size * hparams.epoch_step
    utils.print_out("# Init train iterator, skipping %d elements" % skip_count)
    train_sess.run(train_model.iterator.initializer,
                   feed_dict={train_model.skip_count_placeholder: skip_count})

    return stats, info, start_train_time

def eval(eval_model, eval_sess, model_dir, hparams, summary_writer, log_f):
    with eval_model.graph.as_default():
        eval_model, global_step, epoch_num = create_or_load_model(
            eval_model, model_dir, eval_sess, "eval")
    eval_sess.run(eval_model.model.iterator.initializer)
    eval_info = {}
    total_loss = 0
    total_predict_beat_count = 0
    total_sequence_count = 0
    while True:
        try:
            step_result = eval_model.model.eval(eval_sess)
            total_loss += step_result.eval_loss * step_result.predict_beat_count
            total_predict_beat_count += step_result.predict_beat_count
            total_sequence_count += step_result.batch_size
        except tf.errors.OutOfRangeError:
            eval_info['epoch_num'] = epoch_num
            eval_info['eval_avg_beat_loss'] = (
                total_loss / total_predict_beat_count)
            eval_info['eval_predict_beat_count'] = total_predict_beat_count
            eval_info['eval_sample_num'] = total_sequence_count
            utils.print_out(
                "\neval: global step: %d, epoch_num: %d, "
                "eval_avg beat loss: %.2f, eval_predict_beat_count: %d, "
                "eval_sample_num: %d, time: %s\n" %
                (global_step, eval_info['epoch_num'],
                 eval_info["eval_avg_beat_loss"],
                 eval_info['eval_predict_beat_count'],
                 eval_info['eval_sample_num'], time.ctime()), log_f)
            for key in eval_info:
                summary_writer.add_summary(
                    tf.Summary(value=[
                        tf.Summary.Value(tag=key, simple_value=eval_info[key])
                    ]), global_step)
            break

def build_graph(self, hparams):
    utils.print_out("# Creating %s graph ..." % self.mode)
    with tf.variable_scope("network", dtype=self.dtype, reuse=tf.AUTO_REUSE):
        self.top_scope = tf.get_variable_scope()

        # Initializer: becomes the default initializer for every variable
        # created under this variable scope.
        initializer = tf.random_uniform_initializer(-hparams.init_weight,
                                                    hparams.init_weight,
                                                    seed=hparams.random_seed)
        self.top_scope.set_initializer(initializer)

        # The variable scope specification is left to the _encode function.
        # Components common to the three modes.
        if self.architecture == "deepRNN":
            # LSTM (bi_lstm, then stacked with uni_lstm)
            self.src_bi_lstm, self.src_bi_lstm_condition = self._build_bi_lstm(hparams)
            self.src_uni_lstm, self.src_uni_lstm_condition = self._build_uni_lstm(hparams)
            self.tgt_bi_lstm, self.tgt_bi_lstm_condition = self._build_bi_lstm(hparams)
            self.tgt_uni_lstm, self.tgt_uni_lstm_condition = self._build_uni_lstm(hparams)
            # Projector
            self.src_projector = self._build_projector(hparams, field='src')
            self.tgt_projector = self._build_projector(hparams, field='tgt')
        else:
            raise ValueError("Unknown architecture_type %s" % self.architecture)

        # Set mode-specific components.
        self.set_mode_phase(hparams)

        # Saver
        self.saver = tf.train.Saver(tf.global_variables(), max_to_keep=5)

def eval(self, T, dev_data, hparams, sess):
    preds = self.infer(dev_data)
    if hparams.metric == 'logloss':
        log_loss = metrics.log_loss(dev_data[1], preds)
        if self.best_score > log_loss:
            self.best_score = log_loss
            try:
                os.makedirs('model_tmp/')
            except OSError:
                pass  # directory already exists
            self.saver.save(sess, 'model_tmp/model')
        utils.print_out("# Epoch-time %.2fs Eval logloss %.6f. Best logloss %.6f."
                        % (T, log_loss, self.best_score))
    elif hparams.metric == 'auc':
        fpr, tpr, thresholds = metrics.roc_curve(dev_data[1] + 1, preds,
                                                 pos_label=2)
        auc = metrics.auc(fpr, tpr)
        if self.best_score < auc:
            self.best_score = auc
            try:
                os.makedirs('model_tmp/')
            except OSError:
                pass  # directory already exists
            self.saver.save(sess, 'model_tmp/model')
        utils.print_out("# Epoch-time %.2fs Eval AUC %.6f. Best AUC %.6f."
                        % (T, auc, self.best_score))

def __init__(self):
    self.config = Config.config
    if not os.path.exists(self.config.vocab_file):
        pickle.dump(Vocabulary.Vocabulary(),
                    open(self.config.vocab_file, "wb"))
    self.vocab = pickle.load(open(self.config.vocab_file, "rb"))
    utils.print_out("finished reading vocab : {}".format(
        len(self.vocab.id2word)))
    # Ordered feature keys per product category (skirt / pants / top).
    self.cate2FK = {
        "裙": [
            "类型", "版型", "材质", "颜色", "风格", "图案", "裙型", "裙下摆",
            "裙腰型", "裙长", "裙衣长", "裙袖长", "裙领型", "裙袖型",
            "裙衣门襟", "裙款式"
        ],
        "裤": [
            "类型", "版型", "材质", "颜色", "风格", "图案", "裤长", "裤型",
            "裤款式", "裤腰型", "裤口"
        ],
        "上衣": [
            "类型", "版型", "材质", "颜色", "风格", "图案", "衣样式", "衣领型",
            "衣长", "衣袖长", "衣袖型", "衣门襟", "衣款式"
        ]
    }
    # Map each feature key to its position in the ordering.
    for key, val in self.cate2FK.items():
        self.cate2FK[key] = dict(zip(val, range(len(val))))
    self.input_graph = tf.Graph()
    with self.input_graph.as_default():
        proto = tf.ConfigProto()
        proto.gpu_options.allow_growth = True
        self.input_sess = tf.Session(config=proto)
    self.prepare_dataset()

def print_variables_in_ckpt(ckpt_path):
    """Print a list of variables in a checkpoint together with their shapes."""
    utils.print_out("# Variables in ckpt %s" % ckpt_path)
    reader = tf.train.NewCheckpointReader(ckpt_path)
    variable_map = reader.get_variable_to_shape_map()
    for key in sorted(variable_map.keys()):
        utils.print_out("  %s: %s" % (key, variable_map[key]))

def compute_perplexity(model, sess, name):
    """Compute perplexity of the output of the model.

    Args:
      model: model to compute perplexity for.
      sess: tensorflow session to use.
      name: name of the batch.

    Returns:
      The perplexity of the eval outputs.
    """
    total_loss = 0
    total_predict_count = 0
    start_time = time.time()
    step = 0
    while True:
        try:
            loss, predict_count, batch_size = model.eval(sess)
            total_loss += loss * batch_size
            total_predict_count += predict_count
            step += 1
            if step % 500 == 0:
                ls = total_loss / total_predict_count
                ppl = safe_exp(ls)
                print_out("  ## After %d steps, loss %.2f - ppl %.3f" %
                          (step, ls, ppl))
        except tf.errors.OutOfRangeError:
            break

    perplexity = safe_exp(total_loss / total_predict_count)
    print_time("  eval %s: perplexity %.2f" % (name, perplexity), start_time)
    return perplexity

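# A minimal sketch of the safe_exp helper used above: exp() that saturates to
# inf instead of raising OverflowError on very large losses. (Assumption: this
# mirrors the usual NMT-style utility; use your codebase's version if present.)
import math

def safe_exp(value):
    """Exponentiation with a guard against overflow."""
    try:
        return math.exp(value)
    except OverflowError:
        return float("inf")
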
def load_model(model, ckpt, session, name):
    start_time = time.time()
    model.saver.restore(session, ckpt)
    session.run(tf.tables_initializer())
    print_out("  loaded %s model parameters from %s, time %.2fs" %
              (name, ckpt, time.time() - start_time))
    return model

def ensure_compatible_hparams(hparams, default_hparams, hparams_path=""):
    """Make sure the loaded hparams are compatible with new changes."""
    default_hparams = utils.maybe_parse_standard_hparams(default_hparams,
                                                         hparams_path)

    # Set num encoder/decoder layers (for old checkpoints)
    if hasattr(hparams, "num_layers"):
        if not hasattr(hparams, "num_encoder_layers"):
            hparams.add_hparam("num_encoder_layers", hparams.num_layers)
        if not hasattr(hparams, "num_decoder_layers"):
            hparams.add_hparam("num_decoder_layers", hparams.num_layers)

    # For compatibility: if there are new fields in default_hparams,
    # add them to the current hparams.
    default_config = default_hparams.values()
    config = hparams.values()
    for key in default_config:
        if key not in config:
            hparams.add_hparam(key, default_config[key])

    # Update all hparams' keys if override_loaded_hparams=True
    if getattr(default_hparams, "override_loaded_hparams", None):
        overwritten_keys = default_config.keys()
    else:
        # For inference
        overwritten_keys = INFERENCE_KEYS

    for key in overwritten_keys:
        if getattr(hparams, key) != default_config[key]:
            utils.print_out("# Updating hparams.%s: %s -> %s" %
                            (key, str(getattr(hparams, key)),
                             str(default_config[key])))
            setattr(hparams, key, default_config[key])

    return hparams

def write_tree(affs, filename):
    # Use a context manager so the file is closed; Python 3 print syntax.
    with open(filename, 'w') as f:
        print('<affs>', file=f)
        for aff in affs:
            print_out(aff, where=f)
        print('</affs>', file=f)

def _build_uni_lstm(self, hparams):
    utils.print_out("# Build unidirectional lstm")
    num_uni_layers = self.num_uni_layers
    num_uni_residual_layers = self.num_uni_layers - 1
    utils.print_out("  num_layers = %d, num_residual_layers = %d" %
                    (num_uni_layers, num_uni_residual_layers))
    cell = self._build_cell(num_uni_layers, num_uni_residual_layers)
    uni_lstm = cell
    uni_lstm_condition = ("uni", None)
    return uni_lstm, uni_lstm_condition

def _build_bi_lstm(self, hparams):
    utils.print_out("# Build bidirectional lstm")
    num_bi_layers = self.num_bi_layers
    num_bi_residual_layers = 0
    utils.print_out("  num_bi_layers = %d, num_bi_residual_layers = %d" %
                    (num_bi_layers, num_bi_residual_layers))
    # Construct forward and backward cells
    fw_cell = self._build_cell(num_bi_layers, num_bi_residual_layers)
    bw_cell = self._build_cell(num_bi_layers, num_bi_residual_layers)
    bi_lstm = (fw_cell, bw_cell)
    bi_lstm_condition = ("bi", num_bi_layers)
    return bi_lstm, bi_lstm_condition

def gnmt_encoder(self):
    print_out("build gnmt encoder")
    with tf.variable_scope("gnmt_encoder") as scope:
        # Fused LSTM cells expect time-major inputs: [time, batch, units].
        inputs = tf.transpose(self.source_embedding, [1, 0, 2])
        inputs_reverse = _reverse(inputs, seq_lengths=self.sequence_length,
                                  seq_dim=0, batch_dim=1)
        encoder_states = []
        outputs = [inputs]

        with tf.variable_scope("fw") as s:
            cell = tf.contrib.rnn.LSTMBlockFusedCell(self.hparams.num_units,
                                                     use_peephole=False)
            fused_outputs_op, fused_state_op = cell(
                inputs, sequence_length=self.sequence_length, dtype=inputs.dtype)
            encoder_states.append(fused_state_op)
            outputs.append(fused_outputs_op)

        with tf.variable_scope('bw') as s:
            bw_cell = tf.contrib.rnn.LSTMBlockFusedCell(self.hparams.num_units,
                                                        use_peephole=False)
            bw_fused_outputs_op, bw_fused_state_op = bw_cell(
                inputs_reverse, sequence_length=self.sequence_length,
                dtype=inputs.dtype)
            bw_fused_outputs_op = _reverse(bw_fused_outputs_op,
                                           seq_lengths=self.sequence_length,
                                           seq_dim=0, batch_dim=1)
            encoder_states.append(bw_fused_state_op)
            outputs.append(bw_fused_outputs_op)

        with tf.variable_scope("uni") as s:
            uni_inputs = tf.concat([fused_outputs_op, bw_fused_outputs_op],
                                   axis=-1)
            for i in range(self.hparams.num_layers - 1):
                with tf.variable_scope("layer_%d" % i) as scope:
                    uni_cell = tf.contrib.rnn.LSTMBlockFusedCell(
                        self.hparams.num_units, use_peephole=False)
                    uni_fused_outputs_op, uni_fused_state_op = uni_cell(
                        uni_inputs, sequence_length=self.sequence_length,
                        dtype=inputs.dtype)
                    encoder_states.append(uni_fused_state_op)
                    outputs.append(uni_fused_outputs_op)
                    # Residual connection from the second uni layer onwards.
                    if i > 0:
                        uni_fused_outputs_op = uni_fused_outputs_op + uni_inputs
                    uni_inputs = uni_fused_outputs_op

        final_output = None
        # embedding + fw + bw + uni layers
        n = 3 + self.hparams.num_layers - 1
        # Use 1.0 / n so the initializer is a float even under Python 2.
        scalars = tf.get_variable('scalar',
                                  initializer=tf.constant([1.0 / n] * n))
        self.scalars = scalars
        weight = tf.get_variable('weight', initializer=tf.constant(0.001))
        self.weight = weight
        soft_scalars = tf.nn.softmax(scalars)
        # Learned convex combination of all layer outputs (back to batch-major).
        for i, output in enumerate(outputs):
            if final_output is None:
                final_output = soft_scalars[i] * tf.transpose(output, [1, 0, 2])
            else:
                final_output = final_output + soft_scalars[i] * tf.transpose(
                    output, [1, 0, 2])
        self.final_outputs = weight * final_output
        self.final_state = tuple(encoder_states)

def build_graph(self, hparams, scope=None):
    """Creates a bidirectional encoder with an intent-classification head.

    Args:
      hparams: Hyperparameter configurations.
      scope: VariableScope for the created subgraph; default "rnn".

    Returns:
      A tuple of the form (label_logits, loss, label_pred), where:
        label_logits: float32 Tensor [batch_size x lbl_vocab_size].
        loss: the total loss / batch_size.
        label_pred: predicted label ids (argmax over label_logits).
    """
    utils.print_out("\n# Creating %s graph ..." % self.mode)

    with tf.variable_scope(scope or "rnn", dtype=self.dtype):
        # Encoder
        self.encoder_outputs, encoder_state = self._build_encoder(hparams)
        fw_state, bw_state = encoder_state
        print('encoder_outputs: ', self.encoder_outputs.shape)
        print('fw_state.h: ', fw_state.h.shape)
        print('bw_state.h: ', bw_state.h.shape)

        # Linear layer for classification of intent
        encoder_last_state = tf.concat([fw_state.h, bw_state.h], axis=1)
        print('encoder_last_state: ', encoder_last_state.shape)
        print()

        encoder_output_size = encoder_last_state.get_shape()[1].value
        print('encoder_output_size: ', encoder_output_size)
        w = tf.get_variable('w', [encoder_output_size, self.lbl_vocab_size],
                            dtype=tf.float32)
        w_t = tf.transpose(w)
        v = tf.get_variable('v', [self.lbl_vocab_size], dtype=tf.float32)

        # Apply the linear layer
        label_logits = tf.nn.xw_plus_b(encoder_last_state, w, v)
        label_pred = tf.argmax(label_logits, 1)
        print('label_scores: ', label_logits.shape)
        print()

        # Loss
        if self.mode != tf.contrib.learn.ModeKeys.INFER:
            with tf.device(model_helper.get_device_str(
                    self.num_encoder_layers - 1, self.num_gpus)):
                loss = self._compute_loss(label_logits)
        else:
            loss = tf.constant(0.0)

        return label_logits, loss, label_pred

def elmo_encoder(self):
    print_out("build elmo encoder")
    with tf.variable_scope("elmo_encoder") as scope:
        # Fused LSTM cells expect time-major inputs: [time, batch, units].
        inputs = tf.transpose(self.source_embedding, [1, 0, 2])
        inputs_reverse = _reverse(inputs, seq_lengths=self.sequence_length,
                                  seq_dim=0, batch_dim=1)
        encoder_states = []
        outputs = [tf.concat([inputs, inputs], axis=-1)]
        fw_cell_inputs = inputs
        bw_cell_inputs = inputs_reverse
        for i in range(self.hparams.num_layers):
            with tf.variable_scope("fw_%d" % i) as s:
                cell = tf.contrib.rnn.LSTMBlockFusedCell(self.hparams.num_units,
                                                         use_peephole=False)
                fused_outputs_op, fused_state_op = cell(
                    fw_cell_inputs, sequence_length=self.sequence_length,
                    dtype=inputs.dtype)
                encoder_states.append(fused_state_op)
            with tf.variable_scope("bw_%d" % i) as s:
                bw_cell = tf.contrib.rnn.LSTMBlockFusedCell(
                    self.hparams.num_units, use_peephole=False)
                bw_fused_outputs_op_reverse, bw_fused_state_op = bw_cell(
                    bw_cell_inputs, sequence_length=self.sequence_length,
                    dtype=inputs.dtype)
                bw_fused_outputs_op = _reverse(bw_fused_outputs_op_reverse,
                                               seq_lengths=self.sequence_length,
                                               seq_dim=0, batch_dim=1)
                encoder_states.append(bw_fused_state_op)
            output = tf.concat([fused_outputs_op, bw_fused_outputs_op], axis=-1)
            # Residual connections from the second layer onwards.
            if i > 0:
                fw_cell_inputs = output + fw_cell_inputs
                bw_cell_inputs = _reverse(output,
                                          seq_lengths=self.sequence_length,
                                          seq_dim=0, batch_dim=1) + bw_cell_inputs
            else:
                fw_cell_inputs = output
                bw_cell_inputs = _reverse(output,
                                          seq_lengths=self.sequence_length,
                                          seq_dim=0, batch_dim=1)
            outputs.append(output)

        final_output = None
        # embedding + num_layers
        n = 1 + self.hparams.num_layers
        # Use 1.0 / n so the initializer is a float even under Python 2.
        scalars = tf.get_variable('scalar',
                                  initializer=tf.constant([1.0 / n] * n))
        self.scalars = scalars
        weight = tf.get_variable('weight', initializer=tf.constant(0.001))
        self.weight = weight
        soft_scalars = tf.nn.softmax(scalars)
        # Learned convex combination of all layer outputs (back to batch-major).
        for i, output in enumerate(outputs):
            if final_output is None:
                final_output = soft_scalars[i] * tf.transpose(output, [1, 0, 2])
            else:
                final_output = final_output + soft_scalars[i] * tf.transpose(
                    output, [1, 0, 2])
        self.final_outputs = weight * final_output
        self.final_state = tuple(encoder_states)

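# Both encoders above end with the same ELMo-style layer mixing:
# final = weight * sum_i softmax(scalars)[i] * h_i, i.e. a learned convex
# combination of the embedding layer and each LSTM layer's output, scaled by
# one learned scalar. A minimal NumPy sketch with hypothetical shapes:
import numpy as np

def mix_layers(layer_outputs, scalars, weight):
    # layer_outputs: list of [batch, time, units] arrays, one per layer.
    soft = np.exp(scalars) / np.sum(np.exp(scalars))  # softmax over layers
    return weight * sum(s * h for s, h in zip(soft, layer_outputs))
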
def init_embeddings(self, vocab_file, embedding_type, embedding_size,
                    dtype=tf.float32, scope=None):
    vocab_list, vocab_size = vocab.load_vocab(vocab_file)
    with tf.variable_scope(scope or "embeddings", dtype=dtype):
        sqrt3 = math.sqrt(3)
        if embedding_type == 'random':
            print_out('# Using random embedding.')
            self.embeddings = tf.get_variable(
                "emb_random_mat",
                shape=[vocab_size, embedding_size],
                initializer=tf.random_uniform_initializer(
                    minval=-sqrt3, maxval=sqrt3, dtype=dtype))
        else:
            # NOTE: this branch only logs; self.embeddings is not assigned here.
            print_out('# Using pretrained embedding: %s.' % embedding_type)

def create_or_load_model(model, ckpt_dir, session, name):
    latest_ckpt = tf.train.latest_checkpoint(ckpt_dir)
    if latest_ckpt:
        # load_model takes (model, ckpt, session, name); pass them all through.
        model = load_model(model, latest_ckpt, session, name)
    else:
        start_time = time.time()
        session.run(tf.global_variables_initializer())
        utils.print_out("created %s model with fresh parameters, time %.2fs" %
                        (name, time.time() - start_time))
    global_step = session.run(model.global_step)
    return model, global_step

def remove_tags(root, aratio, cratio, acratio):
    for aff in root:
        tags = set([elem.tag for elem in aff])
        if not set(['country', 'addr-line', 'institution']) <= tags:
            print_out(aff)  # a tag is missing already
        if random.random() <= aratio:
            remove_elems('addr-line', aff)
        elif random.random() <= cratio:
            remove_elems('country', aff)
        elif random.random() <= acratio:
            remove_elems('addr-line', aff)
            remove_elems('country', aff)

def __init__(self, hparams):
    self.hparams = hparams
    if hparams.metric in ['logloss']:
        self.best_score = 100000
    else:
        self.best_score = 0
    self.build_graph(hparams)
    self.optimizer(hparams)
    params = tf.trainable_variables()

    utils.print_out("# Trainable variables")
    for param in params:
        utils.print_out("  %s, %s, %s" %
                        (param.name, str(param.get_shape()), param.op.device))

def check_and_save_hparams(out_dir, hparams):
    """Save hparams, verifying them against any previously saved copy."""
    hparams_file = os.path.join(out_dir, "hparams")
    if tf.gfile.Exists(hparams_file):
        with codecs.getreader("utf-8")(tf.gfile.GFile(hparams_file, "rb")) as f:
            origin_hparams = json.load(f)
        origin_hparams = tf.contrib.training.HParams(**origin_hparams)
        wrong_keys = []
        keys = set(list(hparams.values().keys()) +
                   list(origin_hparams.values().keys()))
        for key in keys:
            new_value = hparams.values().get(key, None)
            origin_value = origin_hparams.values().get(key, None)
            # A key is wrong if the values differ or it is missing on either side.
            if new_value != origin_value or new_value is None or origin_value is None:
                wrong_keys.append(key)
        try:
            assert origin_hparams.values() == hparams.values()
            utils.print_out("using the same hparams as the existing %s" %
                            hparams_file)
        except AssertionError:
            utils.print_out("new hparams do not match the existing ones")
            for wrong_key in wrong_keys:
                utils.print_out(
                    "  key: %s,\norigin_value: %s,\nnew_value: %s\n" %
                    (wrong_key,
                     origin_hparams.values().get(wrong_key, None),
                     hparams.values().get(wrong_key, None)))
            raise ValueError
    else:
        utils.print_out("no old hparams found, creating new hparams at %s" %
                        hparams_file)
        with codecs.getwriter("utf-8")(tf.gfile.GFile(hparams_file, "wb")) as f:
            f.write(hparams.to_json(indent=4))

def _get_infer_maximum_iterations(self, hparams, source_sequence_length):
    """Maximum decoding steps at inference time."""
    if hparams.tgt_max_len_infer:
        maximum_iterations = hparams.tgt_max_len_infer
        utils.print_out("  decoding maximum_iterations %d" % maximum_iterations)
    else:
        # TODO(thangluong): add decoding_length_factor flag
        decoding_length_factor = 2.0
        max_encoder_length = tf.reduce_max(source_sequence_length)
        maximum_iterations = tf.to_int32(
            tf.round(tf.to_float(max_encoder_length) * decoding_length_factor))
    return maximum_iterations

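# For example, with decoding_length_factor = 2.0, a batch whose longest source
# sequence has 20 tokens decodes for at most round(20 * 2.0) = 40 steps.
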
def create_or_load_model(model, model_dir, session, name):
    """Create model and initialize or load parameters in session."""
    latest_ckpt = tf.train.latest_checkpoint(model_dir)
    if latest_ckpt:
        model = load_model(model, latest_ckpt, session, name)
    else:
        start_time = time.time()
        session.run(tf.global_variables_initializer())
        session.run(tf.tables_initializer())
        utils.print_out("  created %s model with fresh parameters, time %.2fs" %
                        (name, time.time() - start_time))
    global_step = model.global_step.eval(session=session)
    return model, global_step

def _external_eval(model, global_step, sess, hparams, iterator,
                   iterator_feed_dict, tgt_file, lbl_file, label,
                   summary_writer, save_on_best):
    """External evaluation such as BLEU and ROUGE scores."""
    out_dir = hparams.out_dir
    decode = global_step > 0

    if decode:
        utils.print_out("# External evaluation, global step %d" % global_step)

    sess.run(iterator.initializer, feed_dict=iterator_feed_dict)

    slot_output = os.path.join(out_dir, "slot_output_%s" % label)
    intent_output = os.path.join(out_dir, "intent_output_%s" % label)
    scores = nmt_utils.decode_and_evaluate(
        label,
        model,
        sess,
        slot_output,
        intent_output,
        ref_file=tgt_file,
        ref_lbl_file=lbl_file,
        metrics=hparams.metrics,
        subword_option=hparams.subword_option,
        beam_width=hparams.beam_width,
        tgt_eos=hparams.eos,
        task=hparams.task,
        decode=decode,
        infer_mode=hparams.infer_mode)

    # Save on best metrics
    if decode:
        for metric in hparams.metrics:
            best_metric_label = "best_" + metric
            utils.add_summary(summary_writer, global_step,
                              "%s_%s" % (label, metric), scores[metric])
            # metric: larger is better
            if save_on_best and scores[metric] > getattr(hparams,
                                                         best_metric_label):
                setattr(hparams, best_metric_label, scores[metric])
                model.saver.save(
                    sess,
                    os.path.join(getattr(hparams, best_metric_label + "_dir"),
                                 "translate.ckpt"),
                    global_step=model.global_step)
        utils.save_hparams(out_dir, hparams)
    return scores

def _preprocess(self):
    print_out("# Start preprocessing data...")
    content = _tokenize(self.data, self.w2i, self.max_len, self.reverse,
                        self.split_word)
    item_labels = []
    for label_name in self.label_names:
        labels = [""]
        labels = self.get_label(labels, self.tag_l2i)
        item_labels.append(labels)
    self._raw_data.append(
        DataItem(content=content,
                 labels=np.asarray(item_labels),
                 length=len(content),
                 id=0))
    self.num_batches = 1
    self.data_size = len(self._raw_data)

def _get_learning_rate_decay(self, hparams):
    """Get learning rate decay."""
    start_decay_step, decay_steps, decay_factor = self._get_decay_info(hparams)
    utils.print_out("  decay_scheme=%s, start_decay_step=%d, decay_steps %d, "
                    "decay_factor %g" % (hparams.decay_scheme, start_decay_step,
                                         decay_steps, decay_factor))

    return tf.cond(
        self.global_step < start_decay_step,
        lambda: self.learning_rate,
        lambda: tf.train.exponential_decay(
            self.learning_rate,
            (self.global_step - start_decay_step),
            decay_steps,
            decay_factor,
            staircase=True),
        name="learning_rate_decay_cond")

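# In plain Python, the schedule above (tf.train.exponential_decay with
# staircase=True behind a tf.cond) works out to the following; this sketch only
# illustrates the arithmetic and is not part of the graph:
def decayed_learning_rate(lr, global_step, start_decay_step, decay_steps,
                          decay_factor):
    if global_step < start_decay_step:
        return lr
    return lr * decay_factor ** ((global_step - start_decay_step) // decay_steps)
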
def _build_cell(self, num_layers, num_residual_layers):
    cell_list = []
    for i in range(num_layers):
        utils.print_out("  cell %d " % i, new_line=False)
        single_cell = self._single_cell(
            unit_type=self.unit_type,
            num_units=self.num_units,
            forget_bias=self.forget_bias,
            dropout=self.dropout,
            mode=self.mode,
            residual_connection=(i >= (num_layers - num_residual_layers)))
        utils.print_out("", new_line=True)
        cell_list.append(single_cell)

    if len(cell_list) == 1:
        # Single layer.
        return cell_list[0]
    else:
        # Multi layers.
        return tf.nn.rnn_cell.MultiRNNCell(cell_list)

def __init__(self, hparams, mode):
    self.mode = mode
    self.hparams = hparams

    # Define placeholders.
    self.vocab_table_word = lookup_ops.index_table_from_file(
        'pre_data/vocab_word.txt', default_value=0)
    self.vocab_table_char = lookup_ops.index_table_from_file(
        'pre_data/vocab_char.txt', default_value=0)
    self.norm_trainable = tf.placeholder(tf.bool)
    self.q1 = {}
    self.q2 = {}
    self.label = tf.placeholder(shape=(None,), dtype=tf.float32)
    for q in [self.q1, self.q2]:
        q['words'] = tf.placeholder(shape=(None, None), dtype=tf.string)
        q['words_len'] = tf.placeholder(shape=(None,), dtype=tf.int32)
        q['chars'] = tf.placeholder(shape=(None, None), dtype=tf.string)
        q['chars_len'] = tf.placeholder(shape=(None,), dtype=tf.int32)
        q['words_num'] = tf.placeholder(
            shape=(None, len(hparams.word_num_features)), dtype=tf.float32)
        q['chars_num'] = tf.placeholder(
            shape=(None, len(hparams.char_num_features)), dtype=tf.float32)

    # Build graph.
    self.build_graph(hparams)

    # Build optimizer.
    self.optimizer(hparams)

    params = tf.trainable_variables()
    self.saver = tf.train.Saver(tf.global_variables())
    elmo_param = []
    for param in tf.global_variables():
        if 'elmo' in param.name and 'elmo/Variable' not in param.name:
            elmo_param.append(param)
    self.pretrain_saver = tf.train.Saver(elmo_param)

    utils.print_out("# Trainable variables")
    for param in params:
        if hparams.pretrain is False and 'elmo' in param.name:
            continue
        utils.print_out("  %s, %s, %s" %
                        (param.name, str(param.get_shape()), param.op.device))

def change_country_by_dict(root):
    """
    <country>123234</country> --> <addr-line>123234</addr-line>
    <country>Berlin</country> --> <addr-line>Berlin</addr-line>
    """
    country_keywords = set_from_file(COUNTRY_DICT, normal=True, split=True) \
        .union(set_from_file(DEPENDENT_DICT, normal=True, split=True))
    # Drop single-character keywords, which would match too aggressively.
    for k in list(country_keywords):
        if len(k) == 1:
            country_keywords.discard(k)
    for aff in root:
        for elem in aff:
            if elem.tag == 'country':
                tokens = [normalize(t)
                          for t in tokenize(elem.text, split_alphanum=True)]
                if not any(t in country_keywords for t in tokens) and \
                        elem.text.strip() and \
                        (elem.text.strip() not in ['P.', 'R.', 'O.', 'C.', ')', ',']):
                    elem.tag = 'addr-line'
                    print_out(elem)