def __init__(self, **kwargs):
    self.tf = import_tf(kwargs['gpu_no'], kwargs['verbose'])
    self.logger = set_logger('BertNer', kwargs['log_dir'], kwargs['verbose'])
    self.model_dir = kwargs['ner_model']

    from bert.tokenization import FullTokenizer
    self.tokenizer = FullTokenizer(os.path.join(self.model_dir, 'vocab.txt'))

    # fixed sequence length for the NER graph
    self.ner_sq_len = 128
    self.input_ids = self.tf.placeholder(self.tf.int32, (None, self.ner_sq_len), 'input_ids')
    self.input_mask = self.tf.placeholder(self.tf.int32, (None, self.ner_sq_len), 'input_mask')

    # init graph
    self._init_graph()

    # init ner assist data
    self._init_predict_var()

    # person-name placeholders: the ten heavenly stems and twelve earthly
    # branches, conventionally used as anonymized party names in Chinese text
    self.per_proun = [
        '甲', '乙', '丙', '丁', '戊', '己', '庚', '辛', '壬', '癸',
        '子', '丑', '寅', '卯', '辰', '巳', '午', '未', '申', '酉', '戌', '亥'
    ]
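# Usage sketch (assumption): the enclosing class is not shown above; the name
# BertNer below is inferred from the logger tag, and all paths and argument
# values are hypothetical placeholders.
ner = BertNer(gpu_no=0,
              verbose=False,
              log_dir='/tmp/ner_logs',           # hypothetical
              ner_model='/path/to/ner_model')    # hypothetical; must contain vocab.txt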
def __init__(self, gpu_no, log_dir, bert_sim_dir, verbose=False):
    self.bert_sim_dir = bert_sim_dir
    self.logger = set_logger(colored('BS', 'cyan'), log_dir, verbose)
    self.tf = import_tf(gpu_no, verbose)

    # add tokenizer
    from bert import tokenization
    self.tokenizer = tokenization.FullTokenizer(os.path.join(bert_sim_dir, 'vocab.txt'))

    # add placeholders (45 is the hard-coded max sequence length)
    self.input_ids = self.tf.placeholder(self.tf.int32, (None, 45), 'input_ids')
    self.input_mask = self.tf.placeholder(self.tf.int32, (None, 45), 'input_mask')
    self.input_type_ids = self.tf.placeholder(self.tf.int32, (None, 45), 'input_type_ids')

    # init graph
    self._init_graph()
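# Usage sketch (assumption): the class name BertSim is inferred from the 'BS'
# logger tag and the bert_sim_dir argument; both paths are hypothetical.
sim = BertSim(gpu_no=0,
              log_dir='/tmp/sim_logs',                  # hypothetical
              bert_sim_dir='/path/to/bert_checkpoint',  # hypothetical; must contain vocab.txt
              verbose=False)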
            }))
    return input_fn  # end of input_fn_builder (its head is elided above)


args = get_run_args()
logger = set_logger(colored('VENTILATOR', 'magenta'), args.verbose)

graph_path, bert_config = optimize_graph(args=args)
if graph_path:
    logger.info('optimized graph is stored at: %s' % graph_path)
logger.info('use device %s, load graph from %s' % ('cpu', graph_path))

tf = import_tf(device_id=-1, verbose=args.verbose, use_fp16=args.fp16)
estimator = get_estimator(args=args, tf=tf, graph_path=graph_path)
save_hook = tf.train.CheckpointSaverHook(checkpoint_dir=args.checkpoint_dir, save_secs=1)
predicts = estimator.predict(input_fn=input_fn_builder(), hooks=[save_hook])
for predict in predicts:
    print(predict)

feature_spec = {
    "unique_ids": tf.placeholder(dtype=tf.int32, shape=[None], name="unique_ids"),
    "input_ids": tf.placeholder(dtype=tf.int32, shape=[None, None], name="input_ids"),
    "input_mask": tf.placeholder(dtype=tf.int32, shape=[None, None], name="input_mask"),
}
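# Sketch (assumption): the head of input_fn_builder is elided above. A typical
# shape for such a builder, matching feature_spec, is a tf.data generator like
# the one below; texts, tokenizer and max_seq_len are hypothetical parameters,
# not the project's actual signature.
def input_fn_builder_sketch(texts, tokenizer, max_seq_len=45):
    def generate():
        for uid, text in enumerate(texts):
            tokens = ['[CLS]'] + tokenizer.tokenize(text)[:max_seq_len - 2] + ['[SEP]']
            ids = tokenizer.convert_tokens_to_ids(tokens)
            mask = [1] * len(ids)
            pad = [0] * (max_seq_len - len(ids))  # zero-pad to the fixed length
            yield {'unique_ids': uid,
                   'input_ids': ids + pad,
                   'input_mask': mask + pad}

    def input_fn():
        return tf.data.Dataset.from_generator(
            generate,
            output_types={'unique_ids': tf.int32,
                          'input_ids': tf.int32,
                          'input_mask': tf.int32},
            output_shapes={'unique_ids': [],
                           'input_ids': [max_seq_len],
                           'input_mask': [max_seq_len]}).batch(32)

    return input_fn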
import os
import json
import contextlib
import tempfile

from termcolor import colored

# set_logger, import_tf, modeling, PoolingStrategy and the fp16-aware
# convert_variables_to_constants are assumed to be project-local helpers
# imported at module level.


def optimize_graph(args, logger=None):
    if not logger:
        logger = set_logger(colored('GRAPHOPT', 'cyan'), args.verbose)
    try:
        # we don't need GPU for optimizing the graph
        tf = import_tf(verbose=args.verbose)
        from tensorflow.python.tools.optimize_for_inference_lib import optimize_for_inference

        config = tf.ConfigProto(device_count={'GPU': 0}, allow_soft_placement=True)

        config_fp = os.path.join(args.model_dir, args.config_name)
        init_checkpoint = os.path.join(args.tuned_model_dir or args.model_dir, args.ckpt_name)
        if args.fp16:
            logger.warning('fp16 is turned on! '
                           'Note that not all CPUs/GPUs support fast fp16 instructions; '
                           'in the worst case you will have degraded performance!')
        logger.info('model config: %s' % config_fp)
        logger.info('checkpoint%s: %s' %
                    (' (overridden by the fine-tuned model)' if args.tuned_model_dir else '',
                     init_checkpoint))
        with tf.gfile.GFile(config_fp, 'r') as f:
            bert_config = modeling.BertConfig.from_dict(json.load(f))

        logger.info('build graph...')
        # input placeholders, not sure if they are friendly to XLA
        input_ids = tf.placeholder(tf.int32, (None, None), 'input_ids')
        input_mask = tf.placeholder(tf.int32, (None, None), 'input_mask')
        input_type_ids = tf.placeholder(tf.int32, (None, None), 'input_type_ids')

        jit_scope = tf.contrib.compiler.jit.experimental_jit_scope if args.xla else contextlib.suppress

        with jit_scope():
            input_tensors = [input_ids, input_mask, input_type_ids]

            model = modeling.BertModel(config=bert_config,
                                       is_training=False,
                                       input_ids=input_ids,
                                       input_mask=input_mask,
                                       token_type_ids=input_type_ids,
                                       use_one_hot_embeddings=False)

            tvars = tf.trainable_variables()
            (assignment_map,
             initialized_variable_names) = modeling.get_assignment_map_from_checkpoint(tvars, init_checkpoint)
            tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

            # masking helpers: push padded positions towards -inf (for max)
            # or zero them out (for mean)
            minus_mask = lambda x, m: x - tf.expand_dims(1.0 - m, axis=-1) * 1e30
            mul_mask = lambda x, m: x * tf.expand_dims(m, axis=-1)
            masked_reduce_max = lambda x, m: tf.reduce_max(minus_mask(x, m), axis=1)
            masked_reduce_mean = lambda x, m: tf.reduce_sum(mul_mask(x, m), axis=1) / (
                    tf.reduce_sum(m, axis=1, keepdims=True) + 1e-10)

            with tf.variable_scope("pooling"):
                if len(args.pooling_layer) == 1:
                    encoder_layer = model.all_encoder_layers[args.pooling_layer[0]]
                else:
                    all_layers = [model.all_encoder_layers[l] for l in args.pooling_layer]
                    encoder_layer = tf.concat(all_layers, -1)

                input_mask = tf.cast(input_mask, tf.float32)
                if args.pooling_strategy == PoolingStrategy.REDUCE_MEAN:
                    pooled = masked_reduce_mean(encoder_layer, input_mask)
                elif args.pooling_strategy == PoolingStrategy.REDUCE_MAX:
                    pooled = masked_reduce_max(encoder_layer, input_mask)
                elif args.pooling_strategy == PoolingStrategy.REDUCE_MEAN_MAX:
                    pooled = tf.concat([masked_reduce_mean(encoder_layer, input_mask),
                                        masked_reduce_max(encoder_layer, input_mask)], axis=1)
                elif args.pooling_strategy == PoolingStrategy.FIRST_TOKEN or \
                        args.pooling_strategy == PoolingStrategy.CLS_TOKEN:
                    pooled = tf.squeeze(encoder_layer[:, 0:1, :], axis=1)
                elif args.pooling_strategy == PoolingStrategy.LAST_TOKEN or \
                        args.pooling_strategy == PoolingStrategy.SEP_TOKEN:
                    seq_len = tf.cast(tf.reduce_sum(input_mask, axis=1), tf.int32)
                    rng = tf.range(0, tf.shape(seq_len)[0])
                    indexes = tf.stack([rng, seq_len - 1], 1)
                    pooled = tf.gather_nd(encoder_layer, indexes)
                elif args.pooling_strategy == PoolingStrategy.NONE:
                    pooled = mul_mask(encoder_layer, input_mask)
                else:
                    raise NotImplementedError()

        if args.fp16:
            pooled = tf.cast(pooled, tf.float16)

        pooled = tf.identity(pooled, 'final_encodes')
        output_tensors = [pooled]
        tmp_g = tf.get_default_graph().as_graph_def()

        with tf.Session(config=config) as sess:
            logger.info('load parameters from checkpoint...')
            sess.run(tf.global_variables_initializer())
            dtypes = [n.dtype for n in input_tensors]
            logger.info('optimize...')
            # n.name looks like 'input_ids:0'; [:-2] strips the ':0' output index
            tmp_g = optimize_for_inference(
                tmp_g,
                [n.name[:-2] for n in input_tensors],
                [n.name[:-2] for n in output_tensors],
                [dtype.as_datatype_enum for dtype in dtypes],
                False)
            logger.info('freeze...')
            tmp_g = convert_variables_to_constants(
                sess, tmp_g,
                [n.name[:-2] for n in output_tensors],
                use_fp16=args.fp16)

        tmp_file = tempfile.NamedTemporaryFile('w', delete=False, dir=args.graph_tmp_dir).name
        logger.info('write graph to a tmp file: %s' % tmp_file)
        with tf.gfile.GFile(tmp_file, 'wb') as f:
            f.write(tmp_g.SerializeToString())
        return tmp_file, bert_config
    except Exception:
        logger.error('fail to optimize the graph!', exc_info=True)
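# Usage sketch: loading the frozen graph written by optimize_graph() and
# running the 'final_encodes' op directly. The args object is assumed to carry
# the fields optimize_graph() reads; the toy feed values below stand in for
# real tokenizer output.
tf = import_tf(device_id=-1, verbose=False)
graph_path, bert_config = optimize_graph(args)

graph_def = tf.GraphDef()
with tf.gfile.GFile(graph_path, 'rb') as f:
    graph_def.ParseFromString(f.read())

with tf.Graph().as_default() as g:
    tf.import_graph_def(graph_def, name='')
    with tf.Session(graph=g) as sess:
        encodes = sess.run('final_encodes:0', feed_dict={
            'input_ids:0': [[101, 2023, 102]],  # toy ids for [CLS] ... [SEP]
            'input_mask:0': [[1, 1, 1]],
            'input_type_ids:0': [[0, 0, 0]],
        })
        print(encodes.shape)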