class CheckpointSmallBERT(AbstractBase):
    """BERT encoder restored from a checkpoint directory.

    The directory is expected to contain ``bert_config.json``,
    ``bert_model.ckpt`` and ``vocab.txt``; these are the three paths the
    constructor reads.
    """

    def __init__(self, path, training=False, max_seq_length=512):
        """Build the graph, restore the weights and open a session.

        Args:
            path: Directory holding the config, checkpoint and vocabulary.
            training: Forwarded to ``BertModel`` as ``is_training``.
            max_seq_length: Second dimension of the three int32 placeholders.
        """
        self.max_seq_length = max_seq_length
        self.graph = tf.Graph()
        checkpoint_file = path + '/bert_model.ckpt'

        # NOTE(review): the original layout was lost; all graph-building
        # statements are assumed to belong inside this block -- confirm.
        with self.graph.as_default():
            placeholder_shape = (None, self.max_seq_length)
            self.input_ids = tf.compat.v1.placeholder(
                tf.int32, shape=placeholder_shape)
            self.input_mask = tf.compat.v1.placeholder(
                tf.int32, shape=placeholder_shape)
            self.segment_ids = tf.compat.v1.placeholder(
                tf.int32, shape=placeholder_shape)

            self.bert_config = BertConfig.from_json_file(
                path + '/bert_config.json')
            self.bert_module = BertModel(
                config=self.bert_config,
                is_training=training,
                input_ids=self.input_ids,
                input_mask=self.input_mask,
                token_type_ids=self.segment_ids,
                use_one_hot_embeddings=False)

            # Use the compat.v1 endpoints throughout: the bare
            # tf.trainable_variables / tf.train.init_from_checkpoint names
            # are not available under the TF 2.x namespace.
            assignment_map, _ = get_assignment_map_from_checkpoint(
                tf.compat.v1.trainable_variables(), checkpoint_file)
            tf.compat.v1.train.init_from_checkpoint(
                checkpoint_file, assignment_map)

            # Bind the session to our private graph explicitly so it never
            # silently attaches to whatever the ambient default graph is.
            self.sess = tf.compat.v1.Session(graph=self.graph)
            self.sess.run(
                tf.group(tf.compat.v1.global_variables_initializer(),
                         tf.compat.v1.tables_initializer()))

            self.bert_outputs = {
                'sequence_output': self.bert_module.get_sequence_output(),
                'pooled_output': self.bert_module.get_pooled_output(),
            }

        self.tok = tokenization.FullTokenizer(
            vocab_file=path + '/vocab.txt', do_lower_case=True)
def _bert_model(self,
                input_ids,
                input_tag_embeddings,
                input_masks,
                bert_config,
                bert_checkpoint_file,
                is_training=False):
    """Creates the Bert model and restores it from a checkpoint.

    Args:
        input_ids: A [batch, max_seq_len] int tensor.
        input_tag_embeddings: Passed to `BertModel` as `tag_embeddings`.
        input_masks: A [batch, max_seq_len] int tensor.
        bert_config: Configuration object handed to `BertModel`.
        bert_checkpoint_file: Checkpoint path to initialize variables from.
        is_training: Forwarded to `BertModel`.

    Returns:
        The pooled output of the Bert model.
    """
    bert_model = BertModel(bert_config,
                           is_training,
                           input_ids=input_ids,
                           input_mask=input_masks,
                           use_tag_embeddings=True,
                           tag_embeddings=input_tag_embeddings)

    # Restore from checkpoint. Use the compat.v1 endpoint so the variable
    # listing matches the compat.v1 restore call below.
    assignment_map, _ = get_assignment_map_from_checkpoint(
        tf.compat.v1.global_variables(), bert_checkpoint_file)
    # Never take the step counter from the checkpoint.
    assignment_map.pop('global_step', None)
    tf.compat.v1.train.init_from_checkpoint(bert_checkpoint_file,
                                            assignment_map)

    return bert_model.get_pooled_output()
def _bert_model(self, input_ids, input_tag_features, input_masks):
    """Creates the Bert model and restores it from a checkpoint.

    The config file and checkpoint path are read from `self._model_proto`
    (`bert_config_file`, `bert_checkpoint_file`).

    Args:
        input_ids: A [batch, max_seq_len] int tensor.
        input_tag_features: Passed to `BertModel` as `tag_features`.
        input_masks: A [batch, max_seq_len] int tensor.

    Returns:
        The pooled output of the Bert model.
    """
    options = self._model_proto
    checkpoint_file = options.bert_checkpoint_file

    bert_config = BertConfig.from_json_file(options.bert_config_file)
    bert_model = BertModel(bert_config,
                           self._is_training,
                           input_ids=input_ids,
                           input_mask=input_masks,
                           use_tag_embeddings=True,
                           tag_features=input_tag_features)

    # Restore from checkpoint. Use the compat.v1 endpoint so the variable
    # listing matches the compat.v1 restore call below.
    assignment_map, _ = get_assignment_map_from_checkpoint(
        tf.compat.v1.global_variables(), checkpoint_file)
    # Never take the step counter from the checkpoint.
    assignment_map.pop('global_step', None)
    tf.compat.v1.train.init_from_checkpoint(checkpoint_file, assignment_map)

    return bert_model.get_pooled_output()
def predict(self, inputs, **kwargs):
    """Predicts the resulting tensors.

    Args:
        inputs: A dictionary of input tensors keyed by names. Reads
            `InputFields.answer_choices_with_question` and
            `InputFields.answer_choices_with_question_len`.
        **kwargs: Accepted for interface compatibility; not used here.

    Returns:
        predictions: A dictionary of prediction tensors keyed by name. It
            has a single entry, `FIELD_ANSWER_PREDICTION`.
    """
    is_training = self._is_training
    options = self._model_proto
    checkpoint_file = options.bert_checkpoint_file

    # The label is not needed to compute predictions, so it is not read.
    answer_choices = inputs[InputFields.answer_choices_with_question]
    answer_choices_len = inputs[InputFields.answer_choices_with_question_len]

    # Create model layers.
    token_to_id_layer = token_to_id.TokenToIdLayer(
        options.bert_vocab_file, options.bert_unk_token_id)

    # Convert tokens into token ids, folding the choice axis into the batch.
    batch_size = answer_choices.shape[0]
    flat_shape = [batch_size * NUM_CHOICES, -1]

    answer_choices_token_ids = token_to_id_layer(answer_choices)
    answer_choices_token_ids_reshaped = tf.reshape(answer_choices_token_ids,
                                                   flat_shape)

    answer_choices_mask = tf.sequence_mask(
        answer_choices_len, maxlen=tf.shape(answer_choices)[-1])
    answer_choices_mask_reshaped = tf.reshape(answer_choices_mask, flat_shape)

    # Bert prediction.
    bert_config = BertConfig.from_json_file(options.bert_config_file)
    bert_model = BertModel(bert_config,
                           is_training,
                           input_ids=answer_choices_token_ids_reshaped,
                           input_mask=answer_choices_mask_reshaped)

    answer_choices_cls_feature_reshaped = bert_model.get_pooled_output()
    answer_choices_cls_feature = tf.reshape(
        answer_choices_cls_feature_reshaped, [batch_size, NUM_CHOICES, -1])

    # Restore from checkpoint. The variable listing uses the compat.v1
    # endpoint to match the restore call, and the step counter is excluded
    # so a pre-trained checkpoint cannot overwrite it.
    assignment_map, _ = get_assignment_map_from_checkpoint(
        tf.compat.v1.global_variables(), checkpoint_file)
    assignment_map.pop('global_step', None)
    tf.compat.v1.train.init_from_checkpoint(checkpoint_file, assignment_map)

    # Classification layer: one logit per answer choice.
    output = tf.compat.v1.layers.dense(answer_choices_cls_feature,
                                       units=1,
                                       activation=None)
    output = tf.squeeze(output, axis=-1)

    return {FIELD_ANSWER_PREDICTION: output}
def body(self, features, mode):
    """Body of the model, aka Bert.

    Arguments:
        features {dict} -- feature dict,
            keys: input_ids, input_mask, segment_ids
        mode {mode} -- mode; training behaviour is enabled only when it
            equals tf.estimator.ModeKeys.TRAIN

    Returns:
        dict -- features extracted from bert.
            keys: 'seq', 'pooled', 'all', 'embed', 'embed_table'

            seq: tensor, [batch_size, seq_length, hidden_size]
            pooled: tensor, [batch_size, hidden_size]
            all: list of tensor,
                num_hidden_layers * [batch_size, seq_length, hidden_size]
            embed: tensor, [batch_size, seq_length, hidden_size]
            embed_table: result of model.get_embedding_table()
    """
    config = self.config
    model = BertModel(
        config=config.bert_config,
        is_training=(mode == tf.estimator.ModeKeys.TRAIN),
        input_ids=features["input_ids"],
        input_mask=features["input_mask"],
        token_type_ids=features["segment_ids"],
        use_one_hot_embeddings=config.use_one_hot_embeddings)

    # Same keys, same insertion order as before; built directly instead of
    # looping over a fixed list and dispatching on each name.
    return {
        'seq': model.get_sequence_output(),
        'pooled': model.get_pooled_output(),
        'all': model.get_all_encoder_layers(),
        # for res connection
        'embed': model.get_embedding_output(),
        'embed_table': model.get_embedding_table(),
    }
def _buildModel(self, input_ids, token_type_ids, input_mask):
    """Project BERT's pooled output through a dense layer named 'output'.

    Args:
        input_ids: Forwarded to `BertModel` as its third positional argument.
        token_type_ids: Forwarded to `BertModel` as its fifth positional
            argument.
        input_mask: Forwarded to `BertModel` as its fourth positional
            argument.

    Returns:
        The dense layer's output, `self.config.output_dim` units wide.
    """
    cfg = self.config

    pooled = BertModel(cfg,
                       cfg.training,
                       input_ids,
                       input_mask,
                       token_type_ids,
                       cfg.use_one_hot_embeddings).get_pooled_output()

    # Both kernel and bias carry an L2 penalty with scale 1.0.
    dense_kwargs = dict(
        kernel_initializer=tf.truncated_normal_initializer(
            stddev=cfg.initializer_range),
        kernel_regularizer=tf.contrib.layers.l2_regularizer(1.0),
        bias_regularizer=tf.contrib.layers.l2_regularizer(1.0),
        name='output')
    return tf.layers.dense(pooled, cfg.output_dim, **dense_kwargs)
def _predict_logits(self,
                    answer_choices,
                    answer_choices_len,
                    token_to_id_fn,
                    bert_config,
                    slim_fc_scope,
                    keep_prob=1.0,
                    is_training=False):
    """Scores every answer choice with BERT plus a single-unit FC layer.

    Args:
        answer_choices: A [batch, NUM_CHOICES, max_answer_len] string tensor.
        answer_choices_len: A [batch, NUM_CHOICES] int tensor.
        token_to_id_fn: A callable to convert the token tensor to an int
            tensor.
        bert_config: A BertConfig instance to initialize BERT model.
        slim_fc_scope: Slim FC scope.
        keep_prob: Keep probability of dropout layers. Not referenced in
            this method's body.
        is_training: Forwarded to `BertModel`.

    Returns:
        logits: A [batch, NUM_CHOICES] float tensor.
    """
    batch_size = answer_choices.shape[0]
    # Choices are folded into the batch axis so BERT sees one row per choice.
    flattened = [batch_size * NUM_CHOICES, -1]

    token_ids = tf.reshape(token_to_id_fn(answer_choices), flattened)

    max_len = tf.shape(answer_choices)[-1]
    token_mask = tf.reshape(
        tf.sequence_mask(answer_choices_len, maxlen=max_len), flattened)

    pooled = BertModel(bert_config,
                       is_training,
                       input_ids=token_ids,
                       input_mask=token_mask).get_pooled_output()

    with slim.arg_scope(slim_fc_scope):
        logits = slim.fully_connected(pooled,
                                      num_outputs=1,
                                      activation_fn=None,
                                      scope='logits')

    # Unfold the choice axis again.
    return tf.reshape(logits, [batch_size, NUM_CHOICES])
def bert(bert_config_file,
         mode,
         dim,
         input_ids,
         input_mask,
         input_type,
         activation,
         init_checkpoint=None):
    """Build a BERT encoder under scope "bert_query" with a dense head.

    Args:
        bert_config_file: Path of the JSON file loaded into a `BertConfig`.
        mode: Estimator mode; training behaviour and dropout are enabled
            only for `tf.estimator.ModeKeys.TRAIN`.
        dim: Number of units of the dense head.
        input_ids: Forwarded to `BertModel` as `input_ids`.
        input_mask: Forwarded to `BertModel` as `input_mask`.
        input_type: Forwarded to `BertModel` as `token_type_ids`.
        activation: Activation of the dense head.
        init_checkpoint: Optional checkpoint path. When truthy, trainable
            variables under "bert_query" are initialized from it.

    Returns:
        The output tensor of the dense head ("bert_query/query").
    """
    is_training = mode == tf.estimator.ModeKeys.TRAIN

    bert_config = BertConfig.from_json_file(bert_config_file)
    bert_model = BertModel(config=bert_config,
                           is_training=is_training,
                           input_ids=input_ids,
                           input_mask=input_mask,
                           token_type_ids=input_type,
                           scope="bert_query")

    output = bert_model.get_pooled_output()
    if is_training:
        output = tf.nn.dropout(output, keep_prob=0.9)

    sig = tf.layers.dense(
        output,
        dim,
        activation=activation,
        kernel_initializer=tf.truncated_normal_initializer(
            stddev=bert_config.initializer_range),
        name="bert_query/query")

    if init_checkpoint:
        # Collected after the dense head exists, so "bert_query/query" is
        # part of the listing handed to the assignment-map helper.
        tvars = tf.trainable_variables('bert_query')
        assignment_map, _ = get_assignment_map_from_checkpoint(
            tvars, init_checkpoint)
        tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

    return sig
def main(_):
    """Extract BERT features for this shard's annotations into .npy files.

    For every annotation whose numeric id falls into the shard selected by
    `FLAGS.shard_id` / `FLAGS.num_shards`, runs the BERT model and saves the
    result under `FLAGS.output_bert_feature_dir/<partition>/<annot_id>.npy`.
    Annotations whose output file already exists are skipped.

    Raises:
        ValueError: If `FLAGS.shard_id` is not in `[0, FLAGS.num_shards)`.
    """
    logging.set_verbosity(logging.INFO)

    # Validate sharding up front, before any model is built. A real
    # exception is used because `assert` is stripped under `python -O`.
    shard_id, num_shards = FLAGS.shard_id, FLAGS.num_shards
    if not 0 <= shard_id < num_shards:
        raise ValueError('shard_id must satisfy 0 <= shard_id < num_shards, '
                         'got shard_id=%r, num_shards=%r' %
                         (shard_id, num_shards))

    for i in range(_NUM_PARTITIONS):
        tf.io.gfile.makedirs(
            os.path.join(FLAGS.output_bert_feature_dir, '%02d' % i))

    # Create Bert model.
    bert_tokenizer = tokenization.FullTokenizer(
        vocab_file=FLAGS.bert_vocab_file, do_lower_case=FLAGS.do_lower_case)

    # Bert prediction. One token sequence is fed at a time; a leading batch
    # axis is added for the model and stripped again from its outputs.
    input_placeholder = tf.compat.v1.placeholder(shape=[None],
                                                 dtype=tf.string)
    token_to_id_layer = token_to_id.TokenToIdLayer(FLAGS.bert_vocab_file,
                                                   unk_token_id=UNK)

    bert_config = BertConfig.from_json_file(FLAGS.bert_config_file)
    bert_model = BertModel(bert_config,
                           is_training=False,
                           input_ids=token_to_id_layer(
                               tf.expand_dims(input_placeholder, 0)))
    sequence_output = bert_model.get_sequence_output()[0]
    pooled_output = bert_model.get_pooled_output()[0]

    saver = tf.compat.v1.train.Saver()

    config = tf.compat.v1.ConfigProto()
    config.gpu_options.allow_growth = True

    # The context manager releases the session even if extraction fails.
    with tf.compat.v1.Session(config=config) as sess:
        sess.run(tf.compat.v1.tables_initializer())
        saver.restore(sess, FLAGS.bert_checkpoint_file)

        for name in sess.run(tf.compat.v1.report_uninitialized_variables()):
            logging.warning('%s is uninitialized!', name)

        def _bert_fn(sequence):
            return sess.run([sequence_output, pooled_output],
                            feed_dict={input_placeholder: sequence})

        # Load annotations.
        annots = _load_annotations(FLAGS.annotations_jsonl_file)
        logging.info('Loaded %i annotations.', len(annots))

        for idx, annot in enumerate(annots):
            if (idx + 1) % 1000 == 0:
                logging.info('On example %i/%i.', idx + 1, len(annots))

            # Keep only the annotations that belong to this shard.
            annot_id = int(annot['annot_id'].split('-')[-1])
            if annot_id % num_shards != shard_id:
                continue

            # Check npy file.
            part_id = get_partition_id(annot['annot_id'])
            output_file = os.path.join(FLAGS.output_bert_feature_dir,
                                       '%02d' % part_id,
                                       annot['annot_id'] + '.npy')
            if os.path.isfile(output_file):
                logging.info('%s is there.', output_file)
                continue

            # Compute and store the BERT features.
            bert_outputs = _create_bert_embeddings(annot, bert_tokenizer,
                                                   FLAGS.do_lower_case,
                                                   _bert_fn)
            with open(output_file, 'wb') as f:
                np.save(f, bert_outputs)

    logging.info('Done')
def build(self, data_iter, bert_config_file, is_training=True):
    """Build the BERT document classifier, its loss and its metrics.

    Each document is reshaped into rows of `self.max_sen_len` tokens, every
    row is encoded with BERT, and the pooled outputs of the valid rows are
    summed and fed to a dense classification layer.

    Sets `self.usr`, `self.prd`, `self.bert_output`, `self.logits`,
    `self.prediction`, `self.probs`, `self.loss`, `self.total_loss` and
    `self.correct`.

    Args:
        data_iter: Iterator whose `get_next()` yields a dict with the keys
            'usr', 'prd', 'content', 'rating' and 'doc_len'.
        bert_config_file: Path of the JSON file loaded into a `BertConfig`.
        is_training: Forwarded to `BertModel`. Defaults to True, the value
            that used to be hard-coded.

    Returns:
        A tuple `(total_loss, mse, correct_num, accuracy)`. All four are
        sums over the batch: `mse` is the summed squared difference between
        prediction and label, and `accuracy` is the number of correct
        predictions as a float, not a ratio.
    """
    # get the inputs
    # NOTE(review): the original layout was lost; the 'inputs' scope is
    # assumed to cover only input unpacking and embedding lookups -- confirm.
    with tf.variable_scope('inputs'):
        input_map = data_iter.get_next()
        usrid, prdid, input_x, input_y, doc_len = \
            (input_map['usr'], input_map['prd'],
             input_map['content'], input_map['rating'],
             input_map['doc_len'])

        input_x = tf.reshape(input_x, [-1, self.max_sen_len])
        # Non-zero entries per row are taken as the sentence length.
        sen_len = tf.count_nonzero(input_x, axis=-1)
        doc_len = doc_len // self.max_sen_len
        input_x = tf.cast(input_x, tf.int32)

        self.usr = lookup(self.embeddings['usr_emb'],
                          usrid,
                          name='cur_usr_embedding')
        self.prd = lookup(self.embeddings['prd_emb'],
                          prdid,
                          name='cur_prd_embedding')

    input_mask = tf.cast(tf.sequence_mask(sen_len, self.max_sen_len),
                         tf.int32)

    bert_config = BertConfig.from_json_file(bert_config_file)
    bert = BertModel(bert_config,
                     is_training=is_training,
                     input_ids=input_x,
                     input_mask=input_mask,
                     token_type_ids=None,
                     use_one_hot_embeddings=False)

    # Regroup the per-sentence vectors by document, zero out the padding
    # sentences and sum what remains.
    sen_per_doc = self.max_doc_len // self.max_sen_len
    bert_output = tf.reshape(bert.get_pooled_output(),
                             [-1, sen_per_doc, bert_config.hidden_size])
    doc_mask = tf.sequence_mask(doc_len, sen_per_doc)
    bert_output = bert_output * tf.cast(doc_mask[:, :, None], tf.float32)
    bert_output = tf.reduce_sum(bert_output, axis=1)

    logits = tf.layers.dense(
        bert_output,
        self.cls_cnt,
        kernel_initializer=tf.truncated_normal_initializer(stddev=0.02))
    self.bert_output = bert_output
    self.logits = logits

    # build the process of model
    prediction = tf.argmax(logits, 1, name='prediction')
    self.prediction = prediction

    with tf.variable_scope("loss"):
        log_probs = tf.nn.log_softmax(logits)
        self.probs = tf.nn.softmax(logits)
        # Per-example cross entropy against the one-hot label.
        loss = -tf.reduce_sum(
            tf.one_hot(input_y, self.cls_cnt, dtype=tf.float32) * log_probs,
            axis=-1)
        self.loss = tf.reduce_mean(loss)
        self.total_loss = tf.reduce_sum(loss)

    with tf.variable_scope("metrics"):
        correct_prediction = tf.equal(prediction, input_y)
        self.correct = correct_prediction
        mse = tf.reduce_sum(tf.square(prediction - input_y), name="mse")
        correct_num = tf.reduce_sum(tf.cast(correct_prediction,
                                            dtype=tf.int32),
                                    name="correct_num")
        accuracy = tf.reduce_sum(tf.cast(correct_prediction, "float"),
                                 name="accuracy")

    return self.total_loss, mse, correct_num, accuracy
def predict(self, inputs, **kwargs):
    """Predicts the resulting tensors.

    Fuses an averaged Fast-RCNN object feature with the BERT pooled output
    of every answer choice and scores each choice with a dense layer.

    Args:
        inputs: A dictionary of input tensors keyed by names. Reads the
            image data/height/width, the object count, boxes, labels and
            scores, and the answer choices with their lengths.
        **kwargs: Accepted for interface compatibility; not used here.

    Returns:
        predictions: A dictionary of prediction tensors keyed by name. It
            has a single entry, `FIELD_ANSWER_PREDICTION`.
    """
    is_training = self._is_training
    options = self._model_proto
    checkpoint_file = options.bert_checkpoint_file

    # The label is not needed to compute predictions, so it is not read.
    (image, height, width, num_objects, object_bboxes, object_labels,
     object_scores, answer_choices, answer_choices_len) = (
         inputs[InputFields.img_data],
         inputs[InputFields.img_height],
         inputs[InputFields.img_width],
         inputs[InputFields.num_objects],
         inputs[InputFields.object_bboxes],
         inputs[InputFields.object_labels],
         inputs[InputFields.object_scores],
         inputs[InputFields.answer_choices_with_question],
         inputs[InputFields.answer_choices_with_question_len])

    # Visualize image and object bboxes.
    batch_size = image.shape[0]
    image_batch_shape = tf.shape(image)
    object_bboxes = _to_batch_coordinates(object_bboxes, height, width,
                                          image_batch_shape[1],
                                          image_batch_shape[2])
    image_with_boxes = visualization.draw_bounding_boxes_on_image_tensors(
        image, num_objects, object_bboxes, object_labels, object_scores)
    tf.summary.image('vcr/detection', image_with_boxes, max_outputs=10)

    # Extract FRCNN feature and average it over the valid objects.
    frcnn_features = fast_rcnn.FastRCNN(tf.cast(image, tf.float32),
                                        object_bboxes,
                                        options=options.fast_rcnn_config,
                                        is_training=is_training)
    object_masks = tf.sequence_mask(num_objects,
                                    tf.shape(object_bboxes)[1],
                                    dtype=tf.float32)
    image_feature = masked_ops.masked_avg_nd(frcnn_features,
                                             object_masks,
                                             dim=1)

    # Convert tokens into token ids, folding the choice axis into the batch.
    token_to_id_layer = token_to_id.TokenToIdLayer(
        options.bert_vocab_file, options.bert_unk_token_id)
    flat_shape = [batch_size * NUM_CHOICES, -1]

    answer_choices_token_ids = token_to_id_layer(answer_choices)
    answer_choices_token_ids_reshaped = tf.reshape(answer_choices_token_ids,
                                                   flat_shape)

    answer_choices_mask = tf.sequence_mask(
        answer_choices_len, maxlen=tf.shape(answer_choices)[-1])
    answer_choices_mask_reshaped = tf.reshape(answer_choices_mask, flat_shape)

    # Bert prediction.
    bert_config = BertConfig.from_json_file(options.bert_config_file)
    bert_model = BertModel(bert_config,
                           is_training,
                           input_ids=answer_choices_token_ids_reshaped,
                           input_mask=answer_choices_mask_reshaped)

    answer_choices_cls_feature_reshaped = bert_model.get_pooled_output()
    answer_choices_cls_feature = tf.reshape(
        answer_choices_cls_feature_reshaped, [batch_size, NUM_CHOICES, -1])

    # Restore from checkpoint. The assignment map used to be computed and
    # then dropped, so the BERT weights were never loaded; apply it here.
    # The step counter is excluded so the checkpoint cannot overwrite it.
    assignment_map, _ = get_assignment_map_from_checkpoint(
        tf.compat.v1.global_variables(), checkpoint_file)
    assignment_map.pop('global_step', None)
    tf.compat.v1.train.init_from_checkpoint(checkpoint_file, assignment_map)

    # Fuse image feature: repeat it along axis 1 for every answer choice.
    # NOTE(review): assumes image_feature is rank 3 with a singleton axis 1
    # -- confirm against masked_ops.masked_avg_nd.
    image_feature_tiled = tf.tile(image_feature, [1, NUM_CHOICES, 1])
    answer_choices_cls_feature = tf.concat(
        [answer_choices_cls_feature, image_feature_tiled], -1)

    # Classification layer: one logit per answer choice.
    output = tf.compat.v1.layers.dense(answer_choices_cls_feature,
                                       units=1,
                                       activation=None)
    output = tf.squeeze(output, axis=-1)

    return {FIELD_ANSWER_PREDICTION: output}