def __init__(self, hp, voca_size, method, is_training=True):
    config = bert.BertConfig(
        vocab_size=voca_size,
        hidden_size=hp.hidden_units,
        num_hidden_layers=hp.num_blocks,
        num_attention_heads=hp.num_heads,
        intermediate_size=hp.intermediate_size,
        type_vocab_size=hp.type_vocab_size,
    )

    seq_length = hp.seq_max
    use_tpu = False
    task = Classification(data_generator.NLI.nli_info.num_classes)

    input_ids = tf.placeholder(tf.int64, [None, seq_length])
    input_mask = tf.placeholder(tf.int64, [None, seq_length])
    segment_ids = tf.placeholder(tf.int64, [None, seq_length])
    label_ids = tf.placeholder(tf.int64, [None])
    if method in [0, 1, 3, 4, 5, 6]:
        self.rf_mask = tf.placeholder(tf.float32, [None, seq_length])
    elif method in [2]:
        self.rf_mask = tf.placeholder(tf.int32, [None, seq_length])

    self.x_list = [input_ids, input_mask, segment_ids]
    self.y = label_ids

    use_one_hot_embeddings = use_tpu
    with tf.variable_scope("part1"):
        self.model1 = bert.BertModel(
            config=config,
            is_training=is_training,
            input_ids=input_ids,
            input_mask=input_mask,
            token_type_ids=segment_ids,
            use_one_hot_embeddings=use_one_hot_embeddings)
    with tf.variable_scope("part2"):
        self.model2 = bert.BertModel(
            config=config,
            is_training=is_training,
            input_ids=input_ids,
            input_mask=input_mask,
            token_type_ids=segment_ids,
            use_one_hot_embeddings=use_one_hot_embeddings)

    enc = tf.concat([
        self.model1.get_sequence_output(),
        self.model2.get_sequence_output()
    ], axis=2)

    pred, loss = task.predict(enc, label_ids, True)
    self.logits = task.logits
    self.sout = tf.nn.softmax(self.logits)
    self.pred = pred
    self.loss = loss
    self.acc = task.acc
def network(self, features, mode):
    config = bert.BertConfig(
        vocab_size=self.voca_size,
        hidden_size=self.hp.hidden_units,
        num_hidden_layers=self.hp.num_blocks,
        num_attention_heads=self.hp.num_heads,
        intermediate_size=self.hp.intermediate_size,
        type_vocab_size=self.hp.type_vocab_size,
    )

    tf.logging.info("*** Features ***")
    for name in sorted(features.keys()):
        tf.logging.info("  name = %s, shape = %s" % (name, features[name].shape))

    input_ids = features["input_ids"]
    input_mask = features["input_mask"]
    segment_ids = features["segment_ids"]
    self.label_ids = features["label_ids"]
    is_training = (tf.estimator.ModeKeys.TRAIN == mode)

    self.model = bert.BertModel(
        config=config,
        is_training=is_training,
        input_ids=input_ids,
        input_mask=input_mask,
        token_type_ids=segment_ids,
        use_one_hot_embeddings=self.use_one_hot_embeddings)
    enc = self.model.get_sequence_output()
    return self.task.predict_ex(enc, self.label_ids, mode)
def __init__(self, hp, voca_size, is_training=True):
    config = bert.BertConfig(
        vocab_size=voca_size,
        hidden_size=hp.hidden_units,
        num_hidden_layers=hp.num_blocks,
        num_attention_heads=hp.num_heads,
        intermediate_size=hp.intermediate_size,
        type_vocab_size=hp.type_vocab_size,
    )

    seq_length = hp.seq_max
    use_tpu = False

    input_ids = placeholder(tf.int64, [None, seq_length])
    input_mask = placeholder(tf.int64, [None, seq_length])
    segment_ids = placeholder(tf.int64, [None, seq_length])
    label_ids = placeholder(tf.int64, [None])

    self.x_list = [input_ids, input_mask, segment_ids]
    self.y = label_ids

    use_one_hot_embeddings = use_tpu
    self.model = bert.BertModel(
        config=config,
        is_training=is_training,
        input_ids=input_ids,
        input_mask=input_mask,
        token_type_ids=segment_ids,
        use_one_hot_embeddings=use_one_hot_embeddings)

    pooled_output = self.model.get_pooled_output()
    task = ClassificationB(is_training, hp.hidden_units, 3)
    task.call(pooled_output, label_ids)

    self.loss = task.loss
    self.logits = task.logits
    self.acc = task.acc
def __init__(self, hp, voca_size, is_training=True):
    config = bert.BertConfig(
        vocab_size=voca_size,
        hidden_size=hp.hidden_units,
        num_hidden_layers=hp.num_blocks,
        num_attention_heads=hp.num_heads,
        intermediate_size=hp.intermediate_size,
        type_vocab_size=hp.type_vocab_size,
    )

    seq_length = hp.seq_max
    use_tpu = False

    input_ids = tf.placeholder(tf.int64, [None, seq_length])
    input_mask = tf.placeholder(tf.int64, [None, seq_length])
    segment_ids = tf.placeholder(tf.int64, [None, seq_length])
    s_portion = tf.placeholder(tf.float32, [None])
    d_portion = tf.placeholder(tf.float32, [None])
    s_sum = tf.placeholder(tf.int64, [None])
    d_sum = tf.placeholder(tf.int64, [None])

    self.x_list = [input_ids, input_mask, segment_ids]
    self.y = [s_portion, d_portion]
    self.y_sum = [s_sum, d_sum]

    use_one_hot_embeddings = use_tpu
    self.model = bert.BertModel(
        config=config,
        is_training=is_training,
        input_ids=input_ids,
        input_mask=input_mask,
        token_type_ids=segment_ids,
        use_one_hot_embeddings=use_one_hot_embeddings)

    enc = self.model.get_sequence_output()
    pool = tf.layers.dense(enc[:, 0, :], hp.hidden_units, name="pool")
    s_logits = tf.layers.dense(pool, 2, name="cls_dense_support")
    d_logits = tf.layers.dense(pool, 2, name="cls_dense_dispute")

    loss = 0
    self.acc = []
    for logits, y, mask_sum in [(s_logits, self.y[0], s_sum),
                                (d_logits, self.y[1], d_sum)]:
        # Binarize the portion label at 0.5 and train with cross-entropy,
        # weighting each example by its count (mask_sum), so examples with a
        # zero count contribute no loss.
        labels = tf.cast(tf.greater(y, 0.5), tf.int32)
        labels = tf.one_hot(labels, 2)
        preds = tf.to_int32(tf.argmax(logits, axis=-1))
        acc = tf_module.accuracy(logits, y)
        self.acc.append(acc)
        loss_arr = tf.nn.softmax_cross_entropy_with_logits_v2(
            logits=logits, labels=labels)
        loss_arr = loss_arr * tf.cast(mask_sum, tf.float32)
        loss += tf.reduce_sum(loss_arr)

    self.loss = loss
    tf.summary.scalar('loss', self.loss)
    tf.summary.scalar('s_acc', self.acc[0])
    tf.summary.scalar('d_acc', self.acc[1])
def __init__(self, hp, voca_size, is_training=True):
    config = bert.BertConfig(
        vocab_size=voca_size,
        hidden_size=hp.hidden_units,
        num_hidden_layers=hp.num_blocks,
        num_attention_heads=hp.num_heads,
        intermediate_size=hp.intermediate_size,
        type_vocab_size=hp.type_vocab_size,
    )

    seq_length = hp.seq_max
    use_tpu = False

    input_ids = tf.placeholder(tf.int64, [None, seq_length])
    input_mask = tf.placeholder(tf.int64, [None, seq_length])
    segment_ids = tf.placeholder(tf.int64, [None, seq_length])
    label_ids = tf.placeholder(tf.float32, [None, 3])

    self.x_list = [input_ids, input_mask, segment_ids]
    self.y = label_ids

    use_one_hot_embeddings = use_tpu
    self.model = bert.BertModel(
        config=config,
        is_training=is_training,
        input_ids=input_ids,
        input_mask=input_mask,
        token_type_ids=segment_ids,
        use_one_hot_embeddings=use_one_hot_embeddings)

    feature = self.model.get_pooled_output()

    def dense_softmax(feature, name):
        logits = tf.layers.dense(feature, 2, name=name)
        sout = tf.nn.softmax(logits)
        return sout

    alpha = dense_softmax(feature, "dense_alpha")  # P(Arg): probability of being an Argument
    beta = dense_softmax(feature, "dense_beta")    # P(Arg+|Arg)
    gamma = dense_softmax(feature, "dense_gamma")  # P(not Noise)
    self.alpha = alpha[:, 0]
    self.beta = beta[:, 0]
    self.gamma = gamma[:, 0]

    # If the example is judged to be noise (weight gamma[:, 1]), fall back to
    # the class priors; otherwise combine the three binary decisions.
    p1_prior = 0.2
    p2_prior = 0.2
    p0_prior = 1 - p1_prior - p2_prior
    p1 = alpha[:, 0] * beta[:, 0] * gamma[:, 0] + gamma[:, 1] * p1_prior
    p2 = alpha[:, 0] * beta[:, 1] * gamma[:, 0] + gamma[:, 1] * p2_prior
    p0 = alpha[:, 1] * gamma[:, 0] + gamma[:, 1] * p0_prior

    pred = tf.stack([p0, p1, p2], axis=1)
    log_likelihood = tf.log(pred) * label_ids
    loss = -tf.reduce_mean(log_likelihood)
    self.pred = pred
    self.loss = loss
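# Sanity check for the mixture above (not part of the original model): since
# alpha, beta, and gamma are each softmax outputs and the three priors sum to
# 1, the combined p0 + p1 + p2 is a proper distribution. A standalone numpy
# sketch with illustrative values:
def _check_mixture_normalizes():
    import numpy as np
    alpha = np.array([0.7, 0.3])   # [P(Arg), P(not Arg)]
    beta = np.array([0.6, 0.4])    # [P(Arg+|Arg), P(Arg-|Arg)]
    gamma = np.array([0.9, 0.1])   # [P(not Noise), P(Noise)]
    p1_prior, p2_prior = 0.2, 0.2
    p0_prior = 1 - p1_prior - p2_prior
    p1 = alpha[0] * beta[0] * gamma[0] + gamma[1] * p1_prior
    p2 = alpha[0] * beta[1] * gamma[0] + gamma[1] * p2_prior
    p0 = alpha[1] * gamma[0] + gamma[1] * p0_prior
    assert abs((p0 + p1 + p2) - 1.0) < 1e-9  # the three cases partition the space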
def __init__(self, hp, voca_size, mode=1):
    config = bert.BertConfig(
        vocab_size=voca_size,
        hidden_size=hp.hidden_units,
        num_hidden_layers=hp.num_blocks,
        num_attention_heads=hp.num_heads,
        intermediate_size=hp.intermediate_size,
        type_vocab_size=hp.type_vocab_size,
    )

    seq_length = hp.seq_max
    use_tpu = False

    input_ids = tf.placeholder(tf.int64, [None, seq_length])
    input_mask = tf.placeholder(tf.int64, [None, seq_length])
    segment_ids = tf.placeholder(tf.int64, [None, seq_length])
    scores = tf.placeholder(tf.float32, [None])

    self.x_list = [input_ids, input_mask, segment_ids]
    self.y = scores

    use_one_hot_embeddings = use_tpu
    is_training = True
    self.model = bert.BertModel(
        config=config,
        is_training=is_training,
        input_ids=input_ids,
        input_mask=input_mask,
        token_type_ids=segment_ids,
        use_one_hot_embeddings=use_one_hot_embeddings)

    enc = self.model.get_sequence_output()
    enc = tf.layers.dense(enc, hp.hidden_units, name="dense1")  # [None, seq_length, hidden]
    matching = tf.expand_dims(enc, 3)  # [None, seq_length, hidden, 1]
    # Max-pool over the sequence axis to get a single vector per document.
    pooled_rep = tf.nn.max_pool(matching,
                                ksize=[1, seq_length, 1, 1],
                                strides=[1, 1, 1, 1],
                                padding='VALID',
                                data_format='NHWC')  # [None, 1, hidden, 1]
    # placeholder_with_default lets callers feed a precomputed document vector
    # instead of recomputing it through BERT.
    self.doc_v = tf.placeholder_with_default(
        tf.reshape(pooled_rep, [-1, hp.hidden_units]),
        (None, hp.hidden_units),
        name='pooled_rep')
    logits = tf.layers.dense(self.doc_v, 1, name="dense_reg")
    self.logits = logits
    # Pairwise hinge loss: consecutive examples are assumed to form (neg, pos) pairs.
    paired = tf.reshape(logits, [-1, 2])
    losses = tf.maximum(hp.alpha - (paired[:, 1] - paired[:, 0]), 0)
    self.loss = tf.reduce_mean(losses)
    tf.summary.scalar('loss', self.loss)
def fetch_bert_parameter(model_path):
    hp = hyperparams.HPSENLI()
    vocab_size = 30522
    vocab_filename = "bert_voca.txt"
    config = bert.BertConfig(
        vocab_size=vocab_size,
        hidden_size=hp.hidden_units,
        num_hidden_layers=hp.num_blocks,
        num_attention_heads=hp.num_heads,
        intermediate_size=hp.intermediate_size,
        type_vocab_size=hp.type_vocab_size,
    )
    hp.compare_deletion_num = 20

    seq_length = hp.seq_max
    is_training = False
    input_ids = tf.placeholder(tf.int64, [None, seq_length])
    input_mask = tf.placeholder(tf.int64, [None, seq_length])
    segment_ids = tf.placeholder(tf.int64, [None, seq_length])
    label_ids = tf.placeholder(tf.int64, [None])
    use_one_hot_embeddings = False
    model = bert.BertModel(
        config=config,
        is_training=is_training,
        input_ids=input_ids,
        input_mask=input_mask,
        token_type_ids=segment_ids,
        use_one_hot_embeddings=use_one_hot_embeddings)

    config = tf.ConfigProto(allow_soft_placement=True,
                            log_device_placement=False)
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    sess.run(tf.global_variables_initializer())

    variables = tf.contrib.slim.get_variables_to_restore()
    for v in variables:
        print(v)
    names = list([v.name for v in variables])
    loader = tf.train.Saver()
    loader.restore(sess, model_path)
    r, = sess.run([variables])
    output = dict(zip(names, r))
    for k in output:
        print(k)
    return output
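# Usage sketch for fetch_bert_parameter (the checkpoint path and the key below
# are illustrative; the actual variable names depend on the scopes used when
# the graph was built, so inspect the printed names first):
#
#   params = fetch_bert_parameter("path/to/model.ckpt")
#   word_emb = params["bert/embeddings/word_embeddings:0"]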
def __init__(self, hp, num_classes, voca_size, is_training=True):
    config = bert.BertConfig(
        vocab_size=voca_size,
        hidden_size=hp.hidden_units,
        num_hidden_layers=hp.num_blocks,
        num_attention_heads=hp.num_heads,
        intermediate_size=hp.intermediate_size,
        type_vocab_size=hp.type_vocab_size,
    )

    seq_length = hp.seq_max
    use_tpu = False

    input_ids = placeholder(tf.int64, [None, seq_length])
    input_mask = placeholder(tf.int64, [None, seq_length])
    segment_ids = placeholder(tf.int64, [None, seq_length])
    label_ids = placeholder(tf.int64, [None])

    self.x_list = [input_ids, input_mask, segment_ids]
    self.y = label_ids

    use_one_hot_embeddings = use_tpu
    self.model = bert.BertModel(
        config=config,
        is_training=is_training,
        input_ids=input_ids,
        input_mask=input_mask,
        token_type_ids=segment_ids,
        use_one_hot_embeddings=use_one_hot_embeddings)

    pooled_output = self.model.get_pooled_output()
    output_weights = tf.get_variable(
        "output_weights", [num_classes, hp.hidden_units],
        initializer=tf.truncated_normal_initializer(stddev=0.02))
    output_bias = tf.get_variable(
        "output_bias", [num_classes],
        initializer=tf.zeros_initializer())
    logits = tf.matmul(pooled_output, output_weights, transpose_b=True)
    logits = tf.nn.bias_add(logits, output_bias)

    loss_arr = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=logits, labels=label_ids)
    loss = tf.reduce_mean(input_tensor=loss_arr)
    self.loss = loss
    self.logits = logits
    self.sout = tf.nn.softmax(self.logits)
def __init__(self, hp, voca_size, mode=1):
    config = bert.BertConfig(
        vocab_size=voca_size,
        hidden_size=hp.hidden_units,
        num_hidden_layers=hp.num_blocks,
        num_attention_heads=hp.num_heads,
        intermediate_size=hp.intermediate_size,
        type_vocab_size=hp.type_vocab_size,
    )

    seq_length = hp.seq_max
    use_tpu = False
    task = Classification(data_generator.NLI.nli_info.num_classes)

    input_ids = tf.placeholder(tf.int64, [None, seq_length])
    input_mask = tf.placeholder(tf.int64, [None, seq_length])
    segment_ids = tf.placeholder(tf.int64, [None, seq_length])
    scores = tf.placeholder(tf.float32, [None])
    # self.rf_mask = tf.placeholder(tf.float32, [None, seq_length])
    self.rf_mask = tf.placeholder(tf.int32, [None, seq_length])

    self.x_list = [input_ids, input_mask, segment_ids]
    self.y = scores

    use_one_hot_embeddings = use_tpu
    is_training = True
    self.model = bert.BertModel(
        config=config,
        is_training=is_training,
        input_ids=input_ids,
        input_mask=input_mask,
        token_type_ids=segment_ids,
        use_one_hot_embeddings=use_one_hot_embeddings)

    if mode == 1:
        enc = self.model.get_pooled_output()
    else:
        enc = self.model.get_all_encoder_layers()

    self.enc = enc
    logits = tf.layers.dense(enc, 1, name="reg_dense")  # [None, 1]
    self.logits = logits
    # Pairwise hinge: the batch is assumed to hold (neg, pos) pairs consecutively.
    paired = tf.reshape(logits, [-1, 2])
    y_paired = tf.reshape(self.y, [-1, 2])
    raw_l = (paired[:, 1] - paired[:, 0])
    losses = tf.maximum(hp.alpha - (paired[:, 1] - paired[:, 0]), 0)
    self.loss = tf.reduce_mean(losses)
    tf.summary.scalar('loss', self.loss)
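# The reshape to [-1, 2] above only works if the input feed orders examples
# as [neg_0, pos_0, neg_1, pos_1, ...]. A standalone numpy sketch of the same
# pairwise hinge (margin and scores are illustrative):
def _pairwise_hinge_sketch():
    import numpy as np
    alpha = 1.0                                # margin, plays the role of hp.alpha
    logits = np.array([0.2, 1.5, 0.9, 0.7])    # scores for two (neg, pos) pairs
    paired = logits.reshape(-1, 2)             # [[0.2, 1.5], [0.9, 0.7]]
    losses = np.maximum(alpha - (paired[:, 1] - paired[:, 0]), 0)
    return losses.mean()                       # only the mis-ranked second pair incurs loss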
def __init__(self, hp, voca_size):
    config = bert.BertConfig(
        vocab_size=voca_size,
        hidden_size=hp.hidden_units,
        num_hidden_layers=hp.num_blocks,
        num_attention_heads=hp.num_heads,
        intermediate_size=hp.intermediate_size,
        type_vocab_size=hp.type_vocab_size,
    )

    seq_length = hp.seq_max
    use_tpu = False

    input_ids = tf.placeholder(tf.int64, [None, seq_length])
    input_mask = tf.placeholder(tf.int64, [None, seq_length])
    segment_ids = tf.placeholder(tf.int64, [None, seq_length])
    scores = tf.placeholder(tf.int32, [None, seq_length])

    self.x_list = [input_ids, input_mask, segment_ids]
    self.y = scores

    use_one_hot_embeddings = use_tpu
    is_training = True
    self.model = bert.BertModel(
        config=config,
        is_training=is_training,
        input_ids=input_ids,
        input_mask=input_mask,
        token_type_ids=segment_ids,
        use_one_hot_embeddings=use_one_hot_embeddings)

    enc = self.model.get_sequence_output()
    enc = tf.layers.dense(enc, hp.hidden_units, name="dense1")  # [None, seq_length, hidden]
    logits = tf.layers.dense(enc, 1, name="dense2")
    self.logits = tf.reshape(logits, [-1, seq_length])
    self.sout = tf.sigmoid(self.logits)
    #self.sout = tf.nn.softmax(self.logits, axis=1)
    #losses = tf.cast(self.y, tf.float32) * -tf.log(self.sout)  # [None, seq_length]
    self.loss = tf.reduce_sum(tf.losses.sigmoid_cross_entropy(self.y, logits=self.logits))
    tf.summary.scalar('loss', self.loss)

    p = self.sout
    pred = tf.less(tf.zeros_like(p), p - 0.5)
    self.prec = tf_module.precision_b(pred, self.y)
    self.recall = tf_module.recall_b(pred, self.y)
    tf.summary.scalar('prec', self.prec)
def __init__(self, hp, voca_size, method, is_training=True):
    config = bert.BertConfig(
        vocab_size=voca_size,
        hidden_size=hp.hidden_units,
        num_hidden_layers=hp.num_blocks,
        num_attention_heads=hp.num_heads,
        intermediate_size=hp.intermediate_size,
        type_vocab_size=hp.type_vocab_size,
    )

    seq_length = hp.seq_max
    use_tpu = False

    input_ids = placeholder(tf.int64, [None, seq_length])
    input_mask = placeholder(tf.int64, [None, seq_length])
    segment_ids = placeholder(tf.int64, [None, seq_length])
    label_ids = placeholder(tf.int64, [None])
    if method in [0, 1, 3, 4, 5, 6]:
        self.rf_mask = placeholder(tf.float32, [None, seq_length])
    elif method in [METHOD_CROSSENT, METHOD_HINGE]:
        self.rf_mask = placeholder(tf.int32, [None, seq_length])

    self.x_list = [input_ids, input_mask, segment_ids]
    self.y = label_ids

    use_one_hot_embeddings = use_tpu
    self.model = bert.BertModel(
        config=config,
        is_training=is_training,
        input_ids=input_ids,
        input_mask=input_mask,
        token_type_ids=segment_ids,
        use_one_hot_embeddings=use_one_hot_embeddings)

    pooled = self.model.get_pooled_output()
    pooled = tf.nn.dropout(pooled, hp.dropout_rate)
    logits = tf.layers.dense(pooled, data_generator.NLI.nli_info.num_classes, name="cls_dense")
    labels = tf.one_hot(label_ids, data_generator.NLI.nli_info.num_classes)
    self.acc = tf_module.accuracy(logits, label_ids)
    self.logits = logits
    tf.summary.scalar("acc", self.acc)
    self.loss_arr = tf.nn.softmax_cross_entropy_with_logits_v2(
        logits=logits, labels=labels)
    self.loss = tf.reduce_mean(self.loss_arr)
    tf.summary.scalar("loss", self.loss)
def __init__(self, hp, voca_size, method, is_training=True):
    config = bert.BertConfig(
        vocab_size=voca_size,
        hidden_size=hp.hidden_units,
        num_hidden_layers=hp.num_blocks,
        num_attention_heads=hp.num_heads,
        intermediate_size=hp.intermediate_size,
        type_vocab_size=hp.type_vocab_size,
    )

    seq_length = hp.seq_max
    use_tpu = False
    task = Classification(data_generator.NLI.nli_info.num_classes)

    input_ids = tf.placeholder(tf.int64, [None, seq_length])
    input_mask = tf.placeholder(tf.int64, [None, seq_length])
    segment_ids = tf.placeholder(tf.int64, [None, seq_length])
    label_ids = tf.placeholder(tf.int64, [None])
    if method in [0, 1, 3, 4, 5, 6]:
        self.rf_mask = tf.placeholder(tf.float32, [None, seq_length])
    elif method in [METHOD_CROSSENT, METHOD_HINGE]:
        self.rf_mask = tf.placeholder(tf.int32, [None, seq_length])

    self.x_list = [input_ids, input_mask, segment_ids]
    self.y = label_ids

    use_one_hot_embeddings = use_tpu
    self.model = bert.BertModel(
        config=config,
        is_training=is_training,
        input_ids=input_ids,
        input_mask=input_mask,
        token_type_ids=segment_ids,
        use_one_hot_embeddings=use_one_hot_embeddings)

    pred, loss = task.predict(self.model.get_sequence_output(), label_ids, True)
    self.logits = task.logits
    self.sout = tf.nn.softmax(self.logits)
    self.pred = pred
    self.loss = loss
    self.embedding_output = self.model.get_embedding_output()
    self.all_layers = self.model.get_all_encoder_layers()
def __init__(self, hp, voca_size):
    config = bert.BertConfig(
        vocab_size=voca_size,
        hidden_size=hp.hidden_units,
        num_hidden_layers=hp.num_blocks,
        num_attention_heads=hp.num_heads,
        intermediate_size=hp.intermediate_size,
        type_vocab_size=hp.type_vocab_size,
    )

    seq_length = hp.seq_max
    use_tpu = False

    input_ids = tf.placeholder(tf.int64, [None, seq_length])
    input_mask = tf.placeholder(tf.int64, [None, seq_length])
    segment_ids = tf.placeholder(tf.int64, [None, seq_length])
    score = tf.placeholder(tf.float32, [None])

    self.x_list = [input_ids, input_mask, segment_ids]
    self.y = score

    use_one_hot_embeddings = use_tpu
    is_training = True
    self.model = bert.BertModel(
        config=config,
        is_training=is_training,
        input_ids=input_ids,
        input_mask=input_mask,
        token_type_ids=segment_ids,
        use_one_hot_embeddings=use_one_hot_embeddings)

    all_layers = self.model.get_all_encoder_layers()
    enc = tf.concat(all_layers, axis=2)  # [None, seq_len, hidden_dim * num_blocks]
    # Score each token, then sum over the sequence for a document-level score.
    # (Applying the dense layer to the full batch tensor keeps the batch
    # dimension, matching the shape comments below.)
    per_token_score = tf.layers.dense(enc, 1, name="reg_dense")  # [None, seq_len, 1]
    self.logits = tf.reduce_sum(per_token_score, axis=1)  # [None, 1]
    paired = tf.reshape(self.logits, [-1, 2])
    y_paired = tf.reshape(self.y, [-1, 2])
    raw_l = (paired[:, 1] - paired[:, 0])
    losses = tf.maximum(hp.alpha - (paired[:, 1] - paired[:, 0]), 0)
    self.loss = tf.reduce_mean(losses)
    # Fraction of pairs in which the positive outscores the negative.
    gain = tf.maximum(paired[:, 1] - paired[:, 0], 0)
    self.acc = tf.cast(tf.count_nonzero(gain), tf.float32) / tf.reduce_sum(tf.ones_like(gain))
    tf.summary.scalar('loss', self.loss)
    tf.summary.scalar('acc', self.acc)
def create_model(bert_config, is_training, input_ids, input_mask, segment_ids,
                 labels, num_labels, use_one_hot_embeddings):
    """Creates a classification model."""
    model = bert.BertModel(
        config=bert_config,
        is_training=is_training,
        input_ids=input_ids,
        input_mask=input_mask,
        token_type_ids=segment_ids,
        use_one_hot_embeddings=use_one_hot_embeddings)

    # In the demo, we are doing a simple classification task on the entire
    # segment.
    #
    # If you want to use the token-level output, use model.get_sequence_output()
    # instead.
    output_layer = model.get_pooled_output()
    hidden_size = output_layer.shape[-1].value

    output_weights = tf.get_variable(
        "output_weights", [num_labels, hidden_size],
        initializer=tf.truncated_normal_initializer(stddev=0.02))
    output_bias = tf.get_variable(
        "output_bias", [num_labels], initializer=tf.zeros_initializer())

    with tf.variable_scope("loss"):
        if is_training:
            # I.e., 0.1 dropout
            output_layer = tf.nn.dropout(output_layer, keep_prob=0.9)

        logits = tf.matmul(output_layer, output_weights, transpose_b=True)
        logits = tf.nn.bias_add(logits, output_bias)
        probabilities = tf.nn.softmax(logits, axis=-1)
        log_probs = tf.nn.log_softmax(logits, axis=-1)
        one_hot_labels = tf.one_hot(labels, depth=num_labels, dtype=tf.float32)
        per_example_loss = -tf.reduce_sum(one_hot_labels * log_probs, axis=-1)
        loss = tf.reduce_mean(per_example_loss)
        #loss = f1_loss(logits, one_hot_labels)
        return (loss, per_example_loss, logits, probabilities)
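# Usage sketch (follows the standard BERT run_classifier pattern; the input
# tensors are assumed to come from an input_fn and bert_config from a config
# file, so the names here are illustrative):
#
#   (loss, per_example_loss, logits, probabilities) = create_model(
#       bert_config, is_training, input_ids, input_mask, segment_ids,
#       label_ids, num_labels, use_one_hot_embeddings=False)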
def __init__(self, hp, voca_size, is_training=True):
    config = bert.BertConfig(
        vocab_size=voca_size,
        hidden_size=hp.hidden_units,
        num_hidden_layers=hp.num_blocks,
        num_attention_heads=hp.num_heads,
        intermediate_size=hp.intermediate_size,
        type_vocab_size=hp.type_vocab_size,
    )

    seq_length = hp.seq_max
    use_tpu = False
    task = Classification(2)

    input_ids = tf.placeholder(tf.int64, [None, seq_length])
    input_mask = tf.placeholder(tf.int64, [None, seq_length])
    segment_ids = tf.placeholder(tf.int64, [None, seq_length])
    label_ids = tf.placeholder(tf.int64, [None])

    self.x_list = [input_ids, input_mask, segment_ids]
    self.y = label_ids

    use_one_hot_embeddings = use_tpu
    self.model = bert.BertModel(
        config=config,
        is_training=is_training,
        input_ids=input_ids,
        input_mask=input_mask,
        token_type_ids=segment_ids,
        use_one_hot_embeddings=use_one_hot_embeddings)

    pred, loss = task.predict(self.model.get_sequence_output(), label_ids, True)
    self.logits = task.logits
    self.sout = tf.nn.softmax(self.logits)
    self.pred = pred
    self.loss = loss
    self.acc = task.acc
    tf.summary.scalar('loss', self.loss)
    tf.summary.scalar('acc', self.acc)
def __init__(self, hp, voca_size):
    config = bert.BertConfig(
        vocab_size=voca_size,
        hidden_size=hp.hidden_units,
        num_hidden_layers=hp.num_blocks,
        num_attention_heads=hp.num_heads,
        intermediate_size=hp.intermediate_size,
        type_vocab_size=hp.type_vocab_size,
    )

    seq_length = hp.seq_max
    use_tpu = False

    input_ids = tf.placeholder(tf.int64, [None, seq_length])
    input_mask = tf.placeholder(tf.int64, [None, seq_length])
    segment_ids = tf.placeholder(tf.int64, [None, seq_length])
    self.begin = tf.placeholder(tf.int32, [None, seq_length])
    self.end = tf.placeholder(tf.int32, [None, seq_length])
    self.y = tf.stack([self.begin, self.end], axis=2)

    self.x_list = [input_ids, input_mask, segment_ids]

    use_one_hot_embeddings = use_tpu
    is_training = True
    self.model = bert.BertModel(
        config=config,
        is_training=is_training,
        input_ids=input_ids,
        input_mask=input_mask,
        token_type_ids=segment_ids,
        use_one_hot_embeddings=use_one_hot_embeddings)

    enc = self.model.get_sequence_output()
    enc = tf.layers.dense(enc, hp.hidden_units, name="dense1")  # [None, seq_length, hidden]
    self.logits = tf.layers.dense(enc, 2, name="dense2")  # begin/end score per token
    # Softmax over the sequence axis: a distribution over positions for each
    # of the two pointers.
    self.sout = tf.nn.softmax(self.logits, axis=1)
    losses = tf.cast(self.y, tf.float32) * -tf.log(self.sout)  # [None, seq_length, 2]
    self.loss = tf.reduce_sum(losses)
    tf.summary.scalar('loss', self.loss)
def __init__(self, hp, voca_size, method, is_training=True):
    config = bert.BertConfig(
        vocab_size=voca_size,
        hidden_size=hp.hidden_units,
        num_hidden_layers=hp.num_blocks,
        num_attention_heads=hp.num_heads,
        intermediate_size=hp.intermediate_size,
        type_vocab_size=hp.type_vocab_size,
    )

    seq_length = hp.seq_max
    use_tpu = False
    task = Classification(data_generator.NLI.nli_info.num_classes)

    input_ids = tf.placeholder(tf.int64, [None, seq_length])
    input_mask = tf.placeholder(tf.int64, [None, seq_length])
    segment_ids = tf.placeholder(tf.int64, [None, seq_length])
    label_ids = tf.placeholder(tf.int64, [None])
    if method in [0, 1, 3, 4, 5, 6]:
        self.rf_mask = tf.placeholder(tf.float32, [None, seq_length])
    elif method in [METHOD_CROSSENT, METHOD_HINGE]:
        self.rf_mask = tf.placeholder(tf.int32, [None, seq_length])

    self.x_list = [input_ids, input_mask, segment_ids]
    self.y = label_ids

    use_one_hot_embeddings = use_tpu
    self.model = bert.BertModel(
        config=config,
        is_training=is_training,
        input_ids=input_ids,
        input_mask=input_mask,
        token_type_ids=segment_ids,
        use_one_hot_embeddings=use_one_hot_embeddings)

    pred, loss = task.predict(self.model.get_sequence_output(), label_ids, True)
    self.logits = task.logits
    self.sout = tf.nn.softmax(self.logits)
    self.pred = pred
    self.loss = loss
    self.acc = task.acc
    tf.summary.scalar('loss', self.loss)
    tf.summary.scalar('acc', self.acc)

    # Auxiliary head: per-token "conflict" scores trained against rf_mask.
    # The method variants differ in activation/normalization and in the loss
    # used to tie the scores to the mask.
    if method == 0:
        cl = tf.layers.dense(self.model.get_sequence_output(), 1, name="aux_conflict")
        cl = tf.reshape(cl, [-1, seq_length])
        cl = tf.nn.sigmoid(cl)
        # cl = tf.contrib.layers.layer_norm(cl)
        self.conf_logits = cl
        # self.pkc = self.conf_logits * self.rf_mask
        # rl_loss_list = tf.reduce_sum(self.pkc, axis=1)
        rl_loss_list = tf.reduce_sum(self.conf_logits * tf.cast(self.rf_mask, tf.float32), axis=1)
        self.rl_loss = tf.reduce_mean(rl_loss_list)
    elif method == 1:
        cl = tf.layers.dense(self.model.get_sequence_output(), 1, name="aux_conflict")
        cl = tf.reshape(cl, [-1, seq_length])
        cl = tf.contrib.layers.layer_norm(cl)
        self.conf_logits = cl
        #rl_loss_list = tf_module.cossim(cl, self.rf_mask)
        #self.pkc = self.conf_logits * self.rf_mask
        rl_loss_list = tf.reduce_sum(self.conf_logits * self.rf_mask, axis=1)
        self.rl_loss = tf.reduce_mean(rl_loss_list)
    elif method == METHOD_CROSSENT:
        cl = tf.layers.dense(self.model.get_sequence_output(), 2, name="aux_conflict")
        probs = tf.nn.softmax(cl)
        losses = tf.losses.softmax_cross_entropy(
            onehot_labels=tf.one_hot(self.rf_mask, 2), logits=cl)
        self.conf_logits = probs[:, :, 1] - 0.5
        self.rl_loss = tf.reduce_mean(losses)
    elif method == 3:
        cl = tf.layers.dense(self.model.get_sequence_output(), 1, name="aux_conflict")
        cl = tf.reshape(cl, [-1, seq_length])
        self.bias = tf.Variable(0.0)
        self.conf_logits = (cl + self.bias)
        rl_loss_list = tf.nn.relu(1 - self.conf_logits * self.rf_mask)
        rl_loss_list = tf.reduce_mean(rl_loss_list, axis=1)
        self.rl_loss = tf.reduce_mean(rl_loss_list)
        labels = tf.greater(self.rf_mask, 0)
        hinge_losses = tf.losses.hinge_loss(labels, self.conf_logits)
        self.hinge_loss = tf.reduce_sum(hinge_losses)
    elif method == 4:
        cl = tf.layers.dense(self.model.get_sequence_output(), 1, name="aux_conflict")
        cl = tf.reshape(cl, [-1, seq_length])
        cl = tf.contrib.layers.layer_norm(cl)
        self.conf_logits = cl
        labels = tf.greater(self.rf_mask, 0)
        hinge_losses = tf.losses.hinge_loss(labels, self.conf_logits)
        self.rl_loss = hinge_losses
    elif method == 5:
        cl = tf.layers.dense(self.model.get_sequence_output(), 1, name="aux_conflict")
        cl = tf.reshape(cl, [-1, seq_length])
        #cl = tf.contrib.layers.layer_norm(cl)
        self.conf_logits = cl
        self.labels = tf.cast(tf.greater(self.rf_mask, 0), tf.float32)
        self.rl_loss = tf.reduce_mean(
            tf_module.correlation_coefficient_loss(cl, -self.rf_mask))
    elif method == 6:
        cl = tf.layers.dense(self.model.get_sequence_output(), 1, name="aux_conflict")
        #cl = tf.layers.dense(cl1, 1, name="aux_conflict2")
        cl = tf.reshape(cl, [-1, seq_length])
        #cl = tf.nn.sigmoid(cl)
        #cl = tf.contrib.layers.layer_norm(cl)
        self.conf_logits = cl
        #rl_loss_list = tf.reduce_sum(self.conf_logits * self.rf_mask, axis=1)
        self.rl_loss = tf.reduce_mean(
            tf_module.correlation_coefficient_loss(cl, -self.rf_mask))
    elif method == METHOD_HINGE:
        cl = tf.layers.dense(self.model.get_sequence_output(), 1, name="aux_conflict")
        cl = tf.reshape(cl, [-1, seq_length])
        self.conf_logits = cl
        labels = tf.greater(self.rf_mask, 0)
        hinge_losses = tf.losses.hinge_loss(labels, self.conf_logits)
        self.rl_loss = tf.reduce_sum(hinge_losses)

    self.conf_softmax = tf.nn.softmax(self.conf_logits, axis=-1)
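# tf_module.correlation_coefficient_loss is defined elsewhere in the repo.
# A minimal Pearson-style sketch of such a loss (an assumption about its
# behavior, not the repo's actual implementation):
def _correlation_coefficient_loss_sketch(x, y):
    # Returns 1 - r per example, so minimizing it maximizes correlation
    # between the per-token scores x and the targets y along the sequence axis.
    mx = tf.reduce_mean(x, axis=1, keepdims=True)
    my = tf.reduce_mean(y, axis=1, keepdims=True)
    xm, ym = x - mx, y - my
    r_num = tf.reduce_sum(xm * ym, axis=1)
    r_den = tf.sqrt(tf.reduce_sum(xm * xm, axis=1) * tf.reduce_sum(ym * ym, axis=1) + 1e-8)
    return 1.0 - r_num / r_den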
def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
    """The `model_fn` for TPUEstimator."""
    tf.logging.info("*** Features ***")
    for name in sorted(features.keys()):
        tf.logging.info("  name = %s, shape = %s" % (name, features[name].shape))

    input_ids = features["input_ids"]
    input_mask = features["input_mask"]
    dummy_data = features["dummy"]
    segment_ids = features["segment_ids"]
    masked_lm_positions = features["masked_lm_positions"]
    masked_lm_ids = features["masked_lm_ids"]
    masked_lm_weights = features["masked_lm_weights"]
    next_sentence_labels = features["next_sentence_labels"]

    is_training = (mode == tf.estimator.ModeKeys.TRAIN)

    model = modeling.BertModel(
        config=bert_config,
        is_training=is_training,
        input_ids=input_ids,
        input_mask=input_mask,
        token_type_ids=segment_ids,
        use_one_hot_embeddings=use_one_hot_embeddings,
    )

    (masked_lm_loss, masked_lm_example_loss,
     masked_lm_log_probs) = get_masked_lm_output(
         bert_config, model.get_sequence_output(), model.get_embedding_table(),
         masked_lm_positions, masked_lm_ids, masked_lm_weights)

    (next_sentence_loss, next_sentence_example_loss,
     next_sentence_log_probs) = get_next_sentence_output(
         bert_config, model.get_pooled_output(), next_sentence_labels)

    total_loss = masked_lm_loss + next_sentence_loss

    tvars = tf.trainable_variables()
    initialized_variable_names = {}
    scaffold_fn = None
    if init_checkpoint:
        (assignment_map, initialized_variable_names
         ) = modeling.get_assignment_map_from_checkpoint(tvars, init_checkpoint)
        if use_tpu:
            def tpu_scaffold():
                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
                return tf.train.Scaffold()

            scaffold_fn = tpu_scaffold
        else:
            tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

    tf.logging.info("**** Trainable Variables ****")
    for var in tvars:
        init_string = ""
        if var.name in initialized_variable_names:
            init_string = ", *INIT_FROM_CKPT*"
        tf.logging.info("  name = %s, shape = %s%s", var.name, var.shape,
                        init_string)

    output_spec = None
    if mode == tf.estimator.ModeKeys.TRAIN:
        train_op = optimization.create_optimizer(total_loss, learning_rate,
                                                 num_train_steps,
                                                 num_warmup_steps, use_tpu)
        output_spec = tf.contrib.tpu.TPUEstimatorSpec(
            mode=mode,
            loss=total_loss,
            train_op=train_op,
            scaffold_fn=scaffold_fn)
    elif mode == tf.estimator.ModeKeys.EVAL:
        def metric_fn(masked_lm_example_loss, masked_lm_log_probs,
                      masked_lm_ids, masked_lm_weights,
                      next_sentence_example_loss, next_sentence_log_probs,
                      next_sentence_labels):
            """Computes the loss and accuracy of the model."""
            masked_lm_log_probs = tf.reshape(
                masked_lm_log_probs, [-1, masked_lm_log_probs.shape[-1]])
            masked_lm_predictions = tf.argmax(masked_lm_log_probs, axis=-1,
                                              output_type=tf.int32)
            masked_lm_example_loss = tf.reshape(masked_lm_example_loss, [-1])
            masked_lm_ids = tf.reshape(masked_lm_ids, [-1])
            masked_lm_weights = tf.reshape(masked_lm_weights, [-1])
            masked_lm_accuracy = tf.metrics.accuracy(
                labels=masked_lm_ids,
                predictions=masked_lm_predictions,
                weights=masked_lm_weights)
            masked_lm_mean_loss = tf.metrics.mean(
                values=masked_lm_example_loss, weights=masked_lm_weights)

            next_sentence_log_probs = tf.reshape(
                next_sentence_log_probs, [-1, next_sentence_log_probs.shape[-1]])
            next_sentence_predictions = tf.argmax(next_sentence_log_probs,
                                                  axis=-1, output_type=tf.int32)
            next_sentence_labels = tf.reshape(next_sentence_labels, [-1])
            next_sentence_accuracy = tf.metrics.accuracy(
                labels=next_sentence_labels,
                predictions=next_sentence_predictions)
            next_sentence_mean_loss = tf.metrics.mean(
                values=next_sentence_example_loss)

            return {
                "masked_lm_accuracy": masked_lm_accuracy,
                "masked_lm_loss": masked_lm_mean_loss,
                "next_sentence_accuracy": next_sentence_accuracy,
                "next_sentence_loss": next_sentence_mean_loss,
            }

        eval_metrics = (metric_fn, [
            masked_lm_example_loss, masked_lm_log_probs, masked_lm_ids,
            masked_lm_weights, next_sentence_example_loss,
            next_sentence_log_probs, next_sentence_labels
        ])
        output_spec = tf.contrib.tpu.TPUEstimatorSpec(
            mode=mode,
            loss=total_loss,
            eval_metrics=eval_metrics,
            scaffold_fn=scaffold_fn)
    else:
        raise ValueError("Only TRAIN and EVAL modes are supported: %s" % (mode))

    return output_spec
def __init__(self, hp, voca_size, num_class_list, is_training=True):
    config = bert.BertConfig(
        vocab_size=voca_size,
        hidden_size=hp.hidden_units,
        num_hidden_layers=hp.num_blocks,
        num_attention_heads=hp.num_heads,
        intermediate_size=hp.intermediate_size,
        type_vocab_size=hp.type_vocab_size,
    )

    seq_length = hp.seq_max
    use_tpu = False

    input_ids = tf.placeholder(tf.int64, [None, seq_length], name="input_ids")
    input_mask = tf.placeholder(tf.int64, [None, seq_length], name="input_mask")
    segment_ids = tf.placeholder(tf.int64, [None, seq_length], name="segment_ids")
    self.x_list = [input_ids, input_mask, segment_ids]
    self.y1 = tf.placeholder(tf.int64, [None], name="y1")
    self.y2 = tf.placeholder(tf.int64, [None], name="y2")
    self.y = [self.y1, self.y2]

    summary1 = {}
    summary2 = {}
    self.summary_list = [summary1, summary2]

    use_one_hot_embeddings = use_tpu
    self.model = bert.BertModel(
        config=config,
        is_training=is_training,
        input_ids=input_ids,
        input_mask=input_mask,
        token_type_ids=segment_ids,
        use_one_hot_embeddings=use_one_hot_embeddings)

    task = Classification(num_class_list[0])
    pred, loss = task.predict(self.model.get_sequence_output(), self.y1, True)
    self.logits = task.logits
    self.sout = tf.nn.softmax(self.logits)
    self.pred = pred
    self.loss = loss
    self.acc = task.acc
    summary1['loss1'] = tf.summary.scalar('loss', self.loss)
    summary1['acc1'] = tf.summary.scalar('acc', self.acc)

    with tf.variable_scope("cls2"):
        task2 = Classification(num_class_list[1])
        pred, loss = task2.predict(self.model.get_sequence_output(), self.y2, True)
        self.logits2 = task2.logits
        self.sout2 = tf.nn.softmax(self.logits2)
        self.pred2 = pred
        self.loss2 = loss
        self.acc2 = task2.acc
        summary2['loss2'] = tf.summary.scalar('loss2', self.loss2)
        summary2['acc2'] = tf.summary.scalar('acc2', self.acc2)

    self.logit_list = [self.logits, self.logits2]
    self.loss_list = [self.loss, self.loss2]
    self.pred_list = [self.pred, self.pred2]
def __init__(self, hp, voca_size, method, is_training=True):
    config = bert.BertConfig(
        vocab_size=voca_size,
        hidden_size=hp.hidden_units,
        num_hidden_layers=hp.num_blocks,
        num_attention_heads=hp.num_heads,
        intermediate_size=hp.intermediate_size,
        type_vocab_size=hp.type_vocab_size,
    )

    seq_length = hp.seq_max
    use_tpu = False
    task = Classification(data_generator.NLI.nli_info.num_classes)
    task2_num_classes = 3

    input_ids = tf.placeholder(tf.int64, [None, seq_length])
    input_mask = tf.placeholder(tf.int64, [None, seq_length])
    segment_ids = tf.placeholder(tf.int64, [None, seq_length])
    label_ids = tf.placeholder(tf.int64, [None])
    if method in [0, 1, 3, 4, 5, 6]:
        self.rf_mask = tf.placeholder(tf.float32, [None, seq_length])
    elif method in [2]:
        self.rf_mask = tf.placeholder(tf.int32, [None, seq_length])

    self.x_list = [input_ids, input_mask, segment_ids]
    self.y = label_ids
    self.y1 = tf.placeholder(tf.int64, [None], name="y1")
    self.y2 = tf.placeholder(tf.int64, [None], name="y2")
    self.f_loc1 = tf.placeholder(tf.int64, [None], name="f_loc1")
    self.f_loc2 = tf.placeholder(tf.int64, [None], name="f_loc2")

    use_one_hot_embeddings = use_tpu
    self.model = bert.BertModel(
        config=config,
        is_training=is_training,
        input_ids=input_ids,
        input_mask=input_mask,
        token_type_ids=segment_ids,
        use_one_hot_embeddings=use_one_hot_embeddings)

    pred, loss = task.predict(self.model.get_sequence_output(), label_ids, True)
    self.logits = task.logits
    self.sout = tf.nn.softmax(self.logits)
    self.pred = pred
    self.loss = loss
    self.acc = task.acc
    #tf.summary.scalar('loss', self.loss)
    #tf.summary.scalar('acc', self.acc)

    enc = self.model.get_sequence_output()  # [Batch, seq_len, hidden_dim]
    logits_raw = tf.layers.dense(enc, 3)  # [Batch, seq_len, 3]

    def select(logits, f_loc):
        mask = tf.reshape(tf.one_hot(f_loc, seq_length), [-1, seq_length, 1])  # [Batch, seq_len, 1]
        t = tf.reduce_sum(logits * mask, axis=1)
        return t

    logits1 = select(logits_raw, self.f_loc1)  # [Batch, 3]
    logits2 = select(logits_raw, self.f_loc2)  # [Batch, 3]
    self.logits1 = logits1
    self.logits2 = logits2
    label1 = tf.one_hot(self.y1, task2_num_classes)  # [Batch, num_class]
    label2 = tf.one_hot(self.y2, task2_num_classes)
    losses1_arr = tf.nn.softmax_cross_entropy_with_logits_v2(
        logits=logits1, labels=label1)
    losses2_arr = tf.nn.softmax_cross_entropy_with_logits_v2(
        logits=logits2, labels=label2)
    self.loss_paired = tf.reduce_mean(losses1_arr)  #+ tf.reduce_mean(losses2_arr)
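# The select() above gathers one token's logits per example by multiplying
# with a one-hot mask and summing over the sequence axis. A standalone numpy
# check of the same trick (shapes and values are illustrative):
def _check_one_hot_select():
    import numpy as np
    seq_length, dim = 4, 3
    logits = np.arange(seq_length * dim, dtype=np.float32).reshape(1, seq_length, dim)
    f_loc = np.array([2])                          # pick token 2 of example 0
    mask = np.eye(seq_length)[f_loc][:, :, None]   # [batch, seq_len, 1]
    picked = (logits * mask).sum(axis=1)           # [batch, dim]
    assert np.allclose(picked[0], logits[0, 2])    # equals direct indexing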