def test_sequence_accuracy_identical_samples(self):
  """Sequence accuracy must be exactly 1.0 when prediction == ground truth."""
  identical_tf = tf.convert_to_tensor(self._fake_labels())
  # Feed the same tensor as both prediction and ground truth.
  accuracy_op = metrics.sequence_accuracy(identical_tf, identical_tf,
                                          self.rej_char)
  with self.initialized_session() as sess:
    accuracy = sess.run(accuracy_op)
  self.assertAlmostEqual(accuracy, 1.0)
def test_sequence_accuracy_one_char_difference(self):
  """Corrupting one char of one sequence drops accuracy by 1/batch_size."""
  ground_truth_np = self._fake_labels()
  ground_truth_tf = tf.convert_to_tensor(ground_truth_np)
  # BUG FIX: ((0, 0)) is just the tuple (0, 0) — parentheses alone do not
  # create a nested tuple. bad_indexes must be a collection of (row, col)
  # pairs, so a trailing comma is required to pass one pair.
  prediction_tf = tf.convert_to_tensor(
      self._incorrect_copy(ground_truth_np, bad_indexes=((0, 0),)))
  accuracy_tf = metrics.sequence_accuracy(prediction_tf, ground_truth_tf,
                                          self.rej_char)
  with self.initialized_session() as sess:
    accuracy_np = sess.run(accuracy_tf)
  # 1 of 4 sequences is incorrect.
  self.assertAlmostEqual(accuracy_np, 1.0 - 1.0 / self.batch_size)
def main(_):
  """Builds the training graph and runs the training loop.

  Creates the dataset/model from flags, wires up loss and streaming
  accuracy metrics, then hands everything to train().
  """
  prepare_training_dir()
  dataset = common_flags.create_dataset(split_name=FLAGS.split_name)
  print(dataset)
  model = common_flags.create_model(dataset.num_char_classes,
                                    dataset.max_sequence_length,
                                    dataset.num_of_views, dataset.null_code)
  hparams = get_training_hparams()

  # If ps_tasks is zero, the local device is used. When using multiple
  # (non-local) replicas, the ReplicaDeviceSetter distributes the variables
  # across the different devices.
  device_setter = tf.train.replica_device_setter(FLAGS.ps_tasks,
                                                 merge_devices=True)
  with tf.device(device_setter):
    data = data_provider.get_data(
        dataset,
        FLAGS.batch_size,
        augment=hparams.use_augment_input,
        central_crop_size=common_flags.get_crop_size(),
        use_default_augment=hparams.use_default_augment)
    endpoints = model.create_base(data.images, data.labels_one_hot)
    total_loss = model.create_loss(data, endpoints)
    # Use dataset.null_code directly rather than reaching into the model's
    # private `_params` attribute — the model was constructed with this same
    # value above, so behavior is unchanged.
    character_accuracy = metrics.char_accuracy(
        endpoints.predicted_chars,
        data.labels,
        streaming=True,
        rej_char=dataset.null_code)
    sequence_accuracy = metrics.sequence_accuracy(
        endpoints.predicted_chars,
        data.labels,
        streaming=True,
        rej_char=dataset.null_code)
    model.create_summaries(data, endpoints, dataset.charset, is_training=True)
    init_fn = model.create_init_fn_to_restore(FLAGS.checkpoint,
                                              FLAGS.checkpoint_inception)
    if FLAGS.show_graph_stats:
      logging.info('Total number of weights in the graph: %s',
                   calculate_graph_metrics())
    train(total_loss, init_fn, character_accuracy, sequence_accuracy, hparams)
def create_summaries(self, data, endpoints, charset, is_training):
  """Creates all summaries for the model.

  Args:
    data: InputEndpoints namedtuple.
    endpoints: OutputEndpoints namedtuple.
    charset: A dictionary with mapping between character codes and
      unicode characters. Use the one provided by a dataset.charset.
    is_training: If True will create summary prefixes for training job,
      otherwise - for evaluation.

  Returns:
    A list of evaluation ops
  """
  prefix = 'train' if is_training else 'eval'

  def sname(label):
    return '%s/%s' % (prefix, label)

  max_outputs = 4
  # TODO(gorban): uncomment, when tf.summary.text released.
  # charset_mapper = CharsetMapper(charset)
  # pr_text = charset_mapper.get_text(
  #     endpoints.predicted_chars[:max_outputs,:])
  # tf.summary.text(sname('text/pr'), pr_text)
  # gt_text = charset_mapper.get_text(data.labels[:max_outputs,:])
  # tf.summary.text(sname('text/gt'), gt_text)
  tf.summary.image(sname('image'), data.images, max_outputs=max_outputs)

  if is_training:
    # Training job: also show the un-augmented images and weight histograms.
    tf.summary.image(
        sname('image/orig'), data.images_orig, max_outputs=max_outputs)
    for var in tf.trainable_variables():
      tf.summary.histogram(var.op.name, var)
    return None

  # Evaluation job: each streaming metric is a (value, update_op) pair.
  # Values become printed scalar summaries; update ops are returned so the
  # eval loop can run them.
  metric_tuples = {
      'CharacterAccuracy': metrics.char_accuracy(
          endpoints.predicted_chars,
          data.labels,
          streaming=True,
          rej_char=self._params.null_code),
      # Sequence accuracy computed by cutting sequence at the first null char
      'SequenceAccuracy': metrics.sequence_accuracy(
          endpoints.predicted_chars,
          data.labels,
          streaming=True,
          rej_char=self._params.null_code),
  }
  update_ops = []
  for name, (value, update_op) in metric_tuples.items():
    summary_name = 'eval/' + name
    tf.summary.scalar(summary_name, tf.Print(value, [value], summary_name))
    update_ops.append(update_op)
  return update_ops
def convolutional_attention_network(self, vocab_size, max_seq=25,
                                    batch_size=8):
    """Builds the full training graph for a conv-attention OCR model.

    A ResNet-34 backbone encodes the image; a transformer-style decoder
    (masked self-attention + 2D attention over the feature map + a
    position-wise feed-forward network) predicts the character sequence.

    Args:
      vocab_size: Number of character classes in the output distribution.
      max_seq: Maximum decoded sequence length (placeholder width).
      batch_size: Static batch size baked into all placeholders.

    Returns:
      Tuple of (inputs, output, length, loss, optimizer,
      output_probabilities, summary_op, init, word_acc) graph nodes.
    """
    # Input image placeholder; assumes 128x400 RGB — TODO confirm with caller.
    inputs = tf.placeholder(tf.float32, [batch_size, 128, 400, 3])
    # Ground-truth character ids and true sequence lengths.
    output = tf.placeholder(tf.int32, [batch_size, max_seq])
    length = tf.placeholder(tf.int32, [batch_size])
    resnet_34 = ResNet(34, 10)

    def resnet_34_backbone(x):
        # Runs the ResNet-34 forward pass; print(out) is a debug leftover
        # that logs the tensor spec at graph-construction time.
        out = resnet_34.network(x)
        print(out)
        return out

    feature_map_resnet = resnet_34_backbone(
        inputs)  # feature map of resnet 34
    # Project the backbone feature map to 1024 channels for 2D attention.
    feature_map = transform_dimension(feature_map_resnet, 1024)
    # Six stacked bottleneck blocks distill a global image representation;
    # the first block reads the raw backbone output.
    for i in range(6):
        global_representation = bottle_resblock(
            feature_map_resnet if i == 0 else global_representation,
            512,
            scope='bottle_resblock_' + str(i))
    global_representation = global_avg_pooling(global_representation)
    global_representation = fully_conneted(global_representation, 512)

    ########################################### DECODER ###########################################
    def decoder_embedding(y, vocab_size, embed_size=512, shifted=True):
        # NOTE(review): `shifted` is unused here; the right-shift happens
        # below via tf.pad. Also, tf.random_normal is an op, not a
        # tf.Variable — presumably the embedding table is therefore neither
        # trainable nor stable across session runs; verify intent.
        embeddings = tf.random_normal(shape=(vocab_size, embed_size))
        embedded = tf.nn.embedding_lookup(embeddings, y)
        return embedded

    def positional_encoding(x):
        # Sinusoidal positional encoding added element-wise to x.
        seq_len, dim = x.get_shape().as_list()[-2:]
        encoded_vec = np.array([
            pos / np.power(10000, 2 * i / dim) for pos in range(seq_len)
            for i in range(dim)
        ])
        # Even indices get sin, odd indices cos (over the flattened vector).
        encoded_vec[::2] = np.sin(encoded_vec[::2])
        encoded_vec[1::2] = np.cos(encoded_vec[1::2])
        encoded_vec_tensor = tf.convert_to_tensor(encoded_vec.reshape(
            [seq_len, dim]), dtype=tf.float32)
        return tf.add(x, encoded_vec_tensor)

    def layer_norm(x):
        return tf.contrib.layers.layer_norm(x)

    y = decoder_embedding(output, vocab_size)
    y = tf.pad(y, [[0, 0], [1, 0], [0, 0]
                   ])[:, :-1, :]  # shift right from official transformer
    y = positional_encoding(y)  # (bs, seq_len, 512)
    # Concatenate the global image representation onto every timestep.
    decoder_input = []
    for i in range(y.get_shape().as_list()[1]):
        decoder_input.append(
            tf.concat([global_representation, y[:, i, :]], 1))  # (bs, 1, 512)
    decoder_input = tf.stack(decoder_input, 1)  # (bs, seq_len, 1024)

    #### MASKED SELF ATTENTION ###
    masked_self_attention = Attention(dropout=0)
    decoder_output = masked_self_attention.multi_head(
        decoder_input, decoder_input, decoder_input)
    norm_1 = layer_norm(decoder_output)
    # Residual connection around the (normalized) attention output.
    decoder_output = decoder_input + norm_1

    ### 2D self attention ###
    # Cross-attention: decoder queries attend over the flattened feature map.
    two_D_attention = Attention(masked=False, dropout=0)
    enc_reshape = tf.reshape(feature_map, [
        decoder_output.get_shape().as_list()[0], -1,
        decoder_output.get_shape().as_list()[-1]
    ])
    decoder_output_2 = two_D_attention.multi_head(decoder_output,
                                                  enc_reshape, enc_reshape)
    norm_2 = layer_norm(decoder_output_2)
    decoder_output = decoder_output + norm_2

    def position_wise_feed_forward_network(x):
        # Using conv1D with kernel size 1 as the per-position linear layers.
        # First linear
        linear_1 = tf.layers.conv1d(x, 2048, 1)
        # ReLU operation
        relu_1 = tf.nn.relu(linear_1)
        # Second linear
        linear_2 = tf.layers.conv1d(relu_1, x.get_shape().as_list()[-1], 1)
        # NOTE(review): keep_prob=1 makes this dropout a no-op in TF1 —
        # confirm whether a real dropout rate was intended.
        return tf.nn.dropout(linear_2, 1)

    pwff = position_wise_feed_forward_network(decoder_output)
    norm_3 = layer_norm(pwff)
    decoder_output = decoder_output + norm_3

    # Per-position logits over the vocabulary, loss, and decoded predictions.
    output_probabilities = tf.layers.dense(decoder_output, vocab_size)
    loss = self._compute_loss(output_probabilities, output, length,
                              batch_size)
    ids, log_probs, scores = self.char_predictions(output_probabilities,
                                                   vocab_size, max_seq)
    # rej_char=0 — presumably 0 is the null/padding code; verify with dataset.
    char_acc = char_accuracy(ids, output, 0)
    word_acc = sequence_accuracy(ids, output, 0)
    with tf.name_scope('summaries'):
        tf.summary.scalar("loss", loss, collections=["train_summary"])
        tf.summary.scalar("character accuracy",
                          char_acc,
                          collections=["train_summary"])
        tf.summary.scalar("word accuracy",
                          word_acc,
                          collections=["train_summary"])
        summary_op = tf.summary.merge_all(key='train_summary')
    optimizer = tf.train.AdadeltaOptimizer(learning_rate=1).minimize(loss)
    init = tf.global_variables_initializer()
    return inputs, output, length, loss, optimizer, output_probabilities, summary_op, init, word_acc
def create_summaries(self, data, endpoints, charset, is_training):
  """Creates all summaries for the model.

  Args:
    data: InputEndpoints namedtuple.
    endpoints: OutputEndpoints namedtuple.
    charset: A dictionary with mapping between character codes and
      unicode characters. Use the one provided by a dataset.charset.
    is_training: If True will create summary prefixes for training job,
      otherwise - for evaluation.

  Returns:
    A list of evaluation ops
  """

  def sname(label):
    prefix = 'train' if is_training else 'eval'
    return '%s/%s' % (prefix, label)

  max_outputs = 4
  # TODO(gorban): uncomment, when tf.summary.text released.
  # charset_mapper = CharsetMapper(charset)
  # pr_text = charset_mapper.get_text(
  #     endpoints.predicted_chars[:max_outputs,:])
  # tf.summary.text(sname('text/pr'), pr_text)
  # gt_text = charset_mapper.get_text(data.labels[:max_outputs,:])
  # tf.summary.text(sname('text/gt'), gt_text)
  tf.summary.image(sname('image'), data.images, max_outputs=max_outputs)

  if is_training:
    tf.summary.image(
        sname('image/orig'), data.images_orig, max_outputs=max_outputs)
    for var in tf.trainable_variables():
      tf.summary.histogram(var.op.name, var)
    return None
  else:
    names_to_values = {}
    names_to_updates = {}

    def use_metric(name, value_update_tuple):
      # Streaming metrics are (value, update_op) pairs.
      names_to_values[name] = value_update_tuple[0]
      names_to_updates[name] = value_update_tuple[1]

    use_metric('CharacterAccuracy',
               metrics.char_accuracy(
                   endpoints.predicted_chars,
                   data.labels,
                   streaming=True,
                   rej_char=self._params.null_code))
    # Sequence accuracy computed by cutting sequence at the first null char
    use_metric('SequenceAccuracy',
               metrics.sequence_accuracy(
                   endpoints.predicted_chars,
                   data.labels,
                   streaming=True,
                   rej_char=self._params.null_code))

    # BUG FIX: dict.iteritems() does not exist in Python 3 (AttributeError);
    # use .items(), matching the sibling implementation of this method.
    for name, value in names_to_values.items():
      summary_name = 'eval/' + name
      tf.summary.scalar(summary_name,
                        tf.Print(value, [value], summary_name))
    # BUG FIX: .values() is a view in Python 3; return a real list so the
    # caller gets the documented "list of evaluation ops".
    return list(names_to_updates.values())