def recurrent_encoder_layer(input_ph,
                            seq_len: int,
                            activation_list,
                            bw_cells: List[int],
                            fw_cells: List[int] = None,
                            name: str = "encoder",
                            feature_sizes: List[int] = None,
                            out_size: int = None,
                            out_activation=None):
    # With no forward cells the encoder is a plain unidirectional stack built
    # from bw_cells; otherwise a bidirectional stack is built from fw_cells
    # and bw_cells (one activation list shared by both directions).
    if fw_cells is None:
        input_ph = unidirectional_rnn(input_ph=input_ph,
                                      seq_len_ph=seq_len,
                                      num_layers=len(bw_cells),
                                      num_cell_units=bw_cells,
                                      name=name,
                                      activation_list=activation_list,
                                      use_tensorboard=True,
                                      tensorboard_scope=name,
                                      output_size=feature_sizes)
    else:
        input_ph = bidirectional_rnn(input_ph=input_ph,
                                     seq_len_ph=seq_len,
                                     num_layers=len(bw_cells),
                                     num_fw_cell_units=fw_cells,
                                     num_bw_cell_units=bw_cells,
                                     name=name,
                                     activation_fw_list=activation_list,
                                     activation_bw_list=activation_list,
                                     use_tensorboard=True,
                                     tensorboard_scope=name,
                                     output_size=feature_sizes)

    # Optional dense projection of the encoder output.
    if out_size is not None:
        input_ph = dense_layer(input_ph,
                               num_units=out_size,
                               name=name + '_out',
                               activation=out_activation,
                               use_batch_normalization=False,
                               train_ph=True,
                               use_tensorboard=True,
                               keep_prob=1,
                               tensorboard_scope=name)

    return input_ph
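# Hypothetical usage sketch (not part of the original module): building a
# bidirectional encoder over batched spectrogram features. The placeholder
# shapes, unit counts, and activation choices below are illustrative
# assumptions; the helpers are the ones defined in this repo.
def _example_recurrent_encoder():
    features = tf.placeholder(tf.float32, shape=[None, None, 80], name='features')
    seq_len = tf.placeholder(tf.int32, shape=[None], name='features_len')

    # Two stacked layers of 256 fw/bw units, each projected to 128 features,
    # followed by a 128-unit dense output projection.
    return recurrent_encoder_layer(input_ph=features,
                                   seq_len=seq_len,
                                   activation_list=[tf.nn.tanh, tf.nn.tanh],
                                   bw_cells=[256, 256],
                                   fw_cells=[256, 256],
                                   feature_sizes=[128, 128],
                                   out_size=128,
                                   out_activation=tf.nn.relu)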
def model_fn(features, labels, mode, config, params):
    feature = features['feature']
    feat_len = features['feat_len']
    sparse_target = labels

    global_step = tf.train.get_global_step()

    with tf.name_scope("seq_len"):
        input_features_length = feat_len

    with tf.name_scope("input_features"):
        input_features = feature

    with tf.name_scope("input_labels"):
        input_labels = sparse_target

    # Optionally subsample the time axis: each pass keeps every other frame
    # and halves the sequence length, rounding up.
    subsample_factor = params["num_reduce_by_half"]
    if subsample_factor is not None and subsample_factor > 0:
        for _ in range(subsample_factor):
            input_features_length = tf.div(input_features_length, 2) + \
                tf.cast(input_features_length % 2, dtype=tf.int32)
            input_features = input_features[:, ::2]

    # Additive Gaussian noise as input regularization (train mode only).
    if params['noise_stddev'] is not None and params['noise_stddev'] != 0.0:
        input_features = tf.keras.layers.GaussianNoise(stddev=params['noise_stddev'])(
            inputs=input_features,
            training=mode == tf.estimator.ModeKeys.TRAIN)

    rnn_input = tf.identity(input_features)

    with tf.name_scope("dense_layer_1"):
        rnn_input = dense_multilayer(
            input_ph=rnn_input,
            num_layers=params['num_dense_layers_1'],
            num_units=params['num_units_1'],
            name='dense_layer_1',
            activation_list=params['dense_activations_1'],
            use_batch_normalization=params['batch_normalization_1'],
            batch_normalization_trainable=params['batch_normalization_trainable_1'],
            train_ph=mode == tf.estimator.ModeKeys.TRAIN,
            use_tensorboard=True,
            keep_prob_list=params['keep_prob_1'],
            kernel_initializers=params['kernel_init_1'],
            bias_initializers=params['bias_init_1'],
            tensorboard_scope='dense_layer_1')

    with tf.name_scope("RNN_cell"):
        if params['is_bidirectional']:
            rnn_outputs = bidirectional_rnn(
                input_ph=rnn_input,
                seq_len_ph=input_features_length,
                num_layers=len(params['num_cell_units']),
                num_cell_units=params['num_cell_units'],
                activation_list=params['cell_activation'],
                use_tensorboard=True,
                tensorboard_scope='RNN',
                train_ph=mode == tf.estimator.ModeKeys.TRAIN,
                keep_prob_list=params['keep_prob_rnn'],
                use_batch_normalization=params["rnn_batch_normalization"] == True)
        else:
            rnn_outputs = unidirectional_rnn(
                input_ph=rnn_input,
                seq_len_ph=input_features_length,
                num_layers=len(params['num_cell_units']),
                num_cell_units=params['num_cell_units'],
                activation_list=params['cell_activation'],
                use_tensorboard=True,
                tensorboard_scope='RNN',
                train_ph=mode == tf.estimator.ModeKeys.TRAIN,
                keep_prob_list=params['keep_prob_rnn'],
                use_batch_normalization=params["rnn_batch_normalization"] == True)

    with tf.name_scope("dense_layer_2"):
        rnn_outputs = dense_multilayer(
            input_ph=rnn_outputs,
            num_layers=params['num_dense_layers_2'],
            num_units=params['num_units_2'],
            name='dense_layer_2',
            activation_list=params['dense_activations_2'],
            use_batch_normalization=params['batch_normalization_2'],
            batch_normalization_trainable=params['batch_normalization_trainable_2'],
            train_ph=mode == tf.estimator.ModeKeys.TRAIN,
            use_tensorboard=True,
            keep_prob_list=params['keep_prob_2'],
            kernel_initializers=params['kernel_init_2'],
            bias_initializers=params['bias_init_2'],
            tensorboard_scope='dense_layer_2')

    with tf.name_scope("dense_output"):
        dense_output_no_activation = dense_layer(
            input_ph=rnn_outputs,
            num_units=params['num_classes'],
            name='dense_output_no_activation',
            activation=None,
            use_batch_normalization=False,
            train_ph=False,
            use_tensorboard=True,
            keep_prob=1,
            tensorboard_scope='dense_output')

        dense_output = tf.nn.softmax(dense_output_no_activation,
                                     name='dense_output')
        tf.summary.histogram('dense_output', dense_output)

    with tf.name_scope("decoder"):
        # CTC decoders expect time-major inputs: [max_time, batch, num_classes].
        output_time_major = tf.transpose(dense_output, (1, 0, 2))
        if params['beam_width'] == 0:
            decoded, log_prob = tf.nn.ctc_greedy_decoder(output_time_major,
                                                         input_features_length,
                                                         merge_repeated=True)
        else:
            decoded, log_prob = tf.nn.ctc_beam_search_decoder(
                output_time_major,
                input_features_length,
                beam_width=params['beam_width'],
                top_paths=1,
                merge_repeated=False)

        dense_decoded = tf.sparse.to_dense(sp_input=decoded[0],
                                           validate_indices=True)

    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(mode, predictions=dense_decoded)

    with tf.name_scope("loss"):
        rnn_loss = 0
        for var in tf.trainable_variables():
            if var.name.startswith('RNN_cell') and 'kernel' in var.name:
                rnn_loss += tf.nn.l2_loss(var)

        dense_loss = 0
        for var in tf.trainable_variables():
            # Note the parentheses: 'kernel' must appear regardless of which
            # prefix matched.
            if (var.name.startswith('dense_layer') or
                    var.name.startswith('input_dense_layer')) and \
                    'kernel' in var.name:
                dense_loss += tf.nn.l2_loss(var)

        loss = tf.nn.ctc_loss(input_labels,
                              dense_output_no_activation,
                              input_features_length,
                              time_major=False)
        logits_loss = tf.reduce_mean(tf.reduce_sum(loss))
        loss = logits_loss \
            + params['rnn_regularizer'] * rnn_loss \
            + params['dense_regularizer'] * dense_loss
        tf.summary.scalar('loss', loss)

    with tf.name_scope("label_error_rate"):
        # Inaccuracy: label error rate
        ler = tf.reduce_mean(
            tf.edit_distance(hypothesis=tf.cast(decoded[0], tf.int32),
                             truth=input_labels,
                             normalize=True))

    metrics = {
        'LER': tf.metrics.mean(ler),
    }
    tf.summary.scalar('label_error_rate', tf.reduce_mean(ler))

    logging_hook = tf.train.LoggingTensorHook(
        tensors={
            "loss": loss,
            "ler": ler,
        },
        every_n_iter=1)

    if mode == tf.estimator.ModeKeys.TRAIN:
        if params['use_learning_rate_decay']:
            learning_rate = tf.train.exponential_decay(
                params['learning_rate'],
                global_step,
                decay_steps=params['learning_rate_decay_steps'],
                decay_rate=params['learning_rate_decay'],
                staircase=True)
        else:
            learning_rate = params['learning_rate']

        if params['optimizer'] == 'sgd':
            optimizer = tf.train.GradientDescentOptimizer(
                learning_rate=learning_rate)
        elif params['optimizer'] == 'momentum' and params['momentum'] is not None:
            optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate,
                                                   momentum=params['momentum'])
        elif params['optimizer'] == 'rms':
            optimizer = tf.train.RMSPropOptimizer(learning_rate=learning_rate)
        else:
            optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)

        # Make the loss depend on UPDATE_OPS (e.g. batch-norm moving averages).
        loss = tf.tuple(
            [loss],
            control_inputs=tf.get_collection(tf.GraphKeys.UPDATE_OPS))[0]

        if params['clip_gradient'] != 0:
            grads = tf.gradients(loss, tf.trainable_variables())
            grads, _ = tf.clip_by_global_norm(grads, params['clip_gradient'])
            grads_and_vars = list(zip(grads, tf.trainable_variables()))
            train_op = optimizer.apply_gradients(grads_and_vars,
                                                 global_step=global_step)
        else:
            train_op = optimizer.minimize(loss, global_step=global_step)

        train_logging_hook = tf.train.LoggingTensorHook(
            tensors={
                'loss': loss,
                'ler': tf.reduce_mean(ler),
                'learning_rate': tf.reduce_mean(learning_rate),
            },
            every_n_secs=1)

        return tf.estimator.EstimatorSpec(mode=mode,
                                          loss=loss,
                                          train_op=train_op,
                                          training_hooks=[train_logging_hook],
                                          eval_metric_ops=metrics)

    if mode == tf.estimator.ModeKeys.EVAL:

        def _create_alignment_images_summary(outputs):
            # Map the softmax posteriors in [0, 1] to an inverted [0, 255]
            # grayscale image: (1 - p) * 255.
            images = outputs
            images = tf.expand_dims(images, -1)
            images -= 1
            images = -images
            images *= 255
            summary = tf.summary.image("alignment_images", images)
            return summary

        with tf.name_scope('alignment'):
            alignment_summary = _create_alignment_images_summary(dense_output)

        eval_summary_hook = tf.train.SummarySaverHook(
            save_steps=10,
            output_dir=os.path.join(config.model_dir, 'eval'),
            summary_op=alignment_summary)

        return tf.estimator.EstimatorSpec(
            mode=mode,
            loss=loss,
            evaluation_hooks=[logging_hook, eval_summary_hook],
            eval_metric_ops=metrics)
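# Hypothetical usage sketch: wiring the model_fn above into a
# tf.estimator.Estimator. Every key in `params` mirrors a lookup actually
# performed by model_fn; the concrete values (layer sizes, activations,
# regularizer weights) are illustrative assumptions only.
def _example_build_estimator(model_dir):
    params = {
        'num_reduce_by_half': 1,
        'noise_stddev': 0.1,
        'num_dense_layers_1': 1, 'num_units_1': [256],
        'dense_activations_1': [tf.nn.relu], 'batch_normalization_1': False,
        'batch_normalization_trainable_1': False,
        'keep_prob_1': [0.8], 'kernel_init_1': [None], 'bias_init_1': [None],
        'is_bidirectional': True,
        'num_cell_units': [256, 256],
        'cell_activation': [tf.nn.tanh, tf.nn.tanh],
        'keep_prob_rnn': [0.8, 0.8], 'rnn_batch_normalization': False,
        'num_dense_layers_2': 1, 'num_units_2': [256],
        'dense_activations_2': [tf.nn.relu], 'batch_normalization_2': False,
        'batch_normalization_trainable_2': False,
        'keep_prob_2': [0.8], 'kernel_init_2': [None], 'bias_init_2': [None],
        'num_classes': 29,   # e.g. 26 letters + space + apostrophe + CTC blank
        'beam_width': 0,     # 0 selects the greedy-decoder branch
        'rnn_regularizer': 1e-4, 'dense_regularizer': 1e-4,
        'use_learning_rate_decay': False, 'learning_rate': 1e-3,
        'learning_rate_decay_steps': 1000, 'learning_rate_decay': 0.98,
        'optimizer': 'adam', 'momentum': None, 'clip_gradient': 5.0,
    }
    return tf.estimator.Estimator(model_fn=model_fn,
                                  model_dir=model_dir,
                                  params=params)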
def create_graph(self):
    with self.graph.as_default():
        self.tf_is_traing_pl = tf.placeholder_with_default(True,
                                                           shape=(),
                                                           name='is_training')

        with tf.name_scope("seq_len"):
            self.seq_len = tf.placeholder(tf.int32,
                                          shape=[None],
                                          name="sequence_length")

        with tf.name_scope("input_features"):
            self.input_feature = tf.placeholder(
                dtype=tf.float32,
                shape=[None, None, self.network_data.num_features],
                name="input")
            tf.summary.image('feature', [tf.transpose(self.input_feature)])

        with tf.name_scope("input_labels"):
            self.input_label = tf.sparse_placeholder(dtype=tf.int32,
                                                     shape=[None, None],
                                                     name="input_label")

        self.dense_layer_1 = tf.identity(self.input_feature)
        with tf.name_scope("dense_layer_1"):
            self.dense_layer_1 = dense_multilayer(
                input_ph=self.dense_layer_1,
                num_layers=self.network_data.num_dense_layers_1,
                num_units=self.network_data.num_dense_units_1,
                name='dense_layer_1',
                activation_list=self.network_data.dense_activations_1,
                use_batch_normalization=self.network_data.batch_normalization_1,
                train_ph=self.tf_is_traing_pl,
                use_tensorboard=True,
                keep_prob_list=self.network_data.keep_dropout_1,
                kernel_initializers=self.network_data.kernel_init_1,
                bias_initializers=self.network_data.bias_init_1,
                tensorboard_scope='dense_layer_1')

        with tf.name_scope("RNN_1"):
            if self.network_data.is_bidirectional_1:
                self.rnn_outputs_1 = bidirectional_rnn(
                    input_ph=self.dense_layer_1,
                    seq_len_ph=self.seq_len,
                    num_layers=len(self.network_data.num_fw_cell_units_1),
                    num_fw_cell_units=self.network_data.num_fw_cell_units_1,
                    num_bw_cell_units=self.network_data.num_bw_cell_units_1,
                    name="RNN_1",
                    activation_fw_list=self.network_data.cell_fw_activation_1,
                    activation_bw_list=self.network_data.cell_bw_activation_1,
                    use_tensorboard=True,
                    tensorboard_scope='RNN_1',
                    output_size=self.network_data.rnn_output_sizes_1)
            else:
                self.rnn_outputs_1 = unidirectional_rnn(
                    input_ph=self.dense_layer_1,
                    seq_len_ph=self.seq_len,
                    num_layers=len(self.network_data.num_cell_units_1),
                    num_cell_units=self.network_data.num_cell_units_1,
                    name="RNN_1",
                    activation_list=self.network_data.cell_activation_1,
                    use_tensorboard=True,
                    tensorboard_scope='RNN_1',
                    output_size=self.network_data.rnn_output_sizes_1)

        with tf.name_scope("dense_layer_2"):
            self.dense_layer_2 = dense_multilayer(
                input_ph=self.rnn_outputs_1,
                num_layers=self.network_data.num_dense_layers_2,
                num_units=self.network_data.num_dense_units_2,
                name='dense_layer_2',
                activation_list=self.network_data.dense_activations_2,
                use_batch_normalization=self.network_data.batch_normalization_2,
                train_ph=self.tf_is_traing_pl,
                use_tensorboard=True,
                keep_prob_list=self.network_data.keep_dropout_2,
                kernel_initializers=self.network_data.kernel_init_2,
                bias_initializers=self.network_data.bias_init_2,
                tensorboard_scope='dense_layer_2')

        # First (intermediate) CTC head.
        with tf.name_scope("dense_output_1"):
            self.dense_output_no_activation_1 = dense_layer(
                input_ph=self.dense_layer_2,
                num_units=self.network_data.num_classes,
                name='dense_output_no_activation_1',
                activation=None,
                use_batch_normalization=False,
                train_ph=False,
                use_tensorboard=True,
                keep_prob=1,
                tensorboard_scope='dense_output_1')

            self.dense_output_1 = tf.nn.softmax(
                self.dense_output_no_activation_1, name='dense_output_1')
            tf.summary.histogram('dense_output_1', self.dense_output_1)

        with tf.name_scope("decoder_1"):
            self.output_time_major_1 = tf.transpose(self.dense_output_1,
                                                    (1, 0, 2))
            self.decoded_1, log_prob = self.network_data.decoder_function(
                self.output_time_major_1, self.seq_len)
            self.dense_decoded_1 = tf.sparse_to_dense(
                self.decoded_1[0].indices,
                self.decoded_1[0].dense_shape,
                self.decoded_1[0].values)

        # The second stage consumes the softmax posteriors of the first head.
        with tf.name_scope("dense_layer_3"):
            self.dense_layer_3 = dense_multilayer(
                input_ph=self.dense_output_1,
                num_layers=self.network_data.num_dense_layers_3,
                num_units=self.network_data.num_dense_units_3,
                name='dense_layer_3',
                activation_list=self.network_data.dense_activations_3,
                use_batch_normalization=self.network_data.batch_normalization_3,
                train_ph=self.tf_is_traing_pl,
                use_tensorboard=True,
                keep_prob_list=self.network_data.keep_dropout_3,
                kernel_initializers=self.network_data.kernel_init_3,
                bias_initializers=self.network_data.bias_init_3,
                tensorboard_scope='dense_layer_3')

        with tf.name_scope("RNN_2"):
            if self.network_data.is_bidirectional_2:
                self.rnn_outputs_2 = bidirectional_rnn(
                    input_ph=self.dense_layer_3,
                    seq_len_ph=self.seq_len,
                    num_layers=len(self.network_data.num_fw_cell_units_2),
                    num_fw_cell_units=self.network_data.num_fw_cell_units_2,
                    num_bw_cell_units=self.network_data.num_bw_cell_units_2,
                    name="RNN_2",
                    activation_fw_list=self.network_data.cell_fw_activation_2,
                    activation_bw_list=self.network_data.cell_bw_activation_2,
                    use_tensorboard=True,
                    tensorboard_scope='RNN_2',
                    output_size=self.network_data.rnn_output_sizes_2)
            else:
                self.rnn_outputs_2 = unidirectional_rnn(
                    input_ph=self.dense_layer_3,
                    seq_len_ph=self.seq_len,
                    num_layers=len(self.network_data.num_cell_units_2),
                    num_cell_units=self.network_data.num_cell_units_2,
                    name="RNN_2",
                    activation_list=self.network_data.cell_activation_2,
                    use_tensorboard=True,
                    tensorboard_scope='RNN_2',
                    output_size=self.network_data.rnn_output_sizes_2)

        with tf.name_scope("dense_layer_4"):
            self.dense_layer_4 = dense_multilayer(
                input_ph=self.rnn_outputs_2,
                num_layers=self.network_data.num_dense_layers_4,
                num_units=self.network_data.num_dense_units_4,
                name='dense_layer_4',
                activation_list=self.network_data.dense_activations_4,
                use_batch_normalization=self.network_data.batch_normalization_4,
                train_ph=self.tf_is_traing_pl,
                use_tensorboard=True,
                keep_prob_list=self.network_data.keep_dropout_4,
                kernel_initializers=self.network_data.kernel_init_4,
                bias_initializers=self.network_data.bias_init_4,
                tensorboard_scope='dense_layer_4')

        # Second (final) CTC head.
        with tf.name_scope("dense_output_2"):
            self.dense_output_no_activation_2 = dense_layer(
                input_ph=self.dense_layer_4,
                num_units=self.network_data.num_classes,
                name='dense_output_no_activation_2',
                activation=None,
                use_batch_normalization=False,
                train_ph=False,
                use_tensorboard=True,
                keep_prob=1,
                tensorboard_scope='dense_output_no_activation_2')

            self.dense_output_2 = tf.nn.softmax(
                self.dense_output_no_activation_2, name='dense_output_2')
            tf.summary.histogram('dense_output_2', self.dense_output_2)

        with tf.name_scope("decoder_2"):
            self.output_time_major_2 = tf.transpose(self.dense_output_2,
                                                    (1, 0, 2))
            self.decoded_2, log_prob = self.network_data.decoder_function(
                self.output_time_major_2, self.seq_len)
            self.dense_decoded_2 = tf.sparse_to_dense(
                self.decoded_2[0].indices,
                self.decoded_2[0].dense_shape,
                self.decoded_2[0].values)

        with tf.name_scope("loss"):
            rnn_loss = 0
            for var in tf.trainable_variables():
                if var.name.startswith('RNN_') and 'kernel' in var.name:
                    rnn_loss += tf.nn.l2_loss(var)

            dense_loss = 0
            for var in tf.trainable_variables():
                # Only dense-layer kernels contribute to the regularizer.
                if var.name.startswith('dense_layer') and 'kernel' in var.name:
                    dense_loss += tf.nn.l2_loss(var)

            loss_1 = tf.nn.ctc_loss(self.input_label,
                                    self.dense_output_no_activation_1,
                                    self.seq_len,
                                    time_major=False)
            loss_2 = tf.nn.ctc_loss(self.input_label,
                                    self.dense_output_no_activation_2,
                                    self.seq_len,
                                    time_major=False)
            # The intermediate head's CTC loss is down-weighted by 0.3.
            self.logits_loss = tf.reduce_mean(tf.reduce_sum(loss_2)) + \
                0.3 * tf.reduce_mean(tf.reduce_sum(loss_1))
            self.loss = self.logits_loss \
                + self.network_data.rnn_regularizer * rnn_loss \
                + self.network_data.dense_regularizer * dense_loss
            tf.summary.scalar('loss', self.loss)

        # define the optimizer
        with tf.name_scope("training"):
            self.training_op = self.network_data.optimizer.minimize(self.loss)

        with tf.name_scope("label_error_rate"):
            # Inaccuracy: label error rate (computed on the final head).
            self.ler = tf.reduce_mean(
                tf.edit_distance(hypothesis=tf.cast(self.decoded_2[0], tf.int32),
                                 truth=self.input_label,
                                 normalize=True))
            tf.summary.scalar('label_error_rate', tf.reduce_mean(self.ler))

        self.checkpoint_saver = tf.train.Saver(save_relative_paths=True)
        self.merged_summary = tf.summary.merge_all()
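# Hypothetical usage sketch: one training step on the two-stage graph above.
# `model` is an instance whose create_graph() has already run, and
# `sparse_labels` is a tf.SparseTensorValue(indices, values, dense_shape) for
# the batch; the function name and batch layout are illustrative assumptions.
def _example_train_step(model, session, features, seq_lens, sparse_labels):
    feed = {
        model.input_feature: features,     # [batch, max_time, num_features]
        model.seq_len: seq_lens,           # [batch]
        model.input_label: sparse_labels,  # tf.SparseTensorValue
        model.tf_is_traing_pl: True,
    }
    _, loss, ler = session.run([model.training_op, model.loss, model.ler],
                               feed_dict=feed)
    return loss, ler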
def create_graph(self):
    with self.graph.as_default():
        self.tf_is_traing_pl = tf.placeholder_with_default(True,
                                                           shape=(),
                                                           name='is_training')

        with tf.name_scope("seq_len"):
            self.seq_len = tf.placeholder(tf.int32,
                                          shape=[None],
                                          name="sequence_length")

        with tf.name_scope("input_features"):
            self.input_feature = tf.placeholder(
                dtype=tf.float32,
                shape=[None, None, self.network_data.num_features],
                name="input")
            tf.summary.image('feature', [tf.transpose(self.input_feature)])

        with tf.name_scope("input_labels"):
            self.input_label = tf.sparse_placeholder(dtype=tf.int32,
                                                     shape=[None, None],
                                                     name="input_label")

        self.dense_layer_1 = tf.identity(self.input_feature)
        with tf.name_scope("dense_layer_1"):
            self.dense_layer_1 = dense_multilayer(
                input_ph=self.dense_layer_1,
                num_layers=self.network_data.num_dense_layers_1,
                num_units=self.network_data.num_dense_units_1,
                name='dense_layer_1',
                activation_list=self.network_data.dense_activations_1,
                use_batch_normalization=self.network_data.batch_normalization_1,
                train_ph=self.tf_is_traing_pl,
                use_tensorboard=True,
                keep_prob_list=self.network_data.keep_dropout_1,
                kernel_initializers=self.network_data.kernel_init_1,
                bias_initializers=self.network_data.bias_init_1,
                tensorboard_scope='dense_layer_1')

        with tf.name_scope("RNN_cell"):
            if self.network_data.is_bidirectional:
                self.rnn_outputs = bidirectional_rnn(
                    input_ph=self.dense_layer_1,
                    seq_len_ph=self.seq_len,
                    num_layers=len(self.network_data.num_fw_cell_units),
                    num_fw_cell_units=self.network_data.num_fw_cell_units,
                    num_bw_cell_units=self.network_data.num_bw_cell_units,
                    name="RNN_cell",
                    activation_fw_list=self.network_data.cell_fw_activation,
                    activation_bw_list=self.network_data.cell_bw_activation,
                    use_tensorboard=True,
                    tensorboard_scope='RNN',
                    output_size=self.network_data.rnn_output_sizes)
            else:
                self.rnn_outputs = unidirectional_rnn(
                    input_ph=self.dense_layer_1,
                    seq_len_ph=self.seq_len,
                    num_layers=len(self.network_data.num_cell_units),
                    num_cell_units=self.network_data.num_cell_units,
                    name="RNN_cell",
                    activation_list=self.network_data.cell_activation,
                    use_tensorboard=True,
                    tensorboard_scope='RNN',
                    output_size=self.network_data.rnn_output_sizes)

        with tf.name_scope("dense_layer_2"):
            self.dense_layer_2 = dense_multilayer(
                input_ph=self.rnn_outputs,
                num_layers=self.network_data.num_dense_layers_2,
                num_units=self.network_data.num_dense_units_2,
                name='dense_layer_2',
                activation_list=self.network_data.dense_activations_2,
                use_batch_normalization=self.network_data.batch_normalization_2,
                train_ph=self.tf_is_traing_pl,
                use_tensorboard=True,
                keep_prob_list=self.network_data.keep_dropout_2,
                kernel_initializers=self.network_data.kernel_init_2,
                bias_initializers=self.network_data.bias_init_2,
                tensorboard_scope='dense_layer_2')

        with tf.name_scope("dense_output"):
            # Note: the output head consumes self.rnn_outputs directly, so
            # dense_layer_2 is not on the output path as written.
            self.dense_output_no_activation = dense_layer(
                input_ph=self.rnn_outputs,
                num_units=self.network_data.num_classes,
                name='dense_output_no_activation',
                activation=None,
                use_batch_normalization=False,
                train_ph=False,
                use_tensorboard=True,
                keep_prob=1,
                tensorboard_scope='dense_output')

            self.dense_output = tf.nn.softmax(self.dense_output_no_activation,
                                              name='dense_output')
            tf.summary.histogram('dense_output', self.dense_output)

        with tf.name_scope("output_classes"):
            self.output_classes = tf.argmax(self.dense_output, 2)

        with tf.name_scope("loss"):
            rnn_loss = 0
            for var in tf.trainable_variables():
                if var.name.startswith('RNN_cell') and 'kernel' in var.name:
                    rnn_loss += tf.nn.l2_loss(var)

            dense_loss = 0
            for var in tf.trainable_variables():
                if var.name.startswith('dense_layer') and 'kernel' in var.name:
                    dense_loss += tf.nn.l2_loss(var)

            loss = tf.nn.ctc_loss(self.input_label,
                                  self.dense_output_no_activation,
                                  self.seq_len,
                                  time_major=False)
            self.logits_loss = tf.reduce_mean(tf.reduce_sum(loss))
            self.loss = self.logits_loss \
                + self.network_data.rnn_regularizer * rnn_loss \
                + self.network_data.dense_regularizer * dense_loss
            tf.summary.scalar('loss', self.loss)

        # define the optimizer
        with tf.name_scope("training"):
            self.training_op = self.network_data.optimizer.minimize(self.loss)

        with tf.name_scope("decoder"):
            self.output_time_major = tf.transpose(self.dense_output, (1, 0, 2))

            self.word_beam_search_module = tf.load_op_library(
                self.network_data.word_beam_search_path)

            # Prepare the language information: the text corpus, the characters
            # in the dataset, and the characters that may form words.
            chars = str().join(self.network_data.char_list)
            word_chars = open(
                self.network_data.word_char_list_path).read().splitlines()[0]
            corpus = open(self.network_data.corpus_path).read()

            # Decode using the "Words" mode of word beam search.
            self.decoded = self.word_beam_search_module.word_beam_search(
                self.output_time_major,
                self.network_data.beam_width,
                self.network_data.scoring_mode,
                self.network_data.smoothing,
                corpus.encode('utf8'),
                chars.encode('utf8'),
                word_chars.encode('utf8'))

        with tf.name_scope("label_error_rate"):
            # This is not the best way to compute the LER, but of the several
            # variants tried, this one worked best.
            # Inaccuracy: label error rate
            dense_label = tf.sparse_to_dense(self.input_label.indices,
                                             self.input_label.dense_shape,
                                             self.input_label.values)

            # (num_classes - 1) is the blank index.
            decoded_mask = tf.not_equal(self.decoded,
                                        self.network_data.num_classes - 1)
            decoded_mask.set_shape([None, None])
            decoded_mask = tf.boolean_mask(self.decoded, decoded_mask)

            label_mask = tf.not_equal(dense_label,
                                      self.network_data.num_classes - 1)
            label_mask.set_shape([None, None])
            label_mask = tf.boolean_mask(dense_label, label_mask)

            self.edit_distance = tf.edit_distance(
                hypothesis=tf.cast(
                    tf.contrib.layers.dense_to_sparse([decoded_mask]),
                    tf.int32),
                truth=tf.cast(
                    tf.contrib.layers.dense_to_sparse([label_mask]),
                    tf.int32),
                normalize=True)
            self.ler = tf.reduce_mean(self.edit_distance)
            tf.summary.scalar('label_error_rate', tf.reduce_mean(self.ler))

        self.checkpoint_saver = tf.train.Saver(save_relative_paths=True)
        self.merged_summary = tf.summary.merge_all()
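# Hypothetical post-processing sketch: turning the word-beam-search output into
# strings. `decoded_batch` is the dense [batch, time] index matrix obtained by
# evaluating self.decoded; as in the masking above, entries equal to
# num_classes - 1 are the CTC blank and are skipped. Function name is assumed.
def _example_decode_to_text(decoded_batch, char_list, blank_index):
    texts = []
    for row in decoded_batch:
        texts.append(''.join(char_list[int(i)]
                             for i in row if int(i) != blank_index))
    return texts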
def create_graph(self):
    with self.graph.as_default():
        self.tf_is_traing_pl = tf.placeholder_with_default(True,
                                                           shape=(),
                                                           name='is_training')

        with tf.name_scope("input_features"):
            self.input_feature = tf.placeholder(
                dtype=tf.float32,
                shape=[None, None, self.network_data.input_features],
                name="input_features")
            tf.summary.image('input_features',
                             [tf.transpose(self.input_feature)])

        with tf.name_scope("output_features"):
            self.output_feature = tf.placeholder(
                dtype=tf.float32,
                shape=[None, None, self.network_data.input_features],
                name="output_features")

        with tf.name_scope("encoder"):
            self.encoder_out = encoder_layer(
                input_ph=self.input_feature,
                num_layers=self.network_data.encoder_num_layers,
                num_units=self.network_data.encoder_num_units,
                activation_list=self.network_data.encoder_activation,
                use_batch_normalization=self.network_data.encoder_batch_norm,
                train_ph=self.tf_is_traing_pl,
                use_tensorboard=True,
                keep_prob_list=self.network_data.encoder_keep_prob,
                tensorboard_scope='encoder',
                name="encoder")

        with tf.name_scope("decoder"):
            self.decoder_out = decoder_layer(
                input_ph=self.encoder_out,
                num_layers=self.network_data.decoder_num_layers,
                num_units=self.network_data.decoder_num_units,
                activation_list=self.network_data.decoder_activation,
                use_batch_normalization=self.network_data.decoder_batch_norm,
                train_ph=self.tf_is_traing_pl,
                use_tensorboard=True,
                keep_prob_list=self.network_data.decoder_keep_prob,
                tensorboard_scope='decoder',
                name="decoder")

        with tf.name_scope('reconstructed'):
            self.reconstructed_out = dense_layer(
                self.decoder_out,
                num_units=self.network_data.input_features,
                name="reconstruction",
                activation=self.network_data.reconstruction_activation,
                use_batch_normalization=False,
                train_ph=True,
                use_tensorboard=True,
                keep_prob=1,
                tensorboard_scope='reconstructed')

        with tf.name_scope("loss"):
            encoder_loss = 0
            for var in tf.trainable_variables():
                if var.name.startswith('encoder') and 'kernel' in var.name:
                    encoder_loss += tf.nn.l2_loss(var)

            decoder_loss = 0
            for var in tf.trainable_variables():
                if var.name.startswith('decoder') and 'kernel' in var.name:
                    decoder_loss += tf.nn.l2_loss(var)

            # Mean over the batch of the summed squared reconstruction error.
            self.reconstruction_loss = tf.reduce_mean(
                tf.reduce_sum(tf.square(
                    tf.subtract(self.output_feature, self.reconstructed_out)),
                    axis=1))
            self.loss = self.reconstruction_loss \
                + self.network_data.encoder_regularizer * encoder_loss \
                + self.network_data.decoder_regularizer * decoder_loss
            tf.summary.scalar('loss', self.loss)

        # define the optimizer
        with tf.name_scope("optimization"):
            self.optimizer = self.network_data.optimizer.minimize(self.loss)

        self.checkpoint_saver = tf.train.Saver(save_relative_paths=True)
        self.merged_summary = tf.summary.merge_all()
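# Hypothetical usage sketch: one reconstruction training step for the
# autoencoder graph above. Feeding the same array as input and target (plain
# reconstruction) is an assumption; a denoising setup would feed a corrupted
# input instead. `model` is an instance whose create_graph() has already run.
def _example_autoencoder_step(model, session, batch_features):
    feed = {
        model.input_feature: batch_features,   # [batch, time, input_features]
        model.output_feature: batch_features,  # reconstruction target
        model.tf_is_traing_pl: True,
    }
    _, loss = session.run([model.optimizer, model.loss], feed_dict=feed)
    return loss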
def create_graph(self,
                 use_tfrecords=False,
                 features_tensor=None,
                 labels_tensor=None,
                 features_len_tensor=None):
    with self.graph.as_default():
        self.tf_is_traing_pl = tf.placeholder_with_default(True,
                                                           shape=(),
                                                           name='is_training')

        with tf.name_scope("seq_len"):
            if not use_tfrecords:
                self.input_features_length = tf.placeholder(
                    tf.int32, shape=[None], name="sequence_length")
            else:
                self.input_features_length = features_len_tensor

        with tf.name_scope("input_features"):
            if not use_tfrecords:
                self.input_features = tf.placeholder(
                    dtype=tf.float32,
                    shape=[None, None, self.network_data.num_features],
                    name="input")
            else:
                self.input_features = features_tensor

        with tf.name_scope("input_labels"):
            if not use_tfrecords:
                self.input_labels = tf.sparse_placeholder(dtype=tf.int32,
                                                          shape=[None, None],
                                                          name="input_label")
            else:
                self.input_labels = labels_tensor

        self.rnn_input = tf.identity(self.input_features)
        with tf.name_scope("dense_layer_1"):
            self.rnn_input = dense_multilayer(
                input_ph=self.rnn_input,
                num_layers=self.network_data.num_dense_layers_1,
                num_units=self.network_data.num_units_1,
                name='dense_layer_1',
                activation_list=self.network_data.dense_activations_1,
                use_batch_normalization=self.network_data.batch_normalization_1,
                train_ph=self.tf_is_traing_pl,
                use_tensorboard=True,
                keep_prob_list=self.network_data.keep_prob_1,
                kernel_initializers=self.network_data.kernel_init_1,
                bias_initializers=self.network_data.bias_init_1,
                tensorboard_scope='dense_layer_1')

        with tf.name_scope("RNN_cell"):
            if self.network_data.is_bidirectional:
                self.rnn_outputs = bidirectional_rnn(
                    input_ph=self.rnn_input,
                    seq_len_ph=self.input_features_length,
                    num_layers=len(self.network_data.num_fw_cell_units),
                    num_fw_cell_units=self.network_data.num_fw_cell_units,
                    num_bw_cell_units=self.network_data.num_bw_cell_units,
                    name="RNN_cell",
                    activation_fw_list=self.network_data.cell_fw_activation,
                    activation_bw_list=self.network_data.cell_bw_activation,
                    use_tensorboard=True,
                    tensorboard_scope='RNN',
                    output_size=self.network_data.rnn_output_sizes)
            else:
                self.rnn_outputs = unidirectional_rnn(
                    input_ph=self.rnn_input,
                    seq_len_ph=self.input_features_length,
                    num_layers=len(self.network_data.num_cell_units),
                    num_cell_units=self.network_data.num_cell_units,
                    name="RNN_cell",
                    activation_list=self.network_data.cell_activation,
                    use_tensorboard=True,
                    tensorboard_scope='RNN',
                    output_size=self.network_data.rnn_output_sizes)

        with tf.name_scope("dense_layer_2"):
            self.rnn_outputs = dense_multilayer(
                input_ph=self.rnn_outputs,
                num_layers=self.network_data.num_dense_layers_2,
                num_units=self.network_data.num_units_2,
                name='dense_layer_2',
                activation_list=self.network_data.dense_activations_2,
                use_batch_normalization=self.network_data.batch_normalization_2,
                train_ph=self.tf_is_traing_pl,
                use_tensorboard=True,
                keep_prob_list=self.network_data.keep_prob_2,
                kernel_initializers=self.network_data.kernel_init_2,
                bias_initializers=self.network_data.bias_init_2,
                tensorboard_scope='dense_layer_2')

        with tf.name_scope("dense_output"):
            self.dense_output_no_activation = dense_layer(
                input_ph=self.rnn_outputs,
                num_units=self.network_data.num_classes,
                name='dense_output_no_activation',
                activation=None,
                use_batch_normalization=False,
                train_ph=False,
                use_tensorboard=True,
                keep_prob=1,
                tensorboard_scope='dense_output')

            self.dense_output = tf.nn.softmax(self.dense_output_no_activation,
                                              name='dense_output')
            tf.summary.histogram('dense_output', self.dense_output)

        with tf.name_scope("loss"):
            rnn_loss = 0
            for var in tf.trainable_variables():
                if var.name.startswith('RNN_cell') and 'kernel' in var.name:
                    rnn_loss += tf.nn.l2_loss(var)

            dense_loss = 0
            for var in tf.trainable_variables():
                # Note the parentheses: 'kernel' must appear regardless of
                # which prefix matched.
                if (var.name.startswith('dense_layer') or
                        var.name.startswith('input_dense_layer')) and \
                        'kernel' in var.name:
                    dense_loss += tf.nn.l2_loss(var)

            loss = tf.nn.ctc_loss(self.input_labels,
                                  self.dense_output_no_activation,
                                  self.input_features_length,
                                  time_major=False)
            self.logits_loss = tf.reduce_mean(tf.reduce_sum(loss))
            self.loss = self.logits_loss \
                + self.network_data.rnn_regularizer * rnn_loss \
                + self.network_data.dense_regularizer * dense_loss
            tf.summary.scalar('loss', self.loss)

        # define the optimizer
        with tf.name_scope("training"):
            self.train_op = self.network_data.optimizer.minimize(self.loss)

        with tf.name_scope("decoder"):
            self.output_time_major = tf.transpose(self.dense_output, (1, 0, 2))
            self.decoded, log_prob = self.network_data.decoder_function(
                self.output_time_major, self.input_features_length)

        with tf.name_scope("label_error_rate"):
            # Inaccuracy: label error rate
            self.ler = tf.reduce_mean(
                tf.edit_distance(hypothesis=tf.cast(self.decoded[0], tf.int32),
                                 truth=self.input_labels,
                                 normalize=True))
            tf.summary.scalar('label_error_rate', tf.reduce_mean(self.ler))

        self.checkpoint_saver = tf.train.Saver(save_relative_paths=True)
        self.merged_summary = tf.summary.merge_all()
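# Hypothetical usage sketch: driving create_graph(use_tfrecords=True, ...) from
# a tf.data pipeline instead of feed_dict placeholders. The element structure
# of `dataset` (features [time, num_features] float32, feat_len scalar int32,
# labels [label_len] int32), the batch size, and the function name are
# assumptions; only the three tensors handed to create_graph matter.
def _example_graph_from_dataset(model, dataset):
    blank = model.network_data.num_classes - 1
    dataset = dataset.padded_batch(
        batch_size=16,
        padded_shapes=([None, model.network_data.num_features], [], [None]),
        padding_values=(0.0, 0, blank))  # pad labels with the CTC blank
    features, feat_len, dense_labels = \
        dataset.make_one_shot_iterator().get_next()
    # Strip the blank padding back out when converting to the sparse labels
    # expected by tf.nn.ctc_loss.
    sparse_labels = tf.contrib.layers.dense_to_sparse(dense_labels,
                                                      eos_token=blank)
    model.create_graph(use_tfrecords=True,
                       features_tensor=features,
                       labels_tensor=sparse_labels,
                       features_len_tensor=feat_len)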