import os
from typing import List

import tensorflow as tf

# The rnn/dense building blocks used below (unidirectional_rnn,
# bidirectional_rnn, dense_layer, dense_multilayer, encoder_layer,
# decoder_layer) come from the surrounding project and are not shown here.


def recurrent_encoder_layer(input_ph,
                            seq_len,
                            activation_list,
                            bw_cells: List[int],
                            fw_cells: List[int] = None,
                            name: str = "encoder",
                            feature_sizes: List[int] = None,
                            out_size: int = None,
                            out_activation=None):
    """Build a recurrent encoder: a unidirectional RNN over `bw_cells` when
    `fw_cells` is None, otherwise a bidirectional RNN, optionally followed
    by a dense projection to `out_size` units. `seq_len` is the 1-D tensor
    of per-example sequence lengths handed to the RNN helpers.
    """

    if fw_cells is None:
        input_ph = unidirectional_rnn(input_ph=input_ph,
                                      seq_len_ph=seq_len,
                                      num_layers=len(bw_cells),
                                      num_cell_units=bw_cells,
                                      name=name,
                                      activation_list=activation_list,
                                      use_tensorboard=True,
                                      tensorboard_scope=name,
                                      output_size=feature_sizes)
    else:
        input_ph = bidirectional_rnn(input_ph=input_ph,
                                     seq_len_ph=seq_len,
                                     num_layers=len(bw_cells),
                                     num_fw_cell_units=fw_cells,
                                     num_bw_cell_units=bw_cells,
                                     name=name,
                                     activation_fw_list=activation_list,
                                     activation_bw_list=activation_list,
                                     use_tensorboard=True,
                                     tensorboard_scope=name,
                                     output_size=feature_sizes)

    if out_size is not None:
        input_ph = dense_layer(input_ph,
                               num_units=out_size,
                               name=name + '_out',
                               activation=out_activation,
                               use_batch_normalization=False,
                               train_ph=True,
                               use_tensorboard=True,
                               keep_prob=1,
                               tensorboard_scope=name)

    return input_ph
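
# A minimal, self-contained sketch of the same encoder pattern using only
# core TF 1.x ops, since the project helpers are not shown here. Shapes,
# names, and sizes below are illustrative assumptions, not values from the
# project.
features = tf.placeholder(tf.float32, [None, None, 39], name="features")
lengths = tf.placeholder(tf.int32, [None], name="lengths")

# One bidirectional LSTM layer followed by a dense projection -- the same
# structure recurrent_encoder_layer builds through its helpers.
fw_cell = tf.nn.rnn_cell.LSTMCell(128)
bw_cell = tf.nn.rnn_cell.LSTMCell(128)
(out_fw, out_bw), _ = tf.nn.bidirectional_dynamic_rnn(
    fw_cell, bw_cell, features, sequence_length=lengths, dtype=tf.float32)
encoded = tf.layers.dense(tf.concat([out_fw, out_bw], axis=-1), 64,
                          activation=tf.nn.tanh, name="encoder_out")
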
Example #2
def model_fn(features, labels, mode, config, params):

    feature = features['feature']
    feat_len = features['feat_len']
    sparse_target = labels

    global_step = tf.train.get_global_step()

    with tf.name_scope("seq_len"):
        input_features_length = feat_len

    with tf.name_scope("input_features"):
        input_features = feature

    with tf.name_scope("input_labels"):
        input_labels = sparse_target

    subsample_factor = params["num_reduce_by_half"]
    if subsample_factor is not None and subsample_factor > 0:
        for i in range(subsample_factor):
            # Halve the time dimension: lengths become ceil(len / 2) and
            # every other frame is kept.
            input_features_length = tf.div(input_features_length, 2) + tf.cast(
                input_features_length % 2, dtype=tf.int32)
            input_features = input_features[:, ::2]

    if params['noise_stddev'] is not None and params['noise_stddev'] != 0.0:
        input_features = tf.keras.layers.GaussianNoise(
            stddev=params['noise_stddev'])(
                inputs=input_features,
                training=mode == tf.estimator.ModeKeys.TRAIN)

    rnn_input = tf.identity(input_features)
    with tf.name_scope("dense_layer_1"):
        rnn_input = dense_multilayer(
            input_ph=rnn_input,
            num_layers=params['num_dense_layers_1'],
            num_units=params['num_units_1'],
            name='dense_layer_1',
            activation_list=params['dense_activations_1'],
            use_batch_normalization=params['batch_normalization_1'],
            batch_normalization_trainable=params[
                'batch_normalization_trainable_1'],
            train_ph=mode == tf.estimator.ModeKeys.TRAIN,
            use_tensorboard=True,
            keep_prob_list=params['keep_prob_1'],
            kernel_initializers=params['kernel_init_1'],
            bias_initializers=params['bias_init_1'],
            tensorboard_scope='dense_layer_1')

    with tf.name_scope("RNN_cell"):
        if params['is_bidirectional']:
            # bidirectional_rnn takes separate forward/backward cell and
            # activation arguments; the shared params are mirrored on both
            # directions.
            rnn_outputs = bidirectional_rnn(
                input_ph=rnn_input,
                seq_len_ph=input_features_length,
                num_layers=len(params['num_cell_units']),
                num_fw_cell_units=params['num_cell_units'],
                num_bw_cell_units=params['num_cell_units'],
                activation_fw_list=params['cell_activation'],
                activation_bw_list=params['cell_activation'],
                use_tensorboard=True,
                tensorboard_scope='RNN',
                train_ph=mode == tf.estimator.ModeKeys.TRAIN,
                keep_prob_list=params['keep_prob_rnn'],
                use_batch_normalization=params["rnn_batch_normalization"])

        else:
            rnn_outputs = unidirectional_rnn(
                input_ph=rnn_input,
                seq_len_ph=input_features_length,
                num_layers=len(params['num_cell_units']),
                num_cell_units=params['num_cell_units'],
                activation_list=params['cell_activation'],
                use_tensorboard=True,
                tensorboard_scope='RNN',
                train_ph=mode == tf.estimator.ModeKeys.TRAIN,
                keep_prob_list=params['keep_prob_rnn'],
                use_batch_normalization=params["rnn_batch_normalization"] ==
                True)

    with tf.name_scope("dense_layer_2"):
        rnn_outputs = dense_multilayer(
            input_ph=rnn_outputs,
            num_layers=params['num_dense_layers_2'],
            num_units=params['num_units_2'],
            name='dense_layer_2',
            activation_list=params['dense_activations_2'],
            use_batch_normalization=params['batch_normalization_2'],
            batch_normalization_trainable=params[
                'batch_normalization_trainable_2'],
            train_ph=mode == tf.estimator.ModeKeys.TRAIN,
            use_tensorboard=True,
            keep_prob_list=params['keep_prob_2'],
            kernel_initializers=params['kernel_init_2'],
            bias_initializers=params['bias_init_2'],
            tensorboard_scope='dense_layer_2',
            # batch_normalization_training=True
        )

    with tf.name_scope("dense_output"):
        dense_output_no_activation = dense_layer(
            input_ph=rnn_outputs,
            num_units=params['num_classes'],
            name='dense_output_no_activation',
            activation=None,
            use_batch_normalization=False,
            train_ph=False,
            use_tensorboard=True,
            keep_prob=1,
            tensorboard_scope='dense_output')

        dense_output = tf.nn.softmax(dense_output_no_activation,
                                     name='dense_output')
        tf.summary.histogram('dense_output', dense_output)

    with tf.name_scope("decoder"):
        output_time_major = tf.transpose(dense_output, (1, 0, 2))
        if params['beam_width'] == 0:
            decoded, log_prob = tf.nn.ctc_greedy_decoder(output_time_major,
                                                         input_features_length,
                                                         merge_repeated=True)
        else:
            decoded, log_prob = tf.nn.ctc_beam_search_decoder(
                output_time_major,
                input_features_length,
                beam_width=params['beam_width'],
                top_paths=1,
                merge_repeated=False)
        dense_decoded = tf.sparse.to_dense(sp_input=decoded[0],
                                           validate_indices=True)

    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(mode, predictions=dense_decoded)

    with tf.name_scope("loss"):
        rnn_loss = 0
        for var in tf.trainable_variables():
            if var.name.startswith('RNN_cell') and 'kernel' in var.name:
                rnn_loss += tf.nn.l2_loss(var)

        dense_loss = 0
        for var in tf.trainable_variables():
            if (var.name.startswith('dense_layer')
                    or var.name.startswith('input_dense_layer')) \
                    and 'kernel' in var.name:
                dense_loss += tf.nn.l2_loss(var)

        loss = tf.nn.ctc_loss(input_labels,
                              dense_output_no_activation,
                              input_features_length,
                              time_major=False)
        logits_loss = tf.reduce_mean(tf.reduce_sum(loss))
        loss = logits_loss \
               + params['rnn_regularizer'] * rnn_loss \
               + params['dense_regularizer'] * dense_loss
        tf.summary.scalar('loss', loss)

    with tf.name_scope("label_error_rate"):
        # Inaccuracy: label error rate
        ler = tf.reduce_mean(
            tf.edit_distance(hypothesis=tf.cast(decoded[0], tf.int32),
                             truth=input_labels,
                             normalize=True))
        metrics = {
            'LER': tf.metrics.mean(ler),
        }
        tf.summary.scalar('label_error_rate', tf.reduce_mean(ler))

    logging_hook = tf.train.LoggingTensorHook(tensors={
        "loss": loss,
        "ler": ler,
    },
                                              every_n_iter=1)

    if mode == tf.estimator.ModeKeys.TRAIN:
        if params['use_learning_rate_decay']:
            learning_rate = tf.train.exponential_decay(
                params['learning_rate'],
                global_step,
                decay_steps=params['learning_rate_decay_steps'],
                decay_rate=params['learning_rate_decay'],
                staircase=True)
        else:
            learning_rate = params['learning_rate']

        if params['optimizer'] == 'sgd':
            optimizer = tf.train.GradientDescentOptimizer(
                learning_rate=learning_rate)
        elif params['optimizer'] == 'momentum' and params[
                'momentum'] is not None:
            optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate,
                                                   momentum=params['momentum'])
        elif params['optimizer'] == 'rms':
            optimizer = tf.train.RMSPropOptimizer(learning_rate=learning_rate)
        else:
            optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)

        # Gate the loss on UPDATE_OPS so batch-norm statistics are refreshed
        # on every training step.
        loss = tf.tuple([loss],
                        control_inputs=tf.get_collection(
                            tf.GraphKeys.UPDATE_OPS))[0]
        if params['clip_gradient'] != 0:
            grads = tf.gradients(loss, tf.trainable_variables())
            grads, _ = tf.clip_by_global_norm(grads, params['clip_gradient'])
            grads_and_vars = list(zip(grads, tf.trainable_variables()))
            train_op = optimizer.apply_gradients(grads_and_vars,
                                                 global_step=global_step)
        else:
            train_op = optimizer.minimize(loss, global_step=global_step)

        train_logging_hook = tf.train.LoggingTensorHook(
            tensors={
                'loss': loss,
                'ler': tf.reduce_mean(ler),
                'learning_rate': tf.reduce_mean(learning_rate),
                # 'feal_len': feat_len,
                # 'feal_len2': input_features_length,
                # 'feal_len3': tf.shape(input_features),
            },
            every_n_secs=1)

        return tf.estimator.EstimatorSpec(mode=mode,
                                          loss=loss,
                                          train_op=train_op,
                                          training_hooks=[train_logging_hook],
                                          eval_metric_ops=metrics)

    if mode == tf.estimator.ModeKeys.EVAL:

        def _create_alignment_images_summary(outputs):
            images = outputs
            images = tf.expand_dims(images, -1)
            # Invert and scale from [0, 1] to [0, 255]
            images -= 1
            images = -images
            images *= 255
            summary = tf.summary.image("alignment_images", images)
            return summary

        with tf.name_scope('alignment'):
            alignment_summary = _create_alignment_images_summary(dense_output)

        eval_summary_hook = tf.train.SummarySaverHook(
            save_steps=10,
            output_dir=os.path.join(config.model_dir, 'eval'),
            summary_op=alignment_summary)

        return tf.estimator.EstimatorSpec(
            mode=mode,
            loss=loss,
            evaluation_hooks=[logging_hook, eval_summary_hook],
            eval_metric_ops=metrics)
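
# A hedged sketch of wiring model_fn into tf.estimator. Only the dictionary
# keys are taken from the params[...] lookups inside model_fn; every value
# (and the model_dir and input_fn) is an illustrative assumption.
params = {
    'num_reduce_by_half': 1, 'noise_stddev': 0.1,
    'num_dense_layers_1': 1, 'num_units_1': [256],
    'dense_activations_1': [tf.nn.relu], 'batch_normalization_1': False,
    'batch_normalization_trainable_1': False, 'keep_prob_1': [0.8],
    'kernel_init_1': [None], 'bias_init_1': [None],
    'is_bidirectional': False, 'num_cell_units': [256],
    'cell_activation': [tf.nn.tanh], 'keep_prob_rnn': [0.8],
    'rnn_batch_normalization': False,
    'num_dense_layers_2': 1, 'num_units_2': [256],
    'dense_activations_2': [tf.nn.relu], 'batch_normalization_2': False,
    'batch_normalization_trainable_2': False, 'keep_prob_2': [0.8],
    'kernel_init_2': [None], 'bias_init_2': [None],
    'num_classes': 30, 'beam_width': 0,
    'rnn_regularizer': 0.0, 'dense_regularizer': 0.0,
    'use_learning_rate_decay': False, 'learning_rate': 1e-3,
    'learning_rate_decay_steps': 1000, 'learning_rate_decay': 0.96,
    'optimizer': 'adam', 'momentum': None, 'clip_gradient': 5,
}
estimator = tf.estimator.Estimator(
    model_fn=model_fn,
    config=tf.estimator.RunConfig(model_dir='/tmp/ctc_model'),
    params=params)
# estimator.train(input_fn=train_input_fn)  # input_fn must yield
# ({'feature': ..., 'feat_len': ...}, sparse_labels) batches.
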
Example #3
    def create_graph(self):

        with self.graph.as_default():
            self.tf_is_traing_pl = tf.placeholder_with_default(
                True, shape=(), name='is_training')

            with tf.name_scope("seq_len"):
                self.seq_len = tf.placeholder(tf.int32,
                                              shape=[None],
                                              name="sequence_length")

            with tf.name_scope("input_features"):
                self.input_feature = tf.placeholder(
                    dtype=tf.float32,
                    shape=[None, None, self.network_data.num_features],
                    name="input")
                tf.summary.image('feature', [tf.transpose(self.input_feature)])
            with tf.name_scope("input_labels"):
                self.input_label = tf.sparse_placeholder(dtype=tf.int32,
                                                         shape=[None, None],
                                                         name="input_label")

            self.dense_layer_1 = tf.identity(self.input_feature)
            with tf.name_scope("dense_layer_1"):
                self.dense_layer_1 = dense_multilayer(
                    input_ph=self.dense_layer_1,
                    num_layers=self.network_data.num_dense_layers_1,
                    num_units=self.network_data.num_dense_units_1,
                    name='dense_layer_1',
                    activation_list=self.network_data.dense_activations_1,
                    use_batch_normalization=self.network_data.
                    batch_normalization_1,
                    train_ph=self.tf_is_traing_pl,
                    use_tensorboard=True,
                    keep_prob_list=self.network_data.keep_dropout_1,
                    kernel_initializers=self.network_data.kernel_init_1,
                    bias_initializers=self.network_data.bias_init_1,
                    tensorboard_scope='dense_layer_1')

            with tf.name_scope("RNN_1"):
                if self.network_data.is_bidirectional_1:
                    self.rnn_outputs_1 = bidirectional_rnn(
                        input_ph=self.dense_layer_1,
                        seq_len_ph=self.seq_len,
                        num_layers=len(self.network_data.num_fw_cell_units_1),
                        num_fw_cell_units=self.network_data.
                        num_fw_cell_units_1,
                        num_bw_cell_units=self.network_data.
                        num_bw_cell_units_1,
                        name="RNN_1",
                        activation_fw_list=self.network_data.
                        cell_fw_activation_1,
                        activation_bw_list=self.network_data.
                        cell_bw_activation_1,
                        use_tensorboard=True,
                        tensorboard_scope='RNN_1',
                        output_size=self.network_data.rnn_output_sizes_1)

                else:
                    self.rnn_outputs_1 = unidirectional_rnn(
                        input_ph=self.dense_layer_1,
                        seq_len_ph=self.seq_len,
                        num_layers=len(self.network_data.num_cell_units_1),
                        num_cell_units=self.network_data.num_cell_units_1,
                        name="RNN_1",
                        activation_list=self.network_data.cell_activation_1,
                        use_tensorboard=True,
                        tensorboard_scope='RNN_1',
                        output_size=self.network_data.rnn_output_sizes_1)

            with tf.name_scope("dense_layer_2"):
                self.dense_layer_2 = dense_multilayer(
                    input_ph=self.rnn_outputs_1,
                    num_layers=self.network_data.num_dense_layers_2,
                    num_units=self.network_data.num_dense_units_2,
                    name='dense_layer_2',
                    activation_list=self.network_data.dense_activations_2,
                    use_batch_normalization=self.network_data.
                    batch_normalization_2,
                    train_ph=self.tf_is_traing_pl,
                    use_tensorboard=True,
                    keep_prob_list=self.network_data.keep_dropout_2,
                    kernel_initializers=self.network_data.kernel_init_2,
                    bias_initializers=self.network_data.bias_init_2,
                    tensorboard_scope='dense_layer_2')

            with tf.name_scope("dense_output_1"):
                self.dense_output_no_activation_1 = dense_layer(
                    input_ph=self.dense_layer_2,
                    num_units=self.network_data.num_classes,
                    name='dense_output_no_activation_1',
                    activation=None,
                    use_batch_normalization=False,
                    train_ph=False,
                    use_tensorboard=True,
                    keep_prob=1,
                    tensorboard_scope='dense_output_1')

                self.dense_output_1 = tf.nn.softmax(
                    self.dense_output_no_activation_1, name='dense_output_1')
                tf.summary.histogram('dense_output_1', self.dense_output_1)

            with tf.name_scope("decoder_1"):
                self.output_time_major_1 = tf.transpose(
                    self.dense_output_1, (1, 0, 2))
                self.decoded_1, log_prob = self.network_data.decoder_function(
                    self.output_time_major_1, self.seq_len)
                self.dense_decoded_1 = tf.sparse_to_dense(
                    self.decoded_1[0].indices, self.decoded_1[0].dense_shape,
                    self.decoded_1[0].values)

            with tf.name_scope("dense_layer_3"):
                self.dense_layer_3 = dense_multilayer(
                    input_ph=self.dense_output_1,
                    num_layers=self.network_data.num_dense_layers_3,
                    num_units=self.network_data.num_dense_units_3,
                    name='dense_layer_3',
                    activation_list=self.network_data.dense_activations_3,
                    use_batch_normalization=self.network_data.
                    batch_normalization_3,
                    train_ph=self.tf_is_traing_pl,
                    use_tensorboard=True,
                    keep_prob_list=self.network_data.keep_dropout_3,
                    kernel_initializers=self.network_data.kernel_init_3,
                    bias_initializers=self.network_data.bias_init_3,
                    tensorboard_scope='dense_layer_3')

            with tf.name_scope("RNN_2"):
                if self.network_data.is_bidirectional_2:
                    self.rnn_outputs_2 = bidirectional_rnn(
                        input_ph=self.dense_layer_3,
                        seq_len_ph=self.seq_len,
                        num_layers=len(self.network_data.num_fw_cell_units_2),
                        num_fw_cell_units=self.network_data.
                        num_fw_cell_units_2,
                        num_bw_cell_units=self.network_data.
                        num_bw_cell_units_2,
                        name="RNN_2",
                        activation_fw_list=self.network_data.
                        cell_fw_activation_2,
                        activation_bw_list=self.network_data.
                        cell_bw_activation_2,
                        use_tensorboard=True,
                        tensorboard_scope='RNN_2',
                        output_size=self.network_data.rnn_output_sizes_2)

                else:
                    self.rnn_outputs_2 = unidirectional_rnn(
                        input_ph=self.dense_layer_3,
                        seq_len_ph=self.seq_len,
                        num_layers=len(self.network_data.num_cell_units_2),
                        num_cell_units=self.network_data.num_cell_units_2,
                        name="RNN_2",
                        activation_list=self.network_data.cell_activation_2,
                        use_tensorboard=True,
                        tensorboard_scope='RNN_2',
                        output_size=self.network_data.rnn_output_sizes_2)

            with tf.name_scope("dense_layer_4"):
                self.dense_layer_4 = dense_multilayer(
                    input_ph=self.rnn_outputs_2,
                    num_layers=self.network_data.num_dense_layers_4,
                    num_units=self.network_data.num_dense_units_4,
                    name='dense_layer_4',
                    activation_list=self.network_data.dense_activations_4,
                    use_batch_normalization=self.network_data.
                    batch_normalization_4,
                    train_ph=self.tf_is_traing_pl,
                    use_tensorboard=True,
                    keep_prob_list=self.network_data.keep_dropout_4,
                    kernel_initializers=self.network_data.kernel_init_4,
                    bias_initializers=self.network_data.bias_init_4,
                    tensorboard_scope='dense_layer_4')

            with tf.name_scope("dense_output_2"):
                self.dense_output_no_activation_2 = dense_layer(
                    input_ph=self.dense_layer_4,
                    num_units=self.network_data.num_classes,
                    name='dense_output_no_activation_2',
                    activation=None,
                    use_batch_normalization=False,
                    train_ph=False,
                    use_tensorboard=True,
                    keep_prob=1,
                    tensorboard_scope='dense_output_no_activation_2')

                self.dense_output_2 = tf.nn.softmax(
                    self.dense_output_no_activation_2, name='dense_output_2')
                tf.summary.histogram('dense_output_2', self.dense_output_2)

            with tf.name_scope("decoder_2"):
                self.output_time_major_2 = tf.transpose(
                    self.dense_output_2, (1, 0, 2))
                self.decoded_2, log_prob = self.network_data.decoder_function(
                    self.output_time_major_2, self.seq_len)
                self.dense_decoded_2 = tf.sparse_to_dense(
                    self.decoded_2[0].indices, self.decoded_2[0].dense_shape,
                    self.decoded_2[0].values)

            with tf.name_scope("loss"):
                rnn_loss = 0
                for var in tf.trainable_variables():
                    if var.name.startswith('RNN_') and 'kernel' in var.name:
                        rnn_loss += tf.nn.l2_loss(var)

                dense_loss = 0
                for var in tf.trainable_variables():
                    if var.name.startswith('dense_layer') \
                            and 'kernel' in var.name:
                        dense_loss += tf.nn.l2_loss(var)

                loss_1 = tf.nn.ctc_loss(self.input_label,
                                        self.dense_output_no_activation_1,
                                        self.seq_len,
                                        time_major=False)
                loss_2 = tf.nn.ctc_loss(self.input_label,
                                        self.dense_output_no_activation_2,
                                        self.seq_len,
                                        time_major=False)
                # Weighted sum of the two CTC heads: the final head plus the
                # intermediate head scaled by 0.3.
                self.logits_loss = tf.reduce_mean(tf.reduce_sum(
                    loss_2)) + 0.3 * tf.reduce_mean(tf.reduce_sum(loss_1))
                self.loss = self.logits_loss \
                            + self.network_data.rnn_regularizer * rnn_loss \
                            + self.network_data.dense_regularizer * dense_loss
                tf.summary.scalar('loss', self.loss)

            # define the optimizer
            with tf.name_scope("training"):
                self.training_op = self.network_data.optimizer.minimize(
                    self.loss)

            with tf.name_scope("label_error_rate"):
                # Inaccuracy: label error rate
                self.ler = tf.reduce_mean(
                    tf.edit_distance(hypothesis=tf.cast(
                        self.decoded_2[0], tf.int32),
                                     truth=self.input_label,
                                     normalize=True))
                tf.summary.scalar('label_error_rate', tf.reduce_mean(self.ler))

            self.checkpoint_saver = tf.train.Saver(save_relative_paths=True)
            self.merged_summary = tf.summary.merge_all()
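
# A hedged sketch of one training step against the graph built above. The
# `model` instance and the batch arrays are illustrative assumptions; only
# the attribute names (input_feature, seq_len, input_label, training_op,
# loss) come from create_graph.
import numpy as np


def to_sparse_tuple(sequences):
    # Pack a list of label sequences into the (indices, values, dense_shape)
    # triple that tf.sparse_placeholder expects.
    indices = [(i, j) for i, seq in enumerate(sequences)
               for j in range(len(seq))]
    values = [v for seq in sequences for v in seq]
    shape = [len(sequences), max(len(seq) for seq in sequences)]
    return tf.SparseTensorValue(np.asarray(indices, np.int64),
                                np.asarray(values, np.int32),
                                np.asarray(shape, np.int64))


# with tf.Session(graph=model.graph) as sess:
#     sess.run(tf.global_variables_initializer())
#     _, batch_loss = sess.run(
#         [model.training_op, model.loss],
#         feed_dict={model.input_feature: batch_features,
#                    model.seq_len: batch_lengths,
#                    model.input_label: to_sparse_tuple(batch_labels)})
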
Example #4
    def create_graph(self):

        with self.graph.as_default():
            self.tf_is_traing_pl = tf.placeholder_with_default(
                True, shape=(), name='is_training')

            with tf.name_scope("seq_len"):
                self.seq_len = tf.placeholder(tf.int32,
                                              shape=[None],
                                              name="sequence_length")

            with tf.name_scope("input_features"):
                self.input_feature = tf.placeholder(
                    dtype=tf.float32,
                    shape=[None, None, self.network_data.num_features],
                    name="input")
                tf.summary.image('feature', [tf.transpose(self.input_feature)])
            with tf.name_scope("input_labels"):
                self.input_label = tf.sparse_placeholder(dtype=tf.int32,
                                                         shape=[None, None],
                                                         name="input_label")

            self.dense_layer_1 = tf.identity(self.input_feature)
            with tf.name_scope("dense_layer_1"):
                self.dense_layer_1 = dense_multilayer(
                    input_ph=self.dense_layer_1,
                    num_layers=self.network_data.num_dense_layers_1,
                    num_units=self.network_data.num_dense_units_1,
                    name='dense_layer_1',
                    activation_list=self.network_data.dense_activations_1,
                    use_batch_normalization=self.network_data.
                    batch_normalization_1,
                    train_ph=self.tf_is_traing_pl,
                    use_tensorboard=True,
                    keep_prob_list=self.network_data.keep_dropout_1,
                    kernel_initializers=self.network_data.kernel_init_1,
                    bias_initializers=self.network_data.bias_init_1,
                    tensorboard_scope='dense_layer_1')

            with tf.name_scope("RNN_cell"):
                if self.network_data.is_bidirectional:
                    self.rnn_outputs = bidirectional_rnn(
                        input_ph=self.dense_layer_1,
                        seq_len_ph=self.seq_len,
                        num_layers=len(self.network_data.num_fw_cell_units),
                        num_fw_cell_units=self.network_data.num_fw_cell_units,
                        num_bw_cell_units=self.network_data.num_bw_cell_units,
                        name="RNN_cell",
                        activation_fw_list=self.network_data.
                        cell_fw_activation,
                        activation_bw_list=self.network_data.
                        cell_bw_activation,
                        use_tensorboard=True,
                        tensorboard_scope='RNN',
                        output_size=self.network_data.rnn_output_sizes)

                else:
                    self.rnn_outputs = unidirectional_rnn(
                        input_ph=self.dense_layer_1,
                        seq_len_ph=self.seq_len,
                        num_layers=len(self.network_data.num_cell_units),
                        num_cell_units=self.network_data.num_cell_units,
                        name="RNN_cell",
                        activation_list=self.network_data.cell_activation,
                        use_tensorboard=True,
                        tensorboard_scope='RNN',
                        output_size=self.network_data.rnn_output_sizes)

            with tf.name_scope("dense_layer_2"):
                self.dense_layer_2 = dense_multilayer(
                    input_ph=self.rnn_outputs,
                    num_layers=self.network_data.num_dense_layers_2,
                    num_units=self.network_data.num_dense_units_2,
                    name='dense_layer_2',
                    activation_list=self.network_data.dense_activations_2,
                    use_batch_normalization=self.network_data.
                    batch_normalization_2,
                    train_ph=self.tf_is_traing_pl,
                    use_tensorboard=True,
                    keep_prob_list=self.network_data.keep_dropout_2,
                    kernel_initializers=self.network_data.kernel_init_2,
                    bias_initializers=self.network_data.bias_init_2,
                    tensorboard_scope='dense_layer_2')

            with tf.name_scope("dense_output"):
                self.dense_output_no_activation = dense_layer(
                    input_ph=self.dense_layer_2,
                    num_units=self.network_data.num_classes,
                    name='dense_output_no_activation',
                    activation=None,
                    use_batch_normalization=False,
                    train_ph=False,
                    use_tensorboard=True,
                    keep_prob=1,
                    tensorboard_scope='dense_output')

                self.dense_output = tf.nn.softmax(
                    self.dense_output_no_activation, name='dense_output')
                tf.summary.histogram('dense_output', self.dense_output)

            with tf.name_scope("output_classes"):
                self.output_classes = tf.argmax(self.dense_output, 2)

            with tf.name_scope("loss"):
                rnn_loss = 0
                for var in tf.trainable_variables():
                    if var.name.startswith(
                            'RNN_cell') and 'kernel' in var.name:
                        rnn_loss += tf.nn.l2_loss(var)

                dense_loss = 0
                for var in tf.trainable_variables():
                    if var.name.startswith(
                            'dense_layer') and 'kernel' in var.name:
                        dense_loss += tf.nn.l2_loss(var)

                loss = tf.nn.ctc_loss(self.input_label,
                                      self.dense_output_no_activation,
                                      self.seq_len,
                                      time_major=False)
                self.logits_loss = tf.reduce_mean(tf.reduce_sum(loss))
                self.loss = self.logits_loss \
                            + self.network_data.rnn_regularizer * rnn_loss \
                            + self.network_data.dense_regularizer * dense_loss
                tf.summary.scalar('loss', self.loss)

            # define the optimizer
            with tf.name_scope("training"):
                self.training_op = self.network_data.optimizer.minimize(
                    self.loss)

            with tf.name_scope("decoder"):
                self.output_time_major = tf.transpose(self.dense_output,
                                                      (1, 0, 2))

                self.word_beam_search_module = tf.load_op_library(
                    self.network_data.word_beam_search_path)
                # prepare information about language (dictionary, characters in dataset, characters forming words)
                chars = ''.join(self.network_data.char_list)
                word_chars = open(
                    self.network_data.word_char_list_path).read().splitlines()[0]
                corpus = open(self.network_data.corpus_path).read()

                # decode using the "Words" mode of word beam search
                self.decoded = self.word_beam_search_module.word_beam_search(
                    self.output_time_major, self.network_data.beam_width,
                    self.network_data.scoring_mode,
                    self.network_data.smoothing, corpus.encode('utf8'),
                    chars.encode('utf8'), word_chars.encode('utf8'))

            with tf.name_scope("label_error_rate"):
                # Not the best way to compute the LER, but of several
                # approaches tried this one worked best.
                # Inaccuracy: label error rate
                dense_label = tf.sparse_to_dense(self.input_label.indices,
                                                 self.input_label.dense_shape,
                                                 self.input_label.values)
                # (self.network_data.num_classes - 1) is the blank index
                decoded_mask = tf.not_equal(self.decoded,
                                            self.network_data.num_classes - 1)
                decoded_mask.set_shape([None, None])
                decoded_mask = tf.boolean_mask(self.decoded, decoded_mask)

                label_mask = tf.not_equal(dense_label,
                                          self.network_data.num_classes - 1)
                label_mask.set_shape([None, None])
                label_mask = tf.boolean_mask(dense_label, label_mask)

                self.edit_distance = tf.edit_distance(
                    hypothesis=tf.cast(
                        tf.contrib.layers.dense_to_sparse([decoded_mask]),
                        tf.int32),
                    truth=tf.cast(
                        tf.contrib.layers.dense_to_sparse([label_mask]),
                        tf.int32),
                    normalize=True)
                self.ler = tf.reduce_mean(self.edit_distance)
                tf.summary.scalar('label_error_rate', tf.reduce_mean(self.ler))

            self.checkpoint_saver = tf.train.Saver(save_relative_paths=True)
            self.merged_summary = tf.summary.merge_all()
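
# A minimal pure-Python sketch of what the label_error_rate block above
# computes: blank-stripped, length-normalized edit distance between one
# hypothesis and one ground truth. Illustrative only; the in-graph version
# batches this through tf.edit_distance.
def label_error_rate(hyp, truth, blank):
    hyp = [c for c in hyp if c != blank]
    truth = [c for c in truth if c != blank]
    # Single-row Levenshtein distance.
    d = list(range(len(truth) + 1))
    for i, h in enumerate(hyp, 1):
        prev, d[0] = d[0], i
        for j, t in enumerate(truth, 1):
            prev, d[j] = d[j], min(d[j] + 1,          # deletion
                                   d[j - 1] + 1,      # insertion
                                   prev + (h != t))   # substitution
    return d[len(truth)] / max(len(truth), 1)


# label_error_rate([3, 1, 2], [3, 2, 2], blank=4) == 1/3
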
Example #5
    def create_graph(self):
        with self.graph.as_default():
            self.tf_is_traing_pl = tf.placeholder_with_default(
                True, shape=(), name='is_training')

            with tf.name_scope("input_features"):
                self.input_feature = tf.placeholder(
                    dtype=tf.float32,
                    shape=[None, None, self.network_data.input_features],
                    name="input_features")
                tf.summary.image('input_features',
                                 [tf.transpose(self.input_feature)])

            with tf.name_scope("output_features"):
                self.output_feature = tf.placeholder(
                    dtype=tf.float32,
                    shape=[None, None, self.network_data.input_features],
                    name="output_features")

            with tf.name_scope("encoder"):
                self.encoder_out = encoder_layer(
                    input_ph=self.input_feature,
                    num_layers=self.network_data.encoder_num_layers,
                    num_units=self.network_data.encoder_num_units,
                    activation_list=self.network_data.encoder_activation,
                    use_batch_normalization=self.network_data.
                    encoder_batch_norm,
                    train_ph=self.tf_is_traing_pl,
                    use_tensorboard=True,
                    keep_prob_list=self.network_data.encoder_keep_prob,
                    tensorboard_scope='encoder',
                    name="encoder")

            with tf.name_scope("decoder"):
                self.decoder_out = decoder_layer(
                    input_ph=self.encoder_out,
                    num_layers=self.network_data.decoder_num_layers,
                    num_units=self.network_data.decoder_num_units,
                    activation_list=self.network_data.decoder_activation,
                    use_batch_normalization=self.network_data.
                    decoder_batch_norm,
                    train_ph=self.tf_is_traing_pl,
                    use_tensorboard=True,
                    keep_prob_list=self.network_data.decoder_keep_prob,
                    tensorboard_scope='decoder',
                    name="decoder")

            with tf.name_scope('reconstructed'):
                self.reconstructed_out = dense_layer(
                    input_ph=self.decoder_out,
                    num_units=self.network_data.input_features,
                    name="reconstruction",
                    activation=self.network_data.reconstruction_activation,
                    use_batch_normalization=False,
                    train_ph=True,
                    use_tensorboard=True,
                    keep_prob=1,
                    tensorboard_scope='reconstructed')

            with tf.name_scope("loss"):
                encoder_loss = 0
                for var in tf.trainable_variables():
                    if var.name.startswith('encoder') and 'kernel' in var.name:
                        encoder_loss += tf.nn.l2_loss(var)

                decoder_loss = 0
                for var in tf.trainable_variables():
                    if var.name.startswith('decoder') and 'kernel' in var.name:
                        decoder_loss += tf.nn.l2_loss(var)

                self.reconstruction_loss = tf.reduce_mean(
                    tf.reduce_sum(tf.square(
                        tf.subtract(self.output_feature,
                                    self.reconstructed_out)),
                                  axis=1))

                self.loss = self.reconstruction_loss \
                            + self.network_data.encoder_regularizer * encoder_loss \
                            + self.network_data.decoder_regularizer * decoder_loss
                tf.summary.scalar('loss', self.loss)

            # define the optimizer
            with tf.name_scope("optimization"):
                self.optimizer = self.network_data.optimizer.minimize(
                    self.loss)

            self.checkpoint_saver = tf.train.Saver(save_relative_paths=True)
            self.merged_summary = tf.summary.merge_all()
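
# A hedged NumPy equivalent of the reconstruction loss above: squared error
# summed over the time axis, then averaged over the remaining dimensions.
# The arrays are illustrative assumptions of shape [batch, time, features].
import numpy as np


def reconstruction_loss(target, reconstructed):
    return np.mean(np.sum(np.square(target - reconstructed), axis=1))


# reconstruction_loss(np.ones((2, 4, 3)), np.zeros((2, 4, 3))) == 4.0
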
Example #6
    def create_graph(self,
                     use_tfrecords=False,
                     features_tensor=None,
                     labels_tensor=None,
                     features_len_tensor=None):

        with self.graph.as_default():
            self.tf_is_traing_pl = tf.placeholder_with_default(True, shape=(), name='is_training')

            with tf.name_scope("seq_len"):
                if not use_tfrecords:
                    self.input_features_length = tf.placeholder(tf.int32, shape=[None], name="sequence_length")
                else:
                    self.input_features_length = features_len_tensor

            with tf.name_scope("input_features"):
                if not use_tfrecords:
                    self.input_features = tf.placeholder(
                        dtype=tf.float32,
                        shape=[None, None, self.network_data.num_features],
                        name="input")
                else:
                    self.input_features = features_tensor

            with tf.name_scope("input_labels"):
                if not use_tfrecords:
                    self.input_labels = tf.sparse_placeholder(
                        dtype=tf.int32,
                        shape=[None, None],
                        name="input_label")
                else:
                    self.input_labels = labels_tensor

            self.rnn_input = tf.identity(self.input_features)
            with tf.name_scope("dense_layer_1"):
                self.rnn_input = dense_multilayer(input_ph=self.rnn_input,
                                                  num_layers=self.network_data.num_dense_layers_1,
                                                  num_units=self.network_data.num_units_1,
                                                  name='dense_layer_1',
                                                  activation_list=self.network_data.dense_activations_1,
                                                  use_batch_normalization=self.network_data.batch_normalization_1,
                                                  train_ph=self.tf_is_traing_pl,
                                                  use_tensorboard=True,
                                                  keep_prob_list=self.network_data.keep_prob_1,
                                                  kernel_initializers=self.network_data.kernel_init_1,
                                                  bias_initializers=self.network_data.bias_init_1,
                                                  tensorboard_scope='dense_layer_1')

            with tf.name_scope("RNN_cell"):
                if self.network_data.is_bidirectional:
                    self.rnn_outputs = bidirectional_rnn(
                        input_ph=self.rnn_input,
                        seq_len_ph=self.input_features_length,
                        num_layers=len(self.network_data.num_fw_cell_units),
                        num_fw_cell_units=self.network_data.num_fw_cell_units,
                        num_bw_cell_units=self.network_data.num_bw_cell_units,
                        name="RNN_cell",
                        activation_fw_list=self.network_data.cell_fw_activation,
                        activation_bw_list=self.network_data.cell_bw_activation,
                        use_tensorboard=True,
                        tensorboard_scope='RNN',
                        output_size=self.network_data.rnn_output_sizes)

                else:
                    self.rnn_outputs = unidirectional_rnn(
                        input_ph=self.rnn_input,
                        seq_len_ph=self.input_features_length,
                        num_layers=len(self.network_data.num_cell_units),
                        num_cell_units=self.network_data.num_cell_units,
                        name="RNN_cell",
                        activation_list=self.network_data.cell_activation,
                        use_tensorboard=True,
                        tensorboard_scope='RNN',
                        output_size=self.network_data.rnn_output_sizes)

            with tf.name_scope("dense_layer_2"):
                self.rnn_outputs = dense_multilayer(input_ph=self.rnn_outputs,
                                                    num_layers=self.network_data.num_dense_layers_2,
                                                    num_units=self.network_data.num_units_2,
                                                    name='dense_layer_2',
                                                    activation_list=self.network_data.dense_activations_2,
                                                    use_batch_normalization=self.network_data.batch_normalization_2,
                                                    train_ph=self.tf_is_traing_pl,
                                                    use_tensorboard=True,
                                                    keep_prob_list=self.network_data.keep_prob_2,
                                                    kernel_initializers=self.network_data.kernel_init_2,
                                                    bias_initializers=self.network_data.bias_init_2,
                                                    tensorboard_scope='dense_layer_2')

            with tf.name_scope("dense_output"):
                self.dense_output_no_activation = dense_layer(input_ph=self.rnn_outputs,
                                                              num_units=self.network_data.num_classes,
                                                              name='dense_output_no_activation',
                                                              activation=None,
                                                              use_batch_normalization=False,
                                                              train_ph=False,
                                                              use_tensorboard=True,
                                                              keep_prob=1,
                                                              tensorboard_scope='dense_output')

                self.dense_output = tf.nn.softmax(self.dense_output_no_activation, name='dense_output')
                tf.summary.histogram('dense_output', self.dense_output)

            with tf.name_scope("loss"):
                rnn_loss = 0
                for var in tf.trainable_variables():
                    if var.name.startswith('RNN_cell') and 'kernel' in var.name:
                        rnn_loss += tf.nn.l2_loss(var)

                dense_loss = 0
                for var in tf.trainable_variables():
                    if (var.name.startswith('dense_layer')
                            or var.name.startswith('input_dense_layer')) \
                            and 'kernel' in var.name:
                        dense_loss += tf.nn.l2_loss(var)

                loss = tf.nn.ctc_loss(self.input_labels,
                                      self.dense_output_no_activation,
                                      self.input_features_length,
                                      time_major=False)
                self.logits_loss = tf.reduce_mean(tf.reduce_sum(loss))
                self.loss = self.logits_loss \
                            + self.network_data.rnn_regularizer * rnn_loss \
                            + self.network_data.dense_regularizer * dense_loss
                tf.summary.scalar('loss', self.loss)

            # define the optimizer
            with tf.name_scope("training"):
                self.train_op = self.network_data.optimizer.minimize(self.loss)

            with tf.name_scope("decoder"):
                self.output_time_major = tf.transpose(self.dense_output, (1, 0, 2))
                self.decoded, log_prob = self.network_data.decoder_function(self.output_time_major, self.input_features_length)

            with tf.name_scope("label_error_rate"):
                # Inaccuracy: label error rate
                self.ler = tf.reduce_mean(tf.edit_distance(hypothesis=tf.cast(self.decoded[0], tf.int32),
                                                           truth=self.input_labels,
                                                           normalize=True))
                tf.summary.scalar('label_error_rate', tf.reduce_mean(self.ler))

            self.checkpoint_saver = tf.train.Saver(save_relative_paths=True)
            self.merged_summary = tf.summary.merge_all()
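
# A hedged sketch of driving create_graph from tf.data tensors instead of
# placeholders (the use_tfrecords=True path above). The file name, parse_fn,
# and the `model` instance are illustrative assumptions.
dataset = tf.data.TFRecordDataset(['train.tfrecord'])
# parse_fn must map one serialized example to
# (features [time, num_features], feat_len, sparse_labels):
# dataset = dataset.map(parse_fn)
# features, feat_len, labels = dataset.make_one_shot_iterator().get_next()
# model.create_graph(use_tfrecords=True,
#                    features_tensor=features,
#                    labels_tensor=labels,
#                    features_len_tensor=feat_len)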