Exemplo n.º 1
    def _decode_lambda(self, args):
        """
        Run CTC decoding inside the tensorflow graph.

        If kenlm_directory is specified, a modified tensorflow build
        (available at https://github.com/timediv/tensorflow-with-kenlm)
        is required, which extends ctc_decode with a kenlm decoder.

        :return:
            Most probable decoded sequence. Important: blank labels are
            returned as `-1`.
        """
        import tensorflow as tf

        probabilities, lengths = args

        # Time-major log-probabilities; the epsilon guards against log(0).
        log_probs = tf.log(
            tf.transpose(probabilities, perm=[1, 0, 2]) + 1e-8)
        sequence_lengths = tf.to_int32(tf.squeeze(lengths, axis=[1]))

        decoded, _ = self.ctc_get_decoded_and_log_probability_batch(
            log_probs, sequence_lengths)

        # Densify each sparse decoding result, filling blanks with -1.
        dense_decodings = [
            tf.sparse_to_dense(sparse.indices,
                               sparse.dense_shape,
                               sparse.values,
                               default_value=-1)
            for sparse in decoded
        ]
        return single(dense_decodings)
Exemplo n.º 2
                def word_with_id(transcription: dict) -> Tuple[str, int]:
                    """Return the (word, id) pair of a parsed transcription object.

                    Selects the single label whose name is in the enclosing
                    `label_names` and pairs its value with the transcription id.

                    :raises Exception: if no label name matches `label_names`.
                    """
                    # NOTE(review): the annotation previously read `json`,
                    # which is the module, not a type; the value is a parsed
                    # JSON object, i.e. a dict.
                    labels = transcription["labels"]

                    matching_labels = [
                        label for label in labels
                        if label["name"] in label_names
                    ]

                    if not matching_labels:
                        raise Exception(
                            "No matching label names, found {} instead.".
                            format([label["name"] for label in labels]))

                    # `single` enforces that exactly one label matched.
                    matching_label = single(matching_labels)
                    return matching_label["value"], transcription["id"]
Exemplo n.º 3
 def get_predicted_graphemes_and_loss_batch(self):
     """Build a backend function from the loss net's inputs (plus the
     learning-phase flag) to the decoded graphemes and the loss."""
     function_inputs = self.loss_net.inputs + [backend.learning_phase()]
     function_outputs = [
         single(self.decoding_net.outputs),
         single(self.loss_net.outputs),
     ]
     return backend.function(function_inputs, function_outputs)
Exemplo n.º 4
    def __init__(
            self,
            input_size_per_time_step: int,
            allowed_characters: List[chr],
            use_raw_wave_input: bool = False,
            activation: str = "relu",
            output_activation: str = "softmax",
            optimizer: Optional[Optimizer] = None,
            dropout: Optional[float] = None,
            load_model_from_directory: Optional[Path] = None,
            load_epoch: Optional[int] = None,
            allowed_characters_for_loaded_model: Optional[List[chr]] = None,
            frozen_layer_count: int = 0,
            reinitialize_trainable_loaded_layers: bool = False,
            use_asg: bool = False,
            asg_transition_probabilities: Optional[ndarray] = None,
            asg_initial_probabilities: Optional[ndarray] = None,
            kenlm_directory: Optional[Path] = None):
        """Set up the predictive net and optionally load pretrained weights.

        :param input_size_per_time_step: size of the feature vector per time step
        :param allowed_characters: the output alphabet
        :param use_raw_wave_input: feed raw wave input instead of features
        :param activation: activation of the hidden layers
        :param output_activation: activation of the output layer
        :param optimizer: training optimizer; defaults to ``Adam(1e-4)``
        :param dropout: dropout rate, or None to disable dropout
        :param load_model_from_directory: directory to load stored weights from
        :param load_epoch: epoch of the stored weights to load
        :param allowed_characters_for_loaded_model: alphabet of the loaded
            model, if it differs from ``allowed_characters``
        :param frozen_layer_count: number of leading layers to freeze
        :param reinitialize_trainable_loaded_layers: load only the frozen
            first layers and reinitialize the remaining trainable ones
        :param use_asg: use ASG instead of CTC
        :param asg_transition_probabilities: ASG transition probabilities,
            or None to use defaults
        :param asg_initial_probabilities: ASG initial probabilities,
            or None to use defaults
        :param kenlm_directory: directory of the kenlm decoder data, if any
        :raises ValueError: if layers should be frozen without loading a
            model, or if the kenlm vocabulary differs from the allowed
            characters
        """
        if frozen_layer_count > 0 and load_model_from_directory is None:
            raise ValueError(
                "Layers cannot be frozen if model is trained from scratch.")

        self.kenlm_directory = kenlm_directory

        # Pick the grapheme encoding that matches the selected loss (ASG vs. CTC).
        self.grapheme_encoding = AsgGraphemeEncoding(allowed_characters=allowed_characters) \
            if use_asg else CtcGraphemeEncoding(allowed_characters=allowed_characters)

        self.asg_transition_probabilities = self._default_asg_transition_probabilities(
            self.grapheme_encoding.grapheme_set_size) \
            if asg_transition_probabilities is None else asg_transition_probabilities

        self.asg_initial_probabilities = self._default_asg_initial_probabilities(
            self.grapheme_encoding.grapheme_set_size) \
            if asg_initial_probabilities is None else asg_initial_probabilities

        self.use_asg = use_asg
        self.frozen_layer_count = frozen_layer_count
        self.output_activation = output_activation
        self.activation = activation
        self.use_raw_wave_input = use_raw_wave_input
        self.input_size_per_time_step = input_size_per_time_step
        # Instantiate the default optimizer per model: a default argument
        # value `Adam(1e-4)` would be created once at definition time and
        # shared — together with its internal state — across all instances.
        self.optimizer = Adam(1e-4) if optimizer is None else optimizer
        self.load_epoch = load_epoch
        self.dropout = dropout
        self.predictive_net = self.create_predictive_net()
        self.prediction_phase_flag = 0.

        if self.kenlm_directory is not None:
            # The decoder's vocabulary file is expected to contain exactly
            # one line listing its characters (`single` fails otherwise).
            expected_characters = list(
                single(
                    read_text(self.kenlm_directory / "vocabulary",
                              encoding='utf8').splitlines()).lower())

            if allowed_characters != expected_characters:
                raise ValueError(
                    "Allowed characters {} differ from those expected by kenlm decoder: {}"
                    .format(allowed_characters, expected_characters))

        if load_model_from_directory is not None:
            # When trainable layers are to be reinitialized, only the frozen
            # first layers are taken from the stored model.
            self.load_weights(allowed_characters_for_loaded_model,
                              load_epoch,
                              load_model_from_directory,
                              loaded_first_layers_count=frozen_layer_count if
                              reinitialize_trainable_loaded_layers else None)