Example #1
    def infer(self, input_file_path, model_file, output_file_path):
        print("Infering ...")
        check_key_in_dict(dictionary=self.configs, keys=["tfrecords_dir"])
        msg = self.load_model(model_file)
        if msg:
            raise Exception(msg)
        tf_infer_dataset = Dataset(data_path=input_file_path,
                                   tfrecords_dir=self.configs["tfrecords_dir"],
                                   mode="infer")
        tf_infer_dataset = tf_infer_dataset(
            batch_size=self.configs["batch_size"],
            text_featurizer=self.text_featurizer,
            speech_conf=self.configs["speech_conf"])

        def infer_step(feature, input_length):
            prediction = self.predict(feature, input_length)
            return bytes_to_string(prediction.numpy())

        with open(output_file_path, "a", encoding="utf-8") as of:
            # Write the header once, then append one prediction per line.
            of.write("Predictions\n")
            for features, inp_length in tf_infer_dataset:
                predictions = infer_step(features, inp_length)
                for pred in predictions:
                    of.write(pred + "\n")
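
Every example in this listing guards its configuration or request access with check_key_in_dict, but the helper itself is not shown. The minimal sketch below is an assumption inferred only from the call sites (keyword arguments dictionary and keys) and from Example #4, which catches a ValueError from it; the real implementation may differ.

def check_key_in_dict(dictionary, keys):
    # Hypothetical sketch: raise if any required key is missing.
    for key in keys:
        if key not in dictionary:
            raise ValueError(f"{key} must be defined in the given dictionary")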
Example #2
    def test_with_noise_filter(self, model_file, output_file_path):
        print("Testing model ...")
        if not self.noise_filter:
            raise ValueError("noise_filter must be defined")

        check_key_in_dict(dictionary=self.configs,
                          keys=["test_data_transcript_paths", "tfrecords_dir"])
        test_dataset = Dataset(
            data_path=self.configs["test_data_transcript_paths"],
            tfrecords_dir=self.configs["tfrecords_dir"],
            mode="test")
        msg = self.load_saved_model(model_file)
        if msg:
            raise Exception(msg)

        tf_test_dataset = test_dataset(text_featurizer=self.text_featurizer,
                                       speech_conf=self.configs["speech_conf"],
                                       batch_size=1,
                                       feature_extraction=False)

        def test_step(signal, label):
            prediction = self.infer_single(signal)
            label = self.decoder.convert_to_string_single(label)

            print(f"Pred: {prediction}")
            print(f"Groundtruth: {label}")
            _wer, _wer_count = wer(decode=prediction, target=label)
            _cer, _cer_count = cer(decode=prediction, target=label)

            gc.collect()

            return _wer, _wer_count, _cer, _cer_count

        total_wer = 0.0
        wer_count = 0.0
        total_cer = 0.0
        cer_count = 0.0

        for signal, label in tf_test_dataset.as_numpy_iterator():
            batch_wer, batch_wer_count, batch_cer, batch_cer_count = test_step(
                signal, label)
            total_wer += batch_wer
            total_cer += batch_cer
            wer_count += batch_wer_count
            cer_count += batch_cer_count

        results = (total_wer / wer_count, total_cer / cer_count)

        print(f"WER: {results[0]}, CER: {results[-1]}")

        with open(output_file_path, "w", encoding="utf-8") as of:
            of.write("WER: " + str(results[0]) + "\n")
            of.write("CER: " + str(results[-1]) + "\n")
Example #3
def create_decoder(decoder_config, index_to_token, num_classes, vocab_array):
    check_key_in_dict(decoder_config, keys=["name"])
    if decoder_config["name"] == "beamsearch":
        check_key_in_dict(decoder_config, keys=["beam_width"])
        if decoder_config.get("lm_path", None) is not None:
            check_key_in_dict(decoder_config, keys=["alpha", "beta"])
            decoder = BeamSearchDecoder(
                index_to_token=index_to_token,
                num_classes=num_classes,
                beam_width=decoder_config["beam_width"],
                lm_path=os.path.expanduser(decoder_config["lm_path"]),
                alpha=decoder_config["alpha"],
                beta=decoder_config["beta"],
                vocab_array=vocab_array)
        else:
            decoder = BeamSearchDecoder(
                index_to_token=index_to_token,
                num_classes=num_classes,
                beam_width=decoder_config["beam_width"],
                vocab_array=vocab_array)
    elif decoder_config["name"] == "greedy":
        decoder = GreedyDecoder(index_to_token=index_to_token,
                                num_classes=num_classes,
                                vocab_array=vocab_array)
    else:
        raise ValueError("'decoder' value must be either 'beamsearch',\
                         'beamsearch_lm' or 'greedy'")
    return decoder
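
For reference, a small usage sketch of create_decoder. The configuration values, the placeholder vocabulary, and the num_classes choice (vocabulary size plus one for the CTC blank) are illustrative assumptions, not values taken from the project.

# Hypothetical configuration; the keys mirror the ones create_decoder checks.
decoder_config = {
    "name": "beamsearch",
    "beam_width": 500,
    "lm_path": None,  # set a path (plus "alpha" and "beta") to enable the LM branch
}

vocab_array = ["a", "b", "c", " "]  # placeholder vocabulary
index_to_token = {i: t for i, t in enumerate(vocab_array)}

decoder = create_decoder(decoder_config,
                         index_to_token=index_to_token,
                         num_classes=len(vocab_array) + 1,
                         vocab_array=vocab_array)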
Example #4
    def decorated_func(*args, **kwargs):
        try:
            check_key_in_dict(dictionary=request.files, keys=["payload"])
        except ValueError as e:
            return make_response(({"payload": str(e)}, 400))
        return func(*args, **kwargs)
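
Example #4 shows only the inner function of what is presumably a Flask request-validation decorator. A self-contained sketch of the surrounding decorator follows; the outer name check_payload, the imports, and the usage shown in the trailing comment are assumptions, since only decorated_func appears in the original snippet.

import functools

from flask import make_response, request

def check_payload(func):
    # Hypothetical outer decorator wrapping the decorated_func shown above.
    @functools.wraps(func)
    def decorated_func(*args, **kwargs):
        try:
            check_key_in_dict(dictionary=request.files, keys=["payload"])
        except ValueError as e:
            return make_response(({"payload": str(e)}, 400))
        return func(*args, **kwargs)

    return decorated_func

# Hypothetical usage on a Flask route:
# @app.route("/transcribe", methods=["POST"])
# @check_payload
# def transcribe():
#     ...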
Example #5
    def test(self, model_file, output_file_path):
        print("Testing model ...")
        check_key_in_dict(dictionary=self.configs,
                          keys=["test_data_transcript_paths", "tfrecords_dir"])
        test_dataset = Dataset(
            data_path=self.configs["test_data_transcript_paths"],
            tfrecords_dir=self.configs["tfrecords_dir"],
            mode="test")
        msg = self.load_saved_model(model_file)
        if msg:
            raise Exception(msg)

        tf_test_dataset = test_dataset(text_featurizer=self.text_featurizer,
                                       speech_conf=self.configs["speech_conf"],
                                       batch_size=self.configs["batch_size"])

        def test_step(features, inp_length, transcripts):
            predictions = self.predict(features, inp_length)
            predictions = bytes_to_string(predictions.numpy())

            transcripts = self.decoder.convert_to_string(transcripts)

            b_wer = 0.0
            b_wer_count = 0.0
            b_cer = 0.0
            b_cer_count = 0.0

            for idx, decoded in enumerate(predictions):
                print(f"Pred: {decoded}")
                print(f"Groundtruth: {transcripts[idx]}")
                _wer, _wer_count = wer(decode=decoded, target=transcripts[idx])
                _cer, _cer_count = cer(decode=decoded, target=transcripts[idx])
                b_wer += _wer
                b_cer += _cer
                b_wer_count += _wer_count
                b_cer_count += _cer_count

            gc.collect()

            return b_wer, b_wer_count, b_cer, b_cer_count

        total_wer = 0.0
        wer_count = 0.0
        total_cer = 0.0
        cer_count = 0.0

        for feature, input_length, label, _ in tf_test_dataset:
            batch_wer, batch_wer_count, batch_cer, batch_cer_count = test_step(
                feature, input_length, label)
            total_wer += batch_wer
            total_cer += batch_cer
            wer_count += batch_wer_count
            cer_count += batch_cer_count

        results = (total_wer / wer_count, total_cer / cer_count)

        print(f"WER: {results[0]}, CER: {results[-1]}")

        with open(output_file_path, "w", encoding="utf-8") as of:
            of.write("WER: " + str(results[0]) + "\n")
            of.write("CER: " + str(results[-1]) + "\n")
Example #6
    def keras_train_and_eval(self, model_file=None):
        print("Training and evaluating model ...")

        check_key_in_dict(dictionary=self.configs,
                          keys=[
                              "tfrecords_dir", "checkpoint_dir",
                              "augmentations", "log_dir",
                              "train_data_transcript_paths"
                          ])
        augmentations = self.configs["augmentations"]
        augmentations.append(None)

        train_dataset = Dataset(
            data_path=self.configs["train_data_transcript_paths"],
            tfrecords_dir=self.configs["tfrecords_dir"],
            mode="train",
            is_keras=True)
        tf_train_dataset = train_dataset(
            text_featurizer=self.text_featurizer,
            speech_conf=self.configs["speech_conf"],
            batch_size=self.configs["batch_size"],
            augmentations=augmentations)
        # tf_train_dataset_sortagrad = train_dataset(text_featurizer=self.text_featurizer,
        #                                            speech_conf=self.configs["speech_conf"],
        #                                            batch_size=self.configs["batch_size"],
        #                                            augmentations=augmentations, sortagrad=True)

        tf_eval_dataset = None
        if self.configs["eval_data_transcript_paths"]:
            eval_dataset = Dataset(
                data_path=self.configs["eval_data_transcript_paths"],
                tfrecords_dir=self.configs["tfrecords_dir"],
                mode="eval",
                is_keras=True)
            tf_eval_dataset = eval_dataset(
                text_featurizer=self.text_featurizer,
                speech_conf=self.configs["speech_conf"],
                batch_size=self.configs["batch_size"])

        train_model = create_ctc_train_model(
            self.model,
            last_activation=self.configs["last_activation"],
            num_classes=self.text_featurizer.num_classes)
        self._create_checkpoints(train_model)

        self.model.summary()

        initial_epoch = 0
        if self.ckpt_manager.latest_checkpoint:
            initial_epoch = int(
                self.ckpt_manager.latest_checkpoint.split('-')[-1])
            # restoring the latest checkpoint in checkpoint_path
            self.ckpt.restore(self.ckpt_manager.latest_checkpoint)

        train_model.compile(optimizer=self.optimizer,
                            loss={
                                "ctc_loss": lambda y_true, y_pred: y_pred
                            })

        callback = [Checkpoint(self.ckpt_manager)]
        if self.configs["log_dir"]:
            if not os.path.exists(self.configs["log_dir"]):
                os.makedirs(self.configs["log_dir"])
            with open(os.path.join(self.configs["log_dir"], "model.json"),
                      "w") as f:
                f.write(self.model.to_json())
            callback.append(
                TimeHistory(os.path.join(self.configs["log_dir"], "time.txt")))
            callback.append(
                tf.keras.callbacks.TensorBoard(
                    log_dir=self.configs["log_dir"]))

        if tf_eval_dataset is not None:
            # if initial_epoch == 0:
            #   train_model.fit(x=tf_train_dataset_sortagrad, epochs=1,
            #                   validation_data=tf_eval_dataset, shuffle="batch",
            #                   initial_epoch=initial_epoch, callbacks=callback)
            #   initial_epoch = 1

            train_model.fit(x=tf_train_dataset,
                            epochs=self.configs["num_epochs"],
                            validation_data=tf_eval_dataset,
                            shuffle="batch",
                            initial_epoch=initial_epoch,
                            callbacks=callback)
        else:
            # if initial_epoch == 0:
            #   train_model.fit(x=tf_train_dataset_sortagrad, epochs=1, shuffle="batch",
            #                   initial_epoch=initial_epoch, callbacks=callback)
            #   initial_epoch = 1

            train_model.fit(x=tf_train_dataset,
                            epochs=self.configs["num_epochs"],
                            shuffle="batch",
                            initial_epoch=initial_epoch,
                            callbacks=callback)

        if model_file:
            self.save_model(model_file)
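
Example #6 depends on two custom Keras callbacks, Checkpoint and TimeHistory, whose definitions are not shown. The sketches below are inferred only from how they are constructed here (one wraps a tf.train.CheckpointManager, the other receives a time.txt path) and are not the project's actual code.

import time

import tensorflow as tf

class Checkpoint(tf.keras.callbacks.Callback):
    # Hypothetical: save a tf.train.CheckpointManager snapshot after every epoch.
    def __init__(self, ckpt_manager):
        super().__init__()
        self.ckpt_manager = ckpt_manager

    def on_epoch_end(self, epoch, logs=None):
        self.ckpt_manager.save()

class TimeHistory(tf.keras.callbacks.Callback):
    # Hypothetical: append per-epoch wall-clock times to a text file.
    def __init__(self, file_path):
        super().__init__()
        self.file_path = file_path

    def on_epoch_begin(self, epoch, logs=None):
        self.epoch_start = time.time()

    def on_epoch_end(self, epoch, logs=None):
        with open(self.file_path, "a", encoding="utf-8") as f:
            f.write(f"{time.time() - self.epoch_start}\n")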
Example #7
    def train_and_eval(self, model_file=None):
        print("Training and evaluating model ...")
        self._create_checkpoints(self.model)

        check_key_in_dict(dictionary=self.configs,
                          keys=[
                              "tfrecords_dir", "checkpoint_dir",
                              "augmentations", "log_dir",
                              "train_data_transcript_paths"
                          ])
        augmentations = self.configs["augmentations"]
        augmentations.append(None)

        train_dataset = Dataset(
            data_path=self.configs["train_data_transcript_paths"],
            tfrecords_dir=self.configs["tfrecords_dir"],
            mode="train")
        tf_train_dataset = train_dataset.get_dataset_from_generator(
            text_featurizer=self.text_featurizer,
            speech_conf=self.configs["speech_conf"],
            batch_size=self.configs["batch_size"],
            augmentations=augmentations)

        tf_eval_dataset = None

        if self.configs["eval_data_transcript_paths"]:
            eval_dataset = Dataset(
                data_path=self.configs["eval_data_transcript_paths"],
                tfrecords_dir=self.configs["tfrecords_dir"],
                mode="eval")
            tf_eval_dataset = eval_dataset.get_dataset_from_generator(
                text_featurizer=self.text_featurizer,
                speech_conf=self.configs["speech_conf"],
                batch_size=self.configs["batch_size"],
                augmentations=[None])

        self.model.summary()

        initial_epoch = 0
        if self.ckpt_manager.latest_checkpoint:
            initial_epoch = int(
                self.ckpt_manager.latest_checkpoint.split('-')[-1])
            # restoring the latest checkpoint in checkpoint_path
            self.ckpt.restore(self.ckpt_manager.latest_checkpoint)

        if self.configs["log_dir"]:
            if not os.path.exists(self.configs["log_dir"]):
                os.makedirs(self.configs["log_dir"])
            with open(os.path.join(self.configs["log_dir"], "model.json"),
                      "w") as f:
                f.write(self.model.to_json())
            self.writer = tf.summary.create_file_writer(
                os.path.join(self.configs["log_dir"], "train"))

        if self.configs["last_activation"] != "softmax":
            loss = ctc_loss
        else:
            loss = ctc_loss_1

        epochs = self.configs["num_epochs"]

        for epoch in range(initial_epoch, epochs, 1):
            epoch_eval_loss = None
            epoch_eval_wer = None
            start = time.time()

            self.train(self.model, tf_train_dataset, self.optimizer, loss,
                       self.text_featurizer.num_classes, epoch, epochs)

            print(f"\nEnd training on epoch = {epoch}")

            self.ckpt_manager.save()
            print(f"Saved checkpoint at epoch {epoch + 1}")

            if tf_eval_dataset:
                print("Validating ... ")
                epoch_eval_loss, epoch_eval_wer = self.validate(
                    self.model, self.decoder, tf_eval_dataset, loss,
                    self.text_featurizer.num_classes,
                    self.configs["last_activation"])
                print(
                    f"Average_val_loss = {epoch_eval_loss}, val_wer = {epoch_eval_wer}"
                )

            time_epoch = time.time() - start
            print(f"Time for epoch {epoch + 1} is {time_epoch} secs")

            if self.writer:
                with self.writer.as_default():
                    if epoch_eval_loss and epoch_eval_wer:
                        tf.summary.scalar("eval_loss",
                                          epoch_eval_loss,
                                          step=epoch)
                        tf.summary.scalar("eval_wer",
                                          epoch_eval_wer,
                                          step=epoch)
                    tf.summary.scalar("epoch_time", time_epoch, step=epoch)

        if model_file:
            self.save_model(model_file)
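
Example #7 picks between ctc_loss and ctc_loss_1 depending on the model's final activation, but neither loss function is included in this listing. As a rough illustration of what a logits-based variant could look like, here is a sketch built on tf.nn.ctc_loss; the function name, argument order, and blank-index convention are all assumptions.

import tensorflow as tf

def ctc_loss(y_true, y_pred, input_length, label_length, num_classes):
    # Hypothetical sketch of a CTC loss over raw logits (no softmax applied yet),
    # assuming the blank label is the last class index.
    loss = tf.nn.ctc_loss(labels=tf.cast(y_true, tf.int32),
                          logits=y_pred,
                          label_length=tf.cast(label_length, tf.int32),
                          logit_length=tf.cast(input_length, tf.int32),
                          logits_time_major=False,
                          blank_index=num_classes - 1)
    return tf.reduce_mean(loss)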