def infer(self, input_file_path, model_file, output_file_path):
    """Run the model over an input file and append its predictions to a text file.

    Args:
        input_file_path: Passed to ``Dataset`` as ``data_path`` (mode ``"infer"``).
        model_file: Passed to ``self.load_model``.
        output_file_path: UTF-8 text file opened in append mode. It receives one
            ``Predictions`` header line followed by one prediction per line.

    Raises:
        Exception: If ``self.load_model`` returns a truthy message; the message
            is used as the exception text.
    """
    print("Infering ...")
    check_key_in_dict(dictionary=self.configs, keys=["tfrecords_dir"])
    msg = self.load_model(model_file)
    if msg:
        raise Exception(msg)

    infer_dataset = Dataset(data_path=input_file_path,
                            tfrecords_dir=self.configs["tfrecords_dir"],
                            mode="infer")
    tf_infer_dataset = infer_dataset(
        batch_size=self.configs["batch_size"],
        text_featurizer=self.text_featurizer,
        speech_conf=self.configs["speech_conf"])

    def infer_step(feature, input_length):
        prediction = self.predict(feature, input_length)
        return bytes_to_string(prediction.numpy())

    # Open the output once: previously the file was reopened and the
    # "Predictions" header rewritten for every batch, which interleaved
    # headers with the results.
    with open(output_file_path, "a", encoding="utf-8") as of:
        of.write("Predictions\n")
        for features, inp_length in tf_infer_dataset:
            predictions = infer_step(features, inp_length)
            of.writelines(pred + "\n" for pred in predictions)
            # Keep per-batch durability now that the file stays open.
            of.flush()
def test_with_noise_filter(self, model_file, output_file_path):
    """Evaluate a saved model one sample at a time and write WER/CER to a file.

    The test dataset is built with ``batch_size=1`` and
    ``feature_extraction=False``, and each raw signal is decoded with
    ``self.infer_single``.
    NOTE(review): ``self.noise_filter`` is only checked here, never used
    directly; presumably ``infer_single`` applies it - confirm.

    Args:
        model_file: Passed to ``self.load_saved_model``.
        output_file_path: UTF-8 text file, overwritten with two lines
            (``WER: ...`` and ``CER: ...``).

    Raises:
        ValueError: If ``self.noise_filter`` is falsy.
        Exception: If ``self.load_saved_model`` returns a truthy message.
        ZeroDivisionError: If the accumulated word or character count is
            zero (e.g. the test dataset yields no samples).
    """
    print("Testing model ...")
    if not self.noise_filter:
        raise ValueError("noise_filter must be defined")
    check_key_in_dict(dictionary=self.configs,
                      keys=["test_data_transcript_paths", "tfrecords_dir"])
    test_dataset = Dataset(
        data_path=self.configs["test_data_transcript_paths"],
        tfrecords_dir=self.configs["tfrecords_dir"],
        mode="test")
    msg = self.load_saved_model(model_file)
    if msg:
        raise Exception(msg)
    tf_test_dataset = test_dataset(text_featurizer=self.text_featurizer,
                                   speech_conf=self.configs["speech_conf"],
                                   batch_size=1,
                                   feature_extraction=False)

    def test_step(signal, label):
        prediction = self.infer_single(signal)
        label = self.decoder.convert_to_string_single(label)
        print(f"Pred: {prediction}")
        print(f"Groundtruth: {label}")
        _wer, _wer_count = wer(decode=prediction, target=label)
        _cer, _cer_count = cer(decode=prediction, target=label)
        gc.collect()
        return _wer, _wer_count, _cer, _cer_count

    total_wer = 0.0
    wer_count = 0.0
    total_cer = 0.0
    cer_count = 0.0

    for signal, label in tf_test_dataset.as_numpy_iterator():
        batch_wer, batch_wer_count, batch_cer, batch_cer_count = test_step(
            signal, label)
        total_wer += batch_wer
        total_cer += batch_cer
        wer_count += batch_wer_count
        cer_count += batch_cer_count

    # Fail with an explanation instead of a bare "float division by zero".
    # The exception type is kept so existing handlers behave the same.
    if wer_count == 0 or cer_count == 0:
        raise ZeroDivisionError(
            "Cannot compute WER/CER: the test dataset produced no reference "
            "words or characters")

    final_wer = total_wer / wer_count
    final_cer = total_cer / cer_count
    print(f"WER: {final_wer}, CER: {final_cer}")

    with open(output_file_path, "w", encoding="utf-8") as of:
        of.write(f"WER: {final_wer}\n")
        of.write(f"CER: {final_cer}\n")
def create_decoder(decoder_config, index_to_token, num_classes, vocab_array):
    """Build the decoder selected by ``decoder_config["name"]``.

    Args:
        decoder_config: Mapping with a required ``"name"`` key.
            ``"beamsearch"`` additionally requires ``"beam_width"``; when
            ``"lm_path"`` is present and not ``None``, ``"alpha"`` and
            ``"beta"`` are required too and the path is user-expanded.
        index_to_token: Forwarded to the decoder constructor.
        num_classes: Forwarded to the decoder constructor.
        vocab_array: Forwarded to the decoder constructor.

    Returns:
        A ``BeamSearchDecoder`` for ``"beamsearch"`` or a ``GreedyDecoder``
        for ``"greedy"``.

    Raises:
        ValueError: If ``decoder_config["name"]`` is neither ``"beamsearch"``
            nor ``"greedy"``.
    """
    check_key_in_dict(decoder_config, keys=["name"])
    name = decoder_config["name"]

    if name == "beamsearch":
        check_key_in_dict(decoder_config, keys=["beam_width"])
        decoder_kwargs = {
            "index_to_token": index_to_token,
            "num_classes": num_classes,
            "beam_width": decoder_config["beam_width"],
            "vocab_array": vocab_array,
        }
        # Language-model options are only passed when an LM path is configured.
        if decoder_config.get("lm_path") is not None:
            check_key_in_dict(decoder_config, keys=["alpha", "beta"])
            decoder_kwargs.update(
                lm_path=os.path.expanduser(decoder_config["lm_path"]),
                alpha=decoder_config["alpha"],
                beta=decoder_config["beta"])
        return BeamSearchDecoder(**decoder_kwargs)

    if name == "greedy":
        return GreedyDecoder(index_to_token=index_to_token,
                             num_classes=num_classes,
                             vocab_array=vocab_array)

    # The old message carried a stray backslash from a continuation inside
    # the string literal and listed 'beamsearch_lm', which is not accepted.
    raise ValueError(
        "'decoder' value must be either 'beamsearch' or 'greedy'")
def decorated_func(*args, **kwargs):
    """Call the wrapped ``func`` only when the request carries a ``payload`` file.

    If ``check_key_in_dict`` raises ``ValueError`` for ``request.files``, a
    400 response whose body maps ``"payload"`` to the error text is returned
    instead of calling ``func``.
    """
    try:
        check_key_in_dict(dictionary=request.files, keys=["payload"])
    except ValueError as err:
        error_body = {"payload": str(err)}
        return make_response((error_body, 400))
    else:
        return func(*args, **kwargs)
def test(self, model_file, output_file_path):
    """Evaluate a saved model on the configured test set and write WER/CER.

    Batches are decoded with ``self.predict``. Per-sample error totals and
    counts from ``wer``/``cer`` are accumulated, and the overall rates are
    printed and written to ``output_file_path``.

    Args:
        model_file: Passed to ``self.load_saved_model``.
        output_file_path: UTF-8 text file, overwritten with two lines
            (``WER: ...`` and ``CER: ...``).

    Raises:
        Exception: If ``self.load_saved_model`` returns a truthy message.
        ZeroDivisionError: If the accumulated word or character count is
            zero (e.g. the test dataset yields no batches).
    """
    print("Testing model ...")
    check_key_in_dict(dictionary=self.configs,
                      keys=["test_data_transcript_paths", "tfrecords_dir"])
    test_dataset = Dataset(
        data_path=self.configs["test_data_transcript_paths"],
        tfrecords_dir=self.configs["tfrecords_dir"],
        mode="test")
    msg = self.load_saved_model(model_file)
    if msg:
        raise Exception(msg)
    tf_test_dataset = test_dataset(text_featurizer=self.text_featurizer,
                                   speech_conf=self.configs["speech_conf"],
                                   batch_size=self.configs["batch_size"])

    def test_step(features, inp_length, transcripts):
        predictions = self.predict(features, inp_length)
        predictions = bytes_to_string(predictions.numpy())
        transcripts = self.decoder.convert_to_string(transcripts)

        b_wer = 0.0
        b_wer_count = 0.0
        b_cer = 0.0
        b_cer_count = 0.0

        for idx, decoded in enumerate(predictions):
            target = transcripts[idx]
            print(f"Pred: {decoded}")
            print(f"Groundtruth: {target}")
            _wer, _wer_count = wer(decode=decoded, target=target)
            _cer, _cer_count = cer(decode=decoded, target=target)
            b_wer += _wer
            b_cer += _cer
            b_wer_count += _wer_count
            b_cer_count += _cer_count

        gc.collect()

        return b_wer, b_wer_count, b_cer, b_cer_count

    total_wer = 0.0
    wer_count = 0.0
    total_cer = 0.0
    cer_count = 0.0

    # The fourth element of each dataset item is not used here.
    for feature, input_length, label, _ in tf_test_dataset:
        batch_wer, batch_wer_count, batch_cer, batch_cer_count = test_step(
            feature, input_length, label)
        total_wer += batch_wer
        total_cer += batch_cer
        wer_count += batch_wer_count
        cer_count += batch_cer_count

    # Fail with an explanation instead of a bare "float division by zero".
    # The exception type is kept so existing handlers behave the same.
    if wer_count == 0 or cer_count == 0:
        raise ZeroDivisionError(
            "Cannot compute WER/CER: the test dataset produced no reference "
            "words or characters")

    final_wer = total_wer / wer_count
    final_cer = total_cer / cer_count
    print(f"WER: {final_wer}, CER: {final_cer}")

    with open(output_file_path, "w", encoding="utf-8") as of:
        of.write(f"WER: {final_wer}\n")
        of.write(f"CER: {final_cer}\n")
def keras_train_and_eval(self, model_file=None):
    """Train (and optionally validate) the model through the Keras ``fit`` API.

    Builds the train dataset, plus an eval dataset when
    ``self.configs["eval_data_transcript_paths"]`` is truthy. Wraps
    ``self.model`` with ``create_ctc_train_model``, resumes from the latest
    checkpoint if one exists, and runs ``fit`` up to
    ``self.configs["num_epochs"]``.

    Args:
        model_file: If truthy, passed to ``self.save_model`` after training.
    """
    print("Training and evaluating model ...")
    check_key_in_dict(dictionary=self.configs,
                      keys=[
                          "tfrecords_dir", "checkpoint_dir", "augmentations",
                          "log_dir", "train_data_transcript_paths"
                      ])

    # Work on a copy: appending to the list stored in self.configs would add
    # another None to the shared configuration on every call.
    # NOTE(review): None presumably stands for the un-augmented pass - confirm
    # against Dataset.
    augmentations = list(self.configs["augmentations"])
    augmentations.append(None)

    train_dataset = Dataset(
        data_path=self.configs["train_data_transcript_paths"],
        tfrecords_dir=self.configs["tfrecords_dir"],
        mode="train",
        is_keras=True)
    tf_train_dataset = train_dataset(
        text_featurizer=self.text_featurizer,
        speech_conf=self.configs["speech_conf"],
        batch_size=self.configs["batch_size"],
        augmentations=augmentations)

    tf_eval_dataset = None
    if self.configs["eval_data_transcript_paths"]:
        eval_dataset = Dataset(
            data_path=self.configs["eval_data_transcript_paths"],
            tfrecords_dir=self.configs["tfrecords_dir"],
            mode="eval",
            is_keras=True)
        tf_eval_dataset = eval_dataset(
            text_featurizer=self.text_featurizer,
            speech_conf=self.configs["speech_conf"],
            batch_size=self.configs["batch_size"])

    train_model = create_ctc_train_model(
        self.model,
        last_activation=self.configs["last_activation"],
        num_classes=self.text_featurizer.num_classes)
    self._create_checkpoints(train_model)

    self.model.summary()

    initial_epoch = 0
    latest_checkpoint = self.ckpt_manager.latest_checkpoint
    if latest_checkpoint:
        # The epoch number is the suffix after the last '-' in the
        # checkpoint name.
        initial_epoch = int(latest_checkpoint.split('-')[-1])
        # restoring the latest checkpoint in checkpoint_path
        self.ckpt.restore(latest_checkpoint)

    # The "ctc_loss" output is used directly as the loss value.
    # NOTE(review): presumably create_ctc_train_model computes the CTC loss
    # inside the model - confirm.
    train_model.compile(optimizer=self.optimizer,
                        loss={"ctc_loss": lambda y_true, y_pred: y_pred})

    callback = [Checkpoint(self.ckpt_manager)]
    log_dir = self.configs["log_dir"]
    if log_dir:
        os.makedirs(log_dir, exist_ok=True)
        with open(os.path.join(log_dir, "model.json"), "w",
                  encoding="utf-8") as f:
            f.write(self.model.to_json())
        callback.append(TimeHistory(os.path.join(log_dir, "time.txt")))
        callback.append(tf.keras.callbacks.TensorBoard(log_dir=log_dir))

    # TODO: a sortagrad first epoch was sketched here previously but never
    # enabled; reintroduce it deliberately if it is still wanted.
    # validation_data=None is fit's default, so one call covers both the
    # "with eval" and "without eval" cases.
    train_model.fit(x=tf_train_dataset,
                    epochs=self.configs["num_epochs"],
                    validation_data=tf_eval_dataset,
                    shuffle="batch",
                    initial_epoch=initial_epoch,
                    callbacks=callback)

    if model_file:
        self.save_model(model_file)
def train_and_eval(self, model_file=None):
    """Train the model with the custom loop, validating and checkpointing per epoch.

    Builds generator-based train/eval datasets, resumes from the latest
    checkpoint if one exists, then for every remaining epoch calls
    ``self.train``, saves a checkpoint, optionally calls ``self.validate``,
    and logs scalars to the summary writer when one is available.

    Args:
        model_file: If truthy, passed to ``self.save_model`` after training.
    """
    print("Training and evaluating model ...")
    # Validate the configuration before touching checkpoints so a missing
    # key is reported up front.
    check_key_in_dict(dictionary=self.configs,
                      keys=[
                          "tfrecords_dir", "checkpoint_dir", "augmentations",
                          "log_dir", "train_data_transcript_paths"
                      ])
    self._create_checkpoints(self.model)

    # Work on a copy: appending to the list stored in self.configs would add
    # another None to the shared configuration on every call.
    augmentations = list(self.configs["augmentations"])
    augmentations.append(None)

    train_dataset = Dataset(
        data_path=self.configs["train_data_transcript_paths"],
        tfrecords_dir=self.configs["tfrecords_dir"],
        mode="train")
    tf_train_dataset = train_dataset.get_dataset_from_generator(
        text_featurizer=self.text_featurizer,
        speech_conf=self.configs["speech_conf"],
        batch_size=self.configs["batch_size"],
        augmentations=augmentations)

    tf_eval_dataset = None
    if self.configs["eval_data_transcript_paths"]:
        eval_dataset = Dataset(
            data_path=self.configs["eval_data_transcript_paths"],
            tfrecords_dir=self.configs["tfrecords_dir"],
            mode="eval")
        tf_eval_dataset = eval_dataset.get_dataset_from_generator(
            text_featurizer=self.text_featurizer,
            speech_conf=self.configs["speech_conf"],
            batch_size=self.configs["batch_size"],
            augmentations=[None])

    self.model.summary()

    initial_epoch = 0
    latest_checkpoint = self.ckpt_manager.latest_checkpoint
    if latest_checkpoint:
        # The epoch number is the suffix after the last '-' in the
        # checkpoint name.
        initial_epoch = int(latest_checkpoint.split('-')[-1])
        # restoring the latest checkpoint in checkpoint_path
        self.ckpt.restore(latest_checkpoint)

    log_dir = self.configs["log_dir"]
    if log_dir:
        os.makedirs(log_dir, exist_ok=True)
        with open(os.path.join(log_dir, "model.json"), "w",
                  encoding="utf-8") as f:
            f.write(self.model.to_json())
        self.writer = tf.summary.create_file_writer(
            os.path.join(log_dir, "train"))

    # self.writer is only assigned above when log_dir is set; fall back to
    # None rather than assuming the attribute exists.
    writer = getattr(self, "writer", None)

    loss = ctc_loss if self.configs["last_activation"] != "softmax" else ctc_loss_1

    epochs = self.configs["num_epochs"]

    for epoch in range(initial_epoch, epochs):
        epoch_eval_loss = None
        epoch_eval_wer = None
        start = time.time()

        self.train(self.model, tf_train_dataset, self.optimizer, loss,
                   self.text_featurizer.num_classes, epoch, epochs)

        print(f"\nEnd training on epoch = {epoch}")

        self.ckpt_manager.save()
        print(f"Saved checkpoint at epoch {epoch + 1}")

        if tf_eval_dataset:
            print("Validating ... ")
            epoch_eval_loss, epoch_eval_wer = self.validate(
                self.model, self.decoder, tf_eval_dataset, loss,
                self.text_featurizer.num_classes,
                self.configs["last_activation"])
            print(
                f"Average_val_loss = {epoch_eval_loss}, val_wer = {epoch_eval_wer}"
            )

        time_epoch = time.time() - start
        print(f"Time for epoch {epoch + 1} is {time_epoch} secs")

        if writer:
            with writer.as_default():
                # Compare against None: a truthiness test would skip logging
                # whenever a metric is exactly 0.
                if epoch_eval_loss is not None and epoch_eval_wer is not None:
                    tf.summary.scalar("eval_loss",
                                      epoch_eval_loss,
                                      step=epoch)
                    tf.summary.scalar("eval_wer",
                                      epoch_eval_wer,
                                      step=epoch)
                tf.summary.scalar("epoch_time", time_epoch, step=epoch)

    if model_file:
        self.save_model(model_file)