Exemplo n.º 1
0
def get_eval_callback(eval_dataset):
    """Build an EvaluatorCallback wired to a fresh eval pipeline for *eval_dataset*.

    Relies on module-level names (``args``, ``nf``, ``schema_preprocessor``,
    ``dialogues_processor``, ``steps_per_epoch``, ``create_pipeline``) being
    defined elsewhere in this script.
    """
    # Only the evaluation tensors of the pipeline are needed here.
    _, tensors = create_pipeline(dataset_split=eval_dataset)

    def _on_iteration(values, global_vars):
        # Per-batch accumulation of evaluation results.
        return eval_iter_callback(values, global_vars, schema_preprocessor, eval_dataset)

    def _on_epoch_done(global_vars):
        # Aggregate accumulated results once the full eval pass is done.
        return eval_epochs_done_callback(
            global_vars,
            args.task_name,
            eval_dataset,
            args.data_dir,
            nf.work_dir,
            args.tracker_model,
            args.debug_mode,
            dialogues_processor,
            schema_preprocessor,
            args.joint_acc_across_turn,
            args.no_fuzzy_match,
        )

    return EvaluatorCallback(
        eval_tensors=tensors,
        user_iter_callback=_on_iteration,
        user_epochs_done_callback=_on_epoch_done,
        tb_writer=nf.tb_writer,
        eval_step=args.eval_epoch_freq * steps_per_epoch,
    )
Exemplo n.º 2
0
    def test_jasper_evaluation(self):
        """Integration test that tests EvaluatorCallback and NeuralModuleFactory.eval(). This test is skipped during
        CI as it is redundant with the Jenkins Jasper ASR CI tests.
        """
        # Note this test still has no asserts, but rather checks that the current eval path works
        config_path = os.path.abspath(
            os.path.join(os.path.dirname(__file__), "../data/jasper_smaller.yaml"))
        with open(config_path) as config_file:
            jasper_model_definition = self.yaml.load(config_file)

        data_layer = nemo_asr.AudioToTextDataLayer(
            manifest_filepath=self.manifest_filepath,
            labels=self.labels,
            batch_size=8)
        preprocessing = nemo_asr.AudioToMelSpectrogramPreprocessor(
            frame_splicing=1,
            features=64,
            window_size=0.02,
            n_fft=512,
            dither=1e-05,
            window='hann',
            sample_rate=16000,
            normalize='per_feature',
            window_stride=0.01,
        )
        jasper_encoder = nemo_asr.JasperEncoder(
            feat_in=jasper_model_definition['AudioToMelSpectrogramPreprocessor']['features'],
            **jasper_model_definition['JasperEncoder'],
        )
        jasper_decoder = nemo_asr.JasperDecoderForCTC(
            feat_in=1024, num_classes=len(self.labels))
        ctc_loss = nemo_asr.CTCLossNM(num_classes=len(self.labels))
        greedy_decoder = nemo_asr.GreedyCTCDecoder()

        # Wire the evaluation DAG:
        # audio -> mel features -> encoder -> decoder -> loss / greedy predictions.
        audio_signal, a_sig_length, transcript, transcript_len = data_layer()
        processed_signal, p_length = preprocessing(
            input_signal=audio_signal, length=a_sig_length)
        encoded, encoded_len = jasper_encoder(
            audio_signal=processed_signal, length=p_length)
        log_probs = jasper_decoder(encoder_output=encoded)
        loss = ctc_loss(
            log_probs=log_probs,
            targets=transcript,
            input_length=encoded_len,
            target_length=transcript_len,
        )
        predictions = greedy_decoder(log_probs=log_probs)

        from nemo.collections.asr.helpers import (
            process_evaluation_batch,
            process_evaluation_epoch,
        )

        eval_callback = EvaluatorCallback(
            eval_tensors=[loss, predictions, transcript, transcript_len],
            user_iter_callback=lambda batch_tensors, global_vars: process_evaluation_batch(
                batch_tensors, global_vars, labels=self.labels),
            user_epochs_done_callback=process_evaluation_epoch,
        )
        # Run a single evaluation pass through the DAG above.
        self.nf.eval(callbacks=[eval_callback])
Exemplo n.º 3
0
        # callbacks
        # Log the first training tensor (the loss) to console/TensorBoard every 100 steps.
        callback_train = SimpleLossLoggerCallback(
            tensors=[train_tensors[0]],
            step_freq=100,
            print_func=print_loss,
            get_tb_values=lambda x: [["loss", x[0]]],
            tb_writer=nf.tb_writer,
        )

        callbacks = [callback_train]

        # for eval_examples in args.eval_file_preprocessed:
        # Periodic evaluation: eval_iter_callback accumulates per-batch results,
        # eval_epochs_done_callback aggregates them at the end of each eval pass.
        callback_eval = EvaluatorCallback(
            eval_tensors=eval_tensors,
            user_iter_callback=lambda x, y: eval_iter_callback(
                x, y, tokenizer),
            user_epochs_done_callback=eval_epochs_done_callback,
            eval_step=args.eval_freq,
            tb_writer=nf.tb_writer,
        )
        # Only attach evaluation when eval data is actually available.
        if eval_examples:
            callbacks.append(callback_eval)

        # Save checkpoints into the work dir at a fixed step frequency.
        checkpointer_callback = CheckpointCallback(
            folder=args.work_dir, step_freq=args.checkpoint_save_freq)
        callbacks.append(checkpointer_callback)

        # NOTE(review): _calculate_steps is defined elsewhere; presumably derives
        # total optimizer steps and LR warmup steps from dataset size -- confirm.
        max_steps, warmup_steps = _calculate_steps(len(train_examples),
                                                   args.batch_size,
                                                   args.num_epochs,
                                                   args.warmup_proportion)
Exemplo n.º 4
0
    # Create the callbacks.
    def eval_loss_per_batch_callback(tensors, global_vars):
        """Collect the mean of every loss tensor of this batch into global_vars["eval_loss"]."""
        # Lazily create the accumulator on the first batch.
        losses = global_vars.setdefault("eval_loss", [])
        for name, pieces in tensors.items():
            # Every entry whose name starts with "loss" contributes one averaged value.
            if name.startswith("loss"):
                losses.append(mean(stack(pieces)))

    def eval_loss_epoch_finished_callback(global_vars):
        """Report the worst (maximum) batch loss collected during evaluation.

        Returns a dict mapping "Evaluation Loss" to that value so the framework
        can log/plot it at epoch end.
        """
        eloss = max(tensor(global_vars["eval_loss"]))
        # Lazy %-args: the message is only built when INFO logging is enabled.
        logging.info("Evaluation Loss: %s", eloss)
        # Fixed: a plain dict literal -- the original wrapped it in a redundant dict() call.
        return {"Evaluation Loss": eloss}

    # Run evaluation over loss_e every 100 training steps, using the
    # per-batch / epoch-done callbacks defined above.
    ecallback = EvaluatorCallback(
        eval_tensors=[loss_e],
        user_iter_callback=eval_loss_per_batch_callback,
        user_epochs_done_callback=eval_loss_epoch_finished_callback,
        eval_step=100,
    )

    # SimpleLossLoggerCallback will print loss values to console.
    callback = SimpleLossLoggerCallback(
        tensors=[loss],
        print_func=lambda x: logging.info(f'Training Loss: {str(x[0].item())}'
                                          ))

    # Invoke the "train" action.
    nf.train(
        training_graph=training_graph,
        callbacks=[callback, ecallback],
        optimization_params={
            "num_epochs": 10,
Exemplo n.º 5
0
# Write predictions to file in DSTC8 format.
prediction_dir = os.path.join(nf.work_dir, 'predictions', 'pred_res_{}_{}'.format(args.eval_dataset, args.task_name))
output_metric_file = os.path.join(nf.work_dir, 'metrics.txt')
os.makedirs(prediction_dir, exist_ok=True)

# Periodic evaluation: eval_iter_callback accumulates per-batch results;
# eval_epochs_done_callback receives the output/metric paths above, so it
# presumably writes predictions and metrics there -- confirm against its def.
eval_callback = EvaluatorCallback(
    eval_tensors=eval_tensors,
    user_iter_callback=lambda x, y: eval_iter_callback(x, y, schema_preprocessor, args.eval_dataset),
    user_epochs_done_callback=lambda x: eval_epochs_done_callback(
        x,
        input_json_files,
        args.eval_dataset,
        args.data_dir,
        prediction_dir,
        output_metric_file,
        args.state_tracker,
        args.debug_mode,
        schema_preprocessor,
        args.joint_acc_across_turn,
        args.no_fuzzy_match,
    ),
    tb_writer=nf.tb_writer,
    # Evaluate once every args.eval_epoch_freq epochs, expressed in steps.
    eval_step=args.eval_epoch_freq * steps_per_epoch,
)

# Keep only the most recent checkpoint in the factory's checkpoint dir.
ckpt_callback = CheckpointCallback(
    folder=nf.checkpoint_dir, epoch_freq=args.save_epoch_freq, step_freq=args.save_step_freq, checkpoints_to_keep=1
)

lr_policy_fn = get_lr_policy(