Exemplo n.º 1
0
    def finalize_evaluation(self, results_per_batch, training_step=None):
        """Combine per-batch word statistics into one validation WER.

        Args:
            results_per_batch: iterable of ``(word_lev, word_count)`` pairs;
                each pair is added into running totals.
            training_step: not used by this method.

        Returns:
            dict: ``{"Eval WER": wer}`` where ``wer`` is the summed
            ``word_lev`` divided by the summed ``word_count``.
        """
        lev_sum, count_sum = 0.0, 0.0
        for batch_lev, batch_count in results_per_batch:
            lev_sum += batch_lev
            count_sum += batch_count

        # The division is performed without a zero check on the word count.
        wer = 1.0 * lev_sum / count_sum
        report = "Validation WER: {:.4f}".format(wer)
        deco_print(report, offset=4)

        return {"Eval WER": wer}
Exemplo n.º 2
0
    def after_run(self, run_context, run_values):
        """Log the training loss and the average time per step.

        Args:
            run_context: not used by this method.
            run_values: object whose ``results`` attribute unpacks into
                ``(results, step)``; ``results[0]`` is printed as the loss.

        The step counter is always recorded. When ``results`` is falsy the
        method returns right after that, without logging or touching the
        timer.
        """
        results, step = run_values.results
        self._iter_count = step

        if not results:
            return

        self._timer.update_last_triggered_step(self._iter_count - 1)

        # Prefix the log line with the epoch only when the model knows how
        # many steps make up one epoch.
        steps_in_epoch = self._model.steps_in_epoch
        if steps_in_epoch is not None:
            header = "Epoch {}, global step {}:".format(
                step // steps_in_epoch, step)
        else:
            header = "Global step {}:".format(step)
        deco_print(header, end=" ")

        deco_print("Train loss: {:.4f}".format(results[0]), offset=4)

        # Average wall-clock seconds per step since the last report, split
        # into hours / minutes / seconds for display.
        seconds_per_step = (time.time() - self._last_time) / self._every_steps
        minutes, seconds = divmod(seconds_per_step, 60)
        hours, minutes = divmod(minutes, 60)
        timing = "time per step = {}:{:02}:{:.3f}".format(
            int(hours), int(minutes), seconds)
        deco_print(timing, start=" ")

        self._last_time = time.time()
Exemplo n.º 3
0
    def _build_forward_pass_graph(self, input_tensors, gpu_id=0):
        """
        Wire encoder, decoder and loss into a single forward pass.

        The encoder receives the source tensors. The decoder receives the
        encoder output plus, in train/eval mode, the target tensors. The loss
        receives the decoder output and the target tensors, and is only built
        in train/eval mode.

        Inputs
            input_tensors(dict):
                "source_tensors" (list, always required)
                "target_tensors" (list, required in train or eval mode)
            gpu_id: not used by this method.
        Returns
            tuple: (loss, outputs) where loss is the value returned by
            loss_computator.compute_loss() (None outside train/eval mode) and
            outputs is decoder.decode()["outputs"] (None if that key is
            absent).
        Raises
            ValueError: if input_tensors is not a dict with the required
            keys, or if a required entry is not a list.
        """
        has_sources = (isinstance(input_tensors, dict)
                       and "source_tensors" in input_tensors)
        if not has_sources:
            raise ValueError("input tensors should be a dict containing 'source_tensors' key")

        source_tensors = input_tensors["source_tensors"]
        if not isinstance(source_tensors, list):
            raise ValueError("source_tensors should be a list")

        # Targets are only needed (and validated) when a loss will be built.
        with_targets = self.mode in ("train", "eval")
        if with_targets:
            if "target_tensors" not in input_tensors:
                raise ValueError("Input tensors should contain 'target_tensors' key")
            target_tensors = input_tensors["target_tensors"]
            if not isinstance(target_tensors, list):
                raise ValueError("target_tensors should be a list")

        with tf.variable_scope("ForwardPass"):
            encoder_output = self.encoder.encode(
                input_dict={"source_tensors": source_tensors})

            decoder_input = {"encoder_output": encoder_output}
            if with_targets:
                decoder_input["target_tensors"] = target_tensors
            decoder_output = self.decoder.decode(input_dict=decoder_input)

            model_outputs = decoder_output.get("outputs", None)

            loss = None
            if with_targets:
                with tf.variable_scope("Loss"):
                    loss = self.loss_computator.compute_loss({
                        "decoder_output": decoder_output,
                        "target_tensors": target_tensors,
                    })
            else:
                deco_print("Inference mode, Loss part of graph isn't build")
        return loss, model_outputs
Exemplo n.º 4
0
    def _build_forward_pass_graph(self, input_tensors, gpu_id=0):
        """Connect encoder, decoder and (in train/eval mode) loss.

        Args:
            input_tensors (dict): must contain ``"source_tensors"`` (a list);
                in train and eval mode it must also contain
                ``"target_tensors"`` (a list).
            gpu_id: not used by this method.

        Returns:
            tuple: ``(loss, model_outputs)``. ``loss`` is the result of
            ``self.loss_computator.compute_loss`` in train/eval mode and
            ``None`` otherwise; ``model_outputs`` is the ``"outputs"`` entry
            of the decoder output, or ``None`` if that entry is absent.

        Raises:
            ValueError: if ``input_tensors`` is not a dict, lacks a required
                key, or a required entry is not a list.
        """
        # Check for the key as well as the type, so a dict without
        # "source_tensors" raises the documented ValueError, not a KeyError.
        if not isinstance(input_tensors,
                          dict) or "source_tensors" not in input_tensors:
            raise ValueError(
                "Input tensors should be dict containing 'source_tensors' key")

        if not isinstance(input_tensors["source_tensors"], list):
            raise ValueError("source tensors should be a list")

        source_tensors = input_tensors["source_tensors"]

        if self.mode == "train" or self.mode == "eval":
            if "target_tensors" not in input_tensors:
                raise ValueError(
                    "Input tensors  should contain 'target_tensors' key in train and eval mode"
                )

            if not isinstance(input_tensors["target_tensors"], list):
                raise ValueError("target_tensors should be a list")

            target_tensors = input_tensors["target_tensors"]

        with tf.variable_scope("ForwardPass"):
            # Original author's note: here self.encoder is an instance of
            # DeepSpeech2Encoder and self.decoder is an instance of
            # FullyConnectedCTCDecoder (depends on the model config; verify).
            encoder_input = {"source_tensors": source_tensors}
            encoder_output = self.encoder.encode(input_dict=encoder_input)

            decoder_input = {"encoder_output": encoder_output}
            if self.mode == "train" or self.mode == "eval":
                decoder_input["target_tensors"] = target_tensors

            decoder_output = self.decoder.decode(input_dict=decoder_input)
            model_outputs = decoder_output.get("outputs", None)

            if self.mode == "train" or self.mode == "eval":
                with tf.variable_scope("Loss"):
                    loss_input_dict = {
                        "decoder_output": decoder_output,
                        "target_tensors": target_tensors
                    }
                    loss = self.loss_computator.compute_loss(loss_input_dict)
            else:
                deco_print("Inference Mode. Loss part of graph isn't built.")
                loss = None

        return loss, model_outputs
Exemplo n.º 5
0
def main():
    """Parse arguments, build the model and run the requested mode.

    Example:
        python3 run.py --mode=train --config_file=config/ds2_small_1gpu.py

    The mode (``train``, ``eval`` or ``infer``) comes from the parsed
    arguments. When ``args.enable_logs`` is set, the streams and log files
    returned by ``create_logdir`` are restored and closed on exit, even if
    the run raises.
    """
    import sys

    args, base_config, base_model, config_module = get_base_config(sys.argv[1:])

    # load_model: directory of a base model, if one is configured.
    load_model = base_config.get('load_model', None)
    restore_best_checkpoint = base_config.get('restore_best_checkpoint', False)
    base_ckpt_dir = check_base_model_logdir(load_model, args, restore_best_checkpoint)
    base_config['load_model'] = base_ckpt_dir

    checkpoint = check_logdir(args, base_config, restore_best_checkpoint)

    if args.enable_logs:
        # NOTE(review): create_logdir presumably redirects sys.stdout and
        # sys.stderr into the returned log files -- confirm against its
        # definition.
        old_stdout, old_stderr, stdout_log, stderr_log = create_logdir(args, base_config)
        base_config["logdir"] = os.path.join(base_config["logdir"], 'logs')

    try:
        if args.mode == "train":
            if checkpoint is None:
                if base_ckpt_dir:
                    deco_print("Starting training from the base model")
                else:
                    deco_print("Starting training from scratch")
            else:
                deco_print("Restoring checkpoint from {}".format(checkpoint))

        elif args.mode == "eval" or args.mode == "infer":
            deco_print("Loading model from {}".format(checkpoint))

        # Create model and train/eval
        with tf.Graph().as_default():
            model = create_model(args, base_config, config_module, base_model, checkpoint)
            print(model)
            if args.mode == "train":
                train(model, eval_model=None, debug_port=None)
            elif args.mode == "eval":
                evaluate(model, checkpoint)
            elif args.mode == "infer":
                infer(model, checkpoint, args.infer_output_file)
    finally:
        if args.enable_logs:
            # Restore both streams. The original wrote `sys,stderr = ...`,
            # a tuple unpacking that never reassigned sys.stderr.
            sys.stdout = old_stdout
            sys.stderr = old_stderr
            stdout_log.close()
            stderr_log.close()
Exemplo n.º 6
0
    def maybe_print_logs(self, input_values, output_values, training_step):
        """Print target, prediction and WER for the first sample of a batch.

        Args:
            input_values: mapping whose ``"target_tensors"`` entry unpacks
                into ``(targets, target_lengths)``.
            output_values: sequence whose first element is the decoded
                output for the batch.
            training_step: not used by this method.

        Returns:
            dict: ``{"Sample WER": sample_wer}``, the word-level Levenshtein
            distance between target and prediction divided by the number of
            target words.

        Side effects:
            Sets ``self.autoaregressive`` and ``self.plot_attention``.
        """
        targets, target_lengths = input_values["target_tensors"]
        first_target = targets[0]
        first_length = target_lengths[0]
        first_decoded = output_values[0]

        data_layer = self.get_data_layer()

        if self.is_bpe:
            # BPE path: ids are converted to text by the data layer's `sp`
            # object.
            predicted_ids = sparse_tensor_to_chars_bpe(first_decoded)[0]
            reference_ids = first_target[:first_length].tolist()
            true_text = data_layer.sp.DecodeIds(reference_ids)
            pred_text = data_layer.sp.DecodeIds(predicted_ids)
        else:
            # Character path: ids are looked up in the idx2char table.
            idx2char = data_layer.params["idx2char"]
            true_text = "".join(
                idx2char.get(idx) for idx in first_target[:first_length])
            predicted_chars = self.tensor_to_chars(
                first_decoded, idx2char, **self.tensor_to_char_params)[0]
            pred_text = "".join(predicted_chars)

        reference_words = true_text.split()
        word_distance = levenshtein(reference_words, pred_text.split())
        sample_wer = word_distance / len(reference_words)

        self.autoaregressive = data_layer.params.get("autoaregressive", False)
        self.plot_attention = False

        deco_print("Sample WER: {:.4f}".format(sample_wer), offset=4)
        deco_print("Sample target:    " + true_text, offset=4)
        deco_print("Sample prediction:    " + pred_text, offset=4)

        return {"Sample WER": sample_wer}
Exemplo n.º 7
0
    def compile(self, force_var_reuse=False, checkpoint=None):
        """Build the TensorFlow graph for every configured GPU.

        A forward pass is built per entry of ``self._gpu_ids``. In train mode
        the per-GPU losses are averaged into ``self.loss``, a train op is
        created with ``optimize_loss`` and the trainable variables are
        printed. In eval mode the per-GPU losses are kept in
        ``self.eval_losses``.

        Args:
            force_var_reuse (bool): when True, variable reuse is enabled on
                every GPU; otherwise only on GPUs after the first.
            checkpoint: not used by this method.

        Raises:
            ValueError: if a forward pass returns outputs that are neither
                ``None`` nor a list.
        """
        if "initializer" not in self.params:
            initializer = None
        else:
            init_dict = self.params.get("initializer_params", {})
            initializer = self.params["initializer"](**init_dict)

        losses = []
        for gpu_cnt, gpu_id in enumerate(self._gpu_ids):
            # With two or more GPUs, reuse is enabled so the graphs on the
            # different GPUs share variables of the same name. Sharing is
            # pointless for the first (or only) GPU, hence gpu_cnt > 0.
            with tf.device("/gpu:{}".format(gpu_id)), tf.variable_scope(
                    name_or_scope=tf.get_variable_scope(),
                    reuse=force_var_reuse or (gpu_cnt > 0),
                    initializer=initializer,
                    dtype=self.get_tf_dtype()):

                deco_print("Building graph on GPU:{}".format(gpu_id))

                if self._interactive:
                    self.get_data_layer(
                        gpu_cnt).create_interactive_placeholders()
                else:
                    self.get_data_layer(gpu_cnt).build_graph()
                input_tensors = self.get_data_layer(gpu_cnt).input_tensors
                # Original author's note: _build_forward_pass_graph is
                # implemented in Speech2Text.
                loss, self._outputs[gpu_cnt] = self._build_forward_pass_graph(
                    input_tensors, gpu_id=gpu_cnt)
                if self._outputs[gpu_cnt] is not None and not isinstance(
                        self._outputs[gpu_cnt], list):
                    raise ValueError(
                        "Decoder outputs have to be either None or list")
                if self._mode == "train" or self._mode == "eval":
                    losses.append(loss)

        # end of for gpu_ind loop
        if self._mode == "train":
            self.loss = tf.reduce_mean(losses)
        if self._mode == "eval":
            self.eval_losses = losses

        # Counting objects per step is optional: models that do not
        # implement it simply leave the attribute as it was.
        try:
            self._num_objects_per_step = [
                self._get_num_objects_per_step(worker_id)
                for worker_id in range(self.num_gpus)
            ]
        except NotImplementedError:
            pass

        if self._mode == "train":
            if "lr_policy" not in self.params:
                lr_policy = None
            else:
                lr_params = self.params.get("lr_policy_params", {})

                # Inspect the policy's signature to fill in step-based
                # arguments the config did not provide.
                func_params = signature(self.params["lr_policy"]).parameters

                if "decay_steps" in func_params and "decay_steps" not in lr_params:
                    lr_params["decay_steps"] = self._last_step
                    if "begin_decay_at" in func_params:
                        if "warmup_steps" in func_params:
                            # Decay must not start before warm-up ends.
                            # (The original read the undefined name
                            # `lr_prams` here, raising NameError.)
                            lr_params["begin_decay_at"] = max(
                                lr_params.get("begin_decay_at", 0),
                                lr_params.get("warmup_steps", 0))
                        lr_params["decay_steps"] -= lr_params.get(
                            "begin_decay_at", 0)

                if "steps_per_epoch" in func_params and "steps_per_epoch" not in lr_params and "num_epochs" in self.params:
                    lr_params["steps_per_epoch"] = self.steps_in_epoch
                lr_policy = lambda gs: self.params["lr_policy"](global_step=gs,
                                                                **lr_params)

            if self.params.get("iter_size", 1) > 1:
                self.skip_update_ph = tf.placeholder(tf.bool)

            var_list = tf.trainable_variables()
            freeze_variables_regex = self.params.get("freeze_variables_regex",
                                                     None)

            # Variables whose name matches the regex (anchored at the start,
            # since `match` is used) are left out of the optimised set.
            if freeze_variables_regex is not None:
                pattern = re.compile(freeze_variables_regex)
                var_list = [
                    var for var in tf.trainable_variables()
                    if not pattern.match(var.name)
                ]

            self.train_op, _ = optimize_loss(
                loss=tf.cast(self.loss, tf.float32),
                dtype=self.params["dtype"],
                optimizer=self.params["optimizer"],
                optimizer_params=self.params["optimizer_params"],
                var_list=var_list,
                clip_gradients=self.params.get("max_grad_norm", None),
                learning_rate_decay_fn=lr_policy,
                summaries=self.params.get("summaries", None),
                larc_params=self.params.get("larc_params", None),
                loss_scaling=self.params.get("loss_scaling", 1.0),
                loss_scaling_params=self.params.get("loss_scaling_params",
                                                    None),
                iter_size=self.params.get("iter_size", 1),
                skip_update_ph=self.skip_update_ph,
                model=self)

            tf.summary.scalar(name="train_loss", tensor=self.loss)
            if self.steps_in_epoch:
                tf.summary.scalar(
                    name="epoch",
                    tensor=tf.floor(
                        tf.train.get_global_step() /
                        tf.constant(self.steps_in_epoch, dtype=tf.int64)))

            if freeze_variables_regex is not None:
                deco_print("Complete list of variables:")
                for var in tf.trainable_variables():
                    deco_print("{}".format(var.name), offset=2)

            deco_print("Trainable variables:")
            total_params = 0
            unknown_shapes = False

            for var in var_list:
                var_params = 1
                deco_print("{}".format(var.name), offset=2)
                deco_print("shape: {}, {}".format(var.get_shape(), var.dtype),
                           offset=2)

                # A falsy shape is treated as unknown and disables the
                # total parameter count.
                if var.get_shape():
                    for dim in var.get_shape():
                        var_params *= dim.value
                    total_params += var_params
                else:
                    unknown_shapes = True

            if unknown_shapes:
                deco_print(
                    "Encountered unknown variable shape, can't compute total number of parameters"
                )
            else:
                deco_print(
                    "Total trainable parameters: {}".format(total_params))