Пример #1
0
    def evaluate_books(self, books, models, mode="auto", sample=-1):
        if type(books) == str:
            books = [books]
        if type(models) == str:
            models = [models]
        results = {}
        if mode == "auto":
            with h5py.File(self.cachefile, 'r', libver='latest', swmr=True) as cache:
                for b in books:
                    for p in cache[b]:
                        for s in cache[b][p]:
                            if "text" in cache[b][p][s].attrs:
                                mode = "eval"
                                break
                        if mode != "auto":
                            break
                    if mode != "auto":
                        break
            if mode == "auto":
                mode = "conf"

        if mode == "conf":
            dset = Nash5DataSet(DataSetMode.PREDICT, self.cachefile, books)
        else:
            dset = Nash5DataSet(DataSetMode.EVAL, self.cachefile, books)

        if 0 < sample < len(dset):
            delsamples = random.sample(dset._samples, len(dset) - sample)
            for s in delsamples:
                dset._samples.remove(s)

        if mode == "conf":
            for model in models:
                if isinstance(model, str):
                    model = [model]
                predictor = MultiPredictor(checkpoints=model, data_preproc=NoopDataPreprocessor(), batch_size=1, processes=1)
                voter_params = VoterParams()
                voter_params.type = VoterParams.Type.Value("confidence_voter_default_ctc".upper())
                voter = voter_from_proto(voter_params)
                do_prediction = predictor.predict_dataset(dset, progress_bar=True)
                avg_sentence_confidence = 0
                n_predictions = 0
                for result, sample in do_prediction:
                    n_predictions += 1
                    prediction = voter.vote_prediction_result(result)
                    avg_sentence_confidence += prediction.avg_char_probability
                results["/".join(model)] = avg_sentence_confidence / n_predictions

        else:
            for model in models:
                if isinstance(model, str):
                    model = [model]
                predictor = MultiPredictor(checkpoint=model, data_preproc=NoopDataPreprocessor(), batch_size=1, processes=1, with_gt=True)
                out_gen = predictor.predict_dataset(dset, progress_bar=True, apply_preproc=False)
                result = Evaluator.evaluate_single_list(map(Evaluator.evaluate_single_args,
                            map(lambda d: tuple([''.join(d[0].ground_truth), ''.join(d[0].chars)]), out_gen)))
                results["/".join(model)] = 1 - result["avg_ler"]
        return results
Пример #2
0
    def _run_train(self, train_net, test_net, codec, train_start_time,
                   progress_bar):
        checkpoint_params = self.checkpoint_params
        validation_dataset = test_net.input_dataset
        iters_per_epoch = max(
            1,
            int(train_net.input_dataset.epoch_size() /
                checkpoint_params.batch_size))

        loss_stats = RunningStatistics(checkpoint_params.stats_size,
                                       checkpoint_params.loss_stats)
        ler_stats = RunningStatistics(checkpoint_params.stats_size,
                                      checkpoint_params.ler_stats)
        dt_stats = RunningStatistics(checkpoint_params.stats_size,
                                     checkpoint_params.dt_stats)

        display = checkpoint_params.display
        display_epochs = display <= 1
        if display <= 0:
            display = 0  # to not display anything
        elif display_epochs:
            display = max(1,
                          int(display * iters_per_epoch))  # relative to epochs
        else:
            display = max(1, int(display))  # iterations

        checkpoint_frequency = checkpoint_params.checkpoint_frequency
        early_stopping_frequency = checkpoint_params.early_stopping_frequency
        if early_stopping_frequency < 0:
            # set early stopping frequency to half epoch
            early_stopping_frequency = int(0.5 * iters_per_epoch)
        elif 0 < early_stopping_frequency <= 1:
            early_stopping_frequency = int(
                early_stopping_frequency *
                iters_per_epoch)  # relative to epochs
        else:
            early_stopping_frequency = int(early_stopping_frequency)
        early_stopping_frequency = max(1, early_stopping_frequency)

        if checkpoint_frequency < 0:
            checkpoint_frequency = early_stopping_frequency
        elif 0 < checkpoint_frequency <= 1:
            checkpoint_frequency = int(checkpoint_frequency *
                                       iters_per_epoch)  # relative to epochs
        else:
            checkpoint_frequency = int(checkpoint_frequency)

        early_stopping_enabled = self.validation_dataset is not None \
                                 and checkpoint_params.early_stopping_frequency > 0 \
                                 and checkpoint_params.early_stopping_nbest > 1
        early_stopping_best_accuracy = checkpoint_params.early_stopping_best_accuracy
        early_stopping_best_cur_nbest = checkpoint_params.early_stopping_best_cur_nbest
        early_stopping_best_at_iter = checkpoint_params.early_stopping_best_at_iter

        early_stopping_predictor = Predictor(codec=codec,
                                             text_postproc=self.txt_postproc,
                                             network=test_net)

        # Start the actual training
        # ====================================================================================

        iter = checkpoint_params.iter

        # helper function to write a checkpoint
        def make_checkpoint(base_dir, prefix, version=None):
            if version:
                checkpoint_path = os.path.abspath(
                    os.path.join(base_dir, "{}{}.ckpt".format(prefix,
                                                              version)))
            else:
                checkpoint_path = os.path.abspath(
                    os.path.join(base_dir,
                                 "{}{:08d}.ckpt".format(prefix, iter + 1)))
            print("Storing checkpoint to '{}'".format(checkpoint_path))
            train_net.save_checkpoint(checkpoint_path)
            checkpoint_params.version = Checkpoint.VERSION
            checkpoint_params.iter = iter
            checkpoint_params.loss_stats[:] = loss_stats.values
            checkpoint_params.ler_stats[:] = ler_stats.values
            checkpoint_params.dt_stats[:] = dt_stats.values
            checkpoint_params.total_time = time.time() - train_start_time
            checkpoint_params.early_stopping_best_accuracy = early_stopping_best_accuracy
            checkpoint_params.early_stopping_best_cur_nbest = early_stopping_best_cur_nbest
            checkpoint_params.early_stopping_best_at_iter = early_stopping_best_at_iter

            with open(checkpoint_path + ".json", 'w') as f:
                f.write(json_format.MessageToJson(checkpoint_params))

            return checkpoint_path

        try:
            last_checkpoint = None
            n_infinite_losses = 0
            n_max_infinite_losses = 5

            # Training loop, can be interrupted by early stopping
            for iter in range(iter, checkpoint_params.max_iters):
                checkpoint_params.iter = iter

                iter_start_time = time.time()
                result = train_net.train_step()

                if not np.isfinite(result['loss']):
                    n_infinite_losses += 1

                    if n_max_infinite_losses == n_infinite_losses:
                        print(
                            "Error: Loss is not finite! Trying to restart from last checkpoint."
                        )
                        if not last_checkpoint:
                            raise Exception(
                                "No checkpoint written yet. Training must be stopped."
                            )
                        else:
                            # reload also non trainable weights, such as solver-specific variables
                            train_net.load_weights(
                                last_checkpoint, restore_only_trainable=False)
                            continue
                    else:
                        continue

                n_infinite_losses = 0

                loss_stats.push(result['loss'])
                ler_stats.push(result['ler'])

                dt_stats.push(time.time() - iter_start_time)

                if display > 0 and iter % display == 0:
                    # apply postprocessing to display the true output
                    pred_sentence = self.txt_postproc.apply("".join(
                        codec.decode(result["decoded"][0])))
                    gt_sentence = self.txt_postproc.apply("".join(
                        codec.decode(result["gt"][0])))

                    if display_epochs:
                        print("#{:08f}: loss={:.8f} ler={:.8f} dt={:.8f}s".
                              format(iter / iters_per_epoch, loss_stats.mean(),
                                     ler_stats.mean(), dt_stats.mean()))
                    else:
                        print("#{:08d}: loss={:.8f} ler={:.8f} dt={:.8f}s".
                              format(iter, loss_stats.mean(), ler_stats.mean(),
                                     dt_stats.mean()))

                    # Insert utf-8 ltr/rtl direction marks for bidi support
                    lr = "\u202A\u202B"
                    print(" PRED: '{}{}{}'".format(
                        lr[bidi.get_base_level(pred_sentence)], pred_sentence,
                        "\u202C"))
                    print(" TRUE: '{}{}{}'".format(
                        lr[bidi.get_base_level(gt_sentence)], gt_sentence,
                        "\u202C"))

                if checkpoint_frequency > 0 and (
                        iter + 1) % checkpoint_frequency == 0:
                    last_checkpoint = make_checkpoint(
                        checkpoint_params.output_dir,
                        checkpoint_params.output_model_prefix)

                if early_stopping_enabled and (
                        iter + 1) % early_stopping_frequency == 0:
                    print("Checking early stopping model")

                    out_gen = early_stopping_predictor.predict_input_dataset(
                        validation_dataset, progress_bar=progress_bar)
                    result = Evaluator.evaluate_single_list(
                        map(
                            Evaluator.evaluate_single_args,
                            map(
                                lambda d: tuple(
                                    self.txt_preproc.apply([
                                        ''.join(d.ground_truth), d.sentence
                                    ])), out_gen)))
                    accuracy = 1 - result["avg_ler"]

                    if accuracy > early_stopping_best_accuracy:
                        early_stopping_best_accuracy = accuracy
                        early_stopping_best_cur_nbest = 1
                        early_stopping_best_at_iter = iter + 1
                        # overwrite as best model
                        last_checkpoint = make_checkpoint(
                            checkpoint_params.
                            early_stopping_best_model_output_dir,
                            prefix="",
                            version=checkpoint_params.
                            early_stopping_best_model_prefix,
                        )
                        print(
                            "Found better model with accuracy of {:%}".format(
                                early_stopping_best_accuracy))
                    else:
                        early_stopping_best_cur_nbest += 1
                        print(
                            "No better model found. Currently accuracy of {:%} at iter {} (remaining nbest = {})"
                            .format(
                                early_stopping_best_accuracy,
                                early_stopping_best_at_iter,
                                checkpoint_params.early_stopping_nbest -
                                early_stopping_best_cur_nbest))

                    if accuracy > 0 and early_stopping_best_cur_nbest >= checkpoint_params.early_stopping_nbest:
                        print("Early stopping now.")
                        break

                    if accuracy >= 1:
                        print(
                            "Reached perfect score on validation set. Early stopping now."
                        )
                        break

        except KeyboardInterrupt as e:
            print("Storing interrupted checkpoint")
            make_checkpoint(checkpoint_params.output_dir,
                            checkpoint_params.output_model_prefix,
                            "interrupted")
            raise e

        print("Total time {}s for {} iterations.".format(
            time.time() - train_start_time, iter))
Пример #3
0
    def _run_train(self, train_net, test_net, codec, train_start_time, progress_bar):
        checkpoint_params = self.checkpoint_params
        validation_dataset = test_net.input_dataset
        iters_per_epoch = max(1, int(len(train_net.input_dataset) / checkpoint_params.batch_size))

        loss_stats = RunningStatistics(checkpoint_params.stats_size, checkpoint_params.loss_stats)
        ler_stats = RunningStatistics(checkpoint_params.stats_size, checkpoint_params.ler_stats)
        dt_stats = RunningStatistics(checkpoint_params.stats_size, checkpoint_params.dt_stats)

        display = checkpoint_params.display
        display_epochs = display <= 1
        if display <= 0:
            display = 0                                       # to not display anything
        elif display_epochs:
            display = max(1, int(display * iters_per_epoch))  # relative to epochs
        else:
            display = max(1, int(display))                    # iterations

        checkpoint_frequency = checkpoint_params.checkpoint_frequency
        early_stopping_frequency = checkpoint_params.early_stopping_frequency
        if early_stopping_frequency < 0:
            # set early stopping frequency to half epoch
            early_stopping_frequency = int(0.5 * iters_per_epoch)
        elif 0 < early_stopping_frequency <= 1:
            early_stopping_frequency = int(early_stopping_frequency * iters_per_epoch)  # relative to epochs
        else:
            early_stopping_frequency = int(early_stopping_frequency)

        if checkpoint_frequency < 0:
            checkpoint_frequency = early_stopping_frequency
        elif 0 < checkpoint_frequency <= 1:
            checkpoint_frequency = int(checkpoint_frequency * iters_per_epoch)  # relative to epochs
        else:
            checkpoint_frequency = int(checkpoint_frequency)

        early_stopping_enabled = self.validation_dataset is not None \
                                 and checkpoint_params.early_stopping_frequency > 0 \
                                 and checkpoint_params.early_stopping_nbest > 1
        early_stopping_best_accuracy = checkpoint_params.early_stopping_best_accuracy
        early_stopping_best_cur_nbest = checkpoint_params.early_stopping_best_cur_nbest
        early_stopping_best_at_iter = checkpoint_params.early_stopping_best_at_iter

        early_stopping_predictor = Predictor(codec=codec, text_postproc=self.txt_postproc,
                                             network=test_net)

        # Start the actual training
        # ====================================================================================

        iter = checkpoint_params.iter

        # helper function to write a checkpoint
        def make_checkpoint(base_dir, prefix, version=None):
            if version:
                checkpoint_path = os.path.abspath(os.path.join(base_dir, "{}{}.ckpt".format(prefix, version)))
            else:
                checkpoint_path = os.path.abspath(os.path.join(base_dir, "{}{:08d}.ckpt".format(prefix, iter + 1)))
            print("Storing checkpoint to '{}'".format(checkpoint_path))
            train_net.save_checkpoint(checkpoint_path)
            checkpoint_params.version = Checkpoint.VERSION
            checkpoint_params.iter = iter
            checkpoint_params.loss_stats[:] = loss_stats.values
            checkpoint_params.ler_stats[:] = ler_stats.values
            checkpoint_params.dt_stats[:] = dt_stats.values
            checkpoint_params.total_time = time.time() - train_start_time
            checkpoint_params.early_stopping_best_accuracy = early_stopping_best_accuracy
            checkpoint_params.early_stopping_best_cur_nbest = early_stopping_best_cur_nbest
            checkpoint_params.early_stopping_best_at_iter = early_stopping_best_at_iter

            with open(checkpoint_path + ".json", 'w') as f:
                f.write(json_format.MessageToJson(checkpoint_params))

            return checkpoint_path

        try:
            last_checkpoint = None
            n_infinite_losses = 0
            n_max_infinite_losses = 5

            # Training loop, can be interrupted by early stopping
            for iter in range(iter, checkpoint_params.max_iters):
                checkpoint_params.iter = iter

                iter_start_time = time.time()
                result = train_net.train_step()

                if not np.isfinite(result['loss']):
                    n_infinite_losses += 1

                    if n_max_infinite_losses == n_infinite_losses:
                        print("Error: Loss is not finite! Trying to restart from last checkpoint.")
                        if not last_checkpoint:
                            raise Exception("No checkpoint written yet. Training must be stopped.")
                        else:
                            # reload also non trainable weights, such as solver-specific variables
                            train_net.load_weights(last_checkpoint, restore_only_trainable=False)
                            continue
                    else:
                        continue

                n_infinite_losses = 0

                loss_stats.push(result['loss'])
                ler_stats.push(result['ler'])

                dt_stats.push(time.time() - iter_start_time)

                if display > 0 and iter % display == 0:
                    # apply postprocessing to display the true output
                    pred_sentence = self.txt_postproc.apply("".join(codec.decode(result["decoded"][0])))
                    gt_sentence = self.txt_postproc.apply("".join(codec.decode(result["gt"][0])))

                    if display_epochs:
                        print("#{:08f}: loss={:.8f} ler={:.8f} dt={:.8f}s".format(
                            iter / iters_per_epoch, loss_stats.mean(), ler_stats.mean(), dt_stats.mean()))
                    else:
                        print("#{:08d}: loss={:.8f} ler={:.8f} dt={:.8f}s".format(
                            iter, loss_stats.mean(), ler_stats.mean(), dt_stats.mean()))

                    # Insert utf-8 ltr/rtl direction marks for bidi support
                    lr = "\u202A\u202B"
                    print(" PRED: '{}{}{}'".format(lr[bidi.get_base_level(pred_sentence)], pred_sentence, "\u202C"))
                    print(" TRUE: '{}{}{}'".format(lr[bidi.get_base_level(gt_sentence)], gt_sentence, "\u202C"))

                if checkpoint_frequency > 0 and (iter + 1) % checkpoint_frequency == 0:
                    last_checkpoint = make_checkpoint(checkpoint_params.output_dir, checkpoint_params.output_model_prefix)

                if early_stopping_enabled and (iter + 1) % early_stopping_frequency == 0:
                    print("Checking early stopping model")

                    out_gen = early_stopping_predictor.predict_input_dataset(validation_dataset,
                                                                             progress_bar=progress_bar)
                    result = Evaluator.evaluate_single_list(map(
                        Evaluator.evaluate_single_args,
                        map(lambda d: tuple(self.txt_preproc.apply([''.join(d.ground_truth), d.sentence])), out_gen)))
                    accuracy = 1 - result["avg_ler"]

                    if accuracy > early_stopping_best_accuracy:
                        early_stopping_best_accuracy = accuracy
                        early_stopping_best_cur_nbest = 1
                        early_stopping_best_at_iter = iter + 1
                        # overwrite as best model
                        last_checkpoint = make_checkpoint(
                            checkpoint_params.early_stopping_best_model_output_dir,
                            prefix="",
                            version=checkpoint_params.early_stopping_best_model_prefix,
                        )
                        print("Found better model with accuracy of {:%}".format(early_stopping_best_accuracy))
                    else:
                        early_stopping_best_cur_nbest += 1
                        print("No better model found. Currently accuracy of {:%} at iter {} (remaining nbest = {})".
                              format(early_stopping_best_accuracy, early_stopping_best_at_iter,
                                     checkpoint_params.early_stopping_nbest - early_stopping_best_cur_nbest))

                    if accuracy > 0 and early_stopping_best_cur_nbest >= checkpoint_params.early_stopping_nbest:
                        print("Early stopping now.")
                        break

                    if accuracy >= 1:
                        print("Reached perfect score on validation set. Early stopping now.")
                        break

        except KeyboardInterrupt as e:
            print("Storing interrupted checkpoint")
            make_checkpoint(checkpoint_params.output_dir,
                            checkpoint_params.output_model_prefix,
                            "interrupted")
            raise e

        print("Total time {}s for {} iterations.".format(time.time() - train_start_time, iter))