示例#1
0
    def run(args):
        """Evaluate a saved Conformer model on the configured test data.

        Builds the featurizers and the test dataset from the user config,
        constructs the Conformer, and hands everything to ``BaseTester``.

        Args:
            args: Parsed command-line namespace; ``config``, ``saved_model``,
                ``tfrecords`` and ``from_weights`` are read.

        Raises:
            AssertionError: If ``args.saved_model`` is empty.
        """
        config = UserConfig(DEFAULT_YAML, args.config, learning=True)
        speech_featurizer = TFSpeechFeaturizer(config["speech_config"])
        text_featurizer = TextFeaturizer(config["decoder_config"])

        tf.random.set_seed(0)
        assert args.saved_model, "a saved model path is required for testing"

        # Look the shared config sections up once instead of on every use.
        learning_config = config["learning_config"]
        dataset_config = learning_config["dataset_config"]
        batch_size = learning_config["running_config"]["batch_size"]

        if args.tfrecords:
            test_dataset = ASRTFRecordDataset(
                dataset_config["test_paths"],
                dataset_config["tfrecords_dir"],
                speech_featurizer,
                text_featurizer,
                "test",
                augmentations=learning_config["augmentations"],
                shuffle=False).create(batch_size)
        else:
            # Use the test split here as well, so both branches evaluate the
            # same data (this branch used to read "eval_paths").
            test_dataset = ASRSliceDataset(
                stage="test",
                speech_featurizer=speech_featurizer,
                text_featurizer=text_featurizer,
                data_paths=dataset_config["test_paths"],
                shuffle=False).create(batch_size)

        # build model
        f, c = speech_featurizer.compute_feature_dim()
        conformer = Conformer(vocabulary_size=text_featurizer.num_classes,
                              **config["model_config"])
        conformer._build([1, 50, f, c])
        conformer.summary(line_length=100)

        conformer_tester = BaseTester(
            config=learning_config["running_config"],
            saved_path=args.saved_model,
            from_weights=args.from_weights)
        conformer_tester.compile(conformer, speech_featurizer, text_featurizer)
        conformer_tester.run(test_dataset)
示例#2
0
# Path of the trained weights that are loaded into the model further down.
parser.add_argument("--saved",
                    type=str,
                    default=None,
                    help="Path to saved model")

# Positional destination path of the exported TFLite file.
# NOTE(review): argparse only consults `default` for a positional when nargs
# is "?" or "*", so `default=None` here looks like a no-op -- confirm.
parser.add_argument("output",
                    type=str,
                    default=None,
                    help="TFLite file path to be exported")

args = parser.parse_args()

# Both the weights path and the output path must be provided.
assert args.saved and args.output

# NOTE(review): `args.config` is presumably registered on `parser` above this
# fragment -- verify.
config = UserConfig(DEFAULT_YAML, args.config, learning=True)
speech_featurizer = TFSpeechFeaturizer(config["speech_config"])
text_featurizer = CharFeaturizer(config["decoder_config"])

# build model
conformer = Conformer(**config["model_config"],
                      vocabulary_size=text_featurizer.num_classes)
conformer._build(speech_featurizer.shape)
conformer.load_weights(args.saved)
conformer.summary(line_length=150)
# Attach the featurizers before the TFLite function is requested below.
conformer.add_featurizers(speech_featurizer, text_featurizer)

# Obtain a concrete function from the model's TFLite function (greedy=True)
# and hand it to the TFLite converter.
concrete_func = conformer.make_tflite_function(
    greedy=True).get_concrete_function()
converter = tf.lite.TFLiteConverter.from_concrete_functions([concrete_func])
# Enable TFLite's default optimization set for the conversion.
converter.optimizations = [tf.lite.Optimize.DEFAULT]
示例#3
0
def main():
    """Decode one audio file with SelfAttentionDS2 and dump histograms.

    Parses the command line, restores the model from ``--saved_model``,
    prints the beam-search transcript of ``--audio`` and writes histogram
    PNGs into ``--output``: one per base-model layer (skipping layer 0), one
    for the model output, one for its softmax and one for the input features.
    """

    def str2bool(value):
        """Convert a command-line token to a bool.

        ``type=bool`` would turn every non-empty string, including "False",
        into True, so the token is interpreted explicitly instead.
        """
        token = str(value).strip().lower()
        if token in ("1", "true", "t", "yes", "y", "on"):
            return True
        if token in ("", "0", "false", "f", "no", "n", "off"):
            return False
        raise argparse.ArgumentTypeError(
            f"expected a boolean value, got {value!r}")

    parser = argparse.ArgumentParser(prog="SelfAttentionDS2 Histogram")

    parser.add_argument("--config", type=str, default=None,
                        help="Config file")

    parser.add_argument("--audio", type=str, default=None,
                        help="Audio file")

    parser.add_argument("--saved_model", type=str, default=None,
                        help="Saved model")

    parser.add_argument("--from_weights", type=str2bool, default=False,
                        help="Load from weights")

    parser.add_argument("--output", type=str, default=None,
                        help="Output dir storing histograms")

    args = parser.parse_args()

    # savefig does not create missing directories, so make sure it exists.
    os.makedirs(args.output, exist_ok=True)

    def save_hist(values, bins, title, filename):
        """Plot ``values`` as a histogram, save it to the output dir, reset pyplot."""
        plt.hist(values, bins, color="green", histtype="stepfilled")
        plt.title(title, fontweight="bold")
        plt.savefig(os.path.join(args.output, filename))
        plt.clf()
        plt.cla()
        plt.close()

    config = UserConfig(args.config, args.config, learning=False)
    speech_featurizer = SpeechFeaturizer(config["speech_config"])
    text_featurizer = TextFeaturizer(config["decoder_config"])
    text_featurizer.add_scorer(Scorer(**text_featurizer.decoder_config["lm_config"],
                                      vocabulary=text_featurizer.vocab_array))

    f, c = speech_featurizer.compute_feature_dim()
    satt_ds2_model = SelfAttentionDS2(input_shape=[None, f, c],
                                      arch_config=config["model_config"],
                                      num_classes=text_featurizer.num_classes)
    satt_ds2_model._build([1, 50, f, c])

    if args.from_weights:
        satt_ds2_model.load_weights(args.saved_model)
    else:
        # Load the full saved model and copy its weights into the fresh one.
        saved_model = tf.keras.models.load_model(args.saved_model)
        satt_ds2_model.set_weights(saved_model.get_weights())

    satt_ds2_model.summary(line_length=100)

    satt_ds2_model.add_featurizers(speech_featurizer, text_featurizer)

    signal = read_raw_audio(args.audio, speech_featurizer.sample_rate)
    features = speech_featurizer.extract(signal)
    decoded = satt_ds2_model.recognize_beam(tf.expand_dims(features, 0), lm=True)
    print(bytes_to_string(decoded.numpy()))

    # One histogram per intermediate layer; layer 0 is skipped as before.
    base_model = satt_ds2_model.base_model
    for i, layer in enumerate(base_model.layers[1:], start=1):
        func = tf.keras.backend.function([base_model.input], [layer.output])
        data = func([np.expand_dims(features, 0), 1])[0][0]
        print(data.shape)
        save_hist(data.flatten(), 200,
                  f"Output of {layer.name}", f"{i}_{layer.name}.png")

    last_layer_name = satt_ds2_model.layers[-1].name
    fc = satt_ds2_model(tf.expand_dims(features, 0), training=False)
    save_hist(fc[0].numpy().flatten(), 200,
              f"Output of {last_layer_name}", f"{last_layer_name}.png")

    fc = tf.nn.softmax(fc)
    save_hist(fc[0].numpy().flatten(), 10,
              "Output of softmax", "softmax_hist.png")

    save_hist(features.flatten(), 200,
              "Log Mel Spectrogram", "log_mel_spectrogram.png")
示例#4
0
    def run(args):
        """Train a Conformer transducer and optionally export the result.

        Args:
            args: Parsed command-line namespace; ``config``,
                ``mixed_precision``, ``tfrecords``, ``max_ckpts``,
                ``eval_train_ratio``, ``export`` and ``from_weights`` are read.
        """
        config = UserConfig(DEFAULT_YAML, args.config, learning=True)
        speech_featurizer = TFSpeechFeaturizer(config["speech_config"])
        text_featurizer = TextFeaturizer(config["decoder_config"])

        tf.random.set_seed(2020)

        if args.mixed_precision:
            mixed_policy = tf.keras.mixed_precision.experimental.Policy(
                "mixed_float16")
            tf.keras.mixed_precision.experimental.set_policy(mixed_policy)
            print("Enabled mixed precision training")

        # Shared config sections used by both dataset flavours below.
        learning_config = config["learning_config"]
        dataset_config = learning_config["dataset_config"]

        if args.tfrecords:
            train_dataset = ASRTFRecordDataset(
                dataset_config["train_paths"],
                dataset_config["tfrecords_dir"],
                speech_featurizer,
                text_featurizer,
                "train",
                augmentations=learning_config["augmentations"],
                shuffle=True,
            )
            eval_dataset = ASRTFRecordDataset(
                dataset_config["eval_paths"],
                dataset_config["tfrecords_dir"],
                speech_featurizer,
                text_featurizer,
                "eval",
                shuffle=False,
            )
        else:
            train_dataset = ASRSliceDataset(
                stage="train",
                speech_featurizer=speech_featurizer,
                text_featurizer=text_featurizer,
                data_paths=dataset_config["train_paths"],
                augmentations=learning_config["augmentations"],
                shuffle=True,
            )
            eval_dataset = ASRSliceDataset(
                stage="eval",
                speech_featurizer=speech_featurizer,
                text_featurizer=text_featurizer,
                data_paths=dataset_config["eval_paths"],
                shuffle=False,
            )

        conformer_trainer = TransducerTrainer(
            config=learning_config["running_config"],
            text_featurizer=text_featurizer,
            is_mixed_precision=args.mixed_precision,
        )

        # Model and optimizer are created under the trainer's strategy scope.
        with conformer_trainer.strategy.scope():
            f, c = speech_featurizer.compute_feature_dim()
            conformer = Conformer(**config["model_config"],
                                  vocabulary_size=text_featurizer.num_classes)
            conformer._build([1, 50, f, c])

            optimizer_config = learning_config["optimizer_config"]
            lr_schedule = TransformerSchedule(
                d_model=config["model_config"]["dmodel"],
                warmup_steps=optimizer_config["warmup_steps"],
                max_lr=(0.05 / math.sqrt(config["model_config"]["dmodel"])),
            )
            optimizer = tf.keras.optimizers.Adam(
                lr_schedule,
                beta_1=float(optimizer_config["beta1"]),
                beta_2=float(optimizer_config["beta2"]),
                epsilon=float(optimizer_config["epsilon"]),
            )

        conformer_trainer.compile(model=conformer,
                                  optimizer=optimizer,
                                  max_to_keep=args.max_ckpts)

        conformer_trainer.fit(train_dataset, eval_dataset,
                              args.eval_train_ratio)

        # Nothing left to do unless an export path was requested.
        if not args.export:
            return

        trained_model = conformer_trainer.model
        if args.from_weights:
            trained_model.save_weights(args.export)
        else:
            trained_model.save(args.export)
示例#5
0
    def run(args):
        """Train, test, or export a SelfAttentionDS2 model based on ``args.mode``.

        ``"train"`` fits the model and optionally exports it, ``"test"``
        evaluates a saved model with ``BaseTester``, and any other accepted
        mode saves the model through ``save_from_checkpoint``.

        Args:
            args: Parsed command-line namespace; ``mode``, ``config``,
                ``tfrecords``, ``export``, ``from_weights``,
                ``mixed_precision``, ``max_ckpts`` and ``eval_train_ratio``
                are read as needed by the selected mode.

        Raises:
            AssertionError: If ``args.mode`` is not in ``modes``, or if
                ``args.export`` is empty in the test/export modes.
        """
        assert args.mode in modes, f"Mode must in {modes}"

        config = UserConfig(DEFAULT_YAML, args.config, learning=True)
        speech_featurizer = SpeechFeaturizer(config["speech_config"])
        text_featurizer = TextFeaturizer(config["decoder_config"])
        learning_config = config["learning_config"]
        dataset_config = learning_config["dataset_config"]

        def build_model():
            """Create and build the SelfAttentionDS2 network."""
            f, c = speech_featurizer.compute_feature_dim()
            model = SelfAttentionDS2(input_shape=[None, f, c],
                                     arch_config=config["model_config"],
                                     num_classes=text_featurizer.num_classes)
            model._build([1, 50, f, c])
            return model

        def build_optimizer():
            """Create the optimizer described by the optimizer config."""
            return create_optimizer(
                name=learning_config["optimizer_config"]["name"],
                d_model=config["model_config"]["att"]["head_size"],
                **learning_config["optimizer_config"]["config"]
            )

        def export_model(model):
            """Save weights only or the whole model to ``args.export``."""
            if args.from_weights:
                model.save_weights(args.export)
            else:
                model.save(args.export)

        def train():
            """Fit the model on the train/eval datasets, then export if asked."""
            tf.random.set_seed(2020)

            if args.mixed_precision:
                policy = tf.keras.mixed_precision.experimental.Policy("mixed_float16")
                tf.keras.mixed_precision.experimental.set_policy(policy)
                print("Enabled mixed precision training")

            ctc_trainer = CTCTrainer(speech_featurizer, text_featurizer,
                                     learning_config["running_config"],
                                     args.mixed_precision)

            if args.tfrecords:
                train_dataset = ASRTFRecordDataset(
                    dataset_config["train_paths"],
                    dataset_config["tfrecords_dir"],
                    speech_featurizer, text_featurizer, "train",
                    augmentations=learning_config["augmentations"], shuffle=True,
                )
                eval_dataset = ASRTFRecordDataset(
                    dataset_config["eval_paths"],
                    dataset_config["tfrecords_dir"],
                    speech_featurizer, text_featurizer, "eval", shuffle=False
                )
            else:
                train_dataset = ASRSliceDataset(
                    stage="train", speech_featurizer=speech_featurizer,
                    text_featurizer=text_featurizer,
                    data_paths=dataset_config["train_paths"],
                    augmentations=learning_config["augmentations"], shuffle=True,
                )
                eval_dataset = ASRSliceDataset(
                    stage="eval", speech_featurizer=speech_featurizer,
                    text_featurizer=text_featurizer,
                    data_paths=dataset_config["eval_paths"],
                    shuffle=False
                )

            # Model and optimizer are created under the trainer's strategy scope.
            with ctc_trainer.strategy.scope():
                satt_ds2_model = build_model()
                optimizer = build_optimizer()

            ctc_trainer.compile(satt_ds2_model, optimizer, max_to_keep=args.max_ckpts)
            ctc_trainer.fit(train_dataset, eval_dataset, args.eval_train_ratio)

            if args.export:
                export_model(ctc_trainer.model)

        def test():
            """Evaluate the model stored at ``args.export`` on the test data."""
            tf.random.set_seed(0)
            assert args.export

            text_featurizer.add_scorer(
                Scorer(**text_featurizer.decoder_config["lm_config"],
                       vocabulary=text_featurizer.vocab_array))

            satt_ds2_model = build_model()
            satt_ds2_model.summary(line_length=100)
            # No optimizer is built here: the tester never receives one.

            batch_size = learning_config["running_config"]["batch_size"]
            if args.tfrecords:
                test_dataset = ASRTFRecordDataset(
                    dataset_config["test_paths"],
                    dataset_config["tfrecords_dir"],
                    speech_featurizer, text_featurizer, "test",
                    augmentations=learning_config["augmentations"], shuffle=False
                ).create(batch_size * args.eval_train_ratio)
            else:
                test_dataset = ASRSliceDataset(
                    stage="test", speech_featurizer=speech_featurizer,
                    text_featurizer=text_featurizer,
                    data_paths=dataset_config["test_paths"],
                    augmentations=learning_config["augmentations"], shuffle=False
                ).create(batch_size * args.eval_train_ratio)

            ctc_tester = BaseTester(
                config=learning_config["running_config"],
                saved_path=args.export, from_weights=args.from_weights
            )
            ctc_tester.compile(satt_ds2_model, speech_featurizer, text_featurizer)
            ctc_tester.run(test_dataset)

        def export_from_checkpoint():
            """Save the model to ``args.export`` via ``save_from_checkpoint``."""
            assert args.export

            satt_ds2_model = build_model()
            optimizer = build_optimizer()

            def save_func(**kwargs):
                export_model(kwargs["model"])

            save_from_checkpoint(func=save_func,
                                 outdir=learning_config["running_config"]["outdir"],
                                 model=satt_ds2_model, optimizer=optimizer)

        if args.mode == "train":
            train()
        elif args.mode == "test":
            test()
        else:
            export_from_checkpoint()
示例#6
0
def main():
    """Evaluate a saved DeepSpeech2 model on the configured test data.

    Parses the command line, builds featurizers, model and test dataset from
    the config file, and runs ``BaseTester`` against ``--saved_path``.

    Raises:
        AssertionError: If ``--saved_path`` is not given.
    """

    def str2bool(value):
        """Convert a command-line token to a bool.

        ``type=bool`` would turn every non-empty string, including "False",
        into True, so the token is interpreted explicitly instead.
        """
        token = str(value).strip().lower()
        if token in ("1", "true", "t", "yes", "y", "on"):
            return True
        if token in ("", "0", "false", "f", "no", "n", "off"):
            return False
        raise argparse.ArgumentTypeError(
            f"expected a boolean value, got {value!r}")

    tf.keras.backend.clear_session()

    parser = argparse.ArgumentParser(prog="Deep Speech 2 Tester")

    parser.add_argument("--config",
                        "-c",
                        type=str,
                        default=DEFAULT_YAML,
                        help="The file path of model configuration file")

    parser.add_argument("--saved_path",
                        "-e",
                        type=str,
                        default=None,
                        help="Path to the model file to be exported")

    parser.add_argument("--from_weights",
                        type=str2bool,
                        default=False,
                        help="Whether to save or load only weights")

    parser.add_argument("--tfrecords",
                        type=str2bool,
                        default=False,
                        help="Whether to use tfrecords dataset")

    parser.add_argument("--batch_size",
                        type=int,
                        default=1,
                        help="Batch size for testing")

    args = parser.parse_args()

    tf.random.set_seed(0)
    # The parser defines --saved_path, not --export; reading args.export
    # raised AttributeError before any testing could start.
    assert args.saved_path, "--saved_path is required for testing"

    config = UserConfig(DEFAULT_YAML, args.config, learning=True)
    speech_featurizer = TFSpeechFeaturizer(config["speech_config"])
    text_featurizer = TextFeaturizer(config["decoder_config"])
    learning_config = config["learning_config"]
    dataset_config = learning_config["dataset_config"]

    # Build DS2 model
    f, c = speech_featurizer.compute_feature_dim()
    ds2_model = DeepSpeech2(input_shape=[None, f, c],
                            arch_config=config["model_config"],
                            num_classes=text_featurizer.num_classes,
                            name="deepspeech2")
    ds2_model._build([1, 50, f, c])
    ds2_model.summary(line_length=100)

    if args.tfrecords:
        test_dataset = ASRTFRecordDataset(
            dataset_config["test_paths"],
            dataset_config["tfrecords_dir"],
            speech_featurizer,
            text_featurizer,
            "test",
            augmentations=learning_config["augmentations"],
            shuffle=False).create(args.batch_size)
    else:
        # Use the test split here as well, so both branches evaluate the
        # same data (this branch used to read "eval_paths").
        test_dataset = ASRSliceDataset(
            stage="test",
            speech_featurizer=speech_featurizer,
            text_featurizer=text_featurizer,
            data_paths=dataset_config["test_paths"],
            shuffle=False).create(args.batch_size)

    ctc_tester = BaseTester(config=learning_config["running_config"],
                            saved_path=args.saved_path,
                            from_weights=args.from_weights)
    ctc_tester.compile(ds2_model, speech_featurizer, text_featurizer)
    ctc_tester.run(test_dataset)
示例#7
0
def main():
    """Train a DeepSpeech2 CTC model and optionally export the result.

    Parses the command line, builds featurizers and train/eval datasets from
    the config file, fits the model with ``CTCTrainer`` and, when ``--export``
    is given, saves either the weights or the whole model there.
    """

    def str2bool(value):
        """Convert a command-line token to a bool.

        ``type=bool`` would turn every non-empty string, including "False",
        into True, so the token is interpreted explicitly instead.
        """
        token = str(value).strip().lower()
        if token in ("1", "true", "t", "yes", "y", "on"):
            return True
        if token in ("", "0", "false", "f", "no", "n", "off"):
            return False
        raise argparse.ArgumentTypeError(
            f"expected a boolean value, got {value!r}")

    tf.keras.backend.clear_session()

    parser = argparse.ArgumentParser(prog="Deep Speech 2 Training")

    parser.add_argument("--config",
                        "-c",
                        type=str,
                        default=DEFAULT_YAML,
                        help="The file path of model configuration file")

    parser.add_argument("--export",
                        "-e",
                        type=str,
                        default=None,
                        help="Path to the model file to be exported")

    parser.add_argument("--mixed_precision",
                        type=str2bool,
                        default=False,
                        help="Whether to use mixed precision training")

    parser.add_argument("--save_weights",
                        type=str2bool,
                        default=False,
                        help="Whether to save or load only weights")

    parser.add_argument("--max_ckpts",
                        type=int,
                        default=10,
                        help="Max number of checkpoints to keep")

    parser.add_argument(
        "--eval_train_ratio",
        type=int,
        default=1,
        help="ratio between train batch size and eval batch size")

    parser.add_argument("--tfrecords",
                        type=str2bool,
                        default=False,
                        help="Whether to use tfrecords dataset")

    args = parser.parse_args()

    config = UserConfig(DEFAULT_YAML, args.config, learning=True)
    speech_featurizer = TFSpeechFeaturizer(config["speech_config"])
    text_featurizer = TextFeaturizer(config["decoder_config"])

    tf.random.set_seed(2020)

    if args.mixed_precision:
        policy = tf.keras.mixed_precision.experimental.Policy("mixed_float16")
        tf.keras.mixed_precision.experimental.set_policy(policy)
        print("Enabled mixed precision training")

    learning_config = config["learning_config"]
    dataset_config = learning_config["dataset_config"]

    if args.tfrecords:
        train_dataset = ASRTFRecordDataset(
            dataset_config["train_paths"],
            dataset_config["tfrecords_dir"],
            speech_featurizer,
            text_featurizer,
            "train",
            augmentations=learning_config["augmentations"],
            shuffle=True,
        )
        eval_dataset = ASRTFRecordDataset(
            dataset_config["eval_paths"],
            dataset_config["tfrecords_dir"],
            speech_featurizer,
            text_featurizer,
            "eval",
            shuffle=False)
    else:
        # Mirror the TFRecord branch: train on the train split, and evaluate
        # unshuffled on the eval split. This branch used to train on
        # "eval_paths" and build the eval set as a shuffled "train" stage.
        train_dataset = ASRSliceDataset(
            stage="train",
            speech_featurizer=speech_featurizer,
            text_featurizer=text_featurizer,
            data_paths=dataset_config["train_paths"],
            augmentations=learning_config["augmentations"],
            shuffle=True)
        eval_dataset = ASRSliceDataset(
            stage="eval",
            speech_featurizer=speech_featurizer,
            text_featurizer=text_featurizer,
            data_paths=dataset_config["eval_paths"],
            shuffle=False)

    ctc_trainer = CTCTrainer(speech_featurizer, text_featurizer,
                             learning_config["running_config"],
                             args.mixed_precision)
    # Build DS2 model
    f, c = speech_featurizer.compute_feature_dim()
    with ctc_trainer.strategy.scope():
        ds2_model = DeepSpeech2(input_shape=[None, f, c],
                                arch_config=config["model_config"],
                                num_classes=text_featurizer.num_classes,
                                name="deepspeech2")
        ds2_model._build([1, 50, f, c])
    # Compile
    ctc_trainer.compile(ds2_model,
                        learning_config["optimizer_config"],
                        max_to_keep=args.max_ckpts)

    ctc_trainer.fit(train_dataset, eval_dataset, args.eval_train_ratio)

    if args.export:
        if args.save_weights:
            ctc_trainer.model.save_weights(args.export)
        else:
            ctc_trainer.model.save(args.export)
示例#8
0
def main():
    """Decode one audio file with SelfAttentionDS2 and dump heat-map images.

    Parses the command line, restores the model from ``--saved_model``,
    prints the beam-search transcript of ``--audio`` and writes three PNGs
    into ``--output``: the model output, its softmax, and the input features.
    """

    def str2bool(value):
        """Convert a command-line token to a bool.

        ``type=bool`` would turn every non-empty string, including "False",
        into True, so the token is interpreted explicitly instead.
        """
        token = str(value).strip().lower()
        if token in ("1", "true", "t", "yes", "y", "on"):
            return True
        if token in ("", "0", "false", "f", "no", "n", "off"):
            return False
        raise argparse.ArgumentTypeError(
            f"expected a boolean value, got {value!r}")

    parser = argparse.ArgumentParser(prog="SelfAttentionDS2 Histogram")

    parser.add_argument("--config", type=str, default=None, help="Config file")

    parser.add_argument("--audio", type=str, default=None, help="Audio file")

    parser.add_argument("--saved_model",
                        type=str,
                        default=None,
                        help="Saved model")

    parser.add_argument("--from_weights",
                        type=str2bool,
                        default=False,
                        help="Load from weights")

    parser.add_argument("--output",
                        type=str,
                        default=None,
                        help="Output dir storing histograms")

    args = parser.parse_args()

    # savefig does not create missing directories, so make sure it exists.
    os.makedirs(args.output, exist_ok=True)

    def save_heatmap(matrix, title, filename):
        """Draw ``matrix`` as an image with a colorbar, save it, reset pyplot."""
        plt.figure(figsize=(16, 5))
        ax = plt.gca()
        ax.set_title(title, fontweight="bold")
        im = ax.imshow(matrix, origin="lower", aspect="auto")
        # Put the colorbar in its own narrow axes to the right of the image.
        divider = make_axes_locatable(ax)
        cax = divider.append_axes("right", size="5%", pad=0.05)
        plt.colorbar(im, cax=cax)
        plt.savefig(os.path.join(args.output, filename))
        plt.clf()
        plt.cla()
        plt.close()

    config = UserConfig(args.config, args.config, learning=False)
    speech_featurizer = SpeechFeaturizer(config["speech_config"])
    text_featurizer = CharFeaturizer(config["decoder_config"])
    text_featurizer.add_scorer(
        Scorer(**text_featurizer.decoder_config["lm_config"],
               vocabulary=text_featurizer.vocab_array))

    f, c = speech_featurizer.compute_feature_dim()
    satt_ds2_model = SelfAttentionDS2(input_shape=[None, f, c],
                                      arch_config=config["model_config"],
                                      num_classes=text_featurizer.num_classes)
    satt_ds2_model._build([1, 50, f, c])

    if args.from_weights:
        satt_ds2_model.load_weights(args.saved_model)
    else:
        # Load the full saved model and copy its weights into the fresh one.
        saved_model = tf.keras.models.load_model(args.saved_model)
        satt_ds2_model.set_weights(saved_model.get_weights())

    satt_ds2_model.summary(line_length=100)

    satt_ds2_model.add_featurizers(speech_featurizer, text_featurizer)

    signal = read_raw_audio(args.audio, speech_featurizer.sample_rate)
    features = speech_featurizer.extract(signal)
    decoded = satt_ds2_model.recognize_beam(tf.expand_dims(features, 0),
                                            lm=True)
    print(bytes_to_string(decoded.numpy()))

    # NOTE: a per-layer heat-map dump over satt_ds2_model.base_model.layers
    # used to sit here as commented-out code; only the three plots below are
    # produced.

    last_layer_name = satt_ds2_model.layers[-1].name
    fc = satt_ds2_model(tf.expand_dims(features, 0), training=False)
    save_heatmap(fc[0].numpy().T, f"{last_layer_name}",
                 f"{last_layer_name}.png")

    fc = tf.nn.softmax(fc)
    save_heatmap(fc[0].numpy().T, "Softmax", "softmax.png")

    save_heatmap(features[:, :, 0].T, "Log Mel Spectrogram", "features.png")
示例#9
0
    def run(args):
        """Train, test, or export a SEGAN generator based on ``args.mode``.

        Args:
            args: Parsed command-line namespace; ``mode``, ``config``,
                ``mixed_precision``, ``max_ckpts``, ``export`` and
                ``from_weights`` are read as needed by the selected mode.

        Raises:
            AssertionError: If ``args.mode`` is not in ``modes``, or if
                ``args.export`` is empty in the test/export modes.
        """
        assert args.mode in modes, f"Mode must in {modes}"

        config = UserConfig(DEFAULT_YAML, args.config, learning=True)

        def save_generator(trainer):
            # Write either just the weights or the whole generator model.
            generator = trainer.generator
            if args.from_weights:
                generator.save_weights(args.export)
            else:
                generator.save(args.export)

        if args.mode == "train":
            tf.random.set_seed(2020)

            if args.mixed_precision:
                mixed_policy = tf.keras.mixed_precision.experimental.Policy(
                    "mixed_float16")
                tf.keras.mixed_precision.experimental.set_policy(mixed_policy)
                print("Enabled mixed precision training")

            learning_config = config["learning_config"]
            train_data = SeganDataset(
                "train",
                learning_config["dataset_config"]["train_paths"],
                learning_config["dataset_config"]["noise_config"],
                config["speech_config"],
                shuffle=True,
            )

            segan_trainer = SeganTrainer(
                config["speech_config"],
                learning_config["running_config"],
                args.mixed_precision,
            )
            segan_trainer.compile(
                config["model_config"],
                learning_config["optimizer_config"],
                max_to_keep=args.max_ckpts,
            )
            segan_trainer.fit(train_dataset=train_data)

            if args.export:
                save_generator(segan_trainer)
            return

        if args.mode == "test":
            tf.random.set_seed(0)
            assert args.export

            learning_config = config["learning_config"]
            test_data = SeganDataset(
                "test",
                learning_config["dataset_config"]["test_paths"],
                learning_config["dataset_config"]["noise_config"],
                config["speech_config"],
                shuffle=False,
            ).create_test()

            segan_tester = SeganTester(
                config["speech_config"],
                learning_config["running_config"],
                args.export,
                from_weights=args.from_weights,
            )
            segan_tester.compile(config["model_config"])
            segan_tester.run(test_data)
            return

        # Remaining mode: restore the trainer from its checkpoint and export.
        assert args.export
        learning_config = config["learning_config"]
        segan_trainer = SeganTrainer(
            config["speech_config"],
            learning_config["running_config"],
            args.mixed_precision,
        )
        segan_trainer.compile(
            config["model_config"],
            learning_config["optimizer_config"],
        )
        segan_trainer.load_checkpoint()
        save_generator(segan_trainer)