Example #1
0
def synthesis_post():
    """Synthesis page handler: render the form on GET, run TTS on POST.

    Redirects to /synthesis-setup when no Tacotron model has been loaded yet.
    Returns the rendered synthesis template with paths to the generated
    alignment graph and audio file.
    """
    global model, waveglow_model
    if not model:
        return redirect("/synthesis-setup")

    if request.method == "GET":
        return render_template("synthesis.html")
    else:
        text = request.form["text"]
        # Each run writes into its own results sub-folder named by a unique suffix.
        folder_name = get_suffix()
        results_folder = os.path.join(paths["results"], folder_name)
        os.makedirs(results_folder)
        graph_path = os.path.join(results_folder, GRAPH_FILE)
        audio_path = os.path.join(results_folder, RESULTS_FILE)
        # Forward slashes so the paths are usable as URLs even when built on Windows.
        graph_web_path = graph_path.replace("\\", "/")
        audio_web_path = audio_path.replace("\\", "/")

        synthesize(model, waveglow_model, text, inflect_engine, graph_path,
                   audio_path)
        # NOTE(review): "alertnative_words" looks like a typo for
        # "alternative_words", but the template may depend on the current
        # spelling — confirm against synthesis.html before renaming.
        return render_template(
            "synthesis.html",
            text=text.strip(),
            alertnative_words=get_alternative_word_suggestions(
                audio_path, text),
            graph=graph_web_path,
            audio=audio_web_path,
        )
Example #2
0
def test_synthesize():
    """End-to-end check: synthesize a known phrase and verify the transcript
    roughly matches the input, and that both output files were written."""
    tacotron_path = os.path.join("files", "tacotron2_statedict.pt")
    vocoder_path = os.path.join("files",
                                "waveglow_256channels_universal_v5.pt")
    graph_path = "graph.png"
    audio_path = "synthesized_audio.wav"

    tacotron = load_model(tacotron_path)
    assert tacotron

    vocoder = load_waveglow_model(vocoder_path)
    assert vocoder

    text = "hello everybody my name is david attenborough"
    engine = inflect.engine()
    synthesize(tacotron,
               vocoder,
               text,
               engine,
               graph=graph_path,
               audio=audio_path)

    # Transcribing the generated audio should recover something close
    # to the original text.
    assert text_similarity(text, transcribe(audio_path)) > 0.5
    assert os.path.isfile(graph_path)
    assert os.path.isfile(audio_path)

    # Clean up the generated artefacts.
    for produced in (graph_path, audio_path):
        os.remove(produced)
Example #3
0
def synthesis_post():
    """Synthesis page handler: render the form on GET, run TTS on POST.

    Redirects to /synthesis-setup unless model, vocoder, and symbols are
    all loaded. Results are written to a per-run folder under a
    year-month parent directory.
    """
    global model, vocoder, symbols
    # All three artefacts must be available before synthesis can run.
    if not (model and vocoder and symbols):
        return redirect("/synthesis-setup")

    if request.method == "GET":
        return render_template("synthesis.html")
    else:
        text = request.form.getlist("text")
        # Collapse a single-entry list down to a plain string.
        if len(text) == 1:
            text = text[0]
        method = request.form["text_method"]
        split_text = method == "paragraph"

        parent_folder = os.path.join(paths["results"],
                                     datetime.now().strftime("%Y-%m"))
        os.makedirs(parent_folder, exist_ok=True)

        # Folder name: unique suffix + a sanitised slice of the first line.
        if type(text) == list:
            first_line = text[0]
        else:
            first_line = text
        slug = re.sub("[^0-9a-zA-Z _]+", "",
                      first_line.replace(" ", "_"))[:20]
        results_folder = os.path.join(parent_folder,
                                      get_suffix() + "-" + slug)
        os.makedirs(results_folder)

        graph_path = os.path.join(results_folder, GRAPH_FILE)
        audio_path = os.path.join(results_folder, RESULTS_FILE)
        # Browser-friendly relative paths (forward slashes on Windows too).
        graph_web_path = os.path.relpath(graph_path).replace("\\", "/")
        audio_web_path = os.path.relpath(audio_path).replace("\\", "/")

        silence = float(request.form["silence"])
        max_decoder_steps = int(request.form["max_decoder_steps"])

        synthesize(
            model,
            text,
            symbols,
            graph_path,
            audio_path,
            vocoder,
            silence,
            max_decoder_steps=max_decoder_steps,
            split_text=split_text,
        )
        return render_template(
            "synthesis.html",
            text=text,
            method=method,
            graph=graph_web_path,
            audio=audio_web_path,
            silence=silence,
            max_decoder_steps=max_decoder_steps,
        )
def test_hifigan_synthesis():
    """Synthesize through the HiFi-GAN vocoder and verify the result
    transcribes back close to the input phrase."""
    hifigan_model_path = os.path.join("test_samples", "hifigan.pt")
    hifigan_config_path = os.path.join("test_samples", "config.json")
    audio_path = "synthesized_audio.wav"
    transcriber = Silero()

    vocoder = Hifigan(hifigan_model_path, hifigan_config_path)
    text = "the monkeys live"
    synthesize(
        model=FakeModelForSynthesis(),
        text=text,
        graph_path=None,
        audio_path=audio_path,
        vocoder=vocoder,
    )

    assert os.path.isfile(audio_path)
    # Round-trip check: the generated speech should match the input text.
    score = similarity(text, transcriber.transcribe(audio_path))
    assert score > MIN_SYNTHESIS_SCORE

    os.remove(audio_path)
def test_synthesize():
    """Exercise synthesize() with single-line, multi-line, and auto-split
    text, checking output files exist and durations are as expected."""
    graph_path = "graph.png"
    audio_path = "synthesized_audio.wav"

    fake_model = FakeModel()
    fake_vocoder = FakeVocoder()

    # Case 1: a single line of text -> one second of fake audio.
    synthesize(
        model=fake_model,
        text="hello everybody my name is david attenborough",
        graph_path=graph_path,
        audio_path=audio_path,
        vocoder=fake_vocoder,
        sample_rate=22050,
    )
    assert os.path.isfile(graph_path)
    assert os.path.isfile(audio_path)
    assert librosa.get_duration(filename=audio_path) == 1
    os.remove(graph_path)
    os.remove(audio_path)

    # Case 2: an explicit list of lines -> 1s each plus 0.5s of padding.
    lines = [
        "the monkeys live in the jungle with their families.",
        "however, i prefer to live on the beach and enjoy the sun.",
    ]
    synthesize(
        model=fake_model,
        text=lines,
        graph_path=graph_path,
        audio_path=audio_path,
        vocoder=fake_vocoder,
        silence_padding=0.5,
        sample_rate=22050,
    )
    assert os.path.isfile(graph_path)
    assert os.path.isfile(audio_path)
    assert librosa.get_duration(filename=audio_path) == 2.5
    os.remove(graph_path)
    os.remove(audio_path)

    # Case 3: a single paragraph split automatically on sentence boundaries.
    paragraph = (
        "the monkeys live in the jungle with their families. however, i prefer to live on the beach and enjoy the sun."
    )
    synthesize(
        model=fake_model,
        text=paragraph,
        graph_path=None,
        audio_path=audio_path,
        vocoder=fake_vocoder,
        silence_padding=0.5,
        sample_rate=22050,
        split_text=True,
    )
    assert os.path.isfile(audio_path)
    assert librosa.get_duration(filename=audio_path) == 2.5
    os.remove(audio_path)