Пример #1
0
def get_f0(args):
    """Extract an F0 track from one audio file and save it to disk.

    All inputs arrive packed in a single sequence, in this order:
    ``(fn, f0_path, f0min, f0max, tstep, semitones, outf0dir)``.
    The second element is unpacked but not used by this function.

    The track is named after the input file's basename up to its first
    ".", and is written into ``outf0dir`` under that name with the
    module-level ``TRACK_EXT`` as its extension.
    """
    wav_path, _f0_path, min_f0, max_f0, step, use_semitones, out_dir = args

    # Everything before the first "." of the file name identifies the track.
    stem = os.path.basename(wav_path).partition(".")[0]
    print("PROCESSING: " + stem)

    track = Track()
    track.name = stem
    track.get_f0(wav_path,
                 min_f0,
                 max_f0,
                 timestep=step,
                 semitones=use_semitones)

    out_path = os.path.join(out_dir, stem + "." + TRACK_EXT)
    ttslab.tofile(track, out_path)
Пример #2
0
def draw_sylstruct_graph_pitch_waveform(u):
    """Plot an utterance's syllable structure, pitch contour and waveform.

    Three separate figures are created:

    1. "Utterance": a graph of the "SylStructure" relation with words,
       syllables and segments drawn on rows 3, 2 and 1 respectively,
       each node placed horizontally at the temporal midpoint of its item.
    2. "Pitch": the F0 contour (extracted with ``semitones=True``) with
       x ticks at syllable end times, labelled via ``getsylsegstr``.
    3. "Waveform": the samples decimated by a factor of 10, with the same
       syllable ticks.

    Args:
        u: the utterance to draw. It must provide ``fill_startendtimes()``,
            ``get_relation()``, ``gr()`` and a ``"waveform"`` entry that has
            ``write()``, ``samples`` and ``samplerate``.

    Returns:
        The tuple ``(fig1, fig2, fig3)`` of the figures described above.
    """
    # Use seg end times to calculate start and end times for all items.
    u.fill_startendtimes()

    g = nx.Graph()

    posdict = {}
    nodelist = []
    nodesizelist = []

    def add_node(item, size, level):
        """Register *item* for drawing and link it to its siblings."""
        nodelist.append(item)
        nodesizelist.append(size)
        # Centre the node on the item's time span. The parentheses matter:
        # "end + start / 2" is not the midpoint.
        posdict[item] = [(item["start"] + item["end"]) / 2.0, level]
        if item.prev_item:
            g.add_edge(item.prev_item, item)
        if item.next_item:
            g.add_edge(item.next_item, item)

    def link_daughters(item):
        """Connect *item* to its first and last daughters, when present."""
        # NOTE(review): presumably these attributes are None for an item
        # without daughters; an edge to None would add a node that has no
        # position and break drawing - confirm against the item class.
        for daughter in (item.first_daughter, item.last_daughter):
            if daughter is not None:
                g.add_edge(daughter, item)

    for word in u.get_relation("SylStructure"):
        add_node(word, 300 * len(str(word)), 3)
        link_daughters(word)
        for syl in word.get_daughters():
            add_node(syl, 400, 2)
            link_daughters(syl)
            for seg in syl.get_daughters():
                add_node(seg, 350, 1)

    # Get the pitch: the waveform is written to a scratch directory so the
    # F0 extractor can read it from a file; the directory is removed even
    # if writing or extraction fails.
    d = mkdtemp()
    try:
        wavpath = os.path.join(d, "utt.wav")
        u["waveform"].write(wavpath)
        f0t = Track()
        f0t.get_f0(wavpath, semitones=True)
    finally:
        shutil.rmtree(d)

    fig1 = plt.figure()
    ax = fig1.add_subplot(111)
    ax.set_title("Utterance")
    nx.draw(g, pos=posdict, ax=ax, nodelist=nodelist, node_size=nodesizelist)
    plt.xticks([], [])
    plt.yticks([1.0, 2.0, 3.0], ["segment", "syllable", "word"])

    # Both time-axis plots share the same syllable ticks and labels.
    syllables = list(u.gr("Syllable"))
    syl_ends = [syl["end"] for syl in syllables]
    syl_labels = [getsylsegstr(syl) for syl in syllables]

    fig2 = plt.figure()
    ax1 = fig2.add_subplot(111)
    ax1.set_title("Pitch")
    ax1.set_ylabel("Semitones (relative to 1 Hz)")
    ax1.set_xlabel("Syllables")
    plt.plot(f0t.times, f0t.values, color='green')
    ax1.set_ylim(bottom=75.0)
    plt.xticks(syl_ends, syl_labels)
    ax1.grid()

    fig3 = plt.figure()
    ax2 = fig3.add_subplot(111)
    decimate_factor = 10
    ax2.set_title("Waveform (decimation factor: %s)" % decimate_factor)
    ax2.set_ylabel("Amplitude")
    ax2.set_xlabel("Syllables")
    waveform = ss.decimate(u["waveform"].samples, decimate_factor)
    # Each decimated sample covers decimate_factor original sample periods,
    # which puts the x axis in the same time units as the syllable ticks.
    plt.plot(np.arange(len(waveform)) *
             (1.0 / u["waveform"].samplerate * decimate_factor),
             waveform,
             color='b')
    plt.xticks(syl_ends, syl_labels)
    ax2.grid()

    return fig1, fig2, fig3
Пример #3
0
def draw_sylstruct_graph_pitch_waveform(u):
    """Plot an utterance's syllable structure, pitch and waveform together.

    One figure with three stacked subplots is created:

    1. "Utterance": a graph of the "SylStructure" relation with words,
       syllables and segments drawn on rows 3, 2 and 1 respectively,
       each node placed horizontally at the temporal midpoint of its item.
    2. "Pitch": the F0 contour with the y axis limited to 20-300 and
       labelled "Hertz"; x ticks sit at syllable end times and are
       labelled with each syllable's "tone" value.
    3. "Waveform": the raw samples plotted against sample index, with
       unlabelled x ticks at the word end boundaries.

    Args:
        u: the utterance to draw. It must provide ``fill_startendtimes()``,
            ``get_relation()``, ``gr()`` and a ``"waveform"`` entry that has
            ``write()``, ``samples`` and ``samplerate``.

    Returns:
        The figure holding the three subplots.
    """
    # Use seg end times to calculate start and end times for all items.
    u.fill_startendtimes()

    g = nx.Graph()

    posdict = {}
    nodelist = []
    nodesizelist = []

    def add_node(item, size, level):
        """Register *item* for drawing and link it to its siblings."""
        nodelist.append(item)
        nodesizelist.append(size)
        # Centre the node on the item's time span. The parentheses matter:
        # "end + start / 2" is not the midpoint.
        posdict[item] = [(item["start"] + item["end"]) / 2.0, level]
        if item.prev_item:
            g.add_edge(item.prev_item, item)
        if item.next_item:
            g.add_edge(item.next_item, item)

    def link_daughters(item):
        """Connect *item* to its first and last daughters, when present."""
        # NOTE(review): presumably these attributes are None for an item
        # without daughters; an edge to None would add a node that has no
        # position and break drawing - confirm against the item class.
        for daughter in (item.first_daughter, item.last_daughter):
            if daughter is not None:
                g.add_edge(daughter, item)

    for word in u.get_relation("SylStructure"):
        add_node(word, 300 * len(str(word)), 3)
        link_daughters(word)
        for syl in word.get_daughters():
            add_node(syl, 400, 2)
            link_daughters(syl)
            for seg in syl.get_daughters():
                add_node(seg, 350, 1)

    uttendtime = u.get_relation("Segment").tail_item["end"]
    bounds = np.array([word["end"] for word in u.get_relation("Word")])

    # Get the pitch: the waveform is written to a scratch directory so the
    # F0 extractor can read it from a file; the directory is removed even
    # if writing or extraction fails.
    d = mkdtemp()
    try:
        wavpath = os.path.join(d, "utt.wav")
        u["waveform"].write(wavpath)
        f0t = Track()
        f0t.get_f0(wavpath)
    finally:
        shutil.rmtree(d)

    fig = pl.figure()
    ax = fig.add_subplot(311)
    ax.set_title("Utterance")
    nx.draw(g, pos=posdict, ax=ax, nodelist=nodelist, node_size=nodesizelist)

    ax1 = fig.add_subplot(312)
    ax1.set_title("Pitch")
    ax1.set_ylim(20.0, 300.0)
    ax1.set_ylabel("Hertz")
    pl.plot(f0t.times, f0t.values, color='green')
    syllables = list(u.gr("Syllable"))
    pl.xticks([syl["end"] for syl in syllables],
              [syl["tone"] for syl in syllables])
    ax1.grid()

    ax2 = fig.add_subplot(313)
    ax2.set_title("Waveform")
    # This subplot's x axis is in samples, so times are scaled by the
    # sample rate.
    ax2.set_xlim(0, uttendtime * u["waveform"].samplerate)
    pl.plot(u["waveform"].samples, color='b')
    # Tick positions and (blank) labels are set in two calls: the second
    # positional argument of set_xticks means "minor" on older matplotlib
    # and "labels" on newer releases, so passing the labels there is not
    # portable.
    ax2.set_xticks(bounds * u["waveform"].samplerate)
    ax2.set_xticklabels([''] * len(bounds))
    fig.set_facecolor((0.921568627451, 0.921568627451, 0.921568627451))

    return fig
Пример #4
0
import sys
import array
import math

import numpy as np

import ttslab
from ttslab.trackfile import Track
ttslab.extend(Track, "ttslab.trackfile.funcs.tfuncs_praat")

def friendly_log(f):
    """Return the natural log of *f*, or -1e10 where ``math.log`` rejects it.

    ``math.log`` raises ``ValueError`` for zero and negative input; instead
    of propagating that error, a large negative stand-in value is returned.
    """
    try:
        result = math.log(f)
    except ValueError:
        # Out-of-domain input: substitute the sentinel.
        result = -1e10
    return result

if __name__ == "__main__":
    # Command line: INPUT_AUDIO OUTPUT_FILE MIN_F0 MAX_F0
    fn = sys.argv[1]
    outfn = sys.argv[2]
    minf0 = float(sys.argv[3])
    maxf0 = float(sys.argv[4])

    t = Track()
    # Timestep is hardcoded here because of the padding hack below.
    t.get_f0(fn, minpitch=minf0, maxpitch=maxf0, timestep=0.005, fixocterrs=True)

    # Hack: two zero frames are added at each end so that the samples align
    # with the equivalent output from the HTS script.
    pad = np.array([0.0, 0.0]).reshape(-1, 1)
    f0hzvalues = np.concatenate([pad, t.values, pad])

    # The typecode is a plain str literal: a bytes literal (b"f") is
    # rejected by array.array on Python 3. The column array is flattened so
    # that friendly_log receives scalars and not 1-element arrays.
    lf0 = array.array("f", map(friendly_log, f0hzvalues.ravel()))
    with open(outfn, "wb") as outfh:
        lf0.tofile(outfh)