示例#1
0
def relative_local_speechrate(reft,
                              targett,
                              s=0.03,
                              realigntimes=False,
                              debug=False):
    """ calculate the relative local speech rate between targett and
        reft by DTW aligning the tracks, fitting a smoothing spline
        to the frame time difference function (smoothing factor 's')
        and using this to calculate derivative contour.

        reft, targett -- tracks exposing a 'times' array with at least
            two frames (the first timestep of each is compared)
        s -- smoothing factor handed to UnivariateSpline
        realigntimes -- if True the spline is fitted on a regular time
            grid built from the first timestep of the time difference
            track instead of on the reference times themselves
        debug -- if True plot the time differences and the fitted spline

        Returns a Track holding the first derivative of the spline,
        with values shaped (-1, 1).
    """
    # A mismatch only produces a warning, never an abort.  The check is
    # an explicit comparison (not an assert) so that it is still made
    # when Python runs with -O.
    targetstep = targett.times[1] - targett.times[0]
    refstep = reft.times[1] - reft.times[0]
    if targetstep != refstep:
        print(
            "WARNING: timesteps must be equal.... (this may be spurious if 'remove_pau' was used)"
        )

    # the alignment path is the last item returned by dtw_align
    path = dtw_align(reft, targett)[-1]
    ltd = _local_timediff(path, reft.times, targett.times)
    if realigntimes:
        newreftimes = np.arange(1,
                                len(ltd) + 1) * (ltd.times[1] - ltd.times[0])
    else:
        newreftimes = ltd.times.copy()
    spline = UnivariateSpline(newreftimes, ltd.values, k=5, s=s)
    if debug:
        import pylab as pl
        # plot on the time axis the spline was fitted on; this equals
        # ltd.times unless realigntimes is set
        pl.plot(newreftimes, ltd.values)
        pl.plot(newreftimes, spline(newreftimes))
    rlstrack = Track()
    rlstrack.times = newreftimes
    rlstrack.values = spline(rlstrack.times, 1).reshape((-1, 1))
    return rlstrack
示例#2
0
def relative_local_x(reft, targett, dtwpath, s=100, debug=False):
    """Return the smoothed rate of change of target relative to ref.

       The difference (target value - ref value) is collected for every
       (ref index, target index) pair in dtwpath, a degree 5 smoothing
       spline (smoothing factor 's') is fitted to it over the reference
       times and the first derivative of that spline is returned as a
       Track with values shaped (-1, 1).
    """
    reftimes = reft.times.copy()
    numframes = len(reftimes)

    # reference frames that do not occur in the path keep a zero difference
    diffs = np.zeros(numframes, dtype=np.float64)
    for refidx, tgtidx in dtwpath:
        diffs[refidx] = targett.values[tgtidx] - reft.values[refidx]

    fitted = UnivariateSpline(reftimes, diffs, k=5, s=s)

    if debug:
        import pylab as pl
        #top: reference against the target mapped onto reference frames
        pl.subplot(211)
        pl.plot(reftimes, reft.values, label="ref")
        mapped = np.zeros(numframes, dtype=np.float64)
        for refidx, tgtidx in dtwpath:
            mapped[refidx] = targett.values[tgtidx]
        pl.plot(reftimes, mapped, label="tgt")
        pl.legend()
        #bottom: raw differences against the fitted spline
        pl.subplot(212)
        pl.plot(reftimes, diffs)
        pl.plot(reftimes, fitted(reftimes))

    result = Track()
    result.times = reftimes
    result.values = fitted(result.times, 1).reshape((-1, 1))
    return result
示例#3
0
def get_f0(args):
    """Extract the f0 track of one audio file and save it to disk.

    args -- tuple (fn, f0_path, f0min, f0max, tstep, semitones,
        outf0dir); f0_path is unpacked but not used here.

    The track is named after the part of the file's basename before the
    first "." and written to outf0dir with the TRACK_EXT extension.
    """
    fn, f0_path, f0min, f0max, tstep, semitones, outf0dir = args
    stem = os.path.basename(fn).partition(".")[0]
    print("PROCESSING: " + stem)
    f0track = Track()
    f0track.name = stem
    f0track.get_f0(fn, f0min, f0max, timestep=tstep, semitones=semitones)
    outfn = os.path.join(outf0dir, ".".join([stem, TRACK_EXT]))
    ttslab.tofile(f0track, outfn)
示例#4
0
def _local_timediff(path, reftimes, targettimes):
    """ Map the local time difference (target time - ref time) of an
        alignment path onto the reference times.

        path -- sequence of (ref index, target index) pairs; the ref
            index of the last pair decides how many reference frames
            are kept
        Returns a Track whose times are the kept reference times and
        whose values are the per-frame differences (zero for reference
        frames the path does not visit).
    """
    lastrefidx = path[-1][0]
    kepttimes = reftimes[:lastrefidx + 1]
    diffs = np.zeros(len(kepttimes), dtype=np.float64)
    for refidx, tgtidx in path:
        diffs[refidx] = targettimes[tgtidx] - reftimes[refidx]
    difftrack = Track()
    difftrack.times = kepttimes
    difftrack.values = diffs
    return difftrack
示例#5
0
def utt_mceps(utt, shift=0.005, remove_pau=False, resettimes=False):
    """Extract a feature track from the waveform of an utterance.

    The waveform is written to a temporary directory, the SIG2FV
    command template is run on it through the shell and the resulting
    feature file is loaded as a Track.

    utt -- utterance with a "waveform" entry
    shift -- frame shift substituted into the SIG2FV command; also the
        spacing of the regenerated times when resettimes is set
    remove_pau -- if True keep only frames that fall inside segments
        whose name is not "pau"
    resettimes -- if True replace the frame times with shift, 2*shift, ...

    Returns the (possibly masked / re-timed) Track.
    """
    temppath = mkdtemp()
    try:
        #wavs
        wfn1 = os.path.join(temppath, "1." + WAV_EXT)
        utt["waveform"].write(wfn1)
        #feats
        ffn1 = os.path.join(temppath, "1." + FEAT_EXT)
        cmds = SIG2FV % {"inputfile": wfn1,
                         "outputfile": ffn1,
                         "shift": shift}
        os.system(cmds)

        #tracks
        t1 = Track()
        t1.load_track(ffn1)
    finally:
        #cleanup, also when extraction or loading fails
        shutil.rmtree(temppath)

    if remove_pau:
        # work on a copy so the caller's utterance is not modified
        u = deepcopy(utt)
        fill_startendtimes(u)
        keep_intervals = [(seg["start"], seg["end"])
                          for seg in u.gr("Segment")
                          if seg["name"] != "pau"]
        indices = t1.mask_indices(keep_intervals)
        t1.times = t1.times[indices]
        t1.values = t1.values[indices]
    if resettimes:
        # np.float64 rather than the np.float alias, which no longer
        # exists in current NumPy releases
        t1.times = np.arange(1, len(t1.times) + 1, dtype=np.float64) * shift
    return t1
示例#6
0
def linearpath_distances(track, track2, metric="euclidean", VI=None):
    """Return the frame-by-frame (linear path) distances between two tracks.

    Frame i of track is compared with frame i of track2, i.e. the
    diagonal of the pairwise distance matrix is used; the result ends
    where the shorter of the two tracks ends.

    metric -- metric name passed to cdist
    VI -- optional inverse covariance matrix, only forwarded to cdist
        when given

    Returns a Track with the distances as values, shaped (-1, 1), and
    the matching frame times of track.  A warning is printed when the
    two tracks differ in number of frames.
    """
    # Forward VI only when the caller supplied one, so that metrics
    # without such a parameter are not handed a stray keyword (which
    # newer SciPy releases may reject).
    extra = {} if VI is None else {"VI": VI}
    dist = cdist(track.values, track2.values, metric=str(metric), **extra)

    # length of the usable diagonal, computed up front instead of
    # running off the matrix and catching the IndexError
    numdists = min(len(track.times), dist.shape[0], dist.shape[1])
    framedists = [dist[i][i] for i in range(numdists)]

    t = Track()
    t.values = np.array(framedists).reshape(-1, 1)
    t.times = np.array(track.times[:numdists])
    if track2.numframes != track.numframes:
        print("linearpath_distances: WARNING: num frames difference is %s" % (track2.numframes - track.numframes))
    return t
示例#7
0
def dtw_distances(track, track2, metric="euclidean", VI=None):
    """Return the local distances along the DTW alignment of two tracks.

    track.dtw_align(track2, ...) supplies the local distance matrix and
    the alignment path; for every (x, y) coordinate on the path the
    distance dist[x][y] is collected together with the time of frame x
    of track.

    Returns a Track with the distances as values, shaped (-1, 1), and
    the collected frame times.
    """
    _cumdist, localdist, alignment = track.dtw_align(track2,
                                                     metric=str(metric),
                                                     VI=VI)

    pathdists = []
    pathtimes = []
    for refidx, otheridx in alignment:
        pathdists.append(localdist[refidx][otheridx])
        pathtimes.append(track.times[refidx])

    disttrack = Track()
    disttrack.values = np.array(pathdists)
    disttrack.values = disttrack.values.reshape(-1, 1)
    disttrack.times = np.array(pathtimes)
    return disttrack
示例#8
0
def dtw_distances(track, track2, metric="euclidean", VI=None):
    """Return the local distances along the DTW alignment of two tracks.

    track.dtw_align(track2, ...) supplies the local distance matrix and
    the alignment path; for every (x, y) coordinate on the path the
    distance dist[x][y] is collected together with the time of frame x
    of track.

    Returns a Track with the distances as values, shaped (-1, 1), and
    the collected frame times.
    """
    _cumdist, localdist, alignment = track.dtw_align(track2,
                                                     metric=str(metric),
                                                     VI=VI)

    pathdists = []
    pathtimes = []
    for refidx, otheridx in alignment:
        pathdists.append(localdist[refidx][otheridx])
        pathtimes.append(track.times[refidx])

    disttrack = Track()
    disttrack.values = np.array(pathdists)
    disttrack.values = disttrack.values.reshape(-1, 1)
    disttrack.times = np.array(pathtimes)
    return disttrack
示例#9
0
def qta_synth_utt(utt, synthfunc=synth):
    """Synthesise an f0 track for utt, one phrase at a time.

    For every item of the "Phrase" relation the start, end, endheight,
    slope and lambd features of its syllables are collected and handed,
    with the phrase's startpitch feature, to synthfunc.  The times and
    (flattened) values of the tracks returned per phrase are joined in
    phrase order.

    Returns a Track with values shaped (-1, 1).
    """
    sylkeys = (STARTLAB, ENDLAB, QTAPREFIX + "_endheight",
               QTAPREFIX + "_slope", QTAPREFIX + "_lambd")
    # seeded with empty arrays so an utterance without phrases still
    # yields empty times and values
    timeparts = [np.array([])]
    valueparts = [np.array([])]
    for phrase in utt.gr("Phrase"):
        sylparms = []
        for word in phrase.get_daughters():
            for syl in word.gir("SylStructure").get_daughters():
                sylparms.append([syl[key] for key in sylkeys])
        phrasetrack = synthfunc(phrase[QTAPREFIX + "_startpitch"], sylparms)
        timeparts.append(phrasetrack.times)
        valueparts.append(phrasetrack.values.flatten())
    f0track = Track()
    f0track.times = np.concatenate(timeparts)
    f0track.values = np.concatenate(valueparts).reshape((-1, 1))
    return f0track
示例#10
0
def gradient(track, h=2):
    """Estimate the gradient of track over a window of h + 1 points.

       'h' must be even.  For output frame i the estimate is the
       difference between the values h frames apart, divided by the
       time they span; the output times are the input times with h / 2
       frames dropped at either end.
    """
    assert h % 2 == 0
    halfwin = h // 2

    #the timestep is read from the first two frames and taken to be
    #constant over the whole track
    timestep = track.times[1] - track.times[0]
    span = h * timestep

    centretimes = track.times[halfwin:-halfwin].copy()
    slopes = np.zeros(len(centretimes), dtype=track.values.dtype)
    for first in range(len(slopes)):
        last = first + h
        slopes[first] = (track.values[last] - track.values[first]) / span

    gradtrack = Track()
    gradtrack.times = centretimes
    gradtrack.values = slopes
    return gradtrack
示例#11
0
    def synth(self, voice, utt, args):
        """Synthesise utt in two passes, replacing the engine's f0 with
        the output of the voice's pitch model.

        First pass: the engine synthesises from the "hts_label" lines of
        utt; its segment times are copied onto the items of the
        "Segment" relation and its f0 is converted to a semitone track.
        A "qta_startpitch" is then estimated for every phrase from the
        first syllable, voice.pitchmodel is run, and its "f0track" is
        resampled at the engine's frame times, scaled and shifted.
        Second pass: the engine synthesises again with that contour
        passed as lf0 and the resulting waveform is stored in
        utt["waveform"].

        args -- truthy container; only membership of
            "use_labalignments" is checked
        Returns the utterance returned by voice.pitchmodel, with
        "waveform" set.
        """
        synthparms = args  #not yet implemented...
        htslabel = "\n".join(utt["hts_label"]).encode(
            "utf-8").splitlines()  #to utf-8 bytestring
        if synthparms and "use_labalignments" in synthparms:
            use_labalignments = True
        else:
            use_labalignments = False
        with HTS_EngineME(self.htsvoice_bin, self.mixfilter_bin,
                          self.pdfilter_bin) as htsengine:
            #first pass: engine's own prosody, used for timing and f0
            htsengine.synth(htslabel, use_labalignments=use_labalignments)
            for segt, seg in zip(htsengine.get_segtimes(), utt.gr("Segment")):
                seg["start"], seg["end"] = segt
#            utt["debug_waveform"] = htsengine.get_wav()
            #f0 in semitones; log2 of a zero f0 gives -inf, which is
            #mapped back to 0.0 (presumably unvoiced frames -- confirm)
            f0st = 12.0 * np.log2(htsengine.get_f0())
            f0st[f0st == -np.inf] = 0.0
            f0times = np.arange(len(f0st)) * STEPSIZE
            f0track = Track()
            f0track.times = f0times
            f0track.values = f0st.reshape((-1, 1))
            #            utt["debug_f0track"] = f0track
            utt.fill_startendtimes()
            #add qta_startpitch
            for phr in utt.gr("Phrase"):
                #first syllable of the first daughter of the phrase
                syl = phr.first_daughter.gir("SylStructure").first_daughter
                syltrackvals = f0track.slice(f0track.index_at(syl["start"]),
                                             f0track.index_at(
                                                 syl["end"])).values.flatten()
                #only non-zero frames take part in the estimate
                validvals = syltrackvals[syltrackvals.nonzero()]
                if len(validvals) > 3:
                    #mean over the first quarter of the non-zero frames
                    phr["qta_startpitch"] = np.mean(
                        validvals[:len(validvals) // 4])
                else:
                    phr["qta_startpitch"] = BACKOFF_STARTPITCH
            utt = voice.pitchmodel(utt, ("synth", None))
            #resample the pitch model's contour at the engine frame times
            f0spline = InterpolatedUnivariateSpline(utt["f0track"].times,
                                                    utt["f0track"].values)
            newf0 = f0spline(f0track.times)
            #HEURISTIC ADJUSTMENT CLOSER TO HTS DYNAMICS
            #scale the contour by 1.3, then shift it so that its mean
            #equals the mean of the engine's non-zero f0 values
            m = np.mean(f0track.values[f0track.values.nonzero()])
            newf0 *= 1.3  #more dynamic
            m2 = np.mean(newf0)
            newf0 += m - m2
            ### TRANSFER UNVOICED SECTIONS
            # newf0[f0track.values.flatten() == 0.0] = 0.0
            # import pylab as pl
            # pl.plot(f0track.times, f0track.values)
            # pl.plot(f0track.times, newf0)
            ###
            #inverse of the 12 * log2 conversion above, then to the
            #representation produced by tolf0
            newf0 = 2**(newf0 / 12.0)
            newf0 = tolf0(newf0)
            #second pass: same labels, new f0 contour
            htsengine.synth(htslabel,
                            lf0=newf0,
                            use_labalignments=use_labalignments)
            #populate utt with waveform and segment alignments
            utt["waveform"] = htsengine.get_wav()
        return utt
示例#12
0
def synth(startpitch, synthparms, numpoints=100, plot=False):
    """Synthesise a pitch contour from per-syllable target parameters.

    startpitch -- pitch at the start of the first syllable
    synthparms -- one sequence per syllable:
        [start time, end time, endheight, slope, lambd]; a syllable
        with any None entry is skipped
    numpoints -- number of samples generated per syllable
    plot -- if True plot the linear target (dashed red) and the
        synthesised contour (green) of every syllable

    Every syllable is sampled at numpoints instants in [start, end) and
    synthesised with sylcontour towards the line y = slope * t + c.
    The value and first two derivatives at the last sample are carried
    over to the next syllable; the derivatives are reset to zero when a
    syllable does not start where the previous entry ends or when a
    syllable is skipped.

    Returns a Track holding only the samples whose contour value is
    non-zero (which drops the slots of skipped syllables), values
    shaped (-1, 1).
    """
    times = np.zeros(len(synthparms) * numpoints)
    contour = np.zeros(len(synthparms) * numpoints)
    for i, synthparm in enumerate(synthparms):
        if i == 0:
            p0 = startpitch
            dp0 = 0.0
            ddp0 = 0.0
        # for i == 0 this compares against the last entry; that is
        # harmless since the derivatives are zero already
        if synthparm[0] != synthparms[i - 1][1]:
            #not contiguous (e.g. a pause is present)
            dp0 = 0.0
            ddp0 = 0.0
        if any(e is None for e in synthparm):
            #no parameters available for this syllable, skip...
            dp0 = 0.0
            ddp0 = 0.0
            continue
        sylslot = slice(i * numpoints, i * numpoints + numpoints)
        utt_t = np.linspace(synthparm[0],
                            synthparm[1],
                            numpoints,
                            endpoint=False)
        times[sylslot] = utt_t
        syl_t = utt_t - synthparm[0]  #start at 0.0
        #y = mx + c
        syltarget_m = synthparm[3]
        syltarget_c = get_intercept(syl_t[-1], synthparm[2], synthparm[3])
        scontour = sylcontour(syl_t, syltarget_m, syltarget_c, p0, dp0, ddp0,
                              synthparm[4])
        if plot:
            import pylab as pl
            # The original referenced an undefined 'coefs' here; the
            # dashed line is taken to be the linear target y = mx + c.
            pl.plot(syl_t + synthparm[0],
                    np.polyval([syltarget_m, syltarget_c], syl_t),
                    linestyle="dashed",
                    color="red")
            pl.plot(syl_t + synthparm[0], scontour, color="green")
        spline = InterpolatedUnivariateSpline(syl_t, scontour)
        contour[sylslot] = scontour
        # end state of this syllable is the start state of the next one
        p0, dp0, ddp0, _ = spline.derivatives(syl_t[-1])
    synthtrack = Track()
    synthtrack.times = times[contour.nonzero()].copy()
    synthtrack.values = contour[contour.nonzero()].reshape((-1, 1)).copy()
    return synthtrack
示例#13
0
def make_joincoefs(featconfig, wav_dir):
    """ Make joincoefs...

        Extracts MCEPs for every wave file in wav_dir, normalises the
        MCEP tracks and the f0 tracks found in F0_DIR, appends the f0
        values to the MCEP vectors and saves one join coefficient
        track per MCEP track in JOIN_DIR.

        featconfig -- config object; the "SIG2FV_MCEP" section is read
        wav_dir -- directory containing the input wave files

        MCEP_DIR and JOIN_DIR are created with os.mkdir inside the
        current working directory (this fails if they already exist);
        PM_DIR and F0_DIR are read relative to the same directory.
    """

    mcep_dir = os.path.join(os.getcwd(), MCEP_DIR)
    os.mkdir(mcep_dir)
    join_dir = os.path.join(os.getcwd(), JOIN_DIR)
    os.mkdir(join_dir)
    pm_dir = os.path.join(os.getcwd(), PM_DIR)
    f0_dir = os.path.join(os.getcwd(), F0_DIR)

    fbank_order = featconfig.get("SIG2FV_MCEP", "FBANK_ORDER")
    melcep_order = featconfig.get("SIG2FV_MCEP", "MELCEP_ORDER")
    melcep_coefs = featconfig.get("SIG2FV_MCEP", "MELCEP_COEFS")
    preemph_coef = featconfig.get("SIG2FV_MCEP", "PREEMPH_COEF")
    window_factor = featconfig.get("SIG2FV_MCEP", "WINDOW_FACTOR")
    window_type = featconfig.get("SIG2FV_MCEP", "WINDOW_TYPE")

    print("MAKING JOINCOEFS...")
    # An explicit loop rather than map(): on Python 3 map() is lazy, so
    # an unconsumed map() would never run the extraction at all.
    for wavfilename in sorted(glob(os.path.join(wav_dir, ".".join(["*", WAV_EXT])))):
        extract_mceps((wavfilename, fbank_order, window_factor, preemph_coef,
                       melcep_order, window_type, melcep_coefs, mcep_dir, pm_dir))

    print("NORMALISING AND JOINING F0 AND MCEPS...")
    #Normalising mceps and f0s:
    upper = +1.0
    lower = -1.0

    mceptracks = {}
    for fn in glob(os.path.join(mcep_dir, ".".join(["*", MCEP_EXT]))):
        t = Track()
        t.load_track(fn)
        mceptracks[os.path.basename(fn)] = t

    # mean and standard deviation per coefficient over all files
    allmcepvecs = np.concatenate([mceptracks[tn].values for tn in sorted(mceptracks)])
    mcepmean = allmcepvecs.mean(0)
    mcepstd = allmcepvecs.std(0)
    for k in mceptracks:
        mceptracks[k].values = (mceptracks[k].values - mcepmean) / (4 * mcepstd) * (upper - lower)

    f0tracks = {}
    for fn in glob(os.path.join(f0_dir, ".".join(["*", F0_EXT]))):
        t = Track()
        t.load_track(fn)
        f0tracks[os.path.basename(fn)] = t

    # f0 statistics are taken over the non-zero values only, but the
    # normalisation below is applied to all values
    allf0vecs = np.concatenate([f0tracks[tn].values[f0tracks[tn].values.nonzero()] for tn in sorted(f0tracks)])
    f0mean = allf0vecs.mean(0)
    f0std = allf0vecs.std(0)
    for k in f0tracks:
        f0tracks[k].values = (f0tracks[k].values - f0mean) / (4 * f0std) * (upper - lower)

    #Add f0 to mcep track:
    # tracks are paired purely by sorted file name order, so both
    # directories are expected to hold matching sets of files
    for k1, k2 in zip(sorted(mceptracks), sorted(f0tracks)):
        mceptracks[k1].values = np.concatenate((mceptracks[k1].values, f0tracks[k2].values), 1)

    for fn in mceptracks:
        basename = os.path.splitext(os.path.basename(fn))[0]
        ttslab.tofile(mceptracks[fn], os.path.join(join_dir, basename + "." + JOIN_EXT))
示例#14
0
def timenorm_tonecontour_dtw(reftrack, track):
    """Time-normalise track onto the frame times of reftrack using DTW.

    Mean-subtracted copies of both tracks are aligned (the copy of
    track is first smoothed with a spline); every reference frame then
    receives the mean of the original track values aligned to it, and
    the result is smoothed with newtrack_from_sspline.  Neither input
    track is modified.
    """
    #normalise location of the reference:
    ref = deepcopy(reftrack)
    ref.values = ref.values - ref.values.mean()
    #normalise location of the track to be warped:
    target = deepcopy(track)
    target.values = target.values - target.values.mean()
    #smooth function to facilitate smoother warping:
    smoother = UnivariateSpline(target.times, target.values)
    target.values = smoother(target.times).reshape((-1, 1))
    #align (the path is the third item returned):
    alignment = dtw_align(ref, target)
    path = alignment[2]
    #construct new track using mapping:
    numref = len(ref)
    warped = np.zeros(numref, np.float64)
    for refidx in range(numref):
        matched = [pair[1] for pair in path if pair[0] == refidx]
        #the unnormalised input values are averaged, not the smoothed copy
        warped[refidx] = np.mean(track.values[matched])
    newtrack = Track()
    newtrack.times = np.copy(ref.times)
    newtrack.values = warped.reshape((-1, 1))
    return newtrack.newtrack_from_sspline(newtrack.times,
                                          s=len(newtrack.times) / 10.0)
示例#15
0
def utt_distance(utt,
                 utt2,
                 method="dtw",
                 metric="euclidean",
                 sig2fv=SIG2FV,
                 VI=None):
    """ Uses Trackfile class' distance measurements to compare utts...
        See docstring in tfuncs_analysis.py for more details...

        The waveforms of both utterances are written to a temporary
        directory, features are extracted by running the 'sig2fv'
        command template through the shell, and the two feature
        tracks are compared with Track.distances.

        sig2fv -- command template with %(inputfile)s and
            %(outputfile)s fields (defaults to SIG2FV)
        method, metric, VI -- passed on to Track.distances

        Returns the track produced by Track.distances.
    """

    temppath = mkdtemp()
    try:
        tracks = []
        for label, u in (("1", utt), ("2", utt2)):
            wavfn = os.path.join(temppath, label + "." + WAV_EXT)
            featfn = os.path.join(temppath, label + "." + FEAT_EXT)
            u["waveform"].write(wavfn)
            # use the template that was passed in; the original always
            # used the global SIG2FV and ignored the parameter
            os.system(sig2fv % {"inputfile": wavfn, "outputfile": featfn})
            t = Track()
            t.load_track(featfn)
            tracks.append(t)
        t1, t2 = tracks

        #compare
        return t1.distances(t2, method=method, metric=metric, VI=VI)
    finally:
        #cleanup, also when writing, extraction or loading fails
        shutil.rmtree(temppath)
示例#16
0
def utt_distance(utt, utt2, method="dtw", metric="euclidean", sig2fv=SIG2FV, VI=None):
    """ Uses Trackfile class' distance measurements to compare utts...
        See docstring in tfuncs_analysis.py for more details...

        The waveforms of both utterances are written to a temporary
        directory, features are extracted by running the 'sig2fv'
        command template through the shell, and the two feature
        tracks are compared with Track.distances.

        sig2fv -- command template with %(inputfile)s and
            %(outputfile)s fields (defaults to SIG2FV)
        method, metric, VI -- passed on to Track.distances

        Returns the track produced by Track.distances.
    """

    temppath = mkdtemp()
    try:
        tracks = []
        for label, u in (("1", utt), ("2", utt2)):
            wavfn = os.path.join(temppath, label + "." + WAV_EXT)
            featfn = os.path.join(temppath, label + "." + FEAT_EXT)
            u["waveform"].write(wavfn)
            # use the template that was passed in; the original always
            # used the global SIG2FV and ignored the parameter
            os.system(sig2fv % {"inputfile": wavfn, "outputfile": featfn})
            t = Track()
            t.load_track(featfn)
            tracks.append(t)
        t1, t2 = tracks

        #compare
        return t1.distances(t2, method=method, metric=metric, VI=VI)
    finally:
        #cleanup, also when writing, extraction or loading fails
        shutil.rmtree(temppath)
示例#17
0
def add_feats_to_utt(args):
    """Attach join coefficients, LPC coefficients and residual samples
    to every item of the "Unit" relation of an utterance.

    args -- tuple (u, lpc_dir, joincoef_dir, f0_dir); the files that
        are loaded are named after u["file_id"]

    Units take their start and end times from the items of the "Word"
    relation (paired in order; the names must match).  Each unit gets
    the features "left-joincoef", "right-joincoef", "lpc-coefs" and
    "residuals".

    Returns the modified utterance.
    """
    u, lpc_dir, joincoef_dir, f0_dir = args

    file_id = u["file_id"]
    print("Processing:", file_id)
    u.fill_startendtimes()
    #units copy the timing of the word they are paired with
    for unit, word in zip(u.gr("Unit"), u.gr("Word")):
        assert unit["name"] == word["name"]
        unit["start"] = word["start"]
        unit["end"] = word["end"]

    #the residual wave is read from the same directory as the lpc track
    lpctrack = Track()
    lpctrack.load_track(".".join([os.path.join(lpc_dir, file_id), LPC_EXT]))
    restrack = Track()
    restrack.load_wave(".".join([os.path.join(lpc_dir, file_id), RES_EXT]))
    jointrack = ttslab.fromfile(".".join([os.path.join(joincoef_dir, file_id), JOIN_EXT]))
    f0track = Track()
    f0track.load_track(".".join([os.path.join(f0_dir, file_id), F0_EXT]))

    #get boundarytimes:
    #start of the first unit followed by the end of every unit
    boundarytimes = []
    for i, unit in enumerate(u.gr("Unit")):
        if i == 0:
            boundarytimes.append(unit["start"])
        boundarytimes.append(unit["end"])

    #convert boundtimes into sample ranges:
    lpcsampleranges = []
    f0sampleranges = []
    joinsamples = []
    for bound in boundarytimes:
        lpcsampleranges.append(lpctrack.index_at(bound))
        f0sampleranges.append(f0track.index_at(bound))
        joinsamples.append(jointrack.values[jointrack.index_at(bound)])

    #get pitchperiods at lpc indices
    #(difference between consecutive lpc frame times, with 0.0 in
    #front so that there is one period per frame)
    lpctimes = np.concatenate(([0.0], lpctrack.times))
    pitchperiod = np.diff(lpctimes)

    units = u.get_relation("Unit").as_list()

    #one unit per pair of neighbouring boundaries
    assert len(units) == len(lpcsampleranges) - 1
    #NOTE(review): fti0 and fti1 (the f0 indices) are unpacked but not
    #used in the loop body -- confirm that no f0 feature was intended.
    for jc0, jc1, lti0, lti1, fti0, fti1, i in zip(joinsamples[:-1], joinsamples[1:],
                                                   lpcsampleranges[:-1], lpcsampleranges[1:],
                                                   f0sampleranges[:-1], f0sampleranges[1:],
                                                   units):
#        print(i["name"], "lpctrack[%s:%s]" % (lti0, lti1), "len(lpctrack)=%s" % len(lpctrack))
        i["left-joincoef"] = jc0
        i["right-joincoef"] = jc1
        i["lpc-coefs"] = lpctrack.slice(lti0, lti1, copy=True) #like python indexing/slicing
        #the start time of the slice is the time of the frame before
        #it (0.0 for the first frame)
        if lti0 == 0:
            i["lpc-coefs"].starttime = 0.0
        else:
            i["lpc-coefs"].starttime = lpctrack.times[lti0 - 1]
        i["lpc-coefs"].zero_starttime()
        #For windowfactor=2 (save only samples and assume 16kHz)
        #NOTE(review): the period at lti0 is used to extend both the
        #start and the end of the residual slice -- confirm that
        #pitchperiod[lti1] was not intended for the end.
        i["residuals"] = restrack.slice(restrack.index_at(lpctrack.times[lti0] - pitchperiod[lti0]),
                                        restrack.index_at(lpctrack.times[lti1] + pitchperiod[lti0])).values
    return u
示例#18
0
def synth2(startpitch,
           synthparms,
           numpoints=100,
           plot=False,
           minlambd=10.0,
           dlambd=5.0):
    """ Limit the strength of articulation to avoid acceleration in
        opposite direction of endheight target...

        startpitch -- pitch at the start of the first syllable
        synthparms -- one mutable sequence per syllable:
            [start time, end time, endheight, slope, lambd]; a
            syllable with any None entry is skipped.  NOTE: the lambd
            entry is lowered IN PLACE when the constraint below
            requires it.
        numpoints -- number of samples generated per syllable
        plot -- if True plot the linear target (dashed red) and the
            synthesised contour (green) of every syllable
        minlambd -- lambd is never lowered below this value
        dlambd -- amount by which lambd is lowered per attempt

        A syllable is resynthesised with a lower lambd until the
        second derivative of its contour has the sign of the movement
        towards the endheight everywhere (positive when the endheight
        lies above the start pitch, negative when below), or until
        lambd has reached minlambd.

        Returns a Track holding only the samples whose contour value
        is non-zero, values shaped (-1, 1).
    """
    times = np.zeros(len(synthparms) * numpoints)
    contour = np.zeros(len(synthparms) * numpoints)
    for i, synthparm in enumerate(synthparms):
        if i == 0:
            p0 = startpitch
            dp0 = 0.0
            ddp0 = 0.0
        # for i == 0 this compares against the last entry; that is
        # harmless since the derivatives are zero already
        if synthparm[0] != synthparms[i - 1][1]:
            #not contiguous (e.g. a pause is present)
            dp0 = 0.0
            ddp0 = 0.0
        if any(e is None for e in synthparm):
            #no parameters available for this syllable, skip...
            dp0 = 0.0
            ddp0 = 0.0
            continue
        sylslot = slice(i * numpoints, i * numpoints + numpoints)
        utt_t = np.linspace(synthparm[0],
                            synthparm[1],
                            numpoints,
                            endpoint=False)
        times[sylslot] = utt_t
        syl_t = utt_t - synthparm[0]  #start at 0.0
        #y = mx + c
        syltarget_m = synthparm[3]
        syltarget_c = get_intercept(syl_t[-1], synthparm[2], synthparm[3])
        while True:  #resynthesise with lower strength until constraint met
            scontour = sylcontour(syl_t, syltarget_m, syltarget_c, p0, dp0,
                                  ddp0, synthparm[4])
            spline = InterpolatedUnivariateSpline(syl_t, scontour)
            #check acceleration
            if synthparm[4] <= minlambd:
                break
            accels = spline(syl_t, 2)
            if synthparm[2] > p0:
                if np.all(accels > 0.0):
                    break
            elif synthparm[2] < p0:
                if np.all(accels < 0.0):
                    break
            else:
                break
            synthparm[4] -= dlambd
            if synthparm[4] < minlambd:
                synthparm[4] = minlambd
        if plot:
            import pylab as pl
            # The original referenced an undefined 'coefs' here; the
            # dashed line is taken to be the linear target y = mx + c.
            pl.plot(syl_t + synthparm[0],
                    np.polyval([syltarget_m, syltarget_c], syl_t),
                    linestyle="dashed",
                    color="red")
            pl.plot(syl_t + synthparm[0], scontour, color="green")
        contour[sylslot] = scontour
        # end state of this syllable is the start state of the next one
        p0, dp0, ddp0, _ = spline.derivatives(syl_t[-1])
    synthtrack = Track()
    synthtrack.times = times[contour.nonzero()].copy()
    synthtrack.values = contour[contour.nonzero()].reshape((-1, 1)).copy()
    return synthtrack
示例#19
0
def draw_sylstruct_graph_pitch_waveform(u):
    """Draw the syllable structure, pitch and waveform of an utterance.

    Three figures are created:
      1. a graph of the "SylStructure" relation with words, syllables
         and segments on rows 3, 2 and 1, each node placed at the
         midpoint of its time span,
      2. the f0 contour in semitones, extracted from the waveform,
      3. the waveform decimated by a factor of 10.
    Figures 2 and 3 carry one x tick per item of the "Syllable"
    relation, at the syllable end time.

    u -- utterance with a "waveform" entry; its start and end times
        are (re)filled in place

    Returns the tuple (fig1, fig2, fig3).
    """
    #use seg end times to calculate start and end times for all
    #items...
    u.fill_startendtimes()

    g = nx.Graph()

    posdict = {}
    nodelist = []
    nodesizelist = []

    def place(item, size, level):
        """Register item as a node on row 'level', linked to its siblings."""
        nodelist.append(item)
        nodesizelist.append(size)
        # midpoint of the item; the parentheses matter, without them
        # the node lands at end + start / 2, i.e. beyond the item
        posdict[item] = [(item["end"] + item["start"]) / 2.0, level]
        if item.prev_item:
            g.add_edge(item.prev_item, item)
        if item.next_item:
            g.add_edge(item.next_item, item)

    for word in u.get_relation("SylStructure"):
        place(word, 300 * len(str(word)), 3)
        g.add_edge(word.first_daughter, word)
        g.add_edge(word.last_daughter, word)
        for syl in word.get_daughters():
            place(syl, 400, 2)
            g.add_edge(syl.first_daughter, syl)
            g.add_edge(syl.last_daughter, syl)
            for seg in syl.get_daughters():
                place(seg, 350, 1)

    #get the pitch:
    d = mkdtemp()
    try:
        wavfn = os.path.join(d, "utt.wav")
        u["waveform"].write(wavfn)
        f0t = Track()
        f0t.get_f0(wavfn, semitones=True)
    finally:
        #remove the temporary directory also when extraction fails
        shutil.rmtree(d)

    #both the pitch and the waveform figure use the same syllable ticks
    syltickpos = [syl["end"] for syl in u.gr("Syllable")]
    sylticklabels = [getsylsegstr(syl) for syl in u.gr("Syllable")]

    fig1 = plt.figure(
    )  #edgecolor=(0.921568627451, 0.921568627451, 0.921568627451))
    ax = fig1.add_subplot(111)
    ax.set_title("Utterance")
    # ax.set_ylim(0, 5)
    nx.draw(g, pos=posdict, ax=ax, nodelist=nodelist, node_size=nodesizelist)
    plt.xticks([], [])
    plt.yticks([1.0, 2.0, 3.0], ["segment", "syllable", "word"])

    fig2 = plt.figure()
    ax1 = fig2.add_subplot(111)
    ax1.set_title("Pitch")
    ax1.set_ylabel("Semitones (relative to 1 Hz)")
    ax1.set_xlabel("Syllables")
    plt.plot(f0t.times, f0t.values, color='green')
    ax1.set_ylim(bottom=75.0)
    plt.xticks(syltickpos, sylticklabels)
    ax1.grid()

    fig3 = plt.figure()
    ax2 = fig3.add_subplot(111)
    decimate_factor = 10
    ax2.set_title("Waveform (decimation factor: %s)" % decimate_factor)
    ax2.set_ylabel("Amplitude")
    ax2.set_xlabel("Syllables")
    waveform = ss.decimate(u["waveform"].samples, decimate_factor)
    #x axis in seconds: one decimated sample spans decimate_factor
    #original sample periods
    plt.plot(np.arange(len(waveform)) *
             (1.0 / u["waveform"].samplerate * decimate_factor),
             waveform,
             color='b')
    plt.xticks(syltickpos, sylticklabels)
    #    fig3.set_facecolor((0.921568627451, 0.921568627451, 0.921568627451))
    ax2.grid()

    #plt.show()

    return fig1, fig2, fig3
示例#20
0
def draw_sylstruct_graph_pitch_waveform(u):
    """Draw the syllable structure, pitch and waveform of an utterance.

    One figure with three stacked subplots is created:
      1. a graph of the "SylStructure" relation with words, syllables
         and segments on rows 3, 2 and 1, each node placed at the
         midpoint of its time span,
      2. the f0 contour extracted from the waveform, with one x tick
         per item of the "Syllable" relation (at the syllable end
         time, labelled with the syllable's "tone"),
      3. the waveform samples, with x ticks at the word end times
         expressed in samples.

    u -- utterance with a "waveform" entry; its start and end times
        are (re)filled in place

    Returns the figure.
    """
    #use seg end times to calculate start and end times for all
    #items...
    u.fill_startendtimes()

    g = nx.Graph()

    posdict = {}
    nodelist = []
    nodesizelist = []

    def place(item, size, level):
        """Register item as a node on row 'level', linked to its siblings."""
        nodelist.append(item)
        nodesizelist.append(size)
        # midpoint of the item; the parentheses matter, without them
        # the node lands at end + start / 2, i.e. beyond the item
        posdict[item] = [(item["end"] + item["start"]) / 2.0, level]
        if item.prev_item:
            g.add_edge(item.prev_item, item)
        if item.next_item:
            g.add_edge(item.next_item, item)

    for word in u.get_relation("SylStructure"):
        place(word, 300 * len(str(word)), 3)
        g.add_edge(word.first_daughter, word)
        g.add_edge(word.last_daughter, word)
        for syl in word.get_daughters():
            place(syl, 400, 2)
            g.add_edge(syl.first_daughter, syl)
            g.add_edge(syl.last_daughter, syl)
            for seg in syl.get_daughters():
                place(seg, 350, 1)

    uttendtime = u.get_relation("Segment").tail_item["end"]
    bounds = np.array([word["end"] for word in u.get_relation("Word")])

    #get the pitch:
    d = mkdtemp()
    try:
        wavfn = os.path.join(d, "utt.wav")
        u["waveform"].write(wavfn)
        f0t = Track()
        f0t.get_f0(wavfn)
    finally:
        #remove the temporary directory also when extraction fails
        shutil.rmtree(d)

    fig = pl.figure(
    )  #edgecolor=(0.921568627451, 0.921568627451, 0.921568627451))
    ax = fig.add_subplot(311)
    ax.set_title("Utterance")
    #    ax.set_ylim(0, 5)
    #    ax.set_xticks(bounds)
    #    ax.grid()
    nx.draw(g, pos=posdict, ax=ax, nodelist=nodelist, node_size=nodesizelist)

    ax1 = fig.add_subplot(312)
    ax1.set_title("Pitch")
    ax1.set_ylim(20.0, 300.0)
    ax1.set_ylabel("Hertz")
    pl.plot(f0t.times, f0t.values, color='green')
    pl.xticks([syl["end"] for syl in u.gr("Syllable")],
              [syl["tone"] for syl in u.gr("Syllable")])
    ax1.grid()

    ax2 = fig.add_subplot(313)
    ax2.set_title("Waveform")
    #this axis is in samples: times are scaled by the sample rate
    ax2.set_xlim(0, uttendtime * u["waveform"].samplerate)
    pl.plot(u["waveform"].samples, color='b')
    #NOTE(review): the meaning of the second positional argument of
    #set_xticks depends on the matplotlib version -- confirm that it
    #is taken as the (empty) tick labels.
    ax2.set_xticks(bounds * u["waveform"].samplerate, [''] * len(bounds))
    fig.set_facecolor((0.921568627451, 0.921568627451, 0.921568627451))
    #    ax2.grid()
    #    pl.show()

    #    fig.savefig("output.png")

    return fig
示例#21
0
def add_feats_to_utt(args):
    """Attach join coefficients, LPC coefficients and residual samples
    to every item of the "Unit" relation of an utterance.

    args -- tuple (u, lpc_dir, joincoef_dir, f0_dir); the files that
        are loaded are named after u["file_id"]

    Units take their start and end times from the items of the "Word"
    relation (paired in order; the names must match).  Each unit gets
    the features "left-joincoef", "right-joincoef", "lpc-coefs" and
    "residuals".

    Returns the modified utterance.
    """
    u, lpc_dir, joincoef_dir, f0_dir = args

    file_id = u["file_id"]
    print("Processing:", file_id)
    u.fill_startendtimes()
    #units copy the timing of the word they are paired with
    for unit, word in zip(u.gr("Unit"), u.gr("Word")):
        assert unit["name"] == word["name"]
        unit["start"] = word["start"]
        unit["end"] = word["end"]

    #the residual wave is read from the same directory as the lpc track
    lpctrack = Track()
    lpctrack.load_track(".".join([os.path.join(lpc_dir, file_id), LPC_EXT]))
    restrack = Track()
    restrack.load_wave(".".join([os.path.join(lpc_dir, file_id), RES_EXT]))
    jointrack = ttslab.fromfile(".".join(
        [os.path.join(joincoef_dir, file_id), JOIN_EXT]))
    f0track = Track()
    f0track.load_track(".".join([os.path.join(f0_dir, file_id), F0_EXT]))

    #get boundarytimes:
    #start of the first unit followed by the end of every unit
    boundarytimes = []
    for i, unit in enumerate(u.gr("Unit")):
        if i == 0:
            boundarytimes.append(unit["start"])
        boundarytimes.append(unit["end"])

    #convert boundtimes into sample ranges:
    lpcsampleranges = []
    f0sampleranges = []
    joinsamples = []
    for bound in boundarytimes:
        lpcsampleranges.append(lpctrack.index_at(bound))
        f0sampleranges.append(f0track.index_at(bound))
        joinsamples.append(jointrack.values[jointrack.index_at(bound)])

    #get pitchperiods at lpc indices
    #(difference between consecutive lpc frame times, with 0.0 in
    #front so that there is one period per frame)
    lpctimes = np.concatenate(([0.0], lpctrack.times))
    pitchperiod = np.diff(lpctimes)

    units = u.get_relation("Unit").as_list()

    #one unit per pair of neighbouring boundaries
    assert len(units) == len(lpcsampleranges) - 1
    #NOTE(review): fti0 and fti1 (the f0 indices) are unpacked but not
    #used in the loop body -- confirm that no f0 feature was intended.
    for jc0, jc1, lti0, lti1, fti0, fti1, i in zip(joinsamples[:-1],
                                                   joinsamples[1:],
                                                   lpcsampleranges[:-1],
                                                   lpcsampleranges[1:],
                                                   f0sampleranges[:-1],
                                                   f0sampleranges[1:], units):
        #        print(i["name"], "lpctrack[%s:%s]" % (lti0, lti1), "len(lpctrack)=%s" % len(lpctrack))
        i["left-joincoef"] = jc0
        i["right-joincoef"] = jc1
        i["lpc-coefs"] = lpctrack.slice(
            lti0, lti1, copy=True)  #like python indexing/slicing
        #the start time of the slice is the time of the frame before
        #it (0.0 for the first frame)
        if lti0 == 0:
            i["lpc-coefs"].starttime = 0.0
        else:
            i["lpc-coefs"].starttime = lpctrack.times[lti0 - 1]
        i["lpc-coefs"].zero_starttime()
        #For windowfactor=2 (save only samples and assume 16kHz)
        #NOTE(review): the period at lti0 is used to extend both the
        #start and the end of the residual slice -- confirm that
        #pitchperiod[lti1] was not intended for the end.
        i["residuals"] = restrack.slice(
            restrack.index_at(lpctrack.times[lti0] - pitchperiod[lti0]),
            restrack.index_at(lpctrack.times[lti1] + pitchperiod[lti0])).values
    return u
示例#22
0
import sys
import array
import math

import numpy as np

import ttslab
from ttslab.trackfile import Track
ttslab.extend(Track, "ttslab.trackfile.funcs.tfuncs_praat")

def friendly_log(f):
    """Return the natural logarithm of f, or -1e+10 when math.log
    raises ValueError for it (zero and negative values)."""
    try:
        result = math.log(f)
    except ValueError:
        result = float('-1e+10')
    return result

if __name__ == "__main__":
    # Usage: <script> INFILE OUTFILE MINF0 MAXF0
    # Extracts f0 from INFILE and writes its natural log to OUTFILE as
    # raw single precision floats (-1e+10 where f0 is zero).
    fn = sys.argv[1]
    outfn = sys.argv[2]
    minf0 = float(sys.argv[3])
    maxf0 = float(sys.argv[4])

    t = Track()
    t.get_f0(fn, minpitch=minf0, maxpitch=maxf0, timestep=0.005, fixocterrs=True)  #timestep hardcoded here because of hack below...
    #hack aligns samples with equiv from HTS script:
    #(two zero frames are added before and after the extracted values)
    pad = np.array([0.0, 0.0]).reshape(-1, 1)
    f0hzvalues = np.concatenate([pad, t.values, pad])
    # str("f") is a native string on Python 2 and on Python 3; the
    # bytes typecode b"f" is rejected by array.array on Python 3.
    # The values are flattened so that friendly_log receives scalars
    # instead of one-element rows.
    lf0 = array.array(str("f"),
                      [friendly_log(f0hz) for f0hz in f0hzvalues.flatten()])
    with open(outfn, "wb") as outfh:
        lf0.tofile(outfh)
def add_feats_to_utt(args):
    """Attach acoustic features to every item in the "Unit" relation of an utterance.

    Parameters:
        args: a single tuple ``(u, lpc_dir, joincoef_dir, f0_dir)`` (packed
            into one argument, presumably so the function can be used with
            ``map``-style helpers). ``u`` is the utterance; the directories
            hold the LPC/residual, join-coefficient and f0 files, each named
            ``<u["file_id"]>.<extension>``.

    Returns:
        The same utterance ``u``. Each unit gains the keys "left-joincoef",
        "right-joincoef", "lpc-coefs", "dur" and "residuals".

    Raises:
        AssertionError: if the number of units does not match the number of
            halfphone ranges derived from the "Segment" relation.
    """
    u, lpc_dir, joincoef_dir, f0_dir = args

    file_id = u["file_id"]
    print("Processing:", file_id)
    u.fill_startendtimes()

    lpctrack = Track()
    lpctrack.load_track(".".join([os.path.join(lpc_dir, file_id), LPC_EXT]))
    restrack = Track()
    restrack.load_wave(".".join([os.path.join(lpc_dir, file_id), RES_EXT]))
    jointrack = ttslab.fromfile(".".join([os.path.join(joincoef_dir, file_id), JOIN_EXT]))
    f0track = Track()
    f0track.load_track(".".join([os.path.join(f0_dir, file_id), F0_EXT]))

    def joincoef_at(time):
        #Boundary times are in seconds, so they must be converted to a frame
        #index first (as done for the lpc and f0 tracks below); indexing
        #``values`` directly with a float time is an error.
        return jointrack.values[jointrack.index_at(time)]

    #get boundarytimes:
    #each segment is split into two halfphones: [start, split] and [split, end]
    boundarytimes = []
    durations = []
    starttime = 0.0
    for seg in u.get_relation("Segment"):
        endtime = float(seg["end"])
        if "cl_end" in seg:
            splittime = float(seg["cl_end"])
        else:
            splittime = (endtime + starttime) / 2
            #TODO: should still add 25% split if diphthong...
        boundarytimes.append([starttime, splittime, endtime])
        durations.extend([splittime - starttime, endtime - splittime])
        starttime = endtime

    #convert boundtimes into sample ranges (and flatten):
    lpcsampleranges = []
    f0sampleranges = []
    joinsamples = []

    #DEMITASSE: If not pruning pau halfphones:
    # for bounds in boundarytimes:
    #     lpcsampleranges.extend([lpctrack.get_index_at(bounds[0]),
    #                             lpctrack.get_index_at(bounds[1])])
    #     joinsamples.extend([jointrack.get_sample_at(bounds[0]),
    #                         jointrack.get_sample_at(bounds[1])])
    # lpcsampleranges.append(len(lpctrack))
    # joinsamples.append(jointrack.get_sample_at(len(jointrack)))

    #DEMITASSE: If pruning pau halfphones:
    #the first halfphone of the first segment and the last halfphone of the
    #last segment are dropped, so only the split point of the first segment
    #is used and the final end time is never added.
    durations = durations[1:-1]
    for i, bounds in enumerate(boundarytimes):
        if i == 0:
            lpcsampleranges.append(lpctrack.index_at(bounds[1]))
            f0sampleranges.append(f0track.index_at(bounds[1]))
            joinsamples.append(joincoef_at(bounds[1]))
        else:
            lpcsampleranges.extend([lpctrack.index_at(bounds[0]),
                                    lpctrack.index_at(bounds[1])])
            f0sampleranges.extend([f0track.index_at(bounds[0]),
                                   f0track.index_at(bounds[1])])
            joinsamples.extend([joincoef_at(bounds[0]),
                                joincoef_at(bounds[1])])

    #get pitchperiods at lpc indices
    #(time elapsed since the previous lpc frame, or since 0.0 for the first)
    lpctimes = np.concatenate(([0.0], lpctrack.times))
    pitchperiod = np.diff(lpctimes)

    units = u.get_relation("Unit").as_list()

    assert len(units) == len(lpcsampleranges) - 1
    #consecutive entries of each list delimit one unit; the f0 ranges
    #(fti0, fti1) are computed but not stored on the unit at present.
    for jc0, jc1, lti0, lti1, fti0, fti1, dur, i in zip(joinsamples[:-1], joinsamples[1:],
                                                        lpcsampleranges[:-1], lpcsampleranges[1:],
                                                        f0sampleranges[:-1], f0sampleranges[1:],
                                                        durations,
                                                        units):
#        print(i["name"], "lpctrack[%s:%s]" % (lti0, lti1), "len(lpctrack)=%s" % len(lpctrack))
        i["left-joincoef"] = jc0
        i["right-joincoef"] = jc1
        i["lpc-coefs"] = lpctrack.slice(lti0, lti1, copy=True) #like python indexing/slicing
        #start the slice at the time of the preceding frame (0.0 for the very
        #first frame) before zero_starttime() is applied
        if lti0 == 0:
            i["lpc-coefs"].starttime = 0.0
        else:
            i["lpc-coefs"].starttime = lpctrack.times[lti0 - 1]
        i["lpc-coefs"].zero_starttime()
        i["dur"] = dur
        #For windowfactor=2 (save only samples and assume 16kHz)
        #NOTE(review): the right edge also uses pitchperiod[lti0] (not lti1),
        #and lpctrack.times[lti1] assumes lti1 < len(lpctrack) -- confirm.
        i["residuals"] = restrack.slice(restrack.index_at(lpctrack.times[lti0] - pitchperiod[lti0]),
                                        restrack.index_at(lpctrack.times[lti1] + pitchperiod[lti0])).values
    return u
def make_joincoefs(featconfig, wav_dir):
    """Extract mel-cepstra for all wave files and write normalised join coefficients.

    Steps:
      1. ``extract_mceps`` is called once for every ``*.<WAV_EXT>`` file in
         ``wav_dir`` (in sorted order), writing into a new MCEP_DIR.
      2. All mcep tracks are normalised with the global per-coefficient mean
         and standard deviation: ``(x - mean) / (4 * std) * (upper - lower)``
         with upper/lower = +1/-1, i.e. two standard deviations map to 1.
      3. The f0 tracks found in F0_DIR are normalised the same way, using
         statistics taken over the non-zero f0 values only.
      4. The f0 values are appended as extra column(s) to the mcep tracks and
         the result is saved to a new JOIN_DIR as ``<basename>.<JOIN_EXT>``.

    Parameters:
        featconfig: config object; the "SIG2FV_MCEP" section is read with
            ``featconfig.get(section, option)``.
        wav_dir: directory containing the input wave files.

    Raises:
        OSError: from ``os.mkdir`` if MCEP_DIR or JOIN_DIR already exists in
            the current working directory.
    """

    mcep_dir = os.path.join(os.getcwd(), MCEP_DIR)
    os.mkdir(mcep_dir)
    join_dir = os.path.join(os.getcwd(), JOIN_DIR)
    os.mkdir(join_dir)
    #these two directories are only read from, never created here:
    pm_dir = os.path.join(os.getcwd(), PM_DIR)
    f0_dir = os.path.join(os.getcwd(), F0_DIR)

    fbank_order = featconfig.get("SIG2FV_MCEP", "FBANK_ORDER")
    melcep_order = featconfig.get("SIG2FV_MCEP", "MELCEP_ORDER")
    melcep_coefs = featconfig.get("SIG2FV_MCEP", "MELCEP_COEFS")
    preemph_coef = featconfig.get("SIG2FV_MCEP", "PREEMPH_COEF")
    window_factor = featconfig.get("SIG2FV_MCEP", "WINDOW_FACTOR")
    window_type = featconfig.get("SIG2FV_MCEP", "WINDOW_TYPE")

    print("MAKING JOINCOEFS...")
    #An explicit loop is required: extract_mceps is called for its side
    #effects only, and on Python 3 a bare map() is lazy and would never
    #actually run it.
    wavfilenames = sorted(
        glob(os.path.join(wav_dir, ".".join(["*", WAV_EXT]))))
    for wavfilename in wavfilenames:
        extract_mceps((wavfilename, fbank_order, window_factor, preemph_coef,
                       melcep_order, window_type, melcep_coefs, mcep_dir,
                       pm_dir))

    print("NORMALISING AND JOINING F0 AND MCEPS...")
    #Normalising mceps and f0s:
    upper = +1.0
    lower = -1.0

    mceptracks = {}
    for fn in glob(os.path.join(mcep_dir, ".".join(["*", MCEP_EXT]))):
        t = Track()
        t.load_track(fn)
        mceptracks[os.path.basename(fn)] = t

    #global statistics over the frames of all utterances:
    allmcepvecs = np.concatenate(
        [mceptracks[tn].values for tn in sorted(mceptracks)])
    mcepmean = allmcepvecs.mean(0)
    mcepstd = allmcepvecs.std(0)
    for k in mceptracks:
        mceptracks[k].values = (mceptracks[k].values -
                                mcepmean) / (4 * mcepstd) * (upper - lower)

    f0tracks = {}
    for fn in glob(os.path.join(f0_dir, ".".join(["*", F0_EXT]))):
        t = Track()
        t.load_track(fn)
        f0tracks[os.path.basename(fn)] = t

    #allf0vecs = np.concatenate([f0tracks[tn].values for tn in sorted(f0tracks)])
    #statistics use non-zero f0 values only (zeros presumably mark unvoiced
    #frames), but the normalisation below is applied to every value:
    allf0vecs = np.concatenate([
        f0tracks[tn].values[f0tracks[tn].values.nonzero()]
        for tn in sorted(f0tracks)
    ])
    f0mean = allf0vecs.mean(0)
    f0std = allf0vecs.std(0)
    for k in f0tracks:
        f0tracks[k].values = (f0tracks[k].values -
                              f0mean) / (4 * f0std) * (upper - lower)

    #Add f0 to mcep track:
    #tracks are paired purely by sorted filename order -- this assumes both
    #directories hold the same utterances with matching frame counts.
    for k1, k2 in zip(sorted(mceptracks), sorted(f0tracks)):
        mceptracks[k1].values = np.concatenate(
            (mceptracks[k1].values, f0tracks[k2].values), 1)

    for fn in mceptracks:
        basename = os.path.splitext(os.path.basename(fn))[0]
        ttslab.tofile(mceptracks[fn],
                      os.path.join(join_dir, basename + "." + JOIN_EXT))
def add_feats_to_utt(args):
    """Attach acoustic features to every item in the "Unit" relation of an utterance.

    Parameters:
        args: a single tuple ``(u, lpc_dir, joincoef_dir, f0_dir)`` (packed
            into one argument, presumably so the function can be used with
            ``map``-style helpers). ``u`` is the utterance; the directories
            hold the LPC/residual, join-coefficient and f0 files, each named
            ``<u["file_id"]>.<extension>``.

    Returns:
        The same utterance ``u``. Each unit gains the keys "left-joincoef",
        "right-joincoef", "lpc-coefs", "dur" and "residuals".

    Raises:
        AssertionError: if the number of units does not match the number of
            halfphone ranges derived from the "Segment" relation.
    """
    u, lpc_dir, joincoef_dir, f0_dir = args

    file_id = u["file_id"]
    print("Processing:", file_id)
    u.fill_startendtimes()

    lpctrack = Track()
    lpctrack.load_track(".".join([os.path.join(lpc_dir, file_id), LPC_EXT]))
    restrack = Track()
    restrack.load_wave(".".join([os.path.join(lpc_dir, file_id), RES_EXT]))
    jointrack = ttslab.fromfile(".".join(
        [os.path.join(joincoef_dir, file_id), JOIN_EXT]))
    f0track = Track()
    f0track.load_track(".".join([os.path.join(f0_dir, file_id), F0_EXT]))

    def joincoef_at(time):
        #Boundary times are in seconds, so they must be converted to a frame
        #index first (as done for the lpc and f0 tracks below); indexing
        #``values`` directly with a float time is an error.
        return jointrack.values[jointrack.index_at(time)]

    #get boundarytimes:
    #each segment is split into two halfphones: [start, split] and [split, end]
    boundarytimes = []
    durations = []
    starttime = 0.0
    for seg in u.get_relation("Segment"):
        endtime = float(seg["end"])
        if "cl_end" in seg:
            splittime = float(seg["cl_end"])
        else:
            splittime = (endtime + starttime) / 2
            #TODO: should still add 25% split if diphthong...
        boundarytimes.append([starttime, splittime, endtime])
        durations.extend([splittime - starttime, endtime - splittime])
        starttime = endtime

    #convert boundtimes into sample ranges (and flatten):
    lpcsampleranges = []
    f0sampleranges = []
    joinsamples = []

    #DEMITASSE: If not pruning pau halfphones:
    # for bounds in boundarytimes:
    #     lpcsampleranges.extend([lpctrack.get_index_at(bounds[0]),
    #                             lpctrack.get_index_at(bounds[1])])
    #     joinsamples.extend([jointrack.get_sample_at(bounds[0]),
    #                         jointrack.get_sample_at(bounds[1])])
    # lpcsampleranges.append(len(lpctrack))
    # joinsamples.append(jointrack.get_sample_at(len(jointrack)))

    #DEMITASSE: If pruning pau halfphones:
    #the first halfphone of the first segment and the last halfphone of the
    #last segment are dropped, so only the split point of the first segment
    #is used and the final end time is never added.
    durations = durations[1:-1]
    for i, bounds in enumerate(boundarytimes):
        if i == 0:
            lpcsampleranges.append(lpctrack.index_at(bounds[1]))
            f0sampleranges.append(f0track.index_at(bounds[1]))
            joinsamples.append(joincoef_at(bounds[1]))
        else:
            lpcsampleranges.extend(
                [lpctrack.index_at(bounds[0]),
                 lpctrack.index_at(bounds[1])])
            f0sampleranges.extend(
                [f0track.index_at(bounds[0]),
                 f0track.index_at(bounds[1])])
            joinsamples.extend(
                [joincoef_at(bounds[0]), joincoef_at(bounds[1])])

    #get pitchperiods at lpc indices
    #(time elapsed since the previous lpc frame, or since 0.0 for the first)
    lpctimes = np.concatenate(([0.0], lpctrack.times))
    pitchperiod = np.diff(lpctimes)

    units = u.get_relation("Unit").as_list()

    assert len(units) == len(lpcsampleranges) - 1
    #consecutive entries of each list delimit one unit; the f0 ranges
    #(fti0, fti1) are computed but not stored on the unit at present.
    for jc0, jc1, lti0, lti1, fti0, fti1, dur, i in zip(
            joinsamples[:-1], joinsamples[1:], lpcsampleranges[:-1],
            lpcsampleranges[1:], f0sampleranges[:-1], f0sampleranges[1:],
            durations, units):
        #        print(i["name"], "lpctrack[%s:%s]" % (lti0, lti1), "len(lpctrack)=%s" % len(lpctrack))
        i["left-joincoef"] = jc0
        i["right-joincoef"] = jc1
        i["lpc-coefs"] = lpctrack.slice(
            lti0, lti1, copy=True)  #like python indexing/slicing
        #start the slice at the time of the preceding frame (0.0 for the very
        #first frame) before zero_starttime() is applied
        if lti0 == 0:
            i["lpc-coefs"].starttime = 0.0
        else:
            i["lpc-coefs"].starttime = lpctrack.times[lti0 - 1]
        i["lpc-coefs"].zero_starttime()
        i["dur"] = dur
        #For windowfactor=2 (save only samples and assume 16kHz)
        #NOTE(review): the right edge also uses pitchperiod[lti0] (not lti1),
        #and lpctrack.times[lti1] assumes lti1 < len(lpctrack) -- confirm.
        i["residuals"] = restrack.slice(
            restrack.index_at(lpctrack.times[lti0] - pitchperiod[lti0]),
            restrack.index_at(lpctrack.times[lti1] + pitchperiod[lti0])).values
    return u
示例#26
0
    def hts_synth(self, utt, processname):
        """Run the external HTS engine on ``utt`` and load its outputs back.

        The labels in ``utt["hts_label"]`` are written to a temporary file
        which is passed to ``self.hts_bin`` together with the options in
        ``self.engine_parms`` (optionally overridden per utterance through
        ``utt["htsparms"]``). Option values may contain the placeholders
        ``%(models_dir)s``, ``%(tempwav_file)s``, ``%(tempilab_file)s``,
        ``%(tempolab_file)s`` and ``%(tempolf0_file)s``, which are filled in
        before the command is executed.

        Parameters:
            utt: utterance with an "hts_label" sequence and a "Segment"
                relation; it is updated in place.
            processname: not used by this method.

        Returns:
            The same ``utt`` with ``seg["end"]`` set on every segment and
            ``utt["waveform"]`` and ``utt["f0"]`` filled in.

        Raises:
            AssertionError: if the label file produced by the engine does not
                have exactly one line per segment.

        All temporary files are closed and removed even when a step fails.
        """
        htsparms = self.engine_parms.copy()
        htsparms["-of"] = "%(tempolf0_file)s"
        if "htsparms" in utt:
            htsparms.update(utt["htsparms"])   #parm overrides for this utt...

        #build command string and execute:
        cmds = self.hts_bin
        for k in htsparms:
            if htsparms[k]:
                if htsparms[k] is True:
                    #boolean switch: option name only
                    cmds += " " + k
                else:
                    cmds += " " + k + " " + str(htsparms[k])
        cmds += " %(tempilab_file)s"

        #(fd, path) of every temp file created so far, so that the finally
        #clause can release whatever exists when something goes wrong:
        tempfiles = []
        try:
            for suffix in (".wav", "", "", ""):
                tempfiles.append(mkstemp(prefix="ttslab_", suffix=suffix))
            (tempwav_file, tempilab_file,
             tempolab_file, tempolf0_file) = [path for _, path in tempfiles]

            cmds = cmds % {'models_dir': self.models_dir,
                           'tempwav_file': tempwav_file,
                           'tempilab_file': tempilab_file,
                           'tempolab_file': tempolab_file,
                           'tempolf0_file': tempolf0_file}
            #print(cmds)
            with codecs.open(tempilab_file, "w", encoding="utf-8") as outfh:
                outfh.write("\n".join(utt["hts_label"]))

            os.system(cmds)

            #load seg endtimes into utt:
            with open(tempolab_file) as infh:
                lines = infh.readlines()
                segs = utt.get_relation("Segment").as_list()
                assert len(segs) == len(lines)
                for line, seg in zip(lines, segs):
                    #second field of each label line is the end time
                    seg["end"] = hts_labels_tone.htk_int_to_float(line.split()[1])

            #load audio:
            utt["waveform"] = Waveform(tempwav_file)

            #load lf0:
            f0 = np.exp(np.fromfile(tempolf0_file, "float32")) #load and lf0 to hertz
            #to semitones relative to 1Hz:
            f0[f0.nonzero()] = 12.0 * np.log2(f0[f0.nonzero()]) # 12 * log2 (F0 / F0reference) where F0reference = 1
            f0t = Track()
            f0t.values = f0
            #one f0 value every 5 ms:
            f0t.times = np.arange(len(f0), dtype=np.float64) * 0.005
            utt["f0"] = f0t
        finally:
            #cleanup tempfiles:
            for fd, path in tempfiles:
                os.close(fd)
                os.remove(path)

        return utt