Пример #1
0
def make_joincoefs(featconfig, wav_dir):
    """Make join coefficient tracks from normalised mel-cepstra and F0.

    Calls ``extract_mceps`` for every ``*.<WAV_EXT>`` file in ``wav_dir``,
    then loads the MCEP tracks found in ``MCEP_DIR`` and the F0 tracks found
    in ``F0_DIR``, normalises both, appends the F0 values to the MCEP values
    along axis 1 and saves one track per MCEP file in ``JOIN_DIR`` using
    ``ttslab.tofile``. All directories are resolved against the current
    working directory.

    Args:
        featconfig: Configuration object; options of the ``SIG2FV_MCEP``
            section are read with ``featconfig.get(section, option)``.
        wav_dir: Directory containing the input wave files.

    Raises:
        OSError: From ``os.mkdir`` when the MCEP or join directory already
            exists.
    """
    workdir = os.getcwd()
    mcep_dir = os.path.join(workdir, MCEP_DIR)
    os.mkdir(mcep_dir)
    join_dir = os.path.join(workdir, JOIN_DIR)
    os.mkdir(join_dir)
    pm_dir = os.path.join(workdir, PM_DIR)
    f0_dir = os.path.join(workdir, F0_DIR)

    fbank_order = featconfig.get("SIG2FV_MCEP", "FBANK_ORDER")
    melcep_order = featconfig.get("SIG2FV_MCEP", "MELCEP_ORDER")
    melcep_coefs = featconfig.get("SIG2FV_MCEP", "MELCEP_COEFS")
    preemph_coef = featconfig.get("SIG2FV_MCEP", "PREEMPH_COEF")
    window_factor = featconfig.get("SIG2FV_MCEP", "WINDOW_FACTOR")
    window_type = featconfig.get("SIG2FV_MCEP", "WINDOW_TYPE")

    print("MAKING JOINCOEFS...")
    wav_pattern = os.path.join(wav_dir, ".".join(["*", WAV_EXT]))
    # Explicit loop instead of a bare map(): on Python 3 map() is lazy, so a
    # discarded map object would never call extract_mceps at all.
    for wavfilename in sorted(glob(wav_pattern)):
        extract_mceps((wavfilename, fbank_order, window_factor, preemph_coef,
                       melcep_order, window_type, melcep_coefs, mcep_dir,
                       pm_dir))

    print("NORMALISING AND JOINING F0 AND MCEPS...")
    # With these limits, values two standard deviations away from the mean
    # end up at -1.0 / +1.0 after the scaling below.
    upper = +1.0
    lower = -1.0

    mceptracks = {}
    for fn in glob(os.path.join(mcep_dir, ".".join(["*", MCEP_EXT]))):
        t = Track()
        t.load_track(fn)
        mceptracks[os.path.basename(fn)] = t

    # Statistics are pooled over the frames of all tracks (per coefficient).
    allmcepvecs = np.concatenate(
        [mceptracks[tn].values for tn in sorted(mceptracks)])
    mcepmean = allmcepvecs.mean(0)
    mcepstd = allmcepvecs.std(0)
    for k in mceptracks:
        mceptracks[k].values = ((mceptracks[k].values - mcepmean)
                                / (4 * mcepstd) * (upper - lower))

    f0tracks = {}
    for fn in glob(os.path.join(f0_dir, ".".join(["*", F0_EXT]))):
        t = Track()
        t.load_track(fn)
        f0tracks[os.path.basename(fn)] = t

    # F0 statistics use non-zero values only (presumably zero marks unvoiced
    # frames -- confirm); the scaling is still applied to every frame.
    allf0vecs = np.concatenate(
        [f0tracks[tn].values[f0tracks[tn].values.nonzero()]
         for tn in sorted(f0tracks)])
    f0mean = allf0vecs.mean(0)
    f0std = allf0vecs.std(0)
    for k in f0tracks:
        f0tracks[k].values = ((f0tracks[k].values - f0mean)
                              / (4 * f0std) * (upper - lower))

    # Add f0 to mcep track.
    # NOTE: tracks are paired purely by sorted file name, so both directories
    # are expected to hold the same set of utterances.
    for k1, k2 in zip(sorted(mceptracks), sorted(f0tracks)):
        mceptracks[k1].values = np.concatenate(
            (mceptracks[k1].values, f0tracks[k2].values), 1)

    for fn in mceptracks:
        basename = os.path.splitext(os.path.basename(fn))[0]
        ttslab.tofile(mceptracks[fn],
                      os.path.join(join_dir, basename + "." + JOIN_EXT))
Пример #2
0
def utt_mceps(utt, shift=0.005, remove_pau=False, resettimes=False):
    """Compute a feature track for an utterance by running the SIG2FV command.

    The utterance waveform is written to a temporary directory, the
    ``SIG2FV`` command template is run on it through ``os.system`` and the
    resulting feature file is loaded into a ``Track``. The temporary
    directory is removed even if one of these steps raises.

    Args:
        utt: Utterance; ``utt["waveform"]`` must provide ``write(filename)``.
        shift: Value substituted for ``shift`` in the command template and
            used as the step when ``resettimes`` is set.
        remove_pau: If true, keep only the frames selected by
            ``Track.mask_indices`` for the intervals of all "Segment" items
            whose name is not "pau" (times are filled in on a deep copy of
            the utterance, so ``utt`` itself is not modified here).
        resettimes: If true, replace the track times by ``shift * k`` for
            ``k = 1 .. number of frames``.

    Returns:
        The loaded (and optionally filtered / re-timed) ``Track``.
    """
    temppath = mkdtemp()
    try:
        #wavs
        wfn1 = os.path.join(temppath, "1." + WAV_EXT)
        utt["waveform"].write(wfn1)
        #feats
        ffn1 = os.path.join(temppath, "1." + FEAT_EXT)
        cmds = SIG2FV % {"inputfile": wfn1,
                         "outputfile": ffn1,
                         "shift": shift}
        os.system(cmds)

        #tracks
        t1 = Track()
        t1.load_track(ffn1)
    finally:
        #cleanup, also on failure so temporary directories do not pile up
        shutil.rmtree(temppath)

    if remove_pau:
        u = deepcopy(utt)
        fill_startendtimes(u)
        keep_intervals = [(seg["start"], seg["end"])
                          for seg in u.gr("Segment")
                          if seg["name"] != "pau"]
        indices = t1.mask_indices(keep_intervals)
        t1.times = t1.times[indices]
        t1.values = t1.values[indices]
    if resettimes:
        # Builtin float: the np.float alias no longer exists in current NumPy.
        t1.times = np.arange(1, len(t1.times) + 1, dtype=float) * shift
    return t1
Пример #3
0
def add_feats_to_utt(args):
    """Attach join coefficients, LPC coefficients and residuals to word units.

    Each "Unit" item takes its start/end time from the "Word" item at the
    same position; the unit boundaries are then looked up in the LPC, F0 and
    join-coefficient tracks of the utterance.

    Args:
        args: Tuple ``(u, lpc_dir, joincoef_dir, f0_dir)`` -- the utterance
            and the directories holding its LPC/residual, join-coefficient
            and F0 files, each named after ``u["file_id"]``.

    Returns:
        The same utterance ``u`` with "left-joincoef", "right-joincoef",
        "lpc-coefs" and "residuals" set on every unit.
    """
    u, lpc_dir, joincoef_dir, f0_dir = args

    file_id = u["file_id"]
    print("Processing:", file_id)
    u.fill_startendtimes()
    # Copy each word's timing onto the unit at the same position.
    for unit_item, word_item in zip(u.gr("Unit"), u.gr("Word")):
        assert unit_item["name"] == word_item["name"]
        unit_item["start"] = word_item["start"]
        unit_item["end"] = word_item["end"]

    lpctrack = Track()
    lpctrack.load_track(os.path.join(lpc_dir, file_id) + "." + LPC_EXT)
    restrack = Track()
    restrack.load_wave(os.path.join(lpc_dir, file_id) + "." + RES_EXT)
    jointrack = ttslab.fromfile(os.path.join(joincoef_dir, file_id) + "." + JOIN_EXT)
    f0track = Track()
    f0track.load_track(os.path.join(f0_dir, file_id) + "." + F0_EXT)

    # Boundary times: start of the first unit followed by the end of each unit.
    boundarytimes = []
    for unit_item in u.gr("Unit"):
        if not boundarytimes:
            boundarytimes.append(unit_item["start"])
        boundarytimes.append(unit_item["end"])

    # Look every boundary time up in the three tracks.
    lpc_bounds = []
    f0_bounds = []  # gathered alongside the others; not consumed below
    join_vecs = []
    for when in boundarytimes:
        lpc_bounds.append(lpctrack.index_at(when))
        f0_bounds.append(f0track.index_at(when))
        join_vecs.append(jointrack.values[jointrack.index_at(when)])

    # Pitch period at each LPC index: spacing between consecutive LPC times,
    # with 0.0 taken as the time before the first one.
    pitchperiod = np.diff(np.concatenate(([0.0], lpctrack.times)))

    units = u.get_relation("Unit").as_list()

    assert len(units) == len(lpc_bounds) - 1
    per_unit = zip(units,
                   zip(join_vecs, join_vecs[1:]),
                   zip(lpc_bounds, lpc_bounds[1:]))
    for unit_item, (left_jc, right_jc), (lo, hi) in per_unit:
        unit_item["left-joincoef"] = left_jc
        unit_item["right-joincoef"] = right_jc
        # lo:hi behaves like Python slicing.
        unit_item["lpc-coefs"] = lpctrack.slice(lo, hi, copy=True)
        unit_item["lpc-coefs"].starttime = 0.0 if lo == 0 else lpctrack.times[lo - 1]
        unit_item["lpc-coefs"].zero_starttime()
        # For windowfactor=2 (save only samples and assume 16kHz): the
        # residual span is widened by one pitch period on either side.
        # NOTE(review): the trailing edge also uses pitchperiod[lo], not
        # pitchperiod[hi] -- confirm this is intended.
        first = restrack.index_at(lpctrack.times[lo] - pitchperiod[lo])
        last = restrack.index_at(lpctrack.times[hi] + pitchperiod[lo])
        unit_item["residuals"] = restrack.slice(first, last).values
    return u
Пример #4
0
def utt_distance(utt,
                 utt2,
                 method="dtw",
                 metric="euclidean",
                 sig2fv=SIG2FV,
                 VI=None):
    """ Uses Trackfile class' distance measurements to compare utts...
        See docstring in tfuncs_analysis.py for more details...

        Both waveforms are written to a temporary directory, features are
        extracted by running the ``sig2fv`` command template on each, and
        the two resulting tracks are compared with ``Track.distances``.
        The temporary directory is removed even if a step fails.

        Args:
            utt, utt2: Utterances; ``["waveform"]`` must provide
                ``write(filename)``.
            method, metric, VI: Passed through unchanged to
                ``Track.distances``.
            sig2fv: Command template taking ``inputfile`` and
                ``outputfile`` keys (defaults to the module's ``SIG2FV``).

        Returns:
            Whatever ``Track.distances`` returns for the two feature tracks.
    """
    temppath = mkdtemp()
    try:
        #wavs
        wfn1 = os.path.join(temppath, "1." + WAV_EXT)
        wfn2 = os.path.join(temppath, "2." + WAV_EXT)
        utt["waveform"].write(wfn1)
        utt2["waveform"].write(wfn2)
        #feats
        ffn1 = os.path.join(temppath, "1." + FEAT_EXT)
        ffn2 = os.path.join(temppath, "2." + FEAT_EXT)
        for wfn, ffn in ((wfn1, ffn1), (wfn2, ffn2)):
            # Honour the caller-supplied template rather than always falling
            # back to the module-level SIG2FV.
            os.system(sig2fv % {"inputfile": wfn, "outputfile": ffn})

        #tracks
        t1 = Track()
        t1.load_track(ffn1)
        t2 = Track()
        t2.load_track(ffn2)

        #compare
        return t1.distances(t2, method=method, metric=metric, VI=VI)
    finally:
        shutil.rmtree(temppath)
Пример #5
0
def utt_distance(utt, utt2, method="dtw", metric="euclidean", sig2fv=SIG2FV, VI=None):
    """ Uses Trackfile class' distance measurements to compare utts...
        See docstring in tfuncs_analysis.py for more details...

        The waveform of each utterance is written to a temporary directory
        and converted to a feature file by running the ``sig2fv`` command
        template; the two feature tracks are then compared with
        ``Track.distances``. The temporary directory is always removed.

        Args:
            utt, utt2: Utterances; ``["waveform"]`` must provide
                ``write(filename)``.
            method, metric, VI: Passed through unchanged to
                ``Track.distances``.
            sig2fv: Command template taking ``inputfile`` and
                ``outputfile`` keys (defaults to the module's ``SIG2FV``).

        Returns:
            Whatever ``Track.distances`` returns for the two feature tracks.
    """
    temppath = mkdtemp()
    try:
        #wavs
        wavnames = [os.path.join(temppath, "1." + WAV_EXT),
                    os.path.join(temppath, "2." + WAV_EXT)]
        utt["waveform"].write(wavnames[0])
        utt2["waveform"].write(wavnames[1])

        #feats
        featnames = [os.path.join(temppath, "1." + FEAT_EXT),
                     os.path.join(temppath, "2." + FEAT_EXT)]
        for wfn, ffn in zip(wavnames, featnames):
            # Use the sig2fv argument: previously it was accepted but the
            # global SIG2FV template was always the one executed.
            os.system(sig2fv % {"inputfile": wfn,
                                "outputfile": ffn})

        #tracks
        t1 = Track()
        t1.load_track(featnames[0])
        t2 = Track()
        t2.load_track(featnames[1])

        #compare
        return t1.distances(t2, method=method, metric=metric, VI=VI)
    finally:
        # Clean up even when writing, extraction or loading fails.
        shutil.rmtree(temppath)
Пример #6
0
def add_feats_to_utt(args):
    """Attach join coefficients, LPC coefficients, durations and residuals
    to halfphone units.

    Every item of the "Segment" relation is split in two at its ``cl_end``
    time when it has one, otherwise at its midpoint. The first half of the
    first segment and the second half of the last segment are skipped (the
    "pruning pau halfphones" case); the remaining halves are matched in
    order with the items of the "Unit" relation.

    Args:
        args: Tuple ``(u, lpc_dir, joincoef_dir, f0_dir)`` -- the utterance
            and the directories holding its LPC/residual, join-coefficient
            and F0 files, each named after ``u["file_id"]``.

    Returns:
        The same utterance ``u`` with "left-joincoef", "right-joincoef",
        "lpc-coefs", "dur" and "residuals" set on every unit.

    Raises:
        AssertionError: If the number of units does not match the number of
            halfphone intervals.
    """
    u, lpc_dir, joincoef_dir, f0_dir = args

    file_id = u["file_id"]
    print("Processing:", file_id)
    u.fill_startendtimes()

    lpctrack = Track()
    lpctrack.load_track(".".join([os.path.join(lpc_dir, file_id), LPC_EXT]))
    restrack = Track()
    restrack.load_wave(".".join([os.path.join(lpc_dir, file_id), RES_EXT]))
    jointrack = ttslab.fromfile(".".join(
        [os.path.join(joincoef_dir, file_id), JOIN_EXT]))
    f0track = Track()
    f0track.load_track(".".join([os.path.join(f0_dir, file_id), F0_EXT]))

    #get boundarytimes: [start, split, end] per segment plus the duration
    #of each of its two halves
    boundarytimes = []
    durations = []
    starttime = 0.0
    for seg in u.get_relation("Segment"):
        endtime = float(seg["end"])
        if "cl_end" in seg:
            splittime = float(seg["cl_end"])
        else:
            splittime = (endtime + starttime) / 2
            #TODO: should still add 25% split if diphthong...
        boundarytimes.append([starttime, splittime, endtime])
        durations.extend([splittime - starttime, endtime - splittime])
        starttime = endtime

    #DEMITASSE: pruning pau halfphones -- the leading and trailing half are
    #dropped, so the first segment contributes only its split time and the
    #end of the last segment is never used.
    durations = durations[1:-1]
    halftimes = []
    for n, bounds in enumerate(boundarytimes):
        halftimes.extend(bounds[1:2] if n == 0 else bounds[0:2])

    #convert boundary times into sample indices
    lpcsampleranges = []
    f0sampleranges = []  # gathered alongside the others; not consumed below
    joinsamples = []
    for bound in halftimes:
        lpcsampleranges.append(lpctrack.index_at(bound))
        f0sampleranges.append(f0track.index_at(bound))
        # The boundary is a time, not a row number: convert it to a frame
        # index before indexing the values array.
        joinsamples.append(jointrack.values[jointrack.index_at(bound)])

    #get pitchperiods at lpc indices: spacing between consecutive LPC times,
    #with 0.0 taken as the time before the first one
    lpctimes = np.concatenate(([0.0], lpctrack.times))
    pitchperiod = np.diff(lpctimes)

    units = u.get_relation("Unit").as_list()

    assert len(units) == len(lpcsampleranges) - 1
    per_unit = zip(units, durations,
                   zip(joinsamples, joinsamples[1:]),
                   zip(lpcsampleranges, lpcsampleranges[1:]))
    for unit, dur, (jc0, jc1), (lti0, lti1) in per_unit:
        unit["left-joincoef"] = jc0
        unit["right-joincoef"] = jc1
        #like python indexing/slicing
        unit["lpc-coefs"] = lpctrack.slice(lti0, lti1, copy=True)
        if lti0 == 0:
            unit["lpc-coefs"].starttime = 0.0
        else:
            unit["lpc-coefs"].starttime = lpctrack.times[lti0 - 1]
        unit["lpc-coefs"].zero_starttime()
        unit["dur"] = dur
        #For windowfactor=2 (save only samples and assume 16kHz): the
        #residual span is widened by one pitch period on either side.
        #NOTE(review): the trailing edge also uses pitchperiod[lti0], not
        #pitchperiod[lti1] -- confirm this is intended.
        unit["residuals"] = restrack.slice(
            restrack.index_at(lpctrack.times[lti0] - pitchperiod[lti0]),
            restrack.index_at(lpctrack.times[lti1] + pitchperiod[lti0])).values
    return u
Пример #7
0
def make_joincoefs(featconfig, wav_dir):
    """Make join coefficient tracks from normalised mel-cepstra and F0.

    Calls ``extract_mceps`` for every ``*.<WAV_EXT>`` file in ``wav_dir``,
    then loads the MCEP tracks found in ``MCEP_DIR`` and the F0 tracks found
    in ``F0_DIR``, normalises both, appends the F0 values to the MCEP values
    along axis 1 and saves one track per MCEP file in ``JOIN_DIR`` using
    ``ttslab.tofile``. All directories are resolved against the current
    working directory.

    Args:
        featconfig: Configuration object; options of the ``SIG2FV_MCEP``
            section are read with ``featconfig.get(section, option)``.
        wav_dir: Directory containing the input wave files.

    Raises:
        OSError: From ``os.mkdir`` when the MCEP or join directory already
            exists.
    """

    def load_tracks(directory, ext):
        # Load every "*.<ext>" file in directory, keyed by its base name.
        tracks = {}
        for fn in glob(os.path.join(directory, ".".join(["*", ext]))):
            t = Track()
            t.load_track(fn)
            tracks[os.path.basename(fn)] = t
        return tracks

    mcep_dir = os.path.join(os.getcwd(), MCEP_DIR)
    os.mkdir(mcep_dir)
    join_dir = os.path.join(os.getcwd(), JOIN_DIR)
    os.mkdir(join_dir)
    pm_dir = os.path.join(os.getcwd(), PM_DIR)
    f0_dir = os.path.join(os.getcwd(), F0_DIR)

    fbank_order = featconfig.get("SIG2FV_MCEP", "FBANK_ORDER")
    melcep_order = featconfig.get("SIG2FV_MCEP", "MELCEP_ORDER")
    melcep_coefs = featconfig.get("SIG2FV_MCEP", "MELCEP_COEFS")
    preemph_coef = featconfig.get("SIG2FV_MCEP", "PREEMPH_COEF")
    window_factor = featconfig.get("SIG2FV_MCEP", "WINDOW_FACTOR")
    window_type = featconfig.get("SIG2FV_MCEP", "WINDOW_TYPE")

    print("MAKING JOINCOEFS...")
    # A plain loop, not map(): map() returns a lazy iterator on Python 3, so
    # throwing its result away means extract_mceps is never executed.
    wavfilenames = sorted(
        glob(os.path.join(wav_dir, ".".join(["*", WAV_EXT]))))
    for wavfilename in wavfilenames:
        extract_mceps(
            (wavfilename, fbank_order, window_factor, preemph_coef,
             melcep_order, window_type, melcep_coefs, mcep_dir, pm_dir))

    print("NORMALISING AND JOINING F0 AND MCEPS...")
    #Normalising mceps and f0s: with these limits, values two standard
    #deviations away from the mean end up at -1.0 / +1.0.
    upper = +1.0
    lower = -1.0

    mceptracks = load_tracks(mcep_dir, MCEP_EXT)
    # Statistics are pooled over the frames of all tracks (per coefficient).
    allmcepvecs = np.concatenate(
        [mceptracks[tn].values for tn in sorted(mceptracks)])
    mcepmean = allmcepvecs.mean(0)
    mcepstd = allmcepvecs.std(0)
    for k in mceptracks:
        mceptracks[k].values = (mceptracks[k].values -
                                mcepmean) / (4 * mcepstd) * (upper - lower)

    f0tracks = load_tracks(f0_dir, F0_EXT)
    # F0 statistics use non-zero values only (presumably zero marks unvoiced
    # frames -- confirm); the scaling is still applied to every frame.
    allf0vecs = np.concatenate([
        f0tracks[tn].values[f0tracks[tn].values.nonzero()]
        for tn in sorted(f0tracks)
    ])
    f0mean = allf0vecs.mean(0)
    f0std = allf0vecs.std(0)
    for k in f0tracks:
        f0tracks[k].values = (f0tracks[k].values -
                              f0mean) / (4 * f0std) * (upper - lower)

    #Add f0 to mcep track.
    #NOTE: tracks are paired purely by sorted file name, so both directories
    #are expected to hold the same set of utterances.
    for k1, k2 in zip(sorted(mceptracks), sorted(f0tracks)):
        mceptracks[k1].values = np.concatenate(
            (mceptracks[k1].values, f0tracks[k2].values), 1)

    for fn in mceptracks:
        basename = os.path.splitext(os.path.basename(fn))[0]
        ttslab.tofile(mceptracks[fn],
                      os.path.join(join_dir, basename + "." + JOIN_EXT))
Пример #8
0
def add_feats_to_utt(args):
    """Attach join coefficients, LPC coefficients, durations and residuals
    to halfphone units.

    Every item of the "Segment" relation is split in two at its ``cl_end``
    time when it has one, otherwise at its midpoint. The first half of the
    first segment and the second half of the last segment are skipped (the
    "pruning pau halfphones" case); the remaining halves are matched in
    order with the items of the "Unit" relation.

    Args:
        args: Tuple ``(u, lpc_dir, joincoef_dir, f0_dir)`` -- the utterance
            and the directories holding its LPC/residual, join-coefficient
            and F0 files, each named after ``u["file_id"]``.

    Returns:
        The same utterance ``u`` with "left-joincoef", "right-joincoef",
        "lpc-coefs", "dur" and "residuals" set on every unit.

    Raises:
        AssertionError: If the number of units does not match the number of
            halfphone intervals.
    """
    u, lpc_dir, joincoef_dir, f0_dir = args

    file_id = u["file_id"]
    print("Processing:", file_id)
    u.fill_startendtimes()

    lpctrack = Track()
    lpctrack.load_track(".".join([os.path.join(lpc_dir, file_id), LPC_EXT]))
    restrack = Track()
    restrack.load_wave(".".join([os.path.join(lpc_dir, file_id), RES_EXT]))
    jointrack = ttslab.fromfile(".".join([os.path.join(joincoef_dir, file_id), JOIN_EXT]))
    f0track = Track()
    f0track.load_track(".".join([os.path.join(f0_dir, file_id), F0_EXT]))

    #get boundarytimes: [start, split, end] per segment plus the duration
    #of each of its two halves
    boundarytimes = []
    durations = []
    starttime = 0.0
    for seg in u.get_relation("Segment"):
        endtime = float(seg["end"])
        if "cl_end" in seg:
            splittime = float(seg["cl_end"])
        else:
            splittime = (endtime + starttime) / 2
            #TODO: should still add 25% split if diphthong...
        boundarytimes.append([starttime, splittime, endtime])
        durations.extend([splittime - starttime, endtime - splittime])
        starttime = endtime

    #convert boundtimes into sample ranges (and flatten):
    lpcsampleranges = []
    f0sampleranges = []  # gathered alongside the others; not consumed below
    joinsamples = []

    #DEMITASSE: If pruning pau halfphones -- the first segment contributes
    #only its split time and the end of the last segment is never used.
    durations = durations[1:-1]
    for segindex, bounds in enumerate(boundarytimes):
        picked = bounds[1:2] if segindex == 0 else bounds[0:2]
        for bound in picked:
            lpcsampleranges.append(lpctrack.index_at(bound))
            f0sampleranges.append(f0track.index_at(bound))
            # bound is a time in the track, not a row number: look up its
            # frame index first instead of indexing values with a float.
            joinsamples.append(jointrack.values[jointrack.index_at(bound)])

    #get pitchperiods at lpc indices: spacing between consecutive LPC times,
    #with 0.0 taken as the time before the first one
    lpctimes = np.concatenate(([0.0], lpctrack.times))
    pitchperiod = np.diff(lpctimes)

    units = u.get_relation("Unit").as_list()

    assert len(units) == len(lpcsampleranges) - 1
    for jc0, jc1, lti0, lti1, dur, i in zip(joinsamples[:-1], joinsamples[1:],
                                            lpcsampleranges[:-1], lpcsampleranges[1:],
                                            durations,
                                            units):
        i["left-joincoef"] = jc0
        i["right-joincoef"] = jc1
        i["lpc-coefs"] = lpctrack.slice(lti0, lti1, copy=True) #like python indexing/slicing
        if lti0 == 0:
            i["lpc-coefs"].starttime = 0.0
        else:
            i["lpc-coefs"].starttime = lpctrack.times[lti0 - 1]
        i["lpc-coefs"].zero_starttime()
        i["dur"] = dur
        #For windowfactor=2 (save only samples and assume 16kHz): the
        #residual span is widened by one pitch period on either side.
        #NOTE(review): the trailing edge also uses pitchperiod[lti0], not
        #pitchperiod[lti1] -- confirm this is intended.
        i["residuals"] = restrack.slice(restrack.index_at(lpctrack.times[lti0] - pitchperiod[lti0]),
                                        restrack.index_at(lpctrack.times[lti1] + pitchperiod[lti0])).values
    return u
Пример #9
0
def add_feats_to_utt(args):
    """Decorate the word units of an utterance with synthesis features.

    Unit timings are copied from the "Word" items at the same positions,
    the unit boundaries are located in the LPC, F0 and join-coefficient
    tracks, and each unit receives its join coefficients, a slice of the
    LPC track and the matching residual samples.

    Args:
        args: Tuple ``(u, lpc_dir, joincoef_dir, f0_dir)`` -- the utterance
            and the directories holding its LPC/residual, join-coefficient
            and F0 files, each named after ``u["file_id"]``.

    Returns:
        The same utterance ``u`` with "left-joincoef", "right-joincoef",
        "lpc-coefs" and "residuals" set on every unit.
    """
    u, lpc_dir, joincoef_dir, f0_dir = args

    file_id = u["file_id"]
    print("Processing:", file_id)
    u.fill_startendtimes()
    # Units and words are expected to line up one-to-one by name.
    for target, source in zip(u.gr("Unit"), u.gr("Word")):
        assert target["name"] == source["name"]
        target["start"] = source["start"]
        target["end"] = source["end"]

    lpctrack = Track()
    lpctrack.load_track(os.path.join(lpc_dir, file_id) + "." + LPC_EXT)
    restrack = Track()
    restrack.load_wave(os.path.join(lpc_dir, file_id) + "." + RES_EXT)
    jointrack = ttslab.fromfile(
        os.path.join(joincoef_dir, file_id) + "." + JOIN_EXT)
    f0track = Track()
    f0track.load_track(os.path.join(f0_dir, file_id) + "." + F0_EXT)

    # Boundary times: start of the first unit, then the end of every unit.
    boundarytimes = []
    for target in u.gr("Unit"):
        if not boundarytimes:
            boundarytimes.append(target["start"])
        boundarytimes.append(target["end"])

    # Translate each boundary time into track positions.
    lpc_edges = []
    f0_edges = []  # gathered alongside the others; not consumed below
    join_frames = []
    for moment in boundarytimes:
        lpc_edges.append(lpctrack.index_at(moment))
        f0_edges.append(f0track.index_at(moment))
        join_frames.append(jointrack.values[jointrack.index_at(moment)])

    # Pitch period at each LPC index: spacing between consecutive LPC times,
    # with 0.0 taken as the time before the first one.
    pitchperiod = np.diff(np.concatenate(([0.0], lpctrack.times)))

    units = u.get_relation("Unit").as_list()

    assert len(units) == len(lpc_edges) - 1
    for target, (jc_left, jc_right), (begin, stop) in zip(
            units,
            zip(join_frames, join_frames[1:]),
            zip(lpc_edges, lpc_edges[1:])):
        target["left-joincoef"] = jc_left
        target["right-joincoef"] = jc_right
        # begin:stop behaves like Python slicing.
        target["lpc-coefs"] = lpctrack.slice(begin, stop, copy=True)
        target["lpc-coefs"].starttime = (
            0.0 if begin == 0 else lpctrack.times[begin - 1])
        target["lpc-coefs"].zero_starttime()
        # For windowfactor=2 (save only samples and assume 16kHz): the
        # residual span is widened by one pitch period on either side.
        # NOTE(review): the trailing edge also uses pitchperiod[begin], not
        # pitchperiod[stop] -- confirm this is intended.
        res_from = restrack.index_at(
            lpctrack.times[begin] - pitchperiod[begin])
        res_to = restrack.index_at(
            lpctrack.times[stop] + pitchperiod[begin])
        target["residuals"] = restrack.slice(res_from, res_to).values
    return u