示例#1
0
def _addSyllableNucleiToTextgrids(wavPath, tgPath, tierName,
                                 syllableNucleiPath, outputPath):
    '''
    Add a "Syllable Nuclei" point tier to the textgrid of each wav file

    For every .wav file found in wavPath, the textgrid with the same name
    is loaded from tgPath.  The start times of the entries in the tier
    tierName are handed to uwe_sr.toAbsoluteTime() along with
    syllableNucleiPath; the lists it returns are flattened into a single
    list of timestamps, which become the points of the new tier.  The
    resulting textgrid is saved under the same file name in outputPath.
    '''
    for name in utils.findFiles(wavPath, filterExt=".wav", stripExt=True):

        tgName = name + ".TextGrid"
        tg = tgio.openTextgrid(join(tgPath, tgName))
        startTimeList = [entry[0]
                         for entry in tg.tierDict[tierName].entryList]
        nucleusSyllableList = uwe_sr.toAbsoluteTime(name, syllableNucleiPath,
                                                    startTimeList)
        flattenedSyllableList = [nuclei for sublist in nucleusSyllableList
                                 for nuclei in sublist]

        # The new tier spans from 0 to the end of the recording
        duration = audio_scripts.getSoundFileDuration(join(wavPath,
                                                           name + ".wav"))

        # Each point is labeled with its running index, zero-padded to
        # (oom + 1) digits so that all labels have the same width
        oom = my_math.orderOfMagnitude(len(flattenedSyllableList))
        labelTemplate = "%%0%dd" % (oom + 1)
        pointList = [(timestamp, labelTemplate % i)
                     for i, timestamp in enumerate(flattenedSyllableList)]

        # Echo the nucleus timestamps found for this file
        print(flattenedSyllableList)
        tier = tgio.PointTier("Syllable Nuclei", pointList, 0, duration)

        # The textgrid loaded above was only read from, so the tier can be
        # added to it directly instead of parsing the same file again
        tg.addTier(tier)
        tg.save(join(outputPath, tgName))
示例#2
0
def _divideLikeFloat(numerator, denominator):
    '''
    Divide without raising on a zero denominator

    Follows the IEEE 754 convention instead: x / 0 is +/-inf (with the
    sign of x) and 0 / 0 is nan.
    '''
    try:
        return numerator / denominator
    except ZeroDivisionError:
        if numerator == 0:
            return float("nan")
        return float("inf") if numerator > 0 else float("-inf")


def detectPitchErrors(pitchList, maxJumpThreshold=0.70, tgToMark=None):
    '''
    Detect pitch halving and doubling errors.

    pitchList is a sequence of samples where sample[0] is the time and
    sample[1] is the pitch value.  Each sample is compared with the one
    before it and is flagged if the previous pitch is at or below
    currentPitch * maxJumpThreshold, or at or above
    currentPitch / maxJumpThreshold.  maxJumpThreshold must lie in
    [0.0, 1.0]; a value of 1.0 flags every consecutive pair.

    If a textgrid is passed in, it adds the markings to the textgrid
    as a point tier named "pitch errors" (the textgrid must not already
    have a tier with that name).

    Returns (errorList, tgToMark) where each item of errorList is
    [time, currentPitch / lastPitch].  When the previous pitch is 0 the
    ratio is reported as inf (or nan if both values are 0) rather than
    raising ZeroDivisionError.
    '''
    assert (maxJumpThreshold >= 0.0 and maxJumpThreshold <= 1.0)

    errorList = []
    for i in range(1, len(pitchList)):
        lastPitch = pitchList[i - 1][1]
        currentPitch = pitchList[i][1]

        # A threshold of 0 is allowed by the range check above; it yields
        # an infinite ceiling here instead of a division by zero
        ceilingCutoff = _divideLikeFloat(currentPitch, maxJumpThreshold)
        floorCutoff = currentPitch * maxJumpThreshold
        if ((lastPitch <= floorCutoff) or (lastPitch >= ceilingCutoff)):
            currentTime = pitchList[i][0]
            jumpRatio = _divideLikeFloat(currentPitch, lastPitch)
            errorList.append([currentTime, jumpRatio])

    if tgToMark is not None:
        tierName = "pitch errors"
        assert (tierName not in tgToMark.tierNameList)
        pointTier = tgio.PointTier(tierName, errorList, tgToMark.minTimestamp,
                                   tgToMark.maxTimestamp)
        tgToMark.addTier(pointTier)

    return errorList, tgToMark
def make_textgrid(df, out_name, orig_name=None, word2phone=None):
    '''
    Build phone and syllable tiers from df and save them as a textgrid

    df must provide the columns 'start', 'end' and 'phone'; each row
    becomes one interval of the 'phone' tier.  Phones are grouped into
    syllables as follows: 'spn' and 'sil' form an interval of their own,
    a phone whose second-to-last character is '_' (and which is longer
    than two characters) closes the current syllable, and every other
    phone is collected until such a closing phone arrives.  A syllable
    interval runs from the start of its first collected phone to the end
    of its closing phone.

    If orig_name is given, the tiers are added to the textgrid loaded
    from that file, otherwise to a new, empty textgrid.  If both
    orig_name and word2phone are given, a 'word' tier is built with
    make_word_list() and, unless the textgrid already has a non-empty
    'breaks' tier, a 'break' point tier is written as well (replacing an
    empty 'breaks' tier if there is one).

    The textgrid is saved to out_name; missing parent directories are
    created.  Returns (number of matched words, number of unmatched
    words), which is (0, 0) when no word tier was built.
    '''
    tg = tgio.openTextgrid(orig_name) if orig_name else tgio.Textgrid()

    non_speech = ('spn', 'sil')
    phones_list = []
    syllables_list = []
    curr_syllable = []
    # Start time of the syllable being collected; None while no phone of
    # the next syllable has been seen yet
    syllable_start = None
    for tup in df[['start', 'end', 'phone']].itertuples():
        phones_list.append((tup.start, tup.end, tup.phone))
        if tup.phone in non_speech:
            syllables_list.append((tup.start, tup.end, tup.phone))
            curr_syllable = []
            syllable_start = None
        elif len(tup.phone) > 2 and tup.phone[-2] == '_':  # final
            curr_syllable.append(tup.phone)
            if syllable_start is None:
                # No initial preceded this final, so the syllable begins
                # with the final itself (not with an earlier syllable)
                syllable_start = tup.start
            syllables_list.append(
                (syllable_start, tup.end, ' '.join(curr_syllable)))
            curr_syllable = []
            syllable_start = None
        else:  # initial
            curr_syllable.append(tup.phone)
            if syllable_start is None:
                syllable_start = tup.start

    phone_tier = tgio.IntervalTier('phone', phones_list)
    # NOTE(review): the tier name contains a literal backslash
    # (syllable\_phones).  It is kept as-is because other code may look
    # the tier up by this name, but it is now spelled with an explicit
    # escape since '\_' is an invalid escape sequence.  Confirm whether
    # 'syllable_phones' was intended.
    syllable_tier = tgio.IntervalTier('syllable\\_phones', syllables_list)

    # Defaults for the case where no word tier can be built
    word_list = []
    unmatched_words = []
    break_list = None
    if orig_name and word2phone:
        ipus, xmins, xmaxs = get_ipus(tg)
        word_list, unmatched_words, break_list = make_word_list(
            syllable_tier, ipus, word2phone, out_name, xmaxs)
        word_tier = tgio.IntervalTier('word', word_list)
        tg.addTier(word_tier)

    tg.addTier(phone_tier)
    tg.addTier(syllable_tier)

    has_breaks_tier = 'breaks' in tg.tierDict
    if has_breaks_tier and tg.tierDict['breaks'].entryList:
        print(out_name, 'has break tier, did not write new one')
    elif break_list is not None:
        # Only write breaks when they were actually computed above
        if has_breaks_tier:
            tg.removeTier('breaks')
        break_tier = tgio.PointTier('break', break_list)
        tg.addTier(break_tier)

    # dirname is '' for a bare file name, which os.makedirs rejects
    out_dir = os.path.dirname(out_name)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    tg.save(out_name, useShortForm=False)
    print('wrote to {}, # matched: {}, # unmatched: {}'.format(
        out_name, len(word_list), len(unmatched_words)))
    return len(word_list), len(unmatched_words)