def tgBoundariesToZeroCrossings(tgFN, wavFN, outputTGFN, adjustPoints=True):
    '''
    Makes all textgrid boundaries fall on pressure wave zero crossings

    Every interval boundary of every interval tier in /tgFN/ is replaced
    by the value findNearestZeroCrossing() returns for it.  Points in
    point tiers are moved the same way, but only when /adjustPoints/ is
    True; otherwise point tiers are left exactly as they were.

    tgFN - the textgrid to read
    wavFN - the audio file the textgrid annotates
    outputTGFN - where the adjusted textgrid is saved
    adjustPoints - if True, point tiers are adjusted as well
    '''
    audiofile = wave.open(wavFN, "rb")
    try:
        tg = tgio.openTextGrid(tgFN)

        for tierName in tg.tierNameList[:]:
            tier = tg.tierDict[tierName]

            if isinstance(tier, tgio.PointTier) and adjustPoints is True:
                newEntryList = [
                    (findNearestZeroCrossing(audiofile, start), label)
                    for start, label in tier.entryList
                ]
            elif isinstance(tier, tgio.IntervalTier):
                newEntryList = [
                    (findNearestZeroCrossing(audiofile, start),
                     findNearestZeroCrossing(audiofile, stop),
                     label)
                    for start, stop, label in tier.entryList
                ]
            else:
                # Nothing to adjust.  Replacing the tier here would swap
                # its contents for an empty list and silently delete
                # every point when adjustPoints is False.
                continue

            tg.replaceTier(tierName, newEntryList, True)
    finally:
        # The wave handle used to be leaked
        audiofile.close()

    tg.save(outputTGFN)
def deleteUnlabeledIntervals(tgFN, wavFN, tierName, outputFN):
    '''
    Removes all audio from sections of wav file not inside labeled intervals

    tgFN - the textgrid whose tier /tierName/ marks the labeled regions
    wavFN - the audio file to process
    outputFN - the audio file that praatio_scripts.deleteWavSections()
               is asked to write

    A wave.Error raised while deleting is reported on stdout and
    otherwise ignored.
    '''
    tg = tgio.openTextGrid(tgFN)

    # Get the unlabeled intervals
    tier = tg.tierDict[tierName].fillInBlanks()
    entryList = [entry for entry in tier.entryList if entry[2] == ""]

    # Sometimes the textgrid and wav file differ by some small amount
    # If the textgrid is longer, the script crashes
    wavDur = _getSoundFileDuration(wavFN)
    if entryList:  # A fully labeled tier has no unlabeled entries at all
        lastStart, lastStop = entryList[-1][0], entryList[-1][1]
        if lastStop > wavDur and lastStart < wavDur:
            entryList[-1] = (lastStart, wavDur, "")

    try:
        praatio_scripts.deleteWavSections(wavFN, outputFN, entryList,
                                          doShrink=False)
    except wave.Error:
        print("There was a problem processing {}".format(
            os.path.basename(tgFN)))
def renameTiers(inputPath, outputPath, includeMothersPhones=False):
    '''
    Normalizes tier names for every textgrid found in /inputPath/

    Each entry of the rename table pairs a list of known spellings with
    the canonical name they are replaced by (via replaceTierName()).
    The renamed textgrids are saved under the same file name in
    /outputPath/.

    includeMothersPhones - if True, "Mother's phones" is also
                           normalized, to "Mother's Phones"

    A ValueError raised by replaceTierName() is re-raised after the
    offending file name has been printed.
    '''
    renameList = [
        (["Mother", "Mother's Speech", "Mother's speech",
          "mother's speech", "Mother Speech", "mother speech"],
         "Mother"),
        # NOTE(review): "child's backchannel" maps onto the mother's
        # backchannel tier -- confirm that this is intended
        (["Mother's Backchannel", "Mother's backchannel",
          "mother's backchannel", "child's backchannel"],
         "Mother's Backchannel"),
        (["Child", "Child's speech", "Child's Speech", "child's speech",
          "Child Speech", "child speech"],
         "Child"),
        (["Room", "Extraneous room noise", "Extraneous Room Noise",
          "Extraneous Noise", "Room Noise", "room noise", "Room noise",
          "extraneous room noise"],
         "Room"),
        (["Timer", "Time"], "Timer"),
        (["Epochs", "epochs", ], "Epochs"),
    ]

    if includeMothersPhones:
        renameList.insert(1, (["Mother's phones", ], "Mother's Phones"))

    utils.makeDir(outputPath)

    for fn in utils.findFiles(inputPath, filterExt=".TextGrid"):
        print(fn)
        tg = tgio.openTextGrid(join(inputPath, fn))

        for oldNameList, newName in renameList:
            try:
                tg = replaceTierName(tg, oldNameList, newName)
            except ValueError:
                # Was a Python 2-only print statement
                print(fn)
                raise

        tg.save(join(outputPath, fn))
def deleteUnlabeledIntervals(tgPath, wavPath, tierName, outputPath):
    '''
    Removes audio outside of labeled intervals, for a folder of files

    Does not assume TextGrid and wav files are inside same directory

    tgPath - folder containing the .TextGrid files
    wavPath - folder containing a .wav file for each textgrid
    tierName - the tier whose unlabeled intervals are deleted
    outputPath - folder the processed .wav files are written to
    '''
    utils.makeDir(outputPath)

    for name in utils.findFiles(tgPath, filterExt=".TextGrid",
                                stripExt=True):
        tg = tgio.openTextGrid(join(tgPath, name + ".TextGrid"))

        # Get the unlabeled intervals
        tier = tg.tierDict[tierName].fillInBlanks()
        entryList = [entry for entry in tier.entryList if entry[2] == ""]

        wavFN = join(wavPath, name + ".wav")
        outputWavFN = join(outputPath, name + ".wav")

        # Sometimes the textgrid and wav file differ by some small amount
        # If the textgrid is longer, the script crashes
        wavDur = _getSoundFileDuration(wavFN)
        if entryList:  # A fully labeled tier has no unlabeled entries
            lastStart, lastStop = entryList[-1][0], entryList[-1][1]
            if lastStop > wavDur and lastStart < wavDur:
                entryList[-1] = (lastStart, wavDur, "")

        praatio_scripts.deleteWavSections(wavFN, outputWavFN, entryList,
                                          doShrink=False)
def filterTextgrids(tgPath, speechTierName, laughterTierName, minDuration,
                    outputPath):
    '''
    Filters the speech tier of every textgrid in /tgPath/

    An entry of /speechTierName/ survives only if
    - insituLaughterCheck() returns a true value for it, and
    - it is strictly longer than /minDuration/ (uwe's script crashed on
      an utterance of length 0.013 seconds)

    Every entry examined is printed.  The filtered textgrids are saved
    under the same file name in /outputPath/.
    '''
    utils.makeDir(outputPath)

    for tgName in utils.findFiles(tgPath, filterExt=".TextGrid"):
        textgrid = tgio.openTextGrid(join(tgPath, tgName))

        # First pass: the laughter check
        candidateList = []
        for entry in textgrid.tierDict[speechTierName].entryList:
            begin, end, text = entry
            print(entry)
            if not insituLaughterCheck(begin, end, textgrid,
                                       laughterTierName):
                continue
            candidateList.append(entry)

        # Second pass: drop everything at or below the duration threshold
        keptList = []
        for begin, end, text in candidateList:
            if float(end) - float(begin) > minDuration:
                keptList.append((begin, end, text))

        textgrid.replaceTier(speechTierName, keptList)
        textgrid.save(join(outputPath, tgName))
def extractTGInfo(inputPath, outputPath, tierName, searchForMothersSpeech):
    '''
    Dumps the entries of one tier to a csv-like .txt file per textgrid

    Each output line is "start,stop,label".  Files are written to
    /outputPath/ as utf-8, named after the textgrid they came from.

    searchForMothersSpeech - if true, only entries labeled "MS" are kept

    NOTE(review): the original comment asked whether this duplicates
    textgrids.extractTGInfo -- still unresolved
    '''
    utils.makeDir(outputPath)

    for name in utils.findFiles(inputPath, filterExt=".TextGrid",
                                stripExt=True):
        print(name)

        tg = tgio.openTextGrid(join(inputPath, name + ".TextGrid"))
        tier = tg.tierDict[tierName]
        entryList = tier.getEntries()

        if searchForMothersSpeech:
            entryList = [(start, stop, label)
                         for start, stop, label in entryList
                         if label == "MS"]

        outputList = ["%f,%f,%s" % (start, stop, label)
                      for start, stop, label in entryList]
        outputTxt = "\n".join(outputList) + "\n"

        # 'with' guarantees the file is flushed and closed
        with codecs.open(join(outputPath, name + ".txt"), "w",
                         encoding="utf-8") as fd:
            fd.write(outputTxt)
def textgridMorphDuration(fromTGFN, toTGFN):
    '''
    Builds a textgrid whose tiers are /fromTGFN/'s tiers morphed to /toTGFN/

    A convenience function.  For every tier name in the source textgrid,
    the tier's morph() method is called with the tier of the same name
    from the target textgrid.  This assumes the two textgrids have the
    same number of segments.

    Returns the new textgrid; nothing is written to disk.
    '''
    sourceTG = tgio.openTextGrid(fromTGFN)
    targetTG = tgio.openTextGrid(toTGFN)

    morphedTG = tgio.Textgrid()
    for name in sourceTG.tierNameList:
        morphedTG.addTier(
            sourceTG.tierDict[name].morph(targetTG.tierDict[name]))

    return morphedTG
def getPitchForIntervals(data, tgFN, tierName):
    '''
    Preps data for use in f0Morph

    Passes /data/ through getValuesInIntervals() of the tier /tierName/
    in /tgFN/ and returns only the second member of each resulting
    pair, as a list.
    '''
    tier = tgio.openTextGrid(tgFN).tierDict[tierName]
    return [values for _, values in tier.getValuesInIntervals(data)]
def replaceAllLabelsInMotherTierWithMS(inputPath, outputPath):
    '''
    Relabels every entry of the "Mother" tier as "MS"

    Applies to each textgrid found in /inputPath/; start and stop times
    are kept.  Results are saved under the same name in /outputPath/.
    '''
    utils.makeDir(outputPath)

    motherTierName = "Mother"
    for tgName in utils.findFiles(inputPath, filterExt=".TextGrid"):
        textgrid = tgio.openTextGrid(join(inputPath, tgName))

        relabeledList = []
        for begin, end, _ in textgrid.tierDict[motherTierName].entryList:
            relabeledList.append([begin, end, "MS"])

        textgrid.replaceTier(motherTierName, relabeledList)
        textgrid.save(join(outputPath, tgName))
def analyzeInsituLaughter(inputPath, outputPath):
    '''
    Lists the "Mother" entries for which insituLaughterCheck() is true

    Every textgrid in /inputPath/ is scanned; each matching entry adds
    a "filename,start,stop,label" row to insitu_laughter_events.csv
    inside /outputPath/.
    '''
    # Sibling functions create their output folder before writing to it
    utils.makeDir(outputPath)

    outputList = []
    for fn in utils.findFiles(inputPath, filterExt=".TextGrid"):
        tg = tgio.openTextGrid(join(inputPath, fn))
        tier = tg.tierDict["Mother"]

        for start, stop, label in tier.getEntries():
            if insituLaughterCheck(start, stop, tg,
                                   "Mother's Backchannel"):
                outputList.append("%s,%02.02f,%02.02f,%s"
                                  % (fn, start, stop, label))

    # 'with' guarantees the file is flushed and closed
    with open(join(outputPath, "insitu_laughter_events.csv"), "w") as fd:
        fd.write("\n".join(outputList) + "\n")
def _addSyllableNucleiToTextgrids(wavPath, tgPath, tierName,
                                  syllableNucleiPath, outputPath):
    '''
    Adds a "Syllable Nuclei" point tier to each textgrid

    For every .wav file in /wavPath/ the matching textgrid in /tgPath/
    is opened, the start times of the entries in /tierName/ are handed
    to uwe_sr.toAbsoluteTime() and the timestamps it returns become the
    points of the new tier.  Points are labeled with their zero-padded
    index.  The result is saved to /outputPath/.
    '''
    for name in utils.findFiles(wavPath, filterExt=".wav", stripExt=True):

        tg = tgio.openTextGrid(join(tgPath, name + ".TextGrid"))
        startTimeList = [entry[0]
                         for entry in tg.tierDict[tierName].entryList]
        nucleusSyllableList = uwe_sr.toAbsoluteTime(name,
                                                    syllableNucleiPath,
                                                    startTimeList)

        # DEBUG: number of nuclei found for each interval
        for i, startTime in enumerate(startTimeList):
            print("{}: {}".format(startTime, len(nucleusSyllableList[i])))

        flattenedSyllableList = [nuclei
                                 for sublist in nucleusSyllableList
                                 for nuclei in sublist]

        wavFN = join(wavPath, name + ".wav")
        duration = audio_scripts.getSoundFileDuration(wavFN)

        # Zero-pad the labels so that they all have the same width
        oom = my_math.orderOfMagnitude(len(flattenedSyllableList))
        labelTemplate = "%%0%dd" % (oom + 1)

        entryList = [(timestamp, labelTemplate % i)
                     for i, timestamp in enumerate(flattenedSyllableList)]
        tier = tgio.PointTier("Syllable Nuclei", entryList, 0, duration)

        # The textgrid opened above has not been modified, so it is
        # reused here instead of being read from disk a second time
        tg.addTier(tier)
        tg.save(join(outputPath, name + ".TextGrid"))
def addEpochsToTextgrids(tgPath, epochPath, outputPath):
    '''
    Adds an "epochs" interval tier to every textgrid in /tgPath/

    The intervals come from /epochPath/<name>.txt, read with
    utils.openCSV(); each row is unpacked as (label, start, end).
    The new tier spans 0 to the textgrid's maxTimestamp.  Results are
    saved under the same name in /outputPath/.
    '''
    utils.makeDir(outputPath)

    for name in utils.findFiles(tgPath, filterExt=".TextGrid",
                                stripExt=True):
        print(name)  # Was a Python 2-only print statement
        tg = tgio.openTextGrid(join(tgPath, name + ".TextGrid"))

        entryList = utils.openCSV(epochPath, name + ".txt")
        entryList = [(float(start), float(end), label)
                     for label, start, end in entryList]

        tier = tgio.IntervalTier("epochs", entryList,
                                 minT=0, maxT=tg.maxTimestamp)

        tg.addTier(tier)
        tg.save(join(outputPath, name + ".TextGrid"))
def getIntervals(fn, tierName, filterFunc=None,
                 includeUnlabeledRegions=False):
    '''
    Returns the entry list of one tier of a textgrid

    Used by several merge scripts to read the 'extract' tier.

    filterFunc - if given, only entries for which it returns a true
                 value are returned
    includeUnlabeledRegions - if True, the tier is first passed through
                              fillInBlanks()
    '''
    tier = tgio.openTextGrid(fn).tierDict[tierName]

    if includeUnlabeledRegions is True:
        tier = tier.fillInBlanks()

    if filterFunc is None:
        return tier.entryList

    return [item for item in tier.entryList if filterFunc(item)]
def textgridManipulateDuration(tgFN, ratioList):
    '''
    Builds a new textgrid with every tier of /tgFN/ morphed by /ratioList/

    Interval tiers go through _morphIntervalTier() and point tiers
    through _morphPointTier().  Any other kind of tier trips the
    assertion.

    Returns the new textgrid; nothing is written to disk.
    '''
    sourceTG = tgio.openTextGrid(tgFN)

    morphedTG = tgio.Textgrid()
    for name in sourceTG.tierNameList:
        sourceTier = sourceTG.tierDict[name]

        if isinstance(sourceTier, tgio.IntervalTier):
            morphedTier = _morphIntervalTier(sourceTier, ratioList)
        elif isinstance(sourceTier, tgio.PointTier):
            morphedTier = _morphPointTier(sourceTier, ratioList)
        else:
            morphedTier = None

        assert morphedTier is not None
        morphedTG.addTier(morphedTier)

    return morphedTG
def generatePIMeasures(dataList, tgPath, tgFN, tierName, doPitch,
                       medianFilterWindowSize=None):
    '''
    Generates processed values for the labeled intervals in a textgrid

    /dataList/ is split up with the tier's getValuesInIntervals(); each
    resulting sample is unpacked as a 3-tuple whose second item is used
    as pitch and whose third item is used as intensity.

    if 'doPitch'=true get pitch measures (the output of
    getPitchMeasures(), as a list); if =false get rms intensity,
    computed over the non-zero samples (0 if there are none), wrapped
    in a one-item list

    Returns a list with one such item per interval.
    '''
    tgFN = join(tgPath, tgFN)
    tier = tgio.openTextGrid(tgFN).tierDict[tierName]

    measureList = []
    for interval, sampleList in tier.getValuesInIntervals(dataList):
        # NOTE(review): elsewhere in this file interval[0] is treated
        # as the interval's start time -- confirm that it is the label
        # that getPitchMeasures() should receive
        label = interval[0]

        if doPitch:
            pitchValList = [f0Val for _, f0Val, _ in sampleList]
            measures = getPitchMeasures(pitchValList, tgFN, label,
                                        medianFilterWindowSize, True)
            measureList.append(list(measures))
            continue

        loudValList = [intensityVal
                       for _, _, intensityVal in sampleList
                       if intensityVal != 0.0]
        if loudValList:
            rmsIntensity = myMath.rms(loudValList)
        else:
            rmsIntensity = 0
        measureList.append([rmsIntensity, ])

    return measureList
def _calculateSyllablesPerSecondForIntervals(wavPath, tgPath, tierName,
                                             syllableNucleiPath):
    '''
    Prints the speech rate of every interval in /tierName/

    For each .wav file in /wavPath/, the nuclei that
    uwe_sr.toAbsoluteTime() returns for each interval are counted and
    divided by the interval's duration.  One line per file is printed.
    '''
    for name in utils.findFiles(wavPath, filterExt=".wav", stripExt=True):

        textgrid = tgio.openTextGrid(join(tgPath, name + ".TextGrid"))
        intervalList = textgrid.tierDict[tierName].entryList
        onsetList = [interval[0] for interval in intervalList]
        nucleiPerInterval = uwe_sr.toAbsoluteTime(name,
                                                  syllableNucleiPath,
                                                  onsetList)

        pairedUp = utils.safeZip([nucleiPerInterval, intervalList],
                                 enforceLength=True)
        rateList = [str(len(nucleiList) / (interval[1] - interval[0]))
                    for nucleiList, interval in pairedUp]

        print("%s - %s (syllables/second for each interval)"
              % (name, ",".join(rateList)))
def isolateMotherSpeech(path, filterGrid, outputPath):
    '''
    Removes mother speech when the child is also speaking

    For every textgrid in /path/, each entry of the "Mother" tier is
    compared against the entries of the tier named /filterGrid/ that
    fall inside it, and subtractOverlap() is used to cut the entry
    down.  The pieces that remain become the new "Mother" tier.

    path - folder containing the .TextGrid files
    filterGrid - name of the tier whose entries are subtracted
    outputPath - folder the modified textgrids are saved to
    '''
    utils.makeDir(outputPath)
    for fn in utils.findFiles(path, filterExt=".TextGrid"):
        tg = tgio.openTextGrid(join(path, fn))
        motherTier = tg.tierDict["Mother"]
        newEntryList = []
        for start, stop, label in motherTier.entryList:
            # Only the part of the textgrid under this entry matters
            croppedTG = tg.crop(False, False, start, stop)
            entryList = croppedTG.tierDict[filterGrid].entryList
            # The entry starts out whole and is whittled down below
            resultList = [(start, stop, label),]
            for subStart, subStop, subLabel in entryList:
                i = 0
                while i < len(resultList):
                    tmpStart = resultList[i][0]
                    tmpEnd = resultList[i][1]
                    # presumably returns the parts of [tmpStart, tmpEnd]
                    # not covered by [subStart, subStop], as a list of
                    # [start, stop, label] lists -- TODO confirm
                    tmpResultList = subtractOverlap(tmpStart, tmpEnd, label, subStart, subStop)
                    # Replace if there has been a change
                    if tmpResultList != [[tmpStart, tmpEnd, label],]:
                        # NOTE(review): anything stored after index i is
                        # dropped here -- confirm that resultList[i + 1:]
                        # is always empty at this point
                        resultList = resultList[:i] + tmpResultList
                        # Skip over the pieces that were just inserted
                        i += len(tmpResultList) - 1
                    i += 1
            newEntryList.extend(resultList)
        # Entries go through an IntervalTier before replacing the old ones
        newMotherTier = tgio.IntervalTier("Mother", newEntryList)
        tg.replaceTier("Mother", newMotherTier.entryList)
        tg.save(join(outputPath, fn))
def extractMotherSpeech(wavPath, textgridPath, mothersSpeechName,
                        outputWavPath, outputTextgridPath):
    '''
    Outputs one wav file and one textgrid per entry of the speech tier

    For every .wav file in /wavPath/, the textgrid of the same name in
    /textgridPath/ is opened.  Each entry of the tier
    /mothersSpeechName/ is extracted with audio_scripts.extractSubwav()
    and the textgrid is cropped to the same time span.  Outputs are
    named <name>_<3-digit entry index>.
    '''
    utils.makeDir(outputWavPath)
    utils.makeDir(outputTextgridPath)

    for name in utils.findFiles(wavPath, filterExt=".wav", stripExt=True):
        print(name)  # Was a Python 2-only print statement
        tg = tgio.openTextGrid(join(textgridPath, name + ".TextGrid"))
        speechTier = tg.tierDict[mothersSpeechName]

        for i, entry in enumerate(speechTier.entryList):
            subName = "%s_%03d" % (name, i)
            start, stop, label = entry
            start, stop = float(start), float(stop)

            audio_scripts.extractSubwav(join(wavPath, name + ".wav"),
                                        join(outputWavPath,
                                             subName + ".wav"),
                                        start, stop,
                                        singleChannelFlag=True)

            subTG = tg.crop(strictFlag=False, softFlag=False,
                            startTime=start, endTime=stop)
            subTG.save(join(outputTextgridPath, subName + ".TextGrid"))
def analyzeLaughter(textgridPath, outputPath):
    '''
    Reports how often, and for how long, speech, laughter and pauses occur

    For each of the codes MS (tier "Mother"), LA (tier "Mother's
    Backchannel") and FP (tier "Mother") a csv file listing every
    matching entry of every textgrid in /textgridPath/ is written to
    /outputPath/.  event_cumulative_lengths.csv then holds the summed
    duration of each code, one row per textgrid.
    '''
    utils.makeDir(outputPath)

    speechTierName = "Mother"
    laughterTierName = "Mother's Backchannel"

    speechCode = "MS"
    laughterCode = "LA"
    pauseCode = "FP"

    # How much did each event occur?
    allCodeSummaryList = []
    for tierName, code, outputName in [
            [speechTierName, speechCode, "speech_occurances"],
            [laughterTierName, laughterCode, "laughter_occurances"],
            [speechTierName, pauseCode, "pause_code"],
    ]:
        entryList = []
        summaryList = []
        for fn in utils.findFiles(textgridPath, filterExt=".TextGrid"):
            tg = tgio.openTextGrid(join(textgridPath, fn))
            tier = tg.tierDict[tierName]

            matchEntryList = tier.find(code)
            durationList = [float(stop) - float(start)
                            for start, stop, label in matchEntryList]
            matchEntryList = [[fn, str(start), str(stop), label]
                              for start, stop, label in matchEntryList]

            entryList.extend(matchEntryList)
            summaryList.append((fn, str(sum(durationList))))

        entryList = [",".join(row) for row in entryList]
        # 'with' guarantees the file is flushed and closed
        with open(join(outputPath, outputName + ".csv"), "w") as fd:
            fd.write("\n".join(entryList))

        allCodeSummaryList.append(summaryList)

    # One row per textgrid, one column per code
    outputList = ["Filename,Speech,Laughter,Pause", ]
    for speech, laugh, pause in utils.safeZip(allCodeSummaryList,
                                              enforceLength=True):
        outputList.append(",".join([speech[0], speech[1],
                                    laugh[1], pause[1]]))

    with open(join(outputPath, "event_cumulative_lengths.csv"),
              "w") as fd:
        fd.write("\n".join(outputList) + "\n")
def filterShortIntervalsFromTier(tgPath, speechTierName, minDuration,
                                 outputPath):
    '''
    Removes ultrashort utterances from tier

    Entries of /speechTierName/ shorter than /minDuration/ are dropped
    from every textgrid in /tgPath/ (uwe's script crashed on an
    utterance of length 0.013 seconds).  Results are saved under the
    same name in /outputPath/.
    '''
    def _isLongEnough(entry):
        begin, end, _ = entry
        return float(end) - float(begin) >= minDuration

    utils.makeDir(outputPath)

    for tgName in utils.findFiles(tgPath, filterExt=".TextGrid"):
        textgrid = tgio.openTextGrid(join(tgPath, tgName))

        keptList = [entry
                    for entry in textgrid.tierDict[speechTierName].entryList
                    if _isLongEnough(entry)]

        textgrid.replaceTier(speechTierName, keptList)
        textgrid.save(join(outputPath, tgName))
def generateEpochFiles(tgPath, wavPath, epPath):
    '''
    Writes one epoch file per recording into /epPath/

    If the textgrids in /tgPath/ have an "Epochs" tier, each of its
    entries is written as a "label,start,stop" row.

    Otherwise the user is asked for an epoch duration and the epochs
    are generated from the (whole-second) duration of every .wav file
    in /wavPath/: back-to-back epochs of that duration, plus a shorter
    final one for whatever time is left over.
    '''
    utils.makeDir(epPath)

    try:
        for filename in utils.findFiles(tgPath, filterExt=".TextGrid",
                                        stripExt=True):
            tgrid = tgio.openTextGrid(os.path.join(tgPath,
                                                   filename + ".TextGrid"))
            with open(os.path.join(epPath, filename + ".txt"),
                      "w") as epochFile:
                for (start, stop, label) in tgrid.tierDict["Epochs"].entryList:
                    epochFile.write(str(label) + ',' + str(start) + ','
                                    + str(stop) + '\n')
    except KeyError:
        # No "Epochs" tier.  This was a bare 'except:', which also hid
        # unrelated errors and swallowed KeyboardInterrupt
        pass
    else:
        return

    # raw_input() only exists on Python 2
    try:
        readLine = raw_input
    except NameError:
        readLine = input

    epDuration = int(readLine("\nOk, the textgrids don't have an 'Epochs' tier.  How long are the epochs in this dataset?\nEnter the epoch duration in seconds: "))
    print("\nOk. Epochs are each %dsecs max.\n" % epDuration)

    durationList = []
    for fn in utils.findFiles(wavPath, filterExt=".wav"):
        duration = audio_scripts.getSoundFileDuration(join(wavPath, fn))
        durationList.append((fn, int(duration)))

    durationList.sort()

    for fn, duration in durationList:
        outputFN = os.path.splitext(fn)[0] + ".txt"

        numEpoches = int(duration / epDuration)
        epochList = [(i, i * epDuration, (i + 1) * epDuration)
                     for i in range(numEpoches)]

        leftover = duration % epDuration
        if leftover != 0:
            startTime = numEpoches * epDuration
            # NOTE(review): the full epochs are numbered
            # 0..numEpoches-1, so this one skips the number numEpoches
            # -- confirm whether numEpoches was intended
            epochList.append((numEpoches + 1, startTime,
                              startTime + leftover))

        epochList = ["%02d, %02d, %02d" % row for row in epochList]

        with open(join(epPath, outputFN), "w") as epochFN:
            epochFN.write("\n".join(epochList) + "\n")
def removeIntervalsFromTierByLabel(inputPath, tierName, targetLabel,
                                   outputPath, removeAllBut=False):
    '''
    Removes the entries of a tier based on their label

    By default, entries labeled /targetLabel/ are removed.  If
    /removeAllBut/ is true, they are the only ones that are kept.
    Applies to every textgrid in /inputPath/; results are saved under
    the same name in /outputPath/.
    '''
    def _shouldKeep(entry):
        _, _, label = entry
        if removeAllBut:
            return label == targetLabel
        return label != targetLabel

    utils.makeDir(outputPath)

    for tgName in utils.findFiles(inputPath, filterExt=".TextGrid"):
        textgrid = tgio.openTextGrid(join(inputPath, tgName))

        keptList = [entry
                    for entry in textgrid.tierDict[tierName].entryList
                    if _shouldKeep(entry)]

        textgrid.replaceTier(tierName, keptList)
        textgrid.save(join(outputPath, tgName))
to modify a textgrid. It still shows that, but all that code is now in the main library (pysle.praattools.syllabifyTextgrid) This snippet shows you how to use this function. ''' from os.path import join from praatio import tgio from pysle import isletool from pysle import praattools path = join('.', 'files') path = "/Users/tmahrt/Dropbox/workspace/pysle/test/files" tg = tgio.openTextGrid(join(path, "pumpkins.TextGrid")) # Needs the full path to the file islevPath = '/Users/tmahrt/Dropbox/workspace/pysle/test/islev2.txt' isleDict = isletool.LexicalTool(islevPath) # Get the syllabification tiers and add it to the textgrid syllableTG = praattools.syllabifyTextgrid(isleDict, tg, "word", "phone", skipLabelList=["",]) tg.addTier(syllableTG.tierDict["syllable"]) tg.addTier(syllableTG.tierDict["tonicSyllable"]) tg.addTier(syllableTG.tierDict["tonicVowel"]) tg.save(join(path, "pumpkins_with_syllables.TextGrid"))
def markMaxPitch(tgFNFullPath, wavFNFullPath, outputPath, tierName,
                 minPitch, maxPitch, numTopPitchIntervals,
                 numIntervalsPerTier, praatEXE):
    '''
    Saves a textgrid whose tier intervals denote locations of highest
    pitch measurements in the wav file it annotates.

    The intervals of /tierName/ are ranked by their maximum pitch
    value.  The output textgrid gets one tier per group of
    /numIntervalsPerTier/ intervals, best group first, and each
    interval is labeled with its maximum pitch value.

    Intermediate files go into subfolders of /outputPath/
    ("cleanedWavs", "pitch"); the result goes into "textgrid", under
    the same name as the input textgrid.  Nothing is returned.
    '''
    tgFN = os.path.basename(tgFNFullPath)
    wavFN = os.path.basename(wavFNFullPath)
    print("Processing max pitch from {}".format(wavFN))

    io.make_dir(outputPath)

    cleanedWavPath = join(outputPath, "cleanedWavs")
    io.make_dir(cleanedWavPath)
    cleanedWavFN = join(cleanedWavPath, wavFN)

    pitchPath = join(outputPath, "pitch")
    io.make_dir(pitchPath)
    pitchFN = io.get_filename_w_new_ext(wavFN, "pitch")

    textgridPath = join(outputPath, "textgrid")
    io.make_dir(textgridPath)
    textgridFN = join(textgridPath, tgFN)

    # 1 Delete unlabeled segments
    if not os.path.exists(cleanedWavFN):
        deleteUnlabeledIntervals(tgFNFullPath, wavFNFullPath, tierName,
                                 cleanedWavFN)

    # 2 Measure pitch from 'pruned' recording file
    #   The file name was wavFNFullPath, which pointed praat back at
    #   the original recording instead of the one cleaned in step 1
    piList = pitch_and_intensity.audioToPI(cleanedWavPath, wavFN,
                                           pitchPath, pitchFN,
                                           praatEXE, minPitch, maxPitch,
                                           forceRegenerate=False)

    # 3 Get pitch from each interval
    tg = tgio.openTextGrid(tgFNFullPath)
    tier = tg.tierDict[tierName]
    piListSegmented = tier.getValuesInIntervals(piList)

    # 4 Get max pitch from each interval
    entryList = []
    for interval, dataList in piListSegmented:
        pitchList = [f0Val for _, f0Val, _ in dataList]
        if len(pitchList) == 0:
            continue
        maxF0Val = max(pitchList)
        entryList.append((interval[0], interval[1], maxF0Val))

    entryList.sort(key=lambda x: x[2], reverse=True)
    entryList = [(start, stop, str(label))
                 for start, stop, label in entryList]

    # 5 Report the top intervals
    #   The group size used to be hard-coded to 10, regardless of
    #   numIntervalsPerTier
    minT = tg.minTimestamp
    maxT = tg.maxTimestamp
    outputTG = tgio.Textgrid()
    for i in range(0, numTopPitchIntervals, numIntervalsPerTier):
        name = "top %d" % (i + numIntervalsPerTier)
        subEntryList = entryList[i:i + numIntervalsPerTier]

        tier = tgio.IntervalTier(name, subEntryList, minT, maxT)
        outputTG.addTier(tier)

    outputTG.save(textgridFN)
def eventStructurePerEpoch(epochPath, fullyFilteredTGPath,
                           childFilteredTGPath, noiseFilteredTGPath,
                           unfilteredTGPath, outputPath, speechTierName,
                           laughterTierName):
    '''
    How frequent and with what duration did laughter, pauses, and speech
    occur

    For each epoch file in /epochPath/ (rows are unpacked as epoch
    number, start, stop) the four textgrids of the same name are
    cropped to every epoch and searched for the labels "FP", "MS" and
    "LA".  One comma-separated row per epoch is written to
    /outputPath/<name>.txt with, in order:

    speech duration, speech count, speech duration in the
    child-filtered textgrid, speech duration in the noise-filtered
    textgrid, speech duration in the unfiltered textgrid, unfiltered
    minus filtered speech duration, pause duration, pause count,
    laughter duration, laughter count
    '''
    def _getCountsAndDurations(tier, searchLabel):
        # Total duration and number of entries labeled /searchLabel/
        entryList = tier.find(searchLabel)
        durationList = [float(stop) - float(start)
                        for start, stop, label in entryList]
        return sum(durationList), len(entryList)

    def _cropToEpoch(textgrid, start, stop):
        # The part of /textgrid/ that lies between /start/ and /stop/
        return textgrid.crop(strictFlag=False, softFlag=False,
                             startTime=start, endTime=stop)

    utils.makeDir(outputPath)

    for name in utils.findFiles(epochPath, filterExt=".txt", stripExt=True):
        epochList = utils.openCSV(epochPath, name + ".txt")
        epochList = [(epochNum, float(start), float(stop))
                     for epochNum, start, stop in epochList]

        tg = tgio.openTextGrid(join(fullyFilteredTGPath,
                                    name + ".TextGrid"))
        childFilteredTG = tgio.openTextGrid(join(childFilteredTGPath,
                                                 name + ".TextGrid"))
        noiseFilteredTG = tgio.openTextGrid(join(noiseFilteredTGPath,
                                                 name + ".TextGrid"))
        origTG = tgio.openTextGrid(join(unfilteredTGPath,
                                        name + ".TextGrid"))

        outputList = []
        for epochNum, start, stop in epochList:
            subTG = _cropToEpoch(tg, start, stop)

            speechTier = subTG.tierDict[speechTierName]
            laughterTier = subTG.tierDict[laughterTierName]

            pauseDur, numPauses = _getCountsAndDurations(speechTier, "FP")
            speechDur, numSpeech = _getCountsAndDurations(speechTier, "MS")
            laughDur, numLaughter = _getCountsAndDurations(laughterTier,
                                                           "LA")

            # Only the durations are reported for the other textgrids
            subCSFilteredTG = _cropToEpoch(childFilteredTG, start, stop)
            csFilteredTier = subCSFilteredTG.tierDict[speechTierName]
            csFiltSpeech, _ = _getCountsAndDurations(csFilteredTier, "MS")

            subNoiseFilteredTG = _cropToEpoch(noiseFilteredTG, start, stop)
            nsFilteredTier = subNoiseFilteredTG.tierDict[speechTierName]
            nsFiltSpeech, _ = _getCountsAndDurations(nsFilteredTier, "MS")

            subOrigTG = _cropToEpoch(origTG, start, stop)
            origSpeechTier = subOrigTG.tierDict[speechTierName]
            fullSpeechDur, _ = _getCountsAndDurations(origSpeechTier, "MS")

            epochTuple = (speechDur, numSpeech, csFiltSpeech,
                          nsFiltSpeech, fullSpeechDur,
                          fullSpeechDur - speechDur, pauseDur, numPauses,
                          laughDur, numLaughter)
            outputList.append("%.02f, %d, %.02f, %.02f, %.02f, %.02f, %.02f, %d, %.02f, %d" % epochTuple)

        # 'with' guarantees the file is flushed and closed
        with open(join(outputPath, name + ".txt"), "w") as fd:
            fd.write("\n".join(outputList) + "\n")
def splitAudioOnTier(wavFN, tgFN, tierName, outputPath,
                     outputTGFlag=False, nameStyle=None,
                     noPartialIntervals=False):
    '''
    Outputs one subwav for each entry in the tier of a textgrid

    outputTGFlag: If True, outputs paired, cropped textgrids
                  If is type str (a tier name), outputs a paired, cropped
                  textgrid with only the specified tier
    nameStyle: if 'append': append interval label to output name
               if 'append_no_i': append label but not interval to output name
               if 'label': output name is the same as label
               if None: output name plus the interval number
    noPartialIntervals: if True: intervals in non-target tiers that are
                                  not wholly contained by an interval in
                                  the target tier will not be included in
                                  the output textgrids

    Returns a list of (start, stop, output wav file name) tuples, one
    per entry; the list is empty if the tier has no entries.
    '''
    tg = tgio.openTextGrid(tgFN)
    entryList = tg.tierDict[tierName].entryList

    # Build the output name template
    name = os.path.splitext(os.path.split(wavFN)[1])[0]
    numEntries = len(entryList)
    # log10(0) is undefined; an empty tier used to raise ValueError here
    if numEntries > 0:
        orderOfMagnitude = int(math.floor(math.log10(numEntries)))
    else:
        orderOfMagnitude = 0

    # We want one more zero in the output than the order of magnitude
    outputTemplate = "%s_%%0%dd" % (name, orderOfMagnitude + 1)

    firstWarning = True

    # If we're using the 'label' namestyle for outputs, all of the
    # interval labels have to be unique, or wave files with those
    # labels as names, will be overwritten
    if nameStyle == 'label':
        wordList = [word for _, _, word in entryList]
        multipleInstList = []
        for word in set(wordList):
            if wordList.count(word) > 1:
                multipleInstList.append(word)

        if len(multipleInstList) > 0:
            instListTxt = "\n".join(multipleInstList)
            print(("Overwriting wave files in: %s\n" +
                   "Intervals exist with the same name:\n%s")
                  % (outputPath, instListTxt))
            firstWarning = False

    # Output wave files
    outputFNList = []
    for i, entry in enumerate(entryList):
        start, stop, label = entry

        # Resolve output name
        outputName = outputTemplate % i
        if nameStyle == "append":
            outputName += "_" + label
        elif nameStyle == "append_no_i":
            outputName = name + "_" + label
        elif nameStyle == "label":
            outputName = label

        outputFNFullPath = join(outputPath, outputName + ".wav")

        if os.path.exists(outputFNFullPath) and firstWarning:
            print(("Overwriting wave files in: %s\n" +
                   "Files existed before or intervals exist with " +
                   "the same name:\n%s")
                  % (outputPath, outputName))

        _extractSubwav(wavFN, outputFNFullPath, start, stop)
        outputFNList.append((start, stop, outputName + ".wav"))

        # Output the textgrid if requested
        if outputTGFlag is not False:
            subTG = tg.crop(noPartialIntervals, False, start, stop)

            if isinstance(outputTGFlag, str):
                # Iterate over a copy: removing tiers while walking the
                # live name list can skip tiers.  The loop variable no
                # longer reuses the name of the tierName parameter.
                for otherTierName in list(subTG.tierNameList):
                    if otherTierName != outputTGFlag:
                        subTG.removeTier(otherTierName)

            # Shift everything so that the cropped textgrid starts at 0
            offset = -1 * start
            subTG = subTG.editTimestamps(offset, offset, offset)
            subTG.minTimestamp = 0
            subTG.maxTimestamp = stop - start

            subTG.save(join(outputPath, outputName + ".TextGrid"))

    return outputFNList