Пример #1
0
def extractIntensity(inputFN, outputFN, praatEXE,
                     minPitch, sampleStep=0.01, forceRegenerate=True,
                     undefinedValue=None):
    '''
    Extracts intensity values from an audio file

    Runs the bundled "get_intensity.praat" script on inputFN, which writes
    its results to outputFN, then loads outputFN with loadTimeSeriesData
    and returns the loaded data.

    minPitch - forwarded to the praat script
    sampleStep - forwarded to the praat script
    forceRegenerate - if True, praat is always rerun; otherwise praat is
                      only run when outputFN does not exist yet
    undefinedValue - forwarded to loadTimeSeriesData
    '''
    utils.makeDir(os.path.split(outputFN)[0])

    assert os.path.exists(inputFN)

    outputExists = os.path.exists(outputFN)
    if forceRegenerate is True or not outputExists:

        # Praat appends to the output file, so results left over from an
        # earlier run have to be deleted before running it again
        if outputExists:
            os.remove(outputFN)

        praatScriptFN = join(utils.scriptsPath, "get_intensity.praat")
        utils.runPraatScript(praatEXE, praatScriptFN,
                             [inputFN, outputFN, sampleStep,
                              minPitch, -1, -1])

    return loadTimeSeriesData(outputFN, undefinedValue=undefinedValue)
Пример #2
0
def extractIntensity(inputFN,
                     outputFN,
                     praatEXE,
                     minPitch,
                     sampleStep=0.01,
                     forceRegenerate=True,
                     undefinedValue=None):
    '''
    Extracts intensity values from an audio file

    The values are produced by the "get_intensity.praat" script, which
    writes them to outputFN.  The contents of outputFN, as loaded by
    loadTimeSeriesData, are returned.

    forceRegenerate - if not True and outputFN already exists, praat is
                      skipped and the existing file is loaded
    undefinedValue - handed to loadTimeSeriesData unchanged
    '''
    outputDir, _ = os.path.split(outputFN)
    utils.makeDir(outputDir)

    assert os.path.exists(inputFN)

    mustRunPraat = forceRegenerate is True or not os.path.exists(outputFN)
    if mustRunPraat:

        # The praat script opens its output in append mode; start from a
        # clean slate so old rows do not end up in the new result
        if os.path.exists(outputFN):
            os.remove(outputFN)

        praatArgs = [inputFN, outputFN, sampleStep, minPitch, -1, -1]
        utils.runPraatScript(praatEXE,
                             join(utils.scriptsPath, "get_intensity.praat"),
                             praatArgs)

    intensityList = loadTimeSeriesData(outputFN,
                                       undefinedValue=undefinedValue)
    return intensityList
Пример #3
0
def deleteUnlabeledIntervals(tgPath, wavPath, tierName, outputPath):
    """
    Runs deleteWavSections over the unlabeled intervals of a tier

    For every .TextGrid file found in tgPath, the intervals of tierName
    whose label is empty (after fillInBlanks()) are collected and handed to
    praatio_scripts.deleteWavSections, with doShrink=False, together with
    the wav file of the same name in wavPath.  The resulting wav files are
    written to outputPath under the same name.

    Does not assume TextGrid and wav files are inside same directory
    """

    utils.makeDir(outputPath)

    for name in utils.findFiles(tgPath, filterExt=".TextGrid", stripExt=True):

        tg = tgio.openTextGrid(join(tgPath, name + ".TextGrid"))

        # Get the unlabeled intervals
        tier = tg.tierDict[tierName].fillInBlanks()
        entryList = [entry for entry in tier.entryList if entry[2] == ""]

        wavFN = join(wavPath, name + ".wav")
        outputWavFN = join(outputPath, name + ".wav")

        # Sometimes the textgrid and wav file differ by some small amount
        # If the textgrid is longer, the script crashes
        wavDur = _getSoundFileDuration(wavFN)

        # A tier that is labeled from start to finish has no unlabeled
        # intervals at all, so only look at the last one when it exists
        if entryList:
            lastStart, lastStop = entryList[-1][0], entryList[-1][1]
            if lastStart < wavDur < lastStop:
                entryList[-1] = (lastStart, wavDur, "")

        praatio_scripts.deleteWavSections(wavFN,
                                          outputWavFN,
                                          entryList,
                                          doShrink=False)
Пример #4
0
def _extractPIPiecewise(inputFN,
                        outputFN,
                        praatEXE,
                        minPitch,
                        maxPitch,
                        tgFN,
                        tierName,
                        tmpOutputPath,
                        sampleStep=0.01,
                        silenceThreshold=0.03,
                        forceRegenerate=True,
                        undefinedValue=None,
                        medianFilterWindowSize=0,
                        pitchQuadInterp=False):
    '''
    Extracts pitch and intensity interval-by-interval using a textgrid

    The audio is first split on tierName into separate files stored in
    tmpOutputPath.  _extractPIFile is then run on each of those files and
    the combined rows are written to outputFN as comma-separated text.

    Compared with running _extractPIFile on the whole recording this is
    faster when only the labeled regions are of interest, particularly for
    longer files.

    Returns the contents of outputFN as loaded by loadTimeSeriesData.  If
    outputFN already exists and forceRegenerate is not True, the extraction
    is skipped and the existing file is loaded.
    '''
    utils.makeDir(os.path.split(outputFN)[0])

    assert os.path.exists(inputFN)

    if forceRegenerate is True or not os.path.exists(outputFN):

        utils.makeDir(tmpOutputPath)
        segmentList = praatio_scripts.splitAudioOnTier(
            inputFN, tgFN, tierName, tmpOutputPath, False)

        rowList = []
        for segmentStart, _, segmentFN in segmentList:
            segmentTrackFN = os.path.splitext(segmentFN)[0] + ".txt"
            segmentPIList = _extractPIFile(
                join(tmpOutputPath, segmentFN),
                join(tmpOutputPath, segmentTrackFN),
                praatEXE,
                minPitch,
                maxPitch,
                sampleStep,
                silenceThreshold,
                forceRegenerate=True,
                medianFilterWindowSize=medianFilterWindowSize,
                pitchQuadInterp=pitchQuadInterp)

            # Shift every timestamp by the start time reported for the
            # segment before serializing the row
            for time, pitchV, intensityV in segmentPIList:
                shiftedTime = "%0.3f" % (float(time) + segmentStart)
                rowList.append(",".join((shiftedTime,
                                         str(pitchV),
                                         str(intensityV))))

        with open(outputFN, "w") as fd:
            fd.write("\n".join(rowList) + "\n")

    return loadTimeSeriesData(outputFN, undefinedValue=undefinedValue)
Пример #5
0
def _audioToPIFile(inputPath,
                   inputFN,
                   outputPath,
                   outputFN,
                   praatEXE,
                   minPitch,
                   maxPitch,
                   sampleStep=0.01,
                   silenceThreshold=0.03,
                   forceRegenerate=True,
                   tgPath=None,
                   tgFN=None,
                   tierName=None,
                   undefinedValue=None):
    '''
    Extracts pitch and intensity values from an audio file using praat

    When tgPath, tgFN and tierName are all given, the "segments" variant of
    the praat script is run and receives the textgrid and tier name;
    otherwise the plain variant is run on the audio file alone.

    Returns the contents of the output file as loaded by loadPIAndTime.  If
    the output file already exists and forceRegenerate is not True, praat
    is skipped and the existing file is loaded.
    '''
    sourceFN = join(inputPath, inputFN)
    targetFN = join(outputPath, outputFN)

    utils.makeDir(outputPath)

    assert os.path.exists(sourceFN)

    targetExists = os.path.exists(targetFN)
    if forceRegenerate is True or not targetExists:

        # Praat appends to the output file, so an older result has to be
        # removed before a new one is generated
        if targetExists:
            os.remove(targetFN)

        noTextgrid = tgPath is None or tgFN is None or tierName is None
        if noTextgrid:
            scriptName = "get_pitch_and_intensity_via_python.praat"
            argList = [sourceFN, targetFN, sampleStep, minPitch, maxPitch,
                       silenceThreshold, -1, -1]
        else:
            scriptName = "get_pitch_and_intensity_segments_via_python.praat"
            argList = [sourceFN, targetFN, join(tgPath, tgFN), tierName,
                       sampleStep, minPitch, maxPitch, silenceThreshold]

        utils.runPraatScript(praatEXE,
                             join(utils.scriptsPath, scriptName),
                             argList)

    return loadPIAndTime(outputPath, outputFN, undefinedValue=undefinedValue)
Пример #6
0
def autoSegmentSpeech(praatEXE, inputWavPath, rawTGPath, finalTGPath):
    '''
    Annotates silences with praat and post-processes the textgrids

    praat_scripts.annotateSilences is run with inputWavPath and rawTGPath.
    Every .TextGrid file then found in rawTGPath is passed through
    markTranscriptForAnnotations (with the name "silences") and the result
    is stored under the same file name in finalTGPath.
    '''
    utils.makeDir(finalTGPath)

    praat_scripts.annotateSilences(praatEXE, inputWavPath, rawTGPath)

    rawTGList = utils.findFiles(rawTGPath, filterExt=".TextGrid")
    for tgFN in rawTGList:
        rawTGFN = join(rawTGPath, tgFN)
        finalTGFN = join(finalTGPath, tgFN)
        markTranscriptForAnnotations(rawTGFN, "silences", finalTGFN)
Пример #7
0
def autoSegmentSpeech(praatEXE, inputWavPath, rawTGPath, finalTGPath):
    '''
    Builds textgrids with silences annotated, one per raw textgrid

    Step 1: praat_scripts.annotateSilences(praatEXE, inputWavPath,
            rawTGPath) is run.
    Step 2: each .TextGrid file in rawTGPath is handed to
            markTranscriptForAnnotations along with the name "silences"
            and an output path of the same file name inside finalTGPath.
    '''
    utils.makeDir(finalTGPath)

    praat_scripts.annotateSilences(praatEXE, inputWavPath, rawTGPath)

    for tgName in utils.findFiles(rawTGPath, filterExt=".TextGrid"):
        sourceTG = join(rawTGPath, tgName)
        targetTG = join(finalTGPath, tgName)
        markTranscriptForAnnotations(sourceTG, "silences", targetTG)
Пример #8
0
def extractPitch(wavFN,
                 outputFN,
                 praatEXE,
                 minPitch,
                 maxPitch,
                 sampleStep=0.01,
                 silenceThreshold=0.03,
                 forceRegenerate=True,
                 undefinedValue=None,
                 medianFilterWindowSize=0,
                 pitchQuadInterp=False):
    '''
    Extract pitch at regular intervals from the input wav file

    The "get_pitch.praat" script writes the data to outputFN; the file is
    then loaded and returned as a list in the form
    [(timeV1, pitchV1), (timeV2, pitchV2), ...]

    sampleStep - the frequency to sample pitch at
    silenceThreshold - segments with lower intensity won't be analyzed
                       for pitch
    forceRegenerate - if False and outputFN already exists, praat is not
                      rerun and the existing pitch file is read in
    undefinedValue - if None remove from the dataset, otherwise set to
                     undefinedValue
    medianFilterWindowSize - forwarded to the praat script
    pitchQuadInterp - if True, quadratically interpolate pitch
    '''
    utils.makeDir(os.path.split(outputFN)[0])

    # The praat script takes the flag as 1 / 0 rather than True / False
    doInterpolation = 1 if pitchQuadInterp is True else 0

    assert os.path.exists(wavFN)

    if forceRegenerate is True or not os.path.exists(outputFN):
        if os.path.exists(outputFN):
            os.remove(outputFN)

        praatScriptFN = join(utils.scriptsPath, "get_pitch.praat")
        utils.runPraatScript(
            praatEXE, praatScriptFN,
            [wavFN, outputFN, sampleStep, minPitch, maxPitch,
             silenceThreshold, -1, -1, medianFilterWindowSize,
             doInterpolation])

    return loadTimeSeriesData(outputFN, undefinedValue=undefinedValue)
Пример #9
0
def _extractPIPiecewise(inputFN, outputFN, praatEXE,
                        minPitch, maxPitch, tgFN, tierName,
                        tmpOutputPath, sampleStep=0.01,
                        silenceThreshold=0.03, pitchUnit="Hertz",
                        forceRegenerate=True, undefinedValue=None,
                        medianFilterWindowSize=0, pitchQuadInterp=False):
    '''
    Extracts pitch and intensity interval-by-interval using a textgrid

    The audio is split on tierName into files stored in tmpOutputPath,
    _extractPIFile is run on each of them, and all rows are written to
    outputFN as comma-separated text.

    This is faster than calling _extractPIFile on the whole recording when
    only the labeled regions need to be sampled, particularly for longer
    files.

    Returns the contents of outputFN as loaded by loadTimeSeriesData.  If
    outputFN already exists and forceRegenerate is not True, the existing
    file is loaded without redoing the extraction.
    '''
    utils.makeDir(os.path.split(outputFN)[0])

    assert os.path.exists(inputFN)

    needsExtraction = (forceRegenerate is True
                       or not os.path.exists(outputFN))
    if needsExtraction:

        utils.makeDir(tmpOutputPath)
        pieceList = praatio_scripts.splitAudioOnTier(
            inputFN, tgFN, tierName, tmpOutputPath, False)

        serializedRows = []
        for pieceStart, _, pieceFN in pieceList:
            pieceTrackFN = os.path.splitext(pieceFN)[0] + ".txt"
            piecePIList = _extractPIFile(
                join(tmpOutputPath, pieceFN),
                join(tmpOutputPath, pieceTrackFN),
                praatEXE, minPitch, maxPitch,
                sampleStep, silenceThreshold,
                pitchUnit, forceRegenerate=True,
                medianFilterWindowSize=medianFilterWindowSize,
                pitchQuadInterp=pitchQuadInterp)

            # Each timestamp is offset by the start time of its piece
            for time, pitchV, intensityV in piecePIList:
                fields = ("%0.3f" % (float(time) + pieceStart),
                          str(pitchV),
                          str(intensityV))
                serializedRows.append(",".join(fields))

        with open(outputFN, "w") as fd:
            fd.write("\n".join(serializedRows) + "\n")

    return loadTimeSeriesData(outputFN, undefinedValue=undefinedValue)
Пример #10
0
def _extractPIFile(inputFN,
                   outputFN,
                   praatEXE,
                   minPitch,
                   maxPitch,
                   sampleStep=0.01,
                   silenceThreshold=0.03,
                   pitchUnit="Hertz",
                   forceRegenerate=True,
                   undefinedValue=None,
                   medianFilterWindowSize=0,
                   pitchQuadInterp=False):
    '''
    Extracts pitch and intensity values from a whole audio file

    Runs the "get_pitch_and_intensity.praat" script, which writes to
    outputFN, and returns the contents of outputFN as loaded by
    loadTimeSeriesData.

    If outputFN already exists and forceRegenerate is not True, praat is
    skipped and the previously written file is loaded instead.
    '''
    utils.makeDir(os.path.split(outputFN)[0])

    assert os.path.exists(inputFN)

    outputExists = os.path.exists(outputFN)
    if forceRegenerate is True or not outputExists:

        # Praat appends to the output file; drop the old one so that the
        # new result is not mixed with stale rows
        if outputExists:
            os.remove(outputFN)

        # The praat script takes the flag as 1 / 0
        doInterpolation = 1 if pitchQuadInterp is True else 0

        praatScriptFN = join(utils.scriptsPath,
                             "get_pitch_and_intensity.praat")
        utils.runPraatScript(
            praatEXE, praatScriptFN,
            [inputFN, outputFN, sampleStep, minPitch, maxPitch,
             silenceThreshold, pitchUnit, -1, -1, medianFilterWindowSize,
             doInterpolation])

    return loadTimeSeriesData(outputFN, undefinedValue=undefinedValue)
Пример #11
0
def extractPitch(wavFN, outputFN, praatEXE,
                 minPitch, maxPitch, sampleStep=0.01,
                 silenceThreshold=0.03, forceRegenerate=True,
                 undefinedValue=None, medianFilterWindowSize=0,
                 pitchQuadInterp=False):
    '''
    Extract pitch at regular intervals from the input wav file

    Data is written to outputFN by the "get_pitch.praat" script and then
    returned in a list in the form
    [(timeV1, pitchV1), (timeV2, pitchV2), ...]

    sampleStep - the frequency to sample pitch at
    silenceThreshold - segments with lower intensity won't be analyzed
                       for pitch
    forceRegenerate - if False and outputFN already exists, just read in
                      the existing pitch file
    undefinedValue - if None remove from the dataset, otherwise set to
                     undefinedValue
    medianFilterWindowSize - forwarded to the praat script
    pitchQuadInterp - if True, quadratically interpolate pitch
    '''
    outputDir, _ = os.path.split(outputFN)
    utils.makeDir(outputDir)

    # Translate the boolean into the 1 / 0 flag given to the praat script
    doInterpolation = 0
    if pitchQuadInterp is True:
        doInterpolation = 1

    assert os.path.exists(wavFN)

    mustRunPraat = forceRegenerate is True or not os.path.exists(outputFN)
    if mustRunPraat:
        if os.path.exists(outputFN):
            os.remove(outputFN)

        praatArgs = [wavFN, outputFN, sampleStep,
                     minPitch, maxPitch, silenceThreshold, -1, -1,
                     medianFilterWindowSize, doInterpolation]
        utils.runPraatScript(praatEXE,
                             join(utils.scriptsPath, "get_pitch.praat"),
                             praatArgs)

    pitchList = loadTimeSeriesData(outputFN, undefinedValue=undefinedValue)
    return pitchList
def printFormantsToCSV(args):
    '''
    Sets up the output folders for formant extraction

    Within the visible code, this creates ./data/praat_extraction and its
    "formants" subfolder and defines a helper that runs
    praat_scripts.getFormants on a single file.

    NOTE(review): the listing appears to be cut off after the helper's
    first statement -- in the visible code the helper is never called,
    does not return formantData, and args is not used.  Confirm against
    the full source.
    '''

    # Audio is read from ./data; results go below ./data/praat_extraction
    wavPath = os.path.abspath(join(".", "data"))
    # tgPath = os.path.abspath(join(".", "files"))
    rootOutputFolder = os.path.abspath(join(".", "data", "praat_extraction"))
    pitchPath = join(rootOutputFolder, "pitch")  # not used in the visible code
    formantsPath = join(rootOutputFolder, "formants")

    # make the directories
    # NOTE(review): the praat location is a hard-coded, machine-specific path
    praatEXE = r"D:\voice\Praat.exe"
    utils.makeDir(rootOutputFolder)
    utils.makeDir(formantsPath)

    def printFormantToCSVFile(filename: str):
        # Runs getFormants on wavPath/filename; the output path handed over
        # is formantsPath/<filename>_formants.txt.  The meaning of the three
        # trailing numeric arguments is defined by praat_scripts.getFormants
        formantData = praat_scripts.getFormants(
            praatEXE, join(wavPath, filename),
            join(formantsPath, filename + "_formants.txt"), 5500, 0.001, 0.050)
Пример #13
0
def deleteVowels(inputTGFN,
                 inputWavFN,
                 outputPath,
                 doShrink,
                 atZeroCrossing=True):
    '''
    Saves copies of a wav file and its textgrid with the vowels deleted

    Entries of the "phone" tier whose label is not a vowel (according to
    isVowel) are kept.  The audio is reopened with only the kept entries
    and the remaining regions are erased from a copy of the textgrid.
    Both results are saved in outputPath under the input file names.

    doShrink - forwarded to audioio.openAudioFile and to eraseRegion
    atZeroCrossing - if True, praatio_scripts.tgBoundariesToZeroCrossings
                     is also run, with a target file inside the
                     "zero_crossing_tgs" subfolder of outputPath
    '''
    utils.makeDir(outputPath)

    tgName = os.path.split(inputTGFN)[1]
    outputWavFN = join(outputPath, os.path.split(inputWavFN)[1])
    outputTGFN = join(outputPath, tgName)

    if atZeroCrossing is True:
        zeroCrossingDir = join(outputPath, "zero_crossing_tgs")
        utils.makeDir(zeroCrossingDir)

        tg = tgio.openTextgrid(inputTGFN)

        # NOTE(review): the return value is not used here; the rest of the
        # function keeps working with tg
        praatio_scripts.tgBoundariesToZeroCrossings(
            tg,
            audioio.WavQueryObj(inputWavFN),
            join(zeroCrossingDir, tgName))
    else:
        tg = tgio.openTextgrid(inputTGFN)

    keptEntries = [entry for entry in tg.tierDict["phone"].entryList
                   if not isVowel(entry[2])]
    erasedRegions = utils.invertIntervalList(keptEntries, tg.maxTimestamp)

    trimmedWav = audioio.openAudioFile(inputWavFN,
                                       keepList=keptEntries,
                                       doShrink=doShrink)
    trimmedWav.save(outputWavFN)

    # Regions are erased from the last one to the first
    shrunkTG = copy.deepcopy(tg)
    for regionStart, regionStop in sorted(erasedRegions, reverse=True):
        shrunkTG = shrunkTG.eraseRegion(regionStart, regionStop,
                                        doShrink=doShrink)

    shrunkTG.save(outputTGFN)
Пример #14
0
def deleteVowels(inputTGFN, inputWavFN, outputPath, doShrink,
                 atZeroCrossing=True):
    '''
    Removes the vowels from a wav file and from its textgrid

    The "phone" tier decides what stays: an entry is kept when isVowel
    returns a false value for its label.  The wav file is reopened with
    the kept entries only, every other region is erased from a copy of the
    textgrid, and both are saved to outputPath using the input file names.

    doShrink - passed on to audioio.openAudioFile and eraseRegion
    atZeroCrossing - if True, the textgrid is additionally run through
                     praatio_scripts.tgBoundariesToZeroCrossings with an
                     output file in outputPath/zero_crossing_tgs
    '''
    utils.makeDir(outputPath)

    _, wavName = os.path.split(inputWavFN)
    _, tgName = os.path.split(inputTGFN)
    savedWavFN = join(outputPath, wavName)
    savedTGFN = join(outputPath, tgName)

    if atZeroCrossing is True:
        zeroCrossingPath = join(outputPath, "zero_crossing_tgs")
        utils.makeDir(zeroCrossingPath)

        tg = tgio.openTextgrid(inputTGFN)
        queryObj = audioio.WavQueryObj(inputWavFN)

        # NOTE(review): whatever this call returns is discarded; tg itself
        # is what the code below continues with
        praatio_scripts.tgBoundariesToZeroCrossings(
            tg, queryObj, join(zeroCrossingPath, tgName))
    else:
        tg = tgio.openTextgrid(inputTGFN)

    phoneEntries = tg.tierDict["phone"].entryList
    keepList = [entry for entry in phoneEntries if not isVowel(entry[2])]
    deleteList = utils.invertIntervalList(keepList, tg.maxTimestamp)

    audioio.openAudioFile(inputWavFN,
                          keepList=keepList,
                          doShrink=doShrink).save(savedWavFN)

    # Work through the regions back to front
    resultTG = copy.deepcopy(tg)
    for start, stop in sorted(deleteList, reverse=True):
        resultTG = resultTG.eraseRegion(start, stop, doShrink=doShrink)

    resultTG.save(savedTGFN)
Пример #15
0
def _extractPIFile(inputFN, outputFN, praatEXE,
                   minPitch, maxPitch, sampleStep=0.01, silenceThreshold=0.03,
                   pitchUnit="Hertz", forceRegenerate=True,
                   undefinedValue=None, medianFilterWindowSize=0,
                   pitchQuadInterp=False):
    '''
    Extracts pitch and intensity values from an audio file

    The values are written to outputFN by the
    "get_pitch_and_intensity.praat" script and then read back with
    loadTimeSeriesData, whose result is returned.

    When outputFN is already present and forceRegenerate is not True, the
    script is not run and the existing file is read back as is.
    '''
    outputDir, _ = os.path.split(outputFN)
    utils.makeDir(outputDir)

    assert os.path.exists(inputFN)

    mustRunPraat = forceRegenerate is True or not os.path.exists(outputFN)
    if mustRunPraat:

        # The praat script writes in append mode, which means any previous
        # output has to be cleared away first
        if os.path.exists(outputFN):
            os.remove(outputFN)

        # 1 / 0 flag for the praat script
        doInterpolation = 0
        if pitchQuadInterp is True:
            doInterpolation = 1

        praatArgs = [inputFN, outputFN, sampleStep,
                     minPitch, maxPitch, silenceThreshold, pitchUnit, -1, -1,
                     medianFilterWindowSize, doInterpolation]
        utils.runPraatScript(
            praatEXE,
            join(utils.scriptsPath, "get_pitch_and_intensity.praat"),
            praatArgs)

    piList = loadTimeSeriesData(outputFN, undefinedValue=undefinedValue)
    return piList
Пример #16
0
def padEndWithSilence(indir, outdir):
    '''
    Appends zero-valued padding to the end of every .wav file in indir

    Each .wav file found in indir is rewritten to outdir under the same
    name with up to 200000 zero bytes of frame data added after the
    original frames.  The channel count, sample width and frame rate are
    copied from the input file; the output is written uncompressed.
    '''
    # Size of the padding, in bytes of raw frame data
    padSize = 200000

    utils.makeDir(outdir)

    for name in utils.findFiles(indir, filterExt=".wav", stripExt=False):

        inwavfile = join(indir, name)
        outwavfile = join(outdir, name)

        # Read everything needed from the input before the output is
        # opened, and release the handle even if reading fails
        inwav = wave.open(inwavfile, 'rb')
        try:
            nchannels = inwav.getnchannels()
            sampwidth = inwav.getsampwidth()
            framerate = inwav.getframerate()
            data = inwav.readframes(inwav.getnframes())
        finally:
            inwav.close()

        # readframes() returns bytes, so the padding has to be bytes as
        # well (adding a str to it raises TypeError on Python 3).  The
        # padding is trimmed to a whole number of frames so that no
        # partial frame is written.
        frameSize = nchannels * sampwidth
        silence = b'\x00' * (padSize - padSize % frameSize)

        outwav = wave.open(outwavfile, 'wb')
        try:
            outwav.setnchannels(nchannels)
            outwav.setsampwidth(sampwidth)
            outwav.setframerate(framerate)
            outwav.setcomptype('NONE', 'not compressed')
            outwav.writeframes(data + silence)
        finally:
            outwav.close()
Пример #17
0
def _audioToPIPiecewise(inputPath,
                        inputFN,
                        outputPath,
                        outputFN,
                        praatEXE,
                        minPitch,
                        maxPitch,
                        tgPath,
                        tgFN,
                        tierName,
                        tmpOutputPath,
                        sampleStep=0.01,
                        silenceThreshold=0.03,
                        forceRegenerate=True,
                        undefinedValue=None):
    '''
    Extracts pitch and int from each labeled interval in a textgrid

    The audio is split on tierName into separate files stored in
    tmpOutputPath, _audioToPIFile is run on each of them, and the combined
    rows are written to the output file as comma-separated text.

    This has the benefit of being faster than using _audioToPIFile if only
    labeled regions need to have their pitch values sampled, particularly
    for longer files.

    Returns the result as loaded by loadPIAndTime.  Will load the
    serialized result if the output file already exists and forceRegenerate
    is not True.
    '''

    inputFullFN = join(inputPath, inputFN)
    tgFullFN = join(tgPath, tgFN)
    outputFullFN = join(outputPath, outputFN)

    utils.makeDir(outputPath)

    assert (os.path.exists(inputFullFN))
    firstTime = not os.path.exists(outputFullFN)
    if firstTime or forceRegenerate is True:

        utils.makeDir(tmpOutputPath)
        splitAudioList = praatio_scripts.splitAudioOnTier(
            inputFullFN,
            tgFullFN,
            tierName,
            tmpOutputPath,
            outputTGFlag=False,
            nameStyle='append_no_i')
        allPIList = []
        for start, _, fn in splitAudioList:
            # The track file is stored next to the segment's audio in
            # tmpOutputPath, so it needs its own extension: naming it
            # "<segment>.wav" would make the extraction output path equal
            # to the path of the audio it is extracted from.
            tmpTrackName = os.path.splitext(fn)[0] + ".txt"
            piList = _audioToPIFile(tmpOutputPath,
                                    fn,
                                    tmpOutputPath,
                                    tmpTrackName,
                                    praatEXE,
                                    minPitch,
                                    maxPitch,
                                    sampleStep,
                                    silenceThreshold,
                                    forceRegenerate=True)

            # Shift every timestamp by the start time of its segment
            piList = [("%0.3f" % (float(time) + start), str(pV), str(iV))
                      for time, pV, iV in piList]
            allPIList.extend(piList)

        allPIList = [",".join(row) for row in allPIList]
        with open(outputFullFN, "w") as fd:
            fd.write("\n".join(allPIList) + "\n")

    piList = loadPIAndTime(outputPath, outputFN, undefinedValue=undefinedValue)
    return piList
Пример #18
0
from praatio import pitch_and_intensity
from praatio import praat_scripts
from praatio import tgio
from praatio.utilities import utils

# Audio files and textgrids are both read from ./files
wavPath = os.path.abspath(join(".", "files"))
tgPath = os.path.abspath(join(".", "files"))

# Everything generated here is stored below ./files/pitch_extraction
rootOutputFolder = os.path.abspath(join(".", "files", "pitch_extraction"))
pitchPath = join(rootOutputFolder, "pitch")
formantsPath = join(rootOutputFolder, "formants")
pitchMeasuresPath = join(rootOutputFolder, "pitch_measures")
rmsIntensityPath = join(rootOutputFolder, "rms_intensity")

# Location of the praat executable (machine specific)
praatEXE = r"C:\Praat.exe"
#praatEXE = "/Applications/Praat.app/Contents/MacOS/Praat"

# Create the output folders, parent first
for _outputFolder in (rootOutputFolder, pitchPath, pitchMeasuresPath,
                      rmsIntensityPath, formantsPath):
    utils.makeDir(_outputFolder)

# Pitch and intensity for the whole of bobby.wav, with 50 and 350 as the
# minimum and maximum pitch arguments; an existing bobby.txt is reused
# rather than regenerated
bobbyPitchData = pitch_and_intensity.extractPI(
    join(wavPath, "bobby.wav"),
    join(pitchPath, "bobby.txt"),
    praatEXE,
    50,
    350,
    forceRegenerate=False)

# Here are two examples of the new functionality of extracting pitch
# from only labeled intervals in a textgrid.
Пример #19
0
def getPitchData(tgpath, wavpath, outpath, matlabExePath, matlabScriptPath, 
					praatExePath, praatScriptPath):

	"""
	Generates clean textgrid files with the mother's speech isolated from room noise and child speech.
	 
	Directory variables below which are ALL CAPS, such as WAV_DIR and EPOCH_DIR hold files which are
	referenced throughout the workflow as opposed to directories which contain textgrids at a certain
	stage of processing.

	Directories ending in numbers such as textgrids_tier_names_checked_(01) are considered to hold textgrids 
	at certain milestones of processing, and are placed into the working directory instead of the TEMP
	directory.

	"""

	def _nextStep(n):
		if n == int(n):
			return n+1.0
		else:
			return math.ceil(n)

	# initialize

#	 tg_dir = join(path, "TEXTGRIDS_INTERVALS_MARKED")
	tg_dir = tgpath
#	 WAV_DIR = join(path, "WAVS")
	WAV_DIR = wavpath
	path = outpath
	io.make_dir(path)
	TEMP = tempfile.mkdtemp()
	tgStep = 0.0
	praatStep = 0.0
	uweStep = 0.0
	wavStep = 0.0

	# ensure the tier names are consistent

	tgStep+=0.1
	new_tg_dir = join(path, "_textgrids_{}_tier_names_checked".format(tgStep))

	general.renameTiers(
						tg_dir
						,new_tg_dir
						)

	tg_dir = new_tg_dir

	# replace all labels from Mother tier other than MS

	tgStep+=0.1
	new_tg_dir = join(path, "_textgrids_{}_MS_label_only_in_Mother_tier".format(tgStep))

	general.removeIntervalsFromTierByLabel(
											tg_dir
											,"Mother"
											,"MS"
											,new_tg_dir
											,removeAllBut=True
											)
	tg_dir = new_tg_dir

	# crop portions of intervals in Mother tier overlapping with Mother's Backchannel tier, 
	# meaning that all portions of MS intervals overlapping with LA intervals (laughter) are cropped

	tgStep+=0.1
	new_tg_dir = join(path, "_textgrids_{}_LA_removed".format(tgStep))

	general.isolateMotherSpeech(
								tg_dir
								,"Mother's Backchannel"
								,new_tg_dir
								)

	tg_dir = new_tg_dir

	# set current tg_dir as directory referenced after preprocessing and before cropping

	TG_PREPROCESSED = tg_dir


	# crop portions of intervals in Mother tier overlapping with Child tier, then Room tier, then both.
	# after each cropping, intervals shorter than can be processed are removed from the new Mother tiers
	# non-overlapping portions of intervals in Mother tier are retained

	tgStep = _nextStep(tgStep)
	TG_CS_RMVD_DIR = join(path, "_textgrids_{}_child_removed".format(tgStep))

	tgStep = _nextStep(tgStep)
	TG_ROOM_RMVD_DIR = join(path, "_textgrids_{}_room_removed".format(tgStep))

	tgStep = _nextStep(tgStep)
	TG_CS_ROOM_RMVD_DIR = join(path, "_textgrids_{}_child_room_removed".format(tgStep))

	general.isolateMotherSpeech(
								tg_dir
								,"Child"
								,join(TEMP, "cs_rmvd")
								)
	general.filterShortIntervalsFromTier(
											join(TEMP, "cs_rmvd")
											,"Mother"
											,0.15
											,TG_CS_RMVD_DIR
											)

	general.isolateMotherSpeech(
								tg_dir 
								,"Room"
								,join(TEMP, "rm_rmvd")
								)
	general.filterShortIntervalsFromTier(
											join(TEMP, "rm_rmvd")
											,"Mother"
											,0.15
											,TG_ROOM_RMVD_DIR
											)

	general.isolateMotherSpeech(
								TG_CS_RMVD_DIR
								,"Room"
								,join(TEMP, "cs_rm_rmvd")
								)
	general.filterShortIntervalsFromTier(
											join(TEMP, "cs_rm_rmvd")
											,"Mother"
											,0.15
											,TG_CS_ROOM_RMVD_DIR
											)

################################
# TODO: Delete these lines
################################

#	 TG_CS_ROOM_RMVD_DIR = join(path, "TEXTGRIDS_FROM_OLD_CODE")
#	 TG_CS_RMVD_DIR = join(path, "TEXTGRIDS_OLD_CODE_CS_RMVD")
#	 TG_ROOM_RMVD_DIR = join(path, "TEXTGRIDS_OLD_CODE_ROOM_RMVD")

################################
################################
################################
	tg_dir = TG_CS_ROOM_RMVD_DIR
	

	# create directory of tg_info files (tier entry information as plain text listing)

	TG_INFO_DIR = join(path, "__tg_info")

	general.extractTGInfo(
							tg_dir
							,TG_INFO_DIR
							,"Mother"
							,searchForMothersSpeech=False
							)


	# generate an epoch file (.txt file) corresponding to the Epochs tier in each textgrid (start, stop, label)

	EPOCH_DIR = join(path, "__epochs")

	general.generateEpochFiles(
								tg_dir
								,WAV_DIR
								,EPOCH_DIR
								)

	# pad wav files with about two seconds of silence at the end
	# the next step does not process wav files successfully if the end of the last MS interval is too near the end of the wav

	wavStep = _nextStep(wavStep)
	new_wav_dir = join(path, "_wavs_{}_padded_w_silence".format(wavStep))

	padEndWithSilence(
						WAV_DIR
						,new_wav_dir
						)

	WAV_DIR = new_wav_dir


	# remove intervals from Mother tier not marked MS
	# this is done in order to try to eliminate loud noises which affect how praat extracts F0 when processing entire wav files

	wavStep = _nextStep(wavStep)
	new_wav_dir = join(path, "_wavs_{}_nonMS_zeroed_out".format(wavStep))

	deleteUnlabeledIntervals(
								tg_dir
								,WAV_DIR
								,"Mother"
								,new_wav_dir
								)

	WAV_DIR = new_wav_dir


	# extract syllable nuclei to determine speech rate (MATLAB REQUIRED)

	wav_temp_dir = join(TEMP, "_subset_wav_files")

	uweStep = _nextStep(uweStep)
	syllable_nuclei_dir = join(path, "_uwe_{}_syllable_nuclei_whole".format(uweStep))

	tgStep = _nextStep(tgStep)
	new_tg_dir = join(path, "_textgrids_{}_syllable_nuclei_added".format(tgStep))

	markupTextgridWithSyllableNuclei(
										WAV_DIR
										,tg_dir
										,"Mother"
										,wav_temp_dir
										,syllable_nuclei_dir
										,matlabExePath
										,matlabScriptPath
										,new_tg_dir
										,printCmd=True
										,outputTGFlag=False
										)
	tg_dir = new_tg_dir


	# acoustic analysis

	uweStep = _nextStep(uweStep)
	nucleus_listing_per_file_dir = join(path, "_uwe_{}_nucleus_listing_mothers_speech".format(uweStep))

	uweStep = _nextStep(uweStep)
	SPEECH_RATE_PER_EPOCH_DIR = join(path, "_uwe_{}_speech_rate_for_epochs".format(uweStep))

	general.aggregateSpeechRate(
								TG_INFO_DIR
								,syllable_nuclei_dir
								,nucleus_listing_per_file_dir
								,44100
								)
	general.uwePhoneCountForEpochs(
									EPOCH_DIR
									,TG_INFO_DIR
									,nucleus_listing_per_file_dir
									,SPEECH_RATE_PER_EPOCH_DIR
									)


	# The following code can be run over the whole audio files, regardless of epoch
	# or textgrids (we'll extract pitch information for the intervals and
	# epochs later) 

	# The first Praat section below extracts pitch data from one wav file with
	# unlabeled intervals silenced.
	#
	# The second Praat section splits labeled intervals into subwavs.
	#
	# It is recommended to use the first section.
	#
	# Regardless of which is used make sure the corresponding aggregate section is 
	# uncommented below, or that both are if both full wavs and subwavs are used.

	praatStep = _nextStep(praatStep)
	praat_dir = join(path, "_praat_{}_75Hz_750Hz_fullwav".format(praatStep))
	utils.makeDir(praat_dir)

	praatStep+=0.1
	PI_FULLWAV_DIR = join(path, "_praat_{}_75Hz_750Hz_fullwav_filter9".format(praatStep))
	utils.makeDir(PI_FULLWAV_DIR)

	for fn in utils.findFiles(WAV_DIR, filterExt=".wav", stripExt=True):
		print(fn+".wav")
		userPitchData = pitch_and_intensity.audioToPI(
														inputPath=WAV_DIR
														,inputFN=fn+".wav"
														,outputPath=praat_dir
														,outputFN=fn+".txt"
														,praatEXE=praatExePath
														,minPitch=75
														,maxPitch=750
														,sampleStep=0.01
														,silenceThreshold=0.03
#														 ,silenceThreshold=0.01
#														 ,silenceThreshold=0.001
#														 ,silenceThreshold=0.0001
#														 ,silenceThreshold=0.00001
														,forceRegenerate=True
#														 ,tgPath=tg_dir
#														 ,tgFN=fn+".TextGrid"
#														 ,tierName="Mother"
#														 ,tmpOutputPath=TEMP
														)
		filteredPitchData = pitch_and_intensity.generatePIMeasures(
																	userPitchData
																	,tg_dir
																	,fn+".TextGrid"
																	,tierName="Epochs"
																	,doPitch=True
																	,medianFilterWindowSize=9
																	)
		with open(join(PI_FULLWAV_DIR, fn+'.txt'), 'w') as outfile:
			for line in filteredPitchData:
				line = [str(x) for x in line]
				outfile.write(",".join(line)+'\n')



#	praatStep = _nextStep(praatStep)
#	praat_dir = join(path, "_praat_{}_75Hz_750Hz_subwav".format(praatStep))
#	utils.makeDir(praat_dir)
#
#	praatStep+=0.1
#	PI_SUBWAV_DIR = join(path, "_praat_{}_75Hz_750Hz_subwav_filter9".format(praatStep))
#	utils.makeDir(PI_SUBWAV_DIR)
#
#	for fn in utils.findFiles(WAV_DIR, filterExt=".wav", stripExt=True):
#		print(fn+".wav")
#		userPitchData = pitch_and_intensity.audioToPI(
#														inputPath=WAV_DIR
#														,inputFN=fn+".wav"
#														,outputPath=praat_dir
#														,outputFN=fn+".txt"
#														,praatEXE=praatExePath
#														,minPitch=75
#														,maxPitch=750
#														,sampleStep=0.01
#														,silenceThreshold=0.03
##														 ,silenceThreshold=0.01
##														 ,silenceThreshold=0.001
##														 ,silenceThreshold=0.0001
##														 ,silenceThreshold=0.00001
#														,forceRegenerate=True
#														,tgPath=tg_dir
#														,tgFN=fn+".TextGrid"
#														,tierName="Mother"
#														,tmpOutputPath=TEMP
#														)
#		filteredPitchData = pitch_and_intensity.generatePIMeasures(
#																	userPitchData
#																	,tg_dir
#																	,fn+".TextGrid"
#																	,tierName="Epochs"
#																	,doPitch=True
#																	,medianFilterWindowSize=9
#																	)
#		with open(join(PI_SUBWAV_DIR, fn+'.txt'), 'w') as outfile:
#			for line in filteredPitchData:
#				line = [str(x) for x in line]
#				outfile.write(",".join(line)+'\n')



	EVENT_DIR = join(path, "__event_frequency_and_duration")
	general.eventStructurePerEpoch(
									EPOCH_DIR
									,TG_CS_ROOM_RMVD_DIR
									,TG_CS_RMVD_DIR
									,TG_ROOM_RMVD_DIR
									,TG_PREPROCESSED
									,EVENT_DIR
									,"Mother"
									,"Mother's Backchannel"
									)


	# TODO: generalize this so that 'P' is not output for every type of session
	EPOCH_ROW_HEADER_DIR = join(path, "__epoch_row_header")
	general.generateEpochRowHeader(
									EPOCH_DIR
									,EPOCH_ROW_HEADER_DIR
									,"P"
									)

	headerStr = ("file,id,session,interval,int_start,int_end,int_dur,"
				 "ms_dur_s,ms_freq,ms_child_speech_filtered_dur_s,"
				 "ms_noise_filtered_dur_s,ms_full_dur_s,lost_ms_dur_s,"
				 "fp_dur_s,fp_freq,la_dur_s,la_freq,"
				 "uwe_sylcnt,f0_mean,"
				 "f0_max,f0_min,f0_range,f0_var,f0_std"
				 )

	general.aggregateFeatures(
								path
								,[
									os.path.split(EPOCH_ROW_HEADER_DIR)[1]
									,os.path.split(EVENT_DIR)[1]
									,os.path.split(SPEECH_RATE_PER_EPOCH_DIR)[1]
									,os.path.split(PI_FULLWAV_DIR)[1]
								]
								,"__aggr_fullwav"
								,headerStr
								)

#	general.aggregateFeatures(
#								path
#								,[
#									os.path.split(EPOCH_ROW_HEADER_DIR)[1]
#									,os.path.split(EVENT_DIR)[1]
#									,os.path.split(SPEECH_RATE_PER_EPOCH_DIR)[1]
#									,os.path.split(PI_SUBWAV_DIR)[1]
#								]
#								,"__aggr_subwav"
#								,headerStr
#								)

	# remove the temp directory			   
	shutil.rmtree(TEMP)