import os
from pyAudioAnalysis import audioBasicIO
from pyAudioAnalysis import audioFeatureExtraction as aF


def fileChromagramWrapper(wav_file):
    if not os.path.isfile(wav_file):
        raise Exception("Input audio file not found!")
    [fs, x] = audioBasicIO.readAudioFile(wav_file)
    x = audioBasicIO.stereo2mono(x)
    specgram, TimeAxis, FreqAxis = aF.stChromagram(x, fs, round(fs * 0.040),
                                                   round(fs * 0.040), True)
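# Usage sketch for fileChromagramWrapper (a minimal, illustrative example;
# the path below is an assumption pointing at the doremi.wav sample that
# ships with pyAudioAnalysis, and is not part of the original code):
if __name__ == '__main__':
    fileChromagramWrapper("data/doremi.wav")  # plot a 40 ms-window chromagram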
def recordAudioSegments(BLOCKSIZE,
                        Fs=16000,
                        showSpectrogram=False,
                        showChromagram=False,
                        recordActivity=False):
    midTermBufferSize = int(Fs * BLOCKSIZE)

    print("Press Ctrl+C to stop recording")

    startDateTimeStr = datetime.datetime.now().strftime("%Y_%m_%d_%I:%M%p")
    MEAN, STD = loadMEANS("svmMovies8classesMEANS")  # load MEAN feature values

    pa = pyaudio.PyAudio()
    stream = pa.open(format=FORMAT,
                     channels=1,
                     rate=Fs,
                     input=True,
                     frames_per_buffer=midTermBufferSize)

    midTermBuffer = []
    curWindow = []
    count = 0
    errorcount = 0
    global allData
    allData = []
    energy100_buffer_zero = []
    curActiveWindow = numpy.array([])
    timeStart = time.time()

    while 1:
        try:
            block = stream.read(midTermBufferSize)
            countB = len(block) // 2
            fmt = "%dh" % countB
            shorts = struct.unpack(fmt, block)
            curWindow = list(shorts)
            midTermBuffer = midTermBuffer + curWindow  # copy to midTermBuffer
            del curWindow

            # print(len(midTermBuffer), midTermBufferSize)
            # if len(midTermBuffer) == midTermBufferSize:  # if midTermBuffer is full:
            if 1:
                elapsedTime = time.time() - timeStart  # time since recording started
                dataTime = (count + 1) * BLOCKSIZE     # data-driven time
                # TODO
                # mtF, _ = aF.mtFeatureExtraction(midTermBuffer, Fs, BLOCKSIZE * Fs, BLOCKSIZE * Fs, 0.050 * Fs, 0.050 * Fs)
                # curFV = (mtF - MEAN) / STD
                # TODO
                allData += midTermBuffer
                midTermBuffer = numpy.double(midTermBuffer)  # convert current buffer to numpy array

                # Compute spectrogram
                if showSpectrogram:
                    (spectrogram, TimeAxisS, FreqAxisS) = aF.stSpectogram(
                        midTermBuffer, Fs, 0.020 * Fs, 0.02 * Fs)
                    FreqAxisS = numpy.array(FreqAxisS)  # frequency axis
                    # most dominant frequency of each short-term window:
                    DominantFreqs = FreqAxisS[numpy.argmax(spectrogram, axis=1)]
                    maxFreq = numpy.mean(DominantFreqs)  # average dominant freq
                    maxFreqStd = numpy.std(DominantFreqs)

                # Compute chromagram
                if showChromagram:
                    (chromagram, TimeAxisC, FreqAxisC) = aF.stChromagram(
                        midTermBuffer, Fs, 0.020 * Fs, 0.02 * Fs)
                    FreqAxisC = numpy.array(FreqAxisC)  # 12 chroma classes
                    # most dominant chroma class of each short-term window:
                    DominantFreqsC = FreqAxisC[numpy.argmax(chromagram, axis=1)]
                    # most common chroma class among all short-term windows:
                    maxFreqC = most_common(DominantFreqsC)[0]

                # Plot signal window
                signalPlotCV = plotCV(
                    scipy.signal.resample(midTermBuffer + 16000, WidthPlot),
                    WidthPlot, HeightPlot, 32000)
                cv2.imshow('Signal', signalPlotCV)
                cv2.moveWindow('Signal', 50, statusHeight + 50)

                # Show spectrogram
                if showSpectrogram:
                    iSpec = numpy.array(spectrogram.T * 255, dtype=numpy.uint8)
                    iSpec2 = cv2.resize(iSpec, (WidthPlot, HeightPlot),
                                        interpolation=cv2.INTER_CUBIC)
                    iSpec2 = cv2.applyColorMap(iSpec2, cv2.COLORMAP_JET)
                    cv2.putText(iSpec2, "maxFreq: %.0f Hz" % maxFreq, (0, 11),
                                cv2.FONT_HERSHEY_PLAIN, 1, (200, 200, 200))
                    cv2.imshow('Spectrogram', iSpec2)
                    cv2.moveWindow('Spectrogram', 50,
                                   HeightPlot + statusHeight + 60)

                # Show chromagram
                if showChromagram:
                    iChroma = numpy.array(
                        (chromagram.T / chromagram.max()) * 255,
                        dtype=numpy.uint8)
                    iChroma2 = cv2.resize(iChroma, (WidthPlot, HeightPlot),
                                          interpolation=cv2.INTER_CUBIC)
                    iChroma2 = cv2.applyColorMap(iChroma2, cv2.COLORMAP_JET)
                    cv2.putText(iChroma2, "maxFreqC: %s" % maxFreqC, (0, 11),
                                cv2.FONT_HERSHEY_PLAIN, 1, (200, 200, 200))
                    cv2.imshow('Chroma', iChroma2)
                    cv2.moveWindow('Chroma', 50,
                                   2 * HeightPlot + statusHeight + 60)

                # Activity detection:
                energy100 = (100 * numpy.sum(midTermBuffer * midTermBuffer) /
                             (midTermBuffer.shape[0] * 32000 * 32000))
                if count < 10:  # TODO: make this a parameter
                    energy100_buffer_zero.append(energy100)
                    mean_energy100_zero = numpy.mean(
                        numpy.array(energy100_buffer_zero))
                else:
                    mean_energy100_zero = numpy.mean(
                        numpy.array(energy100_buffer_zero))
                    if energy100 < 1.2 * mean_energy100_zero:
                        if curActiveWindow.shape[0] > 0:
                            # a sound was detected in the previous segment:
                            activeT2 = elapsedTime - BLOCKSIZE  # end of current active window
                            if activeT2 - activeT1 > minActivityDuration:
                                wavFileName = startDateTimeStr + \
                                    "_activity_{0:.2f}_{1:.2f}.wav".format(
                                        activeT1, activeT2)
                                if recordActivity:
                                    # write current active window to file
                                    wavfile.write(wavFileName, Fs,
                                                  numpy.int16(curActiveWindow))
                            curActiveWindow = numpy.array([])  # delete current active window
                    else:
                        if curActiveWindow.shape[0] == 0:
                            # this is a new active window!
                            activeT1 = elapsedTime - BLOCKSIZE  # start of new active window
                        curActiveWindow = numpy.concatenate(
                            (curActiveWindow, midTermBuffer))

                # Show status messages on the Status cv window:
                textIm = numpy.zeros((statusHeight, WidthPlot, 3))
                statusStrTime = "time: %.1f sec" % elapsedTime + \
                    " - data time: %.1f sec" % dataTime + \
                    " - loss : %.1f sec" % (elapsedTime - dataTime)
                statusStrFeature = "ene1:%.1f" % energy100 + \
                    " eneZero:%.1f" % mean_energy100_zero
                cv2.putText(textIm, statusStrTime, (0, 11),
                            cv2.FONT_HERSHEY_PLAIN, 1, (200, 200, 200))
                cv2.putText(textIm, statusStrFeature, (0, 22),
                            cv2.FONT_HERSHEY_PLAIN, 1, (200, 200, 200))
                if curActiveWindow.shape[0] > 0:
                    cv2.putText(textIm, "sound", (0, 33),
                                cv2.FONT_HERSHEY_PLAIN, 1, (0, 0, 255))
                else:
                    cv2.putText(textIm, "silence", (0, 33),
                                cv2.FONT_HERSHEY_PLAIN, 1, (200, 200, 220))
                cv2.imshow("Status", textIm)
                cv2.moveWindow("Status", 50, 0)
                midTermBuffer = []
                ch = cv2.waitKey(10)
                count += 1
        except IOError as e:
            errorcount += 1  # keep the error counter consistent
            print("(%d) Error recording: %s" % (errorcount, e))
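# Usage sketch (illustrative, not from the original script). The enclosing
# module is assumed to import numpy, pyaudio, cv2, struct, time, datetime,
# scipy.signal and scipy.io.wavfile, and to define the module-level globals
# used above (FORMAT, WidthPlot, HeightPlot, statusHeight,
# minActivityDuration) plus the loadMEANS, plotCV and most_common helpers.
def demo_record_audio_segments():  # hypothetical helper, not in the original
    # record in 1-second blocks, display spectrogram and chromagram, and
    # save every detected activity segment to a timestamped WAV file
    recordAudioSegments(1.0, Fs=16000, showSpectrogram=True,
                        showChromagram=True, recordActivity=True)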
def recordAndCalcHR(BLOCKSIZE,
                    Fs,
                    showSpectrogram=False,
                    showChromagram=False,
                    recordActivity=False):
    print("Press Ctrl+C to stop process")

    startDateTimeStr = datetime.datetime.now().strftime("%Y_%m_%d_%I:%M%p")
    MEAN, STD = loadMEANS("svmMovies8classesMEANS")  # load MEAN feature values

    # open alsaaudio capture
    inp = alsaaudio.PCM(alsaaudio.PCM_CAPTURE, alsaaudio.PCM_NONBLOCK)
    inp.setchannels(1)                          # 1 channel
    inp.setrate(Fs)                             # set sampling freq
    inp.setformat(alsaaudio.PCM_FORMAT_S16_LE)  # set 2-byte samples
    inp.setperiodsize(512)

    midTermBufferSize = int(Fs * BLOCKSIZE)
    midTermBuffer = []
    curWindow = []
    count = 0
    global allData
    allData = []
    energy100_buffer_zero = []
    curActiveWindow = numpy.array([])
    timeStart = time.time()

    while 1:
        l, data = inp.read()  # read data from buffer
        if l:
            for i in range(len(data) // 2):
                # get audio samples
                curWindow.append(audioop.getsample(data, 2, i))
            if len(curWindow) + len(midTermBuffer) > midTermBufferSize:
                samplesToCopyToMidBuffer = \
                    midTermBufferSize - len(midTermBuffer)
            else:
                samplesToCopyToMidBuffer = len(curWindow)
            # copy to midTermBuffer:
            midTermBuffer = midTermBuffer + \
                curWindow[0:samplesToCopyToMidBuffer]
            del curWindow[0:samplesToCopyToMidBuffer]

            if len(midTermBuffer) == midTermBufferSize:
                # midTermBuffer is full:
                elapsedTime = time.time() - timeStart  # time since recording started
                dataTime = (count + 1) * BLOCKSIZE     # data-driven time
                # TODO
                # mtF, _ = aF.mtFeatureExtraction(midTermBuffer, Fs, BLOCKSIZE * Fs, BLOCKSIZE * Fs, 0.050 * Fs, 0.050 * Fs)
                # curFV = (mtF - MEAN) / STD
                # TODO
                allData += midTermBuffer
                midTermBuffer = numpy.double(midTermBuffer)  # convert current buffer to numpy array

                # Compute spectrogram
                if showSpectrogram:
                    (spectrogram, TimeAxisS, FreqAxisS) = aF.stSpectogram(
                        midTermBuffer, Fs, 0.020 * Fs, 0.02 * Fs)
                    FreqAxisS = numpy.array(FreqAxisS)  # frequency axis
                    # most dominant frequency of each short-term window:
                    DominantFreqs = FreqAxisS[numpy.argmax(spectrogram, axis=1)]
                    maxFreq = numpy.mean(DominantFreqs)  # average dominant freq
                    maxFreqStd = numpy.std(DominantFreqs)

                # Compute chromagram
                if showChromagram:
                    (chromagram, TimeAxisC, FreqAxisC) = aF.stChromagram(
                        midTermBuffer, Fs, 0.020 * Fs, 0.02 * Fs)
                    FreqAxisC = numpy.array(FreqAxisC)  # 12 chroma classes
                    # most dominant chroma class of each short-term window:
                    DominantFreqsC = FreqAxisC[numpy.argmax(chromagram, axis=1)]
                    # most common chroma class among all short-term windows:
                    maxFreqC = most_common(DominantFreqsC)[0]

                # Plot signal window
                signalPlotCV = plotCV(
                    scipy.signal.resample(midTermBuffer + 16000, WidthPlot),
                    WidthPlot, HeightPlot, 32000)
                cv2.imshow('Signal', signalPlotCV)
                cv2.moveWindow('Signal', 50, statusHeight + 50)

                # Show spectrogram
                if showSpectrogram:
                    iSpec = numpy.array(spectrogram.T * 255, dtype=numpy.uint8)
                    iSpec2 = cv2.resize(iSpec, (WidthPlot, HeightPlot),
                                        interpolation=cv2.INTER_CUBIC)
                    iSpec2 = cv2.applyColorMap(iSpec2, cv2.COLORMAP_JET)
                    cv2.putText(iSpec2, "maxFreq: %.0f Hz" % maxFreq, (0, 11),
                                cv2.FONT_HERSHEY_PLAIN, 1, (200, 200, 200))
                    cv2.imshow('Spectrogram', iSpec2)
                    cv2.moveWindow('Spectrogram', 50,
                                   HeightPlot + statusHeight + 60)

                # Show chromagram
                if showChromagram:
                    iChroma = numpy.array(
                        (chromagram.T / chromagram.max()) * 255,
                        dtype=numpy.uint8)
                    iChroma2 = cv2.resize(iChroma, (WidthPlot, HeightPlot),
                                          interpolation=cv2.INTER_CUBIC)
                    iChroma2 = cv2.applyColorMap(iChroma2, cv2.COLORMAP_JET)
                    cv2.putText(iChroma2, "maxFreqC: %s" % maxFreqC, (0, 11),
                                cv2.FONT_HERSHEY_PLAIN, 1, (200, 200, 200))
                    cv2.imshow('Chroma', iChroma2)
                    cv2.moveWindow('Chroma', 50,
                                   2 * HeightPlot + statusHeight + 60)

                # Activity detection:
                energy100 = (100 * numpy.sum(midTermBuffer * midTermBuffer) /
                             (midTermBuffer.shape[0] * 32000 * 32000))
                if count < 10:  # TODO: make this a parameter
                    energy100_buffer_zero.append(energy100)
                    mean_energy100_zero = numpy.mean(
                        numpy.array(energy100_buffer_zero))
                else:
                    mean_energy100_zero = numpy.mean(
                        numpy.array(energy100_buffer_zero))
                    if energy100 < 1.2 * mean_energy100_zero:
                        if curActiveWindow.shape[0] > 0:
                            # a sound was detected in the previous segment:
                            activeT2 = elapsedTime - BLOCKSIZE  # end of current active window
                            if activeT2 - activeT1 > minActivityDuration:
                                wavFileName = startDateTimeStr + \
                                    "_activity_{0:.2f}_{1:.2f}.wav".format(
                                        activeT1, activeT2)
                                if recordActivity:
                                    # write current active window to file
                                    wavfile.write(wavFileName, Fs,
                                                  numpy.int16(curActiveWindow))
                            curActiveWindow = numpy.array([])  # delete current active window
                    else:
                        if curActiveWindow.shape[0] == 0:
                            # this is a new active window!
                            activeT1 = elapsedTime - BLOCKSIZE  # start of new active window
                        curActiveWindow = numpy.concatenate(
                            (curActiveWindow, midTermBuffer))

                # Show status messages on the Status cv window:
                textIm = numpy.zeros((statusHeight, WidthPlot, 3))
                statusStrTime = "time: %.1f sec" % elapsedTime + \
                    " - data time: %.1f sec" % dataTime + \
                    " - loss : %.1f sec" % (elapsedTime - dataTime)
                statusStrFeature = "ene1:%.1f" % energy100 + \
                    " eneZero:%.1f" % mean_energy100_zero
                cv2.putText(textIm, statusStrTime, (0, 11),
                            cv2.FONT_HERSHEY_PLAIN, 1, (200, 200, 200))
                cv2.putText(textIm, statusStrFeature, (0, 22),
                            cv2.FONT_HERSHEY_PLAIN, 1, (200, 200, 200))
                if curActiveWindow.shape[0] > 0:
                    cv2.putText(textIm, "sound", (0, 33),
                                cv2.FONT_HERSHEY_PLAIN, 1, (0, 0, 255))
                else:
                    cv2.putText(textIm, "silence", (0, 33),
                                cv2.FONT_HERSHEY_PLAIN, 1, (200, 200, 220))
                cv2.imshow("Status", textIm)
                cv2.moveWindow("Status", 50, 0)
                midTermBuffer = []
                ch = cv2.waitKey(10)
                count += 1
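# Usage sketch (illustrative; assumes a Linux host with ALSA and the
# pyalsaaudio bindings available, plus the same module-level globals and
# helpers listed for recordAudioSegments above):
def demo_record_and_calc_hr():  # hypothetical helper, not in the original
    # read the microphone in 1-second blocks and show only the chromagram
    recordAndCalcHR(1.0, 16000, showSpectrogram=False,
                    showChromagram=True, recordActivity=False)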
"""! @brief Example 11 @details pyAudioAnalysis chromagram example @author Theodoros Giannakopoulos {[email protected]} """ import plotly import plotly.graph_objs as go from pyAudioAnalysis import audioFeatureExtraction as aF from pyAudioAnalysis import audioBasicIO as aIO layout = go.Layout(title='Chromagram example for doremi.wav signal', xaxis=dict(title='time (sec)',), yaxis=dict(title='Chroma Name',)) if __name__ == '__main__': win = 0.04 fp = "../data/doremi.wav" # music sample # read machine sound fs, s = aIO.readAudioFile(fp) fs = float(fs) dur1 = len(s) / float(fs) spec, time, freq = aF.stChromagram(s, fs, int(fs * win), int(fs * win), False) heatmap = go.Heatmap(z=spec.T, y=freq, x=time) plotly.offline.plot(go.Figure(data=[heatmap], layout=layout), filename="temp.html", auto_open=True)
print("\n\n\n * * * TEST 1 * * * \n\n\n") [Fs, x] = audioBasicIO.readAudioFile(root_data_path + "pyAudioAnalysis/data/count.wav"); F, f_names = audioFeatureExtraction.stFeatureExtraction(x, Fs, 0.050*Fs, 0.025*Fs); plt.subplot(2,1,1); plt.plot(F[0,:]); plt.xlabel('Frame no'); plt.ylabel(f_names[0]); plt.subplot(2,1,2); plt.plot(F[1,:]); plt.xlabel('Frame no'); plt.ylabel(f_names[1]); plt.show() print("\n\n\n * * * TEST 2 * * * \n\n\n") [Fs, x] = audioBasicIO.readAudioFile(root_data_path + "pyAudioAnalysis/data/doremi.wav") x = audioBasicIO.stereo2mono(x) specgram, TimeAxis, FreqAxis = audioFeatureExtraction.stSpectogram(x, Fs, round(Fs * 0.040), round(Fs * 0.040), True) print("\n\n\n * * * TEST 3 * * * \n\n\n") [Fs, x] = audioBasicIO.readAudioFile(root_data_path + "pyAudioAnalysis/data/doremi.wav") x = audioBasicIO.stereo2mono(x) specgram, TimeAxis, FreqAxis = audioFeatureExtraction.stChromagram(x, Fs, round(Fs * 0.040), round(Fs * 0.040), True) print("\n\n\n * * * TEST 4 * * * \n\n\n") aT.featureAndTrain([root_data_path +"SM/speech",root_data_path + "SM/music"], 1.0, 1.0, 0.2, 0.2, "svm", "temp", True) print("\n\n\n * * * TEST 5 * * * \n\n\n") [flagsInd, classesAll, acc, CM] = aS.mtFileClassification(root_data_path + "pyAudioAnalysis/data//scottish.wav", root_data_path + "pyAudioAnalysis/data/svmSM", "svm", True, root_data_path + 'pyAudioAnalysis/data/scottish.segments') print("\n\n\n * * * TEST 6 * * * \n\n\n") aS.trainHMM_fromFile(root_data_path + 'radioFinal/train/bbc4A.wav', root_data_path + 'radioFinal/train/bbc4A.segments', 'hmmTemp1', 1.0, 1.0) aS.trainHMM_fromDir(root_data_path + 'radioFinal/small', 'hmmTemp2', 1.0, 1.0) aS.hmmSegmentation(root_data_path + 'pyAudioAnalysis/data//scottish.wav', 'hmmTemp1', True, root_data_path + 'pyAudioAnalysis/data//scottish.segments') # test 1 aS.hmmSegmentation(root_data_path + 'pyAudioAnalysis/data//scottish.wav', 'hmmTemp2', True, root_data_path + 'pyAudioAnalysis/data//scottish.segments') # test 2 print("\n\n\n * * * TEST 7 * * * \n\n\n") aT.featureAndTrainRegression(root_data_path + "pyAudioAnalysis/data/speechEmotion", 1, 1, 0.050, 0.050, "svm_rbf", "temp.mod", compute_beat=False)