Пример #1
    def extractFeatures(self, eventsList, Fs, snr):
        """Build one feature row of robust statistics per event.

        For each event the short-term features are extracted, the first
        ``self.discard`` feature rows are kept, and every kept feature is
        summarised over its frames by the median and the median absolute
        deviation (MAD). Event metadata is appended after the statistics.

        Args:
            eventsList: iterable of event objects exposing ``getData()``,
                ``getTarget()``, ``getId()`` and ``getBackground()``.
            Fs: sampling frequency handed to the feature extractor.
            snr: value stored verbatim in every row.

        Returns:
            A list with one row per event, laid out as
            ``[median_0, mad_0, median_1, mad_1, ..., target, n_frames,
            snr, id, background]``.
        """
        feature = []
        for event in eventsList:
            frame = event.getData()

            # NOTE(review): frameSize/frameStep are presumably in seconds
            # (they are scaled by Fs here) -- confirm against the class.
            F = audioFeatureExtraction.stFeatureExtraction(
                frame, Fs, self.frameSize * Fs, self.frameStep * Fs)
            # After the transpose each row is a frame and each column a
            # feature.
            raw_feature = F[:self.discard, :].T

            # Median and MAD of every feature column.
            medians = np.median(raw_feature, axis=0)
            deviations = np.median(np.abs(raw_feature - medians), axis=0)

            tmp = []
            for median, median_absolute_deviation in zip(medians, deviations):
                # Previously these statistics were computed and thrown away.
                tmp.append(median)
                tmp.append(median_absolute_deviation)

            tmp.append(event.getTarget())  # add class label
            tmp.append(raw_feature.shape[0])  # add number of frames per signal
            tmp.append(snr)  # add snr
            tmp.append(event.getId())  # add id
            tmp.append(event.getBackground())  # add background type

            # Previously the row was never stored, so the result was always [].
            feature.append(tmp)

        return feature
Пример #5
def musicThumbnailing(x, Fs, shortTermSize=1.0, shortTermStep=0.5, thumbnailSize=10.0):
    """
    Detect the most representative part of a music recording ("music thumbnails").

    A technique similar to the one proposed in [1]; however, a wider set of
    audio features is used instead of chroma features.
    The following steps are followed:
     - Extract short-term audio features. Typical short-term window size: 1 second
     - Compute the self-similarity matrix, i.e. all pairwise similarities
       between feature vectors
     - Apply a diagonal mask as a moving average filter on the values of the
       self-similarity matrix. The size of the mask is equal to the desired
       thumbnail length.
     - Find the position of the maximum value of the new (filtered)
       self-similarity matrix. The audio segments that correspond to the
       diagonal around that position are the selected thumbnails.

    ARGUMENTS:
     - x:              input signal
     - Fs:             sampling frequency
     - shortTermSize:  window size (in seconds)
     - shortTermStep:  window step (in seconds)
     - thumbnailSize:  desired thumbnail size (in seconds)

    RETURNS (as a 5-tuple):
     - A1:             beginning of 1st thumbnail (in seconds)
     - A2:             ending of 1st thumbnail (in seconds)
     - B1:             beginning of 2nd thumbnail (in seconds)
     - B2:             ending of 2nd thumbnail (in seconds)
     - S:              the filtered and masked self-similarity matrix

    USAGE EXAMPLE:
     [Fs, x] = audioBasicIO.readAudioFile(inputFile)
     [A1, A2, B1, B2, S] = musicThumbnailing(x, Fs)

    [1] Bartsch, M. A., & Wakefield, G. H. (2005). Audio thumbnailing of
    popular music using chroma-based representations.
    Multimedia, IEEE Transactions on, 7(1), 96-104.
    """
    x = audioBasicIO.stereo2mono(x)

    # feature extraction:
    stFeatures = aF.stFeatureExtraction(x, Fs, Fs * shortTermSize, Fs * shortTermStep)

    # self-similarity matrix
    S = selfSimilarityMatrix(stFeatures)

    # moving filter: summing along diagonals of length M
    M = int(round(thumbnailSize / shortTermStep))
    B = numpy.eye(M, M)
    S = scipy.signal.convolve2d(S, B, 'valid')

    # post-processing: suppress the band around the main diagonal (trivial
    # self-matches) and the lower triangle (mirror of the upper one)
    MIN = numpy.min(S)
    rows, cols = numpy.indices(S.shape)
    S[(numpy.abs(rows - cols) < 5.0 / shortTermStep) | (rows > cols)] = MIN

    # find max position:
    [I, J] = numpy.unravel_index(S.argmax(), S.shape)

    # expand along the diagonal, one step at a time, towards the stronger side
    i1 = I
    i2 = I
    j1 = J
    j2 = J

    while i2 - i1 < M:
        # stop before either neighbour would fall outside the matrix
        if i1 <= 0 or j1 <= 0 or i2 >= S.shape[0] - 1 or j2 >= S.shape[1] - 1:
            break
        if S[i1 - 1, j1 - 1] > S[i2 + 1, j2 + 1]:
            i1 -= 1
            j1 -= 1
        else:
            i2 += 1
            j2 += 1

    return (shortTermStep * i1, shortTermStep * i2, shortTermStep * j1, shortTermStep * j2, S)
def silenceRemoval(x, fs, st_win, st_step, smoothWindow=0.5, weight=0.5,
                   plot=False):
    """
    Event Detection (silence removal)

    NOTE(review): the tail of the signature was missing in the reviewed
    excerpt; the parameter names are those used by the body, the default
    values are an assumption -- confirm against the callers.

    ARGUMENTS:
         - x:                the input audio signal
         - fs:               sampling freq
         - st_win, st_step:  window size and step in seconds
         - smoothWindow:     (optional) smooth window (in seconds)
         - weight:           (optional) weight factor (0 < weight < 1);
                             the higher, the more strict
         - plot:             (optional) True if results are to be plotted
    RETURNS:
         - seg_limits:       list of segment limits in seconds
                             (e.g. [[0.1, 0.9], [1.4, 3.0]] means that the
                             resulting segments are (0.1 - 0.9) seconds
                             and (1.4 - 3.0) seconds)
    """
    # clamp the weight into the open interval (0, 1)
    if weight >= 1:
        weight = 0.99
    if weight <= 0:
        weight = 0.01

    # Step 1: feature extraction
    x = audioBasicIO.stereo2mono(x)
    st_feats, _ = aF.stFeatureExtraction(x, fs, st_win * fs, st_step * fs)

    # Step 2: train binary svm classifier of low vs high energy frames
    # keep only the energy short-term sequence (2nd feature)
    st_energy = st_feats[1, :]
    en = numpy.sort(st_energy)
    # number of 10% of the total short-term windows
    l1 = int(len(en) / 10)
    # compute "lower" 10% energy threshold
    t1 = numpy.mean(en[0:l1]) + 0.000000000000001
    # compute "higher" 10% energy threshold
    t2 = numpy.mean(en[-l1:-1]) + 0.000000000000001
    # get all features that correspond to low energy
    class1 = st_feats[:, numpy.where(st_energy <= t1)[0]]
    # get all features that correspond to high energy
    class2 = st_feats[:, numpy.where(st_energy >= t2)[0]]
    # form the binary classification task and ...
    faets_s = [class1.T, class2.T]
    # normalize and train the respective svm probabilistic model
    # (ONSET vs SILENCE)
    [faets_s_norm, means_s, stds_s] = aT.normalizeFeatures(faets_s)
    svm = aT.trainSVM(faets_s_norm, 1.0)

    # Step 3: compute onset probability based on the trained svm
    prob_on_set = []
    for i in range(st_feats.shape[1]):
        # for each frame
        cur_fv = (st_feats[:, i] - means_s) / stds_s
        # get svm probability (that it belongs to the ONSET class)
        prob_on_set.append(svm.predict_proba(cur_fv.reshape(1, -1))[0][1])
    prob_on_set = numpy.array(prob_on_set)
    # smooth probability:
    prob_on_set = smoothMovingAvg(prob_on_set, smoothWindow / st_step)

    # Step 4A: detect onset frame indices:
    prog_on_set_sort = numpy.sort(prob_on_set)
    # find probability Threshold as a weighted average
    # of top 10% and lower 10% of the values
    Nt = int(prog_on_set_sort.shape[0] / 10)
    T = (numpy.mean((1 - weight) * prog_on_set_sort[0:Nt]) +
         weight * numpy.mean(prog_on_set_sort[-Nt::]))

    # get the indices of the frames that satisfy the thresholding
    max_idx = numpy.where(prob_on_set > T)[0]
    i = 0
    seg_limits = []

    # Step 4B: group frame indices to onset segments
    while i < len(max_idx):
        # for each of the detected onset indices
        cur_cluster = [max_idx[i]]
        if i == len(max_idx) - 1:
            # a lone trailing index never forms a segment
            break
        # absorb following indices that are at most 2 frames away
        while max_idx[i + 1] - cur_cluster[-1] <= 2:
            cur_cluster.append(max_idx[i + 1])
            i += 1
            if i == len(max_idx) - 1:
                break
        i += 1
        seg_limits.append(
            [cur_cluster[0] * st_step, cur_cluster[-1] * st_step])

    # Step 5: Post process: remove very small segments:
    min_dur = 0.2
    seg_limits = [s for s in seg_limits if s[1] - s[0] > min_dur]

    if plot:
        timeX = numpy.arange(0, x.shape[0] / float(fs), 1.0 / fs)

        # top: waveform with segment borders
        plt.subplot(2, 1, 1)
        plt.plot(timeX, x)
        for s in seg_limits:
            plt.axvline(x=s[0])
            plt.axvline(x=s[1])
        # bottom: smoothed onset probability with segment borders
        plt.subplot(2, 1, 2)
        plt.plot(numpy.arange(0, prob_on_set.shape[0] * st_step, st_step),
                 prob_on_set)
        for s in seg_limits:
            plt.axvline(x=s[0])
            plt.axvline(x=s[1])
        plt.title('svm Probability')
        plt.show()

    return seg_limits
import random as rn
import math
import operator
import numpy as np
import audioBasicIO
import audioFeatureExtraction

# Batch export for the "angry" test folder: walk the directory, extract
# short-term features from every file and save one CSV per file.
# NOTE(review): `os` is used below but is not imported in the visible part
# of this file -- confirm it is imported elsewhere.
dire = r"G:\5th sem\ee320 DSP\project\csv\newdata\dataset\testdata\angry"
c = 1  # running index used in the output file name
for filename in os.walk(dire):
    # os.walk yields (dirpath, dirnames, filenames); index 2 is the file list
    for x in filename[2]:
        label1 = []
        label = []
        file = filename[0] + "\\" + str(x)
        # `x` is rebound here from the file name to the audio signal
        [Fs, x] = audioBasicIO.readAudioFile(file)
        # window of 0.50 * Fs samples, step of 0.25 * Fs samples
        F = audioFeatureExtraction.stFeatureExtraction(x, Fs, 0.50 * Fs,
                                                       0.25 * Fs)
        for i in range(len(F[0])):
        # NOTE(review): the body of the loop above is missing in this excerpt;
        # presumably it filled `label1`/`label` with one class id per frame.
        # Restore it before running -- as shown this is a syntax error.
        G = np.append(F, label, axis=0)
        loc = r"G:\5th sem\ee320 DSP\project\csv\newdata\dataset\testdata\angrycsv\an" + str(
            c) + ".csv"
        c = c + 1
        np.savetxt(loc, G, delimiter=",")

# Same procedure for the "sad" test folder.
dire = r"G:\5th sem\ee320 DSP\project\csv\newdata\dataset\testdata\sad"
c = 1

# NOTE(review): this loop is cut off in the excerpt after `label = []`.
for filename in os.walk(dire):
    for x in filename[2]:
        label = []
Пример #13
# Per-recording summary statistics of the short-term audio features.
height = 20
k = 0
# One entry per recording is appended to each of these lists below.
col, avg, med, std, maxm, minm = [], [], [], [], [], []

emo_labels, signal_data, filename = dirWav(
    '/media/shreya/New Volume1/datasets/EMO-DB/wav/', '*.wav')

output = np.asarray(emo_labels)

features = []
feat = []
feature = []
length_features = []

for i in signal_data:
    # NOTE(review): 16000 is presumably the sampling rate and 1024/1024 the
    # window and step in samples -- confirm against the extractor signature.
    temp = audioFeatureExtraction.stFeatureExtraction(i, 16000, 1024, 1024)
    # %-formatting prints the same text on Python 2 and Python 3.
    print('temp %s' % (temp.shape,))
    # Reduce along axis 1 so each statistic has one value per feature row.
    # `avg` was never filled before, which left `mean` below always empty.
    avg.append(np.mean(temp, axis=1))
    med.append(np.median(temp, axis=1))
    std.append(np.std(temp, axis=1))
    maxm.append(np.amax(temp, axis=1))
    minm.append(np.amin(temp, axis=1))

# Stack the per-recording statistics into arrays (one row per recording).
mean = np.asarray(avg)
median = np.asarray(med)
maximum = np.asarray(maxm)
minimum = np.asarray(minm)
standard_deviation = np.asarray(std)

def reduce_zeroOneNorm(arr):
Пример #14
#!/usr/bin/env python2.7
import audioBasicIO
import audioFeatureExtraction
import matplotlib.pyplot as plt
# Load the first recording, mix it down to mono and extract its short-term
# features (window 0.050 * Fs samples, step 0.025 * Fs samples).
Fs_x, x = audioBasicIO.readAudioFile("emer/1.wav")
x = audioBasicIO.stereo2mono(x)
F_x = audioFeatureExtraction.stFeatureExtraction(
    x, Fs_x, 0.050 * Fs_x, 0.025 * Fs_x)

# Same processing for the second recording.
Fs_y, y = audioBasicIO.readAudioFile("nonemer/9.wav")
y = audioBasicIO.stereo2mono(y)
F_y = audioFeatureExtraction.stFeatureExtraction(
    y, Fs_y, 0.050 * Fs_y, 0.025 * Fs_y)

# Draw feature rows 0 and 1 of each recording: the first recording goes to
# the upper panel, the second to the lower panel.
for row in (0, 1):
    plt.subplot(2, 1, 1)
    plt.plot(F_x[row, :])
    plt.subplot(2, 1, 2)
    plt.plot(F_y[row, :])
Пример #15
	def analyzeFeatures(self, data):
		"""Print each short-term feature of ``data`` next to its name.

		The extractor is called with a window and a step of ``0.1 * self.Fs``
		samples and its result is unpacked as ``(values, features)``.

		NOTE(review): assumes ``values[i]`` is the series that belongs to
		``features[i]`` -- confirm against the extractor's documentation.
		"""
		values, features = aF.stFeatureExtraction(data, self.Fs, 0.1 * self.Fs, 0.1 * self.Fs)
		# Pair each name with its values; the old code printed the name twice.
		for name, series in zip(features, values):
			print("%s:\t%s" % (name, series))
Пример #16
def main(argv):
    """Run the speed benchmark selected by ``argv[1]``.

    Each benchmark is repeated ``nExp`` times and prints how many times
    faster than real time the measured operation ran. An unrecognised
    option does nothing.

    Args:
        argv: command-line style list; ``argv[1]`` is one of
            ``-shortTerm``, ``-classifyFile``, ``-mtClassify``,
            ``-hmmSegmentation``, ``-silenceRemoval``, ``-thumbnailing``,
            ``-diarization-noLDA`` or ``-diarization-LDA``.
    """
    speech = "diarizationExample.wav"

    def silence_removal(Fs, x):
        # The file is read again inside the timed region, as before, so the
        # measurement includes the I/O cost.
        [Fs, x] = audioBasicIO.readAudioFile(speech)
        aS.silenceRemoval(
            x, Fs, 0.050, 0.050, smoothWindow=1.0, Weight=0.3, plot=False)

    # option -> (report format, audio file, operation to time)
    benchmarks = {
        "-shortTerm": (
            "short-term feature extraction: {0:.1f} x realtime",
            speech,
            lambda Fs, x: audioFeatureExtraction.stFeatureExtraction(
                x, Fs, 0.050 * Fs, 0.050 * Fs)),
        "-classifyFile": (
            "Mid-term feature extraction + classification \t {0:.1f} x realtime",
            speech,
            lambda Fs, x: aT.fileClassification(speech, "svmSM", "svm")),
        "-mtClassify": (
            "Fix-sized classification - segmentation \t {0:.1f} x realtime",
            speech,
            lambda Fs, x: aS.mtFileClassification(
                speech, "svmSM", "svm", False, '')),
        "-hmmSegmentation": (
            "HMM-based classification - segmentation \t {0:.1f} x realtime",
            speech,
            lambda Fs, x: aS.hmmSegmentation(
                speech, 'hmmRadioSM', False, '')),
        "-silenceRemoval": (
            "Silence removal \t {0:.1f} x realtime",
            speech,
            silence_removal),
        "-thumbnailing": (
            "Thumbnail \t {0:.1f} x realtime",
            "scottish.wav",
            # find thumbnail endpoints
            lambda Fs, x: aS.musicThumbnailing(x, Fs, 1.0, 1.0, 15.0)),
        "-diarization-noLDA": (
            "Diarization \t {0:.1f} x realtime",
            speech,
            lambda Fs, x: aS.speakerDiarization(
                speech, 4, LDAdim=0, PLOT=False)),
        "-diarization-LDA": (
            "Diarization \t {0:.1f} x realtime",
            speech,
            lambda Fs, x: aS.speakerDiarization(speech, 4, PLOT=False)),
    }

    selected = benchmarks.get(argv[1])
    if selected is None:
        return
    _run_benchmark(*selected)


def _run_benchmark(report_format, wav_path, job):
    """Time ``job(Fs, x)`` on ``wav_path`` ``nExp`` times and print each ratio."""
    # time.clock() was removed in Python 3.8; fall back to it only on
    # interpreters that lack perf_counter.
    timer = getattr(time, "perf_counter", None) or time.clock
    for _ in range(nExp):
        [Fs, x] = audioBasicIO.readAudioFile(wav_path)
        duration = x.shape[0] / float(Fs)
        t1 = timer()
        job(Fs, x)
        t2 = timer()
        print(report_format.format(duration / (t2 - t1)))