def compute_zcr(self):
    """Compute the zero-crossing rate of each frame into ``self.zcr``.

    Every frame index in ``range(self.frames)`` is fetched with
    ``utils._get_frame(self.audio, index, self.frame)``.  The rate of a
    frame is the mean, over consecutive sample pairs, of half the absolute
    difference of their signs.  The per-frame rates are stored as a NumPy
    array in ``self.zcr``.
    """
    rates = [
        np.mean(
            0.5 * np.abs(np.diff(np.sign(
                utils._get_frame(self.audio, index, self.frame)
            )))
        )
        for index in range(self.frames)
    ]
    self.zcr = np.asarray(rates)
def compute_mfcc(self):
    """Compute mel-spectrogram, log-amplitude, MFCC and delta features.

    Frames ``0 .. self.frames - 2`` are processed (the final frame index is
    not visited).  For each frame the following are appended, in order:

    * ``self.melspectrogram`` -- column 1 of the mel spectrogram computed
      with ``hop_length=self.frame`` and ``sr=self.sampling_rate``;
    * ``self.logamplitude``   -- ``librosa.logamplitude`` of that column,
      finally stored as a single-row 2-D array;
    * ``self.mfcc``           -- ``self.number_of_mfcc`` coefficients
      derived from the log-amplitude column;
    * ``self.mfcc_delta`` / ``self.mfcc_delta2`` -- first- and second-order
      deltas of those coefficients.

    After the loop every list is converted to a NumPy array.
    """
    self.melspectrogram = []
    self.logamplitude = []
    self.mfcc = []
    self.mfcc_delta = []
    self.mfcc_delta2 = []

    for index in range(self.frames - 1):
        samples = utils._get_frame(self.audio, index, self.frame)

        # MFCC computation with default settings (2048 FFT window length);
        # only column 1 of the resulting spectrogram is kept.
        mel_column = librosa.feature.melspectrogram(
            samples, sr=self.sampling_rate, hop_length=self.frame
        )[:, 1]
        self.melspectrogram.append(mel_column)

        log_column = librosa.logamplitude(mel_column)
        self.logamplitude.append(log_column)

        coefficients = librosa.feature.mfcc(
            S=log_column, n_mfcc=self.number_of_mfcc
        ).transpose()
        self.mfcc.append(coefficients)

        self.mfcc_delta.append(librosa.feature.delta(coefficients))
        self.mfcc_delta2.append(librosa.feature.delta(coefficients, order=2))

        # The MFCC step above needs the raw column; only afterwards is the
        # stored log-amplitude replaced by its flattened, single-row form.
        self.logamplitude[-1] = log_column.T.flatten()[np.newaxis, :]

    self.melspectrogram = np.asarray(self.melspectrogram)
    self.logamplitude = np.asarray(self.logamplitude)
    self.mfcc = np.asarray(self.mfcc)
    self.mfcc_delta = np.asarray(self.mfcc_delta)
    self.mfcc_delta2 = np.asarray(self.mfcc_delta2)
def compute_energy(self, frame=2048, sampleing_rate=44000):
    """Compute the mean squared amplitude of each frame into ``self.energy``.

    Args:
        frame: Value passed as the third argument of ``utils._get_frame``.
            Note that the number of frames still comes from
            ``self.frames``; ``self.frame`` is not read here.
        sampleing_rate: Unused by this method; kept (including its
            spelling) so existing callers keep working.

    The per-frame energies are stored as a NumPy array in ``self.energy``.
    """
    def _mean_power(samples):
        # Sum of squares divided by the sample count (as float64).
        return np.sum(samples ** 2) / np.float64(len(samples))

    self.energy = np.asarray([
        _mean_power(utils._get_frame(self.audio, index, frame))
        for index in range(self.frames)
    ])
def compute_geometric_mean(self):
    """Store one value per frame in ``self.geometric_mean``.

    For each frame in ``range(self.frames)`` (fetched with
    ``utils._get_frame(self.audio, index, self.frame)``) the stored value
    is the sum of squared samples divided by the number of samples.

    NOTE(review): despite the method name, this is the arithmetic mean of
    the squared samples, not a geometric mean -- confirm the intended
    definition with the callers before changing it.
    """
    values = []
    for index in range(self.frames):
        samples = utils._get_frame(self.audio, index, self.frame)
        total_energy = np.sum(samples ** 2)
        values.append(total_energy / len(samples))
    self.geometric_mean = np.asarray(values)
def compute_energy_entropy(self):
    """Compute the entropy of sub-frame energies into ``self.energy_entropy``.

    Each frame is cut into 10 equally long, contiguous sub-blocks (trailing
    samples that do not fill a whole block are dropped).  The energy of
    every sub-block is normalised by the total energy of the *untruncated*
    frame, and the base-2 entropy of those normalised energies is stored.
    A small epsilon guards the division and the logarithm against zeros.
    """
    n_blocks = 10
    tiny = 0.00000001

    entropies = []
    for index in range(self.frames):
        samples = utils._get_frame(self.audio, index, self.frame)

        # Total energy is taken before any truncation below.
        total_energy = np.sum(samples ** 2)

        block_len = len(samples) // n_blocks
        usable = block_len * n_blocks
        if usable != len(samples):
            samples = samples[:usable]

        # Column-major reshape: the result has shape (block_len, n_blocks)
        # and each column holds one contiguous sub-block of the frame.
        blocks = samples.reshape(block_len, n_blocks, order='F').copy()

        # Normalised energy of each sub-block.
        shares = np.sum(blocks ** 2, axis=0) / (total_energy + tiny)

        # Entropy of the normalised sub-block energies.
        entropies.append(-np.sum(shares * np.log2(shares + tiny)))

    self.energy_entropy = np.asarray(entropies)