def mpc_features(frames, n_mpc=32, n_segments=10):
    """Build the loudness-based MPC feature expression and segment it.

    Args:
        frames: input handed unchanged to ``AudioFeatures.loudness``.
        n_mpc: number of bands requested from the extractor (``n_bands``).
        n_segments: segment count forwarded to ``audiofeature.segment``.

    Returns:
        A tuple ``(extractor, segments, n_mpc)`` where ``extractor`` is the
        configured ``AudioFeatures`` instance and ``segments`` is the result
        of ``audiofeature.segment``.
    """
    extractor_options = dict(
        n_bands=n_mpc,
        scale_to_int_range=False,
        critical_band_fn=mel.MelMasters.warp_dense,
        power_sqr_abs=False,
        noise_level=1.0e-4,  # 1/10000
    )
    extractor = audiofeature.AudioFeatures(22050, 1024, **extractor_options)
    segments = audiofeature.segment(extractor.loudness(frames), n_segments)
    return extractor, segments, n_mpc
def mpc_w_softplus(frames, n_mpc=32, n_segments=10):
    """Build MPC features using a softplus-smoothed spectrum before the log.

    The expression computed is
    ``log10(0.01 * softplus(100 * audspec) + 1.0e-4)``.

    Args:
        frames: input handed unchanged to ``AudioFeatures.audspec``.
        n_mpc: number of bands requested from the extractor (``n_bands``).
        n_segments: segment count forwarded to ``audiofeature.segment``.

    Returns:
        A tuple ``(extractor, segments, n_mpc)``.
    """
    tensor = theano.tensor
    extractor = audiofeature.AudioFeatures(
        22050,
        1024,
        n_bands=n_mpc,
        scale_to_int_range=False,
        critical_band_fn=mel.MelMasters.warp_dense,
        power_sqr_abs=False,
        noise_level=1.0e-4,  # 1/10000
    )
    spectrum = extractor.audspec(frames)
    # Scale up, apply softplus, scale back down, then offset before the log.
    smoothed = 0.01 * tensor.nnet.softplus(100 * spectrum)
    log_spectrum = tensor.log10(smoothed + 1.0e-4)
    segments = audiofeature.segment(log_spectrum, n_segments)
    return extractor, segments, n_mpc
def mfcc_features(frames, n_mfcc=16, n_segments=10):
    """Build the cepstral (``audcc``) feature expression and segment it.

    Args:
        frames: input handed unchanged to ``AudioFeatures.audcc``.
        n_mfcc: number of cepstral coefficients (``n_audcc``); the extractor
            is configured with ``2 * n_mfcc + 4`` bands.
        n_segments: segment count forwarded to ``audiofeature.segment``.

    Returns:
        A tuple ``(extractor, segments, n_mfcc)``.
    """
    band_count = 2 * n_mfcc + 4
    extractor_options = dict(
        sample_rate=22050,
        frame_len=1024,
        n_bands=band_count,
        n_audcc=n_mfcc,
        half_fft=True,
        hamming=True,
        scale_to_int_range=True,
        critical_band_fn=mel.melhtk_4k,
        power_sqr_abs=True,
        noise_level=1,
    )
    extractor = audiofeature.AudioFeatures(**extractor_options)
    coefficients = extractor.audcc(frames)
    segments = audiofeature.segment(coefficients, n_segments)
    return extractor, segments, n_mfcc
def mpc_w_clipping(frames, n_mpc=32, n_segments=10, use_sparse_warp=False):
    """Build MPC features, clipping the spectrum at the noise level.

    Values of ``audspec`` that are not strictly greater than the extractor's
    ``noise_level`` are replaced by ``noise_level`` before ``log10`` is taken.

    Args:
        frames: input handed unchanged to ``AudioFeatures.audspec``.
        n_mpc: number of bands requested from the extractor (``n_bands``).
        n_segments: segment count forwarded to ``audiofeature.segment``.
        use_sparse_warp: forwarded to the ``AudioFeatures`` constructor.

    Returns:
        A tuple ``(extractor, segments, n_mpc)``.
    """
    tensor = theano.tensor
    extractor = audiofeature.AudioFeatures(
        22050,
        1024,
        n_bands=n_mpc,
        scale_to_int_range=False,
        critical_band_fn=mel.MelMasters.warp_dense,
        power_sqr_abs=False,
        use_sparse_warp=use_sparse_warp,
        noise_level=1.0e-4,  # 1/10000
    )
    spectrum = extractor.audspec(frames)
    floor = extractor.noise_level
    # Element-wise select: keep the spectrum where it exceeds the floor.
    clipped = tensor.switch(spectrum > floor, spectrum, floor)
    segments = audiofeature.segment(tensor.log10(clipped), n_segments)
    return extractor, segments, n_mpc
def mpc_w_max_approx(frames, n_mpc=32, n_segments=10, use_sparse_warp=False):
    """Build MPC features using a max over log-weighted spectrum bins.

    For every band, the feature is the maximum over FFT bins of
    ``log10(warp_weight + 1e-12) + log10(powspec + noise_level)``, i.e. the
    band sum is replaced by its largest term in the log domain.

    Args:
        frames: input handed unchanged to ``AudioFeatures.powspec``.
        n_mpc: number of bands requested from the extractor (``n_bands``).
        n_segments: segment count forwarded to ``audiofeature.segment``.
        use_sparse_warp: accepted for signature parity with
            ``mpc_w_clipping``; it is not used by this function.

    Returns:
        A tuple ``(extractor, segments, n_mpc)``.
    """
    AF = audiofeature.AudioFeatures(
        22050,
        1024,
        n_bands=n_mpc,
        scale_to_int_range=False,
        critical_band_fn=mel.MelMasters.warp_dense,
        power_sqr_abs=False,
        noise_level=1.0e-4,  # 1/10000
    )
    powspec = AF.powspec(frames)  # abs(fft(x))
    logspec = theano.tensor.log10(powspec + AF.noise_level)
    # Insert a broadcastable middle axis so the 2-D warp matrix broadcasts
    # against every row of the spectrum: (rows, 1, bins).
    logspec3 = theano.tensor.DimShuffle(logspec.broadcastable,
                                        [0, 'x', 1])(logspec)

    # warp_mat dims: nfilts x nfft
    # Floor division keeps the bin count an integer under true division.
    warp_mat = AF.critical_band_fn(nfft=AF.frame_len // 2,
                                   fft_max_freq=AF.sample_rate / 2,
                                   nfilts=AF.n_bands)
    # The small offset guards against log10(0) for zero filter weights.
    log_warp_mat = numpy.log10(warp_mat + 1.0e-12)

    # Broadcasts to (rows, nfilts, nfft).
    sum3 = theano.tensor.add(log_warp_mat, logspec3)
    # Reduce over the FFT-bin axis only. The axis must be explicit: legacy
    # Theano defaulted to the last axis, current Theano reduces over all
    # axes and would yield a scalar here.
    feature = theano.tensor.max(sum3, axis=-1)
    mpc_segments = audiofeature.segment(feature, n_segments)
    return AF, mpc_segments, n_mpc
def mpc_w_max_approx_learnable(frames, n_mpc=32, n_segments=10,
                               use_sparse_warp=False):
    """Build max-approximation MPC features from the extractor's own matrix.

    Unlike ``mpc_w_max_approx``, the log-domain warp matrix is not computed
    locally: the extractor is constructed with a ``critical_band_fn`` that
    already returns ``log10(warp + 1e-12)``, and the matrix is read back from
    ``AF.critical_band_warp_dense``.
    NOTE(review): presumably that attribute is a trainable variable built
    from ``critical_band_fn`` -- confirm against ``AudioFeatures``.

    Args:
        frames: input handed unchanged to ``AudioFeatures.powspec``.
        n_mpc: number of bands requested from the extractor (``n_bands``).
        n_segments: segment count forwarded to ``audiofeature.segment``.
        use_sparse_warp: accepted for signature parity with
            ``mpc_w_clipping``; it is not used by this function.

    Returns:
        A tuple ``(extractor, segments, n_mpc)``.
    """

    def log_warp_mat_fn(*args, **kwargs):
        """Return log10 of the dense warp matrix, offset by 1e-12."""
        m = mel.MelMasters.warp_dense(*args, **kwargs)
        # The small offset guards against log10(0) for zero filter weights.
        return numpy.log10(m + 1.0e-12)

    AF = audiofeature.AudioFeatures(
        22050,
        1024,
        n_bands=n_mpc,
        scale_to_int_range=False,
        critical_band_fn=log_warp_mat_fn,
        power_sqr_abs=False,
        noise_level=1.0e-4,  # 1/10000
    )
    powspec = AF.powspec(frames)  # abs(fft(x))
    logspec = theano.tensor.log10(powspec + AF.noise_level)
    # Insert a broadcastable middle axis so the warp matrix broadcasts
    # against every row of the spectrum: (rows, 1, bins).
    logspec3 = theano.tensor.DimShuffle(logspec.broadcastable,
                                        [0, 'x', 1])(logspec)
    sum3 = theano.tensor.add(AF.critical_band_warp_dense, logspec3)
    # Reduce over the last axis only. The axis must be explicit: legacy
    # Theano defaulted to the last axis, current Theano reduces over all
    # axes and would yield a scalar here.
    feature = theano.tensor.max(sum3, axis=-1)
    mpc_segments = audiofeature.segment(feature, n_segments)
    return AF, mpc_segments, n_mpc