Пример #1
0
def mpc_features(frames, n_mpc=32, n_segments=10):
    """Build segmented loudness features for ``frames``.

    Args:
        frames: Input passed to ``AudioFeatures.loudness``.
        n_mpc: Number of bands requested from the extractor (``n_bands``).
        n_segments: Segment count forwarded to ``audiofeature.segment``.

    Returns:
        Tuple ``(extractor, segmented_features, n_mpc)``.
    """
    extractor_options = dict(
        n_bands=n_mpc,
        scale_to_int_range=False,
        critical_band_fn=mel.MelMasters.warp_dense,
        power_sqr_abs=False,
        noise_level=1.0e-4,  # 1/10000
    )
    # The two positional arguments are presumably sample rate and frame
    # length -- TODO confirm against AudioFeatures' signature.
    extractor = audiofeature.AudioFeatures(22050, 1024, **extractor_options)

    segmented = audiofeature.segment(extractor.loudness(frames), n_segments)
    return extractor, segmented, n_mpc
Пример #2
0
def mpc_w_softplus(frames, n_mpc=32, n_segments=10):
    """Build segmented log features using a softplus-smoothed spectrum.

    The output of ``AudioFeatures.audspec`` is passed through
    ``0.01 * softplus(100 * x)``, offset by ``1.0e-4`` and fed to ``log10``.

    Args:
        frames: Input passed to ``AudioFeatures.audspec``.
        n_mpc: Number of bands requested from the extractor (``n_bands``).
        n_segments: Segment count forwarded to ``audiofeature.segment``.

    Returns:
        Tuple ``(extractor, segmented_features, n_mpc)``.
    """
    tensor = theano.tensor
    extractor = audiofeature.AudioFeatures(
        22050,
        1024,
        n_bands=n_mpc,
        scale_to_int_range=False,
        critical_band_fn=mel.MelMasters.warp_dense,
        power_sqr_abs=False,
        noise_level=1.0e-4,  # 1/10000
    )

    sharpened = 100 * extractor.audspec(frames)
    smoothed = 0.01 * tensor.nnet.softplus(sharpened)
    # The constant offset keeps the log10 argument strictly positive.
    log_features = tensor.log10(smoothed + 1.0e-4)
    segmented = audiofeature.segment(log_features, n_segments)
    return extractor, segmented, n_mpc
Пример #3
0
def mpc_w_softplus(frames, n_mpc=32, n_segments=10):
    """Return segmented ``log10`` features of a softplus-rectified spectrum.

    Args:
        frames: Input passed to ``AudioFeatures.audspec``.
        n_mpc: Band count given to the extractor as ``n_bands``; also
            returned unchanged as the third tuple element.
        n_segments: Segment count forwarded to ``audiofeature.segment``.

    Returns:
        Tuple ``(extractor, segmented_features, n_mpc)``.
    """
    settings = {
        "n_bands": n_mpc,
        "scale_to_int_range": False,
        "critical_band_fn": mel.MelMasters.warp_dense,
        "power_sqr_abs": False,
        "noise_level": 1.0e-4,  # 1/10000
    }
    feature_builder = audiofeature.AudioFeatures(22050, 1024, **settings)

    softplus = theano.tensor.nnet.softplus
    spectrum = feature_builder.audspec(frames)
    # Softplus evaluated at 100x scale and rescaled by 0.01, plus a small
    # constant so the logarithm never sees zero.
    log_input = 0.01 * softplus(100 * spectrum) + 1.0e-4
    segments = audiofeature.segment(theano.tensor.log10(log_input), n_segments)
    return feature_builder, segments, n_mpc
Пример #4
0
def mpc_features(frames, n_mpc=32, n_segments=10):
    """Return segmented ``AudioFeatures.loudness`` output for ``frames``.

    Args:
        frames: Input passed to ``AudioFeatures.loudness``.
        n_mpc: Band count given to the extractor as ``n_bands``; also
            returned unchanged as the third tuple element.
        n_segments: Segment count forwarded to ``audiofeature.segment``.

    Returns:
        Tuple ``(extractor, segmented_features, n_mpc)``.
    """
    noise_floor = 1.0e-4  # 1/10000
    feature_builder = audiofeature.AudioFeatures(
        22050,
        1024,
        n_bands=n_mpc,
        scale_to_int_range=False,
        critical_band_fn=mel.MelMasters.warp_dense,
        power_sqr_abs=False,
        noise_level=noise_floor,
    )

    loudness = feature_builder.loudness(frames)
    return feature_builder, audiofeature.segment(loudness, n_segments), n_mpc
Пример #5
0
def mfcc_features(frames, n_mfcc=16, n_segments=10):
    """Build segmented cepstral features for ``frames``.

    The extractor is configured with ``2 * n_mfcc + 4`` bands and
    ``n_mfcc`` cepstral coefficients (``n_audcc``).

    Args:
        frames: Input passed to ``AudioFeatures.audcc``.
        n_mfcc: Number of cepstral coefficients requested.
        n_segments: Segment count forwarded to ``audiofeature.segment``.

    Returns:
        Tuple ``(extractor, segmented_features, n_mfcc)``.
    """
    band_count = 2 * n_mfcc + 4
    extractor_options = dict(
        sample_rate=22050,
        frame_len=1024,
        n_bands=band_count,
        n_audcc=n_mfcc,
        half_fft=True,
        hamming=True,
        scale_to_int_range=True,
        critical_band_fn=mel.melhtk_4k,
        power_sqr_abs=True,
        noise_level=1,
    )
    extractor = audiofeature.AudioFeatures(**extractor_options)

    segmented = audiofeature.segment(extractor.audcc(frames), n_segments)
    return extractor, segmented, n_mfcc
Пример #6
0
def mfcc_features(frames, n_mfcc=16, n_segments=10):
    """Return segmented ``AudioFeatures.audcc`` output for ``frames``.

    Args:
        frames: Input passed to ``AudioFeatures.audcc``.
        n_mfcc: Number of cepstral coefficients (``n_audcc``); the band
            count is derived from it as ``2 * n_mfcc + 4``.
        n_segments: Segment count forwarded to ``audiofeature.segment``.

    Returns:
        Tuple ``(extractor, segmented_features, n_mfcc)``.
    """
    feature_builder = audiofeature.AudioFeatures(
        sample_rate=22050, frame_len=1024,
        n_bands=2 * n_mfcc + 4, n_audcc=n_mfcc,
        half_fft=True, hamming=True,
        scale_to_int_range=True,
        critical_band_fn=mel.melhtk_4k,
        power_sqr_abs=True,
        noise_level=1,
    )
    cepstra = feature_builder.audcc(frames)
    return feature_builder, audiofeature.segment(cepstra, n_segments), n_mfcc
Пример #7
0
def mpc_w_clipping(frames, n_mpc=32, n_segments=10, use_sparse_warp=False):
    """Build segmented log features with the spectrum clipped from below.

    Values of ``AudioFeatures.audspec`` that are not greater than the
    extractor's ``noise_level`` are replaced by ``noise_level`` before
    ``log10`` is applied.

    Args:
        frames: Input passed to ``AudioFeatures.audspec``.
        n_mpc: Number of bands requested from the extractor (``n_bands``).
        n_segments: Segment count forwarded to ``audiofeature.segment``.
        use_sparse_warp: Forwarded unchanged to ``AudioFeatures``.

    Returns:
        Tuple ``(extractor, segmented_features, n_mpc)``.
    """
    tensor = theano.tensor
    extractor = audiofeature.AudioFeatures(
        22050,
        1024,
        n_bands=n_mpc,
        scale_to_int_range=False,
        critical_band_fn=mel.MelMasters.warp_dense,
        power_sqr_abs=False,
        use_sparse_warp=use_sparse_warp,
        noise_level=1.0e-4,  # 1/10000
    )

    spectrum = extractor.audspec(frames)
    floor = extractor.noise_level
    above_floor = spectrum > floor
    # Keep values above the floor; substitute the floor everywhere else.
    clipped = tensor.switch(above_floor, spectrum, floor)
    segmented = audiofeature.segment(tensor.log10(clipped), n_segments)
    return extractor, segmented, n_mpc
Пример #8
0
def mpc_w_clipping(frames, n_mpc=32, n_segments=10, use_sparse_warp=False):
    """Return segmented ``log10`` features of a noise-floor-clipped spectrum.

    Args:
        frames: Input passed to ``AudioFeatures.audspec``.
        n_mpc: Band count given to the extractor as ``n_bands``; also
            returned unchanged as the third tuple element.
        n_segments: Segment count forwarded to ``audiofeature.segment``.
        use_sparse_warp: Forwarded unchanged to ``AudioFeatures``.

    Returns:
        Tuple ``(extractor, segmented_features, n_mpc)``.
    """
    settings = {
        "n_bands": n_mpc,
        "scale_to_int_range": False,
        "critical_band_fn": mel.MelMasters.warp_dense,
        "power_sqr_abs": False,
        "use_sparse_warp": use_sparse_warp,
        "noise_level": 1.0e-4,  # 1/10000
    }
    feature_builder = audiofeature.AudioFeatures(22050, 1024, **settings)

    spec = feature_builder.audspec(frames)
    noise_floor = feature_builder.noise_level
    # Element-wise select: the value itself where it exceeds the floor,
    # otherwise the floor, so log10 never receives anything smaller.
    floored = theano.tensor.switch(spec > noise_floor, spec, noise_floor)
    log_features = theano.tensor.log10(floored)
    return feature_builder, audiofeature.segment(log_features, n_segments), n_mpc
Пример #9
0
def mpc_w_max_approx_learnable(frames, n_mpc=32, n_segments=10, use_sparse_warp=False):
    """Build segmented max-approximated log band features.

    The warp matrix is taken from the extractor's
    ``critical_band_warp_dense`` attribute; the extractor is constructed
    with a ``critical_band_fn`` that returns ``log10`` of the dense warp
    matrix. The log-domain weighted sum ``log(sum_k w[b, k] * p[k])`` is
    approximated by ``max_k(log w[b, k] + log p[k])``.

    Args:
        frames: Input passed to ``AudioFeatures.powspec``.
        n_mpc: Number of bands requested from the extractor (``n_bands``).
        n_segments: Segment count forwarded to ``audiofeature.segment``.
        use_sparse_warp: Accepted for signature compatibility; not used
            by this function.

    Returns:
        Tuple ``(extractor, segmented_features, n_mpc)``.
    """

    def log_warp_mat_fn(*args, **kwargs):
        # Log-domain warp matrix; the tiny offset avoids log10(0) on
        # zero-weight entries.
        m = mel.MelMasters.warp_dense(*args, **kwargs)
        return numpy.log10(m + 1.0e-12)

    AF = audiofeature.AudioFeatures(
        22050,
        1024,
        n_bands=n_mpc,
        scale_to_int_range=False,
        critical_band_fn=log_warp_mat_fn,
        power_sqr_abs=False,
        noise_level=1.0e-4,
    )  # 1/10000

    powspec = AF.powspec(frames)  # abs(fft(x))
    logspec = theano.tensor.log10(powspec + AF.noise_level)
    # Insert a broadcastable middle axis so the 2-D log spectrum can be
    # added to every row of the warp matrix.
    logspec3 = theano.tensor.DimShuffle(logspec.broadcastable, [0, "x", 1])(logspec)
    # Assumes AF.critical_band_warp_dense is (bands x fft bins), making the
    # sum (frames x bands x fft bins) -- TODO confirm.
    sum3 = theano.tensor.add(AF.critical_band_warp_dense, logspec3)
    # Reduce over the last (FFT-bin) axis only. Without ``axis`` the max
    # collapses the whole tensor to one scalar, losing the per-frame,
    # per-band structure that segmentation needs.
    feature = theano.tensor.max(sum3, axis=2)
    mpc_segments = audiofeature.segment(feature, n_segments)
    return AF, mpc_segments, n_mpc
Пример #10
0
def mpc_w_max_approx(frames, n_mpc=32, n_segments=10, use_sparse_warp=False):
    """Build segmented max-approximated log band features.

    The log-domain weighted sum ``log(sum_k w[b, k] * p[k])`` is
    approximated by ``max_k(log w[b, k] + log p[k])``, using a fixed
    (non-learnable) warp matrix computed from ``AF.critical_band_fn``.

    Args:
        frames: Input passed to ``AudioFeatures.powspec``.
        n_mpc: Number of bands requested from the extractor (``n_bands``).
        n_segments: Segment count forwarded to ``audiofeature.segment``.
        use_sparse_warp: Accepted for signature compatibility; not used
            by this function.

    Returns:
        Tuple ``(extractor, segmented_features, n_mpc)``.
    """
    AF = audiofeature.AudioFeatures(
        22050,
        1024,
        n_bands=n_mpc,
        scale_to_int_range=False,
        critical_band_fn=mel.MelMasters.warp_dense,
        power_sqr_abs=False,
        noise_level=1.0e-4,
    )  # 1/10000

    powspec = AF.powspec(frames)  # abs(fft(x))
    logspec = theano.tensor.log10(powspec + AF.noise_level)
    # Insert a broadcastable middle axis so the 2-D log spectrum can be
    # added to every row of the warp matrix.
    logspec3 = theano.tensor.DimShuffle(logspec.broadcastable, [0, "x", 1])(logspec)
    # warp_mat dims: nfilts x nfft
    # Floor division keeps the bin count an integer under Python 3 and is
    # identical to the old ``/`` under Python 2 for integer frame lengths.
    warp_mat = AF.critical_band_fn(
        nfft=AF.frame_len // 2,
        fft_max_freq=AF.sample_rate / 2,
        nfilts=AF.n_bands,
    )
    # The tiny offset avoids log10(0) on zero-weight entries.
    log_warp_mat = numpy.log10(warp_mat + 1.0e-12)

    # Broadcasts to (frames x nfilts x nfft).
    sum3 = theano.tensor.add(log_warp_mat, logspec3)
    # Reduce over the last (nfft) axis only. Without ``axis`` the max
    # collapses the whole tensor to one scalar, losing the per-frame,
    # per-band structure that segmentation needs.
    feature = theano.tensor.max(sum3, axis=2)
    mpc_segments = audiofeature.segment(feature, n_segments)
    return AF, mpc_segments, n_mpc
Пример #11
0
def mpc_w_max_approx(frames, n_mpc=32, n_segments=10, use_sparse_warp=False):
    """Return segmented log band features using a max approximation.

    Rather than summing warp-weighted spectrum values per band and taking
    the logarithm, the per-band value is the maximum over FFT bins of
    ``log10(weight) + log10(spectrum + noise_level)``.

    Args:
        frames: Input passed to ``AudioFeatures.powspec``.
        n_mpc: Band count given to the extractor as ``n_bands``; also
            returned unchanged as the third tuple element.
        n_segments: Segment count forwarded to ``audiofeature.segment``.
        use_sparse_warp: Accepted for signature compatibility; not used
            by this function.

    Returns:
        Tuple ``(extractor, segmented_features, n_mpc)``.
    """
    AF = audiofeature.AudioFeatures(22050,
                                    1024,
                                    n_bands=n_mpc,
                                    scale_to_int_range=False,
                                    critical_band_fn=mel.MelMasters.warp_dense,
                                    power_sqr_abs=False,
                                    noise_level=1.0e-4)  # 1/10000

    powspec = AF.powspec(frames)  # abs(fft(x))
    logspec = theano.tensor.log10(powspec + AF.noise_level)
    # Add a broadcastable middle axis for the band dimension.
    logspec3 = theano.tensor.DimShuffle(logspec.broadcastable,
                                        [0, 'x', 1])(logspec)
    # warp_mat dims: nfilts x nfft
    # ``//`` keeps nfft an integer on Python 3; for integer frame lengths
    # it equals what ``/`` produced on Python 2.
    warp_mat = AF.critical_band_fn(nfft=AF.frame_len // 2,
                                   fft_max_freq=AF.sample_rate / 2,
                                   nfilts=AF.n_bands)
    # Small offset so zero weights do not produce -inf.
    log_warp_mat = numpy.log10(warp_mat + 1.0e-12)

    # Broadcasts to (frames x nfilts x nfft).
    sum3 = theano.tensor.add(log_warp_mat, logspec3)
    # Take the max over the nfft axis only; a max without ``axis`` reduces
    # everything to a single scalar and discards frames and bands.
    feature = theano.tensor.max(sum3, axis=2)
    mpc_segments = audiofeature.segment(feature, n_segments)
    return AF, mpc_segments, n_mpc
Пример #12
0
def mpc_w_max_approx_learnable(frames,
                               n_mpc=32,
                               n_segments=10,
                               use_sparse_warp=False):
    """Return segmented max-approximated log band features.

    Unlike a fixed warp matrix computed locally, this variant reads the
    matrix from the extractor's ``critical_band_warp_dense`` attribute.
    The extractor is given a ``critical_band_fn`` that returns ``log10``
    of the dense warp matrix, so the matrix is already in the log domain.

    Args:
        frames: Input passed to ``AudioFeatures.powspec``.
        n_mpc: Band count given to the extractor as ``n_bands``; also
            returned unchanged as the third tuple element.
        n_segments: Segment count forwarded to ``audiofeature.segment``.
        use_sparse_warp: Accepted for signature compatibility; not used
            by this function.

    Returns:
        Tuple ``(extractor, segmented_features, n_mpc)``.
    """
    def log_warp_mat_fn(*args, **kwargs):
        # Small offset so zero weights do not produce -inf.
        m = mel.MelMasters.warp_dense(*args, **kwargs)
        return numpy.log10(m + 1.0e-12)

    AF = audiofeature.AudioFeatures(22050,
                                    1024,
                                    n_bands=n_mpc,
                                    scale_to_int_range=False,
                                    critical_band_fn=log_warp_mat_fn,
                                    power_sqr_abs=False,
                                    noise_level=1.0e-4)  # 1/10000

    powspec = AF.powspec(frames)  # abs(fft(x))
    logspec = theano.tensor.log10(powspec + AF.noise_level)
    # Add a broadcastable middle axis for the band dimension.
    logspec3 = theano.tensor.DimShuffle(logspec.broadcastable,
                                        [0, 'x', 1])(logspec)
    # Assumes AF.critical_band_warp_dense is (bands x fft bins), so the sum
    # broadcasts to (frames x bands x fft bins) -- TODO confirm.
    sum3 = theano.tensor.add(AF.critical_band_warp_dense, logspec3)
    # Take the max over the FFT-bin axis only; a max without ``axis``
    # reduces everything to a single scalar and discards frames and bands.
    feature = theano.tensor.max(sum3, axis=2)
    mpc_segments = audiofeature.segment(feature, n_segments)
    return AF, mpc_segments, n_mpc