示例#1
0
文件: hdf5.py 项目: sevagh/MiXiN
def _compute_hdf5_row(tup):
    """Build the spectrogram rows for one (mix, reference) pair of audio files.

    Both files are loaded as mono audio at ``sample_rate`` and zero-padded up
    to a whole number of ``chunk_size``-sample chunks. Each processed chunk is
    run through the forward NSGT, and the magnitudes of the mix and reference
    coefficients are joined along axis 1.

    :param tup: ``(mix, ref)`` pair; each item is handed to ``librosa.load``
    :return: list with one concatenated magnitude array per processed chunk
    """
    mix_path, ref_path = tup

    mix_audio, _ = librosa.load(mix_path, sr=sample_rate, mono=True)
    ref_audio, _ = librosa.load(ref_path, sr=sample_rate, mono=True)
    assert mix_audio.shape == ref_audio.shape

    # Pad both signals with zeros up to the next multiple of chunk_size.
    total_samples = mix_audio.shape[0]
    n_chunks = int(numpy.ceil(total_samples / chunk_size))
    padding = numpy.zeros(n_chunks * chunk_size - total_samples)
    mix_audio = numpy.concatenate((mix_audio, padding))
    ref_audio = numpy.concatenate((ref_audio, padding))

    # One transform object, sized for a single chunk, is reused throughout.
    transform = NSGT(
        nsgt_scale, sample_rate, chunk_size, real=True, matrixform=True)

    mix_magnitudes = []
    ref_magnitudes = []
    # NOTE(review): the loop stops one chunk early, so the final (padded)
    # chunk never produces a row -- confirm this is intentional.
    for idx in range(n_chunks - 1):
        window = slice(idx * chunk_size, (idx + 1) * chunk_size)

        mix_coeffs = numpy.asarray(transform.forward(mix_audio[window]))
        mix_magnitudes.append(numpy.abs(mix_coeffs))

        ref_coeffs = numpy.asarray(transform.forward(ref_audio[window]))
        ref_magnitudes.append(numpy.abs(ref_coeffs))

    # Network input (mix) and target (reference) sit side by side in a row.
    return [
        numpy.concatenate((mix_mag, ref_mag), axis=1)
        for mix_mag, ref_mag in zip(mix_magnitudes, ref_magnitudes)
    ]
示例#2
0
 def test_oct(self):
     """Check that an octave-scale NSGT reconstructs a random signal.

     The signal length (1e4 to 1e6 samples), the frequency range and the
     number of bins per octave (1 to 24) are all drawn at random.
     """
     n_samples = int(10 ** np.random.uniform(4, 6))
     signal = np.random.random(n_samples)
     low_freq = np.random.random() * 200 + 20
     high_freq = np.random.random() * (22048 - low_freq) + low_freq
     bins_per_octave = np.random.randint(24) + 1
     transform = NSGT(OctScale(low_freq, high_freq, bins_per_octave),
                      fs=44100, Ls=len(signal))
     # forward then inverse transform should give the input back
     restored = transform.backward(transform.forward(signal))
     self.assertTrue(np.allclose(signal, restored))
示例#3
0
文件: cq_test.py 项目: sevagh/nsgt
 def test_oct(self):
     """Round-trip a random signal through an octave-scale NSGT.

     Signal length, frequency bounds and bins per octave are randomised;
     the reconstruction must match the input within ``atol=1e-07``.
     """
     length = int(10**np.random.uniform(4, 6))
     original = np.random.random(length)
     f_low = np.random.random() * 200 + 20
     f_high = np.random.random() * (22048 - f_low) + f_low
     n_bins = np.random.randint(24) + 1
     octave_scale = OctScale(f_low, f_high, n_bins)
     transform = NSGT(octave_scale, fs=44100, Ls=len(original))
     coefficients = transform.forward(original)
     reconstructed = transform.backward(coefficients)
     is_close = np.allclose(original, reconstructed, atol=1e-07)
     self.assertTrue(is_close)
示例#4
0
def getNSGT(X, Fs, resol=24):
    """
    Compute the matrix-form Nonstationary Gabor Transform of a signal on an
    octave scale starting at 50 Hz (a CQT-style representation).
    :param X: A 1D array of audio samples
    :param Fs: Sample rate
    :param resol: Number of CQT bins per octave
    :returns: The forward transform coefficients wrapped in a numpy array
    """
    from nsgt import NSGT, OctScale
    # The octave scale runs from 50 up to Fs with `resol` bins per octave.
    octave_scale = OctScale(50, Fs, resol)
    transform = NSGT(octave_scale, Fs, len(X), matrixform=True)
    return np.array(transform.forward(X))
示例#5
0
def getiNSGTGriffinLim(C, L, Fs, resol=24, randPhase=False, NIters=20):
    """
    Invert NSGT coefficients to audio with Griffin-Lim style phase retrieval.

    Each iteration synthesises audio from the current coefficient estimate,
    re-analyses it, and keeps only the phase of the result while restoring
    the magnitudes of ``C``.

    :param C: 2D array of NSGT coefficients (or magnitudes) in matrix form
    :param L: Length in samples of the signal to reconstruct
    :param Fs: Sample rate
    :param resol: Number of bins per octave of the octave scale
    :param randPhase: If True, multiply ``C`` by random unit-modulus phase
        factors before iterating
    :param NIters: Number of Griffin-Lim iterations
    :returns: The real part of the reconstructed signal
    """
    from nsgt import NSGT, OctScale
    scl = OctScale(50, Fs, resol)
    nsgt = NSGT(scl, Fs, L, matrixform=True)
    eps = 2.2204e-16
    C = np.asarray(C)
    if randPhase:
        # 1j replaces np.complex(0, 1); the np.complex alias was removed in
        # NumPy 1.24.
        C = np.exp(1j * np.random.rand(C.shape[0], C.shape[1])) * C
    target_mag = np.abs(C)
    A = np.array(C, dtype=complex)
    for i in range(NIters):
        print("iNSGT Griffin Lim Iteration %i of %i" % (i + 1, NIters))
        # Re-project the *current* estimate. Projecting C every time made
        # all iterations identical, so NIters had no effect.
        Ai = np.array(nsgt.forward(nsgt.backward(A)))
        # Explicit complex buffer: zeros_like(C) is real when C holds
        # magnitudes, which would drop the phase of Ai on assignment.
        A = np.zeros(C.shape, dtype=complex)
        A[:, 0:Ai.shape[1]] = Ai
        Norm = np.abs(A)
        Norm[Norm < eps] = 1  # avoid dividing by (near) zero
        # Keep the estimated phase, impose the target magnitudes.
        A = target_mag * (A / Norm)
    X = nsgt.backward(A)
    return np.real(X)
def gabor(s, args):
    """
    Run the forward nonstationary Gabor transform on ``s``.

    :param s: input signal; its length is used as the transform length
    :param args: 9-item sequence unpacked, in order, as ``fmin``, ``fmax``,
        ``real``, ``matrixform``, ``reducedform``, ``rate``, ``l_scale``
        (one of ``'log'``, ``'lin'``, ``'mel'``, ``'oct'``), ``bins`` and a
        timing flag
    :return: whatever ``NSGT.forward`` returns for ``s``

    TODO: add default parameters to args
    """
    (fmin, fmax, real, matrixform, reducedform,
     rate, l_scale, bins, timed) = args

    # Resolve the scale class from its short name.
    scale_cls = {'log': LogScale, 'lin': LinScale,
                 'mel': MelScale, 'oct': OctScale}[l_scale]

    n_samples = len(s)
    if timed:
        started = cputime()

    # Build the scale and the transform, handing any recorded warnings to
    # userWarning_action after each step.
    with warnings.catch_warnings(record=True) as caught:
        scl = scale_cls(fmin, fmax, bins)
        userWarning_action(caught, 'scl raises UserWarning:')

        transform = NSGT(scl, rate, n_samples, real, matrixform, reducedform)
        userWarning_action(caught, 'nsgt raises UserWarning:')
        warnings.simplefilter("ignore")

    # Forward transform, with the same warning handling.
    with warnings.catch_warnings(record=True) as caught:
        coeffs = transform.forward(s)
        userWarning_action(
            caught, 'UserWarning raised during forward transform:')
        warnings.simplefilter("ignore")

    logger.debug('Gabor transform performed on {} samples.'.format(n_samples))
    if timed:
        finished = cputime()
        print('Gabor transform performed in {} seconds'.format(
            finished - started))
    return coeffs
示例#7
0
# Time the forward + inverse transform; at least one pass is always made.
n_passes = args.time or 1
for _ in range(n_passes):
    t1 = cputime()

    # The transform is rebuilt on every pass, so its setup is timed as well.
    Ls = len(s)
    nsgt = NSGT(
        scl, fs, Ls,
        real=args.real,
        matrixform=args.matrixform,
        reducedform=args.reducedform,
    )

    c = nsgt.forward(s)
    s_r = nsgt.backward(c)

    t2 = cputime()
    times.append(t2 - t1)


def norm(x):
    """Return the square root of the summed squared magnitudes of x."""
    return np.sqrt(np.sum(np.abs(np.square(x))))


# Reconstruction error relative to the norm of the input signal.
rec_err = norm(s - s_r) / norm(s)
print("Reconstruction error: %.3e" % rec_err)

# The reported spread is half the standard deviation of the pass times.
timing_summary = (np.mean(times), np.std(times) / 2, np.min(times))
print("Calculation time: %.3f±%.3fs (min=%.3f s)" % timing_summary)
示例#8
0
        parser.error('scale unknown')

    # Build the frequency scale from the parsed options.
    scl = scale(options.fmin,options.fmax,options.bins)

    # Duration of each forward + inverse pass, as measured with cputime().
    times = []

    # Run options.time passes, or a single pass when that option is falsy.
    for _ in xrange(options.time or 1):
        t1 = cputime()
        
        # calculate transform parameters
        # (the transform is rebuilt on every pass, so its setup is timed too)
        Ls = len(s)
        
        nsgt = NSGT(scl,fs,Ls,real=options.real,matrixform=options.matrixform,reducedform=options.reducedform)
        
        # forward transform 
        c = nsgt.forward(s)

#        c = N.array(c)
#        print "c",len(c),N.array(map(len,c))
    
        # inverse transform 
        s_r = nsgt.backward(c)
 
        t2 = cputime()
        times.append(t2-t1)

    # Square root of the summed squared magnitudes of x.
    norm = lambda x: N.sqrt(N.sum(N.abs(N.square(x))))
    # Reconstruction error relative to the norm of the input signal;
    # s_r is the result of the last pass.
    rec_err = norm(s-s_r)/norm(s)
    print "Reconstruction error: %.3e"%rec_err
    # Reports mean, half the standard deviation, and minimum of the pass times.
    print "Calculation time: %.3f +- %.3f s (min=%.3f s)"%(N.mean(times),N.std(times)/2,N.min(times))
示例#9
0
def xtract_mixin(x,
                 instrumental=False,
                 single_model=False,
                 pretrained_model_dir=None):
    """Split audio into harmonic, percussive and vocal components.

    The signal is zero-padded to a whole number of ``chunk_size``-sample
    chunks. Each chunk is transformed with the NSGT, the magnitude of its
    coefficients is passed to the component models, and the estimated
    components are transformed back and written to the output buffers.

    :param x: 1-D array of audio samples (it is concatenated with 1-D
        zero padding); the transform is built with ``sample_rate``
    :param instrumental: if true, the vocal model is not run and the vocal
        output is all zeros
    :param single_model: if true, each component is rebuilt from its own
        model's magnitude and the phase of the input chunk; otherwise the
        magnitude estimates are turned into soft masks (squared magnitude
        over the sum of squared magnitudes) applied to the input
        coefficients
    :param pretrained_model_dir: directory containing
        ``model_percussive.h5``, ``model_harmonic.h5`` and
        ``model_vocal.h5``; when None the paths come from ``components``
    :return: tuple ``(harmonic, percussive, vocal)`` of float32 arrays, each
        as long as the input ``x``
    """
    if pretrained_model_dir is None:
        p_model = components["percussive"]["model_file"]
        h_model = components["harmonic"]["model_file"]
        v_model = components["vocal"]["model_file"]
    else:
        p_model = os.path.join(pretrained_model_dir, "model_percussive.h5")
        h_model = os.path.join(pretrained_model_dir, "model_harmonic.h5")
        v_model = os.path.join(pretrained_model_dir, "model_vocal.h5")

    print("Loading models from:\n\t{0}\n\t{1}\n\t{2}".format(
        h_model, p_model, v_model))
    percussive_model = Model(p_model).model
    harmonic_model = Model(h_model).model
    vocal_model = Model(v_model).model

    # Zero-pad so the signal splits into whole chunks.
    n_samples = x.shape[0]
    n_chunks = int(numpy.ceil(n_samples / chunk_size))
    n_pad = n_chunks * chunk_size - n_samples

    x = numpy.concatenate((x, numpy.zeros(n_pad)))
    x_out_h = numpy.zeros_like(x)
    x_out_p = numpy.zeros_like(x)
    x_out_v = numpy.zeros_like(x)

    # calculate transform parameters
    L = chunk_size
    nsgt = NSGT(nsgt_scale, sample_rate, L, real=True, matrixform=True)

    # Process every chunk, including the final (possibly padded) one.
    # Stopping at n_chunks - 1 left the tail of all three outputs silent and
    # returned pure silence for inputs shorter than one chunk.
    for chunk in range(n_chunks):
        window = slice(chunk * chunk_size, (chunk + 1) * chunk_size)
        s = x[window]

        # forward transform
        c = nsgt.forward(s)
        C = numpy.asarray(c)

        Cmag_orig, Cphase_orig = librosa.magphase(C)
        # Reshape the magnitudes to the 4-D layout the models are fed.
        Cmag_for_nn = numpy.reshape(Cmag_orig, (1, dim_1, dim_2, 1))

        # inference from model
        Cmag_p = percussive_model.predict(Cmag_for_nn)
        Cmag_p = numpy.reshape(Cmag_p, (dim_1, dim_2))

        Cmag_h = harmonic_model.predict(Cmag_for_nn)
        Cmag_h = numpy.reshape(Cmag_h, (dim_1, dim_2))

        # Without a vocal estimate the vocal magnitude stays at zero.
        Cmag_v = numpy.zeros_like(Cmag_h)
        if not instrumental:
            Cmag_v = vocal_model.predict(Cmag_for_nn)
            Cmag_v = numpy.reshape(Cmag_v, (dim_1, dim_2))

        if single_model:
            # Use each model's magnitude with the phase of the input chunk.
            Ch_desired = _pol2cart(Cmag_h, Cphase_orig)
            Cp_desired = _pol2cart(Cmag_p, Cphase_orig)

            if not instrumental:
                Cv_desired = _pol2cart(Cmag_v, Cphase_orig)
        else:
            # Soft masks: each component's share of the total squared
            # magnitude; the epsilon keeps the denominator non-zero.
            pow_p = numpy.power(Cmag_p, 2.0)
            pow_h = numpy.power(Cmag_h, 2.0)
            pow_v = numpy.power(Cmag_v, 2.0)
            tot = pow_p + pow_h + pow_v + K.epsilon()

            Mp = numpy.divide(pow_p, tot)
            Mh = numpy.divide(pow_h, tot)
            Mv = numpy.divide(pow_v, tot)

            Cp_desired = numpy.multiply(Mp, C)
            Ch_desired = numpy.multiply(Mh, C)
            Cv_desired = numpy.multiply(Mv, C)

        # inverse transform
        s_p = nsgt.backward(Cp_desired)
        s_h = nsgt.backward(Ch_desired)

        s_v = numpy.zeros_like(s_h)
        if not instrumental:
            s_v = nsgt.backward(Cv_desired)

        x_out_p[window] = s_p
        x_out_v[window] = s_v
        x_out_h[window] = s_h

    # strip off padding
    if n_pad > 0:
        x_out_p = x_out_p[:-n_pad]
        x_out_h = x_out_h[:-n_pad]
        x_out_v = x_out_v[:-n_pad]

    x_out_h = x_out_h.astype(numpy.float32)
    x_out_p = x_out_p.astype(numpy.float32)
    x_out_v = x_out_v.astype(numpy.float32)

    return x_out_h, x_out_p, x_out_v