Example #1
File: cq_test.py  Project: sevagh/nsgt
 def test_oct(self):
     siglen = int(10**np.random.uniform(4, 6))
     sig = np.random.random(siglen)
     fmin = np.random.random() * 200 + 20
     fmax = np.random.random() * (22048 - fmin) + fmin
     obins = np.random.randint(24) + 1
     scale = OctScale(fmin, fmax, obins)
     nsgt = NSGT(scale, fs=44100, Ls=len(sig))
     c = nsgt.forward(sig)
     s_r = nsgt.backward(c)
     self.assertTrue(np.allclose(sig, s_r, atol=1e-07))
Example #2
 def test_oct(self):
     siglen = int(10 ** np.random.uniform(4, 6))
     sig = np.random.random(siglen)
     fmin = np.random.random() * 200 + 20
     fmax = np.random.random() * (22048 - fmin) + fmin
     obins = np.random.randint(24) + 1
     scale = OctScale(fmin, fmax, obins)
     nsgt = NSGT(scale, fs=44100, Ls=len(sig))
     c = nsgt.forward(sig)
     s_r = nsgt.backward(c)
     self.assertTrue(np.allclose(sig, s_r))
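Both tests rely on the imports of the surrounding test module (numpy as np, NSGT, OctScale). A minimal standalone sketch of the same round-trip check, with fixed illustrative parameters instead of the randomized ones above:

import numpy as np
from nsgt import NSGT, OctScale

sig = np.random.random(44100)               # one second of noise at 44.1 kHz
scale = OctScale(20, 22050, 24)             # fmin, fmax, bins per octave
nsgt = NSGT(scale, fs=44100, Ls=len(sig))

c = nsgt.forward(sig)                       # forward transform
s_r = nsgt.backward(c)                      # inverse transform
print(np.allclose(sig, s_r, atol=1e-07))    # perfect reconstruction, as asserted above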
Example #3
def getiNSGT(C, L, Fs, resol=24):
    """
    Perform an inverse Nonstationary Gabor Transform
    :param C: An NBinsxNFrames CQT array
    :param L: Number of samples in audio file
    :param Fs: Sample rate
    :param resol: Number of CQT bins per octave
    """
    from nsgt import NSGT, OctScale
    scl = OctScale(50, Fs, resol)
    nsgt = NSGT(scl, Fs, L, matrixform=True)
    return nsgt.backward(C)
Example #4
def getNSGT(X, Fs, resol=24):
    """
    Perform a Nonstationary Gabor Transform implementation of CQT
    :param X: A 1D array of audio samples
    :param Fs: Sample rate
    :param resol: Number of CQT bins per octave
    """
    from nsgt import NSGT, OctScale
    scl = OctScale(50, Fs, resol)
    nsgt = NSGT(scl, Fs, len(X), matrixform=True)
    C = nsgt.forward(X)
    return np.array(C)
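Examples #3 and #4 form a forward/inverse pair. A hedged round-trip sketch combining them, where the file name and the use of librosa for loading are assumptions rather than part of the excerpts:

import numpy as np
import librosa

X, Fs = librosa.load("audio.wav", sr=44100, mono=True)   # hypothetical input

C = getNSGT(X, Fs, resol=24)             # NBins x NFrames complex CQT (Example #4)
Y = getiNSGT(C, len(X), Fs, resol=24)    # resynthesis from the coefficients (Example #3)
print(np.max(np.abs(X - Y)))             # round-trip residual should be tiny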
Example #5
File: hdf5.py  Project: sevagh/MiXiN
def _compute_hdf5_row(tup):
    spec_in = []
    spec_out = []
    all_ndarray_rows = []

    (mix, ref) = tup

    x_mix, _ = librosa.load(mix, sr=sample_rate, mono=True)
    x_ref, _ = librosa.load(ref, sr=sample_rate, mono=True)
    assert x_mix.shape == x_ref.shape

    n_samples = x_mix.shape[0]
    n_chunks = int(numpy.ceil(n_samples / chunk_size))
    n_pad = n_chunks * chunk_size - x_mix.shape[0]

    x_mix = numpy.concatenate((x_mix, numpy.zeros(n_pad)))
    x_ref = numpy.concatenate((x_ref, numpy.zeros(n_pad)))

    # calculate transform parameters
    L = chunk_size
    nsgt = NSGT(nsgt_scale, sample_rate, L, real=True, matrixform=True)

    for chunk in range(n_chunks - 1):
        x_mix_chunk = x_mix[chunk * chunk_size:(chunk + 1) * chunk_size]
        x_ref_chunk = x_ref[chunk * chunk_size:(chunk + 1) * chunk_size]

        # forward transform
        cmix = nsgt.forward(x_mix_chunk)
        Cmix = numpy.asarray(cmix)

        Cmagmix = numpy.abs(Cmix)

        cref = nsgt.forward(x_ref_chunk)
        Cref = numpy.asarray(cref)

        Cmagref = numpy.abs(Cref)

        spec_in.append(Cmagmix)
        spec_out.append(Cmagref)

    for spec_pairs in zip(spec_in, spec_out):
        all_ndarray_rows.append(
            numpy.concatenate((spec_pairs[0], spec_pairs[1]), axis=1))

    return all_ndarray_rows
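Each row returned above concatenates the mix and reference magnitude spectrograms along the frame axis (axis=1). A consumer of the stored rows could split them apart again, for example (the variable row is hypothetical):

import numpy

Cmagmix, Cmagref = numpy.split(row, 2, axis=1)   # undo the axis=1 concatenation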
Example #6
def getiNSGTGriffinLim(C, L, Fs, resol=24, randPhase=False, NIters=20):
    """
    Invert a (possibly magnitude-only) CQT via Griffin-Lim phase recovery
    :param C: An NBinsxNFrames CQT array
    :param L: Number of samples in audio file
    :param Fs: Sample rate
    :param resol: Number of CQT bins per octave
    :param randPhase: Start from a random phase estimate
    :param NIters: Number of Griffin-Lim iterations
    """
    from nsgt import NSGT, OctScale
    scl = OctScale(50, Fs, resol)
    nsgt = NSGT(scl, Fs, L, matrixform=True)
    eps = 2.2204e-16
    if randPhase:
        # seed the estimate with random phase (1j replaces the removed np.complex alias)
        C = np.exp(1j * np.random.rand(C.shape[0], C.shape[1])) * C
    A = np.array(C, dtype=complex)
    for i in range(NIters):
        print("iNSGT Griffin Lim Iteration %i of %i" % (i + 1, NIters))
        # round-trip the current estimate A (not C), so each iteration refines the phase
        Ai = np.array(nsgt.forward(nsgt.backward(A)))
        A = np.zeros_like(C)
        A[:, 0:Ai.shape[1]] = Ai
        # keep the target magnitudes from C, retain only the phase of the round trip
        Norm = np.sqrt(A * np.conj(A))
        Norm[Norm < eps] = 1
        A = np.abs(C) * (A / Norm)
    X = nsgt.backward(A)
    return np.real(X)
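A possible magnitude-only use of this Griffin-Lim helper, continuing the hedged sketch after Example #4 (X and Fs are the same assumed inputs):

Cmag = np.abs(getNSGT(X, Fs, resol=24))              # keep magnitudes only
x_rec = getiNSGTGriffinLim(Cmag, len(X), Fs, resol=24,
                           randPhase=True, NIters=20)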
Example #7
def gabor(s, args):
    """
    TODO: add default parameters to args"""
    fmin, fmax, real, matrixform, reducedform, rate, l_scale, bins, __time__ = args
    # define parameters for nsgt
    scales = {'log': LogScale, 'lin': LinScale, 'mel': MelScale, 'oct': OctScale}
    scale = scales[l_scale]
    
    # some default parameters
    Ls = len(s)
    if __time__:
        t1 = cputime()
        
    # parameters needed by nsgt  
    with warnings.catch_warnings(record=True) as w:
        scl = scale(fmin, fmax, bins)
        description = 'scl raises UserWarning:'
        userWarning_action(w, description)
        
        nsgt = NSGT(scl, rate, Ls, real, matrixform, reducedform)
        description = 'nsgt raises UserWarning:'
        userWarning_action(w, description)
        warnings.simplefilter("ignore")
        
    # forward transform 
    with warnings.catch_warnings(record=True) as w:
        c = nsgt.forward(s)
        description = 'UserWarning raised during forward transform:'
        userWarning_action(w, description)
        warnings.simplefilter("ignore")   
        
    logger.debug('Gabor transform performed on {} samples.'.format(Ls))
    if __time__:
        t2 = cputime()
        print('Gabor transform performed in {} seconds'.format(t2-t1))    
    return c
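This helper assumes the module-level imports and utilities of its project (the scale classes, NSGT, cputime, userWarning_action, logger, warnings). A hedged usage sketch, with the args tuple matching the unpacking at the top of gabor() and all values chosen for illustration:

# hypothetical call; values are illustrative only
args = (20,       # fmin
        22050,    # fmax
        True,     # real
        True,     # matrixform
        False,    # reducedform
        44100,    # rate
        'oct',    # l_scale: one of 'log', 'lin', 'mel', 'oct'
        24,       # bins
        False)    # __time__
c = gabor(s, args)   # s is a 1D numpy array of audio samples at 44.1 kHz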
Example #8
    def build_cqt_nsgt_pipeline(self):
        from nsgt import NSGT, LogScale, LinScale, MelScale, OctScale

        print("")
        print("Configuring cqt_NSGT pipeline...")

        scales = {
            'log': LogScale,
            'lin': LinScale,
            'mel': MelScale,
            'oct': OctScale
        }
        nsgt_scale = scales[getattr(self, 'nsgt_scale', 'log')]
        nsgt_scale = nsgt_scale(getattr(self, 'fmin', 20),
                                getattr(self, 'fmax', self.sample_rate / 2),
                                getattr(self, 'n_bins', 96))
        nsgt = NSGT(nsgt_scale,
                    self.sample_rate,
                    self.audio_length,
                    real=getattr(self, 'real', False),
                    matrixform=getattr(self, 'matrix_form', True),
                    reducedform=getattr(self, 'reduced_form', False))
        self.n_bins = len(nsgt.wins)
        self.n_frames = nsgt.ncoefs

        self.output_shape = (2, int(self.n_bins / 2), int(self.n_frames))

        self._add_audio_loader()
        self._add_signal_zeropadding()
        self._add_fade_out()
        self._add_norm()
        self.pre_pipeline.extend([lambda x: x.reshape(-1, ), nsgt.forward])
        self.post_pipeline.insert(0, nsgt.backward)
        self._add_mag_phase()
        self._add_log_mag()
        self._add_ifreq()
        # Add folded cqt
        if getattr(self, 'fold_cqt', False):
            self.pre_pipeline.append(fold_cqt)
            self.post_pipeline.insert(0, unfold_cqt)
            self.output_shape = (4, int(self.n_bins / 2), int(self.n_frames))
Example #9
    trlen = args.trlen

if not args.nonsliced:
    slicq = NSGT_sliced(scl,
                        sllen,
                        trlen,
                        fs,
                        real=True,
                        matrixform=True,
                        multichannel=True,
                        device="cpu")
else:
    slicq = NSGT(scl,
                 fs,
                 signal.shape[-1],
                 real=True,
                 matrixform=True,
                 multichannel=True,
                 device="cpu")

# total number of coefficients to represent input signal
#ncoefs = int(sf.frames*slicq.coef_factor)

# generator for forward transformation
if args.nonsliced:
    c = slicq.forward(signal)
else:
    c = slicq.forward((signal, ))

# add a batch
c = torch.unsqueeze(c, dim=0)
Example #10
except KeyError:
    parser.error('scale unknown')

scl = scale(args.fmin, args.fmax, args.bins)

times = []

for _ in range(args.time or 1):
    t1 = cputime()

    # calculate transform parameters
    Ls = len(s)

    nsgt = NSGT(scl,
                fs,
                Ls,
                real=args.real,
                matrixform=args.matrixform,
                reducedform=args.reducedform)

    # forward transform
    c = nsgt.forward(s)

    #        c = N.array(c)
    #        print "c",len(c),N.array(map(len,c))

    # inverse transform
    s_r = nsgt.backward(c)

    t2 = cputime()
    times.append(t2 - t1)
Example #11
    try:
        scale = scales[options.scale]
    except KeyError:
        parser.error('scale unknown')

    scl = scale(options.fmin,options.fmax,options.bins)

    times = []

    for _ in xrange(options.time or 1):
        t1 = cputime()
        
        # calculate transform parameters
        Ls = len(s)
        
        nsgt = NSGT(scl,fs,Ls,real=options.real,matrixform=options.matrixform,reducedform=options.reducedform)
        
        # forward transform 
        c = nsgt.forward(s)

#        c = N.array(c)
#        print "c",len(c),N.array(map(len,c))
    
        # inverse transform 
        s_r = nsgt.backward(c)
 
        t2 = cputime()
        times.append(t2-t1)

    norm = lambda x: N.sqrt(N.sum(N.abs(N.square(x))))
    rec_err = norm(s-s_r)/norm(s)
Example #12
def xtract_mixin(x,
                 instrumental=False,
                 single_model=False,
                 pretrained_model_dir=None):
    if pretrained_model_dir is None:
        p_model = components["percussive"]["model_file"]
        h_model = components["harmonic"]["model_file"]
        v_model = components["vocal"]["model_file"]
    else:
        p_model = os.path.join(pretrained_model_dir, "model_percussive.h5")
        h_model = os.path.join(pretrained_model_dir, "model_harmonic.h5")
        v_model = os.path.join(pretrained_model_dir, "model_vocal.h5")

    print("Loading models from:\n\t{0}\n\t{1}\n\t{2}".format(
        h_model, p_model, v_model))
    percussive_model = Model(p_model).model
    harmonic_model = Model(h_model).model
    vocal_model = Model(v_model).model

    n_samples = x.shape[0]
    n_chunks = int(numpy.ceil(n_samples / chunk_size))
    n_pad = n_chunks * chunk_size - x.shape[0]

    x = numpy.concatenate((x, numpy.zeros(n_pad)))
    x_out_h = numpy.zeros_like(x)
    x_out_p = numpy.zeros_like(x)
    x_out_v = numpy.zeros_like(x)

    # calculate transform parameters
    L = chunk_size
    nsgt = NSGT(nsgt_scale, sample_rate, L, real=True, matrixform=True)

    for chunk in range(n_chunks - 1):
        s = x[chunk * chunk_size:(chunk + 1) * chunk_size]

        # forward transform
        c = nsgt.forward(s)
        C = numpy.asarray(c)

        Cmag_orig, Cphase_orig = librosa.magphase(C)
        Cmag_for_nn = numpy.reshape(Cmag_orig, (1, dim_1, dim_2, 1))

        # inference from model
        Cmag_p = percussive_model.predict(Cmag_for_nn)
        Cmag_p = numpy.reshape(Cmag_p, (dim_1, dim_2))

        Cmag_h = harmonic_model.predict(Cmag_for_nn)
        Cmag_h = numpy.reshape(Cmag_h, (dim_1, dim_2))

        Cmag_v = numpy.zeros_like(Cmag_h)
        if not instrumental:
            Cmag_v = vocal_model.predict(Cmag_for_nn)
            Cmag_v = numpy.reshape(Cmag_v, (dim_1, dim_2))

        if single_model:
            Ch_desired = _pol2cart(Cmag_h, Cphase_orig)
            Cp_desired = _pol2cart(Cmag_p, Cphase_orig)

            if not instrumental:
                Cv_desired = _pol2cart(Cmag_v, Cphase_orig)
        else:
            # soft mask first
            Mp = numpy.ones_like(Cmag_orig)
            Mh = numpy.ones_like(Cmag_orig)
            Mv = numpy.ones_like(Cmag_orig)

            tot = (numpy.power(Cmag_p, 2.0) + numpy.power(Cmag_h, 2.0) +
                   numpy.power(Cmag_v, 2.0) + K.epsilon())
            Mp = numpy.divide(numpy.power(Cmag_p, 2.0), tot)
            Mh = numpy.divide(numpy.power(Cmag_h, 2.0), tot)
            Mv = numpy.divide(numpy.power(Cmag_v, 2.0), tot)

            Cp_desired = numpy.multiply(Mp, C)
            Ch_desired = numpy.multiply(Mh, C)
            Cv_desired = numpy.multiply(Mv, C)

        # inverse transform
        s_p = nsgt.backward(Cp_desired)
        s_h = nsgt.backward(Ch_desired)

        s_v = numpy.zeros_like(s_h)
        if not instrumental:
            s_v = nsgt.backward(Cv_desired)

        x_out_p[chunk * chunk_size:(chunk + 1) * chunk_size] = s_p
        x_out_v[chunk * chunk_size:(chunk + 1) * chunk_size] = s_v
        x_out_h[chunk * chunk_size:(chunk + 1) * chunk_size] = s_h

    # strip off padding
    if n_pad > 0:
        x_out_p = x_out_p[:-n_pad]
        x_out_h = x_out_h[:-n_pad]
        x_out_v = x_out_v[:-n_pad]

    x_out_h = x_out_h.astype(numpy.float32)
    x_out_p = x_out_p.astype(numpy.float32)
    x_out_v = x_out_v.astype(numpy.float32)

    return x_out_h, x_out_p, x_out_v
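The else branch above builds Wiener-style soft masks from the three predicted magnitude spectra. A minimal standalone sketch of just that step (the function name and eps value are illustrative; the formula mirrors the code above):

import numpy as np

def soft_mask_sketch(Cmag_p, Cmag_h, Cmag_v, C, eps=1e-7):
    # total power of the three estimated sources; eps guards against division by zero
    tot = Cmag_p ** 2 + Cmag_h ** 2 + Cmag_v ** 2 + eps
    # each source keeps its share of the mixture's power at every bin/frame
    Cp = (Cmag_p ** 2 / tot) * C
    Ch = (Cmag_h ** 2 / tot) * C
    Cv = (Cmag_v ** 2 / tot) * C
    return Cp, Ch, Cv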