Exemplo n.º 1
0
    def runit(self, siglen, fmin, fmax, obins, sllen, trlen, real):
        """Round-trip a signal through the sliced NSGT and assert it is recovered.

        The first ``siglen`` samples of the module-level ``rndsig`` are sent
        through ``NSGT_sliced.forward`` followed by ``NSGT_sliced.backward``.
        The test passes when the reconstruction matches the input within an
        absolute tolerance of 1e-3; otherwise the parameters and the offending
        deviations are printed before the assertion fails.

        Args:
            siglen: number of samples taken from ``rndsig``.
            fmin, fmax, obins: arguments forwarded to ``OctScale``.
            sllen: forwarded to ``NSGT_sliced`` as ``sl_len``.
            trlen: forwarded to ``NSGT_sliced`` as ``tr_area``.
            real: forwarded to ``NSGT_sliced`` as ``real``.
        """
        atol = 1.e-3
        original = rndsig[:siglen]

        transform = NSGT_sliced(
            OctScale(fmin, fmax, obins),
            fs=44100,
            sl_len=sllen,
            tr_area=trlen,
            real=real,
        )

        # forward takes the signal wrapped in a one-element tuple
        coefficients = transform.forward((original,))
        pieces = transform.backward(coefficients)

        # join the reconstructed pieces and trim to the input length
        restored = np.concatenate([list(piece) for piece in pieces])[:len(original)]

        matches = np.allclose(original, restored, atol=atol)
        if not matches:
            print("Failing params:", siglen, fmin, fmax, obins, sllen, trlen, real)
            deviation = np.abs(restored - original)
            print("Error", np.where(deviation > atol), np.max(deviation))
        self.assertTrue(matches)
Exemplo n.º 2
0
    def runit(self, siglen, fmin, fmax, obins, sllen, trlen, real):
        """Check that the sliced NSGT reconstructs a signal from its transform.

        Takes the first ``siglen`` samples of the module-level ``rndsig``,
        applies ``NSGT_sliced.forward`` and ``NSGT_sliced.backward`` and asserts
        that the result equals the input within an absolute tolerance of 1e-3.
        When the check fails, the parameters and the positions and maximum of
        the deviation are printed before the assertion is raised.

        Args:
            siglen: number of samples taken from ``rndsig``.
            fmin, fmax, obins: arguments forwarded to ``OctScale``.
            sllen: forwarded to ``NSGT_sliced`` as ``sl_len``.
            trlen: forwarded to ``NSGT_sliced`` as ``tr_area``.
            real: forwarded to ``NSGT_sliced`` as ``real``.
        """
        sig = rndsig[:siglen]

        scale = OctScale(fmin, fmax, obins)
        nsgt = NSGT_sliced(scale, fs=44100, sl_len=sllen, tr_area=trlen, real=real)

        # forward takes the signal wrapped in a one-element tuple
        c = nsgt.forward((sig,))

        rc = nsgt.backward(c)

        # On Python 3 ``map`` is lazy, and np.concatenate needs a real
        # sequence, so materialize the pieces in a list first.
        s_r = np.concatenate([list(piece) for piece in rc])[:len(sig)]

        close = np.allclose(sig, s_r, atol=1.e-3)
        if not close:
            # print() calls instead of Python-2-only print statements
            print("Failing params:", siglen, fmin, fmax, obins, sllen, trlen, real)
            dev = np.abs(s_r-sig)
            print("Error", np.where(dev>1.e-3), np.max(dev))
        self.assertTrue(close)
Exemplo n.º 3
0
def main():
    """Split an audio file into harmonic and percussive WAV files.

    The input file is loaded at its native sample rate and processed in
    overlapping windows: each window is ``4 * stream_size`` samples long and
    successive windows start ``stream_size`` samples apart. Every window is
    transformed with ``NSGT_sliced`` (octave scale 80 Hz - 20 kHz, 12 bins
    per octave), the coefficient magnitudes are median-filtered along two
    different axes, masks are derived from the filtered magnitudes, and the
    masked coefficients are transformed back and written into the output
    buffers.

    Command-line arguments:
        input: path of the audio file to process.
        --mask: ``"soft"`` (default) or ``"hard"`` masking strategy.
        --outdir: directory receiving ``<prefix>_harmonic.wav`` and
            ``<prefix>_percussive.wav`` (default ``./``).
        --stream-size: hop / transition length in samples (default 1024).

    Returns:
        0 on completion.
    """
    parser = ArgumentParser()

    parser.add_argument(
        "--mask",
        type=str,
        default="soft",
        choices=("hard", "soft"),
        help="mask strategy",
    )
    parser.add_argument("--outdir", type=str, default="./", help="output directory")
    parser.add_argument(
        "--stream-size",
        type=int,
        default=1024,
        help="stream size for simulated realtime from wav (default=%(default)s)",
    )
    parser.add_argument("input", type=str, help="input file")

    args = parser.parse_args()

    # basename honours the platform's path separators instead of assuming "/"
    prefix = os.path.basename(args.input).split("_")[0]

    harm_out = os.path.join(args.outdir, prefix + "_harmonic.wav")
    perc_out = os.path.join(args.outdir, prefix + "_percussive.wav")
    print("writing files to {0}, {1}".format(harm_out, perc_out))

    # median filter widths used for the H and P estimates below
    lharm = 17
    lperc = 7

    # calculate transform parameters
    nsgt_scale = OctScale(80, 20000, 12)

    trlen = args.stream_size  # transition length
    sllen = 4 * args.stream_size  # slice length

    # sr=None keeps the file's own sample rate
    x, fs = librosa.load(args.input, sr=None)
    xh = numpy.zeros_like(x)
    xp = numpy.zeros_like(x)

    hop = trlen
    n_chunks = x.shape[0] // hop
    # the loop deliberately stops one hop early; clamp so that inputs shorter
    # than one hop give zero iterations rather than a negative count
    n_iters = max(n_chunks - 1, 0)

    # small constant that keeps the mask denominators non-zero
    eps = numpy.finfo(numpy.float32).eps

    slicq = NSGT_sliced(
        nsgt_scale,
        sllen,
        trlen,
        fs,
        real=True,
        matrixform=True,
    )
    total_time = 0.0

    for chunk in range(n_iters):
        t1 = cputime()

        start = chunk * hop
        end = start + sllen

        # near the end of the signal this slice is shorter than sllen
        s = x[start:end]
        signal = (s,)

        c = slicq.forward(signal)

        c = list(c)
        C = numpy.asarray(c)

        Cmag = numpy.abs(C)
        # H: median over axis 1 (width lharm); P: median over axis 2 (width lperc)
        H = scipy.ndimage.median_filter(Cmag, size=(1, lharm, 1))
        P = scipy.ndimage.median_filter(Cmag, size=(1, 1, lperc))

        # NOTE(review): the soft branch builds Mp from H and Mh from P, while
        # the hard branch builds Mh from H and Mp from P. The two branches
        # therefore disagree on which estimate feeds which output; which one
        # is intended depends on the axis layout of C - confirm.
        if args.mask == "soft":
            # soft mask first
            tot = numpy.power(H, 2.0) + numpy.power(P, 2.0) + eps
            Mp = numpy.divide(numpy.power(H, 2.0), tot)
            Mh = numpy.divide(numpy.power(P, 2.0), tot)
        else:
            Mh = numpy.divide(H, P + eps) > 2.0
            Mp = numpy.divide(P, H + eps) >= 2.0

        Cp = numpy.multiply(Mp, C)
        Ch = numpy.multiply(Mh, C)

        # generator for backward transformation
        outseq_h = slicq.backward(Ch)
        outseq_p = slicq.backward(Cp)

        # make single output array from iterator
        sh_r = next(reblock(outseq_h, len(s), fulllast=False))
        sh_r = sh_r.real

        sp_r = next(reblock(outseq_p, len(s), fulllast=False))
        sp_r = sp_r.real

        # later windows overwrite the overlapping part of earlier ones
        xh[start:end] = sh_r
        xp[start:end] = sp_r

        t2 = cputime()
        total_time += t2 - t1

    # average over the iterations actually executed; guard the empty case
    time_per_iter = total_time / n_iters if n_iters else 0.0
    print("Calculation time per iter: %fs" % time_per_iter)

    scipy.io.wavfile.write(harm_out, fs, xh)
    scipy.io.wavfile.write(perc_out, fs, xp)

    return 0
Exemplo n.º 4
0
                    matrixform=args.matrixform, reducedform=args.reducedform, 
                    multithreading=args.multithreading
                    )

t1 = cputime()

# the transform is handed the signal wrapped in a one-element tuple
signal = (s,)

# run the forward transformation and realize every coefficient slice
c = list(slicq.forward(signal))

# generator for backward transformation
outseq = slicq.backward(c)

# take the first reblocked output of len(s) samples and keep its real part
s_r = next(reblock(outseq, len(s), fulllast=False)).real

t2 = cputime()


def norm(x):
    """Return the square root of the summed squared magnitudes of ``x``."""
    magnitude = np.abs(x)
    energy = np.abs(np.square(magnitude))
    return np.sqrt(np.sum(energy))


# reconstruction error relative to the norm of the input signal
rec_err = norm(s-s_r)/norm(s)
print("Reconstruction error: %.3e"%rec_err)
print("Calculation time: %.3fs"%(t2-t1))

# Compare the sliced coefficients with non-sliced ones
if False:
    # not implemented yet!
Exemplo n.º 5
0
    # Time a full forward/backward pass of the sliced transform over ``s``
    # and report the relative reconstruction error.
    t1 = time()

    # the transform is handed the signal wrapped in a one-element tuple
    signal = (s,)

    # generator for forward transformation
    c = slicq.forward(signal)

    # realize transform from generator
    c = list(c)

    # generator for backward transformation
    outseq = slicq.backward(c)

    # make single output array from iterator; the builtin next() works on
    # Python 2.6+ and Python 3, whereas the .next() method is Python 2 only
    s_r = next(reblock(outseq,len(s),fulllast=False))
    s_r = s_r.real

    t2 = time()

    def norm(x):
        """Return the square root of the summed squared magnitudes of ``x``."""
        return N.sqrt(N.sum(N.abs(N.square(N.abs(x)))))

    # reconstruction error relative to the norm of the input signal
    rec_err = norm(s-s_r)/norm(s)
    # single-argument print(...) gives the same output on Python 2 and 3
    print("Reconstruction error: %.3e"%rec_err)
    print("Calculation time: %.3f s"%(t2-t1))

    # Compare the sliced coefficients with non-sliced ones
    if False:
        # not implemented yet!
Exemplo n.º 6
0
        # remember the running index, then advance it by the size of
        # C_block's axis 2
        freq_start = freq_idx
        freq_end = C_block.shape[2]
        freq_idx += freq_end

        # add a leading axis for overlap_add_slicq and drop it again afterwards
        C_block_ola = overlap_add_slicq(C_block.unsqueeze(0)).squeeze(0)
        C_block_flatten = overlap_add_slicq(
            C_block.unsqueeze(0),
            flatten=True,
        ).squeeze(0)

        # report the index range and the three shapes for this block
        block_report = f'\tblock {i}, f {freq_start}-{freq_start+freq_end-1}: {C_block.shape}, {C_block_ola.shape}, {C_block_flatten.shape}'
        print(block_report)

# invert the transform; the size of the signal's last axis is passed along
signal_recon = slicq.backward(c, signal.shape[-1])

# mean squared error between the reconstruction and the original signal
recon_mse = torch.nn.functional.mse_loss(signal_recon, signal)
print(f'recon error (mse): {recon_mse}')

print('comparing 4096 stft for fun')

print(f'signal: {signal.shape}')

# STFT with a 4096-point FFT and a hop of 1024 samples, no centering,
# converted to complex64
stft_options = dict(
    n_fft=4096,
    hop_length=1024,
    return_complex=True,
    center=False,
)
S = torch.stft(signal, **stft_options).type(torch.complex64)

print(f'stft with 4096/1024: {S.shape}')