示例#1
0
    def runit(self, siglen, fmin, fmax, obins, sllen, trlen, real):
        """Check that a sliced NSGT forward/backward round trip restores the signal.

        The first ``siglen`` samples of the module-level ``rndsig`` are
        transformed with an ``NSGT_sliced`` built on an ``OctScale`` and then
        reconstructed.  The test fails when the reconstruction deviates from
        the input by more than the absolute tolerance of 1e-3; the offending
        parameters and the deviation are printed first.

        Parameters
        ----------
        siglen : number of samples taken from ``rndsig``
        fmin, fmax, obins : arguments forwarded to ``OctScale``
        sllen : passed to ``NSGT_sliced`` as ``sl_len``
        trlen : passed to ``NSGT_sliced`` as ``tr_area``
        real : passed to ``NSGT_sliced`` as ``real``
        """
        atol = 1.e-3
        sig = rndsig[:siglen]

        scale = OctScale(fmin, fmax, obins)
        nsgt = NSGT_sliced(scale, fs=44100, sl_len=sllen, tr_area=trlen, real=real)

        c = nsgt.forward((sig,))
        rc = nsgt.backward(c)

        # Build a real list of blocks: on Python 3 ``map`` returns an iterator,
        # which np.concatenate does not accept as its sequence argument.
        s_r = np.concatenate([list(block) for block in rc])[:len(sig)]

        close = np.allclose(sig, s_r, atol=atol)
        if not close:
            # Single-argument print() produces the same output on Python 2
            # (print statement) and Python 3 (print function).
            params = (siglen, fmin, fmax, obins, sllen, trlen, real)
            print("Failing params: " + " ".join(str(p) for p in params))
            dev = np.abs(s_r - sig)
            print("Error %s %s" % (np.where(dev > atol), np.max(dev)))
        self.assertTrue(close)
示例#2
0
    def runit(self, siglen, fmin, fmax, obins, sllen, trlen, real):
        """Round-trip a slice of ``rndsig`` through ``NSGT_sliced`` and compare.

        Takes the first ``siglen`` samples of the module-level ``rndsig``,
        runs them forward and backward through an ``NSGT_sliced`` configured
        with an ``OctScale(fmin, fmax, obins)``, ``sl_len=sllen``,
        ``tr_area=trlen`` and ``real=real``, and asserts that the result is
        within an absolute tolerance of 1e-3 of the input.  On mismatch the
        parameters and the deviation are printed before the assertion fails.
        """
        tolerance = 1.e-3
        original = rndsig[:siglen]

        transform = NSGT_sliced(
            OctScale(fmin, fmax, obins),
            fs=44100,
            sl_len=sllen,
            tr_area=trlen,
            real=real,
        )

        coefficients = transform.forward((original,))
        blocks = transform.backward(coefficients)

        # Join the reconstructed blocks and cut back to the input length.
        restored = np.concatenate([list(block) for block in blocks])
        restored = restored[:len(original)]

        matches = np.allclose(original, restored, atol=tolerance)
        if not matches:
            print("Failing params:", siglen, fmin, fmax, obins, sllen, trlen, real)
            deviation = np.abs(restored - original)
            print("Error", np.where(deviation > tolerance), np.max(deviation))
        self.assertTrue(matches)
示例#3
0
    parser.error("Input file '%s' not found"%args.input)

fs = args.sr

# Map the --scale option onto the frequency-scale class it names and
# reject anything that is not in the table.
scales = {
    'log': LogScale,
    'lin': LinScale,
    'mel': MelScale,
    'oct': OctScale,
}
if args.scale not in scales:
    parser.error('Scale unknown (--scale option)')
scale = scales[args.scale]

# ``beyond`` is 1 only for reduced form 2, otherwise 0.
scl = scale(
    args.fmin,
    args.fmax,
    args.bins,
    beyond=1 if args.reducedform == 2 else 0,
)

# Sliced transform configured from the command-line options.
slicq = NSGT_sliced(
    scl,
    args.sllen,
    args.trlen,
    fs,
    real=args.real,
    recwnd=args.recwnd,
    matrixform=args.matrixform,
    reducedform=args.reducedform,
    multithreading=args.multithreading,
    multichannel=True,
)

# Open the input file and obtain the signal from the reader.
sf = SndReader(args.input, sr=fs, chns=2)
signal = sf()

# Signal duration in seconds.
dur = sf.frames / float(fs)

# Total number of coefficients needed to represent the input signal.
ncoefs = int(sf.frames * slicq.coef_factor)

# read slices from audio file and mix down signal, if necessary at all
if not args.downmix_after:
示例#4
0
fs = args.sr

# Table of the selectable frequency scales, keyed by the --scale value.
scales = dict(log=LogScale, lin=LinScale, mel=MelScale, oct=OctScale)
if args.scale not in scales:
    parser.error('Scale unknown (--scale option)')
scale = scales[args.scale]

# The scale gets beyond=1 when reduced form 2 was requested, else beyond=0.
scl = scale(args.fmin, args.fmax, args.bins,
            beyond=1 if args.reducedform == 2 else 0)

# Build the sliced transform from the command-line options.
slicq = NSGT_sliced(
    scl, args.sllen, args.trlen, fs,
    real=args.real,
    recwnd=args.recwnd,
    matrixform=args.matrixform,
    reducedform=args.reducedform,
    multithreading=args.multithreading,
    multichannel=True,
)

# Open the audio file and fetch the signal from the reader.
sf = SndReader(args.input, sr=fs, chns=2)
signal = sf()

# Length of the signal in seconds.
dur = sf.frames / float(fs)

# Number of coefficients needed to represent the whole input.
ncoefs = int(sf.frames * slicq.coef_factor)
示例#5
0
# Join the signal blocks along the last dimension.
signal = torch.cat(signal, dim=-1)

# Signal duration in seconds.
dur = sf.frames / float(fs)

# Use the explicit slice/transition lengths when --sllen was given,
# otherwise ask the scale for suggested values.
if args.sllen is not None:
    sllen, trlen = args.sllen, args.trlen
else:
    sllen, trlen = scl.suggested_sllen_trlen(fs)

# Pick the non-sliced transform (sized to the whole signal) on request,
# the sliced transform otherwise.
if args.nonsliced:
    slicq = NSGT(
        scl, fs, signal.shape[-1],
        real=True, matrixform=True, multichannel=True, device="cpu",
    )
else:
    slicq = NSGT_sliced(
        scl, sllen, trlen, fs,
        real=True, matrixform=True, multichannel=True, device="cpu",
    )

# total number of coefficients to represent input signal
#ncoefs = int(sf.frames*slicq.coef_factor)
示例#6
0
def main():
    """Split an audio file into harmonic and percussive parts, window by window.

    The input file is loaded in full and then processed in overlapping
    windows of ``4 * --stream-size`` samples that advance by
    ``--stream-size`` samples, simulating a real-time stream.  Each window
    is transformed with a sliced NSGT on an octave scale, the coefficient
    magnitudes are median-filtered along two different axes, and masks
    built from the filtered magnitudes are applied to the coefficients
    before transforming back.  Later windows overwrite the overlapping part
    of earlier ones in the output buffers.

    Two files, ``<prefix>_harmonic.wav`` and ``<prefix>_percussive.wav``,
    are written to ``--outdir``, where ``<prefix>`` is the input file name
    up to its first underscore.

    Returns:
        0 on completion.
    """
    parser = ArgumentParser()

    parser.add_argument(
        "--mask",
        type=str,
        default="soft",
        choices=("hard", "soft"),
        help="mask strategy",
    )
    parser.add_argument("--outdir", type=str, default="./", help="output directory")
    parser.add_argument(
        "--stream-size",
        type=int,
        default=1024,
        help="stream size for simulated realtime from wav (default=%(default)s)",
    )
    parser.add_argument("input", type=str, help="input file")

    args = parser.parse_args()

    # basename() honours the platform's path separator, unlike splitting on
    # a hard-coded "/".
    prefix = os.path.basename(args.input).split("_")[0]

    harm_out = os.path.join(args.outdir, prefix + "_harmonic.wav")
    perc_out = os.path.join(args.outdir, prefix + "_percussive.wav")
    print("writing files to {0}, {1}".format(harm_out, perc_out))

    # median filter lengths applied along axis 1 and axis 2 respectively
    lharm = 17
    lperc = 7

    # calculate transform parameters
    nsgt_scale = OctScale(80, 20000, 12)

    trlen = args.stream_size  # transition length
    sllen = 4 * args.stream_size  # slice length

    x, fs = librosa.load(args.input, sr=None)
    xh = numpy.zeros_like(x)
    xp = numpy.zeros_like(x)

    hop = trlen
    n_chunks = x.shape[0] // hop
    # The last hop is not processed; clamp so that an input shorter than
    # one hop simply yields zero iterations.
    n_iters = max(n_chunks - 1, 0)

    eps = numpy.finfo(numpy.float32).eps

    slicq = NSGT_sliced(
        nsgt_scale,
        sllen,
        trlen,
        fs,
        real=True,
        matrixform=True,
    )
    total_time = 0.0

    for chunk in range(n_iters):
        t1 = cputime()

        start = chunk * hop
        end = start + sllen

        # Near the end of the input this slice is shorter than sllen.
        s = x[start:end]

        # realize the forward transform into a single array
        C = numpy.asarray(list(slicq.forward((s,))))

        Cmag = numpy.abs(C)
        H = scipy.ndimage.median_filter(Cmag, size=(1, lharm, 1))
        P = scipy.ndimage.median_filter(Cmag, size=(1, 1, lperc))

        # NOTE(review): the soft branch derives Mp from H and Mh from P,
        # while the hard branch derives Mh from H and Mp from P.  Both are
        # kept exactly as found; confirm which assignment is intended.
        if args.mask == "soft":
            H2 = numpy.power(H, 2.0)
            P2 = numpy.power(P, 2.0)
            tot = H2 + P2 + eps  # eps keeps the division finite
            Mp = numpy.divide(H2, tot)
            Mh = numpy.divide(P2, tot)
        else:
            Mh = numpy.divide(H, P + eps) > 2.0
            Mp = numpy.divide(P, H + eps) >= 2.0

        Cp = numpy.multiply(Mp, C)
        Ch = numpy.multiply(Mh, C)

        # generators for backward transformation
        outseq_h = slicq.backward(Ch)
        outseq_p = slicq.backward(Cp)

        # Take the first re-blocked output of len(s) samples from each
        # generator and keep only its real part.
        sh_r = next(reblock(outseq_h, len(s), fulllast=False)).real
        sp_r = next(reblock(outseq_p, len(s), fulllast=False)).real

        xh[start:end] = sh_r
        xp[start:end] = sp_r

        t2 = cputime()
        total_time += t2 - t1

    # Average over the iterations that actually ran; report 0 when none did
    # instead of dividing by zero.
    per_iter = total_time / n_iters if n_iters else 0.0
    print("Calculation time per iter: %fs" % per_iter)

    scipy.io.wavfile.write(harm_out, fs, xh)
    scipy.io.wavfile.write(perc_out, fs, xp)

    return 0
示例#7
0
# Mix multi-channel input down by averaging over axis 1.
if sf.channels > 1:
    s = np.mean(s, axis=1)

# Optionally truncate the signal to the requested length.
if args.length:
    s = s[:args.length]

# Resolve the scale name from the command line to its class.
scales = {
    'log': LogScale,
    'lin': LinScale,
    'mel': MelScale,
    'oct': OctScale,
}
if args.scale not in scales:
    parser.error('scale unknown')
scale = scales[args.scale]

scl = scale(args.fmin, args.fmax, args.bins)
slicq = NSGT_sliced(
    scl,
    args.sllen,
    args.trlen,
    fs,
    real=args.real,
    recwnd=args.recwnd,
    matrixform=args.matrixform,
    reducedform=args.reducedform,
    multithreading=args.multithreading,
)

# Start of the timed section.
t1 = cputime()

signal = (s,)

# Run the forward transform and realize its generator into a list.
c = list(slicq.forward(signal))

# Generator for the backward transformation.
outseq = slicq.backward(c)
示例#8
0
    fs = sf.samplerate
    s = sf.read_frames(sf.nframes)
    if sf.channels > 1: 
        s = N.mean(s,axis=1)
        
    if options.length:
        s = s[:options.length]

    scales = {'log':LogScale,'lin':LinScale,'mel':MelScale,'oct':OctScale}
    try:
        scale = scales[options.scale]
    except KeyError:
        parser.error('scale unknown')

    scl = scale(options.fmin,options.fmax,options.bins)
    slicq = NSGT_sliced(scl,options.sl_len,options.tr_area,fs,real=options.real,recwnd=options.recwnd,matrixform=options.matrixform,reducedform=options.reducedform)

    t1 = time()
    
    signal = (s,)

    # generator for forward transformation
    c = slicq.forward(signal)

    # realize transform from generator
    c = list(c)
    
#    cl = map(len,c[0])
#    print "c",len(cl),cl
    
    # generator for backward transformation
示例#9
0
    scales = {
        'log': LogScale,
        'lin': LinScale,
        'mel': MelScale,
        'oct': OctScale
    }
    try:
        scale = scales[options.scale]
    except KeyError:
        parser.error('scale unknown')

    scl = scale(options.fmin, options.fmax, options.bins)
    slicq = NSGT_sliced(scl,
                        options.sl_len,
                        options.tr_area,
                        fs,
                        real=options.real,
                        recwnd=options.recwnd,
                        matrixform=options.matrixform,
                        reducedform=options.reducedform)

    t1 = time()

    signal = (s, )

    # generator for forward transformation
    c = slicq.forward(signal)

    # realize transform from generator
    c = list(c)

    #    cl = map(len,c[0])
示例#10
0
except KeyError:
    parser.error('Scale unknown (--scale option)')

scl = scale(args.fmin, args.fmax, args.bins)

# Use the explicit slice/transition lengths when --sllen was given,
# otherwise ask the scale for suggested values.
if args.sllen is not None:
    sllen, trlen = args.sllen, args.trlen
else:
    sllen, trlen = scl.suggested_sllen_trlen(fs)

print(f'sllen: {sllen}, trlen: {trlen}')
slicq = NSGT_sliced(
    scl, sllen, trlen, fs,
    real=True,
    matrixform=args.matrixform,
    multichannel=True,
    device="cpu",
)

# Open the audio file and fetch the signal blocks from the reader.
sf = SndReader(args.input, sr=fs, chns=2)
signal = sf()

# Convert every block to a tensor.
signal = list(map(torch.tensor, signal))

# Padding of the final block is disabled:
#pad = signal[0].shape[-1]-signal[-1].shape[-1]
#signal[-1] = torch.nn.functional.pad(signal[-1], (0, pad), mode='constant', value=0)

# Join the blocks along the last dimension.
signal = torch.cat(signal, dim=-1)

# duration of signal in s
示例#11
0
    slicq = NSGT_sliced_old(scl,
                            sllen,
                            trlen,
                            fs,
                            real=True,
                            matrixform=args.matrixform,
                            multithreading=args.multithreading,
                            multichannel=True)

    # read slices from audio file and mix down signal, if necessary at all
    signal = ((np.mean(s, axis=0), ) for s in signal_orig)
else:
    slicq = NSGT_sliced(scl,
                        sllen,
                        trlen,
                        fs,
                        real=True,
                        matrixform=args.matrixform,
                        multichannel=True,
                        device=args.torch_device)

    signal = [
        torch.tensor(sig, device=args.torch_device) for sig in signal_orig
    ]

    pad = signal[0].shape[-1] - signal[-1].shape[-1]
    signal[-1] = torch.nn.functional.pad(signal[-1], (0, pad),
                                         mode='constant',
                                         value=0)
    signal = torch.cat(signal, dim=-1)

tot = 0.