def runit(self, siglen, fmin, fmax, obins, sllen, trlen, real):
    sig = rndsig[:siglen]
    scale = OctScale(fmin, fmax, obins)
    nsgt = NSGT_sliced(scale, fs=44100, sl_len=sllen, tr_area=trlen, real=real)
    c = nsgt.forward((sig,))
    rc = nsgt.backward(c)
    s_r = np.concatenate(list(map(list, rc)))[:len(sig)]
    close = np.allclose(sig, s_r, atol=1.e-3)
    if not close:
        print("Failing params:", siglen, fmin, fmax, obins, sllen, trlen, real)
        dev = np.abs(s_r - sig)
        print("Error", np.where(dev > 1.e-3), np.max(dev))
    self.assertTrue(close)
parser.error("Input file '%s' not found"%args.input) fs = args.sr # build transform scales = {'log':LogScale, 'lin':LinScale, 'mel':MelScale, 'oct':OctScale} try: scale = scales[args.scale] except KeyError: parser.error('Scale unknown (--scale option)') scl = scale(args.fmin, args.fmax, args.bins, beyond=int(args.reducedform == 2)) slicq = NSGT_sliced(scl, args.sllen, args.trlen, fs, real=args.real, recwnd=args.recwnd, matrixform=args.matrixform, reducedform=args.reducedform, multithreading=args.multithreading, multichannel=True ) # Read audio data sf = SndReader(args.input, sr=fs, chns=2) signal = sf() # duration of signal in s dur = sf.frames/float(fs) # total number of coefficients to represent input signal ncoefs = int(sf.frames*slicq.coef_factor) # read slices from audio file and mix down signal, if necessary at all if not args.downmix_after:
signal = torch.cat(signal, dim=-1)

# duration of signal in s
dur = sf.frames / float(fs)

if args.sllen is None:
    sllen, trlen = scl.suggested_sllen_trlen(fs)
else:
    sllen = args.sllen
    trlen = args.trlen

if not args.nonsliced:
    slicq = NSGT_sliced(scl, sllen, trlen, fs,
                        real=True, matrixform=True,
                        multichannel=True, device="cpu")
else:
    slicq = NSGT(scl, fs, signal.shape[-1],
                 real=True, matrixform=True,
                 multichannel=True, device="cpu")

# total number of coefficients to represent input signal
#ncoefs = int(sf.frames*slicq.coef_factor)
def main():
    parser = ArgumentParser()
    parser.add_argument(
        "--mask",
        type=str,
        default="soft",
        choices=("hard", "soft"),
        help="mask strategy",
    )
    parser.add_argument("--outdir", type=str, default="./", help="output directory")
    parser.add_argument(
        "--stream-size",
        type=int,
        default=1024,
        help="stream size for simulated realtime from wav (default=%(default)s)",
    )
    parser.add_argument("input", type=str, help="input file")

    args = parser.parse_args()

    prefix = args.input.split("/")[-1].split("_")[0]
    harm_out = os.path.join(args.outdir, prefix + "_harmonic.wav")
    perc_out = os.path.join(args.outdir, prefix + "_percussive.wav")
    print("writing files to {0}, {1}".format(harm_out, perc_out))

    # median-filter lengths for harmonic and percussive enhancement
    lharm = 17
    lperc = 7

    # calculate transform parameters
    nsgt_scale = OctScale(80, 20000, 12)
    trlen = args.stream_size      # transition length
    sllen = 4 * args.stream_size  # slice length

    x, fs = librosa.load(args.input, sr=None)

    xh = numpy.zeros_like(x)
    xp = numpy.zeros_like(x)

    hop = trlen
    chunk_size = hop
    n_chunks = int(numpy.floor(x.shape[0] // hop))
    eps = numpy.finfo(numpy.float32).eps

    slicq = NSGT_sliced(
        nsgt_scale,
        sllen,
        trlen,
        fs,
        real=True,
        matrixform=True,
    )

    total_time = 0.0

    for chunk in range(n_chunks - 1):
        t1 = cputime()

        start = chunk * hop
        end = start + sllen

        s = x[start:end]
        signal = (s,)

        # generator for forward transformation, realized to a coefficient array
        c = slicq.forward(signal)
        c = list(c)

        C = numpy.asarray(c)
        Cmag = numpy.abs(C)

        # median filtering along the two coefficient axes
        H = scipy.ndimage.median_filter(Cmag, size=(1, lharm, 1))
        P = scipy.ndimage.median_filter(Cmag, size=(1, 1, lperc))

        if args.mask == "soft":
            # soft (Wiener-style) masks from the squared filtered magnitudes
            tot = numpy.power(H, 2.0) + numpy.power(P, 2.0) + eps
            Mp = numpy.divide(numpy.power(H, 2.0), tot)
            Mh = numpy.divide(numpy.power(P, 2.0), tot)
        else:
            # hard (binary) masks
            Mh = numpy.divide(H, P + eps) > 2.0
            Mp = numpy.divide(P, H + eps) >= 2.0

        Cp = numpy.multiply(Mp, C)
        Ch = numpy.multiply(Mh, C)

        # generator for backward transformation
        outseq_h = slicq.backward(Ch)
        outseq_p = slicq.backward(Cp)

        # make single output array from iterator
        sh_r = next(reblock(outseq_h, len(s), fulllast=False))
        sh_r = sh_r.real
        sp_r = next(reblock(outseq_p, len(s), fulllast=False))
        sp_r = sp_r.real

        xh[start:end] = sh_r
        xp[start:end] = sp_r

        t2 = cputime()
        total_time += t2 - t1

    print("Calculation time per iter: %fs" % (total_time / n_chunks))

    scipy.io.wavfile.write(harm_out, fs, xh)
    scipy.io.wavfile.write(perc_out, fs, xp)

    return 0
if sf.channels > 1:
    s = np.mean(s, axis=1)

if args.length:
    s = s[:args.length]

scales = {'log': LogScale, 'lin': LinScale, 'mel': MelScale, 'oct': OctScale}
try:
    scale = scales[args.scale]
except KeyError:
    parser.error('scale unknown')

scl = scale(args.fmin, args.fmax, args.bins)

slicq = NSGT_sliced(scl, args.sllen, args.trlen, fs,
                    real=args.real, recwnd=args.recwnd,
                    matrixform=args.matrixform, reducedform=args.reducedform,
                    multithreading=args.multithreading)

t1 = cputime()

signal = (s,)

# generator for forward transformation
c = slicq.forward(signal)

# realize transform from generator
c = list(c)

# generator for backward transformation
outseq = slicq.backward(c)
fs = sf.samplerate
s = sf.read_frames(sf.nframes)
if sf.channels > 1:
    s = N.mean(s, axis=1)

if options.length:
    s = s[:options.length]

scales = {'log': LogScale, 'lin': LinScale, 'mel': MelScale, 'oct': OctScale}
try:
    scale = scales[options.scale]
except KeyError:
    parser.error('scale unknown')

scl = scale(options.fmin, options.fmax, options.bins)

slicq = NSGT_sliced(scl, options.sl_len, options.tr_area, fs,
                    real=options.real, recwnd=options.recwnd,
                    matrixform=options.matrixform, reducedform=options.reducedform)

t1 = time()

signal = (s,)

# generator for forward transformation
c = slicq.forward(signal)

# realize transform from generator
c = list(c)

# cl = map(len,c[0])
# print "c",len(cl),cl

# generator for backward transformation
except KeyError:
    parser.error('Scale unknown (--scale option)')

scl = scale(args.fmin, args.fmax, args.bins)

if args.sllen is None:
    sllen, trlen = scl.suggested_sllen_trlen(fs)
else:
    sllen = args.sllen
    trlen = args.trlen

print(f'sllen: {sllen}, trlen: {trlen}')

slicq = NSGT_sliced(scl, sllen, trlen, fs,
                    real=True, matrixform=args.matrixform,
                    multichannel=True, device="cpu")

# Read audio data
sf = SndReader(args.input, sr=fs, chns=2)
signal = sf()

signal = [torch.tensor(sig) for sig in signal]
#pad = signal[0].shape[-1]-signal[-1].shape[-1]
#signal[-1] = torch.nn.functional.pad(signal[-1], (0, pad), mode='constant', value=0)
signal = torch.cat(signal, dim=-1)

# duration of signal in s
    slicq = NSGT_sliced_old(scl, sllen, trlen, fs,
                            real=True, matrixform=args.matrixform,
                            multithreading=args.multithreading,
                            multichannel=True)

    # read slices from audio file and mix down signal, if necessary at all
    signal = ((np.mean(s, axis=0),) for s in signal_orig)
else:
    slicq = NSGT_sliced(scl, sllen, trlen, fs,
                        real=True, matrixform=args.matrixform,
                        multichannel=True, device=args.torch_device)

    signal = [torch.tensor(sig, device=args.torch_device) for sig in signal_orig]

    # zero-pad the last block to the length of the first, then concatenate
    pad = signal[0].shape[-1] - signal[-1].shape[-1]
    signal[-1] = torch.nn.functional.pad(signal[-1], (0, pad), mode='constant', value=0)
    signal = torch.cat(signal, dim=-1)

tot = 0.