Пример #1
0
def process_file(input_fname, output_fname, l1, l2, direction):
    """Fit the autoencoder to one file and write the resynthesised audio.

    The file is read with ``Wave.read`` and transformed with the
    module-level ``stft``. An ``AutoEncoder`` initialised from
    ``ae_4x1_poly.pickle`` is then optimised for ``n_iterations`` steps on
    the features of that single spectrogram. The magnitude of the final
    network output is combined with the phase of the input spectrogram,
    inverted with ``istft`` and written to ``output_fname``.

    Args:
        input_fname: Path passed to ``Wave.read``.
        output_fname: Path passed to the ``write`` method of the result.
        l1: Weight of the L1 norm of the network output in the loss.
        l2: Weight of the directional difference ratio in the loss.
        direction: ``0`` penalises ``hdiff / vdiff``; any other value
            penalises ``vdiff / hdiff``.
    """
    w = Wave.read(input_fname)
    print(w.dtype)

    X = stft.process(w)
    ae = AutoEncoder()
    # Load onto the CPU first so a checkpoint saved from a GPU still loads
    # on a machine without CUDA; the model is moved to the GPU right after
    # when one is available.
    ae.load_state_dict(torch.load("ae_4x1_poly.pickle", map_location="cpu"))
    if torch.cuda.is_available():
        ae.cuda()
    criterion = torch.nn.MSELoss()
    optimizer = optim.Adam(ae.parameters(), weight_decay=0.01)
    v = get_features(X)

    # Nothing inside the loop leaves training mode, so set it once.
    ae.train()
    for _ in range(n_iterations):
        optimizer.zero_grad()
        output = ae(v)
        hdiff, vdiff = get_diffs(output[0, 0, :, :])
        if direction == 0:
            tsloss = hdiff / (vdiff + eps)
        else:
            tsloss = vdiff / (hdiff + eps)
        mse = criterion(output, v)
        loss1 = torch.norm(output, 1)
        print("mse {:>10.10f} loss 1 {:>10.10f} loss 2 {:>10.10f}".format(
            mse.item(), loss1.item(), tsloss.item())
        )
        loss = mse + l1 * loss1 + l2 * tsloss
        loss.backward()
        optimizer.step()

    # No gradients are needed for the final pass.
    # NOTE(review): the model is still in training mode here, as before;
    # if AutoEncoder contains dropout/batch-norm layers, consider ae.eval().
    with torch.no_grad():
        O = ae(v)
    O = O[0, 0, :, :].cpu().numpy()
    # Network magnitude combined with the phase of the input spectrogram.
    Y = np.abs(O.T) * np.exp(np.angle(X) * 1j)
    y = istft.process(Spectrogram(Y, X.sample_rate))
    y.write(output_fname)
Пример #2
0
def process_file(input_fname, output_fname, l1):
    """Fit the autoencoder to one file and write the resynthesised audio.

    The file is read with ``Wave.read`` and transformed with the
    module-level ``stft``. An ``AutoEncoder`` initialised from
    ``ae_4x1_mono.pickle`` is then optimised for ``n_iterations`` steps on
    the features of that single spectrogram, using MSE reconstruction loss
    plus an L1 penalty on the output. The magnitude of the final network
    output is combined with the phase of the input spectrogram, inverted
    with ``istft`` and written to ``output_fname``.

    Args:
        input_fname: Path passed to ``Wave.read``.
        output_fname: Path passed to the ``write`` method of the result.
        l1: Weight of the L1 norm of the network output in the loss.
    """
    w = Wave.read(input_fname)
    X = stft.process(w)
    ae = AutoEncoder()
    # Load onto the CPU first so a checkpoint saved from a GPU still loads
    # on a machine without CUDA; the model is moved to the GPU right after
    # when one is available.
    ae.load_state_dict(torch.load("ae_4x1_mono.pickle", map_location="cpu"))
    if torch.cuda.is_available():
        ae.cuda()
    criterion = torch.nn.MSELoss()
    optimizer = optim.Adam(ae.parameters(), weight_decay=0.01)
    v = get_features(X)

    # Nothing inside the loop leaves training mode, so set it once.
    ae.train()
    for _ in range(n_iterations):
        optimizer.zero_grad()
        output = ae(v)
        mse = criterion(output, v)
        loss1 = l1 * torch.norm(output, 1)
        print("mse {:>10.10f} loss 1 {:>10.10f}".format(
            mse.item(), loss1.item()))
        loss = mse + loss1
        loss.backward()
        optimizer.step()

    # No gradients are needed for the final pass.
    # NOTE(review): the model is still in training mode here, as before;
    # if AutoEncoder contains dropout/batch-norm layers, consider ae.eval().
    with torch.no_grad():
        O = ae(v)
    O = O[0, 0, :, :].cpu().numpy()
    # Network magnitude combined with the phase of the input spectrogram.
    Y = np.abs(O.T) * np.exp(np.angle(X) * 1j)
    Y = Spectrogram(Y, X.sample_rate)
    y = istft.process(Y)
    y.write(output_fname)
Пример #3
0
def get_features(path, fr=None, to=None):
    """Load a slice of an audio file as a 4-D magnitude-spectrogram tensor.

    Args:
        path: Path passed to ``Wave.read``.
        fr: Start index of the slice taken along the first axis of the
            wave (``None`` means from the beginning).
        to: End index of that slice (``None`` means to the end).

    Returns:
        A float32 ``Variable`` holding the transposed STFT magnitude with
        two leading singleton axes, moved to the GPU when CUDA is
        available.
    """
    # Column 0 of the requested range is what gets analysed.
    samples = Wave.read(path)[fr:to, 0]
    magnitude = np.abs(stft.process(samples)).T
    # Prepend two singleton axes before handing the data to torch.
    batch = magnitude[np.newaxis, np.newaxis, :, :].astype(np.float32)
    features = Variable(torch.from_numpy(batch))
    return features.cuda() if torch.cuda.is_available() else features
Пример #4
0
def _normalized_diffs(spec):
    """Return ``(hdiff, vdiff)`` for a 2-D tensor.

    Each value is the sum of squared first differences along one axis
    (the second axis for ``hdiff``, the first for ``vdiff``) divided by the
    squared L2 norm of ``spec`` plus the module-level ``eps``.
    """
    energy = torch.pow(torch.norm(spec, 2), 2) + eps
    hdiff = torch.sum(torch.pow(spec[:, 1:] - spec[:, :-1], 2)) / energy
    vdiff = torch.sum(torch.pow(spec[1:, :] - spec[:-1, :], 2)) / energy
    return hdiff, vdiff


def process_file(input_fname, l1, l2, l3):
    """Split one file into two signals using a two-channel autoencoder.

    The file is read with ``Wave.read`` and transformed with the
    module-level ``stft``. An ``AutoEncoder`` initialised from
    ``ae_4x2_poly.pickle`` is optimised for ``n_iterations`` steps so that
    the sum of its two output channels reconstructs the input features,
    with one difference-ratio penalty per channel. The two final outputs
    are turned into ratio masks, applied to the input spectrogram, and the
    results are written next to the input as ``<stem>_steady.wav`` and
    ``<stem>_trans.wav``.

    Args:
        input_fname: Path passed to ``Wave.read``; also the base for the
            two output file names.
        l1: Weight of the L1 norm of the summed output in the loss.
        l2: Weight of ``hdiff / vdiff`` computed on output channel 0.
        l3: Weight of ``vdiff / hdiff`` computed on output channel 1.
    """
    print(l1, l2, l3)
    w = Wave.read(input_fname)
    X = stft.process(w)
    ae = AutoEncoder()
    # Load onto the CPU first so a checkpoint saved from a GPU still loads
    # on a machine without CUDA; the model is moved to the GPU right after
    # when one is available.
    ae.load_state_dict(torch.load("ae_4x2_poly.pickle", map_location="cpu"))

    if torch.cuda.is_available():
        ae.cuda()
    criterion = torch.nn.MSELoss()
    optimizer = optim.Adam(ae.parameters(), weight_decay=0.5)
    v = get_features(X)
    # The reconstruction target does not change between iterations.
    tgt = v[0, 0, :, :]

    # Nothing inside the loop leaves training mode, so set it once.
    ae.train()
    for _ in range(n_iterations):
        optimizer.zero_grad()
        output = ae(v)
        out1 = output[0, 0, :, :]
        out2 = output[0, 1, :, :]
        mix = out1 + out2
        mse = criterion(mix, tgt)
        hdiff1, vdiff1 = _normalized_diffs(out1)
        hdiff2, vdiff2 = _normalized_diffs(out2)
        loss1 = torch.norm(mix, 1)
        # The two channels are penalised with opposite ratios.
        tloss = hdiff1 / (vdiff1 + eps)
        sloss = vdiff2 / (hdiff2 + eps)
        print(
            "mse {:>10.10f} loss 1 {:>10.10f} T loss {:>10.10f} S loss {:>10.10f}".format(
                mse.item(), loss1.item(), tloss.item(), sloss.item()
            )
        )
        loss = mse + l1 * loss1 + l2 * tloss + l3 * sloss
        loss.backward()
        optimizer.step()

    # No gradients are needed for the final pass.
    # NOTE(review): the model is still in training mode here, as before;
    # if AutoEncoder contains dropout/batch-norm layers, consider ae.eval().
    with torch.no_grad():
        O = ae(v)
    T = O[0, 0, :, :].cpu().numpy().T
    S = O[0, 1, :, :].cpu().numpy().T

    # Ratio masks; eps keeps the denominator away from zero.
    total = S + T + eps
    Smask = S / total
    Tmask = T / total

    steady = istft.process(Spectrogram(X * Smask, X.sample_rate))
    trans = istft.process(Spectrogram(X * Tmask, X.sample_rate))
    stem = splitext(input_fname)[0]
    steady.write(stem + "_steady.wav")
    trans.write(stem + "_trans.wav")
Пример #5
0
import numpy as np
import matplotlib.pyplot as plt
from untwist.data import Wave, RatioMask
from untwist.transforms import STFT, ISTFT
from untwist.factorizations import RPCA

# Transforms and the RPCA factorisation used by this demo.
stft, istft = STFT(), ISTFT()
rpca = RPCA(iterations=100)

# Try with vocals over a repetitive music background; only the first
# channel of the file is analysed.
x = Wave.read("mixture.wav")
X = stft.process(x[:, 0])

# Slow step: factorise the magnitude spectrogram into L and S.
L, S = rpca.process(X.magnitude())

# Mask the mixture spectrogram with RatioMask(S, L), resynthesise, save.
M = RatioMask(S, L)
v = istft.process(X * M)
v.write("vocal_estimate.wav")

# Four stacked panels; label_x=False is passed for all but the last one.
panels = (
    (X, {"label_x": False, "title": "mixture"}),
    (L, {"label_x": False, "title": "L"}),
    (S, {"label_x": False, "title": "S"}),
    (M, {"title": "estimated mask"}),
)
for row, (panel, plot_kwargs) in enumerate(panels, start=1):
    plt.subplot(4, 1, row)
    panel.plot(**plot_kwargs)
plt.show()
Пример #6
0
def get_spectrogram(path):
    """Return the transposed STFT magnitude of the file at ``path``.

    The file is read with ``Wave.read``, converted with ``to_mono()`` and
    analysed with a default-constructed ``STFT``.
    """
    mono = Wave.read(path).to_mono()
    spectrum = STFT().process(mono)
    return spectrum.magnitude().T
Пример #7
0
import numpy as np
import matplotlib.pyplot as plt
import theano
from untwist.data import Wave, Dataset, BinaryMask
from untwist.transforms import STFT, ISTFT
from untwist.neuralnetworks import MLP, SGD
# Float dtype configured in Theano; used below as the Dataset's first dtype.
floatX = theano.config.floatX

# Width of the MLP input, hidden and output layers.
# NOTE(review): presumably the number of frequency bins produced by the
# default STFT — confirm against untwist's defaults.
n_bins = 513
# Number of leading spectrogram frames taken for Xtrain.
train_frames = 10000

# Column 0 of each recording; the mixture is their sample-wise sum.
# NOTE(review): assumes both files are multi-channel and of equal length — TODO confirm.
target = Wave.read("target.wav")[:, 0]
background = Wave.read("background.wav")[:, 0]
mix = target + background

stft = STFT()
istft = ISTFT()
# MLP with n_bins inputs, n_bins outputs and two hidden layers of n_bins units
# NOTE(review): argument meaning inferred from the call shape — verify
# against untwist.neuralnetworks.MLP.
mlp = MLP(n_bins, n_bins, [n_bins, n_bins])
sgd = SGD(mlp,
          learning_rate=0.05,
          momentum=0.2,
          batch_size=200,
          iterations=100)

# Spectrograms of the mixture and of each source.
X = stft.process(mix)
T = stft.process(target)
B = stft.process(background)

# Mask built from the target and background magnitudes.
ideal_mask = BinaryMask(T.magnitude(), B.magnitude())
ds = Dataset(n_bins, floatX, n_bins, np.bool_)
# First train_frames columns of the mixture magnitude, transposed.
Xtrain = X[:, :train_frames].magnitude().T
Пример #8
0
    nc /= nc.max()
    return nc


def novelty_seg(ftr, kernel_size):
    """Return the positions of novelty peaks in a feature array.

    The features are min-max scaled and passed to ``get_novelty_curve``.
    Peaks of the resulting curve that are at least one standard deviation
    above its mean are reported.

    Args:
        ftr: Numeric feature array. It is left unmodified.
        kernel_size: Passed through to ``get_novelty_curve``.

    Returns:
        ``int32`` array with the indices of the detected peaks.
    """
    # Scale out of place: the former ``-=`` / ``/=`` overwrote the caller's
    # array, and in-place true division is rejected for integer dtypes.
    scaled = ftr - ftr.min()
    scaled = scaled / scaled.max()
    novelty = get_novelty_curve(scaled, kernel_size)
    print("Finding slices")
    threshold = np.mean(novelty) + np.std(novelty)
    # Only the peak indices are needed; the properties dict is discarded.
    peaks, _ = signal.find_peaks(novelty, height=threshold, distance=1)
    return peaks.astype(np.int32)


# Command-line arguments: input file, output directory, kernel size, iterations.
# Despite its name, stft_file is read as audio with Wave.read below.
stft_file = sys.argv[1]
outPath = sys.argv[2]
kernel_size = int(sys.argv[3])
# NOTE(review): kept as a string and not used in the visible code — confirm
# whether it was meant to be converted and passed to ae.train_ae.
iterations = sys.argv[4]
net = ae.AutoEncoder(513, 13)
print("performing STFT")
x = Wave.read(stft_file).to_mono()
# Transposed STFT magnitude of the mono signal.
X = STFT().process(x).magnitude().T
print("done")
ae.train_ae(net, X)
print("Getting feature vectors")
features = ae.get_learnt_features(net, X)
print("Computing novelty")
boundaries = novelty_seg(features, kernel_size)
# Save the boundaries as text to <outPath>/<input file name>.ae_segs.ds
# (a leading "~" in outPath is expanded).
np.savetxt(
    os.path.expanduser(outPath + '/' + Path(stft_file).name + '.ae_segs.ds'),
    boundaries)
Пример #9
0
# Defaults for the optional command-line values read below.
t2 = -0.2
t3 = 0.5
# Floor division keeps the bin count an integer; plain "/" yields a float
# under Python 3.
n_bins = fft_size // 2 + 1

# Required arguments: input file, factor, rank.
in_fname = str(sys.argv[1])
factor = float(sys.argv[2])
rank = float(sys.argv[3])
# Optional arguments override t1, t2 and t3 in that order.
if len(sys.argv) > 4:
    t1 = float(sys.argv[4])
if len(sys.argv) > 5:
    t2 = float(sys.argv[5])
if len(sys.argv) > 6:
    t3 = float(sys.argv[6])

# Output goes next to the input, with the factor appended to the stem.
out_fname = splitext(in_fname)[0] + "_" + str(factor) + ".wav"

# Analysis and synthesis share the same window, FFT size and hop size.
# NOTE(review): signal.hann is not available in recent SciPy releases
# (signal.windows.hann is the replacement) — confirm the SciPy version in use.
stft = STFT(signal.hann(window_size, sym=False), fft_size, hop_size)
istft = ISTFT(signal.hann(window_size, sym=False), fft_size, hop_size)
x = Wave.read(in_fname)
# Multi-channel input: keep only the first channel.
if len(x.shape) > 1 and x.shape[1] > 1:
    x = x[:, 0]
sr = x.sample_rate
X = stft.process(x)

# Magnitude and phase of the spectrogram.
Xm = np.abs(X)
Xp = np.angle(X)

# Per-bin frequency in radians per sample, and the phase advance of each
# bin over one hop.
radian_bin_freqs = 2 * np.pi * np.arange(Xm.shape[0]) / fft_size
phase_increment = radian_bin_freqs * hop_size

phase_lock = True
lock_active = True  # change for envelope preservation

if rank < 1: