def dnn_sep(M, W1, W2, hh=.0001, ep=5000, d=0, sp=.0001, spb=3, al='rprop'):
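    """Separate the mixture spectrogram M into two sources using fixed,
    pre-trained DNN decoders. W1 and W2 are lists of per-layer weight matrices
    (e.g. as returned by dnn_model); only the shared latent input H is
    optimized. Returns the joint approximation, the two source estimates and
    the training error reported by downhill_train."""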

    # GPU cached data, plus a dummy input so that downhill has something to
    # feed (it is zeroed out in the cost below)
    _M = theano.shared(M.T.astype(float64))
    dum = Th.vector('dum')

    # Get layer sizes
    K = []
    for i in range(len(W1)):
        K.append([W1[i].shape[0], W2[i].shape[0]])
    K.append([M.T.shape[1], M.T.shape[1]])

    # We have weights to discover; init with Glorot-style scaling
    H = theano.shared(
        sqrt(2. / (K[0][0] + K[0][1] + M.shape[1])) *
        random.rand(M.T.shape[0], K[0][0] + K[0][1]).astype(float64))
    fI = InputLayer(shape=(M.T.shape[0], K[0][0] + K[0][1]), input_var=H)

    # Split in two pathways, one for each source's autoencoder
    H1 = (len(W1) + 1) * [None]
    H2 = (len(W1) + 1) * [None]
    H1[0] = SliceLayer(fI, indices=slice(0, K[0][0]), axis=1)
    H2[0] = SliceLayer(fI, indices=slice(K[0][0], K[0][0] + K[0][1]), axis=1)

    # Add the subsequent layers
    for i in range(len(W1)):
        H1[i + 1] = DenseLayer(H1[i],
                               num_units=K[i + 1][0],
                               W=W1[i].astype(float64),
                               nonlinearity=lambda x: psoftplus(x, spb),
                               b=None)
        H2[i + 1] = DenseLayer(H2[i],
                               num_units=K[i + 1][1],
                               W=W2[i].astype(float64),
                               nonlinearity=lambda x: psoftplus(x, spb),
                               b=None)

    # Add the two approximations
    R = ElemwiseSumLayer([H1[-1], H2[-1]])

    # Cost function
    Ro = get_output(R) + eps
    cost = Th.mean(_M * (Th.log(_M + eps) - Th.log(Ro)) - _M +
                   Ro) + 0 * Th.mean(dum)
    for i in range(len(H1) - 1):
        cost += sp * Th.mean(abs(get_output(H1[i]))) + sp * Th.mean(
            abs(get_output(H2[i])))

    # Train it using downhill
    opt = downhill.build(al, loss=cost, inputs=[dum], params=[H])
    train = downhill.Dataset(array([d]).astype(float64), batch_size=0)
    er = downhill_train(opt, train, hh, ep, None)

    # Get outputs
    _r = nget(R, dum, array([0]).astype(float64)).T + eps
    _r1 = nget(H1[-1], dum, array([0]).astype(float64)).T
    _r2 = nget(H2[-1], dum, array([0]).astype(float64)).T

    return _r, _r1, _r2, er
def dnn_model(M,
              K=[20, 20],
              hh=.0001,
              ep=5000,
              d=0,
              wsp=0.0001,
              hsp=0,
              spb=3,
              bt=0,
              al='rprop'):
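    """Learn a multi-layer non-negative model of the spectrogram M. K lists the
    hidden-layer sizes, d is the dropout rate, wsp/hsp are weight/activation
    sparsity penalties, spb selects the softplus sharpness (or a custom
    activation), bt the batch size and al the downhill optimizer; hh and ep are
    the step size and epoch count handed to downhill_train. Returns the layer
    outputs, the layer weights and the training error."""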

    # Sort out the activation
    from inspect import isfunction
    if isfunction(spb):
        act = spb
    else:
        act = lambda x: psoftplus(x, spb)

    # Symbolic input for the data
    _M = Th.matrix('_M')

    # Input and forward transform
    I = InputLayer(shape=(None, M.shape[0]), input_var=_M)

    # Setup the layers
    L = K + [M.T.shape[1]]
    H = len(L) * [None]
    Hd = len(L) * [None]

    # First layer
    H[0] = DenseLayer(I, num_units=K[0], nonlinearity=act, b=None)

    # All the rest
    for k in range(1, len(L)):
        # Optional dropout
        Hd[k - 1] = DropoutLayer(H[k - 1], d)

        # Next layer
        H[k] = DenseLayer(Hd[k - 1], num_units=L[k], nonlinearity=act, b=None)

    # Cost function
    Ro = get_output(H[-1]) + eps
    cost = Th.mean(_M * (Th.log(_M + eps) - Th.log(Ro)) - _M + Ro)
    for k in range(len(L) - 1):
        cost += wsp * Th.mean(abs(H[k].W)) + hsp * Th.mean(get_output(H[k]))

    # Train it using downhill
    opt = downhill.build(al,
                         loss=cost,
                         inputs=[_M],
                         params=get_all_params(H[-1]))
    train = downhill.Dataset(M.T.astype(float64), batch_size=bt)
    er = downhill_train(opt, train, hh, ep, None)

    # Get approximation
    h = [nget(H[k], _M, M.T.astype(float64)).T for k in range(len(L))]
    w = [H[k].W.get_value() for k in range(len(L))]

    return h, w, er
def cnn_model(M,
              K=20,
              T=1,
              hh=.0001,
              ep=5000,
              d=0,
              hsp=0.0001,
              wsp=0,
              spb=3,
              bt=0,
              al='rprop'):
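    """Learn a convolutive non-negative model of M using K filters of length T:
    one Conv1D layer maps M to non-negative activations and a second one
    reconstructs it. Returns the reconstruction, the synthesis filters, the
    training error and the hidden activations."""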
    # Pick cuDNN convolution algorithms that behave reasonably here
    theano.config.dnn.conv.algo_fwd = 'fft_tiling'
    theano.config.dnn.conv.algo_bwd_filter = 'none'
    theano.config.dnn.conv.algo_bwd_data = 'none'

    # Reformat input data
    M3 = reshape(M.astype(float32), (1, M.shape[0], M.shape[1]))

    # Symbolic input for the data
    _M = Th.tensor3('_M')

    # Input and forward transform
    I = InputLayer(shape=M3.shape, input_var=_M)

    # First layer is the transform to a non-negative subspace
    H = Conv1DLayer(I,
                    filter_size=T,
                    num_filters=K,
                    pad='same',
                    nonlinearity=lambda x: psoftplus(x, spb),
                    b=None)

    # Upper layer is the synthesizer
    R = Conv1DLayer(H,
                    filter_size=T,
                    num_filters=M.shape[0],
                    pad='same',
                    nonlinearity=lambda x: psoftplus(x, spb),
                    b=None)

    # Cost function
    Ro = get_output(R) + eps
    cost = Th.mean(_M * (Th.log(_M + eps) - Th.log(Ro)) - _M + Ro) \
        + hsp * Th.mean(get_output(H))

    # Train it using downhill
    opt = downhill.build(al, loss=cost, inputs=[_M], params=get_all_params(R))
    train = downhill.Dataset(M3, batch_size=bt)
    er = downhill_train(opt, train, hh, ep, None)

    # Get approximation and hidden state
    _r = squeeze(nget(R, _M, M3))
    _h = squeeze(nget(H, _M, M3))

    return _r, R.W.get_value(), er, _h
def cnn_model_th(M, K=20, T=1, hh=.0001, ep=5000, d=0, wsp=0.0001, dp=0):
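    """Plain-Theano single-layer non-negative autoencoder: W0 maps M to
    non-negative activations and W1 reconstructs it, with optional dropout dp
    and a sparsity penalty wsp on W1. Returns the reconstruction, the decoder
    weights, the hidden activations and the training error."""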

    rng = theano.tensor.shared_randomstreams.RandomStreams(0)

    # Symbolic input, plus shared data and dropout rate
    x = Th.matrix('x')
    y = theano.shared(M.astype(theano.config.floatX))
    d = theano.shared(float32(dp))

    # Network weights
    W0 = theano.shared(
        sqrt(2. / (K + M.shape[0])) *
        random.randn(K, M.shape[0]).astype(theano.config.floatX))
    W1 = theano.shared(
        sqrt(2. / (K + M.shape[0])) *
        random.randn(M.shape[0], K).astype(theano.config.floatX))

    # First layer is the transform to a non-negative subspace
    h = psoftplus(W0.dot(x), 3.)

    # Dropout
    if dp > 0:
        h *= (1. / (1. - d) * (rng.uniform(size=h.shape) > d).astype(
            theano.config.floatX)).astype(theano.config.floatX)

    # Second layer reconstructs the input
    l1 = W1.dot(h)
    r = psoftplus(l1, 3.)

    # Approximate input using KL-like distance
    cost = Th.mean(y * (Th.log(y + eps) - Th.log(r + eps)) - y +
                   r) + wsp * Th.mean(abs(W1))

    # Make an optimizer and define the training input
    opt = downhill.build('rprop', loss=cost, inputs=[x], params=[W0, W1])
    train = downhill.Dataset(M.astype(theano.config.floatX), batch_size=0)

    # Train it
    er = downhill_train(opt, train, hh, ep, None)

    # Get approximation with dropout disabled
    d.set_value(float32(0))
    _h, _r = theano.function(inputs=[x], outputs=[h, r],
                             updates=[])(M.astype(theano.config.floatX))

    return _r, W1.get_value(), _h, er
def rnn_model(M,
              K=20,
              hh=.0001,
              ep=5000,
              d=0,
              wsp=0.0001,
              hsp=0,
              spb=3,
              bt=0,
              al='rmsprop',
              t=5):
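    """Learn a recurrent non-negative model of M: a dense layer maps the input
    to K non-negative units and a recurrent layer (gradients truncated to t
    steps) reconstructs the spectrogram. Returns the reconstruction, the
    recurrent layer's (input-to-hidden, hidden-to-hidden) weights, the training
    error and the hidden activations."""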
    # Symbolic input for the data
    _M = Th.matrix('_M')

    # Input and forward transform
    I = InputLayer(shape=(None, M.shape[0]), input_var=_M)

    # First layer is the transform to a non-negative subspace
    H0 = DenseLayer(I,
                    num_units=K,
                    nonlinearity=lambda x: psoftplus(x, spb),
                    b=None)

    # Optional dropout
    H = DropoutLayer(H0, d)

    # Compute output
    R = RecurrentLayer(H,
                       num_units=M.T.shape[1],
                       nonlinearity=lambda x: psoftplus(x, spb),
                       gradient_steps=t,
                       b=None)

    # Cost function
    Ro = get_output(R) + eps
    cost = Th.mean(_M * (Th.log(_M + eps) - Th.log(Ro)) - _M + Ro) \
        + hsp * Th.mean(get_output(H0))

    # Train it using downhill
    opt = downhill.build(al, loss=cost, inputs=[_M], params=get_all_params(R))
    train = downhill.Dataset(M.T.astype(float32), batch_size=bt)
    er = downhill_train(opt, train, hh, ep, None)

    # Get approximation
    _r = nget(R, _M, M.T.astype(float32)).T
    _h = nget(H, _M, M.T.astype(float32)).T

    return _r, (R.W_in_to_hid.get_value(), R.W_hid_to_hid.get_value()), er, _h
def nn_model(M,
             K=20,
             hh=.0001,
             ep=5000,
             d=0,
             wsp=0.0001,
             hsp=0,
             spb=3,
             bt=0,
             al='rprop'):
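    """Learn a single-hidden-layer non-negative model of M with K latent units,
    optional dropout d and weight/activation sparsity penalties wsp/hsp.
    Returns the reconstruction, the decoder weights, the training error and the
    hidden activations."""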

    # Sort out the activation
    from inspect import isfunction
    if isfunction(spb):
        act = spb
    else:
        act = lambda x: psoftplus(x, spb)

    # Symbolic input for the data
    _M = Th.matrix('_M')

    # Input and forward transform
    I = InputLayer(shape=(None, M.shape[0]), input_var=_M)

    # First layer is the transform to a non-negative subspace
    H0 = DenseLayer(I, num_units=K, nonlinearity=act, b=None)

    # Optional dropout
    H = DropoutLayer(H0, d)

    # Compute output
    R = DenseLayer(H, num_units=M.T.shape[1], nonlinearity=act, b=None)

    # Cost function
    Ro = get_output(R) + eps
    cost = Th.mean(_M * (Th.log(_M + eps) - Th.log(Ro)) - _M + Ro) \
        + wsp * Th.mean(abs(R.W[0])) + hsp * Th.mean(get_output(H0))

    # Train it using downhill
    opt = downhill.build(al, loss=cost, inputs=[_M], params=get_all_params(R))
    train = downhill.Dataset(M.T.astype(float64), batch_size=bt)
    er = downhill_train(opt, train, hh, ep, None)

    # Get approximation
    _r = nget(R, _M, M.T.astype(float64)).T
    _h = nget(H, _M, M.T.astype(float64)).T

    return _r, R.W.get_value(), er, _h
def cnn_sep(M, W1, W2, hh=.0001, ep=5000, d=0, sp=.0001, spb=3, al='rprop'):
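    """Separate the mixture spectrogram M using two fixed convolutional
    dictionaries W1 and W2 (e.g. as returned by cnn_model); only the
    activations H are optimized. Returns the joint approximation, the two
    source estimates and the training error."""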
    # Pick cuDNN convolution algorithms that behave reasonably here
    theano.config.dnn.conv.algo_fwd = 'fft_tiling'
    theano.config.dnn.conv.algo_bwd_filter = 'none'
    theano.config.dnn.conv.algo_bwd_data = 'none'

    # Reformat input data
    M3 = reshape(M.astype(float32), (1, M.shape[0], M.shape[1]))

    # Copy key variables to GPU
    _M = theano.shared(M3.astype(float32))

    # Get dictionary shapes
    K = [W1.shape[1], W2.shape[1]]
    T = W1.shape[2]

    # We have weights to discover
    H = theano.shared(
        sqrt(2. / (K[0] + K[1] + M.shape[1])) *
        random.rand(1, K[0] + K[1], M.T.shape[0]).astype(float32))
    fI = InputLayer(shape=(1, K[0] + K[1], M.T.shape[0]), input_var=H)

    # Split in two pathways
    H1 = SliceLayer(fI, indices=slice(0, K[0]), axis=1)
    H2 = SliceLayer(fI, indices=slice(K[0], K[0] + K[1]), axis=1)

    # Compute source modulators using previously learned convolutional dictionaries
    R1 = Conv1DLayer(H1,
                     filter_size=T,
                     W=W1,
                     num_filters=M.shape[0],
                     pad='same',
                     nonlinearity=lambda x: psoftplus(x, spb),
                     b=None)
    R2 = Conv1DLayer(H2,
                     filter_size=T,
                     W=W2,
                     num_filters=M.shape[0],
                     pad='same',
                     nonlinearity=lambda x: psoftplus(x, spb),
                     b=None)

    # Add the two approximations
    R = ElemwiseSumLayer([R1, R2])

    # Cost function
    dum = Th.vector('dum')
    Ro = get_output(R) + eps
    cost = Th.mean(_M * (Th.log(_M + eps) - Th.log(Ro)) - _M +
                   Ro) + 0 * Th.mean(dum) + sp * Th.mean(abs(H))

    # Train it using downhill
    opt = downhill.build(al, loss=cost, inputs=[dum], params=[H])
    train = downhill.Dataset(array([0]).astype(float32), batch_size=0)
    er = downhill_train(opt, train, hh, ep, None)

    # Get outputs
    _r = squeeze(nget(R, dum, array([0]).astype(float32))) + eps
    _r1 = squeeze(nget(R1, dum, array([0]).astype(float32)))
    _r2 = squeeze(nget(R2, dum, array([0]).astype(float32)))

    return _r, _r1, _r2, er
def rnn_sep(M,
            W1,
            W2,
            hh=.0001,
            ep=5000,
            d=0,
            sp=.0001,
            spb=3,
            al='rmsprop',
            t=5):
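    """Separate the mixture spectrogram M using two fixed recurrent models. W1
    and W2 are (input-to-hidden, hidden-to-hidden) weight pairs as returned by
    rnn_model; only the latent inputs are optimized. Returns the joint
    approximation, the two source estimates and the training error."""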
    # Get dictionary shapes
    K = [W1[0].shape[0], W2[0].shape[0]]

    # GPU cached data
    _M = theano.shared(M.T.astype(float32))
    dum = Th.vector('dum')

    # We have weights to discover
    H = theano.shared(
        sqrt(2. / (K[0] + K[1] + M.shape[1])) *
        random.rand(M.T.shape[0], K[0] + K[1]).astype(float32))
    fI = InputLayer(shape=(M.T.shape[0], K[0] + K[1]), input_var=H)

    # Split in two pathways
    fW1 = SliceLayer(fI, indices=slice(0, K[0]), axis=1)
    fW2 = SliceLayer(fI, indices=slice(K[0], K[0] + K[1]), axis=1)

    # Optional dropout; the rate is supplied at training time through the dummy input
    dfW1 = DropoutLayer(fW1, dum[0])
    dfW2 = DropoutLayer(fW2, dum[0])

    # Compute source modulators using previously learned dictionaries
    R1 = RecurrentLayer(dfW1,
                        num_units=M.T.shape[1],
                        b=None,
                        W_in_to_hid=W1[0].astype(float32),
                        W_hid_to_hid=W1[1].astype(float32),
                        nonlinearity=lambda x: psoftplus(x, spb),
                        gradient_steps=5)
    R2 = RecurrentLayer(dfW2,
                        num_units=M.T.shape[1],
                        b=None,
                        W_in_to_hid=W2[0].astype(float32),
                        W_hid_to_hid=W2[1].astype(float32),
                        nonlinearity=lambda x: psoftplus(x, spb),
                        gradient_steps=5)

    # Add the two approximations
    R = ElemwiseSumLayer([R1, R2])

    # Cost function
    Ro = get_output(R) + eps
    cost = Th.mean(_M * (Th.log(_M + eps) - Th.log(Ro + eps)) - _M + Ro) \
        + sp * Th.mean(abs(H)) + 0 * Th.mean(dum)

    # Train it using downhill
    opt = downhill.build(al, loss=cost, inputs=[dum], params=[H])
    train = downhill.Dataset(array([d]).astype(float32), batch_size=0)
    er = downhill_train(opt, train, hh, ep, None)

    # Get outputs
    _r = nget(R, dum, array([0]).astype(float32)).T + eps
    _r1 = nget(R1, dum, array([0]).astype(float32)).T
    _r2 = nget(R2, dum, array([0]).astype(float32)).T

    return _r, _r1, _r2, er
def nn_sep(M, W1, W2, hh=.0001, ep=5000, d=0, sp=.0001, spb=3, al='rprop'):
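    """Separate the mixture spectrogram M using two fixed single-layer
    dictionaries W1 and W2 (e.g. as returned by nn_model); only the latent
    inputs are optimized. d sets the dropout rate fed in through the dummy
    input (a two-element list trains in two stages). Returns the joint
    approximation, the two source estimates and the training error."""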

    # Sort out the activation
    from inspect import isfunction
    if isfunction(spb):
        act = spb
    else:
        act = lambda x: psoftplus(x, spb)

    # Get dictionary shapes
    K = [W1.shape[0], W2.shape[0]]

    # GPU cached data
    _M = theano.shared(M.T.astype(float64))
    dum = Th.vector('dum')

    # We have weights to discover
    H = theano.shared(
        sqrt(2. / (K[0] + K[1] + M.shape[1])) *
        random.rand(M.T.shape[0], K[0] + K[1]).astype(float64))
    fI = InputLayer(shape=(M.T.shape[0], K[0] + K[1]), input_var=H)

    # Split in two pathways
    fW1 = SliceLayer(fI, indices=slice(0, K[0]), axis=1)
    fW2 = SliceLayer(fI, indices=slice(K[0], K[0] + K[1]), axis=1)

    # Optional dropout; the rate is supplied at training time through the dummy input
    dfW1 = DropoutLayer(fW1, dum[0])
    dfW2 = DropoutLayer(fW2, dum[0])

    # Compute source modulators using previously learned dictionaries
    R1 = DenseLayer(dfW1,
                    num_units=M.T.shape[1],
                    W=W1.astype(float64),
                    nonlinearity=act,
                    b=None)
    R2 = DenseLayer(dfW2,
                    num_units=M.T.shape[1],
                    W=W2.astype(float64),
                    nonlinearity=act,
                    b=None)

    # Add the two approximations
    R = ElemwiseSumLayer([R1, R2])

    # Cost function
    Ro = get_output(R) + eps
    cost = Th.mean(_M * (Th.log(_M + eps) - Th.log(Ro + eps)) - _M + Ro) \
        + sp * Th.mean(abs(H)) + 0 * Th.mean(dum)

    # Train it using downhill
    opt = downhill.build(al, loss=cost, inputs=[dum], params=[H])
    # If d is a list, train in two stages, one per dropout rate
    if isinstance(d, list):
        train = downhill.Dataset(array([d[0]]).astype(float64), batch_size=0)
        er = downhill_train(opt, train, hh, ep // 2, None)
        train = downhill.Dataset(array([d[1]]).astype(float64), batch_size=0)
        er += downhill_train(opt, train, hh, ep // 2, None)
    else:
        train = downhill.Dataset(array([d]).astype(float64), batch_size=0)
        er = downhill_train(opt, train, hh, ep, None)

    # Get outputs
    _r = nget(R, dum, array([0]).astype(float64)).T + eps
    _r1 = nget(R1, dum, array([0]).astype(float64)).T
    _r2 = nget(R2, dum, array([0]).astype(float64)).T

    return _r, _r1, _r2, er
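
# Minimal usage sketch (illustrative only; the settings below are placeholders,
# not tuned values): given non-negative magnitude spectrograms M1 and M2 of two
# isolated training sources and M of their mixture, learn one single-layer
# model per source and then separate the mixture with the fixed dictionaries.
def _example_usage(M1, M2, M):
    # Learn a non-negative dictionary for each source
    _, w1, _, _ = nn_model(M1, K=20, ep=2000)
    _, w2, _, _ = nn_model(M2, K=20, ep=2000)

    # Fit both fixed dictionaries to the mixture; r1 and r2 are the source
    # estimates and their sum r approximates M
    r, r1, r2, er = nn_sep(M, w1, w2, ep=2000)
    return r1, r2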