Example #1
def getTrainedRNN():
    """Read from file and set the params"""
    # TODO: Refactor so that this is done only once
    input_size = 39
    hidden_size = 50
    num_output_classes = 29
    learning_rate = 0.001
    output_size = num_output_classes + 1
    batch_size = None
    input_seq_length = None
    gradient_clipping = 5

    l_in = InputLayer(shape=(batch_size, input_seq_length, input_size))
    n_batch, n_time_steps, n_features = l_in.input_var.shape  # symbolic shape info, used below to reshape the output back to (batch, time, output_size)
    # h_1 = DenseLayer(l_in, num_units=hidden_size, nonlinearity=clipped_relu)
    l_rec_forward = RecurrentLayer(l_in, num_units=hidden_size, grad_clipping=gradient_clipping,
                                   nonlinearity=clipped_relu)
    l_rec_backward = RecurrentLayer(l_in, num_units=hidden_size, grad_clipping=gradient_clipping,
                                    nonlinearity=clipped_relu, backwards=True)
    l_rec_accumulation = ElemwiseSumLayer([l_rec_forward, l_rec_backward])
    l_rec_reshaped = ReshapeLayer(l_rec_accumulation, (-1, hidden_size))
    l_h2 = DenseLayer(l_rec_reshaped, num_units=hidden_size, nonlinearity=clipped_relu)
    l_out = DenseLayer(l_h2, num_units=output_size, nonlinearity=lasagne.nonlinearities.linear)
    l_out_reshaped = ReshapeLayer(l_out, (n_batch, n_time_steps, output_size))  # Reshaping back
    l_out_softmax = NonlinearityLayer(l_out, nonlinearity=lasagne.nonlinearities.softmax)
    l_out_softmax_reshaped = ReshapeLayer(l_out_softmax, (n_batch, n_time_steps, output_size))

    with np.load('CTC_model.npz') as f:
        param_values = [f['arr_%d' % i] for i in range(len(f.files))]
    lasagne.layers.set_all_param_values(l_out_softmax_reshaped, param_values, trainable=True)
    output = lasagne.layers.get_output(l_out_softmax_reshaped)
    return l_in, output
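A minimal usage sketch (not part of the original example): assuming `theano` is imported alongside the Lasagne imports used above, the returned input layer and symbolic output can be compiled into a prediction function.

# Sketch: compile the trained graph into a callable predictor.
# `features` is a hypothetical float32 array of shape (batch, time, 39).
l_in, output = getTrainedRNN()
predict = theano.function([l_in.input_var], output)
# per_frame_probs = predict(features)  # shape: (batch, time, 30)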
Example #2
def test_recurrent_unroll_scan_bck():
    num_batch, seq_len, n_features1 = 2, 3, 4
    num_units = 2
    x = T.tensor3()
    in_shp = (num_batch, seq_len, n_features1)
    l_inp = InputLayer(in_shp)
    x_in = np.random.random(in_shp).astype('float32')

    # need to set random seed.
    lasagne.random.get_rng().seed(1234)
    l_rec_scan = RecurrentLayer(l_inp,
                                num_units=num_units,
                                backwards=True,
                                unroll_scan=False)
    lasagne.random.get_rng().seed(1234)
    l_rec_unroll = RecurrentLayer(l_inp,
                                  num_units=num_units,
                                  backwards=True,
                                  unroll_scan=True)
    output_scan = helper.get_output(l_rec_scan, x)
    output_unrolled = helper.get_output(l_rec_unroll, x)
    output_scan_val = output_scan.eval({x: x_in})
    output_unrolled_val = output_unrolled.eval({x: x_in})

    np.testing.assert_almost_equal(output_scan_val, output_unrolled_val)
Example #3
def test_recurrent_unroll_scan_fwd():
    num_batch, seq_len, n_features1 = 2, 3, 4
    num_units = 2
    in_shp = (num_batch, seq_len, n_features1)
    l_inp = InputLayer(in_shp)
    l_mask_inp = InputLayer(in_shp[:2])

    x_in = np.random.random(in_shp).astype('float32')
    mask_in = np.ones(in_shp[:2]).astype('float32')

    # need to set random seed.
    lasagne.random.get_rng().seed(1234)
    l_rec_scan = RecurrentLayer(l_inp,
                                num_units=num_units,
                                backwards=False,
                                unroll_scan=False,
                                mask_input=l_mask_inp)
    lasagne.random.get_rng().seed(1234)
    l_rec_unroll = RecurrentLayer(l_inp,
                                  num_units=num_units,
                                  backwards=False,
                                  unroll_scan=True,
                                  mask_input=l_mask_inp)
    output_scan = helper.get_output(l_rec_scan)
    output_unrolled = helper.get_output(l_rec_unroll)

    output_scan_val = output_scan.eval({
        l_inp.input_var: x_in,
        l_mask_inp.input_var: mask_in
    })
    output_unrolled_val = output_unrolled.eval({
        l_inp.input_var: x_in,
        l_mask_inp.input_var: mask_in
    })
    np.testing.assert_almost_equal(output_scan_val, output_unrolled_val)
Example #4
def test_recurrent_precompute():
    num_batch, seq_len, n_features1 = 2, 3, 4
    num_units = 2
    in_shp = (num_batch, seq_len, n_features1)
    l_inp = InputLayer(in_shp)
    l_mask_inp = InputLayer(in_shp[:2])

    x_in = np.random.random(in_shp).astype('float32')
    mask_in = np.ones((num_batch, seq_len), dtype='float32')

    # need to set random seed.
    lasagne.random.get_rng().seed(1234)
    l_rec_precompute = RecurrentLayer(l_inp,
                                      num_units=num_units,
                                      precompute_input=True,
                                      mask_input=l_mask_inp)
    lasagne.random.get_rng().seed(1234)
    l_rec_no_precompute = RecurrentLayer(l_inp,
                                         num_units=num_units,
                                         precompute_input=False,
                                         mask_input=l_mask_inp)
    output_precompute = helper.get_output(l_rec_precompute).eval({
        l_inp.input_var:
        x_in,
        l_mask_inp.input_var:
        mask_in
    })
    output_no_precompute = helper.get_output(l_rec_no_precompute).eval({
        l_inp.input_var:
        x_in,
        l_mask_inp.input_var:
        mask_in
    })

    np.testing.assert_almost_equal(output_precompute, output_no_precompute)
Example #5
def exe_rnn(use_embedd, length, num_units, position, binominal):
    batch_size = BATCH_SIZE

    input_var = T.tensor3(name='inputs', dtype=theano.config.floatX)
    target_var = T.ivector(name='targets')

    layer_input = lasagne.layers.InputLayer(shape=(None, length, 1),
                                            input_var=input_var,
                                            name='input')
    if use_embedd:
        layer_position = construct_position_input(batch_size, length,
                                                  num_units)
        layer_input = lasagne.layers.concat([layer_input, layer_position],
                                            axis=2)

    layer_rnn = RecurrentLayer(layer_input,
                               num_units,
                               nonlinearity=nonlinearities.tanh,
                               only_return_final=True,
                               W_in_to_hid=lasagne.init.GlorotUniform(),
                               W_hid_to_hid=lasagne.init.GlorotUniform(),
                               b=lasagne.init.Constant(0.),
                               name='RNN')
    # W = layer_rnn.W_hid_to_hid.sum()
    # U = layer_rnn.W_in_to_hid.sum()
    # b = layer_rnn.b.sum()

    layer_output = DenseLayer(layer_rnn,
                              num_units=1,
                              nonlinearity=nonlinearities.sigmoid,
                              name='output')

    return train(layer_output, layer_rnn, input_var, target_var, batch_size,
                 length, position, binominal)
Example #6
def test_recurrent_tensor_init():
    # check if passing in a TensorVariable to hid_init works
    num_units = 5
    batch_size = 3
    seq_len = 2
    n_inputs = 4
    in_shp = (batch_size, seq_len, n_inputs)
    l_inp = InputLayer(in_shp)
    hid_init = T.matrix()
    x = T.tensor3()

    l_rec = RecurrentLayer(l_inp,
                           num_units,
                           learn_init=True,
                           hid_init=hid_init)
    # check that the tensor is used
    assert hid_init == l_rec.hid_init

    # W_in_to_hid, W_hid_to_hid and b; the TensorVariable hid_init is not returned
    assert len(lasagne.layers.get_all_params(l_rec, trainable=True)) == 3

    # only b is non-regularizable; hid_init is not returned
    assert len(lasagne.layers.get_all_params(l_rec, regularizable=False)) == 1

    # check that it compiles and runs
    output = lasagne.layers.get_output(l_rec, x)
    x_test = np.ones(in_shp, dtype='float32')
    hid_init_test = np.ones((batch_size, num_units), dtype='float32')
    output_val = output.eval({x: x_test, hid_init: hid_init_test})
    assert isinstance(output_val, np.ndarray)
Example #7
def test_recurrent_grad():
    num_batch, seq_len, n_features = 5, 3, 10
    num_units = 6
    l_inp = InputLayer((num_batch, seq_len, n_features))
    l_rec = RecurrentLayer(l_inp, num_units=num_units)
    output = helper.get_output(l_rec)
    g = T.grad(T.mean(output), lasagne.layers.get_all_params(l_rec))
    assert isinstance(g, (list, tuple))
Example #8
def test_recurrent_hid_init_layer_eval():
    # Test `hid_init` as a `Layer` with some dummy input. Compare the output of
    # a network with a `Layer` as input to `hid_init` to a network with a
    # `np.array` as input to `hid_init`
    n_units = 7
    n_test_cases = 2
    in_shp = (n_test_cases, 2, 3)
    in_h_shp = (1, n_units)

    # dummy inputs
    X_test = np.ones(in_shp, dtype=theano.config.floatX)
    Xh_test = np.ones(in_h_shp, dtype=theano.config.floatX)
    Xh_test_batch = np.tile(Xh_test, (n_test_cases, 1))

    # network with `Layer` initializer for hid_init
    l_inp = InputLayer(in_shp)
    l_inp_h = InputLayer(in_h_shp)
    l_rec_inp_layer = RecurrentLayer(l_inp, n_units, hid_init=l_inp_h,
                                     nonlinearity=None)

    # network with `np.array` initializer for hid_init
    l_rec_nparray = RecurrentLayer(l_inp, n_units, hid_init=Xh_test,
                                   nonlinearity=None)

    # copy network parameters from l_rec_inp_layer to l_rec_nparray
    l_il_param = dict([(p.name, p) for p in l_rec_inp_layer.get_params()])
    l_rn_param = dict([(p.name, p) for p in l_rec_nparray.get_params()])
    for k, v in l_rn_param.items():
        if k in l_il_param:
            v.set_value(l_il_param[k].get_value())

    # build the theano functions
    X = T.tensor3()
    Xh = T.matrix()
    output_inp_layer = lasagne.layers.get_output(l_rec_inp_layer,
                                                 {l_inp: X, l_inp_h: Xh})
    output_nparray = lasagne.layers.get_output(l_rec_nparray, {l_inp: X})

    # test both nets with dummy input
    output_val_inp_layer = output_inp_layer.eval({X: X_test,
                                                  Xh: Xh_test_batch})
    output_val_nparray = output_nparray.eval({X: X_test})

    # check output given `Layer` is the same as with `np.array`
    assert np.allclose(output_val_inp_layer, output_val_nparray)
Example #9
def test_recurrent_return_final():
    num_batch, seq_len, n_features = 2, 3, 4
    num_units = 2
    in_shp = (num_batch, seq_len, n_features)
    x_in = np.random.random(in_shp).astype('float32')

    l_inp = InputLayer(in_shp)
    lasagne.random.get_rng().seed(1234)
    l_rec_final = RecurrentLayer(l_inp, num_units, only_return_final=True)
    lasagne.random.get_rng().seed(1234)
    l_rec_all = RecurrentLayer(l_inp, num_units, only_return_final=False)

    output_final = helper.get_output(l_rec_final).eval({l_inp.input_var: x_in})
    output_all = helper.get_output(l_rec_all).eval({l_inp.input_var: x_in})

    assert output_final.shape == (output_all.shape[0], output_all.shape[2])
    assert output_final.shape == lasagne.layers.get_output_shape(l_rec_final)
    assert np.allclose(output_final, output_all[:, -1])
Example #10
def test_recurrent_nparams_learn_init():
    l_inp = InputLayer((2, 2, 3))
    l_rec = RecurrentLayer(l_inp, 5, learn_init=True)

    # b, W_hid_to_hid and W_in_to_hid + hid_init
    assert len(lasagne.layers.get_all_params(l_rec, trainable=True)) == 4

    # b + hid_init
    assert len(lasagne.layers.get_all_params(l_rec, regularizable=False)) == 2
Example #11
def test_recurrent_hid_init_layer():
    # test that you can set hid_init to be a layer
    l_inp = InputLayer((2, 2, 3))
    l_inp_h = InputLayer((2, 5))
    l_rec = RecurrentLayer(l_inp, 5, hid_init=l_inp_h)

    x = T.tensor3()
    h = T.matrix()

    output = lasagne.layers.get_output(l_rec, {l_inp: x, l_inp_h: h})
Example #12
def test_recurrent_grad_clipping():
    num_units = 5
    batch_size = 3
    seq_len = 2
    n_inputs = 4
    in_shp = (batch_size, seq_len, n_inputs)
    l_inp = InputLayer(in_shp)
    x = T.tensor3()
    l_rec = RecurrentLayer(l_inp, num_units, grad_clipping=1.0)
    output = lasagne.layers.get_output(l_rec, x)
Example #13
def test_gradient_steps_error():
    # Check that an error is raised if gradient_steps is not -1 and
    # unroll_scan is True
    l_in = InputLayer((2, 2, 3))
    with pytest.raises(ValueError):
        RecurrentLayer(l_in, 5, gradient_steps=3, unroll_scan=True)

    with pytest.raises(ValueError):
        LSTMLayer(l_in, 5, gradient_steps=3, unroll_scan=True)

    with pytest.raises(ValueError):
        GRULayer(l_in, 5, gradient_steps=3, unroll_scan=True)
Example #14
def test_unroll_none_input_error():
    # Test that a ValueError is raised if unroll scan is True and the input
    # sequence length is specified as None.
    l_in = InputLayer((2, None, 3))
    with pytest.raises(ValueError):
        RecurrentLayer(l_in, 5, unroll_scan=True)

    with pytest.raises(ValueError):
        LSTMLayer(l_in, 5, unroll_scan=True)

    with pytest.raises(ValueError):
        GRULayer(l_in, 5, unroll_scan=True)
Example #15
def test_recurrent_bck():
    num_batch, seq_len, n_features1 = 2, 3, 4
    num_units = 2
    x = T.tensor3()
    in_shp = (num_batch, seq_len, n_features1)
    l_inp = InputLayer(in_shp)

    x_in = np.ones(in_shp).astype('float32')

    # need to set random seed.
    lasagne.random.get_rng().seed(1234)
    l_rec_fwd = RecurrentLayer(l_inp, num_units=num_units, backwards=False)
    lasagne.random.get_rng().seed(1234)
    l_rec_bck = RecurrentLayer(l_inp, num_units=num_units, backwards=True)
    l_out_fwd = helper.get_output(l_rec_fwd, x)
    l_out_bck = helper.get_output(l_rec_bck, x)

    output_fwd = l_out_fwd.eval({x: x_in})
    output_bck = l_out_bck.eval({x: x_in})

    # with constant input, the backwards layer's output is the forward
    # output reversed along the time axis
    np.testing.assert_almost_equal(output_fwd, output_bck[:, ::-1])
Example #16
def test_recurrent_hid_init_mask():
    # test that you can set hid_init to be a layer when a mask is provided
    l_inp = InputLayer((2, 2, 3))
    l_inp_h = InputLayer((2, 5))
    l_inp_msk = InputLayer((2, 2))
    l_rec = RecurrentLayer(l_inp, 5, hid_init=l_inp_h, mask_input=l_inp_msk)

    x = T.tensor3()
    h = T.matrix()
    msk = T.matrix()

    inputs = {l_inp: x, l_inp_h: h, l_inp_msk: msk}
    output = lasagne.layers.get_output(l_rec, inputs)
Example #17
def test_recurrent_variable_input_size():
    # check that a batch size and sequence length of None work
    num_batch, n_features1 = 6, 5
    num_units = 13
    x = T.tensor3()

    in_shp = (None, None, n_features1)
    l_inp = InputLayer(in_shp)
    x_in1 = np.ones((num_batch + 1, 10, n_features1)).astype('float32')
    x_in2 = np.ones((num_batch, 15, n_features1)).astype('float32')
    l_rec = RecurrentLayer(l_inp, num_units=num_units, backwards=False)
    output = helper.get_output(l_rec, x)
    output_val1 = output.eval({x: x_in1})
    output_val2 = output.eval({x: x_in2})
Example #18
def test_recurrent_return_shape():
    num_batch, seq_len, n_features1, n_features2 = 5, 3, 10, 11
    num_units = 6
    x = T.tensor4()
    in_shp = (num_batch, seq_len, n_features1, n_features2)
    l_inp = InputLayer(in_shp)
    l_rec = RecurrentLayer(l_inp, num_units=num_units)

    x_in = np.random.random(in_shp).astype('float32')
    output = helper.get_output(l_rec, x)
    output_val = output.eval({x: x_in})

    assert helper.get_output_shape(l_rec, x_in.shape) == output_val.shape
    assert output_val.shape == (num_batch, seq_len, num_units)
Example #19
def rnn_model(M,
              K=20,
              hh=.0001,
              ep=5000,
              d=0,
              wsp=0.0001,
              hsp=0,
              spb=3,
              bt=0,
              al='rmsprop',
              t=5):
    # Copy key variables to GPU
    _M = Th.matrix('_M')

    # Input and forward transform
    I = InputLayer(shape=(None, M.shape[0]), input_var=_M)

    # First layer is the transform to a non-negative subspace
    H0 = DenseLayer(I,
                    num_units=K,
                    nonlinearity=lambda x: psoftplus(x, spb),
                    b=None)

    # Optional dropout
    H = DropoutLayer(H0, d)

    # Compute output
    R = RecurrentLayer(H,
                       num_units=M.T.shape[1],
                       nonlinearity=lambda x: psoftplus(x, spb),
                       gradient_steps=t,
                       b=None)

    # Cost function
    Ro = get_output(R) + eps
    cost = Th.mean(_M * (Th.log(_M + eps) - Th.log(Ro)) - _M + Ro) \
        + hsp * Th.mean(get_output(H0))

    # Train it using Lasagne
    opt = downhill.build(al, loss=cost, inputs=[_M], params=get_all_params(R))
    train = downhill.Dataset(M.T.astype(float32), batch_size=bt)
    er = downhill_train(opt, train, hh, ep, None)

    # Get approximation
    _r = nget(R, _M, M.T.astype(float32)).T
    _h = nget(H, _M, M.T.astype(float32)).T

    return _r, (R.W_in_to_hid.get_value(), R.W_hid_to_hid.get_value()), er, _h
Example #20
def test_recurrent_nparams_hid_init_layer():
    # test that you can see layers through hid_init
    l_inp = InputLayer((2, 2, 3))
    l_inp_h = InputLayer((2, 5))
    l_inp_h_de = DenseLayer(l_inp_h, 7)
    l_rec = RecurrentLayer(l_inp, 7, hid_init=l_inp_h_de)

    # directly check the layers can be seen through hid_init
    assert lasagne.layers.get_all_layers(l_rec) == [l_inp, l_inp_h, l_inp_h_de,
                                                    l_rec]

    # b, W_hid_to_hid and W_in_to_hid + W + b
    assert len(lasagne.layers.get_all_params(l_rec, trainable=True)) == 5

    # b (recurrent) + b (dense)
    assert len(lasagne.layers.get_all_params(l_rec, regularizable=False)) == 2
Example #21
def test_recurrent_hid_init_layer_eval():
    # Test `hid_init` as a `Layer` with some dummy input. Compare the output of
    # a network with a `Layer` as input to `hid_init` to a network with a
    # `np.array` as input to `hid_init`
    n_units = 7
    n_test_cases = 2
    in_shp = (n_test_cases, 2, 3)
    in_h_shp = (1, n_units)

    # dummy inputs
    X_test = np.ones(in_shp, dtype=theano.config.floatX)
    Xh_test = np.ones(in_h_shp, dtype=theano.config.floatX)
    Xh_test_batch = np.tile(Xh_test, (n_test_cases, 1))

    # network with `Layer` initializer for hid_init
    l_inp = InputLayer(in_shp)
    l_inp_h = InputLayer(in_h_shp)
    l_rec_inp_layer = RecurrentLayer(l_inp,
                                     n_units,
                                     hid_init=l_inp_h,
                                     nonlinearity=None)

    # network with `np.array` initializer for hid_init
    l_rec_nparray = RecurrentLayer(l_inp,
                                   n_units,
                                   hid_init=Xh_test,
                                   nonlinearity=None)

    # copy network parameters from l_rec_inp_layer to l_rec_nparray
    l_il_param = dict([(p.name, p) for p in l_rec_inp_layer.get_params()])
    l_rn_param = dict([(p.name, p) for p in l_rec_nparray.get_params()])
    for k, v in l_rn_param.items():
        if k in l_il_param:
            v.set_value(l_il_param[k].get_value())

    # build the theano functions
    X = T.tensor3()
    Xh = T.matrix()
    output_inp_layer = lasagne.layers.get_output(l_rec_inp_layer, {
        l_inp: X,
        l_inp_h: Xh
    })
    output_nparray = lasagne.layers.get_output(l_rec_nparray, {l_inp: X})

    # test both nets with dummy input
    output_val_inp_layer = output_inp_layer.eval({
        X: X_test,
        Xh: Xh_test_batch
    })
    output_val_nparray = output_nparray.eval({X: X_test})

    # check output given `Layer` is the same as with `np.array`
    assert np.allclose(output_val_inp_layer, output_val_nparray)
Example #22
def create_rnn(input_vars, num_inputs, hidden_layer_size, num_outputs):
    network = InputLayer((None, None, num_inputs), input_vars)
    batch_size_theano, seqlen, _ = network.input_var.shape
    network = GaussianNoiseLayer(network, sigma=0.05)

    for i in range(1):
        network = RecurrentLayer(network,
                                 hidden_layer_size,
                                 W_hid_to_hid=GlorotUniform(),
                                 W_in_to_hid=GlorotUniform(),
                                 b=Constant(1.0),
                                 nonlinearity=leaky_rectify,
                                 learn_init=True)

    network = ReshapeLayer(network, (-1, hidden_layer_size))
    network = DenseLayer(network, num_outputs, nonlinearity=softmax)
    network = ReshapeLayer(network, (batch_size_theano, seqlen, num_outputs))

    return network
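A short, hedged sketch of how create_rnn might be wired up; the variable names and sizes are illustrative assumptions, and it relies on theano.tensor as T and lasagne being imported as in the other examples.

# Sketch: build the network for 10-dimensional inputs and 4 output classes,
# then obtain its symbolic output.
inputs = T.tensor3('inputs')
net = create_rnn(inputs, num_inputs=10, hidden_layer_size=32, num_outputs=4)
probs = lasagne.layers.get_output(net)  # shape: (batch, seq_len, 4) softmax probabilities
# predict = theano.function([inputs], probs)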
Example #23
def main():
    input_var = T.tensor3(name='inputs', dtype=theano.config.floatX)
    target_var = T.ivector(name='targets')

    layer_input = lasagne.layers.InputLayer(shape=(None, LENGTH, 1),
                                            input_var=input_var,
                                            name='input')

    layer_rnn = RecurrentLayer(layer_input,
                               NUM_UNITS,
                               nonlinearity=nonlinearities.tanh,
                               only_return_final=True,
                               W_in_to_hid=lasagne.init.Constant(1),
                               W_hid_to_hid=lasagne.init.Constant(2),
                               b=None,
                               name='RNN')
    W = layer_rnn.W_hid_to_hid
    U = layer_rnn.W_in_to_hid

    output = lasagne.layers.get_output(layer_rnn)
    output = output.mean(axis=1)
    prediction = T.switch(T.gt(output, 0), 1, -1)
    acc = T.eq(prediction, target_var)
    acc = acc.sum()
    # get the output before activation function tanh
    epsilon = 1e-6
    prob = 0.5 * T.log((1 + output + epsilon) / (1 - output + epsilon))
    prob = nonlinearities.sigmoid(prob)
    loss = -0.5 * ((1 + target_var) * T.log(prob) +
                   (1 - target_var) * T.log(1 - prob))
    loss = loss.sum()

    batch_size = 100
    learning_rate = 0.01
    steps_per_epoch = 1000
    params = lasagne.layers.get_all_params(layer_rnn, trainable=True)
    updates = lasagne.updates.sgd(loss,
                                  params=params,
                                  learning_rate=learning_rate)
    train_fn = theano.function([input_var, target_var],
                               [loss, acc, W, U, output],
                               updates=updates)

    for epoch in range(3):
        print('Epoch %d (learning rate=%.4f)' % (epoch, learning_rate))
        loss = 0.0
        correct = 0.0
        num_back = 0
        for step in range(steps_per_epoch):
            x, y = get_batch(batch_size)
            err, corr, w, u, pred = train_fn(x, y)
            # print x
            # print y
            # print pred
            loss += err
            correct += corr
            num_inst = (step + 1) * batch_size
            # update log
            sys.stdout.write("\b" * num_back)
            log_info = 'inst: %d loss: %.4f, corr: %d, acc: %.2f%%, W: %.6f, U: %.6f' % (
                num_inst, loss / num_inst, correct, correct * 100 / num_inst,
                w.sum(), u.sum())
            sys.stdout.write(log_info)
            num_back = len(log_info)
            # raw_input()
        # update training log after each epoch
        sys.stdout.write("\b" * num_back)
        assert num_inst == batch_size * steps_per_epoch
        print('inst: %d loss: %.4f, corr: %d, acc: %.2f%%' % (
            num_inst, loss / num_inst, correct, correct * 100 / num_inst))
Example #24
def get_model():

    # Prepare Theano variables for inputs and targets
    input_var = T.tensor4('inputs')
    target_var = T.matrix('targets')

    # input layer with unspecified batch size
    layer_both_0 = InputLayer(shape=(None, 30, 64, 64), input_var=input_var)

    # Z-score?

    # Two blocks of convolution + batch normalisation + activation, then padded max-pooling followed by dropout
    layer_both_1 = batch_norm(
        Conv2DLayer(layer_both_0,
                    64, (3, 3),
                    pad='same',
                    nonlinearity=leaky_rectify))
    layer_both_2 = batch_norm(
        Conv2DLayer(layer_both_1,
                    64, (3, 3),
                    pad='valid',
                    nonlinearity=leaky_rectify))
    layer_both_3 = MaxPool2DLayer(layer_both_2,
                                  pool_size=(2, 2),
                                  stride=(2, 2),
                                  pad=(1, 1))
    layer_both_4 = DropoutLayer(layer_both_3, p=0.25)

    # Systole: two conv + batch-norm + activation blocks, then padded max-pooling and dropout
    layer_systole_0 = batch_norm(
        Conv2DLayer(layer_both_4,
                    96, (3, 3),
                    pad='same',
                    nonlinearity=leaky_rectify))
    layer_systole_1 = batch_norm(
        Conv2DLayer(layer_systole_0,
                    96, (3, 3),
                    pad='valid',
                    nonlinearity=leaky_rectify))
    layer_systole_2 = MaxPool2DLayer(layer_systole_1,
                                     pool_size=(2, 2),
                                     stride=(2, 2),
                                     pad=(1, 1))
    layer_systole_3 = DropoutLayer(layer_systole_2, p=0.25)

    # Diastole: two conv + batch-norm + activation blocks, then padded max-pooling and dropout
    layer_diastole_0 = batch_norm(
        Conv2DLayer(layer_both_4,
                    96, (3, 3),
                    pad='same',
                    nonlinearity=leaky_rectify))
    layer_diastole_1 = batch_norm(
        Conv2DLayer(layer_diastole_0,
                    96, (3, 3),
                    pad='valid',
                    nonlinearity=leaky_rectify))
    layer_diastole_2 = MaxPool2DLayer(layer_diastole_1,
                                      pool_size=(2, 2),
                                      stride=(2, 2),
                                      pad=(1, 1))
    layer_diastole_3 = DropoutLayer(layer_diastole_2, p=0.25)

    # Systole: two more conv + batch-norm + activation blocks, then padded max-pooling and dropout
    layer_systole_4 = batch_norm(
        Conv2DLayer(layer_systole_3,
                    128, (3, 3),
                    pad='same',
                    nonlinearity=leaky_rectify))
    layer_systole_5 = batch_norm(
        Conv2DLayer(layer_systole_4,
                    128, (3, 3),
                    pad='valid',
                    nonlinearity=leaky_rectify))
    layer_systole_6 = MaxPool2DLayer(layer_systole_5,
                                     pool_size=(2, 2),
                                     stride=(2, 2),
                                     pad=(1, 1))
    layer_systole_7 = DropoutLayer(layer_systole_6, p=0.25)

    # Diastole: two more conv + batch-norm + activation blocks, then padded max-pooling and dropout
    layer_diastole_4 = batch_norm(
        Conv2DLayer(layer_diastole_3,
                    128, (3, 3),
                    pad='same',
                    nonlinearity=leaky_rectify))
    layer_diastole_5 = batch_norm(
        Conv2DLayer(layer_diastole_4,
                    128, (3, 3),
                    pad='valid',
                    nonlinearity=leaky_rectify))
    layer_diastole_6 = MaxPool2DLayer(layer_diastole_5,
                                      pool_size=(2, 2),
                                      stride=(2, 2),
                                      pad=(1, 1))
    layer_diastole_7 = DropoutLayer(layer_diastole_6, p=0.25)

    # Systole : Last layers
    layer_systole_8 = FlattenLayer(layer_systole_7)
    layer_systole_9 = DenseLayer(layer_systole_8,
                                 1024,
                                 nonlinearity=leaky_rectify)
    layer_systole_10 = DropoutLayer(layer_systole_9, p=0.5)
    layer_systole_11 = DenseLayer(layer_systole_10, 600, nonlinearity=softmax)

    # Diastole : Last layers
    layer_diastole_8 = FlattenLayer(layer_diastole_7)
    layer_diastole_9 = DenseLayer(layer_diastole_8,
                                  1024,
                                  nonlinearity=leaky_rectify)
    layer_diastole_10 = DropoutLayer(layer_diastole_9, p=0.5)
    layer_diastole_11 = DenseLayer(layer_diastole_10,
                                   600,
                                   nonlinearity=softmax)

    # Add recurrent layer and merge layer for output
    # (note: layer_recurrent is not connected to the output used below)
    layer_recurrent = RecurrentLayer(
        ConcatLayer([layer_systole_9, layer_diastole_9]), 512)
    layer_both_5 = ConcatLayer([layer_systole_11, layer_diastole_11])

    # Loss
    prediction = get_output(layer_both_5)
    loss = squared_error(prediction, target_var)
    loss = loss.mean() + regularize_layer_params(
        layer_systole_9, l2) + regularize_layer_params(layer_diastole_9, l2)

    # Updates: Stochastic Gradient Descent (SGD) with Nesterov momentum
    params = get_all_params(layer_both_5, trainable=True)
    updates = nesterov_momentum(loss, params, learning_rate=0.01, momentum=0.9)

    # Create a loss expression for validation/testing. The crucial difference
    # here is that we do a deterministic forward pass through the network, disabling dropout layers.
    test_prediction = get_output(layer_both_5, deterministic=True)
    test_loss = squared_error(test_prediction, target_var)
    test_loss = test_loss.mean()

    # Compile a function performing a training step on a mini-batch (by giving
    # the updates dictionary) and returning the corresponding training loss:
    train_fn = theano.function([input_var, target_var], loss, updates=updates)

    # Compile a second function computing the validation loss and accuracy:
    val_fn = theano.function([input_var, target_var], test_loss)

    # Compile a third function computing the prediction
    predict_fn = theano.function([input_var], test_prediction)

    return [layer_both_5, train_fn, val_fn, predict_fn]
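A hedged usage sketch (names and array contents are illustrative assumptions, not part of the original code) showing how the four returned objects might be driven.

# Sketch: call the compiled functions with hypothetical float32 batches
# X_batch (inputs of shape (n, 30, 64, 64)) and y_batch (targets).
network, train_fn, val_fn, predict_fn = get_model()
# train_err = train_fn(X_batch, y_batch)   # one SGD step, returns training loss
# val_err = val_fn(X_batch, y_batch)       # deterministic loss (dropout disabled)
# preds = predict_fn(X_batch)              # deterministic predictions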
Example #25
def rnn_sep(M,
            W1,
            W2,
            hh=.0001,
            ep=5000,
            d=0,
            sp=.0001,
            spb=3,
            al='rmsprop',
            t=5):
    # Get dictionary shapes
    K = [W1[0].shape[0], W2[0].shape[0]]

    # GPU cached data
    _M = theano.shared(M.T.astype(float32))
    dum = Th.vector('dum')

    # We have weights to discover
    H = theano.shared(
        sqrt(2. / (K[0] + K[1] + M.shape[1])) *
        random.rand(M.T.shape[0], K[0] + K[1]).astype(float32))
    fI = InputLayer(shape=(M.T.shape[0], K[0] + K[1]), input_var=H)

    # Split in two pathways
    fW1 = SliceLayer(fI, indices=slice(0, K[0]), axis=1)
    fW2 = SliceLayer(fI, indices=slice(K[0], K[0] + K[1]), axis=1)

    # Dropout?
    dfW1 = DropoutLayer(fW1, dum[0])
    dfW2 = DropoutLayer(fW2, dum[0])

    # Compute source modulators using previously learned dictionaries
    R1 = RecurrentLayer(dfW1,
                        num_units=M.T.shape[1],
                        b=None,
                        W_in_to_hid=W1[0].astype(float32),
                        W_hid_to_hid=W1[1].astype(float32),
                        nonlinearity=lambda x: psoftplus(x, spb),
                        gradient_steps=5)
    R2 = RecurrentLayer(dfW2,
                        num_units=M.T.shape[1],
                        b=None,
                        W_in_to_hid=W2[0].astype(float32),
                        W_hid_to_hid=W2[1].astype(float32),
                        nonlinearity=lambda x: psoftplus(x, spb),
                        gradient_steps=5)

    # Add the two approximations
    R = ElemwiseSumLayer([R1, R2])

    # Cost function
    Ro = get_output(R) + eps
    cost = (_M * (Th.log(_M + eps) - Th.log(Ro + eps)) - _M + Ro).mean() \
        + sp * Th.mean(abs(H)) + 0 * Th.mean(dum)

    # Train it using Lasagne
    opt = downhill.build(al, loss=cost, inputs=[dum], params=[H])
    train = downhill.Dataset(array([d]).astype(float32), batch_size=0)
    er = downhill_train(opt, train, hh, ep, None)

    # Get outputs
    _r = nget(R, dum, array([0]).astype(float32)).T + eps
    _r1 = nget(R1, dum, array([0]).astype(float32)).T
    _r2 = nget(R2, dum, array([0]).astype(float32)).T

    return _r, _r1, _r2, er
Example #26
def test_recurrent_init_val_error():
    # check that an error is raised when hid_init is not a matrix tensor
    hid_init = T.vector()
    with pytest.raises(ValueError):
        l_rec = RecurrentLayer(InputLayer((2, 2, 3)), 5, hid_init=hid_init)
Example #27
                pulse_end = pulse_start + PULSE_WIDTH
                X[batch_i, pulse_start:pulse_end, 0] = OFF
            t[batch_i, :, 0] = X[batch_i, :, 0].copy()
    X += noise()
    return X, t


X_val, t_val = gen_data()

# Configure layers
layers = [InputLayer(shape=SHAPE)]
for i in range(N_HIDDEN_LAYERS):
    layer = RecurrentLayer(
        layers[-1],
        N_UNITS_PER_LAYER,
        nonlinearity=tanh,
        W_in_to_hid=Normal(std=1.0 /
                           np.sqrt(layers[-1].get_output_shape()[-1])),
        gradient_steps=100)
    layers.append(layer)
layers.append(
    ReshapeLayer(layers[-1],
                 (N_SEQ_PER_BATCH * SEQ_LENGTH, N_UNITS_PER_LAYER)))
layers.append(
    MixtureDensityLayer(layers[-1],
                        num_units=t_val.shape[-1],
                        num_components=N_COMPONENTS,
                        min_sigma=0))

print("Total parameters: {}".format(
    sum([