def crop_layer(self):
    from mock import Mock
    from lasagne.layers.merge import ElemwiseSumLayer
    l1 = Mock(output_shape=(None, None))
    l2 = Mock(output_shape=(None, None))
    return ElemwiseSumLayer((l1, l2),
                            coeffs=[2, -1],
                            cropping=['lower'] * 2)
Example #2
    def test_broadcasting_pattern(self):
        from lasagne.layers import ElemwiseSumLayer, InputLayer
        import lasagne
        import theano.tensor as T
        import numpy as np
        import theano
        a, b = T.matrices('a', 'b')
        a_ = np.ones((2, 1), dtype=theano.config.floatX)
        b_ = np.ones((2, 5), dtype=theano.config.floatX)
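        # a_ has shape (2, 1) and b_ has shape (2, 5): summing them should
        # broadcast a_ along dim 1, giving np.ones((2, 5)) + 1.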
        l_a = InputLayer((2, 1))
        l_b = InputLayer((2, 5))
        l_o = ElemwiseSumLayer([l_a, l_b])
        shp = l_o.output_shape  # set broadcastable table
        output = lasagne.layers.get_output(l_o, {
            l_a: a,
            l_b: b
        }).eval({
            a: a_,
            b: b_
        })
        np.testing.assert_array_almost_equal(output, np.ones((2, 5)) + 1.0)
        assert shp == output.shape

        # test that None dimensions are not modified
        l_a = InputLayer((2, None))
        l_b = InputLayer((2, None))
        l_o = ElemwiseSumLayer([l_a, l_b])
        shp = l_o.output_shape  # set broadcastable table
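        # with input shape (2, None) the layer cannot mark dim 1 as
        # broadcastable itself, so the next line declares it broadcastable
        # by hand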
        a = T.addbroadcast(a, 1)
        output = lasagne.layers.get_output(l_o, {
            l_a: a,
            l_b: b
        }).eval({
            a: a_,
            b: b_
        })
        np.testing.assert_array_almost_equal(output, np.ones((2, 5)) + 1.0)
        assert shp == (2, None)
Example #3
def get_rnn_unit(l_in,
                 mask,
                 rev_mask,
                 state,
                 rev_state,
                 n_units,
                 prefix,
                 grad_clip=0,
                 context=None,
                 attention=False):
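    """Build an (optionally bidirectional) GRU block and return its layers
    in an OrderedDict.

    Sketch of assumed context: ``Gate`` and ``ElemwiseSumLayer`` come from
    ``lasagne.layers``; ``GRULayer`` is a project-specific variant that
    accepts ``context_input``/``use_attention`` (the stock
    ``lasagne.layers.GRULayer`` does not); ``input``, ``inner`` and ``tanh``
    are assumed to be weight initializers and the tanh nonlinearity defined
    at module level.
    """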

    from collections import OrderedDict
    net = OrderedDict()
    hid = state
    rg = Gate(W_in=input, W_hid=inner, W_cell=None)
    ug = Gate(W_in=input, W_hid=inner, W_cell=None)
    hg = Gate(W_in=input, W_hid=inner, W_cell=None, nonlinearity=tanh)

    net[prefix + 'gru'] = GRULayer(l_in,
                                   num_units=n_units,
                                   resetgate=rg,
                                   updategate=ug,
                                   hidden_update=hg,
                                   mask_input=mask,
                                   hid_init=hid,
                                   learn_init=False,
                                   only_return_final=False,
                                   grad_clipping=grad_clip,
                                   context_input=context,
                                   use_attention=attention,
                                   name='gru')

    if rev_mask is not None and rev_state is not None:
        net[prefix + 'gru_rev'] = GRULayer(l_in,
                                           num_units=n_units,
                                           resetgate=rg,
                                           updategate=ug,
                                           hidden_update=hg,
                                           mask_input=rev_mask,
                                           hid_init=rev_state,
                                           only_return_final=False,
                                           learn_init=False,
                                           grad_clipping=grad_clip,
                                           context_input=context,
                                           backwards=True,
                                           name='gru_rev')

        net['context'] = ElemwiseSumLayer(list(net.values())[-2:],
                                          name='context')

    return net
Example #4
def create_blstm(input_vars, mask_vars, num_inputs, depth, hidden_layer_size,
                 num_outputs):
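    """Stack ``depth`` bidirectional LSTM blocks (forward and backward
    outputs summed elementwise) on top of a Gaussian-noise input layer,
    then flatten the hidden states and apply a softmax dense layer."""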
    import lasagne
    from lasagne.layers import (InputLayer, GaussianNoiseLayer, LSTMLayer,
                                ElemwiseSumLayer, ReshapeLayer, DenseLayer)
    from lasagne.nonlinearities import softmax
    network = lasagne.layers.InputLayer(shape=(None, 1, 1, num_inputs),
                                        input_var=input_vars)
    mask = InputLayer((None, None), input_var=mask_vars)
    network = GaussianNoiseLayer(network, sigma=0.01)
    for i in range(depth):
        forward = LSTMLayer(network,
                            hidden_layer_size,
                            mask_input=mask,
                            learn_init=True)
        backward = LSTMLayer(network,
                             hidden_layer_size,
                             mask_input=mask,
                             learn_init=True,
                             backwards=True)
        network = ElemwiseSumLayer([forward, backward])
    network = ReshapeLayer(network, (-1, hidden_layer_size))
    network = DenseLayer(network, num_outputs, nonlinearity=softmax)
    return network
Example #5
def test_bad_coeffs_fails(self, layer):
    import pytest
    from mock import Mock
    from lasagne.layers.merge import ElemwiseSumLayer
    with pytest.raises(ValueError):
        ElemwiseSumLayer([Mock(), Mock()], coeffs=[2, 3, -1])
Example #6
def crop_layer(self):
    from mock import Mock
    from lasagne.layers.merge import ElemwiseSumLayer
    return ElemwiseSumLayer([Mock(), Mock()], coeffs=[2, -1],
                            cropping=['lower'] * 2)
Example #7
def layer(self):
    from mock import Mock
    from lasagne.layers.merge import ElemwiseSumLayer
    return ElemwiseSumLayer([Mock(), Mock()], coeffs=[2, -1])

def test_bad_coeffs_fails(self, layer):
    import pytest
    from mock import Mock
    from lasagne.layers.merge import ElemwiseSumLayer
    with pytest.raises(ValueError):
        l1 = Mock(output_shape=(None, None))
        l2 = Mock(output_shape=(None, None))
        ElemwiseSumLayer((l1, l2), coeffs=[2, 3, -1])
Example #9
def residual_block(
    l,
    batch_norm_alpha,
    batch_norm_epsilon,
    nonlinearity,
    survival_prob,
    add_after_nonlin,
    reduction_method,
    reduction_pool_mode,
    increase_units_factor=None,
    half_time=False,
    projection=False,
):
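    """Residual block, optionally with a stochastic-depth style random
    switch between block output and shortcut (``survival_prob``).

    Sketch of assumed context: ``Conv2DLayer``, ``Pool2DLayer``,
    ``batch_norm``, ``NonlinearityLayer``, ``ElemwiseSumLayer``, ``PadLayer``
    and ``ExpressionLayer`` come from ``lasagne.layers``;
    ``StrideReshapeLayer`` and ``RandomSwitchLayer`` are custom layers from
    the surrounding codebase; ``T`` is ``theano.tensor``.
    """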
    assert 0 <= survival_prob <= 1
    input_num_filters = l.output_shape[1]
    if increase_units_factor is not None:
        out_num_filters = int(input_num_filters * increase_units_factor)
        assert (out_num_filters - input_num_filters) % 2 == 0, (
            "Need even "
            "number of extra channels in order to be able to pad correctly")
    else:
        out_num_filters = input_num_filters

    if (not half_time) or (reduction_method == 'conv'):
        stack_1 = batch_norm(Conv2DLayer(l,
                                         num_filters=out_num_filters,
                                         filter_size=(3, 3),
                                         stride=(1, 1),
                                         nonlinearity=nonlinearity,
                                         pad='same',
                                         W=lasagne.init.HeNormal(gain='relu')),
                             epsilon=batch_norm_epsilon,
                             alpha=batch_norm_alpha)
    else:
        assert half_time and reduction_method == 'pool'
        stack_1 = Pool2DLayer(l,
                              pool_size=(3, 1),
                              stride=(1, 1),
                              pad=(1, 0),
                              mode=reduction_pool_mode)
        # 1x1 conv here, so the stride (StrideReshapeLayer below) can be
        # applied afterwards without problems; otherwise the stride would
        # have to be applied here, and an extra condition would be needed
        # later (only reshape with stride in case of reduction method conv)...
        stack_1 = batch_norm(Conv2DLayer(stack_1,
                                         num_filters=out_num_filters,
                                         filter_size=(1, 1),
                                         stride=(1, 1),
                                         nonlinearity=nonlinearity,
                                         pad='same',
                                         W=lasagne.init.HeNormal(gain='relu')),
                             epsilon=batch_norm_epsilon,
                             alpha=batch_norm_alpha)

    if half_time:
        stack_1 = StrideReshapeLayer(stack_1, n_stride=2)
    stack_2 = batch_norm(Conv2DLayer(stack_1,
                                     num_filters=out_num_filters,
                                     filter_size=(3, 3),
                                     stride=(1, 1),
                                     nonlinearity=None,
                                     pad='same',
                                     W=lasagne.init.HeNormal(gain='relu')),
                         epsilon=batch_norm_epsilon,
                         alpha=batch_norm_alpha)

    # add shortcut connections
    shortcut = l
    if half_time:
        # note: since we are only reshaping, this is ok both for the later
        # identity and the later projection shortcut: the 1x1 projection conv
        # gives the same result whether applied before or after this reshape
        # (that would not be true for anything but a 1x1 conv!)
        shortcut = StrideReshapeLayer(shortcut, n_stride=2)
    if increase_units_factor is not None:
        if projection:
            # projection shortcut, as option B in paper
            shortcut = batch_norm(Conv2DLayer(shortcut,
                                              num_filters=out_num_filters,
                                              filter_size=(1, 1),
                                              stride=(1, 1),
                                              nonlinearity=None,
                                              pad='same',
                                              b=None),
                                  epsilon=batch_norm_epsilon,
                                  alpha=batch_norm_alpha)
        else:
            # identity shortcut, as option A in paper
            n_extra_chans = out_num_filters - input_num_filters
            shortcut = PadLayer(shortcut, [n_extra_chans // 2, 0, 0],
                                batch_ndim=1)
    if add_after_nonlin:
        stack_2 = NonlinearityLayer(stack_2)
        block = ElemwiseSumLayer([stack_2, shortcut])
    else:
        block = NonlinearityLayer(ElemwiseSumLayer([stack_2, shortcut]),
                                  nonlinearity=nonlinearity)
    if survival_prob != 1:
        # Hack to make both broadcastable along the empty last dim (index 3),
        # otherwise Theano complains that the two variables have different
        # types, e.g. shortcut: TensorType(False,False,False,True) vs.
        # block: a plain 4d TensorType.
        shortcut = ExpressionLayer(shortcut, lambda x: T.addbroadcast(x, 3))
        block = ExpressionLayer(block, lambda x: T.addbroadcast(x, 3))
        block = RandomSwitchLayer(block, shortcut, survival_prob)
    return block