def crop_layer(self):
    from lasagne.layers.merge import ElemwiseSumLayer
    l1 = Mock(output_shapes=((None, None),))
    l2 = Mock(output_shapes=((None, None),))
    return ElemwiseSumLayer((l1, l2), coeffs=[2, -1],
                            cropping=['lower'] * 2)
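For context on the arguments used in the fixture above, here is a minimal standalone sketch (not part of the test suite) with real InputLayers instead of mocks: coeffs=[2, -1] makes the layer compute 2 * x1 - x2, and cropping=['lower'] * 2 crops every input to the smallest size along the first two axes, keeping the leading slice.

import numpy as np
import theano
import theano.tensor as T
from lasagne.layers import InputLayer, ElemwiseSumLayer, get_output

x1, x2 = T.matrices('x1', 'x2')
l1 = InputLayer((3, 5), x1)
l2 = InputLayer((2, 4), x2)
l_sum = ElemwiseSumLayer([l1, l2], coeffs=[2, -1], cropping=['lower'] * 2)

print(l_sum.output_shape)  # (2, 4): element-wise minimum of the input shapes
f = theano.function([x1, x2], get_output(l_sum))
a = np.arange(15, dtype=theano.config.floatX).reshape(3, 5)
b = np.ones((2, 4), dtype=theano.config.floatX)
print(f(a, b))             # equals 2 * a[:2, :4] - b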
def test_broadcasting_pattern(self):
    from lasagne.layers import ElemwiseSumLayer, InputLayer
    import lasagne
    import theano.tensor as T
    import numpy as np
    import theano

    a, b = T.matrices('a', 'b')
    a_ = np.ones((2, 1), dtype=theano.config.floatX)
    b_ = np.ones((2, 5), dtype=theano.config.floatX)

    l_a = InputLayer((2, 1))
    l_b = InputLayer((2, 5))
    l_o = ElemwiseSumLayer([l_a, l_b])
    shp = l_o.output_shape  # set broadcastable table
    output = lasagne.layers.get_output(l_o, {l_a: a, l_b: b}).eval(
        {a: a_, b: b_})
    np.testing.assert_array_almost_equal(output, np.ones((2, 5)) + 1.0)
    assert shp == output.shape

    # test that None dimensions are not modified
    l_a = InputLayer((2, None))
    l_b = InputLayer((2, None))
    l_o = ElemwiseSumLayer([l_a, l_b])
    shp = l_o.output_shape  # set broadcastable table
    a = T.addbroadcast(a, 1)
    output = lasagne.layers.get_output(l_o, {l_a: a, l_b: b}).eval(
        {a: a_, b: b_})
    np.testing.assert_array_almost_equal(output, np.ones((2, 5)) + 1.0)
    assert shp == (2, None)
def get_rnn_unit(l_in, mask, rev_mask, state, rev_state, n_units, prefix,
                 grad_clip=0, context=None, attention=False):
    # `input` and `inner` are assumed to be module-level weight initializers
    # (e.g. lasagne.init objects); GRULayer here is a project-specific variant
    # that accepts context_input/use_attention on top of the stock lasagne
    # arguments.
    net = OrderedDict()
    hid = state
    rg = Gate(W_in=input, W_hid=inner, W_cell=None)
    ug = Gate(W_in=input, W_hid=inner, W_cell=None)
    hg = Gate(W_in=input, W_hid=inner, W_cell=None, nonlinearity=tanh)
    net[prefix + 'gru'] = GRULayer(
        l_in, num_units=n_units, resetgate=rg, updategate=ug,
        hidden_update=hg, mask_input=mask, hid_init=hid, learn_init=False,
        only_return_final=False, grad_clipping=grad_clip,
        context_input=context, use_attention=attention, name='gru')
    if rev_mask is not None and rev_state is not None:
        net[prefix + 'gru_rev'] = GRULayer(
            l_in, num_units=n_units, resetgate=rg, updategate=ug,
            hidden_update=hg, mask_input=rev_mask, hid_init=rev_state,
            only_return_final=False, learn_init=False,
            grad_clipping=grad_clip, context_input=context, backwards=True,
            name='gru_rev')
    # Sum the GRU layers added so far (forward, and backward if present) into
    # the context layer; list() keeps the slice working on Python 3 dict views.
    net['context'] = ElemwiseSumLayer(list(net.values())[-2:], name='context')
    return net
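A simplified, hypothetical sketch of the bidirectional pattern in get_rnn_unit using only the stock lasagne GRULayer (the project-specific context_input/use_attention arguments are dropped here): two GRUs run over the same input in opposite directions and their outputs are summed element-wise into a context layer.

from lasagne.layers import InputLayer, GRULayer, ElemwiseSumLayer

l_in = InputLayer((None, 20, 50))    # (batch, time, features), sizes made up
l_mask = InputLayer((None, 20))
l_fwd = GRULayer(l_in, num_units=64, mask_input=l_mask)
l_bwd = GRULayer(l_in, num_units=64, mask_input=l_mask, backwards=True)
l_ctx = ElemwiseSumLayer([l_fwd, l_bwd], name='context')
print(l_ctx.output_shape)            # (None, 20, 64)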
import lasagne
from lasagne.layers import (InputLayer, GaussianNoiseLayer, LSTMLayer,
                            ElemwiseSumLayer, ReshapeLayer, DenseLayer)
from lasagne.nonlinearities import softmax


def create_blstm(input_vars, mask_vars, num_inputs, depth,
                 hidden_layer_size, num_outputs):
    network = lasagne.layers.InputLayer(shape=(None, 1, 1, num_inputs),
                                        input_var=input_vars)
    mask = InputLayer((None, None), mask_vars)
    network = GaussianNoiseLayer(network, sigma=0.01)
    for i in range(depth):
        forward = LSTMLayer(network, hidden_layer_size, mask_input=mask,
                            learn_init=True)
        backward = LSTMLayer(network, hidden_layer_size, mask_input=mask,
                             learn_init=True, backwards=True)
        # merge the two directions by summing their outputs element-wise
        network = ElemwiseSumLayer([forward, backward])
    network = ReshapeLayer(network, (-1, hidden_layer_size))
    network = DenseLayer(network, num_outputs, nonlinearity=softmax)
    return network
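A minimal, hypothetical build-only sketch for create_blstm, with made-up sizes; the input variable is a 4D tensor and the mask a matrix, matching the InputLayer shapes declared inside the function. It only constructs the graph and counts parameters, without running a forward pass.

import theano.tensor as T
import lasagne

input_vars = T.tensor4('inputs')   # matches shape (None, 1, 1, num_inputs)
mask_vars = T.matrix('mask')       # matches shape (None, None)
net = create_blstm(input_vars, mask_vars, num_inputs=40, depth=2,
                   hidden_layer_size=128, num_outputs=10)
print(lasagne.layers.count_params(net, trainable=True))
prediction = lasagne.layers.get_output(net)   # symbolic softmax output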
def test_bad_coeffs_fails(self, layer):
    from lasagne.layers.merge import ElemwiseSumLayer
    with pytest.raises(ValueError):
        ElemwiseSumLayer([Mock(), Mock()], coeffs=[2, 3, -1])
def crop_layer(self):
    from lasagne.layers.merge import ElemwiseSumLayer
    return ElemwiseSumLayer([Mock(), Mock()], coeffs=[2, -1],
                            cropping=['lower'] * 2)
def layer(self):
    from lasagne.layers.merge import ElemwiseSumLayer
    return ElemwiseSumLayer([Mock(), Mock()], coeffs=[2, -1])
def test_bad_coeffs_fails(self, layer):
    from lasagne.layers.merge import ElemwiseSumLayer
    with pytest.raises(ValueError):
        l1 = Mock(output_shapes=((None, None),))
        l2 = Mock(output_shapes=((None, None),))
        ElemwiseSumLayer((l1, l2), coeffs=[2, 3, -1])
def residual_block(l, batch_norm_alpha, batch_norm_epsilon, nonlinearity,
                   survival_prob, add_after_nonlin, reduction_method,
                   reduction_pool_mode, increase_units_factor=None,
                   half_time=False, projection=False):
    # StrideReshapeLayer and RandomSwitchLayer are assumed to be
    # project-specific layers; everything else is stock lasagne.
    assert survival_prob <= 1 and survival_prob >= 0
    input_num_filters = l.output_shape[1]
    if increase_units_factor is not None:
        out_num_filters = int(input_num_filters * increase_units_factor)
        assert (out_num_filters - input_num_filters) % 2 == 0, (
            "Need even number of extra channels in order to be able to pad "
            "correctly")
    else:
        out_num_filters = input_num_filters

    if (not half_time) or (reduction_method == 'conv'):
        stack_1 = batch_norm(Conv2DLayer(l, num_filters=out_num_filters,
                                         filter_size=(3, 3), stride=(1, 1),
                                         nonlinearity=nonlinearity,
                                         pad='same',
                                         W=lasagne.init.HeNormal(gain='relu')),
                             epsilon=batch_norm_epsilon,
                             alpha=batch_norm_alpha)
    else:
        assert half_time and reduction_method == 'pool'
        stack_1 = Pool2DLayer(l, pool_size=(3, 1), stride=(1, 1), pad=(1, 0),
                              mode=reduction_pool_mode)
        # 1x1 conv here, therefore can do stride later without problems
        # otherwise would have to do stride here before
        # and make extra if condition later (only reshape with stride
        # in case of reduction method conv)...
        stack_1 = batch_norm(Conv2DLayer(stack_1,
                                         num_filters=out_num_filters,
                                         filter_size=(1, 1), stride=(1, 1),
                                         nonlinearity=nonlinearity,
                                         pad='same',
                                         W=lasagne.init.HeNormal(gain='relu')),
                             epsilon=batch_norm_epsilon,
                             alpha=batch_norm_alpha)
    if half_time:
        stack_1 = StrideReshapeLayer(stack_1, n_stride=2)
    stack_2 = batch_norm(Conv2DLayer(stack_1, num_filters=out_num_filters,
                                     filter_size=(3, 3), stride=(1, 1),
                                     nonlinearity=None, pad='same',
                                     W=lasagne.init.HeNormal(gain='relu')),
                         epsilon=batch_norm_epsilon, alpha=batch_norm_alpha)

    # add shortcut connections
    shortcut = l
    if half_time:
        # note since we are only reshaping
        # this is ok both for later identity and later projection
        # 1x1 conv of projection is same if we do it before or after this
        # reshape (would not be true if it was anything but 1x1 conv(!))
        shortcut = StrideReshapeLayer(shortcut, n_stride=2)
    if increase_units_factor is not None:
        if projection:
            # projection shortcut, as option B in paper
            shortcut = batch_norm(Conv2DLayer(shortcut,
                                              num_filters=out_num_filters,
                                              filter_size=(1, 1),
                                              stride=(1, 1),
                                              nonlinearity=None,
                                              pad='same', b=None),
                                  epsilon=batch_norm_epsilon,
                                  alpha=batch_norm_alpha)
        else:
            # identity shortcut, as option A in paper
            n_extra_chans = out_num_filters - input_num_filters
            shortcut = PadLayer(shortcut, [n_extra_chans // 2, 0, 0],
                                batch_ndim=1)

    if add_after_nonlin:
        stack_2 = NonlinearityLayer(stack_2)
        block = ElemwiseSumLayer([stack_2, shortcut])
    else:
        block = NonlinearityLayer(ElemwiseSumLayer([stack_2, shortcut]),
                                  nonlinearity=nonlinearity)

    if survival_prob != 1:
        # Hack to make both be broadcastable along empty third dim
        # Otherwise I get an error that they are of different type:
        # shortcut: TensorType(False,False,False,True)
        # block: TensorType4d(32) or sth
        shortcut = ExpressionLayer(shortcut, lambda x: T.addbroadcast(x, 3))
        block = ExpressionLayer(block, lambda x: T.addbroadcast(x, 3))
        block = RandomSwitchLayer(block, shortcut, survival_prob)
    return block
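A minimal, hypothetical sketch of stacking this block on a small input, assuming the module-level imports the function relies on are in place. Sizes are made up; survival_prob=1 and half_time=False keep the call on the code path that uses only standard Lasagne layers (StrideReshapeLayer and RandomSwitchLayer are not touched), and the element-wise sum merges stack_2 with the identity shortcut.

from lasagne.layers import InputLayer
from lasagne.nonlinearities import rectify

l = InputLayer((None, 16, 64, 1))   # (batch, channels, height, width)
for _ in range(3):
    l = residual_block(l,
                       batch_norm_alpha=0.1,
                       batch_norm_epsilon=1e-4,
                       nonlinearity=rectify,
                       survival_prob=1,
                       add_after_nonlin=False,
                       reduction_method='conv',
                       reduction_pool_mode='average_exc_pad')
print(l.output_shape)               # (None, 16, 64, 1): shape is preserved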