Example #1
def test_orthogonal_multi():
    import numpy as np
    from lasagne.init import Orthogonal

    sample = Orthogonal().sample((100, 50, 80))
    sample = sample.reshape(100, 50*80)
    assert np.allclose(np.dot(sample, sample.T), np.eye(100), atol=1e-6)
Example #2
def test_orthogonal_multi():
    import numpy as np
    from lasagne.init import Orthogonal

    sample = Orthogonal().sample((100, 50, 80))
    sample = sample.reshape(100, 50 * 80)
    assert np.allclose(np.dot(sample, sample.T), np.eye(100), atol=1e-6)
Example #3
def test_orthogonal():
    import numpy as np
    from lasagne.init import Orthogonal

    sample = Orthogonal().sample((100, 200))
    assert np.allclose(np.dot(sample, sample.T), np.eye(100), atol=1e-6)

    sample = Orthogonal().sample((200, 100))
    assert np.allclose(np.dot(sample.T, sample), np.eye(100), atol=1e-6)
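These tests rely on the initializer flattening all trailing dimensions and returning a matrix whose shorter side is orthonormal. A rough sketch of how such an initializer can be built with plain NumPy, following Saxe et al.; it reproduces the behaviour checked above but is not necessarily lasagne's exact implementation:

import numpy as np

def orthogonal_sample(shape, gain=1.0):
    """Return a gain-scaled (semi-)orthogonal array of the given shape."""
    if len(shape) < 2:
        raise RuntimeError("Only shapes of length 2 or more are supported.")
    flat_shape = (shape[0], int(np.prod(shape[1:])))
    a = np.random.standard_normal(flat_shape)
    u, _, v = np.linalg.svd(a, full_matrices=False)
    q = u if u.shape == flat_shape else v   # pick the factor with the right shape
    return gain * q.reshape(shape)

sample = orthogonal_sample((100, 50, 80)).reshape(100, 50 * 80)
assert np.allclose(np.dot(sample, sample.T), np.eye(100), atol=1e-6)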
Example #4
def highway_dense(incoming, Wh=Orthogonal(), bh=Constant(0.0),
                  Wt=Orthogonal(), bt=Constant(-4.0),
                  nonlinearity=rectify, **kwargs):
    num_inputs = int(np.prod(incoming.output_shape[1:]))

    l_h = DenseLayer(incoming, num_units=num_inputs, W=Wh, b=bh, nonlinearity=nonlinearity)
    l_t = DenseLayer(incoming, num_units=num_inputs, W=Wt, b=bt, nonlinearity=sigmoid)

    return MultiplicativeGatingLayer(gate=l_t, input1=l_h, input2=incoming)
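The gating layer combines the transform path and the carry path in the standard highway fashion, output = t * h + (1 - t) * x; the strongly negative bt = Constant(-4.0) keeps the gates mostly closed at the start, so the block initially behaves like an identity mapping. A minimal sketch of such a layer, assuming exactly that formula (the original MultiplicativeGatingLayer is not shown in this example):

import lasagne.layers as ll

class MultiplicativeGatingLayer(ll.MergeLayer):
    """Highway-style gate: output = gate * input1 + (1 - gate) * input2."""
    def __init__(self, gate, input1, input2, **kwargs):
        super(MultiplicativeGatingLayer, self).__init__([gate, input1, input2],
                                                        **kwargs)

    def get_output_shape_for(self, input_shapes):
        # all three incomings share the same shape
        return input_shapes[0]

    def get_output_for(self, inputs, **kwargs):
        gate, transformed, carried = inputs
        return gate * transformed + (1 - gate) * carried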
Example #5
def reset():
    if any(np.isnan(scale.get_value()) for scale in scales):
        for scale in scales:
            scale.set_value(1.)
    for l in l_hiddens:
        l.b.set_value(Constant()(l.b.get_value().shape))
        l.W.set_value(Orthogonal()(l.W.get_value().shape))
    l_out.b.set_value(Constant()(l_out.b.get_value().shape))
    l_out.W.set_value(Orthogonal()(l_out.W.get_value().shape))
    for p in (p for u in (updates_ada, updates_other, updates_scal) for p in u
              if p not in get_all_params(l_out)):
        p.set_value(Constant()(p.get_value().shape))
Example #6
def test_orthogonal_gain():
    import numpy as np
    from lasagne.init import Orthogonal

    gain = 2
    sample = Orthogonal(gain).sample((100, 200))
    assert np.allclose(np.dot(sample, sample.T),
                       gain * gain * np.eye(100),
                       atol=1e-6)

    gain = np.sqrt(2)
    sample = Orthogonal('relu').sample((100, 200))
    assert np.allclose(np.dot(sample, sample.T),
                       gain * gain * np.eye(100),
                       atol=1e-6)
Example #7
def define_lat_pars_dict(d):
	"""
	Defines the parameters and neural networks pertaining to the Latent Evolution
	Model
	
	Args:
		d:	The object-dictionary
	Output:
		LatParsDict:		A dictionary containing all these hyperparameters and NNs, to be
						fed to the Latent Evolution Model
	"""
	xDim = d.xDim
	nnodesEvlv = d.nnodesMevlv
	
	NNEvolve = InputLayer((None, xDim), name='Ev_IL')
	NNEvolve = DenseLayer(NNEvolve, nnodesEvlv, nonlinearity=softmax, W=Orthogonal(), 
						num_leading_axes=1, name='Ev_HL1')
	NNEvolve = DenseLayer(NNEvolve, xDim**2, nonlinearity=linear, W=Uniform(0.9), 
						num_leading_axes=1, name='Ev_OL')
	
	cmn_dct = dict([('NNEvolve', NNEvolve),
					('alpha', options.alpha)
					])
	LatParsDict = npdict_to_theanodict(cmn_dct)
	
	return LatParsDict
Example #8
def define_rec_pars_dict(d, MuX_layers_In=None, LX_layers_In=None):
	"""
	Defines the parameters and neural networks pertaining to the Recognition Model.
	
	Args:
		d:	The object-dictionary
	Output:
		RecParsDict:		A dictionary containing all these hyperparameters and NNs, to be
						fed to the Recognition Model
	"""
#	 outWrecscale = d.outWrecscale
	mux_dpth = d.mux_depth
	nnodesMrec = d.nnodesMrec
	act_func_dict = {'softplus' : softplus, 'tanh' : tanh}
	nl = act_func_dict[d.act_func_hls]

	NNMuX = InputLayer((None, None, d.yDim))
	for i in range(mux_dpth):
		NNMuX = DenseLayer(NNMuX, nnodesMrec, nonlinearity=nl, W=Normal(0.5), 
						num_leading_axes=2, name='MuX_HL' + str(i))
	NNMuX = DenseLayer(NNMuX, d.xDim, nonlinearity=linear, W=Orthogonal(), 
					num_leading_axes=2, name='MuX_OL')
# 	if d.loadfit:
# 		assert MuX_layers_In is not None, "List of layers not provided."
# 		MuX_layers = lasagne.layers.get_all_layers(NNMuX)[1:]
# 		init_layers(MuX_layers_In, MuX_layers)
	
	
	LambdaX_dpth = d.lbdaxdepth
	NNLambdaX = lasagne.layers.InputLayer((None, None, d.yDim))
	for i in range(LambdaX_dpth):
		NNLambdaX = DenseLayer(NNLambdaX, nnodesMrec, nonlinearity=nl, W=Normal(0.5), 
							num_leading_axes=2, name='LX_HL' + str(i))
	NNLambdaX = DenseLayer(NNLambdaX, d.xDim**2, nonlinearity=linear, W=Orthogonal(), 
						num_leading_axes=2, name='LX_OL')
# 	if options.loadfit:
# 		assert LX_layers_In is not None, "List of layers not provided."
# 		LX_layers = lasagne.layers.get_all_layers(NNLambdaX)[1:]
# 		init_layers(LX_layers_In, LX_layers)
	
	RecParsDict = {'LATCLASS' : LocallyLinearEvolution, 
				   'NNMuX' : NNMuX, 
				   'NNLambdaX' : NNLambdaX}
	
	return RecParsDict
Example #9
def build_segmenter_simple():
    inp = ll.InputLayer(shape=(None, 1, None, None), name='input')
    conv1 = ll.Conv2DLayer(inp,
                           num_filters=32,
                           filter_size=(7, 7),
                           pad='same',
                           W=Orthogonal(),
                           nonlinearity=rectify,
                           name='conv1')
    conv2 = ll.Conv2DLayer(conv1,
                           num_filters=64,
                           filter_size=(5, 5),
                           pad='same',
                           W=Orthogonal(),
                           nonlinearity=rectify,
                           name='conv2')
    conv3 = ll.Conv2DLayer(conv2,
                           num_filters=128,
                           filter_size=(5, 5),
                           pad='same',
                           W=Orthogonal(),
                           nonlinearity=rectify,
                           name='conv3')
    conv4 = ll.Conv2DLayer(conv3,
                           num_filters=64,
                           filter_size=(5, 5),
                           pad='same',
                           W=Orthogonal(),
                           nonlinearity=rectify,
                           name='conv4')
    conv5 = ll.Conv2DLayer(conv4,
                           num_filters=32,
                           filter_size=(3, 3),
                           pad='same',
                           W=Orthogonal(),
                           nonlinearity=rectify,
                           name='conv5')
    conv6 = ll.Conv2DLayer(conv5,
                           num_filters=16,
                           filter_size=(3, 3),
                           pad='same',
                           W=Orthogonal(),
                           nonlinearity=rectify,
                           name='conv6')

    # our output layer is also convolutional, remember that our Y is going to be the same exact size as the
    conv_final = ll.Conv2DLayer(conv6,
                                num_filters=2,
                                filter_size=(3, 3),
                                pad='same',
                                W=Orthogonal(),
                                name='conv_final',
                                nonlinearity=linear)
    # we need to reshape it to be a (batch*n*m x 3), i.e. unroll s.t. the feature dimension is preserved
    softmax = Softmax4D(conv_final, name='4dsoftmax')

    return [softmax]
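The output layer is itself convolutional, so its activations keep the shape (batch, 2, height, width); the Softmax4D layer presumably just normalises over the channel axis so that every pixel gets a proper class distribution. A minimal sketch of a layer with that behaviour, offered as an assumption about what Softmax4D does (its implementation is not part of this example):

import theano.tensor as T
import lasagne.layers as ll

class Softmax4D(ll.Layer):
    """Softmax over the channel axis of a (batch, channels, h, w) tensor."""
    def get_output_for(self, input, **kwargs):
        # numerically stable softmax along axis 1; the output shape is unchanged
        shifted = input - input.max(axis=1, keepdims=True)
        exp = T.exp(shifted)
        return exp / exp.sum(axis=1, keepdims=True)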
Example #10
def define_obs_pars_dict(d, MuY_layers_In=None):
	"""
	Defines the parameters and neural networks pertaining to the Observation Model.
	
	Args:
		d:	The object-dictionary
	Output:
		ObsParsDict:		A dictionary containing all these hyperparameters and NNs, to be
						fed to the Observation Model
	"""
	act_func_dict = {'softplus' : softplus, 'tanh' : tanh}
	output_nl_dict = {'linear' : linear, 'softplus'  : softplus}
	
	output_nl = output_nl_dict[d.output_nl]
	nl = act_func_dict[d.act_func_hls]
	
	nnodesMrec = d.nnodesMrec
	MuY_dpth = d.muydepth
	
	NNMuY = InputLayer((None, None, d.xDim), name='MuY_IL')
	for i in range(MuY_dpth):
		NNMuY = DenseLayer(NNMuY, nnodesMrec, nonlinearity=nl, W=Orthogonal(), 
						num_leading_axes=2, name='MuY_HL' + str(i))
	NNMuY = DenseLayer(NNMuY, d.yDim, nonlinearity=output_nl, W=Orthogonal(), 
					num_leading_axes=2, name='MuY_OL')
	
	# Initialize layer weights if so desired.
# 	if d.loadfit:
# 		MuY_layers = lasagne.layers.get_all_layers(NNMuY)[1:]
# 		init_layers(MuY_layers_In, MuY_layers)
	
	ObsParsDict = {'NNMuY' : NNMuY, 
				   'NNMuY_W' : d.outWgenscale, 
				   'NNMuY_b' : d.outbgenscale, 
				   'LATCLASS' : LocallyLinearEvolution,
				   'is_out_positive' : d.is_out_positive}
	
	return ObsParsDict
Example #11
def build_segmenter_simple_absurd_res():
    sys.setrecursionlimit(1500)
    inp = ll.InputLayer(shape=(None, 1, None, None), name='input')
    n_layers = 64  # should get a 128 x 128 receptive field
    layers = [inp]
    for i in range(n_layers):
        # every 2 layers, add a skip connection
        layers.append(
            ll.Conv2DLayer(layers[-1],
                           num_filters=8,
                           filter_size=(3, 3),
                           pad='same',
                           W=Orthogonal(),
                           nonlinearity=linear,
                           name='conv%d' % (i + 1)))
        layers.append(ll.BatchNormLayer(layers[-1], name='bn%i' % (i + 1)))
        if (i % 2 == 0) and (i != 0):
            layers.append(
                ll.ElemwiseSumLayer([
                    layers[-1],  # prev layer
                    layers[-6],
                ]  # 3 actual layers per block, skip the previous block
                                    ))
        layers.append(ll.NonlinearityLayer(layers[-1], nonlinearity=rectify))

    # our output layer is also convolutional, remember that our Y is going to be the same exact size as the
    conv_final = ll.Conv2DLayer(layers[-1],
                                num_filters=2,
                                filter_size=(3, 3),
                                pad='same',
                                W=Orthogonal(),
                                name='conv_final',
                                nonlinearity=linear)
    # we need to reshape it to be a (batch*n*m x 3), i.e. unroll s.t. the feature dimension is preserved
    softmax = Softmax4D(conv_final, name='4dsoftmax')

    return [softmax]
Example #12
    def __init__(self,
                 ObsPars,
                 yDim,
                 xDim,
                 Y=None,
                 X=None,
                 lat_ev_model=None,
                 LATCLASS=LocallyLinearEvolution):
        """
        """
        self.yDim = yDim
        self.xDim = xDim
        self.Y = Y = T.tensor3('Y') if Y is None else Y

        if lat_ev_model is None:
            self.common_lat = False
            self.X = X = T.tensor3('X') if X is None else X
            self.lat_ev_model = lat_ev_model = LATCLASS({},
                                                        xDim,
                                                        X,
                                                        nnname='GenEv')
        else:
            self.common_lat = True
            self.lat_ev_model = lat_ev_model
            self.X = lat_ev_model.get_X() if X is None else X

        for key in ObsPars.keys():
            setattr(self, key, ObsPars[key])

        if not hasattr(self, 'is_out_positive'): self.is_out_positive = True
        if not hasattr(self, 'NNMuY_W'): self.NNMuY_W = 1.0
        if not hasattr(self, 'NNMuY_b'): self.NNMuY_b = 1.0
        if not hasattr(self, 'NNMuY'):
            NNMuY = InputLayer((None, None, self.xDim), name='MuY_IL')
            self.NNMuY = DenseLayer(NNMuY,
                                    self.yDim,
                                    nonlinearity=softplus,
                                    W=Orthogonal(),
                                    num_leading_axes=2,
                                    name='MuY_OL')
            self.NNMuY.W.set_value(self.NNMuY_W * self.NNMuY.W.get_value())
            self.NNMuY.b.set_value(self.NNMuY_b * np.ones(self.yDim) + 2.0 *
                                   (np.random.rand(yDim)))
        inv_tau = 0.2
        self.Rate = (lasagne.layers.get_output(
            self.NNMuY, self.X) if self.is_out_positive else T.exp(
                inv_tau * lasagne.layers.get_output(self.NNMuY, self.X)))
Example #13
def define_model(N_HIDDEN,
                 depth,
                 LEARNING_RATE=0.01,
                 GRAD_CLIP=100,
                 trans_vocab_size=0,
                 vocab_size=0,
                 is_train=False):

    ### Defines lasagne model
    ### Returns output layer and theano functions for training and computing the cost

    l_input = lasagne.layers.InputLayer(shape=(None, None, trans_vocab_size))
    network = l_input
    symbolic_batch_size = lasagne.layers.get_output(network).shape[0]

    while depth > 0:

        l_forward = lasagne.layers.LSTMLayer(
            network,
            N_HIDDEN,
            grad_clipping=GRAD_CLIP,
            ingate=lasagne.layers.Gate(W_in=Orthogonal(gain=1.5),
                                       W_hid=Orthogonal(gain=1.5),
                                       W_cell=Normal(0.1)),
            forgetgate=lasagne.layers.Gate(W_in=Orthogonal(gain=1.5),
                                           W_hid=Orthogonal(gain=1.5),
                                           W_cell=Normal(0.1)),
            cell=lasagne.layers.Gate(W_cell=None,
                                     nonlinearity=lasagne.nonlinearities.tanh,
                                     W_in=Orthogonal(gain=1.5),
                                     W_hid=Orthogonal(gain=1.5)),
            outgate=lasagne.layers.Gate(W_in=Orthogonal(gain=1.5),
                                        W_hid=Orthogonal(gain=1.5),
                                        W_cell=Normal(0.1)),
            backwards=False)
        l_backward = lasagne.layers.LSTMLayer(
            network,
            N_HIDDEN,
            grad_clipping=GRAD_CLIP,
            ingate=lasagne.layers.Gate(W_in=Orthogonal(gain=1.5),
                                       W_hid=Orthogonal(gain=1.5),
                                       W_cell=Normal(0.1)),
            forgetgate=lasagne.layers.Gate(W_in=Orthogonal(gain=1.5),
                                           W_hid=Orthogonal(gain=1.5),
                                           W_cell=Normal(0.1)),
            cell=lasagne.layers.Gate(W_cell=None,
                                     nonlinearity=lasagne.nonlinearities.tanh,
                                     W_in=Orthogonal(gain=1.5),
                                     W_hid=Orthogonal(gain=1.5)),
            outgate=lasagne.layers.Gate(W_in=Orthogonal(gain=1.5),
                                        W_hid=Orthogonal(gain=1.5),
                                        W_cell=Normal(0.1)),
            backwards=True)

        if depth == 1:
            l_cell_forward = LSTMLayer(
                network,
                N_HIDDEN,
                grad_clipping=GRAD_CLIP,
                ingate=lasagne.layers.Gate(
                    W_in=l_forward.W_in_to_ingate,
                    W_hid=l_forward.W_hid_to_ingate,
                    #                                        W_cell=l_forward.W_cell_to_ingate,
                    b=l_forward.b_ingate),
                forgetgate=lasagne.layers.Gate(
                    W_in=l_forward.W_in_to_forgetgate,
                    W_hid=l_forward.W_hid_to_forgetgate,
                    #                                       W_cell=l_forward.W_cell_to_forgetgate,
                    b=l_forward.b_forgetgate),
                cell=lasagne.layers.Gate(
                    W_cell=None,
                    nonlinearity=lasagne.nonlinearities.tanh,
                    W_in=l_forward.W_in_to_cell,
                    W_hid=l_forward.W_hid_to_cell,
                    b=l_forward.b_cell),
                outgate=lasagne.layers.Gate(
                    W_in=l_forward.W_in_to_outgate,
                    W_hid=l_forward.W_hid_to_outgate,
                    #                                        W_cell=l_forward.W_cell_to_outgate,
                    b=l_forward.b_outgate),
                backwards=False,
                peepholes=False)

            l_cell_backwards = LSTMLayer(
                network,
                N_HIDDEN,
                grad_clipping=GRAD_CLIP,
                ingate=lasagne.layers.Gate(
                    W_in=l_backward.W_in_to_ingate,
                    W_hid=l_backward.W_hid_to_ingate,
                    #                                        W_cell=l_backward.W_cell_to_ingate,
                    b=l_backward.b_ingate),
                forgetgate=lasagne.layers.Gate(
                    W_in=l_backward.W_in_to_forgetgate,
                    W_hid=l_backward.W_hid_to_forgetgate,
                    #                                        W_cell=l_backward.W_cell_to_forgetgate,
                    b=l_backward.b_forgetgate),
                cell=lasagne.layers.Gate(
                    W_cell=None,
                    nonlinearity=lasagne.nonlinearities.tanh,
                    W_in=l_backward.W_in_to_cell,
                    W_hid=l_backward.W_hid_to_cell,
                    b=l_backward.b_cell),
                outgate=lasagne.layers.Gate(
                    W_in=l_backward.W_in_to_outgate,
                    W_hid=l_backward.W_hid_to_outgate,
                    #                                        W_cell=l_backward.W_cell_to_outgate,
                    b=l_backward.b_outgate),
                backwards=True,
                peepholes=False)

        concat_layer = lasagne.layers.ConcatLayer(
            incomings=[l_forward, l_backward], axis=2)
        concat_layer = lasagne.layers.ReshapeLayer(concat_layer,
                                                   (-1, 2 * N_HIDDEN))
        network = lasagne.layers.DenseLayer(
            concat_layer,
            num_units=N_HIDDEN,
            W=Orthogonal(),
            nonlinearity=lasagne.nonlinearities.tanh)
        network = lasagne.layers.ReshapeLayer(
            network, (symbolic_batch_size, -1, N_HIDDEN))

        depth -= 1

    network = lasagne.layers.ReshapeLayer(network, (-1, N_HIDDEN))
    l_input_reshape = lasagne.layers.ReshapeLayer(l_input,
                                                  (-1, trans_vocab_size))
    network = lasagne.layers.ConcatLayer(incomings=[network, l_input_reshape],
                                         axis=1)

    l_out = lasagne.layers.DenseLayer(
        network,
        num_units=vocab_size,
        W=lasagne.init.Normal(),
        nonlinearity=lasagne.nonlinearities.softmax)

    target_values = T.dmatrix('target_output')

    network_output = lasagne.layers.get_output(l_out)
    network = lasagne.layers.get_output(network)
    concat_layer = lasagne.layers.get_output(concat_layer)
    last_lstm_cells_forward = lasagne.layers.get_output(l_cell_forward)
    last_lstm_cells_backwards = lasagne.layers.get_output(l_cell_backwards)
    #gates = l_cell_forward.get_gates()

    cost = T.nnet.categorical_crossentropy(network_output,
                                           target_values).mean()

    all_params = lasagne.layers.get_all_params(l_out, trainable=True)

    print("Compiling Functions ...")

    if is_train:

        print("Computing Updates ...")
        #updates = lasagne.updates.adagrad(cost, all_params, LEARNING_RATE)
        updates = lasagne.updates.adam(
            cost, all_params, beta1=0.5,
            learning_rate=LEARNING_RATE)  # from DCGAN paper

        compute_cost = theano.function([l_input.input_var, target_values],
                                       cost,
                                       allow_input_downcast=True)
        train = theano.function([l_input.input_var, target_values],
                                cost,
                                updates=updates,
                                allow_input_downcast=True)
        return (l_out, train, compute_cost)

    else:
        guess = theano.function(
            [l_input.input_var],
            [
                network_output, network, concat_layer, last_lstm_cells_forward,
                last_lstm_cells_backwards
                #gates[0], gates[1], gates[2]
            ],
            allow_input_downcast=True)
        return (l_out, guess)
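As a usage sketch (the sizes below are placeholders, not values taken from the original code): in training mode the function returns the output layer together with compiled train and cost functions, otherwise it returns the output layer and a guess function that also exposes the intermediate activations.

# Hypothetical call; the hidden size, depth and vocabulary sizes are placeholders.
l_out, train, compute_cost = define_model(N_HIDDEN=128, depth=2,
                                          trans_vocab_size=50, vocab_size=50,
                                          is_train=True)
# x has shape (batch, time, trans_vocab_size); y has shape (batch * time, vocab_size).
# loss = train(x, y)
# held_out_loss = compute_cost(x_val, y_val)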
Example #14
from lasagne.nonlinearities import softmax, tanh, sigmoid, rectify, LeakyRectify

seed(SEED)
print 'set random seed to {0} while loading NNet'.format(SEED)

nonlinearities = {
    'tanh': tanh,
    'sigmoid': sigmoid,
    'rectify': rectify,
    'leaky2': LeakyRectify(leakiness=0.02),
    'leaky20': LeakyRectify(leakiness=0.2),
    'softmax': softmax,
}

initializers = {
    'orthogonal': Orthogonal(),
    'sparse': Sparse(),
    'glorot_normal': GlorotNormal(),
    'glorot_uniform': GlorotUniform(),
    'he_normal': HeNormal(),
    'he_uniform': HeUniform(),
}


class NNet(BaseEstimator, ClassifierMixin):
    def __init__(
        self,
        name='nameless_net',  # used for saving, so maybe make it unique
        dense1_size=60,
        dense1_nonlinearity='tanh',
        dense1_init='orthogonal',
Example #15
                                      num_units=num_rnn_units,
                                      only_return_final=True,
                                      backwards=True,
                                      grad_clipping=GRAD_CLIP,
                                      nonlinearity=lasagne.nonlinearities.tanh,
                                      mask_input=net['mask'])
    net['lstm7'] = lasagne.layers.ConcatLayer(
        [net['lstm7_forward'], net['lstm7_backward']])
else:
    net['lstm7'] = LSTMLayer(net['fc6_resize'],
                             num_units=num_rnn_units,
                             unroll_scan=True,
                             only_return_final=True,
                             grad_clipping=GRAD_CLIP,
                             nonlinearity=lasagne.nonlinearities.tanh,
                             cell_init=Orthogonal(),
                             hid_init=Orthogonal(),
                             learn_init=True,
                             mask_input=net['mask'])

print 'dim_lstm7:', net['lstm7'].output_shape
net['lstm7_dropout'] = DropoutLayer(net['lstm7'], p=0.5)
# l_fc8 = DenseLayer(net['lstm7']_dropout, num_units=num_classes, W=HeNormal(), nonlinearity=softmax)
net['fc8-1'] = DenseLayer(net['lstm7_dropout'],
                          num_units=num_classes,
                          nonlinearity=None,
                          W=lasagne.init.Normal(std=0.01),
                          b=lasagne.init.Constant(0.))
net['prob'] = NonlinearityLayer(net['fc8-1'], softmax)

########################################################################################################################
Example #16
 def __init__(self, train_raw, test_raw, dim, mode, l2, l1,
              batch_norm, dropout, batch_size,
              ihm_C, los_C, ph_C, decomp_C,
              partition, nbins, **kwargs):
             
     print "==> not used params in network class:", kwargs.keys()
     self.train_raw = train_raw
     self.test_raw = test_raw
     
     self.dim = dim
     self.mode = mode
     self.l2 = l2
     self.l1 = l1
     self.batch_norm = batch_norm
     self.dropout = dropout
     self.batch_size = batch_size
     self.ihm_C = ihm_C
     self.los_C = los_C
     self.ph_C = ph_C
     self.decomp_C = decomp_C
     self.nbins = nbins
     
     if (partition == 'log'):
         self.get_bin = metrics.get_bin_log
         self.get_estimate = metrics.get_estimate_log
     else:
         assert self.nbins == 10
         self.get_bin = metrics.get_bin_custom
         self.get_estimate = metrics.get_estimate_custom
     
     self.train_batch_gen = self.get_batch_gen(self.train_raw)
     self.test_batch_gen = self.get_batch_gen(self.test_raw)    
     
     self.input_var = T.tensor3('X')
     self.input_lens = T.ivector('L')
     
     self.ihm_pos = T.ivector('ihm_pos')
     self.ihm_mask = T.ivector('ihm_mask')
     self.ihm_label = T.ivector('ihm_label')
     
     self.los_mask = T.imatrix('los_mask')
     self.los_label = T.matrix('los_label') # for regression
     #self.los_label = T.imatrix('los_label')
     
     self.ph_label = T.imatrix('ph_label')
     
     self.decomp_mask = T.imatrix('decomp_mask')
     self.decomp_label = T.imatrix('decomp_label')
     
     print "==> Building neural network"
     
     # common network
     network = layers.InputLayer((None, None, self.train_raw[0][0].shape[1]), 
                                 input_var=self.input_var)
     
     if (self.dropout > 0):
         network = layers.DropoutLayer(network, p=self.dropout)
     
     network = layers.LSTMLayer(incoming=network, num_units=dim,
                                only_return_final=False,
                                grad_clipping=10,
                                ingate=lasagne.layers.Gate(
                                     W_in=Orthogonal(),
                                     W_hid=Orthogonal(),
                                     W_cell=Normal(0.1)),
                                forgetgate=lasagne.layers.Gate(
                                     W_in=Orthogonal(),
                                     W_hid=Orthogonal(),
                                     W_cell=Normal(0.1)),
                                cell=lasagne.layers.Gate(W_cell=None,
                                     nonlinearity=lasagne.nonlinearities.tanh,
                                     W_in=Orthogonal(),
                                     W_hid=Orthogonal()),
                                outgate=lasagne.layers.Gate(
                                     W_in=Orthogonal(),
                                     W_hid=Orthogonal(),
                                     W_cell=Normal(0.1)))
     
     if (self.dropout > 0):
         network = layers.DropoutLayer(network, p=self.dropout)
     
     lstm_output = layers.get_output(network)
     self.params = layers.get_all_params(network, trainable=True)
     self.reg_params = layers.get_all_params(network, regularizable=True)
     
     # for each example in minibatch take the last output
     last_outputs = []
     for index in range(self.batch_size):
         last_outputs.append(lstm_output[index, self.input_lens[index]-1, :])
     last_outputs = T.stack(last_outputs)
     
     # take 48h outputs for fixed mortality task
     mid_outputs = []
     for index in range(self.batch_size):
         mid_outputs.append(lstm_output[index, self.ihm_pos[index], :])
     mid_outputs = T.stack(mid_outputs)
     
     
     # in-hospital mortality related network
     ihm_network = layers.InputLayer((None, dim), input_var=mid_outputs)
     ihm_network = layers.DenseLayer(incoming=ihm_network, num_units=2,
                                    nonlinearity=softmax)
     self.ihm_prediction = layers.get_output(ihm_network)
     self.ihm_det_prediction = layers.get_output(ihm_network, deterministic=True)
     self.params += layers.get_all_params(ihm_network, trainable=True)
     self.reg_params += layers.get_all_params(ihm_network, regularizable=True)
     self.ihm_loss = (self.ihm_mask * categorical_crossentropy(self.ihm_prediction, 
                                                       self.ihm_label)).mean()
     
     
     # length of stay related network
     # Regression
     los_network = layers.InputLayer((None, None, dim), input_var=lstm_output)
     los_network = layers.ReshapeLayer(los_network, (-1, dim))
     los_network = layers.DenseLayer(incoming=los_network, num_units=1,
                                     nonlinearity=rectify)
     los_network = layers.ReshapeLayer(los_network, (lstm_output.shape[0], -1))
     self.los_prediction = layers.get_output(los_network)
     self.los_det_prediction = layers.get_output(los_network, deterministic=True)
     self.params += layers.get_all_params(los_network, trainable=True)
     self.reg_params += layers.get_all_params(los_network, regularizable=True)
     self.los_loss = (self.los_mask * squared_error(self.los_prediction,
                                                   self.los_label)).mean(axis=1).mean(axis=0)
     
     
     # phenotype related network
     ph_network = layers.InputLayer((None, dim), input_var=last_outputs)
     ph_network = layers.DenseLayer(incoming=ph_network, num_units=25,
                                    nonlinearity=sigmoid)
     self.ph_prediction = layers.get_output(ph_network)
     self.ph_det_prediction = layers.get_output(ph_network, deterministic=True)
     self.params += layers.get_all_params(ph_network, trainable=True)
     self.reg_params += layers.get_all_params(ph_network, regularizable=True)
     self.ph_loss = nn_utils.multilabel_loss(self.ph_prediction, self.ph_label)
             
     
     # decompensation related network
     decomp_network = layers.InputLayer((None, None, dim), input_var=lstm_output)
     decomp_network = layers.ReshapeLayer(decomp_network, (-1, dim))
     decomp_network = layers.DenseLayer(incoming=decomp_network, num_units=2,
                                    nonlinearity=softmax)
     decomp_network = layers.ReshapeLayer(decomp_network, (lstm_output.shape[0], -1, 2))
     self.decomp_prediction = layers.get_output(decomp_network)[:, :, 1]
     self.decomp_det_prediction = layers.get_output(decomp_network, deterministic=True)[:, :, 1]
     self.params += layers.get_all_params(decomp_network, trainable=True)
     self.reg_params += layers.get_all_params(decomp_network, regularizable=True)
     self.decomp_loss = nn_utils.multilabel_loss_with_mask(self.decomp_prediction,
                                                       self.decomp_label,
                                                       self.decomp_mask)
     
     """
     data = next(self.train_batch_gen)
     print max(data[1])
     print lstm_output.eval({self.input_var:data[0]}).shape
     exit()
     """
     
     
     if self.l2 > 0: 
         self.loss_l2 = self.l2 * nn_utils.l2_reg(self.reg_params)
     else: 
         self.loss_l2 = T.constant(0)
     
     if self.l1 > 0: 
         self.loss_l1 = self.l1 * nn_utils.l1_reg(self.reg_params)
     else: 
         self.loss_l1 = T.constant(0)
     
     self.reg_loss = self.loss_l1 + self.loss_l2
     
     self.loss = (ihm_C * self.ihm_loss + los_C * self.los_loss + 
                  ph_C * self.ph_loss + decomp_C * self.decomp_loss + 
                  self.reg_loss)
           
     #updates = lasagne.updates.adadelta(self.loss, self.params,
     #                                    learning_rate=0.001)
     #updates = lasagne.updates.momentum(self.loss, self.params,
     #                                    learning_rate=0.00003)
     #updates = lasagne.updates.adam(self.loss, self.params)
     updates = lasagne.updates.adam(self.loss, self.params, beta1=0.5,
                                    learning_rate=0.0001) # from DCGAN paper
     #updates = lasagne.updates.nesterov_momentum(loss, params, momentum=0.9,
     #                                             learning_rate=0.001,
     
     all_inputs = [self.input_var, self.input_lens,
                   self.ihm_pos, self.ihm_mask, self.ihm_label,
                   self.los_mask, self.los_label,
                   self.ph_label,
                   self.decomp_mask, self.decomp_label]
     
     train_outputs = [self.ihm_prediction, self.los_prediction,
                      self.ph_prediction, self.decomp_prediction,
                      self.loss,
                      self.ihm_loss, self.los_loss,
                      self.ph_loss, self.decomp_loss,
                      self.reg_loss]
                      
     test_outputs = [self.ihm_det_prediction, self.los_det_prediction,
                     self.ph_det_prediction, self.decomp_det_prediction,
                     self.loss,
                     self.ihm_loss, self.los_loss,
                     self.ph_loss, self.decomp_loss,
                     self.reg_loss]
     
     ## compiling theano functions
     if self.mode == 'train':
         print "==> compiling train_fn"
         self.train_fn = theano.function(inputs=all_inputs,
                                         outputs=train_outputs,
                                         updates=updates)
     
     print "==> compiling test_fn"
     self.test_fn = theano.function(inputs=all_inputs,
                                    outputs=test_outputs)
Example #17
import numpy as np
import re
import sys
import theano

from common import ENCODING, EPSILON, FMAX, FMIN, MAX_EPOCHS, MIN_EPOCHS, \
    NONMATCH_RE, NEGATIVE_IDX, NEUTRAL_IDX, POSITIVE_IDX, \
    floatX, sgd_updates_adadelta
from common import POSITIVE as POSITIVE_LBL
from common import NEGATIVE as NEGATIVE_LBL
from germanet import normalize

##################################################################
# Constants
SPACE_RE = re.compile(r"\s+")
ORTHOGONAL = Orthogonal()
HE_UNIFORM = HeUniform()


##################################################################
# Methods
def digitize_trainset(w2i, a_pos, a_neg, a_neut, a_pos_re, a_neg_re):
    """Method for generating sentiment lexicons using Velikovich's approach.

    @param a_N - number of terms to extract
    @param a_emb_fname - files of the original corpus
    @param a_pos - initial set of positive terms to be expanded
    @param a_neg - initial set of negative terms to be expanded
    @param a_neut - initial set of neutral terms to be expanded
    @param a_pos_re - regular expression for matching positive terms
    @param a_neg_re - regular expression for matching negative terms
Example #18
    ('pool2', layers.MaxPool2DLayer),
    ('conv31', layers.Conv2DLayer),
    ('conv32', layers.Conv2DLayer),
    ('conv33', layers.Conv2DLayer),
    ('pool3', layers.MaxPool2DLayer),
    ('dropout4', layers.DropoutLayer),
    ('hidden4', layers.DenseLayer),
    ('dropout5', layers.DropoutLayer),
    ('hidden5', layers.DenseLayer),
    ('output', layers.DenseLayer),
],
                input_shape=(None, 5, 44, 44),
                conv11_num_filters=32,
                conv11_filter_size=(5, 5),
                conv11_nonlinearity=leaky_rectify,
                conv11_W=Orthogonal(np.sqrt(2 / (1 + 0.01**2))),
                conv11_b=Constant(0.1),
                conv12_num_filters=32,
                conv12_filter_size=(3, 3),
                conv12_pad=1,
                conv12_nonlinearity=leaky_rectify,
                conv12_W=Orthogonal(np.sqrt(2 / (1 + 0.01**2))),
                conv12_b=Constant(0.1),
                pool1_pool_size=(2, 2),
                conv21_num_filters=64,
                conv21_filter_size=(3, 3),
                conv21_pad=1,
                conv21_nonlinearity=leaky_rectify,
                conv21_W=Orthogonal(np.sqrt(2 / (1 + 0.01**2))),
                conv21_b=Constant(0.1),
                conv22_num_filters=64,
Example #19
def build_kpextractor128():
    inp = ll.InputLayer(shape=(None, 1, 128, 128), name='input')
    # alternate pooling and conv layers to minimize parameters
    filter_pad = lambda x, y: (x // 2, y // 2)
    filter3 = (3, 3)
    same_pad3 = filter_pad(*filter3)
    conv1 = ll.Conv2DLayer(inp,
                           num_filters=16,
                           filter_size=filter3,
                           pad=same_pad3,
                           W=Orthogonal(),
                           nonlinearity=rectify,
                           name='conv1')
    mp1 = ll.MaxPool2DLayer(conv1, 2, stride=2)  # now down to 64 x 64
    bn1 = ll.BatchNormLayer(mp1)
    conv2 = ll.Conv2DLayer(bn1,
                           num_filters=32,
                           filter_size=filter3,
                           pad=same_pad3,
                           W=Orthogonal(),
                           nonlinearity=rectify,
                           name='conv2')
    mp2 = ll.MaxPool2DLayer(conv2, 2, stride=2)  # now down to 32 x 32
    bn2 = ll.BatchNormLayer(mp2)
    conv3 = ll.Conv2DLayer(bn2,
                           num_filters=64,
                           filter_size=filter3,
                           pad=same_pad3,
                           W=Orthogonal(),
                           nonlinearity=rectify,
                           name='conv3')
    mp3 = ll.MaxPool2DLayer(conv3, 2, stride=2)  # now down to 16 x 16
    bn3 = ll.BatchNormLayer(mp3)
    conv4 = ll.Conv2DLayer(bn3,
                           num_filters=128,
                           filter_size=filter3,
                           pad=same_pad3,
                           W=Orthogonal(),
                           nonlinearity=rectify,
                           name='conv4')
    mp4 = ll.MaxPool2DLayer(conv4, 2, stride=2)  # now down to 8 x 8
    bn4 = ll.BatchNormLayer(mp4)
    conv5 = ll.Conv2DLayer(bn4,
                           num_filters=256,
                           filter_size=filter3,
                           pad=same_pad3,
                           W=Orthogonal(),
                           nonlinearity=rectify,
                           name='conv5')
    mp5 = ll.MaxPool2DLayer(conv5, 2, stride=2)  # down to 4 x 4
    bn5 = ll.BatchNormLayer(mp5)

    conv6 = ll.Conv2DLayer(bn5,
                           num_filters=512,
                           filter_size=filter3,
                           pad=same_pad3,
                           W=Orthogonal(),
                           nonlinearity=rectify,
                           name='conv6')
    mp6 = ll.MaxPool2DLayer(conv6, 2, stride=2)  # down to 2 x 2
    bn6 = ll.BatchNormLayer(mp6)

    # now let's bring it down to an FC layer that takes in the 2 x 2 x 512 bn6 output
    fc1 = ll.DenseLayer(bn6, num_units=256, nonlinearity=rectify)
    bn1_fc = ll.BatchNormLayer(fc1)
    #dp1 = ll.DropoutLayer(bn1, p=0.5)
    fc2 = ll.DenseLayer(bn1_fc, num_units=64, nonlinearity=rectify)
    #dp2 = ll.DropoutLayer(fc2, p=0.5)
    bn2_fc = ll.BatchNormLayer(fc2)
    out = ll.DenseLayer(bn2_fc, num_units=6, nonlinearity=linear)
    out_rs = ll.ReshapeLayer(out, ([0], 3, 2))

    return out_rs
Example #20
def build_kpextractor256_decoupled():
    inp = ll.InputLayer(shape=(None, 1, 256, 256), name='input')
    # alternate pooling and conv layers to minimize parameters
    filter_pad = lambda x, y: (x // 2, y // 2)
    filter3 = (3, 3)
    same_pad3 = filter_pad(*filter3)
    conv1 = ll.Conv2DLayer(inp,
                           num_filters=8,
                           filter_size=filter3,
                           pad=same_pad3,
                           W=Orthogonal(),
                           nonlinearity=rectify,
                           name='conv1')
    mp1 = ll.MaxPool2DLayer(conv1, 2, stride=2)  # now down to 128 x 128
    bn1 = ll.BatchNormLayer(mp1)
    conv2 = ll.Conv2DLayer(bn1,
                           num_filters=16,
                           filter_size=filter3,
                           pad=same_pad3,
                           W=Orthogonal(),
                           nonlinearity=rectify,
                           name='conv2')
    mp2 = ll.MaxPool2DLayer(conv2, 2, stride=2)  # now down to 64 x 64
    bn2 = ll.BatchNormLayer(mp2)

    conv3 = ll.Conv2DLayer(bn2,
                           num_filters=32,
                           filter_size=filter3,
                           pad=same_pad3,
                           W=Orthogonal(),
                           nonlinearity=rectify,
                           name='conv3')
    mp3 = ll.MaxPool2DLayer(conv3, 2, stride=2)  # now down to 32 x 32
    bn3 = ll.BatchNormLayer(mp3)
    conv4 = ll.Conv2DLayer(bn3,
                           num_filters=64,
                           filter_size=filter3,
                           pad=same_pad3,
                           W=Orthogonal(),
                           nonlinearity=rectify,
                           name='conv4')
    mp4 = ll.MaxPool2DLayer(conv4, 2, stride=2)  # now down to 16 x 16
    bn4 = ll.BatchNormLayer(mp4)
    conv5 = ll.Conv2DLayer(bn4,
                           num_filters=128,
                           filter_size=filter3,
                           pad=same_pad3,
                           W=Orthogonal(),
                           nonlinearity=rectify,
                           name='conv5')
    mp5 = ll.MaxPool2DLayer(conv5, 2, stride=2)  # now down to 8 x 8
    bn5 = ll.BatchNormLayer(mp5)
    conv6 = ll.Conv2DLayer(bn5,
                           num_filters=256,
                           filter_size=filter3,
                           pad=same_pad3,
                           W=Orthogonal(),
                           nonlinearity=rectify,
                           name='conv6')
    mp6 = ll.MaxPool2DLayer(conv6, 2, stride=2)  # down to 4 x 4
    bn6 = ll.BatchNormLayer(mp6)

    conv7 = ll.Conv2DLayer(bn6,
                           num_filters=512,
                           filter_size=filter3,
                           pad=same_pad3,
                           W=Orthogonal(),
                           nonlinearity=rectify,
                           name='conv7')
    mp7 = ll.MaxPool2DLayer(conv7, 2, stride=2)  # down to 2 x 2
    bn7 = ll.BatchNormLayer(mp7)
    dp0 = ll.DropoutLayer(bn7, p=0.5)

    # now let's bring it down to an FC layer that takes in the 2 x 2 x 512 bn7 output
    fc1 = ll.DenseLayer(dp0, num_units=256, nonlinearity=rectify)
    bn1_fc = ll.BatchNormLayer(fc1)
    dp1 = ll.DropoutLayer(bn1_fc, p=0.5)
    # so what we're going to do here instead is break this into three separate layers (each 32 units)
    # then each of these layers goes into a separate out, and out_rs will be a merge and then reshape
    fc2_left = ll.DenseLayer(dp1, num_units=32, nonlinearity=rectify)
    fc2_right = ll.DenseLayer(dp1, num_units=32, nonlinearity=rectify)
    fc2_notch = ll.DenseLayer(dp1, num_units=32, nonlinearity=rectify)

    out_left = ll.DenseLayer(fc2_left, num_units=2, nonlinearity=linear)
    out_right = ll.DenseLayer(fc2_right, num_units=2, nonlinearity=linear)
    out_notch = ll.DenseLayer(fc2_notch, num_units=2, nonlinearity=linear)

    out = ll.ConcatLayer([out_left, out_right, out_notch], axis=1)
    out_rs = ll.ReshapeLayer(out, ([0], 3, 2))

    return out_rs
Example #21
def test_orthogonal_1d_not_supported():
    from lasagne.init import Orthogonal

    with pytest.raises(RuntimeError):
        Orthogonal().sample((100, ))
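In other words, the initializer only accepts shapes with at least two dimensions; 1-D parameters such as biases are initialised with something like Constant instead, as the other examples in this collection do. A small illustration, assuming only the behaviour the tests above already exercise:

from lasagne.init import Constant, Orthogonal

W = Orthogonal().sample((100, 50, 80))   # fine: trailing dims are flattened to (100, 4000)
b = Constant(0.0).sample((100,))         # 1-D shapes need a different initializer

try:
    Orthogonal().sample((100,))
except RuntimeError:
    print('Orthogonal() rejects 1-D shapes')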
Example #22
def config_4c_1233_3d(batch_iterator="BatchIterator", max_epochs=30):
    custom_batch_iterator = globals()[batch_iterator]
    net1 = NeuralNet(
        layers=[
            ('input', layers.InputLayer),
            ('conv1', layers.Conv2DLayer),
            ('pool1', layers.MaxPool2DLayer),
            ('conv2_1', layers.Conv2DLayer),
            ('conv2_2', layers.Conv2DLayer),
            ('pool2', layers.MaxPool2DLayer),
            ('conv3_1', layers.Conv2DLayer),
            ('conv3_2', layers.Conv2DLayer),
            ('conv3_3', layers.Conv2DLayer),
            ('pool3', layers.MaxPool2DLayer),
            ('conv4_1', layers.Conv2DLayer),
            ('conv4_2', layers.Conv2DLayer),
            ('conv4_3', layers.Conv2DLayer),
            ('pool4', layers.MaxPool2DLayer),
            ('dense1', layers.DenseLayer),
            ('dense2', layers.DenseLayer),
            ('dense3', layers.DenseLayer),
            ('output', layers.DenseLayer),
        ],
        # layer parameters:
        input_shape=(None, 3, 256, 256),
        conv1_num_filters=86,
        conv1_filter_size=(5, 5),
        conv1_stride=(2, 2),
        conv1_pad=(1, 1),
        pool1_pool_size=(2, 2),
        conv2_1_num_filters=128,
        conv2_1_filter_size=(3, 3),
        conv2_1_pad=(1, 1),
        conv2_2_num_filters=128,
        conv2_2_filter_size=(3, 3),
        conv2_2_pad=(1, 1),
        pool2_pool_size=(2, 2),
        conv3_1_num_filters=256,
        conv3_1_filter_size=(3, 3),
        conv3_1_pad=(1, 1),
        conv3_2_num_filters=256,
        conv3_2_filter_size=(3, 3),
        conv3_2_pad=(1, 1),
        conv3_3_num_filters=256,
        conv3_3_filter_size=(3, 3),
        conv3_3_pad=(1, 1),
        pool3_pool_size=(2, 2),
        conv4_1_num_filters=196,
        conv4_1_filter_size=(3, 3),
        conv4_1_pad=(1, 1),
        conv4_2_num_filters=196,
        conv4_2_filter_size=(3, 3),
        conv4_2_pad=(1, 1),
        conv4_3_num_filters=196,
        conv4_3_filter_size=(3, 3),
        conv4_3_pad=(1, 1),
        pool4_pool_size=(2, 2),
        conv1_W=Orthogonal(gain=1.0),
        conv2_1_W=Orthogonal(gain=1.0),
        conv2_2_W=Orthogonal(gain=1.0),
        conv3_1_W=Orthogonal(gain=1.0),
        conv3_2_W=Orthogonal(gain=1.0),
        conv3_3_W=Orthogonal(gain=1.0),
        conv4_1_W=Orthogonal(gain=1.0),
        conv4_2_W=Orthogonal(gain=1.0),
        conv4_3_W=Orthogonal(gain=1.0),
        dense1_num_units=2048,
        dense2_num_units=1024,
        dense3_num_units=512,
        # dense1_nonlinearity=lasagne.nonlinearities.rectify,
        # dense2_nonlinearity=lasagne.nonlinearities.rectify,
        dense3_nonlinearity=lasagne.nonlinearities.sigmoid,
        dense1_W=Orthogonal(gain=1.0),
        dense2_W=Orthogonal(gain=1.0),
        dense3_W=Orthogonal(gain=1.0),
        # output layer uses identity function
        output_nonlinearity=None,
        output_num_units=4,

        # optimization method:
        update=nesterov_momentum,
        update_learning_rate=0.01,
        update_momentum=0.975,
        batch_iterator_train=custom_batch_iterator(batch_size=64),
        batch_iterator_test=custom_batch_iterator(batch_size=64),
        regression=True,
        max_epochs=max_epochs,
        verbose=1,
    )
    return net1
Example #23
def config_4c_1234_3d_smoothl1_lr_step(batch_iterator="BatchIterator",
                                       max_epochs=30):
    custom_batch_iterator = globals()[batch_iterator]
    net1 = NeuralNet(
        layers=[
            ('input', layers.InputLayer),
            ('conv1_1', layers.Conv2DLayer),
            ('conv1_2', layers.Conv2DLayer),
            ('pool1', layers.MaxPool2DLayer),
            ('conv2_1', layers.Conv2DLayer),
            ('conv2_2', layers.Conv2DLayer),
            ('pool2', layers.MaxPool2DLayer),
            ('conv3_1', layers.Conv2DLayer),
            ('conv3_2', layers.Conv2DLayer),
            ('conv3_3', layers.Conv2DLayer),
            ('pool3', layers.MaxPool2DLayer),
            ('conv4_1', layers.Conv2DLayer),
            ('conv4_2', layers.Conv2DLayer),
            ('conv4_3', layers.Conv2DLayer),
            ('conv4_4', layers.Conv2DLayer),
            ('pool4', layers.MaxPool2DLayer),
            ('dense1', layers.DenseLayer),
            # ('drop1', layers.DropoutLayer),
            ('dense2', layers.DenseLayer),
            # ('drop2', layers.DropoutLayer),
            ('dense3', layers.DenseLayer),
            # ('drop3', layers.DropoutLayer),
            ('output', layers.DenseLayer),
        ],
        # layer parameters:
        input_shape=(None, 3, 256, 256),
        conv1_1_num_filters=86,
        conv1_1_filter_size=(5, 5),
        conv1_1_stride=(2, 2),
        conv1_2_num_filters=104,
        conv1_2_filter_size=(3, 3),
        conv1_2_pad=(1, 1),
        pool1_pool_size=(2, 2),
        pool1_stride=(2, 2),
        conv2_1_num_filters=128,
        conv2_1_filter_size=(3, 3),
        conv2_1_pad=(1, 1),
        conv2_2_num_filters=128,
        conv2_2_filter_size=(3, 3),
        conv2_2_pad=(1, 1),
        pool2_pool_size=(3, 3),
        pool2_stride=(2, 2),
        conv3_1_num_filters=256,
        conv3_1_filter_size=(3, 3),
        conv3_1_pad=(1, 1),
        conv3_2_num_filters=256,
        conv3_2_filter_size=(3, 3),
        conv3_2_pad=(1, 1),
        conv3_3_num_filters=256,
        conv3_3_filter_size=(3, 3),
        conv3_3_pad=(1, 1),
        pool3_pool_size=(3, 3),
        pool3_stride=(2, 2),
        conv4_1_num_filters=196,
        conv4_1_filter_size=(3, 3),
        conv4_1_pad=(1, 1),
        conv4_2_num_filters=196,
        conv4_2_filter_size=(3, 3),
        conv4_2_pad=(1, 1),
        conv4_3_num_filters=196,
        conv4_3_filter_size=(3, 3),
        conv4_3_pad=(1, 1),
        conv4_4_num_filters=196,
        conv4_4_filter_size=(3, 3),
        conv4_4_pad=(1, 1),
        pool4_pool_size=(2, 2),
        pool4_stride=(2, 2),
        conv1_1_W=Orthogonal(gain=1.0),
        conv1_2_W=Orthogonal(gain=1.0),
        conv2_1_W=Orthogonal(gain=1.0),
        conv2_2_W=Orthogonal(gain=1.0),
        conv3_1_W=Orthogonal(gain=1.0),
        conv3_2_W=Orthogonal(gain=1.0),
        conv3_3_W=Orthogonal(gain=1.0),
        conv4_1_W=Orthogonal(gain=1.0),
        conv4_2_W=Orthogonal(gain=1.0),
        conv4_3_W=Orthogonal(gain=1.0),
        conv4_4_W=Orthogonal(gain=1.0),
        dense1_num_units=4096,  # drop1_p=0.5,
        dense2_num_units=2048,  # drop2_p=0.5,
        dense3_num_units=512,  # drop3_p=0.5,
        dense3_nonlinearity=lasagne.nonlinearities.sigmoid,
        # output layer uses identity function
        output_nonlinearity=None,
        output_num_units=4,

        # optimization method:
        update=nesterov_momentum,
        update_learning_rate=theano.shared(np.float32(0.0001)),
        update_momentum=theano.shared(np.float32(0.9)),
        batch_iterator_train=custom_batch_iterator(batch_size=72),
        batch_iterator_test=custom_batch_iterator(batch_size=48),
        on_epoch_finished=[
            StepVariableUpdate('update_learning_rate', changes={30: 0.00005}),
            AdjustVariable('update_momentum', start=0.9, stop=0.98)
        ],
        objective_loss_function=smooth_l1_loss,
        # objective_loss_function=iou_loss,
        custom_scores=[
            # ('smoothl1', smooth_l1_loss_val),
            ('iou_loss', iou_loss_val),
            ('squared_error', mean_squared_error)
        ],
        regression=True,
        max_epochs=max_epochs,
        verbose=1,
    )
    return net1
Example #24
def build_segmenter_jet_preconv():
    # downsample down to a small region, then upsample all the way back up, using jet architecture
    # recreate basic FCN-8s structure (though more aptly 1s here since we upsample back to the original input size)
    # this jet will have another conv layer in the final upsample
    # difference here is that instead of combining softmax layers in the jet, we'll upsample before the conv_f* layer
    # this will certainly make the model slower, but should give us better predictions...
    # The awkward part here is combining the intermediate conv layers when they have different filter shapes
    # We could:
    #   concat them
    #   have intermediate conv layers that bring them to the shape needed then merge them
    # in the interests of speed we'll just concat them, though we'll have a ton of filters at the end
    inp = ll.InputLayer(shape=(None, 1, None, None), name='input')
    conv1 = ll.Conv2DLayer(inp,
                           num_filters=32,
                           filter_size=(3, 3),
                           pad='same',
                           W=Orthogonal(),
                           nonlinearity=rectify,
                           name='conv1_1')
    bn1 = ll.BatchNormLayer(conv1, name='bn1')
    conv2 = ll.Conv2DLayer(conv1,
                           num_filters=64,
                           filter_size=(3, 3),
                           pad='same',
                           W=Orthogonal(),
                           nonlinearity=rectify,
                           name='conv1_2')
    bn2 = ll.BatchNormLayer(conv2, name='bn2')
    mp1 = ll.MaxPool2DLayer(conv2, 2, stride=2, name='mp1')  # 2x downsample
    conv3 = ll.Conv2DLayer(mp1,
                           num_filters=128,
                           filter_size=(3, 3),
                           pad='same',
                           W=Orthogonal(),
                           nonlinearity=rectify,
                           name='conv2_1')
    bn3 = ll.BatchNormLayer(conv3, name='bn3')
    conv4 = ll.Conv2DLayer(conv3,
                           num_filters=128,
                           filter_size=(3, 3),
                           pad='same',
                           W=Orthogonal(),
                           nonlinearity=rectify,
                           name='conv2_2')
    bn4 = ll.BatchNormLayer(conv4, name='bn4')
    mp2 = ll.MaxPool2DLayer(conv4, 2, stride=2, name='mp2')  # 4x downsample
    conv5 = ll.Conv2DLayer(mp2,
                           num_filters=128,
                           filter_size=(3, 3),
                           pad='same',
                           W=Orthogonal(),
                           nonlinearity=rectify,
                           name='conv3_1')
    bn5 = ll.BatchNormLayer(conv5, name='bn5')
    conv6 = ll.Conv2DLayer(conv5,
                           num_filters=128,
                           filter_size=(3, 3),
                           pad='same',
                           W=Orthogonal(),
                           nonlinearity=rectify,
                           name='conv3_2')
    bn6 = ll.BatchNormLayer(conv6, name='bn6')
    mp3 = ll.MaxPool2DLayer(conv6, 2, stride=2, name='mp3')  # 8x downsample
    conv7 = ll.Conv2DLayer(mp3,
                           num_filters=128,
                           filter_size=(3, 3),
                           pad='same',
                           W=Orthogonal(),
                           nonlinearity=rectify,
                           name='conv4_1')
    bn7 = ll.BatchNormLayer(conv7, name='bn7')
    conv8 = ll.Conv2DLayer(conv7,
                           num_filters=128,
                           filter_size=(3, 3),
                           pad='same',
                           W=Orthogonal(),
                           nonlinearity=rectify,
                           name='conv4_2')
    bn8 = ll.BatchNormLayer(conv8, name='bn8')
    # f 68 s 8
    # now start the upsample
    ## FIRST UPSAMPLE PREDICTION (akin to FCN-32s)

    up8 = ll.Upscale2DLayer(
        bn8, 8,
        name='upsample_8x')  # take loss here, 8x upsample from 8x downsample
    conv_f8 = ll.Conv2DLayer(up8,
                             num_filters=2,
                             filter_size=(3, 3),
                             pad='same',
                             W=Orthogonal(),
                             nonlinearity=linear,
                             name='conv_8xpred')
    softmax_8 = Softmax4D(conv_f8, name='4dsoftmax_8x')

    ## COMBINE BY UPSAMPLING CONV 8 AND CONV 6
    conv_8_up2 = ll.Upscale2DLayer(bn8, 2,
                                   name='upsample_c8_2')  # 4x downsample
    concat_c8_c6 = ll.ConcatLayer([conv_8_up2, bn6],
                                  axis=1,
                                  name='concat_c8_c6')
    up4 = ll.Upscale2DLayer(
        concat_c8_c6, 4,
        name='upsample_4x')  # take loss here, 4x upsample from 4x downsample
    conv_f4 = ll.Conv2DLayer(up4,
                             num_filters=2,
                             filter_size=(3, 3),
                             pad='same',
                             W=Orthogonal(),
                             nonlinearity=linear,
                             name='conv_4xpred')
    softmax_4 = Softmax4D(conv_f4, name='4dsoftmax_4x')  # 4x downsample

    ## COMBINE BY UPSAMPLING CONCAT_86 AND CONV 4
    concat_86_up2 = ll.Upscale2DLayer(
        concat_c8_c6, 2, name='upsample_concat_86_2')  # 2x downsample
    concat_ct86_c4 = ll.ConcatLayer([concat_86_up2, bn4],
                                    axis=1,
                                    name='concat_ct86_c4')

    up2 = ll.Upscale2DLayer(
        concat_ct86_c4, 2, name='upsample_2x'
    )  # final loss here, 2x upsample from a 2x downsample
    conv_f2 = ll.Conv2DLayer(up2,
                             num_filters=2,
                             filter_size=(3, 3),
                             pad='same',
                             W=Orthogonal(),
                             nonlinearity=linear,
                             name='conv_2xpred')

    softmax_2 = Softmax4D(conv_f2, name='4dsoftmax_2x')

    ## COMBINE BY UPSAMPLING CONCAT_864 AND CONV 2
    concat_864_up2 = ll.Upscale2DLayer(
        concat_ct86_c4, 2, name='upsample_concat_86_2')  # no downsample
    concat_864_c2 = ll.ConcatLayer([concat_864_up2, bn2],
                                   axis=1,
                                   name='concat_ct864_c2')
    conv_f1 = ll.Conv2DLayer(concat_864_c2,
                             num_filters=2,
                             filter_size=(3, 3),
                             pad='same',
                             W=Orthogonal(),
                             nonlinearity=linear,
                             name='conv_1xpred')

    softmax_1 = Softmax4D(conv_f1, name='4dsoftmax_1x')

    # this is where up1 would go but that doesn't make any sense
    return [softmax_8, softmax_4, softmax_2, softmax_1]
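All four softmax heads end up at the input resolution, so they can share one target mask. A minimal sketch of a combined multi-scale loss, assuming the builder above is exposed as `build_segmenter_jet()` and that `T` is `theano.tensor` and `ll` is `lasagne.layers` as elsewhere in this file:

# Sketch only: average pixelwise cross-entropy over the four scale predictions.
outputs = build_segmenter_jet()        # [softmax_8, softmax_4, softmax_2, softmax_1]
target = T.tensor4('target')           # assumed one-hot mask, shape (batch, 2, H, W)
preds = ll.get_output(outputs)         # one prediction per head, same shape as target
losses = [-T.mean(target * T.log(p + 1e-8)) for p in preds]
total_loss = sum(losses) / len(losses)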
Exemplo n.º 25
0
    def __init__(self, train_raw, test_raw, dim, mode, l2, l1,
                 batch_norm, dropout, batch_size, **kwargs):
                
        print "==> not used params in network class:", kwargs.keys()
        self.train_raw = train_raw
        self.test_raw = test_raw
        
        self.dim = dim
        self.mode = mode
        self.l2 = l2
        self.l1 = l1
        self.batch_norm = batch_norm
        self.dropout = dropout
        self.batch_size = batch_size
        
        self.train_batch_gen = self.get_batch_gen(self.train_raw)
        self.test_batch_gen = self.get_batch_gen(self.test_raw)    
        
        self.input_var = T.tensor3('X')
        self.input_lens = T.ivector('L')
        self.target_var = T.imatrix('y')
        
        """
        for i in range(700//self.batch_size):
            ret=next(self.train_batch_gen)
            print len(ret[0])
            print ret[0][0].shape
            print len(ret[1])
            print type(ret[1][0])
            print "---"
        exit()
        """
                
        print "==> Building neural network"
        network = layers.InputLayer((None, None, self.train_raw[0][0].shape[1]), 
                                    input_var=self.input_var)
        
        #print "!!!!!!!!!!! WARNING: dropout on input is disabled !!!!!!!!!!!!!!!!"
        if (self.dropout > 0):
            network = layers.DropoutLayer(network, p=self.dropout)

        network = layers.LSTMLayer(incoming=network, num_units=dim,
                                   grad_clipping=10,
                                   ingate=lasagne.layers.Gate(
                                        W_in=Orthogonal(),
                                        W_hid=Orthogonal(),
                                        W_cell=Normal(0.1)),
                                   forgetgate=lasagne.layers.Gate(
                                        W_in=Orthogonal(),
                                        W_hid=Orthogonal(),
                                        W_cell=Normal(0.1)),
                                   cell=lasagne.layers.Gate(W_cell=None,
                                        nonlinearity=lasagne.nonlinearities.tanh,
                                        W_in=Orthogonal(),
                                        W_hid=Orthogonal()),
                                   outgate=lasagne.layers.Gate(
                                        W_in=Orthogonal(),
                                        W_hid=Orthogonal(),
                                        W_cell=Normal(0.1)))
        
        if (self.dropout > 0):
            network = layers.DropoutLayer(network, p=self.dropout)
        
        network = layers.LSTMLayer(incoming=network, num_units=dim,
                                   only_return_final=False,
                                   grad_clipping=10,
                                   ingate=lasagne.layers.Gate(
                                        W_in=Orthogonal(),
                                        W_hid=Orthogonal(),
                                        W_cell=Normal(0.1)),
                                   forgetgate=lasagne.layers.Gate(
                                        W_in=Orthogonal(),
                                        W_hid=Orthogonal(),
                                        W_cell=Normal(0.1)),
                                   cell=lasagne.layers.Gate(W_cell=None,
                                        nonlinearity=lasagne.nonlinearities.tanh,
                                        W_in=Orthogonal(),
                                        W_hid=Orthogonal()),
                                   outgate=lasagne.layers.Gate(
                                        W_in=Orthogonal(),
                                        W_hid=Orthogonal(),
                                        W_cell=Normal(0.1)))
              
        lstm_output = layers.get_output(network)
        self.params = layers.get_all_params(network, trainable=True)
        self.reg_params = layers.get_all_params(network, regularizable=True)
        
        """
        data = next(self.train_batch_gen)
        print max(data[1])
        print lstm_output.eval({self.input_var:data[0]}).shape
        exit()
        """
        
        # for each example in minibatch take the last output
        last_outputs = []
        for index in range(self.batch_size):
            last_outputs.append(lstm_output[index, self.input_lens[index]-1, :])
        last_outputs = T.stack(last_outputs)

        """
        data = next(self.train_batch_gen)
        print max(data[1])
        print last_outputs.eval({self.input_var:data[0],
            self.input_lens:data[1],
        }).shape
        exit()
        """
        
        network = layers.InputLayer(shape=(self.batch_size, self.dim), 
                                    input_var=last_outputs)
        if (self.dropout > 0):
            network = layers.DropoutLayer(network, p=self.dropout)
        network = layers.DenseLayer(incoming=network,
                                    num_units=train_raw[1][0].shape[0],
                                    nonlinearity=sigmoid)
        
        self.prediction = layers.get_output(network)
        self.det_prediction = layers.get_output(network, deterministic=True)
        self.params += layers.get_all_params(network, trainable=True)
        self.reg_params += layers.get_all_params(network, regularizable=True)
        
        self.loss_multilabel = -(self.target_var * T.log(self.prediction) + \
            (1 - self.target_var) * T.log(1 - self.prediction)).mean(axis=1)\
                                                               .mean(axis=0)
        
        if self.l2 > 0: 
            self.loss_l2 = self.l2 * nn_utils.l2_reg(self.reg_params)
        else: 
            self.loss_l2 = 0
        
        if self.l1 > 0: 
            self.loss_l1 = self.l1 * nn_utils.l1_reg(self.reg_params)
        else: 
            self.loss_l1 = 0
            
        self.loss = self.loss_multilabel + self.loss_l2 + self.loss_l1
              
        #updates = lasagne.updates.adadelta(self.loss, self.params,
        #                                    learning_rate=0.001)
        #updates = lasagne.updates.momentum(self.loss, self.params,
        #                                    learning_rate=0.00003)
        #updates = lasagne.updates.adam(self.loss, self.params)
        updates = lasagne.updates.adam(self.loss, self.params, beta1=0.5,
                                       learning_rate=0.0001) # from DCGAN paper
        #updates = lasagne.updates.nesterov_momentum(loss, params, momentum=0.9,
        #                                             learning_rate=0.001,
        
        ## compiling theano functions
        if self.mode == 'train':
            print "==> compiling train_fn"
            self.train_fn = theano.function(inputs=[self.input_var,
                                                    self.input_lens,
                                                    self.target_var],
                                            outputs=[self.prediction, self.loss],
                                            updates=updates)
        
        print "==> compiling test_fn"
        self.test_fn = theano.function(inputs=[self.input_var,
                                               self.input_lens,
                                               self.target_var],
                                       outputs=[self.det_prediction, self.loss])
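The per-example Python loop above that gathers each sequence's final LSTM output can also be written as a single fancy-indexing expression; a minimal sketch under the same variable names (not part of the original class):

# Vectorized alternative to the loop: pick row i at time step input_lens[i] - 1.
last_outputs = lstm_output[T.arange(self.batch_size), self.input_lens - 1, :]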
Exemplo n.º 26
0
def build_segmenter_upsample():
    # downsample down to a small region, then upsample all the way back up
    # Note: w/o any learning on the upsampler, we're limited in how far we can downsample
    # there will always be an error signal unless the loss fn is run on downsampled targets...
    inp = ll.InputLayer(shape=(None, 1, None, None), name='input')
    conv1 = ll.Conv2DLayer(inp,
                           num_filters=32,
                           filter_size=(3, 3),
                           pad='same',
                           W=Orthogonal(),
                           nonlinearity=rectify,
                           name='conv1_1')
    bn1 = ll.BatchNormLayer(conv1, name='bn1')
    conv2 = ll.Conv2DLayer(bn1,
                           num_filters=64,
                           filter_size=(3, 3),
                           pad='same',
                           W=Orthogonal(),
                           nonlinearity=rectify,
                           name='conv1_2')
    bn2 = ll.BatchNormLayer(conv2, name='bn2')
    mp1 = ll.MaxPool2DLayer(bn2, 2, stride=2, name='mp1')  # 2x downsample
    conv3 = ll.Conv2DLayer(mp1,
                           num_filters=128,
                           filter_size=(3, 3),
                           pad='same',
                           W=Orthogonal(),
                           nonlinearity=rectify,
                           name='conv2_1')
    bn3 = ll.BatchNormLayer(conv3, name='bn3')
    conv4 = ll.Conv2DLayer(bn3,
                           num_filters=128,
                           filter_size=(3, 3),
                           pad='same',
                           W=Orthogonal(),
                           nonlinearity=rectify,
                           name='conv2_2')
    bn4 = ll.BatchNormLayer(conv4, name='bn4')
    mp2 = ll.MaxPool2DLayer(bn4, 2, stride=2, name='mp2')  # 4x downsample
    conv5 = ll.Conv2DLayer(mp2,
                           num_filters=128,
                           filter_size=(3, 3),
                           pad='same',
                           W=Orthogonal(),
                           nonlinearity=rectify,
                           name='conv3_1')
    bn5 = ll.BatchNormLayer(conv5, name='bn5')
    conv6 = ll.Conv2DLayer(bn5,
                           num_filters=128,
                           filter_size=(3, 3),
                           pad='same',
                           W=Orthogonal(),
                           nonlinearity=rectify,
                           name='conv3_2')
    bn6 = ll.BatchNormLayer(conv6, name='bn6')
    mp3 = ll.MaxPool2DLayer(bn6, 2, stride=2, name='mp3')  # 8x downsample
    conv7 = ll.Conv2DLayer(mp3,
                           num_filters=128,
                           filter_size=(3, 3),
                           pad='same',
                           W=Orthogonal(),
                           nonlinearity=rectify,
                           name='conv4_1')
    bn7 = ll.BatchNormLayer(conv7, name='bn7')
    conv8 = ll.Conv2DLayer(bn7,
                           num_filters=128,
                           filter_size=(3, 3),
                           pad='same',
                           W=Orthogonal(),
                           nonlinearity=rectify,
                           name='conv4_2')
    bn8 = ll.BatchNormLayer(conv8, name='bn8')
    # f 68 s 8
    # now start the upsample
    up = ll.Upscale2DLayer(bn8, 8, name='upsample_8x')
    conv_f = ll.Conv2DLayer(up,
                            num_filters=2,
                            filter_size=(3, 3),
                            pad='same',
                            W=Orthogonal(),
                            nonlinearity=linear,
                            name='conv_final')
    softmax = Softmax4D(conv_f, name='4dsoftmax')
    return [softmax]
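As the comment at the top of this builder notes, nothing is learned in the upsampler: `Upscale2DLayer` just repeats each value over an 8x8 block. A hedged sketch of swapping it for a learnable upsampler built from a strided transposed convolution (requires `lasagne.layers.TransposedConv2DLayer`, available in newer Lasagne versions; `Softmax4D` is the same custom layer used above):

# Sketch only: learnable 8x upsampling; a 16x16 kernel with stride 8 and crop 4
# maps an (H, W) feature map to exactly (8H, 8W).
up_learn = ll.TransposedConv2DLayer(bn8,
                                    num_filters=2,
                                    filter_size=(16, 16),
                                    stride=(8, 8),
                                    crop=4,
                                    W=Orthogonal(),
                                    nonlinearity=linear,
                                    name='deconv_8x')
softmax_learned = Softmax4D(up_learn, name='4dsoftmax_learned')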
Exemplo n.º 27
0


##################################################################
# Variables and Constants
MAX_ITERS = 150  # 450

CONV_EPS = 1e-5
DFLT_VDIM = 100

_HE_NORMAL = HeNormal()
HE_NORMAL = lambda x: floatX(_HE_NORMAL.sample(x))

_HE_UNIFORM = HeUniform()
HE_UNIFORM = lambda x: floatX(_HE_UNIFORM.sample(x))

_HE_UNIFORM_RELU = HeUniform(gain=np.sqrt(2))
HE_UNIFORM_RELU = lambda x: floatX(_HE_UNIFORM_RELU.sample(x))

_RELU_ALPHA = 0.
_HE_UNIFORM_LEAKY_RELU = HeUniform(
    gain=np.sqrt(2. / (1 + (_RELU_ALPHA or 1e-6)**2)))
HE_UNIFORM_LEAKY_RELU = lambda x: \
    floatX(_HE_UNIFORM_LEAKY_RELU.sample(x))

_ORTHOGONAL = Orthogonal()
ORTHOGONAL = lambda x: floatX(_ORTHOGONAL.sample(x))

TRNG = RandomStreams()
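Each wrapper turns an initializer instance into a shape -> float32 ndarray callable. A minimal usage sketch (the shapes below are illustrative, not from the original file):

# Sketch only: call a wrapper wherever a weight matrix of a given shape is needed.
W_hidden = HE_UNIFORM_RELU((DFLT_VDIM, DFLT_VDIM))   # float32 array of shape (100, 100)
W_proj = ORTHOGONAL((DFLT_VDIM, 50))                 # float32 orthogonal projection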
Exemplo n.º 28
0
def build_segmenter_jet_2():
    # downsample down to a small region, then upsample all the way back up, using jet architecture
    # recreate basic FCN-8s structure (though more aptly 1s here since we upsample back to the original input size)
    # this jet will have another conv layer in the final upsample
    inp = ll.InputLayer(shape=(None, 1, None, None), name='input')
    conv1 = ll.Conv2DLayer(inp,
                           num_filters=32,
                           filter_size=(3, 3),
                           pad='same',
                           W=Orthogonal(),
                           nonlinearity=rectify,
                           name='conv1_1')
    bn1 = ll.BatchNormLayer(conv1, name='bn1')
    conv2 = ll.Conv2DLayer(bn1,
                           num_filters=64,
                           filter_size=(3, 3),
                           pad='same',
                           W=Orthogonal(),
                           nonlinearity=rectify,
                           name='conv1_2')
    bn2 = ll.BatchNormLayer(conv2, name='bn2')
    mp1 = ll.MaxPool2DLayer(bn2, 2, stride=2, name='mp1')  # 2x downsample
    conv3 = ll.Conv2DLayer(mp1,
                           num_filters=128,
                           filter_size=(3, 3),
                           pad='same',
                           W=Orthogonal(),
                           nonlinearity=rectify,
                           name='conv2_1')
    bn3 = ll.BatchNormLayer(conv3, name='bn3')
    conv4 = ll.Conv2DLayer(bn3,
                           num_filters=128,
                           filter_size=(3, 3),
                           pad='same',
                           W=Orthogonal(),
                           nonlinearity=rectify,
                           name='conv2_2')
    bn4 = ll.BatchNormLayer(conv4, name='bn4')
    mp2 = ll.MaxPool2DLayer(bn4, 2, stride=2, name='mp2')  # 4x downsample
    conv5 = ll.Conv2DLayer(mp2,
                           num_filters=128,
                           filter_size=(3, 3),
                           pad='same',
                           W=Orthogonal(),
                           nonlinearity=rectify,
                           name='conv3_1')
    bn5 = ll.BatchNormLayer(conv5, name='bn5')
    conv6 = ll.Conv2DLayer(bn5,
                           num_filters=128,
                           filter_size=(3, 3),
                           pad='same',
                           W=Orthogonal(),
                           nonlinearity=rectify,
                           name='conv3_2')
    bn6 = ll.BatchNormLayer(conv6, name='bn6')
    mp3 = ll.MaxPool2DLayer(bn6, 2, stride=2, name='mp3')  # 8x downsample
    conv7 = ll.Conv2DLayer(mp3,
                           num_filters=128,
                           filter_size=(3, 3),
                           pad='same',
                           W=Orthogonal(),
                           nonlinearity=rectify,
                           name='conv4_1')
    bn7 = ll.BatchNormLayer(conv7, name='bn7')
    conv8 = ll.Conv2DLayer(bn7,
                           num_filters=128,
                           filter_size=(3, 3),
                           pad='same',
                           W=Orthogonal(),
                           nonlinearity=rectify,
                           name='conv4_2')
    bn8 = ll.BatchNormLayer(conv8, name='bn8')
    # f 68 s 8
    # now start the upsample
    ## FIRST UPSAMPLE PREDICTION (akin to FCN-32s)
    conv_f8 = ll.Conv2DLayer(bn8,
                             num_filters=2,
                             filter_size=(3, 3),
                             pad='same',
                             W=Orthogonal(),
                             nonlinearity=linear,
                             name='conv_8xpred')
    softmax_8 = Softmax4D(conv_f8, name='4dsoftmax_8x')
    up8 = ll.Upscale2DLayer(
        softmax_8, 8,
        name='upsample_8x')  # take loss here, 8x upsample from 8x downsample

    ## COMBINE BY UPSAMPLING SOFTMAX 8 AND PRED ON CONV 6
    softmax_4up = ll.Upscale2DLayer(softmax_8, 2,
                                    name='upsample_4x_pre')  # 4x downsample
    conv_f6 = ll.Conv2DLayer(bn6,
                             num_filters=2,
                             filter_size=(3, 3),
                             pad='same',
                             W=Orthogonal(),
                             nonlinearity=linear,
                             name='conv_4xpred')
    softmax_4 = Softmax4D(conv_f6, name='4dsoftmax_4x')  # 4x downsample
    softmax_4_merge = ll.ElemwiseSumLayer([softmax_4, softmax_4up],
                                          coeffs=0.5,
                                          name='softmax_4_merge')

    up4 = ll.Upscale2DLayer(
        softmax_4_merge, 4,
        name='upsample_4x')  # take loss here, 4x upsample from 4x downsample

    ## COMBINE BY UPSAMPLING SOFTMAX_4_MERGE AND CONV 4
    softmax_2up = ll.Upscale2DLayer(softmax_4_merge, 2,
                                    name='upsample_2x_pre')  # 2x downsample
    conv_f4 = ll.Conv2DLayer(bn4,
                             num_filters=2,
                             filter_size=(3, 3),
                             pad='same',
                             W=Orthogonal(),
                             nonlinearity=linear,
                             name='conv_2xpred')

    softmax_2 = Softmax4D(conv_f4, name='4dsoftmax_2x')
    softmax_2_merge = ll.ElemwiseSumLayer([softmax_2, softmax_2up],
                                          coeffs=0.5,
                                          name='softmax_2_merge')

    up2 = ll.Upscale2DLayer(
        softmax_2_merge, 2, name='upsample_2x'
    )  # final loss here, 2x upsample from a 2x downsample

    ## COMBINE BY UPSAMPLING SOFTMAX_2_MERGE AND CONV 2
    softmax_1up = ll.Upscale2DLayer(
        softmax_2_merge, 2,
        name='upsample_1x_pre')  # 1x downsample (i.e. no downsample)
    conv_f2 = ll.Conv2DLayer(bn2,
                             num_filters=2,
                             filter_size=(3, 3),
                             pad='same',
                             W=Orthogonal(),
                             nonlinearity=linear,
                             name='conv_1xpred')

    softmax_1 = Softmax4D(conv_f2, name='4dsoftmax_1x')
    softmax_1_merge = ll.ElemwiseSumLayer([softmax_1, softmax_1up],
                                          coeffs=0.5,
                                          name='softmax_1_merge')

    # this is where up1 would go but that doesn't make any sense
    return [up8, up4, up2, softmax_1_merge]
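A hedged usage sketch for this builder: get deterministic predictions for all four scales in one call (standard Lasagne API, `ll` as above):

# Sketch only: symbolic deterministic forward pass through every output head.
heads = build_segmenter_jet_2()                      # [up8, up4, up2, softmax_1_merge]
pred_8x, pred_4x, pred_2x, pred_1x = ll.get_output(heads, deterministic=True)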
Exemplo n.º 29
0
                                 test_frac=0.3)
for train, classes, test in validator.yield_cross_validation_sets():
    # create tensor objects
    trainT = train.astype(config.floatX)
    testT = test.astype(config.floatX)
    classT = classes.astype('int32')

    # First, construct an input layer.
    # The shape parameter defines the expected input shape, which is just the shape of our data matrix X.
    l_in = InputLayer(shape=trainT.shape)
    # A dense layer implements a linear mix (xW + b) followed by a nonlinearity.
    l_hidden = DenseLayer(
        l_in,  # The first argument is the input to this layer
        num_units=25,  # This defines the layer's output dimensionality
        nonlinearity=tanh,
        W=Orthogonal(),
    )  # Various nonlinearities are available
    # For our output layer, we'll use a dense layer with a softmax nonlinearity.
    l_output = DenseLayer(l_hidden,
                          num_units=len(classes),
                          nonlinearity=softmax,
                          W=Constant())
    # Now, we can generate the symbolic expression of the network's output given an input variable.
    net_input = T.matrix('net_input')
    net_output = lasagne.layers.get_output(l_output, net_input)

    # As a loss function, we'll use Theano's categorical_crossentropy function.
    # This allows for the network output to be class probabilities,
    # but the target output to be class labels.
    true_output = T.ivector('true_output')
    loss = T.mean(T.nnet.categorical_crossentropy(net_output, true_output))
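The snippet stops at the loss expression; a hedged sketch of the remaining training-step compilation, continuing inside the same cross-validation loop, assuming `lasagne` and `theano` are imported as modules and using plain SGD with an assumed learning rate:

    # Sketch only: gather trainable parameters, build SGD updates, compile one step.
    all_params = lasagne.layers.get_all_params(l_output, trainable=True)
    updates = lasagne.updates.sgd(loss, all_params, learning_rate=0.01)
    train_step = theano.function([net_input, true_output], loss, updates=updates)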
Exemplo n.º 30
0
def build_kpextractor64():
    inp = ll.InputLayer(shape=(None, 1, 64, 64), name='input')
    # we're going to build something like what Daniel Nouri made for Facial Keypoint detection for a base reference
    # http://danielnouri.org/notes/2014/12/17/using-convolutional-neural-nets-to-detect-facial-keypoints-tutorial/
    # alternate pooling and conv layers to minimize parameters
    filter_pad = lambda x, y: (x // 2, y // 2)
    filter3 = (3, 3)
    same_pad3 = filter_pad(*filter3)
    conv1 = ll.Conv2DLayer(inp,
                           num_filters=16,
                           filter_size=filter3,
                           pad=same_pad3,
                           W=Orthogonal(),
                           nonlinearity=rectify,
                           name='conv1')
    mp1 = ll.MaxPool2DLayer(conv1, 2, stride=2)  # now down to 32 x 32
    bn1 = ll.BatchNormLayer(mp1)
    conv2 = ll.Conv2DLayer(bn1,
                           num_filters=32,
                           filter_size=filter3,
                           pad=same_pad3,
                           W=Orthogonal(),
                           nonlinearity=rectify,
                           name='conv2')
    mp2 = ll.MaxPool2DLayer(conv2, 2, stride=2)  # now down to 16 x 16
    bn2 = ll.BatchNormLayer(mp2)
    conv3 = ll.Conv2DLayer(bn2,
                           num_filters=64,
                           filter_size=filter3,
                           pad=same_pad3,
                           W=Orthogonal(),
                           nonlinearity=rectify,
                           name='conv3')
    mp3 = ll.MaxPool2DLayer(conv3, 2, stride=2)  # now down to 8 x 8
    bn3 = ll.BatchNormLayer(mp3)
    conv4 = ll.Conv2DLayer(bn3,
                           num_filters=128,
                           filter_size=filter3,
                           pad=same_pad3,
                           W=Orthogonal(),
                           nonlinearity=rectify,
                           name='conv4')
    # larger max pool to reduce parameters in the FC layer
    mp4 = ll.MaxPool2DLayer(conv4, 2, stride=2)  # now down to 4x4
    bn4 = ll.BatchNormLayer(mp4)
    conv5 = ll.Conv2DLayer(bn4,
                           num_filters=256,
                           filter_size=filter3,
                           pad=same_pad3,
                           W=Orthogonal(),
                           nonlinearity=rectify,
                           name='conv5')
    mp5 = ll.MaxPool2DLayer(conv5, 2, stride=2)  # down to 2x2
    bn5 = ll.BatchNormLayer(mp5)
    # now bring it down to a FC layer that takes the 2x2x256 bn5 output
    fc1 = ll.DenseLayer(bn5, num_units=256, nonlinearity=rectify)
    bn6 = ll.BatchNormLayer(fc1)
    #dp1 = ll.DropoutLayer(bn1, p=0.5)
    fc2 = ll.DenseLayer(bn6, num_units=64, nonlinearity=rectify)
    #dp2 = ll.DropoutLayer(fc2, p=0.5)
    bn7 = ll.BatchNormLayer(fc2)
    out = ll.DenseLayer(bn7, num_units=6, nonlinearity=linear)
    out_rs = ll.ReshapeLayer(out, ([0], 3, 2))

    return out_rs
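A hedged usage sketch: compile a deterministic forward pass mapping a batch of 64x64 grayscale crops to keypoint coordinates (assumes `T` is `theano.tensor`, `theano` is imported, and `ll` as above):

# Sketch only: (batch, 1, 64, 64) images -> (batch, 3, 2) keypoints.
net = build_kpextractor64()
X = T.tensor4('X')
keypoints = ll.get_output(net, X, deterministic=True)
predict_kp = theano.function([X], keypoints)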
Exemplo n.º 31
0
    def __init__(self, dim, mode, l2, l1, batch_norm, dropout,
                 batch_size, input_dim=76, **kwargs):
                
        print "==> not used params in network class:", kwargs.keys()
        
        self.dim = dim
        self.mode = mode
        self.l2 = l2
        self.l1 = l1
        self.batch_norm = batch_norm
        self.dropout = dropout
        self.batch_size = batch_size
        
        self.input_var = T.tensor3('X')
        self.input_lens = T.ivector('L')
        self.target_var = T.ivector('y')
        self.weight = T.vector('w')
        
        print "==> Building neural network"
        network = layers.InputLayer((None, None, input_dim), 
                                    input_var=self.input_var)
        network = layers.LSTMLayer(incoming=network, num_units=dim,
                                   only_return_final=False,
                                   grad_clipping=10,
                                   ingate=lasagne.layers.Gate(
                                        W_in=Orthogonal(),
                                        W_hid=Orthogonal(),
                                        W_cell=Normal(0.1)),
                                   forgetgate=lasagne.layers.Gate(
                                        W_in=Orthogonal(),
                                        W_hid=Orthogonal(),
                                        W_cell=Normal(0.1)),
                                   cell=lasagne.layers.Gate(W_cell=None,
                                        nonlinearity=lasagne.nonlinearities.tanh,
                                        W_in=Orthogonal(),
                                        W_hid=Orthogonal()),
                                   outgate=lasagne.layers.Gate(
                                        W_in=Orthogonal(),
                                        W_hid=Orthogonal(),
                                        W_cell=Normal(0.1)))
        lstm_output = layers.get_output(network)
        
        self.params = layers.get_all_params(network, trainable=True)
        self.reg_params = layers.get_all_params(network, regularizable=True)
        
        # for each example in minibatch take the last output
        last_outputs = []
        for index in range(self.batch_size):
            last_outputs.append(lstm_output[index, self.input_lens[index]-1, :])
        last_outputs = T.stack(last_outputs)

        network = layers.InputLayer(shape=(self.batch_size, self.dim), 
                                    input_var=last_outputs)
        network = layers.DenseLayer(incoming=network, num_units=2,
                                    nonlinearity=softmax)
        
        self.prediction = layers.get_output(network)
        self.params += layers.get_all_params(network, trainable=True)
        self.reg_params += layers.get_all_params(network, regularizable=True)
        
        self.loss_ce = (self.weight * categorical_crossentropy(self.prediction, 
                                                self.target_var)).mean()
        if self.l2 > 0: 
            self.loss_l2 = self.l2 * nn_utils.l2_reg(self.reg_params)
        else: 
            self.loss_l2 = 0
        
        if self.l1 > 0: 
            self.loss_l1 = self.l1 * nn_utils.l1_reg(self.reg_params)
        else: 
            self.loss_l1 = 0
            
        self.loss = self.loss_ce + self.loss_l2 + self.loss_l1
        
        #updates = lasagne.updates.adadelta(self.loss, self.params,
        #                                    learning_rate=0.001)
        #updates = lasagne.updates.momentum(self.loss, self.params,
        #                                    learning_rate=0.00003)
        #updates = lasagne.updates.adam(self.loss, self.params)
        updates = lasagne.updates.adam(self.loss, self.params, beta1=0.5,
                                       learning_rate=0.0001) # from DCGAN paper
        #updates = lasagne.updates.nesterov_momentum(loss, params, momentum=0.9,
        #                                             learning_rate=0.001,
        
        ## compiling theano functions
        if self.mode == 'train':
            print "==> compiling train_fn"
            self.train_fn = theano.function(inputs=[self.input_var,
                                                    self.input_lens,
                                                    self.target_var,
                                                    self.weight],
                                            outputs=[self.prediction, self.loss],
                                            updates=updates)
        
        print "==> compiling test_fn"
        self.test_fn = theano.function(inputs=[self.input_var,
                                               self.input_lens,
                                               self.target_var,
                                               self.weight],
                                       outputs=[self.prediction, self.loss])
Exemplo n.º 32
0
def get_model():

    dtensor4 = T.TensorType('float32', (False,)*4)
    input_var = dtensor4('inputs')
    dtensor2 = T.TensorType('float32', (False,)*2)
    target_var = dtensor2('targets')

    # input layer with unspecified batch size
    layer_input     = InputLayer(shape=(None, 30, 64, 64), input_var=input_var) #InputLayer(shape=(None, 1, 30, 64, 64), input_var=input_var)
    layer_0         = DimshuffleLayer(layer_input, (0, 'x', 1, 2, 3))

    # Z-score?

    # Convolution with batch normalisation and activation, then a padded max-pooling layer followed by dropout
    layer_1         = batch_norm(Conv3DDNNLayer(incoming=layer_0, num_filters=64, filter_size=(3,3,3), stride=(1,3,3), pad='same', nonlinearity=leaky_rectify, W=Orthogonal()))
    layer_2         = MaxPool3DDNNLayer(layer_1, pool_size=(1, 2, 2), stride=(1, 2, 2), pad=(0, 1, 1))
    layer_3         = DropoutLayer(layer_2, p=0.25)

    # Convolution with batch normalisation and activation, then a padded max-pooling layer followed by dropout
    layer_4         = batch_norm(Conv3DDNNLayer(incoming=layer_3, num_filters=128, filter_size=(3,3,3), stride=(1,3,3), pad='same', nonlinearity=leaky_rectify, W=Orthogonal()))
    layer_5         = MaxPool3DDNNLayer(layer_4, pool_size=(1, 2, 2), stride=(1, 2, 2), pad=(0, 1, 1))
    layer_6         = DropoutLayer(layer_5, p=0.25)

    # Convolution with batch normalisation and activation, then a padded max-pooling layer followed by dropout
    layer_7         = batch_norm(Conv3DDNNLayer(incoming=layer_6, num_filters=256, filter_size=(3,3,3), stride=(1,3,3), pad='same', nonlinearity=leaky_rectify, W=Orthogonal()))
    layer_8         = MaxPool3DDNNLayer(layer_7, pool_size=(1, 2, 2), stride=(1, 2, 2), pad=(0, 1, 1))
    layer_9         = DropoutLayer(layer_8, p=0.25)
    
    # Recurrent layer
    layer_10         = DimshuffleLayer(layer_9, (0,2,1,3,4))
    layer_11         = LSTMLayer(layer_10, num_units=612, hid_init=Orthogonal(), only_return_final=False)

    # Output Layer
    layer_systole    = DenseLayer(layer_11, 600, nonlinearity=leaky_rectify, W=Orthogonal())
    layer_diastole   = DenseLayer(layer_11, 600, nonlinearity=leaky_rectify, W=Orthogonal())
    layer_systole_1  = DropoutLayer(layer_systole, p=0.3)
    layer_diastole_1 = DropoutLayer(layer_diastole, p=0.3)

    layer_systole_2   = DenseLayer(layer_systole_1, 1, nonlinearity=None, W=Orthogonal())
    layer_diastole_2  = DenseLayer(layer_diastole_1, 1, nonlinearity=None, W=Orthogonal())
    layer_output      = ConcatLayer([layer_systole_2, layer_diastole_2])

    # Loss
    prediction           = get_output(layer_output) 
    loss                 = squared_error(prediction, target_var)
    loss                 = loss.mean()

    #Updates : Stochastic Gradient Descent (SGD) with Nesterov momentum Or Adam
    params               = get_all_params(layer_output, trainable=True)
    updates              = adam(loss, params)
    #updates_0            = rmsprop(loss, params)
    #updates              = apply_nesterov_momentum(updates_0, params)

    # Create a loss expression for validation/testing. The crucial difference
    # here is that we do a deterministic forward pass through the network, disabling dropout layers.
    test_prediction      = get_output(layer_output, deterministic=True)
    test_loss            = squared_error(test_prediction, target_var)
    test_loss            = test_loss.mean()

    # Compile a function performing a training step on a mini-batch (by giving
    # the updates dictionary) and returning the corresponding training loss:
    train_fn             = theano.function([input_var, target_var], loss, updates=updates, allow_input_downcast=True)

    # Compile a second function computing the validation loss and accuracy
    val_fn               = theano.function([input_var, target_var], test_loss, allow_input_downcast=True)

    # Compile a third function computing the prediction
    predict_fn           = theano.function([input_var], test_prediction, allow_input_downcast=True)

    return [layer_output, train_fn, val_fn, predict_fn]
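A hedged sketch of driving the returned functions on one mini-batch; the batch size and the zero arrays below are placeholders, not data from the original project:

# Sketch only: one training step, one validation pass, one prediction.
import numpy as np

network, train_fn, val_fn, predict_fn = get_model()
X_batch = np.zeros((8, 30, 64, 64), dtype='float32')   # assumed batch of 8 cases
y_batch = np.zeros((8, 2), dtype='float32')            # systole / diastole targets
train_loss = train_fn(X_batch, y_batch)
val_loss = val_fn(X_batch, y_batch)
volumes = predict_fn(X_batch)                           # shape (8, 2)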