def build_model_example(batch_size, learning_rate, rng, x, y):
    nkerns = [64]
    print '... building the model'

    # Reshape the matrix of rasterized volumes into a 5D tensor of shape
    # (batch_size, 7, 4, 7, 7), compatible with LeNetConvPool3dLayer,
    # and apply dropout to the input.
    layer0_input = _dropout_from_layer(rng, x.reshape((batch_size, 7, 4, 7, 7)),
                                       p=0.2)

    # Construct the first convolutional pooling layer:
    # filtering the 7x7x7 volume with 5x5x5 kernels reduces it to
    # (7-5+1, 7-5+1, 7-5+1) = (3, 3, 3),
    # so the output tensor is of shape (batch_size, nkerns[0], 3, 3, 3).
    layer0 = LeNetConvPool3dLayer(rng, input=layer0_input,
                                  image_shape=(batch_size, 7, 4, 7, 7),
                                  filter_shape=(nkerns[0], 5, 4, 5, 5),
                                  )

    # A second convolutional pooling layer (disabled):
    # filtering reduces the image size to (12-5+1, 12-5+1) = (8, 8),
    # maxpooling reduces this further to (8/2, 8/2) = (4, 4),
    # so its 4D output tensor would be of shape (batch_size, nkerns[1], 4, 4).
    # layer1 = LeNetConvPoolLayer(rng, input=layer0.output,
    #                             image_shape=(batch_size, nkerns[0], 12, 12),
    #                             filter_shape=(nkerns[1], nkerns[0], 5, 5),
    #                             poolsize=(2, 2))

    # The HiddenLayer is fully connected, so it operates on 2D matrices of
    # shape (batch_size, num_pixels), i.e. a matrix of rasterized feature maps.
    # This generates a matrix of shape (batch_size, nkerns[0] * 3**3).
    layer1_input = layer0.output.flatten(2)

    # layer1 = HiddenLayer(rng, input=x, n_in=4*7*7*7,
    layer1 = HiddenLayer(rng, input=layer1_input,
                         n_in=nkerns[-1] * 3 ** 3,
                         n_out=1000,
                         activation=relu,
                         )  # p=0.5)

    # construct a fully-connected rectified linear layer
    layer2 = HiddenLayer(rng, input=layer1.output,
                         n_in=1000, n_out=1000,
                         activation=relu)

    # classify the values of the fully-connected layer
    layer3 = LogisticRegression(input=layer2.output, n_in=1000, n_out=2)

    # the cost we minimize during training is the NLL of the model
    cost = layer3.negative_log_likelihood(y)

    # create a list of all model parameters to be fit by gradient descent
    params = layer3.params + layer2.params + layer1.params + layer0.params
    # params = layer3.params + layer0.params + layer1.params

    # create a list of gradients for all model parameters
    grads = T.grad(cost, params)

    # train_model is a function that updates the model parameters by SGD.
    # Since this model has many parameters, it would be tedious to manually
    # create an update rule for each one, so we build the updates list by
    # looping over all (params[i], grads[i]) pairs.
    updates = []
    for param_i, grad_i in zip(params, grads):
        updates.append((param_i, param_i - learning_rate * grad_i))

    return cost, layer3, updates
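# `_dropout_from_layer` is used throughout this listing but not shown. Below is
# a minimal sketch of what such a helper typically looks like (the standard
# Theano binomial-mask dropout recipe). This is an assumption, not the
# project's actual implementation: some call sites here pass a numpy
# RandomState while others pass a RandomStreams object, so the real helper
# may accept either and skip the RandomStreams construction.
import theano
import theano.tensor as T
from theano.tensor.shared_randomstreams import RandomStreams

def _dropout_from_layer(rng, layer, p):
    # p is the probability of dropping a unit
    srng = RandomStreams(rng.randint(999999))
    # 1s in the mask mean "keep", so the binomial uses probability 1 - p
    mask = srng.binomial(n=1, p=1 - p, size=layer.shape)
    # cast the mask so int * float32 does not silently promote to float64
    return layer * T.cast(mask, theano.config.floatX)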
def __init__(self, label_struct, n_features, filter_h=1, presample_size=1,
             batch_size=512, nkerns=[10, 10, 10, 30], dropout_rate=0):
    self.label_struct = label_struct
    self.batch_size = batch_size
    self.nkerns = nkerns
    self.dropout_rate = dropout_rate

    rng = numpy.random.RandomState(39316)
    print '...building the model'

    x = T.tensor3('x')
    xs = T.matrix('xs')
    y = T.imatrix('y')

    # Reshape the tensor of signals of shape (batch_size, num_sensor, 50)
    # to a 4D tensor
    layer0_input = x.dimshuffle(0, 'x', 1, 2)

    # Construct the first convolutional pooling layer:
    # filtering reduces the signal size to (num_sensor, (50-10)/2+1) = (1, 21),
    # so the 4D output tensor is of shape
    # (batch_size, nkerns[0], num_sensor/filter_height, 21)
    if presample_size > 1:
        layer0_input = downsample.max_pool_2d(input=layer0_input,
                                              ds=(1, presample_size),)
    if filter_h == 0:
        filter_h = n_features['alg']
    layer0 = ConvPoolLayer(
        rng,
        input=layer0_input,
        filter_shape=(nkerns[0], 1, filter_h, 10),
        signal_shape=(batch_size, 1, n_features['alg'], 50),
        stride=(filter_h, 2),
    )
    layer1_input = T.switch(layer0.output < 0, 0, layer0.output)
    h = n_features['alg'] / filter_h

    # Construct the second convolutional pooling layer:
    # filtering reduces the signal size to (h, (21-5)/2+1) = (h, 9),
    # maxpooling reduces this further to (h, 9/3) = (h, 3),
    # so the 4D output tensor is of shape (batch_size, nkerns[1], h, 3)
    layer1 = ConvPoolLayer(
        rng,
        input=layer1_input,
        filter_shape=(nkerns[1], nkerns[0], 1, 5),
        signal_shape=(batch_size, nkerns[0], h, 21),
        stride=2,
        poolsize=3,
    )
    layer2_input = T.switch(layer1.output < 0, 0, layer1.output)

    # Construct the third convolutional pooling layer:
    # filtering reduces the signal size to (h, (3-3)+1) = (h, 1),
    # so the 4D output tensor is of shape (batch_size, nkerns[2], h, 1)
    layer2 = ConvPoolLayer(
        rng,
        input=layer2_input,
        filter_shape=(nkerns[2], nkerns[1], 1, 3),
        signal_shape=(batch_size, nkerns[1], h, 3),
    )

    # This generates a matrix of shape (batch_size, nkerns[2] * h)
    convlayer_output = T.switch(layer2.output < 0, 0, layer2.output).flatten(2)
    layer3_input = T.concatenate([convlayer_output, xs], axis=1)
    n_in3 = nkerns[2] * h + n_features['xs']

    # construct a fully-connected tanh layer, with dropout on its input
    dropout_layer3_input = _dropout_from_layer(rng, layer3_input, p=dropout_rate)
    dropout_layer3 = DropoutHiddenLayer(
        rng=rng,
        input=dropout_layer3_input,
        n_in=n_in3,
        n_out=nkerns[3],
        activation=T.tanh,
        dropout_rate=dropout_rate,
        use_bias=True
    )

    # reuse the parameters from the dropout layer here, in a different path
    # through the graph (weights rescaled for the no-dropout path)
    layer3 = HiddenLayer(
        rng,
        input=layer3_input,
        W=dropout_layer3.W * (1 - dropout_rate),
        b=dropout_layer3.b,
        n_in=n_in3,
        n_out=nkerns[3],
        activation=T.tanh
    )

    dropout_layer4_input = T.switch(dropout_layer3.output < 0, 0, dropout_layer3.output)
    layer4_input = T.switch(layer3.output < 0, 0, layer3.output)
    n_in4 = nkerns[3]

    # classify the values of the fully-connected layer
    dropout_layer4 = GroupedLogisticRegression(
        input=dropout_layer4_input, n_in=n_in4, n_outs=label_struct
    )
    layer4 = GroupedLogisticRegression(
        input=layer4_input,
        W=dropout_layer4.W * (1 - dropout_rate),
        b=dropout_layer4.b,
        n_in=n_in4,
        n_outs=label_struct
    )

    self.cost = layer4.negative_log_likelihood(y)
    self.dropout_cost = dropout_layer4.negative_log_likelihood(y)

    # create a list of all model parameters to be fit by gradient descent
    self.params = (dropout_layer4.params + dropout_layer3.params +
                   layer2.params + layer1.params + layer0.params)
    self.layers = [layer0, layer1, layer2, dropout_layer3, layer3,
                   dropout_layer4, layer4]

    self.x = x
    self.y = y
    self.xs = xs
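# A minimal sketch (not part of the original listing) of how this model might
# be trained: gradients are taken on the dropout path (self.dropout_cost),
# while self.cost uses the rescaled weights for evaluation. `model`,
# `learning_rate`, and the shared datasets train_x / train_xs / train_y are
# assumed names.
grads = T.grad(model.dropout_cost, model.params)
updates = [(p, p - learning_rate * g) for p, g in zip(model.params, grads)]
index = T.lscalar('index')
train_model = theano.function(
    [index], model.dropout_cost, updates=updates,
    givens={
        model.x: train_x[index * model.batch_size:(index + 1) * model.batch_size],
        model.xs: train_xs[index * model.batch_size:(index + 1) * model.batch_size],
        model.y: train_y[index * model.batch_size:(index + 1) * model.batch_size],
    })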
def __init__(self, numpy_rng, n_ins_rgb=26880, n_ins_depth=26880,
             n_hidden_rgb=512, n_hidden_depth=512,
             n_selector_hidden_rgb=[256, 128],
             n_selector_hidden_depth=[256, 128],
             n_selector_hidden_joint=[256, 128],
             dropout_rate_rgb=0.5, dropout_rate_depth=0.5,
             dropout_rate_joint=0.5,
             dropout_s_rgb=[0.5, 0.5, 0.5], dropout_s_depth=[0.5, 0.5, 0.5],
             dropout_s_joint=[0.5, 0.5, 0.5],
             n_group=2, n_class=249, weight_decay=0.00004,
             activation=T.nnet.relu, selection_penalty=1):
    self.prediction1 = T.matrix('x1')
    self.prediction2 = T.matrix('x2')
    self.x1 = T.matrix('f1')
    self.x2 = T.matrix('f2')
    self.y = T.ivector('y')  # the labels are presented as a 1D vector of integer labels
    self.selection_penalty = theano.shared(
        numpy.asarray(selection_penalty, dtype=theano.config.floatX))

    self.params = []
    self.paramsForNeuronGroup = []
    self.paramsForSelector = []
    self.W = []

    rng = numpy_rng
    srng = theano.tensor.shared_randomstreams.RandomStreams(
        rng.randint(999999))

    dropout_input_rgb = _dropout_from_layer(srng, self.x1, p=dropout_rate_rgb)
    dropout_input_depth = _dropout_from_layer(srng, self.x2, p=dropout_rate_depth)

    self.rgb_AHL = adaptiveHiddenLayer(
        rng=rng,
        input=self.x1,
        dropped_input=self.x1,
        n_in=n_ins_rgb,
        n_hidden=n_hidden_rgb,
        n_selector_hidden=n_selector_hidden_rgb,
        n_group=2,
        activation=T.nnet.relu,
        dropout_rate=dropout_rate_rgb,
        dropout_s=dropout_s_rgb,
        belta=1)
    self.params.extend(self.rgb_AHL.params)
    self.paramsForNeuronGroup.extend(self.rgb_AHL.params_n)
    self.paramsForSelector.extend(self.rgb_AHL.params_s)

    self.depth_AHL = adaptiveHiddenLayer(
        rng=rng,
        input=self.x2,
        dropped_input=self.x2,
        n_in=n_ins_depth,
        n_hidden=n_hidden_depth,
        n_selector_hidden=n_selector_hidden_depth,
        n_group=2,
        activation=T.nnet.relu,
        dropout_rate=dropout_rate_depth,
        dropout_s=dropout_s_depth,
        belta=1)
    self.params.extend(self.depth_AHL.params)
    self.paramsForNeuronGroup.extend(self.depth_AHL.params_n)
    self.paramsForSelector.extend(self.depth_AHL.params_s)

    joint_input = T.concatenate(
        [self.rgb_AHL.output, self.depth_AHL.output], axis=1)
    joint_dropout_input = T.concatenate(
        [self.rgb_AHL.dropped_output, self.depth_AHL.dropped_output], axis=1)

    self.joint_AHL = adaptiveHiddenLayer(
        rng=rng,
        input=joint_input,
        dropped_input=joint_dropout_input,
        n_in=n_hidden_rgb + n_hidden_depth,
        n_hidden=n_class,
        n_selector_hidden=n_selector_hidden_joint,
        n_group=2,
        activation='softmax',
        dropout_rate=dropout_rate_joint,
        dropout_s=dropout_s_joint,
        belta=1)

    # rebuild the parameter lists from the three adaptive layers
    self.params = (self.joint_AHL.params + self.depth_AHL.params +
                   self.rgb_AHL.params)
    self.paramsForNeuronGroup = (self.joint_AHL.params_n +
                                 self.depth_AHL.params_n +
                                 self.rgb_AHL.params_n)
    self.paramsForSelector = (self.joint_AHL.params_s +
                              self.depth_AHL.params_s +
                              self.rgb_AHL.params_s)

    cost1 = T.log(
        self.joint_AHL.dropped_p_y_given_x[T.arange(self.y.shape[0]), self.y])
    cost3 = self.joint_AHL.SBR + self.depth_AHL.SBR + self.rgb_AHL.SBR
    L2_norm = (self.joint_AHL.L2_norm + self.depth_AHL.L2_norm +
               self.rgb_AHL.L2_norm)

    self.y_pred = T.argmax(self.joint_AHL.p_y_given_x, axis=1)
    self.dropout_finetune_cost = (-T.mean(cost1) +
                                  weight_decay * L2_norm - cost3)
    self.errors = T.mean(T.neq(self.y_pred, self.y))
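# A minimal sketch (not part of the original listing) of how this two-stream
# network might be fine-tuned with the combined cost. `net`, `finetune_lr`,
# `batch_size`, and the shared datasets train_rgb / train_depth / train_labels
# and test_* are assumed names.
gparams = T.grad(net.dropout_finetune_cost, net.params)
updates = [(p, p - finetune_lr * g) for p, g in zip(net.params, gparams)]
index = T.lscalar('index')
finetune_fn = theano.function(
    [index], net.dropout_finetune_cost, updates=updates,
    givens={
        net.x1: train_rgb[index * batch_size:(index + 1) * batch_size],
        net.x2: train_depth[index * batch_size:(index + 1) * batch_size],
        net.y: train_labels[index * batch_size:(index + 1) * batch_size],
    })
test_error_fn = theano.function(
    [index], net.errors,
    givens={
        net.x1: test_rgb[index * batch_size:(index + 1) * batch_size],
        net.x2: test_depth[index * batch_size:(index + 1) * batch_size],
        net.y: test_labels[index * batch_size:(index + 1) * batch_size],
    })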
def __init__(self, rng, input, dropped_input, n_in, n_hidden,
             n_selector_hidden=[256, 128], n_group=2, activation=T.nnet.relu,
             W_n=None, b_n=None, W_s=None, b_s=None,
             dropout_rate=0, dropout_s=[0.5, 0.5, 0.5], belta=1):
    n_selector_in = n_in

    # create shared weight variables for the neuron groups
    if W_n is None:
        W_n = []
        for group_num in range(n_group):
            W_values = numpy.asarray(
                rng.randn(n_in, n_hidden) * numpy.sqrt(2.0 / (n_in + n_hidden)),
                dtype=theano.config.floatX
            )
            if activation == theano.tensor.nnet.sigmoid:
                W_values *= 4
            W_n.append(theano.shared(value=W_values,
                                     name='W' + str(group_num), borrow=True))

    # create shared bias variables for the neuron groups
    if b_n is None:
        b_n = []
        for group_num in range(n_group):
            b_values = numpy.zeros((n_hidden,), dtype=theano.config.floatX)
            b_n.append(theano.shared(value=b_values,
                                     name='b' + str(group_num), borrow=True))

    # create shared weight variables for the selector
    if W_s is None:
        W_s = []
        for layer_idx in range(len(n_selector_hidden)):
            if layer_idx == 0:
                W_values = numpy.asarray(
                    rng.randn(n_selector_in, n_selector_hidden[0]) *
                    numpy.sqrt(2.0 / (n_selector_in + n_selector_hidden[0])),
                    dtype=theano.config.floatX
                )
            else:
                W_values = numpy.asarray(
                    rng.randn(n_selector_hidden[layer_idx - 1],
                              n_selector_hidden[layer_idx]) *
                    numpy.sqrt(2.0 / (n_selector_hidden[layer_idx - 1] +
                                      n_selector_hidden[layer_idx])),
                    dtype=theano.config.floatX
                )
            if activation == theano.tensor.nnet.sigmoid:
                W_values *= 4
            W_s.append(theano.shared(value=W_values,
                                     name='Ws' + str(layer_idx), borrow=True))
        if len(n_selector_hidden) != 0:
            W_values = numpy.zeros((n_selector_hidden[-1], n_group),
                                   dtype=theano.config.floatX)
        else:
            W_values = numpy.zeros((n_in, n_group),
                                   dtype=theano.config.floatX)
        W_s.append(theano.shared(value=W_values, name='Wo', borrow=True))

    # create shared bias variables for the selector
    if b_s is None:
        b_s = []
        for layer_idx in range(len(n_selector_hidden)):
            if layer_idx == 0:
                b_values = numpy.zeros((n_selector_hidden[0],),
                                       dtype=theano.config.floatX)
            else:
                b_values = numpy.zeros((n_selector_hidden[layer_idx],),
                                       dtype=theano.config.floatX)
            b_s.append(theano.shared(value=b_values,
                                     name='bs' + str(layer_idx), borrow=True))
        b_values = numpy.zeros((n_group,), dtype=theano.config.floatX)
        b_s.append(theano.shared(value=b_values, name='bo', borrow=True))

    # We construct the model set: for the same input, we build n_group copies
    # of the hidden layer and collect them into self.hidden_layers.
    self.hidden_layers = []
    self.dropout_hidden_layers = []

    # prepare the dropped input for the dropout neuron groups
    srng = theano.tensor.shared_randomstreams.RandomStreams(rng.randint(999999))
    dropout_input = _dropout_from_layer(srng, dropped_input, p=dropout_rate)

    # construct the neuron groups and their corresponding dropout layers
    if activation == 'softmax':
        for group_num in range(n_group):
            dropoutHiddenLayer = LogisticRegression(
                input=dropout_input,
                n_in=n_in,
                n_out=n_hidden,
                W=W_n[group_num],
                b=b_n[group_num],
            )
            hiddenLayer = LogisticRegression(
                input=input,
                n_in=n_in,
                n_out=n_hidden,
                W=W_n[group_num] * (1 - dropout_rate),
                b=b_n[group_num],
            )
            self.dropout_hidden_layers.append(dropoutHiddenLayer)
            self.hidden_layers.append(hiddenLayer)
    else:
        for group_num in range(n_group):
            dropoutHiddenLayer = HiddenLayer(
                rng=rng,
                input=dropout_input,
                n_in=n_in,
                n_out=n_hidden,
                activation=activation,
                W=W_n[group_num],
                b=b_n[group_num],
            )
            hiddenLayer = HiddenLayer(
                rng=rng,
                input=input,
                n_in=n_in,
                n_out=n_hidden,
                activation=activation,
                W=W_n[group_num] * (1 - dropout_rate),
                b=b_n[group_num],
            )
            self.hidden_layers.append(hiddenLayer)
            self.dropout_hidden_layers.append(dropoutHiddenLayer)

    # construct the selector
    self.selector_hidden_layers = []
    self.dropout_selector_hidden_layers = []
    if len(n_selector_hidden) == 0:
        # prepare the dropped input for the dropout selector
        dropout_selector_input = _dropout_from_layer(srng, dropped_input,
                                                     p=dropout_s[0])
        selectorDropoutLogRegressionLayer = LogisticRegression(
            input=dropout_selector_input,
            n_in=n_in,
            n_out=n_group,
            W=W_s[0],
            b=b_s[0]
        )
        selectorLogRegressionLayer = LogisticRegression(
            input=input,
            n_in=n_in,
            n_out=n_group,
            W=W_s[0] * (1 - dropout_s[0]),
            b=b_s[0]
        )
        self.selector_hidden_layers.append(selectorLogRegressionLayer)
        self.dropout_selector_hidden_layers.append(selectorDropoutLogRegressionLayer)
    else:
        for layer_idx in range(len(n_selector_hidden)):
            if layer_idx == 0:
                dropout_selector_input = _dropout_from_layer(
                    srng, dropped_input, p=dropout_s[0])
            else:
                dropout_selector_input = _dropout_from_layer(
                    srng, self.dropout_selector_hidden_layers[-1].output,
                    p=dropout_s[layer_idx])
            if layer_idx == 0:
                dropoutSelectorHiddenLayer = HiddenLayer(
                    rng=rng,
                    input=dropout_selector_input,
                    n_in=n_selector_in,
                    n_out=n_selector_hidden[0],
                    activation=T.nnet.relu,
                    W=W_s[layer_idx],
                    b=b_s[layer_idx],
                )
                selectorHiddenLayer = HiddenLayer(
                    rng=rng,
                    input=input,
                    n_in=n_selector_in,
                    n_out=n_selector_hidden[0],
                    activation=T.nnet.relu,
                    W=W_s[layer_idx] * (1 - dropout_s[layer_idx]),
                    b=b_s[layer_idx],
                )
            else:
                dropoutSelectorHiddenLayer = HiddenLayer(
                    rng=rng,
                    input=dropout_selector_input,
                    n_in=n_selector_hidden[layer_idx - 1],
                    n_out=n_selector_hidden[layer_idx],
                    activation=T.nnet.relu,
                    W=W_s[layer_idx],
                    b=b_s[layer_idx],
                )
                selectorHiddenLayer = HiddenLayer(
                    rng=rng,
                    input=self.selector_hidden_layers[-1].output,
                    n_in=n_selector_hidden[layer_idx - 1],
                    n_out=n_selector_hidden[layer_idx],
                    activation=T.nnet.relu,
                    W=W_s[layer_idx] * (1 - dropout_s[layer_idx]),
                    b=b_s[layer_idx],
                )
            self.dropout_selector_hidden_layers.append(dropoutSelectorHiddenLayer)
            self.selector_hidden_layers.append(selectorHiddenLayer)

        # prepare the dropped input for the dropout selector output layer
        dropout_selector_input = _dropout_from_layer(
            srng, self.dropout_selector_hidden_layers[-1].output,
            p=dropout_s[-1])
        selectorDropoutLogRegressionLayer = LogisticRegression(
            input=dropout_selector_input,
            n_in=n_selector_hidden[-1],
            n_out=n_group,
            W=W_s[-1],
            b=b_s[-1]
        )
        selectorLogRegressionLayer = LogisticRegression(
            input=self.selector_hidden_layers[-1].output,
            n_in=n_selector_hidden[-1],
            n_out=n_group,
            W=W_s[-1] * (1 - dropout_s[-1]),
            b=b_s[-1]
        )
        self.selector_hidden_layers.append(selectorLogRegressionLayer)
        self.dropout_selector_hidden_layers.append(selectorDropoutLogRegressionLayer)

    # ADD hard thresholding
    # self.dropout_selector_hidden_layers[-1].p_y_given_x = hardtanh(
    #     self.dropout_selector_hidden_layers[-1].p_y_given_x)
    shape_size = input.shape[0]
    zero_vector = T.zeros((shape_size,), dtype='int32')

    self.params = W_n + W_s + b_n + b_s
    self.params_n = W_n + b_n
    self.params_s = W_s + b_s
    self.W = W_n + W_s

    self.L2_norm = 0
    for w in self.W:
        self.L2_norm = self.L2_norm + (w ** 2).sum()

    # SBR penalty
    epsilon = 1e-8
    p1 = T.reshape(
        hardtanh(self.dropout_selector_hidden_layers[-1].p_y_given_x)[
            T.arange(shape_size), zero_vector + 0],
        newshape=(shape_size, 1))
    p2 = T.reshape(
        hardtanh(self.dropout_selector_hidden_layers[-1].p_y_given_x)[
            T.arange(shape_size), zero_vector + 1],
        newshape=(shape_size, 1))
    denominator = T.sum(self.dropout_selector_hidden_layers[-1].p_y_given_x) + epsilon
    p1_r = T.clip(T.sum(T.switch(p1 >= 0.5, p1, 0)) / denominator, epsilon, 1)
    p2_r = T.clip(T.sum(T.switch(p2 >= 0.5, p2, 0)) / denominator, epsilon, 1)
    self.SBR = -(p1_r * T.log(p1_r) + p2_r * T.log(p2_r)) * belta

    # define the network output
    p1_test = T.reshape(
        self.selector_hidden_layers[-1].p_y_given_x[
            T.arange(shape_size), zero_vector + 0],
        newshape=(shape_size, 1))
    p2_test = T.reshape(
        self.selector_hidden_layers[-1].p_y_given_x[
            T.arange(shape_size), zero_vector + 1],
        newshape=(shape_size, 1))
    # hard assigning
    # p1_test = T.switch(p1_test >= 0.5, 1, 0)
    # p2_test = T.switch(p2_test > 0.5, 1, 0)

    if activation == 'softmax':
        self.output = (p1_test * self.hidden_layers[0].p_y_given_x +
                       p2_test * self.hidden_layers[1].p_y_given_x)
        self.dropout_output = (p1 * self.dropout_hidden_layers[0].p_y_given_x +
                               p2 * self.dropout_hidden_layers[1].p_y_given_x)
        self.p_y_given_x = (p1_test * self.hidden_layers[0].p_y_given_x +
                            p2_test * self.hidden_layers[1].p_y_given_x)
        self.dropped_p_y_given_x = (p1 * self.dropout_hidden_layers[0].p_y_given_x +
                                    p2 * self.dropout_hidden_layers[1].p_y_given_x)
    else:
        self.output = (p1_test * self.hidden_layers[0].output +
                       p2_test * self.hidden_layers[1].output)
        self.dropped_output = (p1 * self.dropout_hidden_layers[0].output +
                               p2 * self.dropout_hidden_layers[1].output)

    self.W_n = W_n
    self.b_n = b_n
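# A brief usage sketch (assumed example dimensions only): a two-group adaptive
# hidden layer on 1024-dimensional features with a two-layer selector,
# mirroring how the class is instantiated in the multimodal network above.
rng = numpy.random.RandomState(1234)
x = T.matrix('x')
ahl = adaptiveHiddenLayer(
    rng=rng, input=x, dropped_input=x,
    n_in=1024, n_hidden=512,
    n_selector_hidden=[256, 128], n_group=2,
    activation=T.nnet.relu,
    dropout_rate=0.5, dropout_s=[0.5, 0.5, 0.5], belta=1)
# ahl.output / ahl.dropped_output are the selector-weighted group outputs,
# and ahl.SBR is the penalty term that the network above folds into its
# fine-tuning cost.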
def __init__(self, rng, input, filter_shape, image_shape, poolsize=(2, 2),
             p=0.5):
    super(DropoutLeNetConvPool3dLayer, self).__init__(
        rng=rng, input=input, filter_shape=filter_shape,
        image_shape=image_shape, poolsize=poolsize)
    self.output = _dropout_from_layer(rng, self.output, p=p)
def __init__(self, rng, input, filter_shape, image_shape, poolsize=(2, 2),
             convDropRate=0., poolDropRate=0.):
    """
    Allocate a LeNetConvPoolLayer with shared variable internal parameters.

    :type rng: numpy.random.RandomState
    :param rng: a random number generator used to initialize weights

    :type input: theano.tensor.dtensor4
    :param input: symbolic image tensor, of shape image_shape

    :type filter_shape: tuple or list of length 4
    :param filter_shape: (number of filters, num input feature maps,
                          filter height, filter width)

    :type image_shape: tuple or list of length 4
    :param image_shape: (batch size, num input feature maps,
                         image height, image width)

    :type poolsize: tuple or list of length 2
    :param poolsize: the downsampling (pooling) factor (#rows, #cols)
    """
    assert image_shape[1] == filter_shape[1]
    self.input = input

    # there are "num input feature maps * filter height * filter width"
    # inputs to each hidden unit
    fan_in = numpy.prod(filter_shape[1:])
    # each unit in the lower layer receives a gradient from:
    # "num output feature maps * filter height * filter width" / pooling size
    fan_out = (filter_shape[0] * numpy.prod(filter_shape[2:]) /
               numpy.prod(poolsize))

    # initialize weights with random weights
    W_bound = numpy.sqrt(6. / (fan_in + fan_out))
    self.W = theano.shared(numpy.asarray(rng.uniform(low=-W_bound,
                                                     high=W_bound,
                                                     size=filter_shape),
                                         dtype=theano.config.floatX),
                           borrow=True)

    # the bias is a 1D tensor -- one bias per output feature map
    b_values = numpy.zeros((filter_shape[0],), dtype=theano.config.floatX)
    self.b = theano.shared(value=b_values, borrow=True)

    # convolve input feature maps with filters
    conv_out = conv.conv2d(input=input, filters=self.W,
                           filter_shape=filter_shape, image_shape=image_shape)
    if convDropRate > 0:
        conv_out = _dropout_from_layer(rng, conv_out, p=convDropRate)

    # downsample each feature map individually, using maxpooling
    pooled_out = downsample.max_pool_2d(input=conv_out, ds=poolsize,
                                        ignore_border=True)
    if poolDropRate > 0:
        pooled_out = _dropout_from_layer(rng, pooled_out, p=poolDropRate)

    # add the bias term. Since the bias is a vector (1D array), we first
    # reshape it to a tensor of shape (1, n_filters, 1, 1). Each bias will
    # thus be broadcasted across mini-batches and feature map width & height.
    self.output = T.tanh(pooled_out + self.b.dimshuffle('x', 0, 'x', 'x'))

    # store parameters of this layer
    self.params = [self.W, self.b]
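# A brief usage sketch, assuming the enclosing class is named
# LeNetConvPoolLayer as in the docstring (the class line is not shown in this
# listing): one conv/pool layer over 28x28 single-channel images with 20
# filters of size 5x5 and dropout applied to the convolution output.
rng = numpy.random.RandomState(23455)
x = T.matrix('x')
batch_size = 500
layer_input = x.reshape((batch_size, 1, 28, 28))
layer = LeNetConvPoolLayer(rng, input=layer_input,
                           image_shape=(batch_size, 1, 28, 28),
                           filter_shape=(20, 1, 5, 5),
                           poolsize=(2, 2),
                           convDropRate=0.2)
# layer.output has shape (batch_size, 20, 12, 12): (28-5+1) = 24 after the
# convolution, then 24/2 = 12 after 2x2 max pooling.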
def __init__(self, numpy_rng=None, useRelu=None, W_distribution=None,
             LayerNodes=None, dropout=None):
    self.n_layers = len(LayerNodes) - 2
    self.dA_layers = []
    self.dropout_layers = []
    self.layers = []

    self.x = T.matrix('x')
    self.y = T.ivector('y')

    next_layer_input = self.x
    next_dropout_layer_input = _dropout_from_layer(numpy_rng, self.x,
                                                   p=dropout[0])
    weight_matrix_sizes = zip(LayerNodes, LayerNodes[1:])

    layer_counter = 0
    for n_in, n_out in weight_matrix_sizes[:-1]:
        if useRelu == True:
            activation = relu
            activation1 = relu
            if layer_counter == 0:
                activation2 = T.nnet.sigmoid
            else:
                activation2 = T.nnet.softplus
        else:
            activation = T.nnet.sigmoid
            activation1 = T.nnet.sigmoid
            activation2 = T.nnet.sigmoid

        W_bound = 4. * numpy.sqrt(6. / (n_in + n_out))
        next_dropout_layer = DropoutHiddenLayer(
            numpy_rng=numpy_rng,
            input=next_dropout_layer_input,
            activation=activation,
            n_in=n_in,
            n_out=n_out,
            W_distribution=W_distribution,
            W_bound=W_bound,
            dropout_rate=dropout[layer_counter + 1])
        self.dropout_layers.append(next_dropout_layer)
        next_dropout_layer_input = next_dropout_layer.output

        # the non-dropout path reuses the dropout layer's parameters,
        # with the weights rescaled by the retain probability
        next_layer = HiddenLayer(numpy_rng=numpy_rng,
                                 input=next_layer_input,
                                 activation=activation,
                                 n_in=n_in,
                                 n_out=n_out,
                                 W=next_dropout_layer.W * (1 - dropout[layer_counter]),
                                 b=next_dropout_layer.b)
        self.layers.append(next_layer)

        dA_layer = dA(numpy_rng=numpy_rng,
                      input=next_layer_input,
                      useRelu=useRelu,
                      activation1=activation1,
                      activation2=activation2,
                      n_visible=n_in,
                      n_hidden=n_out,
                      W=next_dropout_layer.W,
                      b=next_dropout_layer.b)
        self.dA_layers.append(dA_layer)
        next_layer_input = next_layer.output

        if layer_counter == 0:
            self.L1 = abs(next_dropout_layer.W).sum()
            self.L2 = (next_dropout_layer.W ** 2).sum()
        else:
            self.L1 = self.L1 + abs(next_dropout_layer.W).sum()
            self.L2 = self.L2 + (next_dropout_layer.W ** 2).sum()
        layer_counter += 1

    n_in, n_out = weight_matrix_sizes[-1]
    dropout_output_layer = LogisticRegression(
        input=next_dropout_layer_input, n_in=n_in, n_out=n_out)
    self.dropout_layers.append(dropout_output_layer)
    self.L1 = self.L1 + abs(dropout_output_layer.W).sum()
    self.L2 = self.L2 + (dropout_output_layer.W ** 2).sum()
    self.dropout_negative_log_likelihood = \
        self.dropout_layers[-1].negative_log_likelihood(self.y)

    output_layer = LogisticRegression(input=next_layer_input,
                                      n_in=n_in,
                                      n_out=n_out,
                                      W=dropout_output_layer.W * (1 - dropout[-1]),
                                      b=dropout_output_layer.b)
    self.layers.append(output_layer)

    self.error = self.layers[-1].error(self.y)
    self.sensitivity = self.layers[-1].sensitivity(self.y)
    self.specificity = self.layers[-1].specificity(self.y)
    self.class1_pred = self.layers[-1].class1_pred(self.y)

    self.params = [param for layer in self.dropout_layers
                   for param in layer.params]
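# A brief usage sketch, assuming the enclosing class is called
# StackedDropoutNet for illustration (the class line is not shown in this
# listing): two ReLU hidden layers on 784-dimensional inputs, with one dropout
# rate for the input and one per layer as indexed above. W_distribution is
# left at its default because its valid values are not shown here.
numpy_rng = numpy.random.RandomState(89677)
net = StackedDropoutNet(numpy_rng=numpy_rng,
                        useRelu=True,
                        W_distribution=None,
                        LayerNodes=[784, 500, 500, 10],
                        dropout=[0.2, 0.5, 0.5, 0.5])
# net.dropout_negative_log_likelihood drives training; net.error,
# net.sensitivity, and net.specificity evaluate the rescaled-weight path.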
def __init__(self, label_struct, n_features, filter_h=1, presample_size=1,
             batch_size=512, nkerns=[10, 10, 10, 30], dropout_rate=0):
    self.label_struct = label_struct
    self.batch_size = batch_size
    self.nkerns = nkerns
    self.dropout_rate = dropout_rate

    rng = numpy.random.RandomState(39316)
    print '...building the model'

    a = T.tensor3('a')
    l = T.tensor3('l')
    g = T.tensor3('g')
    m = T.matrix('m')
    y = T.imatrix('y')

    # Reshape each matrix of signals of shape (batch_size, 50) to a 4D tensor
    acc_input = a.dimshuffle(0, 'x', 1, 2)
    lin_input = l.dimshuffle(0, 'x', 1, 2)
    gyro_input = g.dimshuffle(0, 'x', 1, 2)

    acc_wing = ConvWing(rng, acc_input, n_features['acc'], filter_h,
                        presample_size, nkerns, batch_size)
    lin_wing = ConvWing(rng, lin_input, n_features['lin'], filter_h,
                        presample_size, nkerns, batch_size)
    gyro_wing = ConvWing(rng, gyro_input, n_features['gyro'], filter_h,
                         presample_size, nkerns, batch_size)

    layer3_input = T.concatenate([acc_wing.output, lin_wing.output,
                                  gyro_wing.output, m], axis=1)
    # n_in3 = nkerns[2] * 3 + n_features['mag']
    n_in3 = (acc_wing.outdim + lin_wing.outdim + gyro_wing.outdim +
             n_features['mag'])

    # construct a fully-connected tanh layer, with dropout on its input
    dropout_layer3_input = _dropout_from_layer(rng, layer3_input, p=dropout_rate)
    dropout_layer3 = DropoutHiddenLayer(
        rng=rng,
        input=dropout_layer3_input,
        n_in=n_in3,
        n_out=nkerns[3],
        activation=T.tanh,
        dropout_rate=dropout_rate,
        use_bias=True
    )

    # reuse the parameters from the dropout layer here, in a different path
    # through the graph (weights rescaled for the no-dropout path)
    layer3 = HiddenLayer(
        rng,
        input=layer3_input,
        W=dropout_layer3.W * (1 - dropout_rate),
        b=dropout_layer3.b,
        n_in=n_in3,
        n_out=nkerns[3],
        activation=T.tanh
    )

    dropout_layer4_input = T.switch(dropout_layer3.output < 0, 0, dropout_layer3.output)
    layer4_input = T.switch(layer3.output < 0, 0, layer3.output)
    n_in4 = nkerns[3]

    # classify the values of the fully-connected layer
    # layer4 = LogisticRegression(input=layer4_input, n_in=nkerns[3],
    #                             n_out=numpy.sum(label_struct))
    # layer4 = GroupedLogisticRegression(input=layer4_input, n_in=nkerns[3],
    #                                    n_outs=label_struct)
    dropout_layer4 = GroupedLogisticRegression(
        input=dropout_layer4_input, n_in=n_in4, n_outs=label_struct
    )
    layer4 = GroupedLogisticRegression(
        input=layer4_input,
        W=dropout_layer4.W * (1 - dropout_rate),
        b=dropout_layer4.b,
        n_in=n_in4,
        n_outs=label_struct
    )

    self.cost = layer4.negative_log_likelihood(y)
    self.dropout_cost = dropout_layer4.negative_log_likelihood(y)

    # create a list of all model parameters to be fit by gradient descent
    self.params = (dropout_layer4.params + dropout_layer3.params +
                   acc_wing.params + lin_wing.params + gyro_wing.params)
    self.layers = [acc_wing, lin_wing, gyro_wing, dropout_layer3, layer3,
                   dropout_layer4, layer4]

    self.a = a
    self.l = l
    self.g = g
    self.y = y
    self.m = m