def encoder(x, params, config):
    mb_size = config['mb_size']
    num_hidden = config['num_hidden']

    x = T.specify_shape(x, (128, 1, 28, 28))

    #c_1 = ConvPoolLayer(in_length = 4000, batch_size = mb_size, stride = 2, activation = "relu", batch_norm = True, W = params['Wc_enc_1'], b = params['bc_enc_1'])
    #c_2 = ConvPoolLayer(in_length = 399, batch_size = mb_size, stride = 2, activation = "relu", batch_norm = True, W = params['Wc_enc_2'], b = params['bc_enc_2'])
    #c_3 = ConvPoolLayer(in_length = 38, batch_size = mb_size, stride = 2, activation = "relu", batch_norm = True, W = params['Wc_enc_3'], b = params['bc_enc_3'])

    h_out_1 = HiddenLayer(num_in=784, num_out=num_hidden, W=params['W_enc_1'], b=params['b_enc_1'], activation='relu', batch_norm=True)
    h_out_2 = HiddenLayer(num_in=num_hidden, num_out=num_hidden, W=params['W_enc_2'], b=params['b_enc_2'], activation='relu', batch_norm=True)

    print "x ndim", x.ndim

    #c_1_value = T.specify_shape(c_1.output(x), (128, 96, 16, 16))
    #c_2_value = c_2.output(c_1_value)
    #c_3_value = c_3.output(c_2_value)

    h_out_1_value = T.specify_shape(h_out_1.output(x.flatten(2)), (128, num_hidden))
    h_out_2_value = h_out_2.output(h_out_1_value)

    return {'h': h_out_2_value}
def __init__(self, data, all_y_trues, neurons_in_hl=[0]):
    # Data members
    self.data = data
    self.all_y_trues = all_y_trues
    self.hidden_layers = []

    # # Hidden Layer 1
    # self.hl = HiddenLayer(neurons_in_hl[0])

    # Build each hidden layer and save it in the hidden_layers array
    amount_of_weights = len(self.data[0])
    for hl_count in range(len(neurons_in_hl)):
        hl = HiddenLayer(neurons_in_hl[hl_count])
        for neuron in hl.neurons():
            neuron.changeProps(
                [np.random.normal() for i in range(amount_of_weights)],
                np.random.normal())
        amount_of_weights = neurons_in_hl[hl_count]
        self.hidden_layers.append(hl)

    # Output neuron
    self.o1 = Neuron(
        [np.random.normal() for i in range(amount_of_weights)],
        np.random.normal())
class MLP(object):
    def __init__(self, input, label, n_in, n_hidden, n_out, rng=None):
        self.x = input
        self.y = label

        if rng is None:
            rng = np.random.RandomState(1234)

        # construct hidden layer
        self.hidden_layer = HiddenLayer(input=self.x, n_in=n_in, n_out=n_hidden,
                                        rng=rng, activation=tanh)

        # construct log_layer
        self.log_layer = LR(input=self.hidden_layer.output, label=self.y,
                            n_in=n_hidden, n_out=n_out)

    def train(self):
        # forward hidden_layer
        layer_input = self.hidden_layer.forward()
        # forward & backward log_layer
        self.log_layer.train(input=layer_input)
        # backward hidden_layer
        self.hidden_layer.backward(prev_layer=self.log_layer)

    def predict(self, x):
        x = self.hidden_layer.output(input=x)
        return self.log_layer.predict(x)
class MLP(object):
    def __init__(self, input, label, n_in, n_hidden, n_out, rng=None):
        self.x = input
        self.y = label

        if rng is None:
            rng = numpy.random.RandomState(1234)

        # construct hidden_layer (tanh or sigmoid so far)
        self.hidden_layer = HiddenLayer(input=self.x,
                                        n_in=n_in,
                                        n_out=n_hidden,
                                        rng=rng,
                                        activation=numpy.tanh)

        # construct log_layer (softmax)
        self.log_layer = LogisticRegression(input=self.hidden_layer.output,
                                            label=self.y,
                                            n_in=n_hidden,
                                            n_out=n_out)

    def train(self):
        layer_input = self.hidden_layer.forward()
        self.log_layer.train(input=layer_input)
        self.hidden_layer.backward(prev_layer=self.log_layer)

    def predict(self, x):
        x = self.hidden_layer.output(x)
        return self.log_layer.predict(x)
def __init__(self, att_number, learn_rate, epochs, n_hidden_layer, n_output_layer):
    self.epochs = epochs
    self.learn_rate = learn_rate
    self.att_number = att_number
    self.n_hidden_layer = n_hidden_layer
    self.n_output_layer = n_output_layer
    self.hidden_layer = HiddenLayer(att_number, learn_rate, n_hidden_layer)
    self.output_layer = OutputLayer(n_hidden_layer, learn_rate, n_output_layer)
def __init__(self, N, label, n_hidden, n_out, image_size, channel, n_kernels,
             kernel_sizes, pool_sizes, rng=None, activation=ReLU):
    if rng is None:
        rng = numpy.random.RandomState(1234)

    self.N = N
    self.n_hidden = n_hidden
    self.n_kernels = n_kernels
    self.pool_sizes = pool_sizes
    self.conv_layers = []
    self.conv_sizes = []

    # construct 1st conv_layer
    conv_layer0 = ConvPoolLayer(N, image_size, channel, n_kernels[0],
                                kernel_sizes[0], pool_sizes[0], rng, activation)
    self.conv_layers.append(conv_layer0)
    conv_size = [(image_size[0] - kernel_sizes[0][0] + 1) / pool_sizes[0][0],
                 (image_size[1] - kernel_sizes[0][1] + 1) / pool_sizes[0][1]]
    self.conv_sizes.append(conv_size)

    # construct 2nd conv_layer
    conv_layer1 = ConvPoolLayer(N, conv_size, n_kernels[0], n_kernels[1],
                                kernel_sizes[1], pool_sizes[1], rng, activation)
    self.conv_layers.append(conv_layer1)
    conv_size = [(conv_size[0] - kernel_sizes[1][0] + 1) / pool_sizes[1][0],
                 (conv_size[1] - kernel_sizes[1][1] + 1) / pool_sizes[1][1]]  # was kernel_sizes[1][0] on both axes
    self.conv_sizes.append(conv_size)

    # construct hidden_layer
    self.hidden_layer = HiddenLayer(None,
                                    n_kernels[-1] * conv_size[0] * conv_size[1],
                                    n_hidden, None, None, rng, activation)

    # construct log_layer
    self.log_layer = LogisticRegression(None, label, n_hidden, n_out)
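# A self-contained sketch (plain Python; the helper name is hypothetical) of the
# feature-map arithmetic used in the constructor above: a "valid" convolution
# shrinks each axis by kernel_size - 1, then non-overlapping pooling divides it
# by the pool width.
def conv_pool_out_size(in_size, kernel_size, pool_size):
    return (in_size - kernel_size + 1) // pool_size

# e.g. a 28x28 input with 5x5 kernels and 2x2 pooling:
assert conv_pool_out_size(28, 5, 2) == 12  # after the first conv/pool layer
assert conv_pool_out_size(12, 5, 2) == 4   # after the second conv/pool layer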
def decoder(z, z_extra, params, config):
    mb_size = config['mb_size']
    num_latent = config['num_latent']
    num_hidden = config['num_hidden']

    h_out_1 = HiddenLayer(num_in=num_latent, num_out=num_hidden, W=params['W_dec_1'], b=params['b_dec_1'], activation='relu', batch_norm=True)
    h_out_2 = HiddenLayer(num_in=num_hidden, num_out=num_hidden, W=params['W_dec_2'], b=params['b_dec_2'], activation='relu', batch_norm=True)
    h_out_3 = HiddenLayer(num_in=num_hidden, num_out=4096, activation='relu', W=params['W_dec_3'], b=params['b_dec_3'], batch_norm=True)

    c1 = DeConvLayer(in_channels=512, out_channels=256, activation='relu', up_rate=5, W=params['Wc_dec_1'], b=params['bc_dec_1'], batch_norm=True)
    c2 = DeConvLayer(in_channels=256, out_channels=128, activation='relu', up_rate=10, W=params['Wc_dec_2'], b=params['bc_dec_2'], batch_norm=False)
    c3 = DeConvLayer(in_channels=128, out_channels=1, activation=None, up_rate=10, W=params['Wc_dec_3'], b=params['bc_dec_3'], batch_norm=False)

    z = T.concatenate([z, z_extra], axis=1)

    h_out_1_value = h_out_1.output(z)
    h_out_2_value = h_out_2.output(h_out_1_value)
    h_out_3_value = h_out_3.output(h_out_2_value)

    c1_o = c1.output(h_out_3_value.reshape((128, 512, 8, 1)))
    c2_o = c2.output(c1_o)
    c3_o = c3.output(c2_o)

    out = c3_o.reshape((128, 4000))

    return {'h': out}
def __init__(self, rng, input, n_in, n_hidden, n_out, srng=None, dropout_rate=0,
             activation='tanh', outputActivation='softmax', params=None):
    """Initialize the parameters for the multilayer perceptron

    rng: random number generator, e.g. numpy.random.RandomState(1234)
    input: theano.tensor matrix of shape (n_examples, n_in)
    n_in: int, dimensionality of input
    n_hidden: int, number of hidden units
    n_out: int, number of output units
    dropout_rate: float, if dropout_rate is nonzero, dropout is applied
        in the hidden layer
    activation: string, nonlinearity to be applied in the hidden layer
    """
    hiddenLayer = HiddenLayer(rng=rng, input=input,
                              n_in=n_in, n_out=n_hidden,
                              activation=activation,
                              params=maybe(lambda: params[0]))
    h = hiddenLayer.output
    if dropout_rate > 0:
        assert (srng is not None)
        h = dropout(srng, dropout_rate, h)

    outputLayer = HiddenLayer(rng=rng, input=h,
                              n_in=n_hidden, n_out=n_out,
                              activation=outputActivation,
                              params=maybe(lambda: params[1]))

    self.layers = [hiddenLayer, outputLayer]
    self.params = layers_params(self.layers)
    self.L1 = layers_L1(self.layers)
    self.L2_sqr = layers_L2_sqr(self.layers)
    self.output = outputLayer.output
def __init__(self, n_voc, trainset, testset, dataname, classes, prefix):
    if prefix != None:
        prefix += '/'
    self.trainset = trainset
    self.testset = testset

    docs = T.imatrix()
    label = T.ivector()
    length = T.fvector()
    sentencenum = T.fvector()
    wordmask = T.fmatrix()
    sentencemask = T.fmatrix()
    maxsentencenum = T.iscalar()
    isTrain = T.iscalar()

    rng = numpy.random

    layers = []
    layers.append(EmbLayer(rng, docs, n_voc, 200, 'emblayer', dataname, prefix))
    layers.append(LSTMLayer(rng, layers[-1].output, wordmask, 200, 200, 'wordlstmlayer', prefix))
    layers.append(MeanPoolLayer(layers[-1].output, length))
    layers.append(SentenceSortLayer(layers[-1].output, maxsentencenum))
    layers.append(LSTMLayer(rng, layers[-1].output, sentencemask, 200, 200, 'sentencelstmlayer', prefix))
    layers.append(MeanPoolLayer(layers[-1].output, sentencenum))
    layers.append(HiddenLayer(rng, layers[-1].output, 200, 200, 'fulllayer', prefix))
    layers.append(HiddenLayer(rng, layers[-1].output, 200, int(classes), 'softmaxlayer', prefix, activation=T.nnet.softmax))
    self.layers = layers

    cost = -T.mean(T.log(layers[-1].output)[T.arange(label.shape[0]), label], acc_dtype='float32')
    correct = T.sum(T.eq(T.argmax(layers[-1].output, axis=1), label), acc_dtype='int32')
    err = T.argmax(layers[-1].output, axis=1) - label
    mse = T.sum(err * err)

    params = []
    for layer in layers:
        params += layer.params
    L2_rate = numpy.float32(1e-5)
    for param in params[1:]:
        cost += T.sum(L2_rate * (param * param), acc_dtype='float32')
    gparams = [T.grad(cost, param) for param in params]
    updates = AdaUpdates(params, gparams, 0.95, 1e-6)

    self.train_model = theano.function(
        inputs=[docs, label, length, sentencenum, wordmask, sentencemask, maxsentencenum],
        outputs=cost,
        updates=updates,
    )

    self.test_model = theano.function(
        inputs=[docs, label, length, sentencenum, wordmask, sentencemask, maxsentencenum],
        outputs=[correct, mse],
    )
def encoder(x, params, config):
    mb_size = config['mb_size']
    num_hidden = config['num_hidden']

    h_out_1 = HiddenLayer(num_in=4000, num_out=num_hidden, W=params['W_enc_1'], b=params['b_enc_1'], activation='relu', batch_norm=True)
    h_out_2 = HiddenLayer(num_in=num_hidden, num_out=num_hidden, W=params['W_enc_2'], b=params['b_enc_2'], activation='relu', batch_norm=True)

    h_out_1_value = h_out_1.output(x)
    h_out_2_value = h_out_2.output(h_out_1_value)

    return {'h': h_out_2_value}
def __init__(self, rng, input, n_hidden, n_out, embeddingsLookups, embedding_matrix_to_update):
    """Initialize the parameters for the multilayer perceptron"""
    self.n_hidden = n_hidden
    self.n_out = n_out
    self.ft_names = []
    #for ft in embeddingsLookups:
    #    self.ft_names.append(ft.getName())

    # First a lookup layer to map indices to their corresponding embedding vector
    self.embeddingLayer = EmbeddingLayer(input, embeddingsLookups)

    # Since we are dealing with a one-hidden-layer MLP, this translates into a
    # HiddenLayer with a tanh activation function connected to the
    # LogisticRegression layer; the activation function can be replaced by
    # sigmoid or any other nonlinear function
    self.hiddenLayer = HiddenLayer(rng=rng, input=self.embeddingLayer.output,
                                   n_in=self.embeddingLayer.n_out,
                                   n_out=n_hidden,
                                   activation=T.tanh)

    # The logistic regression layer gets as input the hidden units
    # of the hidden layer
    self.logRegressionLayer = SoftmaxLayer(input=self.hiddenLayer.output,
                                           n_in=n_hidden,
                                           n_out=n_out)

    # L1 norm; one regularization option is to enforce the L1 norm to be small
    self.L1 = abs(self.hiddenLayer.W).sum() \
        + abs(self.logRegressionLayer.W).sum()

    # square of the L2 norm; one regularization option is to enforce the
    # square of the L2 norm to be small
    self.L2_sqr = (self.hiddenLayer.W ** 2).sum() \
        + (self.logRegressionLayer.W ** 2).sum()

    # the negative log likelihood of the MLP is given by the negative log
    # likelihood of the output of the model, computed in the logistic
    # regression layer
    self.negative_log_likelihood = self.logRegressionLayer.negative_log_likelihood

    # the same holds for the function computing the number of errors
    self.errors = self.logRegressionLayer.errors

    # the parameters of the model are the parameters of the two layers it is
    # made of, plus the embedding matrices to update
    self.params = self.hiddenLayer.params + self.logRegressionLayer.params + embedding_matrix_to_update
def __construct_layers(self, n_ins, n_outs, hidden_layer_sizes, rng):
    for i in range(self.n_layers):
        # layer_input
        if i == 0:
            input_size = n_ins
            layer_input = self.x
        else:
            input_size = hidden_layer_sizes[i - 1]
            layer_input = self.sigmoid_layers[-1].sample_h_given_v()

        sigmoid_layer = HiddenLayer(input=layer_input,
                                    n_in=input_size,
                                    n_out=hidden_layer_sizes[i],
                                    rng=rng,
                                    activation=sigmoid)
        self.sigmoid_layers.append(sigmoid_layer)

        rbm_layer = RBM(input=layer_input,
                        n_visible=input_size,
                        n_hidden=hidden_layer_sizes[i],
                        W=sigmoid_layer.W,
                        hbias=sigmoid_layer.b)
        self.rbm_layers.append(rbm_layer)

    # ay = self.sigmoid_layers[-1].sample_h_given_v()
    # print("sigmoid_layers", ay.shape, sigmoid_layer.W.shape)

    self.lr_layer = LogisticRegression(
        input=self.sigmoid_layers[-1].sample_h_given_v(),
        label=self.y,
        n_in=hidden_layer_sizes[-1],
        n_out=n_outs)

    self.finetune_cost = self.lr_layer.negative_log_likelihood()
def __init__(self, input=None, label=None, n_ins=2, hidden_layer_sizes=[3, 3], n_outs=2, rng=None):
    self.x = input
    self.y = label

    self.sigmoid_layers = []
    self.rbm_layers = []
    self.n_layers = len(hidden_layer_sizes)  # = len(self.rbm_layers)

    if rng is None:
        rng = numpy.random.RandomState(1234)

    assert self.n_layers > 0

    # construct multi-layer
    for i in xrange(self.n_layers):
        # layer_size
        if i == 0:
            input_size = n_ins
        else:
            input_size = hidden_layer_sizes[i - 1]

        # layer_input
        if i == 0:
            layer_input = self.x
        else:
            layer_input = self.sigmoid_layers[-1].sample_h_given_v()

        # construct sigmoid_layer
        sigmoid_layer = HiddenLayer(input=layer_input,
                                    n_in=input_size,
                                    n_out=hidden_layer_sizes[i],
                                    rng=rng,
                                    activation=sigmoid)
        self.sigmoid_layers.append(sigmoid_layer)

        # construct rbm_layer
        rbm_layer = RBM(input=layer_input,
                        n_visible=input_size,
                        n_hidden=hidden_layer_sizes[i],
                        W=sigmoid_layer.W,  # W, b are shared
                        hbias=sigmoid_layer.b)
        self.rbm_layers.append(rbm_layer)

    # layer for output using Logistic Regression
    self.log_layer = LogisticRegression(
        input=self.sigmoid_layers[-1].sample_h_given_v(),
        label=self.y,
        n_in=hidden_layer_sizes[-1],
        n_out=n_outs)

    # finetune cost: the negative log likelihood of the logistic regression layer
    self.finetune_cost = self.log_layer.negative_log_likelihood()
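# A minimal sketch of how the layer sizes chain in the DBN constructor above,
# using its default arguments (n_ins=2, hidden_layer_sizes=[3, 3], n_outs=2):
# each sigmoid/RBM pair maps the previous layer's width to its own, and the
# logistic layer maps the last hidden width to the output width.
n_ins, hidden_layer_sizes, n_outs = 2, [3, 3], 2
sizes = [n_ins] + hidden_layer_sizes + [n_outs]
pairs = list(zip(sizes[:-1], sizes[1:]))
assert pairs == [(2, 3), (3, 3), (3, 2)]  # (n_in, n_out) per layer; the last pair is the LR layer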
def __init__(self, input, label, n_in, n_hidden, n_out, rng=None):
    """
    n_hidden: Python list giving the dimension of each hidden layer
    """
    self.x = input
    self.y = label

    if rng is None:
        rng = numpy.random.RandomState(1234)

    # construct hidden layers, chaining each layer's output into the next
    # (the original fed self.x into every layer, which breaks for n_in != n_out)
    layers_dim = numpy.hstack([n_in, n_hidden])
    self.hidden_layer = []
    for hidden_idx in xrange(len(layers_dim) - 1):
        layer_input = self.x if hidden_idx == 0 else self.hidden_layer[-1].output
        self.hidden_layer.append(
            HiddenLayer(input=layer_input,
                        n_in=layers_dim[hidden_idx],
                        n_out=layers_dim[hidden_idx + 1],
                        rng=rng,
                        activation=tanh))

    # construct log_layer
    self.log_layer = LogisticRegression(input=self.hidden_layer[-1].output,
                                        label=self.y,
                                        n_in=n_hidden[-1],
                                        n_out=n_out)
def decoder(z, params, config):
    mb_size = config['mb_size']
    num_latent = config['num_latent']
    num_hidden = config['num_hidden']

    h_out_1 = HiddenLayer(num_in=num_latent, num_out=num_hidden, W=params['W_dec_1'], b=params['b_dec_1'], activation='relu', batch_norm=True)
    h_out_2 = HiddenLayer(num_in=num_hidden, num_out=num_hidden, W=params['W_dec_2'], b=params['b_dec_2'], activation='relu', batch_norm=True)
    h_out_3 = DenseLayer((mb_size, num_hidden), num_units=4000, nonlinearity=None, W=params['W_dec_3'], b=params['b_dec_3'])

    h_out_1_value = h_out_1.output(z)
    h_out_2_value = h_out_2.output(h_out_1_value)
    h_out_3_value = h_out_3.get_output_for(h_out_2_value)

    return {'h': h_out_3_value}
def build_model(self, n_classes, learning_rate):
    self.model = []
    self.model.append(ConvolutionLayer(n_filters=32, filter_size=(3, 3), learning_rate=learning_rate))
    self.model.append(ConvolutionLayer(n_filters=64, filter_size=(3, 3), learning_rate=learning_rate))
    self.model.append(MaxPoolLayer(window_size=(2, 2), stride=2))
    self.model.append(FlattenLayer())
    self.model.append(HiddenLayer(n_neurons=128, input_shape=294912, activation_function=relu, learning_rate=learning_rate))
    self.model.append(OutputLayer(n_classes, input_shape=128, activation_function=sigmoid, learning_rate=learning_rate))
def discriminator(x, z, params, mb_size, num_hidden, num_latent):
    import random as rng
    srng = theano.tensor.shared_randomstreams.RandomStreams(420)

    c_1 = ConvPoolLayer(in_channels=1, out_channels=128, in_length=4000, batch_size=mb_size, kernel_len=20, stride=10, activation="relu", batch_norm=False, W=params['W_c_1'], b=params['b_c_1'])
    c_2 = ConvPoolLayer(in_channels=128, out_channels=256, in_length=399, batch_size=mb_size, kernel_len=20, stride=10, activation="relu", batch_norm=False, W=params['W_c_2'], b=params['b_c_2'])
    c_3 = ConvPoolLayer(in_channels=256, out_channels=512, in_length=38, batch_size=mb_size, kernel_len=10, stride=5, activation="relu", batch_norm=False, W=params['W_c_3'], b=params['b_c_3'])

    c_h_1 = HiddenLayer(num_in=6 * 512, num_out=num_hidden, W=params['W_ch_1'], b=params['b_ch_1'], activation='relu', batch_norm=False)

    h_out_1 = HiddenLayer(num_in=num_hidden + num_latent, num_out=num_hidden, activation='relu', batch_norm=False, W=params['W_disc_1'], b=params['b_disc_1'])
    h_out_2 = HiddenLayer(num_in=num_hidden, num_out=num_hidden, activation='relu', batch_norm=False, W=params['W_disc_2'], b=params['b_disc_2'])
    h_out_3 = HiddenLayer(num_in=num_hidden, num_out=num_hidden, activation='relu', batch_norm=False, W=params['W_disc_3'], b=params['b_disc_3'])
    h_out_4 = HiddenLayer(num_in=num_hidden, num_out=1, activation=None, batch_norm=False, W=params['W_disc_4'], b=params['b_disc_4'])

    c_1_value = T.specify_shape(c_1.output(dropout(x, 0.8).reshape((128, 1, 4000))), (128, 128, 399))
    c_2_value = T.specify_shape(c_2.output(c_1_value), (128, 256, 38))
    c_3_value = T.specify_shape(c_3.output(c_2_value), (128, 512, 6))

    c_h_1_value = c_h_1.output(c_3_value.flatten(2))

    h_out_1_value = dropout(h_out_1.output(T.concatenate([z, c_h_1_value], axis=1)))
    h_out_2_value = dropout(h_out_2.output(h_out_1_value), 0.2)
    h_out_3_value = dropout(h_out_3.output(h_out_2_value), 0.2)
    h_out_4_value = h_out_4.output(h_out_3_value)

    raw_y = h_out_4_value
    classification = T.nnet.sigmoid(raw_y)

    results = {'c': classification}
    return results
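# A self-contained sketch (hypothetical helper name) of the strided-convolution
# length arithmetic behind the specify_shape calls above:
# out = floor((in - kernel) / stride) + 1 for a "valid" strided convolution.
def strided_conv_out_len(in_len, kernel_len, stride):
    return (in_len - kernel_len) // stride + 1

assert strided_conv_out_len(4000, 20, 10) == 399  # c_1: (128, 128, 399)
assert strided_conv_out_len(399, 20, 10) == 38    # c_2: (128, 256, 38), floored
assert strided_conv_out_len(38, 10, 5) == 6       # c_3: (128, 512, 6)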
def __init__(self, input=None, label=None, n_ins=2, hidden_layer_sizes=[3, 3], n_outs=2, rng=None):
    self.x = input
    self.y = label

    self.sigmoid_layers = []
    self.dA_layers = []
    self.n_layers = len(hidden_layer_sizes)

    if rng is None:
        rng = np.random.RandomState(1234)

    assert self.n_layers > 0

    # construct multi-layer
    for i in xrange(self.n_layers):
        if i == 0:
            input_size = n_ins
        else:
            input_size = hidden_layer_sizes[i - 1]

        if i == 0:
            layer_input = self.x
        else:
            layer_input = self.sigmoid_layers[-1].sample_h_given_v()

        # construct sigmoid_layer
        sigmoid_layer = HiddenLayer(input=layer_input,
                                    n_in=input_size,
                                    n_out=hidden_layer_sizes[i],
                                    rng=rng,
                                    activation=sigmoid)
        self.sigmoid_layers.append(sigmoid_layer)

        dA_layer = dA(input=layer_input,
                      n_visible=input_size,
                      n_hidden=hidden_layer_sizes[i],
                      W=sigmoid_layer.W,
                      hbias=sigmoid_layer.b)
        self.dA_layers.append(dA_layer)

    self.log_layer = LR(input=self.sigmoid_layers[-1].sample_h_given_v(),
                        label=self.y,
                        n_in=hidden_layer_sizes[-1],
                        n_out=n_outs)

    self.finetune_cost = self.log_layer.negative_log_likelihood()
def __init__(self, input, label, n_in, n_hidden, n_out, rng=None):
    self.x = input
    self.y = label

    if rng is None:
        rng = np.random.RandomState(1234)

    # construct hidden layer
    self.hidden_layer = HiddenLayer(input=self.x, n_in=n_in, n_out=n_hidden,
                                    rng=rng, activation=tanh)

    # construct log_layer
    self.log_layer = LR(input=self.hidden_layer.output, label=self.y,
                        n_in=n_hidden, n_out=n_out)
def __init__(self, input, label, n_in, n_hidden, n_out, rng=None):
    self.x = input
    self.y = label

    if rng is None:
        rng = numpy.random.RandomState(1234)

    # construct hidden_layer (tanh or sigmoid so far)
    self.hidden_layer = HiddenLayer(input=self.x,
                                    n_in=n_in,
                                    n_out=n_hidden,
                                    rng=rng,
                                    activation=numpy.tanh)

    # construct log_layer (softmax)
    self.log_layer = LogisticRegression(input=self.hidden_layer.output,
                                        label=self.y,
                                        n_in=n_hidden,
                                        n_out=n_out)
def build_model(self, input_shape, hidden_shape, output_shape, learning_rate, is_agent_mode_enabled):
    self.input_layer = InputLayer()
    self.hidden_layers = []

    last_output_shape = input_shape
    for shape in hidden_shape:
        self.hidden_layers.append(HiddenLayer(shape, last_output_shape, sigmoid, learning_rate, is_agent_mode_enabled))
        last_output_shape = shape

    last_layer_output_shape = self.hidden_layers[-1].get_output_shape()
    self.output_layer = OutputLayer(output_shape, last_layer_output_shape, sigmoid, learning_rate)
def __init__(self, rng=rng, input_shape=1, output_shape=1, dropout=0.7):
    # the rng default refers to an rng defined in the enclosing module
    self.nslices = 4
    self.dropout0 = DropoutLayer(dropout, rng=rng)
    self.dropout1 = DropoutLayer(dropout, rng=rng)
    self.dropout2 = DropoutLayer(dropout, rng=rng)
    self.activation = ActivationLayer('ELU')

    self.W0 = HiddenLayer((self.nslices, 512, input_shape - 1), rng=rng, gamma=0.01)
    self.W1 = HiddenLayer((self.nslices, 512, 512), rng=rng, gamma=0.01)
    self.W2 = HiddenLayer((self.nslices, output_shape, 512), rng=rng, gamma=0.01)

    self.b0 = BiasLayer((self.nslices, 512))
    self.b1 = BiasLayer((self.nslices, 512))
    self.b2 = BiasLayer((self.nslices, output_shape))

    self.layers = [self.W0, self.W1, self.W2, self.b0, self.b1, self.b2]
    self.params = sum([layer.params for layer in self.layers], [])
def encoder(x, params, config):
    mb_size = config['mb_size']
    num_hidden = config['num_hidden']

    c_1 = ConvPoolLayer(in_channels=1, out_channels=128, in_length=4000, batch_size=mb_size, kernel_len=20, stride=10, activation="relu", batch_norm=True, W=params['Wc_enc_1'], b=params['bc_enc_1'])
    c_2 = ConvPoolLayer(in_channels=128, out_channels=256, in_length=399, batch_size=mb_size, kernel_len=20, stride=10, activation="relu", batch_norm=True, W=params['Wc_enc_2'], b=params['bc_enc_2'])
    c_3 = ConvPoolLayer(in_channels=256, out_channels=512, in_length=38, batch_size=mb_size, kernel_len=10, stride=5, activation="relu", batch_norm=True, W=params['Wc_enc_3'], b=params['bc_enc_3'])

    h_out_1 = HiddenLayer(num_in=512 * 6, num_out=num_hidden, W=params['W_enc_1'], b=params['b_enc_1'], activation='relu', batch_norm=True)
    h_out_2 = HiddenLayer(num_in=num_hidden, num_out=num_hidden, W=params['W_enc_2'], b=params['b_enc_2'], activation='relu', batch_norm=True)

    print "x ndim", x.ndim

    c_1_value = T.specify_shape(c_1.output(x.reshape((128, 1, 4000))), (128, 128, 399))
    c_2_value = c_2.output(c_1_value)
    c_3_value = c_3.output(c_2_value)

    h_out_1_value = h_out_1.output(c_3_value.flatten(2))
    h_out_2_value = h_out_2.output(h_out_1_value)

    return {'h': h_out_2_value}
def __init__(self, rng, input, n_in, n_out, layer_sizes=[], dropout_rate=0,
             srng=None, activation='tanh', outputActivation='softmax', params=None):
    """Initialize the parameters for the multilayer perceptron

    rng: random number generator, e.g. numpy.random.RandomState(1234)
    input: theano.tensor matrix of shape (n_examples, n_in)
    n_in: int, dimensionality of input
    layer_sizes: array of ints, dimensionality of the hidden layers
    n_out: int, number of output units
    dropout_rate: float, if dropout_rate is nonzero, dropout is applied
        in the hidden layers
    activation: string, nonlinearity to be applied in the hidden layers
    """
    ff = ForwardFeed(
        rng=rng,
        input=input,
        layer_sizes=[n_in] + layer_sizes,
        activation=activation,
        params=maybe(lambda: params[0]),
        dropout_rate=dropout_rate,
        srng=srng,
    )

    outputLayer = HiddenLayer(rng=rng, input=ff.output,
                              n_in=layer_sizes[-1], n_out=n_out,
                              activation=outputActivation,
                              params=maybe(lambda: params[1]))

    self.layers = [ff, outputLayer]
    self.params = layers_params(self.layers)
    self.L1 = layers_L1(self.layers)
    self.L2_sqr = layers_L2_sqr(self.layers)
    self.output = outputLayer.output
def __init__(self, rng, input, n_hidden, n_out, embeddingsLookups, embedding_matrix_to_update):
    """Initialize the parameters for the multilayer perceptron"""
    self.n_hidden = n_hidden
    self.n_out = n_out
    self.ft_names = []
    #for ft in embeddingsLookups:
    #    self.ft_names.append(ft.getName())

    # First a lookup layer to map indices to their corresponding embedding vector
    self.embeddingLayer = EmbeddingLayer(input, embeddingsLookups)

    # Since we are dealing with a one-hidden-layer MLP, this translates into a
    # HiddenLayer with a tanh activation function connected to the
    # LogisticRegression layer; the activation function can be replaced by
    # sigmoid or any other nonlinear function
    self.hiddenLayer = HiddenLayer(rng=rng, input=self.embeddingLayer.output,
                                   n_in=self.embeddingLayer.n_out,
                                   n_out=n_hidden,
                                   activation=T.tanh)

    # The logistic regression layer gets as input the hidden units
    # of the hidden layer
    self.logRegressionLayer = SoftmaxLayer(
        input=self.hiddenLayer.output,
        n_in=n_hidden,
        n_out=n_out)

    # L1 norm; one regularization option is to enforce the L1 norm to be small
    self.L1 = abs(self.hiddenLayer.W).sum() \
        + abs(self.logRegressionLayer.W).sum()

    # square of the L2 norm; one regularization option is to enforce the
    # square of the L2 norm to be small
    self.L2_sqr = (self.hiddenLayer.W ** 2).sum() \
        + (self.logRegressionLayer.W ** 2).sum()

    # the negative log likelihood of the MLP is given by the negative log
    # likelihood of the output of the model, computed in the logistic
    # regression layer
    self.negative_log_likelihood = self.logRegressionLayer.negative_log_likelihood

    # the same holds for the function computing the number of errors
    self.errors = self.logRegressionLayer.errors

    # the parameters of the model are the parameters of the two layers it is
    # made of, plus the embedding matrices to update
    self.params = self.hiddenLayer.params + self.logRegressionLayer.params + embedding_matrix_to_update
def __init__(self, learningrate, n_features, num_of_hidden_layers, Ws, bs):
    self.learning_rate = learningrate
    self.n_class = Ws[-1].shape[1]
    self.features = n_features
    self.L = num_of_hidden_layers + 1
    self.input_layer = InputLayer()
    self.output_layer = OutputLayer()
    self.hidden_layers = np.array([])
    self.connections = np.array([])

    for i in range(num_of_hidden_layers):
        self.hidden_layers = np.append(self.hidden_layers, HiddenLayer())

    for i in range(1, self.L + 1):
        self.connections = np.append(self.connections, Connection())
        self.connections[i - 1].set(Ws[i - 1], bs[i - 1], i - 1, i)
class MLP(object):
    def __init__(self, att_number, learn_rate, epochs, n_hidden_layer, n_output_layer):
        self.epochs = epochs
        self.learn_rate = learn_rate
        self.att_number = att_number
        self.n_hidden_layer = n_hidden_layer
        self.n_output_layer = n_output_layer
        self.hidden_layer = HiddenLayer(att_number, learn_rate, n_hidden_layer)
        self.output_layer = OutputLayer(n_hidden_layer, learn_rate, n_output_layer)

    def feedforward(self, inputs, expected):
        self.hidden_layer.run_layer(inputs)
        self.output_layer.run_layer(self.hidden_layer.outputs, expected)
        self.hidden_layer.update_layer(inputs, self.output_layer)

    def train(self, train_data, att):
        for _ in range(self.epochs):
            np.random.shuffle(train_data)
            for d in train_data:
                selected_inputs, expected = self.inputs_and_expected(d, att)
                self.feedforward(selected_inputs, expected)

    def test(self, test_data, att):
        hits = 0
        for data in test_data:
            selected_inputs, expected = self.inputs_and_expected(data, att)
            self.hidden_layer.run_layer(selected_inputs)
            outputs = self.output_layer.run_test(self.hidden_layer.outputs)
            predict = self.predict(outputs)
            hits = hits + 1 if np.array_equal(predict, expected) else hits
        return (hits / len(test_data)) * 100

    def get_predict(self, inputs):
        hidden_out = self.hidden_layer.run_layer(inputs)
        out_out = self.output_layer.run_test(hidden_out)
        return self.predict(out_out)

    @staticmethod
    def inputs_and_expected(d, att):
        expected = np.array([d[len(d) - 1]]) if isinstance(d[len(d) - 1], np.floating) \
            else np.array(list(d[len(d) - 1])).astype(np.int)
        selected_inputs = [d[att[i]] for i in range(len(att))]
        return selected_inputs, expected

    @staticmethod
    def predict(outputs):
        # one-hot vector marking the largest output
        predict = [1 if output == np.amax(outputs) else 0 for output in outputs]
        return predict
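# A hedged usage sketch for the MLP above. The row layout is an assumption:
# attribute values first, with a one-hot label string (e.g. '10') as the last
# element; it also presumes the HiddenLayer/OutputLayer classes this MLP
# depends on are importable.
import numpy as np
train_data = np.array([(0.1, 0.9, '10'), (0.8, 0.2, '01')], dtype=object)
mlp = MLP(att_number=2, learn_rate=0.1, epochs=10, n_hidden_layer=4, n_output_layer=2)
mlp.train(train_data, att=[0, 1])
print(mlp.test(train_data, att=[0, 1]), "% accuracy on the training rows")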
def discriminator(x, z, params, mb_size, num_hidden, num_latent):
    import random as rng
    srng = theano.tensor.shared_randomstreams.RandomStreams(420)

    #c_1 = ConvPoolLayer(in_length = 4000, batch_size = mb_size, stride = 2, activation = "relu", batch_norm = False, W = params['W_c_1'], b = params['b_c_1'])
    #c_2 = ConvPoolLayer(in_length = 399, batch_size = mb_size, stride = 2, activation = "relu", batch_norm = False, W = params['W_c_2'], b = params['b_c_2'])
    #c_3 = ConvPoolLayer(in_length = 38, batch_size = mb_size, stride = 2, activation = "relu", batch_norm = False, W = params['W_c_3'], b = params['b_c_3'])
    #c_h_1 = HiddenLayer(num_in = 6 * 512, num_out = num_hidden, W = params['W_ch_1'], b = params['b_ch_1'], activation = 'relu', batch_norm = False)

    h_out_1 = HiddenLayer(num_in=num_hidden + num_latent, num_out=num_hidden, activation='relu', batch_norm=False, W=params['W_disc_1'], b=params['b_disc_1'])
    h_out_2 = HiddenLayer(num_in=num_hidden, num_out=num_hidden, activation='relu', batch_norm=False, W=params['W_disc_2'], b=params['b_disc_2'])
    h_out_3 = HiddenLayer(num_in=num_hidden, num_out=num_hidden, activation='relu', batch_norm=False, W=params['W_disc_3'], b=params['b_disc_3'])
    h_out_4 = HiddenLayer(num_in=num_hidden, num_out=1, activation=None, batch_norm=False, W=params['W_disc_4'], b=params['b_disc_4'])

    #c_1_value = c_1.output(dropout(x, 0.8))
    #c_2_value = c_2.output(c_1_value)
    #c_3_value = c_3.output(c_2_value)
    #c_h_1_value = c_h_1.output(c_3_value.flatten(2))

    h_out_1_value = dropout(h_out_1.output(T.concatenate([z, dropout(noise(x.flatten(2)), 0.8)], axis=1)), 0.5)
    h_out_2_value = dropout(h_out_2.output(h_out_1_value), 0.5)
    h_out_3_value = dropout(h_out_3.output(h_out_2_value), 0.5)
    h_out_4_value = h_out_4.output(h_out_3_value)

    raw_y = h_out_4_value
    classification = T.nnet.sigmoid(raw_y)

    results = {'c': classification}
    return results
def __init__(self, input, label,
             n_in, hidden_layer_sizes, n_out,
             rng=None, activation=ReLU):
    self.x = input
    self.y = label

    self.hidden_layers = []
    self.n_layers = len(hidden_layer_sizes)

    if rng is None:
        rng = numpy.random.RandomState(1234)

    assert self.n_layers > 0

    # construct multi-layer
    for i in range(self.n_layers):
        # layer_size
        if i == 0:
            input_size = n_in
        else:
            input_size = hidden_layer_sizes[i - 1]

        # layer_input
        if i == 0:
            layer_input = self.x
        else:
            layer_input = self.hidden_layers[-1].output()

        # construct hidden_layer
        hidden_layer = HiddenLayer(input=layer_input,
                                   n_in=input_size,
                                   n_out=hidden_layer_sizes[i],
                                   rng=rng,
                                   activation=activation)
        self.hidden_layers.append(hidden_layer)

    # layer for output using Logistic Regression (softmax)
    self.log_layer = LogisticRegression(
        input=self.hidden_layers[-1].output(),
        label=self.y,
        n_in=hidden_layer_sizes[-1],
        n_out=n_out)
def __init__(self, rng, input, layer_sizes=[], dropout_rate=0, srng=None, params=None, activation='tanh'):
    """Initialize the parameters for the forward feed

    rng: random number generator, e.g. numpy.random.RandomState(1234)
    input: theano.tensor matrix of shape (n_examples, n_in)
    layer_sizes: array of ints, dimensionality of each layer size, input to output
    activation: string, nonlinearity to be applied in the hidden layer
    """
    output = input
    layers = []
    for i in range(0, len(layer_sizes) - 1):
        hiddenLayer = HiddenLayer(rng=rng, input=output,
                                  params=maybe(lambda: params[i]),
                                  n_in=layer_sizes[i],
                                  n_out=layer_sizes[i + 1],
                                  activation=activation)
        h = hiddenLayer.output
        if dropout_rate > 0:
            assert (srng is not None)
            h = dropout(srng, dropout_rate, h)
        output = h
        layers.append(hiddenLayer)

    self.layers = layers
    self.output = output
    self.params = layers_params(self.layers)
    self.L1 = layers_L1(self.layers)
    self.L2_sqr = layers_L2_sqr(self.layers)
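# A self-contained numpy sketch of the dropout step used above; the dropout()
# helper itself is defined elsewhere, and the inverted 1/(1-rate) scaling here
# is an assumption about its behaviour.
import numpy as np

def dropout_np(rng, rate, h):
    mask = rng.binomial(n=1, p=1.0 - rate, size=h.shape)
    return h * mask / (1.0 - rate)  # rescale so the expected activation is unchanged

rng = np.random.RandomState(1234)
print(dropout_np(rng, 0.5, np.ones((2, 4))))  # roughly half the entries become 0, the rest 2.0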
def __init__(self, input, label, n_in, hidden_layer_sizes, n_out, rng=None, activation=ReLU):
    self.x = input
    self.y = label

    self.hidden_layers = []
    self.n_layers = len(hidden_layer_sizes)

    if rng is None:
        rng = np.random.RandomState(1234)

    assert self.n_layers > 0

    for i in xrange(self.n_layers):
        if i == 0:
            input_size = n_in
        else:
            input_size = hidden_layer_sizes[i - 1]

        if i == 0:
            layer_input = self.x
        else:
            layer_input = self.hidden_layers[-1].output()

        hidden_layer = HiddenLayer(input=layer_input,
                                   n_in=input_size,
                                   n_out=hidden_layer_sizes[i],
                                   rng=rng,
                                   activation=activation)
        self.hidden_layers.append(hidden_layer)

    self.log_layer = LR(input=self.hidden_layers[-1].output(),
                        label=self.y,
                        n_in=hidden_layer_sizes[-1],
                        n_out=n_out)
class Network:
    __A = (1, 0, 0, 0, 0, 0, 0, 0, 0, 0)
    __B = (0, 1, 0, 0, 0, 0, 0, 0, 0, 0)
    __C = (0, 0, 1, 0, 0, 0, 0, 0, 0, 0)
    __D = (0, 0, 0, 1, 0, 0, 0, 0, 0, 0)
    __E = (0, 0, 0, 0, 1, 0, 0, 0, 0, 0)
    __F = (0, 0, 0, 0, 0, 1, 0, 0, 0, 0)
    __G = (0, 0, 0, 0, 0, 0, 1, 0, 0, 0)
    __H = (0, 0, 0, 0, 0, 0, 0, 1, 0, 0)
    __I = (0, 0, 0, 0, 0, 0, 0, 0, 1, 0)
    __J = (0, 0, 0, 0, 0, 0, 0, 0, 0, 1)

    def __init__(self, training_set, test_set, option):
        self.__inLayer = InputLayer(training_set, test_set, option)
        self.__hidLayer = HiddenLayer(28 * 28, 30, option)
        self.__outLayer = OutputLayer(30, 10, option)
        self.option = option

    def set_hid_weights(self, weights):
        self.__hidLayer.set_weights(weights)
        return self

    def set_out_weights(self, weights):
        self.__outLayer.set_weights(weights)
        return self

    def set_hid_bias(self, bias):
        self.__hidLayer.set_bias(bias)
        return self

    def set_out_bias(self, bias):
        self.__outLayer.set_bias(bias)
        return self

    def train(self, last_time):
        loss = 0
        for i in range(self.__inLayer.training_set_size()):
            desired_output = self.get_desired_output(
                self.__inLayer.get_training_label(i))
            self.__outLayer.set_desired_output(desired_output)

            inp = self.__inLayer.get_image(i)
            if self.option.is_dropout():
                prob = np.random.randint(0, 2, (1, 784))
                inp = np.multiply(inp, prob)
            self.__hidLayer.calc(inp)

            hid = self.__hidLayer.get_output()
            if self.option.is_dropout():
                prob = np.random.randint(0, 2, (1, 30))
                hid = np.multiply(hid, prob)
            self.__outLayer.calc(hid)

            loss += self.__outLayer.loss_function()
            self.__outLayer.back_propagate(hid)
            self.__hidLayer.back_propagate(inp, self.__outLayer)

        if last_time:
            np.savez_compressed('weights',
                                hid_weights=self.__hidLayer.get_weights(),
                                out_weights=self.__outLayer.get_weights(),
                                hid_bias=self.__hidLayer.get_bias(),
                                out_bias=self.__outLayer.get_bias())
        return loss / self.__inLayer.training_set_size()

    def test(self, last_time):
        count = 0
        loss = 0
        for i in range(self.__inLayer.test_set_size()):
            desired_output = self.get_desired_output(
                self.__inLayer.get_test_label(i))
            self.__outLayer.set_desired_output(desired_output)

            inp = self.__inLayer.get_test_image(i)
            self.__hidLayer.calc(inp)
            self.__outLayer.calc(self.__hidLayer.get_output())
            loss += self.__outLayer.loss_function()

            must_be = self.__inLayer.get_test_label(i)
            y_predict = np.zeros(10, dtype=np.int)
            max_i = np.argmax(self.__outLayer.get_output())
            y_predict[max_i] = 1
            prediction = self.prediction(tuple(y_predict))
            if must_be == prediction:
                count += 1

        if last_time:
            print("accuracy: %.2f%%" % (100 * count / self.__inLayer.test_set_size()))
        return loss / self.__inLayer.test_set_size()

    def get_desired_output(self, desired):
        if desired == "A":
            return Network.__A
        elif desired == "B":
            return Network.__B
        elif desired == "C":
            return Network.__C
        elif desired == "D":
            return Network.__D
        elif desired == "E":
            return Network.__E
        elif desired == "F":
            return Network.__F
        elif desired == "G":
            return Network.__G
        elif desired == "H":
            return Network.__H
        elif desired == "I":
            return Network.__I
        elif desired == "J":
            return Network.__J

    def prediction(self, output):
        if output == Network.__A:
            return "A"
        elif output == Network.__B:
            return "B"
        elif output == Network.__C:
            return "C"
        elif output == Network.__D:
            return "D"
        elif output == Network.__E:
            return "E"
        elif output == Network.__F:
            return "F"
        elif output == Network.__G:
            return "G"
        elif output == Network.__H:
            return "H"
        elif output == Network.__I:
            return "I"
        elif output == Network.__J:
            return "J"
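# A self-contained sketch of the same letter <-> one-hot mapping as the
# if/elif chains above, expressed as dict lookups (names hypothetical).
import numpy as np
LETTERS = "ABCDEFGHIJ"
ONE_HOT = {c: tuple(np.eye(10, dtype=int)[i]) for i, c in enumerate(LETTERS)}
LETTER_OF = {v: k for k, v in ONE_HOT.items()}
assert ONE_HOT['A'] == (1, 0, 0, 0, 0, 0, 0, 0, 0, 0)
assert LETTER_OF[(0, 1, 0, 0, 0, 0, 0, 0, 0, 0)] == 'B'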
def __init__(self, training_set, test_set, option):
    self.__inLayer = InputLayer(training_set, test_set, option)
    self.__hidLayer = HiddenLayer(28 * 28, 30, option)
    self.__outLayer = OutputLayer(30, 10, option)
    self.option = option
def __init__(self, rng, input, n_in, n_hidden_sizes, n_out, n_domains,
             n_domain_hidden_layer_size, a_function=T.tanh):
    """Initialize the parameters for the multilayer perceptron

    :type rng: numpy.random.RandomState
    :param rng: a random number generator used to initialize weights

    :type input: theano.tensor.TensorType
    :param input: symbolic variable that describes the input of the
        architecture (one minibatch)

    :type n_in: int
    :param n_in: number of input units, the dimension of the space in
        which the datapoints lie

    :type n_hidden_sizes: list
    :param n_hidden_sizes: number of hidden units at each hidden layer

    :type n_out: int
    :param n_out: number of output units, the dimension of the space in
        which the labels lie
    """
    # note: the original assigned these attributes with trailing commas
    # (e.g. "self.rng = rng,"), which wrapped every value in a tuple
    self.rng = rng
    self.hidden_layer_sizes = n_hidden_sizes
    self.input = input
    self.n_in = n_in
    # the size of the last hidden layer is the input size of the softmax output layer
    self.n_out = n_hidden_sizes[len(n_hidden_sizes) - 1]
    self.hidden_layers = []
    self.domain_hidden_layers = []
    self.logistic_layers = []

    # A one-hidden-layer MLP translates into a HiddenLayer with a tanh
    # activation function connected to the LogisticRegression layer; the
    # activation function can be replaced by sigmoid or any other nonlinear
    # function.

    # create the shared hidden layers of the MLP
    for i_hidden_layer, i_hidden_layer_size in enumerate(n_hidden_sizes):
        if i_hidden_layer > 0:
            n_in = n_hidden_sizes[i_hidden_layer - 1]
            input = self.hidden_layers[i_hidden_layer - 1].output
        hidden_layer = HiddenLayer(rng=rng, input=input, n_in=n_in,
                                   n_out=i_hidden_layer_size,
                                   activation=a_function)
        self.hidden_layers.append(hidden_layer)

    # one domain-specific hidden layer per domain, fed by the last shared layer
    for i in range(n_domains):
        domain_hidden_layer = HiddenLayer(rng=rng,
                                          input=hidden_layer.output,
                                          n_in=n_hidden_sizes[len(n_hidden_sizes) - 1],
                                          n_out=n_domain_hidden_layer_size,
                                          activation=a_function)
        self.domain_hidden_layers.append(domain_hidden_layer)

    # The logistic regression layers get as input the hidden units
    # of their domain hidden layer
    for domain_hidden_layer in self.domain_hidden_layers:
        logRegressionLayer = LogisticRegression(
            input=domain_hidden_layer.output,
            n_in=domain_hidden_layer.n_out,
            n_out=n_out
        )
        self.logistic_layers.append(logRegressionLayer)

    # L1 norm; one regularization option is to enforce the L1 norm to be small.
    # (In the original, the logistic-layer weights were accumulated only after
    # self.L1 had been formed, so they never reached self.L1; here they are
    # folded in before the assignment.)
    l1_weight = shared(0)
    domain_l1_layer = shared(0)
    for hl in self.hidden_layers:
        l1_weight += abs(hl.W).sum()
    for domain_hidden_layer in self.domain_hidden_layers:
        domain_l1_layer += abs(domain_hidden_layer.W).sum()
    for logRegressionLayer in self.logistic_layers:
        l1_weight += abs(logRegressionLayer.W).sum()
    self.L1 = l1_weight + domain_l1_layer

    # square of the L2 norm; one regularization option is to enforce the
    # square of the L2 norm to be small (same reordering as for L1)
    l2_weight = shared(0)
    domain_l2_weight = shared(0)
    for hl in self.hidden_layers:
        l2_weight += (hl.W ** 2).sum()
    for domain_hidden_layer in self.domain_hidden_layers:
        domain_l2_weight += (domain_hidden_layer.W ** 2).sum()
    for logRegressionLayer in self.logistic_layers:
        l2_weight += (logRegressionLayer.W ** 2).sum()
    self.L2_sqr = l2_weight + domain_l2_weight

    print(self.L2_sqr)
    print(type(self.L2_sqr))

    # the parameters of the model are the parameters of all the layers it is
    # made of
    hidden_layer_params = []
    domain_hidden_layer_params = []
    for hl in self.hidden_layers:
        hidden_layer_params += hl.params
    for hidden_layer in self.domain_hidden_layers:
        domain_hidden_layer_params += hidden_layer.params
    self.params = hidden_layer_params + domain_hidden_layer_params
    for logRegressionLayer in self.logistic_layers:
        self.params += logRegressionLayer.params
def __init__(self, input=None, label=None,
             n_ins=2, hidden_layer_sizes=[], n_outs=2,
             rng=None, W=None, b=None):
    '''
    :param input: first of the two leading parameters; best left unused,
        since data is only needed at fit time
    :param label:
    :param n_ins:
    :param hidden_layer_sizes:
    :param n_outs:
    :param rng:
    :param W:
    :param b:
    '''
    self.x = input
    self.y = label

    self.sigmoid_layers = []
    # number of hidden layers
    self.hidden_n_layers = len(hidden_layer_sizes)  # = len(self.rbm_layers)
    # print("hidden_n_layers=", self.hidden_n_layers)

    if rng is None:
        rng = numpy.random.RandomState(1234)

    assert self.hidden_n_layers >= 0

    # construct multi-layer
    # layer_input = None
    for i in range(self.hidden_n_layers):
        # layer_size
        if i == 0:
            input_size = n_ins
        else:
            input_size = hidden_layer_sizes[i - 1]

        # construct sigmoid_layer
        sigmoid_layer = HiddenLayer(
            # input=layer_input,
            n_in=input_size,
            n_out=hidden_layer_sizes[i],
            rng=rng,
            W=W,
            b=b,
            activation=tanh)
        self.sigmoid_layers.append(sigmoid_layer)

    # add the output layer
    if self.hidden_n_layers == 0:
        input_size = n_ins
    else:
        input_size = hidden_layer_sizes[-1]
    self.log_layer = LogisticRegression(
        # input=self.sigmoid_layers[-1].sample_h_given_v(),
        label=self.y,
        n_in=input_size,
        n_out=n_outs,
        W=W,
        b=b,
        outputMap="sigmoid")  # alternatives: "softmax", "tanh", "identity"
def evaluate_lenet5(
        learning_rate=0.1,
        n_epochs=200,
        dataset='/Users/yigenliang/PycharmProjects/theano/assets/mnist.pkl.gz',
        nkerns=[20, 50],
        batch_size=500):
    """
    learning_rate: the coefficient in front of the stochastic gradient.
    n_epochs: number of training epochs; each epoch sweeps every batch,
        i.e. every sample.
    batch_size: set to 500 here, i.e. the gradient is computed and the
        parameters updated only after 500 samples have been visited.
    nkerns=[20, 50]: number of kernels in each LeNetConvPoolLayer; the first
        LeNetConvPoolLayer has 20 kernels, the second has 50.
    """
    rng = numpy.random.RandomState(23455)

    # load the data
    datasets = load_data(dataset)
    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute the number of batches
    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
    n_test_batches = test_set_x.get_value(borrow=True).shape[0]
    n_train_batches /= batch_size
    n_valid_batches /= batch_size
    n_test_batches /= batch_size

    # a few variables: index is the batch index, x the input training data,
    # y the corresponding labels
    index = T.lscalar()
    x = T.matrix('x')
    y = T.ivector('y')

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print '... building the model'

    # each loaded batch has shape (batch_size, 28 * 28), but LeNetConvPoolLayer
    # expects 4D input, hence the reshape
    layer0_input = x.reshape((batch_size, 1, 28, 28))

    # layer0 is the first LeNetConvPoolLayer:
    # a single (28, 28) image becomes (28-5+1, 28-5+1) = (24, 24) after conv,
    # and (24/2, 24/2) = (12, 12) after maxpooling.
    # With batch_size images per batch and nkerns[0] kernels in this layer,
    # layer0's output has shape (batch_size, nkerns[0], 12, 12)
    layer0 = ConvPoolLayer(rng,
                           input=layer0_input,
                           image_shape=(batch_size, 1, 28, 28),
                           filter_shape=(nkerns[0], 1, 5, 5),
                           poolsize=(2, 2))

    # layer1 is the second LeNetConvPoolLayer:
    # its input is layer0's output; each (12, 12) feature map becomes
    # (12-5+1, 12-5+1) = (8, 8) after conv and (8/2, 8/2) = (4, 4) after
    # maxpooling. With nkerns[1] kernels, layer1's output has shape
    # (batch_size, nkerns[1], 4, 4)
    layer1 = ConvPoolLayer(
        rng,
        input=layer0.output,
        image_shape=(batch_size, nkerns[0], 12, 12),  # nkerns[0] input feature maps from layer0
        filter_shape=(nkerns[1], nkerns[0], 5, 5),
        poolsize=(2, 2))

    # layer2 follows the two LeNetConvPoolLayers (layer0 and layer1); it is a
    # fully connected layer, the hidden layer of an MLP, so it can be built with
    # HiddenLayer. Its input is 2D, (batch_size, num_pixels), so the feature
    # maps produced for each image by the different kernels must be flattened
    # into one vector: layer1's output (batch_size, nkerns[1], 4, 4) is
    # flattened to (batch_size, nkerns[1]*4*4) = (500, 800). Each row is one
    # sample. layer2's output has shape (batch_size, n_out) = (500, 500)
    layer2_input = layer1.output.flatten(2)
    layer2 = HiddenLayer(rng,
                         input=layer2_input,
                         n_in=nkerns[1] * 4 * 4,
                         n_out=500,
                         activation=T.tanh)

    # the last layer, layer3, is the classifier, using the LogisticRegression
    # defined for logistic regression; its input is layer2's output (500, 500),
    # and its output has shape (batch_size, n_out) = (500, 10)
    layer3 = SoftMax(input=layer2.output, n_in=500, n_out=10)

    # the cost: negative log likelihood (NLL)
    cost = layer3.negative_log_likelihood(y)

    # test_model computes the test error. x and y are instantiated from the
    # given index, then layer3 is called, which in turn calls layer2, layer1
    # and layer0, so test_model is effectively the whole CNN: its inputs are
    # x and y, its output is layer3.errors(y), i.e. the error.
    test_model = theano.function(
        [index],
        layer3.errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        })

    # validate_model: the validation model; the analysis is the same as above
    validate_model = theano.function(
        [index],
        layer3.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        })

    # next, train_model: this involves the optimizer (SGD), so gradients must
    # be computed and parameters updated

    # the parameter set
    params = layer3.params + layer2.params + layer1.params + layer0.params
    # gradients with respect to each parameter
    grads = T.grad(cost, params)
    # writing one update rule per parameter would be tedious, so the
    # (param_i, param_i - learning_rate * grad_i) pairs are generated in a loop
    updates = [(param_i, param_i - learning_rate * grad_i)
               for param_i, grad_i in zip(params, grads)]
    # train_model: the analysis is the same as for test_model, except that
    # train_model carries the updates rule that test_model and validate_model lack
    train_model = theano.function(
        [index],
        cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]
        })

    ###############
    # TRAIN MODEL #
    ###############
    print '... training'
    patience = 10000
    patience_increase = 2
    improvement_threshold = 0.995
    # this choice of validation_frequency guarantees a validation-set check
    # every epoch
    validation_frequency = min(n_train_batches, patience / 2)

    best_validation_loss = numpy.inf  # best (i.e. smallest) validation loss so far
    best_iter = 0  # iteration count (in batches) of the best model; e.g.
                   # best_iter=10000 means best_validation_loss was reached
                   # after training on 10000 batches
    test_score = 0.
    start_time = time.clock()

    epoch = 0
    done_looping = False

    # the training loop: the while loop counts epochs; one epoch sweeps every
    # batch, i.e. every image. The for loop walks over the batches, training on
    # one at a time by calling train_model(minibatch_index), whose updates rule
    # adjusts the parameters. It also counts trained batches in iter; whenever
    # iter is a multiple of validation_frequency, the model is evaluated on the
    # validation set. If the validation loss this_validation_loss is below the
    # previous best best_validation_loss, then best_validation_loss and
    # best_iter are updated and the model is evaluated on the test set. If
    # this_validation_loss < best_validation_loss * improvement_threshold,
    # patience is also increased. Training ends after n_epochs epochs, or as
    # soon as patience < iter.
    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in xrange(n_train_batches):
            iter = (epoch - 1) * n_train_batches + minibatch_index
            if iter % 100 == 0:
                print 'training @ iter = ', iter
            cost_ij = train_model(minibatch_index)
            # cost_ij itself is never used later; the call exists only to run
            # train_model, which happens to return a value

            if (iter + 1) % validation_frequency == 0:
                # compute zero-one loss on validation set
                validation_losses = [
                    validate_model(i) for i in xrange(n_valid_batches)
                ]
                this_validation_loss = numpy.mean(validation_losses)
                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))

                if this_validation_loss < best_validation_loss:
                    if this_validation_loss < best_validation_loss * \
                            improvement_threshold:
                        patience = max(patience, iter * patience_increase)
                    best_validation_loss = this_validation_loss
                    best_iter = iter
                    test_losses = [
                        test_model(i) for i in xrange(n_test_batches)
                    ]
                    test_score = numpy.mean(test_losses)
                    print(('     epoch %i, minibatch %i/%i, test error of '
                           'best model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))

            if patience <= iter:
                done_looping = True
                break

    end_time = time.clock()
    print('Optimization complete.')
    print('Best validation score of %f %% obtained at iteration %i, '
          'with test performance %f %%' %
          (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    print >> sys.stderr, ('The code for file ' + os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((end_time - start_time) / 60.))
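# A framework-free sketch of the patience-based early-stopping rule used above;
# the random "validation loss" is a hypothetical stand-in for a real validation
# pass, and the iteration cap only exists so the demo always terminates.
import random
random.seed(0)

patience, patience_increase, improvement_threshold = 10000, 2, 0.995
best_loss = float('inf')
it = 0
while it < patience and it < 10 ** 6:
    it += 1
    loss = random.random()  # hypothetical validation loss at this check
    if loss < best_loss:
        if loss < best_loss * improvement_threshold:
            # a significant improvement buys more patience
            patience = max(patience, it * patience_increase)
        best_loss = loss
print('stopped after %d checks; best loss %.6f' % (it, best_loss))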
def evaluate_lenet5(topo, learning_rate=0.005, n_epochs=500, datasetName='mnist.pkl.gz',
                    batch_size=4242, stateIn=None, stateOut=None):

    rng = numpy.random.RandomState(23455)
    theano_rng = RandomStreams(numpy.random.randint(2 ** 30))

    # Original:
    # datasets = load_data(dataset)
    # n_out = 10
    datasets = Preprocessing.load_pictures()
    # pickle.dump(datasets, open(datasetName, "wb"))  # Attention: y is wrong
    # print("Saving the pickled data-set")

    # Loading the pickled images
    # print("Loading the pickled data-set " + str(datasetName))
    # datasets = pickle.load(open(datasetName, "r"))
    n_out = 6
    batch_size = 10
    print(" Learning rate " + str(learning_rate))

    # Images for face recognition
    # train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[0]
    test_set_x, test_set_y = datasets[1]

    # compute number of minibatches for training, validation and testing
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
    n_test_batches = test_set_x.get_value(borrow=True).shape[0]
    n_valid_batches /= batch_size
    n_test_batches /= batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')    # the data is presented as rasterized images
    y = T.ivector('y')   # the labels are presented as a 1D vector of [int] labels

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print '... building the model'
    print 'Number of Kernels' + str(topo.nkerns)

    in_2 = 14  # input size in the second layer (layer1)

    # Reshape matrix of rasterized images of shape (batch_size, 28*28)
    # to a 4D tensor, compatible with our LeNetConvPoolLayer
    layer0_input = x.reshape((batch_size, 1, topo.ishape[0], topo.ishape[1]))

    # Using persistent state from the last run
    w0 = w1 = b0 = b1 = wHidden = bHidden = wLogReg = bLogReg = None
    if stateIn is not None:
        print(" Loading previous state ...")
        state = pickle.load(open(stateIn, "r"))
        convValues = state.convValues
        w0 = convValues[0][0]
        b0 = convValues[0][1]
        w1 = convValues[1][0]
        b1 = convValues[1][1]
        hiddenVals = state.hiddenValues
        wHidden = hiddenVals[0]
        bHidden = hiddenVals[1]
        logRegValues = state.logRegValues
        wLogReg = logRegValues[0]
        bLogReg = logRegValues[1]
        print("Hallo Gallo")

    # Construct the first convolutional pooling layer:
    # filtering reduces the image size to (28-5+1, 28-5+1) = (24, 24)
    # maxpooling reduces this further to (24/2, 24/2) = (12, 12)
    # 4D output tensor is thus of shape (batch_size, nkerns[0], 12, 12)
    layer0 = LeNetConvPoolLayer(rng,
                                input=layer0_input,
                                image_shape=(batch_size, 1, topo.ishape[0], topo.ishape[0]),
                                filter_shape=(topo.nkerns[0], 1, topo.filter_1, topo.filter_1),
                                poolsize=(topo.pool_1, topo.pool_1),
                                wOld=w0, bOld=b0)

    # Construct the second convolutional pooling layer
    # filtering reduces the image size to (12-5+1, 12-5+1) = (8, 8)
    # maxpooling reduces this further to (8/2, 8/2) = (4, 4)
    # 4D output tensor is thus of shape (nkerns[0], nkerns[1], 4, 4)
    layer1 = LeNetConvPoolLayer(rng,
                                input=layer0.output,
                                image_shape=(batch_size, topo.nkerns[0], topo.in_2, topo.in_2),
                                filter_shape=(topo.nkerns[1], topo.nkerns[0], topo.filter_2, topo.filter_2),
                                poolsize=(topo.pool_2, topo.pool_2),
                                wOld=w1, bOld=b1)

    # the HiddenLayer being fully-connected, it operates on 2D matrices of
    # shape (batch_size, num_pixels) (i.e. a matrix of rasterized images).
    # This will generate a matrix of shape (20, 32*4*4) = (20, 512)
    layer2_input = layer1.output.flatten(2)

    # Optional dropout for the fully connected layer
    # (note: p=1 corresponds to no dropout)
    # layer2_input = theano_rng.binomial(size=layer2_input.shape, n=1, p=1 - 0.02) * layer2_input
    # paper_6 no dropout
    # paper_14 again 0.02 dropout
    # paper_15 again no dropout
    layer2 = HiddenLayer(rng,
                         input=layer2_input,
                         n_in=topo.nkerns[1] * topo.hidden_input,
                         n_out=topo.numLogisticInput,
                         activation=T.tanh,
                         Wold=wHidden, bOld=bHidden)

    # classify the values of the fully-connected sigmoidal layer
    layer3 = LogisticRegression(input=layer2.output,
                                n_in=topo.numLogisticInput,
                                n_out=n_out,
                                Wold=wLogReg, bOld=bLogReg)

    # Some regularisation (not for the conv kernels)
    L2_sqr = (layer2.W ** 2).sum() + (layer3.W ** 2).sum()

    # the cost we minimize during training is the NLL of the model
    cost = layer3.negative_log_likelihood(y) + 0.001 * L2_sqr
    # paper7
    # paper9 back to 0.001 again
    # paper10 no reg.
    # paper12 back to 0.001 again

    # create a function to compute the mistakes that are made by the model
    test_model = theano.function([index], layer3.errors(y),
                                 givens={
                                     x: test_set_x[index * batch_size: (index + 1) * batch_size],
                                     y: test_set_y[index * batch_size: (index + 1) * batch_size]})

    validate_model = theano.function([index], layer3.errors(y),
                                     givens={
                                         x: valid_set_x[index * batch_size: (index + 1) * batch_size],
                                         y: valid_set_y[index * batch_size: (index + 1) * batch_size]})

    # create a list of all model parameters to be fit by gradient descent
    params = layer3.params + layer2.params + layer1.params + layer0.params

    # create a list of gradients for all model parameters
    grads = T.grad(cost, params)

    # train_model is a function that updates the model parameters by SGD.
    # Since this model has many parameters, it would be tedious to manually
    # create an update rule for each model parameter. We thus create the
    # updates list by automatically looping over all (params[i], grads[i]) pairs.
    updates = []
    for param_i, grad_i in zip(params, grads):
        updates.append((param_i, param_i - learning_rate * grad_i))

    ###############
    # TRAIN MODEL #
    ###############
    print '... training'

    # early-stopping parameters
    patience = 10000  # look at this many examples regardless
    patience_increase = 2  # wait this much longer when a new best is found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                   # considered significant
    # go through this many minibatches before checking the network on the
    # validation set; in this case we check every epoch

    best_params = None
    best_validation_loss = numpy.inf
    best_iter = 0
    test_score = 0.
    start_time = time.clock()

    epoch = 0
    done_looping = False
    epoch_fraction = 0.0
    while (epoch < n_epochs) and (not done_looping):
        # at each new epoch the training set is perturbed again
        print(" Starting new training epoch")
        print(" Manipulating the training set")
        train_set_x, train_set_y = Preprocessing.giveMeNewTraining()
        n_train_batches = train_set_x.get_value(borrow=True).shape[0]
        n_train_batches /= batch_size
        validation_frequency = min(n_train_batches, patience / 2)

        print(" Compiling new function")
        learning_rate *= 0.993  # see the paper from Cican
        train_model = theano.function([index], cost, updates=updates,
                                      givens={
                                          x: train_set_x[index * batch_size: (index + 1) * batch_size],
                                          y: train_set_y[index * batch_size: (index + 1) * batch_size]})
        print(" Finished compiling the training set")

        epoch = epoch + 1
        for minibatch_index in xrange(n_train_batches):  # visit every batch once
            iter = (epoch - 1) * n_train_batches + minibatch_index
            epoch_fraction += 1.0 / float(n_train_batches)
            if iter % 100 == 0:
                print 'training @ iter = ', iter, ' epoch_fraction ', epoch_fraction
            cost_ij = train_model(minibatch_index)

            if (iter + 1) % validation_frequency == 0:
                # compute zero-one loss on validation set
                validation_losses = [validate_model(i) for i in xrange(n_valid_batches)]
                this_validation_loss = numpy.mean(validation_losses)

                # test it on the test set
                test_start = time.clock()
                test_losses = [test_model(i) for i in xrange(n_test_batches)]
                train_costs = [train_model(i) for i in xrange(n_test_batches)]
                dt = time.clock() - test_start
                print('Testing %i faces took %f sec (%f sec per image)' %
                      (batch_size * n_test_batches, dt, dt / (n_test_batches * batch_size)))
                test_score = numpy.mean(test_losses)
                train_cost = numpy.mean(train_costs)
                print('%i, %f, %f, %f, %f, 0.424242' %
                      (epoch, this_validation_loss * 100., test_score * 100.,
                       learning_rate, train_cost))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:
                    # improve patience if the loss improvement is good enough
                    if this_validation_loss < best_validation_loss * improvement_threshold:
                        patience = max(patience, iter * patience_increase)
                    # save best validation score and iteration number
                    best_validation_loss = this_validation_loss
                    best_iter = iter

                    # # test it on the test set
                    # test_losses = [test_model(i) for i in xrange(n_test_batches)]
                    # test_score = numpy.mean(test_losses)
                    # print(('     epoch %i, minibatch %i/%i, test error of best '
                    #        'model %f %%') %
                    #       (epoch, minibatch_index + 1, n_train_batches,
                    #        test_score * 100.))

            # if (this_validation_loss < 0.02):
            #     learning_rate /= 2
            #     print("Decreased learning rate due to low xval error to " + str(learning_rate))

            if patience <= iter:
                print("--------- Finished Looping ----- earlier ")
                done_looping = True
                break

    end_time = time.clock()
    print('---------- Optimization complete -------------------------')
    print('Res: ', str(topo.nkerns))
    print('Res: ', learning_rate)
    print('Res: Best validation score of %f %% obtained at iteration %i, '
          'with test performance %f %%' %
          (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    print('Res: The code for file ' + os.path.split(__file__)[1] +
          ' ran for %.2fm' % ((end_time - start_time) / 60.))

    # Oliver
    if not os.path.isdir("conv_images"):
        os.makedirs("conv_images")
    os.chdir("conv_images")

    # d = layer0.W.get_value()  # e.g. shape (20, 1, 5, 5):
    #     (number of filters, number of incoming filters, filter dim, filter dim)
    # for i in range(0, numpy.shape(d)[0]):
    #     dd = d[i][0]
    #     rescaled = (255.0 / dd.max() * (dd - dd.min())).astype(numpy.uint8)
    #     img = Image.fromarray(rescaled)
    #     img.save('filter_l0' + str(i) + '.png')
    #
    # d = layer1.W.get_value()  # e.g. shape (20, 1, 5, 5)
    # for i in range(0, numpy.shape(d)[0]):
    #     dd = d[i][0]
    #     rescaled = (255.0 / dd.max() * (dd - dd.min())).astype(numpy.uint8)
    #     img = Image.fromarray(rescaled)
    #     img.save('filter_l1' + str(i) + '.png')

    state = LeNet5State(topology=topo,
                        convValues=[layer0.getParametersAsValues(),
                                    layer1.getParametersAsValues()],
                        hiddenValues=layer2.getParametersAsValues(),
                        logRegValues=layer3.getParametersAsValues())
    print
    if stateOut is not None:
        pickle.dump(state, open(stateOut, 'wb'))  # Attention: y is wrong
        print("Saved the pickled data-set")

    return learning_rate
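# A self-contained sketch of the min-max rescaling in the commented-out
# filter-dump code above. Note the commented version divides by dd.max() alone;
# dividing by the full range, as here, maps the filter exactly onto 0..255
# (np.rint avoids an off-by-one from float rounding before the uint8 cast).
import numpy as np
dd = np.random.RandomState(0).randn(5, 5)
rescaled = np.rint(255.0 * (dd - dd.min()) / (dd.max() - dd.min())).astype(np.uint8)
assert rescaled.min() == 0 and rescaled.max() == 255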