class ConvNet(object):

    def __init__(self, input_size=[]):
        #: C3 kernel depths: six maps see 3 input maps, nine see 4, one sees all 6
        cov3_core_sizes = [[3, 5, 5]] * 6
        cov3_core_sizes.extend([[4, 5, 5]] * 9)
        cov3_core_sizes.extend([[6, 5, 5]])
        #: connection table mapping each C3 map to its subset of S2 maps
        cov3_mapcombindex = [[0, 1, 2], [1, 2, 3], [2, 3, 4], [3, 4, 5],
                             [4, 5, 0], [5, 0, 1],
                             [0, 1, 2, 3], [1, 2, 3, 4], [2, 3, 4, 5],
                             [3, 4, 5, 0], [4, 5, 0, 1], [5, 0, 1, 2],
                             [0, 1, 3, 4], [1, 2, 4, 5], [0, 2, 3, 5],
                             [0, 1, 2, 3, 4, 5]]
        self.convlay1 = ConvLayer([[28, 28]] * 6, [[1, 5, 5]] * 6)
        self.poollay2 = PoolingLayer([[14, 14]] * 6, [[2, 2]] * 6)
        self.convlay3 = ConvLayer([[10, 10]] * 16, cov3_core_sizes,
                                  cov3_mapcombindex)
        self.poollay4 = PoolingLayer([[5, 5]] * 16, [[2, 2]] * 16)
        self.convlay5 = ConvLayer([[1, 1]] * 120, [[16, 5, 5]] * 120)
        self.fclayer6 = FCLayer(84, 120)
        self.output7 = OutputLayer(10, 84)

    def forward_p(self, pic, label):
        self.convlay1.feedforward(pic)
        self.poollay2.feedforward(self.convlay1.maps)
        self.convlay3.feedforward(self.poollay2.maps, True)
        self.poollay4.feedforward(self.convlay3.maps)
        self.convlay5.feedforward(self.poollay4.maps)
        self.fclayer6.feedforward(self.convlay5.maps)
        self.output7.softmax(self.fclayer6.maps)

    def back_p(self, pic, label, learn_rate):
        #: one-hot target vector for the softmax output layer
        output_error = np.zeros([1, 1, 10])
        output_error[0][0][label] = 1
        #fclayer_error = self.outputlay7.back_propa(self.fclay6.maps, output_error, learn_rate, True)
        fclayer_error = self.output7.back_propa_softmax(
            self.fclayer6.maps, output_error, learn_rate, True)
        conv5_error = self.fclayer6.back_propa(self.convlay5.maps,
                                               fclayer_error, learn_rate, True)
        pool4_error = self.convlay5.back_propa(self.poollay4.maps,
                                               conv5_error, learn_rate, True)
        conv3_error = self.poollay4.back_propa(self.convlay3.maps,
                                               pool4_error, learn_rate, True)
        pool2_error = self.convlay3.back_propa(self.poollay2.maps,
                                               conv3_error, learn_rate, True)
        conv1_error = self.poollay2.back_propa(self.convlay1.maps,
                                               pool2_error, learn_rate, True)
        ilayer_error = self.convlay1.back_propa(pic, conv1_error, learn_rate,
                                                True)

    def print_weights(self):
        pass
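# --- Usage sketch (illustrative, not part of the original module) ---
# The layer sizes above follow LeNet-5: a 32x32 input gives the 28x28 C1 maps
# with 5x5 kernels, down through the 16-map C3 (partial connection table) to
# the 10-way softmax. `dataset` below is a hypothetical iterable of
# ([1, 32, 32] float array, int label) pairs.
def train_convnet_sketch(dataset, learn_rate=0.01):
    net = ConvNet()
    for pic, label in dataset:
        net.forward_p(pic, label)           # populates each layer's .maps
        net.back_p(pic, label, learn_rate)  # backpropagates the one-hot target
    return net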
def train(self, train_input, train_output):
    #: holds our states at every time step (used to construct the output weights);
    #  res_size and update_info_interval are module-level settings
    states_matrix = np.zeros((res_size, len(train_input)))
    #: run the reservoir to collect the states matrix
    for time_t, data_t in enumerate(train_input):
        if time_t % update_info_interval == 0:
            print("on training data idx:", time_t, "out of", len(train_input))
        self.resevoir.run(self.weights_in, data_t)
        states_matrix[:, time_t] = self.resevoir.get_hidden_states()[:, 0]
    #: create our output layer based on our states matrix
    print("creating output layer...")
    self.weights_out = OutputLayer.create(states_matrix, train_output)
    print("output layer found.")
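# --- Sketch of a possible readout fit (an assumption, not the repo's OutputLayer) ---
# `OutputLayer.create` fits the linear readout to the collected reservoir states.
# The standard echo-state-network choice is ridge regression,
# W_out = Y X^T (X X^T + beta*I)^{-1}; a minimal sketch, assuming states_matrix
# is (res_size, T) and train_output is (n_out, T):
import numpy as np

def create_readout_sketch(states_matrix, train_output, beta=1e-6):
    X = np.asarray(states_matrix)
    Y = np.asarray(train_output)
    # Tikhonov-regularized least squares on the hidden states
    return Y @ X.T @ np.linalg.inv(X @ X.T + beta * np.eye(X.shape[0]))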
class NeuralNet:

    def __init__(self, input_shape, filter_shapes, strides, n_hidden, n_out):
        '''
        Initialize a NeuralNet

        @param input_shape: tuple or list of length 4,
            (batch size, num input feature maps, image height, image width)
        @param filter_shapes: list of 2 (one per conv layer) * 4 values
            (number of filters, num input feature maps, filter height, filter width)
        @param strides: list of size 2, stride values for each hidden layer
        @param n_hidden: int, number of neurons in the all-to-all connected hidden layer
        @param n_out: int, number of nodes in the output layer
        '''
        #: create theano variables corresponding to the input batch (x) and the network output (y)
        x = T.ftensor4('x')
        y = T.fmatrix('y')

        #: first hidden layer is convolutional
        self.layer_hidden_conv1 = ConvolutionalLayer(x, filter_shapes[0],
                                                     input_shape, strides[0])

        #: second convolutional hidden layer: the size of its input depends on the output of the
        #  first layer and is defined as (num_batches, num_input_feature_maps,
        #  height_of_input_maps, width_of_input_maps)
        second_conv_input_shape = [input_shape[0], filter_shapes[0][0],
                                   self.layer_hidden_conv1.feature_map_size,
                                   self.layer_hidden_conv1.feature_map_size]
        self.layer_hidden_conv2 = ConvolutionalLayer(self.layer_hidden_conv1.output,
                                                     filter_shapes[1],
                                                     image_shape=second_conv_input_shape,
                                                     stride=2)

        #: output from a convolutional layer is 4D, but a normal hidden layer expects 2D input.
        #  Because of the all-to-all connections, the 3rd hidden layer does not care from which
        #  feature map or from which position the input comes.
        flattened_input = self.layer_hidden_conv2.output.flatten(2)

        #: create third hidden layer
        self.layer_hidden3 = HiddenLayer(flattened_input,
                                         self.layer_hidden_conv2.fan_out, n_hidden)

        #: create output layer
        self.layer_output = OutputLayer(self.layer_hidden3.output, n_hidden, n_out)

        #: the ensemble of parameters of the whole network
        self.params = self.layer_hidden_conv1.params + self.layer_hidden_conv2.params \
            + self.layer_hidden3.params + self.layer_output.params

        #: discount factor
        self.gamma = 0.95

        #: regularization terms; note that only the weights are penalized, not the biases.
        #  Linear (L1) term, useful for driving many weights to zero:
        self.l1 = abs(self.layer_hidden_conv1.W).sum() \
            + abs(self.layer_hidden_conv2.W).sum() \
            + abs(self.layer_hidden3.W).sum() \
            + abs(self.layer_output.W).sum()

        #: squared (L2) term, useful for forcing small weights:
        self.l2_sqr = (self.layer_hidden_conv1.W ** 2).sum() \
            + (self.layer_hidden_conv2.W ** 2).sum() \
            + (self.layer_hidden3.W ** 2).sum() \
            + (self.layer_output.W ** 2).sum()

        #: define the cost function (both regularization weights are currently zero)
        cost = 0.0 * self.l1 + 0.0 * self.l2_sqr + self.layer_output.errors(y)

        #: define gradient calculation
        grads = T.grad(cost, self.params)

        #: define how much we need to change the parameter values
        learning_rate = 0.0001
        updates = []
        for param_i, gparam_i in zip(self.params, grads):
            updates.append((param_i, param_i - learning_rate * gparam_i))

        #: we need another set of theano variables (other than x and y) to use in the
        #  train and predict functions
        temp_x = T.ftensor4('temp_x')
        temp_y = T.fmatrix('temp_y')

        #: define the training operation as applying the updates calculated given temp_x and temp_y
        self.train_model = theano.function(inputs=[temp_x, temp_y],
                                           outputs=[cost, self.params[0][0]],
                                           updates=updates,
                                           givens={x: temp_x, y: temp_y})

        self.predict_rewards = theano.function(
            inputs=[temp_x],
            outputs=[self.layer_output.output],
            givens={x: temp_x})

        self.predict_rewards_and_cost = theano.function(
            inputs=[temp_x, temp_y],
            outputs=[self.layer_output.output, cost],
            givens={x: temp_x, y: temp_y})

    def train(self, minibatch):
        """
        Transform (state, action, reward, state) transitions into (input, expected_output)
        pairs for the neural net and train the network.

        @param minibatch: array of dictionaries, each dictionary contains one transition
            (prestate, action, reward, poststate)
        """
        #: we have a new, better estimate for the Q-value of the action we chose: the sum of
        #  the reward received on transition and the discounted maximum of future rewards.
        #  Q-values for the other actions remain the same.
        for i, transition in enumerate(minibatch):
            estimated_Q = self.predict_rewards([transition['prestate']])[0][0]
            #: the next line prints the output of the network; uncomment it to verify that
            #  different inputs give different outputs (c.f. wiki Basic tests/Issue #10)
            #print "estimated q", estimated_Q
            estimated_Q[transition['action']] = transition['reward'] + self.gamma \
                * np.max(self.predict_rewards([transition['poststate']]))
            #: knowing what estimated_Q looks like, we can train the model
            cost, first_filter = self.train_model([transition['prestate']], [estimated_Q])
            #: the next line prints the first row of the first 8x8 filter in the first conv
            #  layer; uncomment it to make sure the weight values do indeed change as a
            #  result of learning (c.f. wiki Basic tests/Issue #7)
            #print "first line of filter applied to first img of first layer is: \n", first_filter[0][0]

    def predict_best_action(self, state):
        """
        Return the action with the highest Q-value.

        @param state: 4D array, input (game state) for which we want to know the best action
        """
        predicted_values_for_actions = self.predict_rewards(state)[0][0]
        #print "predicted best action", predicted_values_for_actions
        return np.argmax(predicted_values_for_actions)
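# --- Q-learning target, as a plain numpy illustration (hypothetical names) ---
# The heart of train() above: only the chosen action's Q-value is replaced with
# reward + gamma * max future Q, so the squared error is zero for all other
# actions and the network is only pushed on the action actually taken.
import numpy as np

def q_target_sketch(q_pre, q_post, action, reward, gamma=0.95):
    target = q_pre.copy()  # Q-values for the other actions stay fixed
    target[action] = reward + gamma * np.max(q_post)
    return target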
def get_output(self, input, z, training=None, return_spectrogram=False, reuse=True):
    '''
    Creates the symbolic computation graph of the U-Net for a given input batch.

    :param input: Input batch of mixtures, 3D tensor [batch_size, num_samples, num_channels]
    :param reuse: Whether to create new parameter variables or reuse existing ones
    :return: U-Net output: list of source estimates, each a 3D tensor
             [batch_size, num_out_samples, num_channels]
    '''
    with tf.variable_scope("separator", reuse=reuse):
        enc_outputs = list()
        current_layer = input

        # Down-convolution: repeat strided conv
        for i in range(self.num_layers):
            current_layer = tf.layers.conv1d(
                current_layer,
                self.num_initial_filters + (self.num_initial_filters * i),
                self.filter_size,
                strides=1,
                activation=LeakyReLU,
                padding=self.padding)  # out = in - filter + 1
            enc_outputs.append(current_layer)
            current_layer = current_layer[:, ::2, :]  # Decimate by factor of 2: out = (in-1)/2 + 1

        # One more conv here since we need to compute features after the last decimation
        current_layer = tf.layers.conv1d(
            current_layer,
            self.num_initial_filters + (self.num_initial_filters * self.num_layers),
            self.filter_size,
            activation=LeakyReLU,
            padding=self.padding)

        # Condition the bottleneck on z:
        # z: [batch_size, num_sources] -> [batch_size, timestamps, n_filters, num_sources]
        z = tf.tile(z, [current_layer.shape[1], current_layer.shape[2]])
        z = tf.reshape(z, (current_layer.shape.as_list() + [self.num_sources]))

        # Apply multiplicative conditioning
        current_layer = tf.expand_dims(current_layer, axis=-1)
        current_layer = tf.multiply(z, current_layer)
        current_layer = tf.reshape(
            current_layer,
            (current_layer.shape[0], current_layer.shape[1], -1))

        # Up-convolution
        for i in range(self.num_layers):
            # Upsampling
            current_layer = tf.expand_dims(current_layer, axis=1)
            if self.upsampling == 'learned':
                # Learned interpolation between two neighbouring time positions, using a
                # convolution filter of width 2 and inserting the responses in the middle
                # of the two respective inputs
                current_layer = Utils.learned_interpolation_layer(current_layer,
                                                                  self.padding, i)
            else:
                if self.context:
                    current_layer = tf.image.resize_bilinear(
                        current_layer,
                        [1, current_layer.get_shape().as_list()[2] * 2 - 1],
                        align_corners=True)
                    current_layer = tf.cast(current_layer, tf.bfloat16)
                else:
                    current_layer = tf.image.resize_bilinear(
                        current_layer,
                        [1, current_layer.get_shape().as_list()[2] * 2])  # out = in + in - 1
            #current_layer = tf.layers.conv2d_transpose(current_layer, self.num_initial_filters + (16 * (self.num_layers-i-1)), [1, 15], strides=[1, 2], activation=LeakyReLU, padding='same')  # output = input * stride + filter - stride
            current_layer = tf.squeeze(current_layer, axis=1)

            # No cropping should be necessary unless we are using context
            assert (enc_outputs[-i - 1].get_shape().as_list()[1]
                    == current_layer.get_shape().as_list()[1] or self.context)
            current_layer = Utils.crop_and_concat(enc_outputs[-i - 1], current_layer,
                                                  match_feature_dim=False)
            current_layer = tf.layers.conv1d(
                current_layer,
                self.num_initial_filters + (self.num_initial_filters * (self.num_layers - i - 1)),
                self.merge_filter_size,
                activation=LeakyReLU,
                padding=self.padding)  # out = in - filter + 1

        current_layer = Utils.crop_and_concat(input, current_layer,
                                              match_feature_dim=False)

        # Output layer
        if self.output_type == "direct":
            return OutputLayer.independent_outputs(current_layer, self.num_sources,
                                                   self.num_channels)
        elif self.output_type == "difference":
            cropped_input = Utils.crop(input, current_layer.get_shape().as_list(),
                                       match_feature_dim=False)
            return OutputLayer.difference_output(cropped_input, current_layer,
                                                 self.num_sources, self.num_channels)
        else:
            raise NotImplementedError
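# --- Sketch of the "difference" output idea (an assumption, not the repo's code) ---
# OutputLayer.difference_output is not shown here; the usual construction
# estimates num_sources - 1 sources directly and derives the last one as the
# mixture minus their sum, so the estimates add up to the input mixture:
import tensorflow as tf

def difference_output_sketch(mixture, features, num_sources, num_channels):
    direct = [tf.layers.conv1d(features, num_channels, 1, activation=tf.tanh)
              for _ in range(num_sources - 1)]
    residual = mixture - tf.add_n(direct)  # last source closes the sum
    return direct + [residual]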
class NeuralNet:

    def __init__(self, input_shape, filter_shapes, strides, n_hidden, n_out):
        '''
        Initialize a NeuralNet

        @param input_shape: tuple or list of length 4,
            (batch size, num input feature maps, image height, image width)
        @param filter_shapes: list of 2 (one per conv layer) * 4 values
            (number of filters, num input feature maps, filter height, filter width)
        @param strides: list of size 2, stride values for each hidden layer
        @param n_hidden: int, number of neurons in the all-to-all connected hidden layer
        @param n_out: int, number of nodes in the output layer
        '''
        #: create theano variables corresponding to the input batch (x) and the network output (y)
        x = T.ftensor4('x')
        y = T.fmatrix('y')

        #: first hidden layer is convolutional
        self.layer_hidden_conv1 = ConvolutionalLayer(x, filter_shapes[0],
                                                     input_shape, strides[0])

        #: second convolutional hidden layer: the size of its input depends on the output of the
        #  first layer and is defined as (num_batches, num_input_feature_maps,
        #  height_of_input_maps, width_of_input_maps)
        second_conv_input_shape = [input_shape[0], filter_shapes[0][0],
                                   self.layer_hidden_conv1.feature_map_size,
                                   self.layer_hidden_conv1.feature_map_size]
        self.layer_hidden_conv2 = ConvolutionalLayer(
            self.layer_hidden_conv1.output, filter_shapes[1],
            image_shape=second_conv_input_shape, stride=2)  # drops use of strides

        #: output from a convolutional layer is 4D, but a normal hidden layer expects 2D input.
        #  Because of the all-to-all connections, the 3rd hidden layer does not care from which
        #  feature map or from which position the input comes.
        flattened_input = self.layer_hidden_conv2.output.flatten(2)

        #: create third hidden layer
        self.layer_hidden3 = HiddenLayer(flattened_input,
                                         self.layer_hidden_conv2.fan_out, n_hidden)

        #: create output layer
        self.layer_output = OutputLayer(self.layer_hidden3.output, n_hidden, n_out)

        #: the ensemble of parameters of the whole network
        self.params = self.layer_hidden_conv1.params + self.layer_hidden_conv2.params \
            + self.layer_hidden3.params + self.layer_output.params

        #: discount factor
        self.gamma = 0.95

        #: regularization terms; note that only the weights are penalized, not the biases.
        #  Linear (L1) term, useful for driving many weights to zero:
        self.l1 = abs(self.layer_hidden_conv1.W).sum() \
            + abs(self.layer_hidden_conv2.W).sum() \
            + abs(self.layer_hidden3.W).sum() \
            + abs(self.layer_output.W).sum()

        #: squared (L2) term, useful for forcing small weights:
        self.l2_sqr = (self.layer_hidden_conv1.W ** 2).sum() \
            + (self.layer_hidden_conv2.W ** 2).sum() \
            + (self.layer_hidden3.W ** 2).sum() \
            + (self.layer_output.W ** 2).sum()

        #: define the cost function (both regularization weights are currently zero)
        self.cost = 0.0 * self.l1 + 0.0 * self.l2_sqr + self.layer_output.errors(y)
        self.cost_function = theano.function([x, y], [self.cost])

        #: define gradient calculation
        self.grads = T.grad(self.cost, self.params)

        #: the learning rate is a symbolic input, so it can be chosen per call
        self.learning_rate = T.scalar('lr')
        self.updates = []
        for param_i, gparam_i in zip(self.params, self.grads):
            self.updates.append((param_i, param_i - self.learning_rate * gparam_i))

        self.x = x
        self.y = y

        #: we need another set of theano variables (other than x and y) to use in the
        #  train and predict functions
        temp_x = T.ftensor4('temp_x')
        temp_y = T.fmatrix('temp_y')

        #: define the training operation as applying the updates calculated given temp_x and temp_y
        self.train_model = theano.function(
            inputs=[temp_x, temp_y,
                    theano.Param(self.learning_rate, default=0.00001)],
            outputs=[self.cost],
            updates=self.updates,
            givens={x: temp_x, y: temp_y},
            name='train_model')

        #: cloning the cost with the updates applied gives the post-step cost as a
        #  function of the learning rate, usable for a line search
        self.cost_clone = theano.clone(self.cost, replace=self.updates)
        self.line_function = theano.function([x, y, self.learning_rate],
                                             [self.cost_clone])

        self.predict_rewards = theano.function(
            inputs=[temp_x],
            outputs=[self.layer_output.output],
            givens={x: temp_x},
            name='predict_rewards')

        self.predict_rewards_and_cost = theano.function(
            inputs=[temp_x, temp_y],
            outputs=[self.layer_output.output, self.cost],
            givens={x: temp_x, y: temp_y},
            name='predict_rewards_and_cost')

    actual_learning_rate = 1e-5
    learning_rates = []

    def optimal_learning_rate(self, prestates, new_estimated_Q, lr):
        objective = lambda lr: self.line_function(np.array(prestates),
                                                  new_estimated_Q, float(lr))[0]
        res = scipy.optimize.minimize(objective, 0, method='Nelder-Mead',
                                      options={'xtol': 1e-1})
        print('optimization result')
        print(res)
        self.learning_rates.append(max(1e-6, float(res.x)))

    def train(self, minibatch):
        """
        Transform (state, action, reward, state) transitions into (input, expected_output)
        pairs for the neural net and train the network.

        @param minibatch: array of dictionaries, each dictionary contains one transition
            (prestate, action, reward, poststate)
        """
        prestates = [t['prestate'] for t in minibatch]
        initial_estimated_Q = self.predict_rewards(prestates)[0]
        new_estimated_Q = initial_estimated_Q.copy()
        poststates = [t['poststate'] for t in minibatch]
        post_eQ = [self.predict_rewards([s])[0] if s is not None else None
                   for s in poststates]
        actions = [t['action'] for t in minibatch]
        game_end_ps = [t['game_end'] for t in minibatch]
        rewards = np.array([t['reward'] for t in minibatch])

        #: terminal transitions contribute only the immediate reward
        for row, (peQ, action, reward, game_end) in enumerate(
                zip(post_eQ, actions, rewards, game_end_ps)):
            new_estimated_Q[row, action] = reward + (
                0 if game_end else self.gamma * np.max(peQ))

        initial_cost = self.cost_function(prestates, new_estimated_Q)
        optimal_learning_rate = lambda: self.optimal_learning_rate(
            prestates, new_estimated_Q,
            self.learning_rates[-1] if self.learning_rates else self.actual_learning_rate)
        #: recompute the optimal learning rate every 50 minibatches; otherwise reuse the last one
        if (len(self.learning_rates) % 50) == 0:
            print('computing optimal learning rate')
            optimal_learning_rate()
        else:
            self.learning_rates.append(self.learning_rates[-1])
        self.train_model(np.array(prestates), new_estimated_Q, self.learning_rates[-1])
        final_cost = self.cost_function(prestates, new_estimated_Q)
        final_estimated_Q = self.predict_rewards(prestates)[0]
        print('initial_cost', initial_cost, 'final_cost', final_cost)
        print('current rewards',
              (final_estimated_Q - final_estimated_Q.min(axis=0)).mean(axis=0))
        print('current rewards absolute')
        for r, a, s in sorted(zip(rewards, actions, map(list, final_estimated_Q))):
            print(r, a, s)
        if final_cost > initial_cost:
            print('overstepped; computing current optimal learning rate')
            optimal_learning_rate()
        if os.path.exists('/var/tmp/stop'):
            import pdb
            pdb.set_trace()

    def predict_best_action(self, state):
        """
        Return the action with the highest Q-value.

        @param state: 4D array, input (game state) for which we want to know the best action
        """
        predicted_values_for_actions = self.predict_rewards(state)[0][0]
        return np.argmax(predicted_values_for_actions)
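# --- Toy illustration of the cost_clone line search (hypothetical loss) ---
# theano.clone(cost, replace=updates) gives the cost *after* the update as a
# function of the learning rate, so step-size selection becomes a 1-D
# minimization. The same idea on a standalone quadratic f(w) = ||w||^2:
import numpy as np
import scipy.optimize

w = np.array([3.0, -2.0])
grad = 2 * w  # gradient of ||w||^2
line = lambda lr: np.sum((w - float(lr) * grad) ** 2)  # cost after the step
res = scipy.optimize.minimize(line, 0.0, method='Nelder-Mead',
                              options={'xatol': 1e-6})
print('best step size:', float(res.x[0]))  # ~0.5 for this quadratic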
class NeuralNet:

    def __init__(self, input_shape, filter_shapes, strides, n_hidden, n_out):
        x = T.dtensor4('x')
        y = T.dmatrix('y')
        self.layer_hidden_conv1 = ConvolutionalLayer(x, filter_shapes[0],
                                                     input_shape, strides[0])
        second_conv_input_shape = [input_shape[0], filter_shapes[0][0],
                                   self.layer_hidden_conv1.feature_map_size,
                                   self.layer_hidden_conv1.feature_map_size]
        self.layer_hidden_conv2 = ConvolutionalLayer(self.layer_hidden_conv1.output,
                                                     filter_shapes[1],
                                                     image_shape=second_conv_input_shape,
                                                     stride=2)
        flattened_input = self.layer_hidden_conv2.output.flatten(2)
        self.layer_hidden3 = HiddenLayer(flattened_input,
                                         self.layer_hidden_conv2.fan_out, n_hidden)
        self.layer_output = OutputLayer(self.layer_hidden3.output, n_hidden, n_out)
        self.params = self.layer_hidden_conv1.params + self.layer_hidden_conv2.params \
            + self.layer_hidden3.params + self.layer_output.params
        self.gamma = 0.95

        self.L1 = abs(self.layer_hidden_conv1.W).sum() \
            + abs(self.layer_hidden_conv2.W).sum() \
            + abs(self.layer_hidden3.W).sum() \
            + abs(self.layer_output.W).sum()

        # square of the L2 norm; one regularization option is to force the
        # square of the L2 norm to be small
        self.L2_sqr = (self.layer_hidden_conv1.W ** 2).sum() \
            + (self.layer_hidden_conv2.W ** 2).sum() \
            + (self.layer_hidden3.W ** 2).sum() \
            + (self.layer_output.W ** 2).sum()

        cost = 0.0 * self.L1 + 0.0 * self.L2_sqr + self.layer_output.errors(y)
        grads = T.grad(cost, self.params)

        # Define how much we need to change the parameter values
        learning_rate = 0.01
        updates = []
        for param_i, gparam_i in zip(self.params, grads):
            updates.append((param_i, param_i - learning_rate * gparam_i))

        temp1 = T.dtensor4('temp1')
        temp2 = T.dmatrix('temp2')
        self.train_model = theano.function(inputs=[temp1, temp2],
                                           outputs=[cost],
                                           updates=updates,
                                           givens={x: temp1, y: temp2})
        #self.shared_q = theano.shared(np.zeros((32,4)))
        #self.shared_s = theano.shared(np.zeros((32,4,84,84)))
        #self.train_model_shared = theano.function(inputs=[], outputs=[cost],
        #                                          updates=updates,
        #                                          givens={
        #                                              x: self.shared_s,
        #                                              y: self.shared_q
        #                                          })
        self.predict_rewards = theano.function(
            inputs=[temp1],
            outputs=[self.layer_output.output],
            givens={x: temp1})
        self.predict_rewards_and_cost = theano.function(
            inputs=[temp1, temp2],
            outputs=[self.layer_output.output, cost],
            givens={x: temp1, y: temp2})

    def train(self, minibatch):
        states = []
        expected_Qs = []
        states1 = [element['prestate'] for element in minibatch]
        states2 = [element['poststate'] for element in minibatch]
        current_predicted_rewards = self.predict_rewards(states1)[0]
        predicted_future_rewards = self.predict_rewards(states2)[0]
        for i, transition in enumerate(minibatch):
            # NB: this is a view into current_predicted_rewards, mutated in place
            rewards = current_predicted_rewards[i]
            rewards[transition['action']] = transition['reward'] \
                + self.gamma * np.max(predicted_future_rewards[i])
            states.append(transition['prestate'])
            expected_Qs.append(rewards)
        #self.shared_s = theano.shared(states)
        #self.shared_q = theano.shared(expected_Qs)
        #print "expected", expected_Qs[0]
        #print "expected", self.shared_q.eval()[0]
        #print self.predict_rewards_and_cost(self.shared_s.eval(), self.shared_q.eval())[0][0]
        #return self.train_model_shared()
        self.train_model(states, expected_Qs)
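# --- Vectorized form of the target loop above (hypothetical helper) ---
# This version already batches the two forward passes; the per-transition loop
# can be collapsed the same way. Note it also copies q_pre, avoiding the
# in-place mutation of predict_rewards' output that the loop above relies on.
import numpy as np

def batched_q_targets_sketch(q_pre, q_post, actions, rewards, gamma=0.95):
    targets = np.asarray(q_pre).copy()
    targets[np.arange(len(actions)), actions] = \
        np.asarray(rewards) + gamma * np.asarray(q_post).max(axis=1)
    return targets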
def create_output_layer(self):
    output_layer = OutputLayer.OutputLayer(self.class_num)
    return output_layer
class NeuralNet:

    def __init__(self, input_shape, filter_shapes, strides, n_hidden, n_out):
        '''
        Initialize a NeuralNet

        @param input_shape: tuple or list of length 4,
            (batch size, num input feature maps, image height, image width)
        @param filter_shapes: list of 2 (one per conv layer) * 4 values
            (number of filters, num input feature maps, filter height, filter width)
        @param strides: list of size 2, stride values for each hidden layer
        @param n_hidden: int, number of neurons in the all-to-all connected hidden layer
        @param n_out: int, number of nodes in the output layer
        '''
        #: create theano variables corresponding to the input batch (x) and the network output (y)
        x = T.ftensor4('x')
        y = T.fmatrix('y')

        #: first hidden layer is convolutional
        self.layer_hidden_conv1 = ConvolutionalLayer(x, filter_shapes[0],
                                                     input_shape, strides[0])

        #: second convolutional hidden layer: the size of its input depends on the output of the
        #  first layer and is defined as (num_batches, num_input_feature_maps,
        #  height_of_input_maps, width_of_input_maps)
        second_conv_input_shape = [input_shape[0], filter_shapes[0][0],
                                   self.layer_hidden_conv1.feature_map_size,
                                   self.layer_hidden_conv1.feature_map_size]
        self.layer_hidden_conv2 = ConvolutionalLayer(self.layer_hidden_conv1.output,
                                                     filter_shapes[1],
                                                     image_shape=second_conv_input_shape,
                                                     stride=2)

        #: output from a convolutional layer is 4D, but a normal hidden layer expects 2D input.
        #  Because of the all-to-all connections, the 3rd hidden layer does not care from which
        #  feature map or from which position the input comes.
        flattened_input = self.layer_hidden_conv2.output.flatten(2)

        #: create third hidden layer
        self.layer_hidden3 = HiddenLayer(flattened_input,
                                         self.layer_hidden_conv2.fan_out, n_hidden)

        #: create output layer
        self.layer_output = OutputLayer(self.layer_hidden3.output, n_hidden, n_out)

        #: the ensemble of parameters of the whole network
        self.params = self.layer_hidden_conv1.params + self.layer_hidden_conv2.params \
            + self.layer_hidden3.params + self.layer_output.params

        #: discount factor
        self.gamma = 0.95

        #: regularization terms; note that only the weights are penalized, not the biases.
        #  Linear (L1) term, useful for driving many weights to zero:
        self.l1 = abs(self.layer_hidden_conv1.W).sum() \
            + abs(self.layer_hidden_conv2.W).sum() \
            + abs(self.layer_hidden3.W).sum() \
            + abs(self.layer_output.W).sum()

        #: squared (L2) term, useful for forcing small weights:
        self.l2_sqr = (self.layer_hidden_conv1.W ** 2).sum() \
            + (self.layer_hidden_conv2.W ** 2).sum() \
            + (self.layer_hidden3.W ** 2).sum() \
            + (self.layer_output.W ** 2).sum()

        #: define the cost function (both regularization weights are currently zero)
        cost = 0.0 * self.l1 + 0.0 * self.l2_sqr + self.layer_output.errors(y)

        #: define gradient calculation
        grads = T.grad(cost, self.params)

        #: define how much we need to change the parameter values
        learning_rate = 0.0001
        updates = []
        for param_i, gparam_i in zip(self.params, grads):
            updates.append((param_i, param_i - learning_rate * gparam_i))

        #: we need another set of theano variables (other than x and y) to use in the
        #  train and predict functions
        temp_x = T.ftensor4('temp_x')
        temp_y = T.fmatrix('temp_y')

        #: define the training operation as applying the updates calculated given temp_x and temp_y
        self.train_model = theano.function(inputs=[temp_x, temp_y],
                                           outputs=[cost],
                                           updates=updates,
                                           givens={x: temp_x, y: temp_y})

        self.predict_rewards = theano.function(
            inputs=[temp_x],
            outputs=[self.layer_output.output],
            givens={x: temp_x})

        self.predict_rewards_and_cost = theano.function(
            inputs=[temp_x, temp_y],
            outputs=[self.layer_output.output, cost],
            givens={x: temp_x, y: temp_y})

    @profile
    def train(self, minibatch):
        """
        Transform (state, action, reward, state) transitions into (input, expected_output)
        pairs for the neural net and train the network.

        @param minibatch: array of dictionaries, each dictionary contains one transition
            (prestate, action, reward, poststate)
        """
        #: we have a new, better estimate for the Q-value of the action we chose: the sum of
        #  the reward received on transition and the discounted maximum of future rewards.
        #  Q-values for the other actions remain the same.
        for i, transition in enumerate(minibatch):
            estimated_Q = self.predict_rewards([transition['prestate']])[0][0]
            #: the future-reward term is evaluated on the successor (post) state
            estimated_Q[transition['action']] = transition['reward'] + self.gamma \
                * np.max(self.predict_rewards([transition['poststate']]))
            #: knowing what estimated_Q looks like, we can train the model
            self.train_model([transition['prestate']], [estimated_Q])

    @profile
    def predict_best_action(self, state):
        """
        Return the action with the highest Q-value.

        @param state: 4D array, input (game state) for which we want to know the best action
        """
        predicted_values_for_actions = self.predict_rewards(state)[0][0]
        return np.argmax(predicted_values_for_actions)
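# --- Epsilon-greedy wrapper (illustrative; epsilon and n_actions are not part
# of the class above) ---
# predict_best_action is the greedy half of the usual control loop; an agent
# typically mixes it with random exploration:
import numpy as np

def choose_action_sketch(net, state, n_actions, epsilon=0.1, rng=np.random):
    if rng.rand() < epsilon:
        return rng.randint(n_actions)      # explore
    return net.predict_best_action(state)  # exploit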