Example #1
    def __init__(self,
                 nn_layers,
                 L2_reg=0.0001,
                 learning_rate=0.1,
                 batch_size=32,
                 discrete_target=False):

        self.layers = nn_layers
        #TODO: maybe initialize layers and set all inputs as prev outputs

        self._batch_size = batch_size

        # self._fprop = theano.function(
        #     [self.layers[0].input_var],
        #     self.output()
        # )

        self.parameters = layers.all_parameters(self.layers[-1])
        # self.parameters = [param for layer in nn_layers[1:] for param in layer.params] #nn_layers[5].params + nn_layers[4].params + nn_layers[3].params + nn_layers[2].params + nn_layers[1].params

        self.cost = self.layers[-1].error()
        self.error_rate = self.layers[-1].error_rate()

        self.regularization = sum([(W_or_b**2).sum()
                                   for W_or_b in self.parameters])

        self.updates = layers.gen_updates_sgd(
            self.cost + self.regularization * L2_reg, self.parameters,
            learning_rate)  # the last layer must be a layers.OutputLayer
        # self.updates = layers.gen_updates_sgd(cost, self.parameters, learning_rate) # the last layer must be a layers.OutputLayer

        # self.train_model = theano.function(
        #     inputs=[self.layers[0].input_var, self.layers[-1].target_var],
        #     updates=self.updates,
        #     outputs=self.cost
        # )

        self._idx = T.lscalar('idx')
        self.x_shared = theano.shared(
            np.zeros(self.layers[0].get_output_shape(),
                     dtype=theano.config.floatX))
        self.y_shared = theano.shared(
            np.zeros(self.layers[-1].get_output_shape(),
                     dtype=theano.config.floatX))
        self.x_shared_validate = theano.shared(
            np.zeros(self.layers[0].get_output_shape(),
                     dtype=theano.config.floatX))
        self.y_shared_validate = theano.shared(
            np.zeros(self.layers[-1].get_output_shape(),
                     dtype=theano.config.floatX))
        self.x_shared_test = theano.shared(
            np.zeros(self.layers[0].get_output_shape(),
                     dtype=theano.config.floatX))
        self.y_shared_test = theano.shared(
            np.zeros(self.layers[-1].get_output_shape(),
                     dtype=theano.config.floatX))
        self.y_converted = T.cast(
            self.y_shared, 'int32') if discrete_target else self.y_shared
        self.y_converted_validate = T.cast(
            self.y_shared_validate,
            'int32') if discrete_target else self.y_shared_validate
        self.y_converted_test = T.cast(
            self.y_shared_test,
            'int32') if discrete_target else self.y_shared_test

        self._givens = {
            self.layers[0].input_var:
            self.x_shared[self._idx * self._batch_size:(self._idx + 1) *
                          self._batch_size],
            self.layers[-1].target_var:
            self.y_converted[self._idx * self._batch_size:(self._idx + 1) *
                             self._batch_size],
        }

        self._givens_validate = {
            self.layers[0].input_var:
            self.x_shared_validate[self._idx *
                                   self._batch_size:(self._idx + 1) *
                                   self._batch_size],
            self.layers[-1].target_var:
            self.y_converted_validate[self._idx *
                                      self._batch_size:(self._idx + 1) *
                                      self._batch_size],
        }

        self._givens_test = {
            self.layers[0].input_var:
            self.x_shared_test[self._idx * self._batch_size:(self._idx + 1) *
                               self._batch_size],
            self.layers[-1].target_var:
            self.y_converted_test[self._idx *
                                  self._batch_size:(self._idx + 1) *
                                  self._batch_size],
        }

        self._train_model_batch = theano.function(inputs=[self._idx],
                                                  updates=self.updates,
                                                  givens=self._givens,
                                                  outputs=self.cost)

        self._validate_model_batch = theano.function(
            inputs=[self._idx],
            givens=self._givens_validate,
            outputs=self.error_rate)

        self._test_model_batch = theano.function(inputs=[self._idx],
                                                 givens=self._givens_test,
                                                 outputs=self.error_rate)

        self._output_model_batch = theano.function(inputs=[self._idx],
                                                   givens=self._givens,
                                                   outputs=self.output())
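
The functions above take only a minibatch index: the data lives in the shared variables, so an epoch uploads one chunk to the device and then sweeps `_train_model_batch` over the batch indices. A minimal driver sketch, assuming the class is instantiated as `net` and the chunk fits in device memory (the helper `train_epoch` is hypothetical, not part of the snippet):

import numpy as np

def train_epoch(net, x_chunk, y_chunk):
    # One host->device transfer per chunk, then index-only function calls.
    net.x_shared.set_value(x_chunk)
    net.y_shared.set_value(y_chunk)
    n_batches = x_chunk.shape[0] // net._batch_size
    return np.mean([net._train_model_batch(i) for i in range(n_batches)])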
Example #2
    def __init__(self,
                 num_actions,
                 phi_length,
                 width,
                 height,
                 discount=.9,
                 learning_rate=.01,
                 batch_size=32,
                 approximator='none'):
        self._batch_size = batch_size
        self._num_input_features = phi_length
        self._phi_length = phi_length
        self._img_width = width
        self._img_height = height
        self._discount = discount
        self.num_actions = num_actions
        self.learning_rate = learning_rate
        self.scale_input_by = 255.0

        print "neural net initialization, lr is: ", self.learning_rate, approximator

        # CONSTRUCT THE LAYERS
        self.q_layers = []
        self.q_layers.append(
            layers.Input2DLayer(self._batch_size, self._num_input_features,
                                self._img_height, self._img_width,
                                self.scale_input_by))

        if approximator == 'cuda_conv':
            self.q_layers.append(
                cc_layers.ShuffleBC01ToC01BLayer(self.q_layers[-1]))
            self.q_layers.append(
                cc_layers.CudaConvnetConv2DLayer(self.q_layers[-1],
                                                 n_filters=16,
                                                 filter_size=8,
                                                 stride=4,
                                                 weights_std=.01,
                                                 init_bias_value=0.1))
            self.q_layers.append(
                cc_layers.CudaConvnetConv2DLayer(self.q_layers[-1],
                                                 n_filters=32,
                                                 filter_size=4,
                                                 stride=2,
                                                 weights_std=.01,
                                                 init_bias_value=0.1))
            self.q_layers.append(
                cc_layers.ShuffleC01BToBC01Layer(self.q_layers[-1]))

        elif approximator == 'conv':
            self.q_layers.append(
                layers.StridedConv2DLayer(self.q_layers[-1],
                                          n_filters=16,
                                          filter_width=8,
                                          filter_height=8,
                                          stride_x=4,
                                          stride_y=4,
                                          weights_std=.01,
                                          init_bias_value=0.01))

            self.q_layers.append(
                layers.StridedConv2DLayer(self.q_layers[-1],
                                          n_filters=32,
                                          filter_width=4,
                                          filter_height=4,
                                          stride_x=2,
                                          stride_y=2,
                                          weights_std=.01,
                                          init_bias_value=0.01))
        if approximator in ('cuda_conv', 'conv'):
            self.q_layers.append(
                layers.DenseLayer(self.q_layers[-1],
                                  n_outputs=256,
                                  weights_std=0.01,
                                  init_bias_value=0.1,
                                  dropout=0,
                                  nonlinearity=layers.rectify))

            self.q_layers.append(
                layers.DenseLayer(self.q_layers[-1],
                                  n_outputs=num_actions,
                                  weights_std=0.01,
                                  init_bias_value=0.1,
                                  dropout=0,
                                  nonlinearity=layers.identity))

        if approximator == 'none':
            self.q_layers.append(
                layers.DenseLayerNoBias(self.q_layers[-1],
                                        n_outputs=num_actions,
                                        weights_std=0.00,
                                        dropout=0,
                                        nonlinearity=layers.identity))

        self.q_layers.append(layers.OutputLayer(self.q_layers[-1]))

        for i in range(len(self.q_layers) - 1):
            print self.q_layers[i].get_output_shape()

        # Now create a network (using the same weights)
        # for next state q values
        self.next_layers = copy_layers(self.q_layers)
        self.next_layers[0] = layers.Input2DLayer(self._batch_size,
                                                  self._num_input_features,
                                                  self._img_height,
                                                  self._img_width,
                                                  self.scale_input_by)
        self.next_layers[1].input_layer = self.next_layers[0]

        self.rewards = T.col()
        self.actions = T.icol()

        # Build the loss function ...
        print "building loss funtion"
        q_vals = self.q_layers[-1].predictions()
        next_q_vals = self.next_layers[-1].predictions()
        next_maxes = T.max(next_q_vals, axis=1, keepdims=True)
        target = self.rewards + discount * next_maxes
        target = theano.gradient.consider_constant(target)
        diff = target - q_vals
        # Zero out all entries for actions that were not chosen...
        mask = build_mask(T.zeros_like(diff), self.actions, 1.0)
        diff_masked = diff * mask
        error = T.mean(diff_masked**2)
        self._loss = error * diff_masked.shape[1]  # batch mean of per-sample summed squared TD error

        self._parameters = layers.all_parameters(self.q_layers[-1])

        self._idx = T.lscalar('idx')

        # CREATE VARIABLES FOR INPUT AND OUTPUT
        self.states_shared = theano.shared(
            np.zeros((1, 1, 1, 1), dtype=theano.config.floatX))
        self.states_shared_next = theano.shared(
            np.zeros((1, 1, 1, 1), dtype=theano.config.floatX))
        self.rewards_shared = theano.shared(np.zeros(
            (1, 1), dtype=theano.config.floatX),
                                            broadcastable=(False, True))
        self.actions_shared = theano.shared(np.zeros((1, 1), dtype='int32'),
                                            broadcastable=(False, True))

        self._givens = \
            {self.q_layers[0].input_var:
             self.states_shared[self._idx*self._batch_size:
                                (self._idx+1)*self._batch_size, :, :, :],
             self.next_layers[0].input_var:
             self.states_shared_next[self._idx*self._batch_size:
                                     (self._idx+1)*self._batch_size, :, :, :],

             self.rewards:
             self.rewards_shared[self._idx*self._batch_size:
                                 (self._idx+1)*self._batch_size, :],
             self.actions:
             self.actions_shared[self._idx*self._batch_size:
                                 (self._idx+1)*self._batch_size, :]
             }

        self._updates = layers.gen_updates_rmsprop_and_nesterov_momentum(
            self._loss, self._parameters, learning_rate=self.learning_rate,
            rho=0.9, momentum=0.9, epsilon=1e-6)

        self._train = theano.function([self._idx],
                                      self._loss,
                                      givens=self._givens,
                                      updates=self._updates)
        self._compute_loss = theano.function([self._idx],
                                             self._loss,
                                             givens=self._givens)
        self._compute_q_vals = \
            theano.function([self.q_layers[0].input_var],
                            self.q_layers[-1].predictions(),
                            on_unused_input='ignore')
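
The loss built above is the standard one-step Q-learning objective: `theano.gradient.consider_constant(target)` blocks gradients from flowing into the next-state network, and `build_mask` (defined elsewhere in this codebase) zeroes the TD error for every action that was not actually taken. Assuming the mask is one-hot per row, a NumPy re-derivation of the same loss, as a sketch for clarity rather than the code the class runs:

import numpy as np

def masked_td_loss(q_vals, next_q_vals, rewards, actions, discount=0.9):
    # q_vals, next_q_vals: (batch, num_actions); rewards, actions: (batch, 1)
    target = rewards + discount * next_q_vals.max(axis=1, keepdims=True)
    diff = target - q_vals
    mask = np.zeros_like(diff)
    mask[np.arange(len(actions)), actions.ravel()] = 1.0  # chosen actions only
    diff_masked = diff * mask
    # mean over all entries * num_actions == batch mean of per-sample sums
    return (diff_masked ** 2).mean() * diff.shape[1]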
Example #3
l5 = layers.DenseLayer(l4,
                       n_outputs=37,
                       weights_std=0.01,
                       init_bias_value=0.1,
                       dropout=0.5,
                       nonlinearity=layers.identity)

# l6 = layers.OutputLayer(l5, error_measure='mse')
l6 = custom.OptimisedDivGalaxyOutputLayer(
    l5
)  # this incorporates the constraints on the output (probabilities sum to one, weighting, etc.)

train_loss_nonorm = l6.error(normalisation=False)
train_loss = l6.error()  # but compute and print this!
valid_loss = l6.error(dropout_active=False)
all_parameters = layers.all_parameters(l6)
all_bias_parameters = layers.all_bias_parameters(l6)

xs_shared = [
    theano.shared(np.zeros((1, 1, 1, 1), dtype=theano.config.floatX))
    for _ in xrange(num_input_representations)
]
y_shared = theano.shared(np.zeros((1, 1), dtype=theano.config.floatX))

learning_rate = theano.shared(
    np.array(LEARNING_RATE_SCHEDULE[0], dtype=theano.config.floatX))

idx = T.lscalar('idx')

givens = {
    l0.input_var: xs_shared[0][idx * BATCH_SIZE:(idx + 1) * BATCH_SIZE],
    l0_45.input_var: xs_shared[1][idx * BATCH_SIZE:(idx + 1) * BATCH_SIZE],
    l6.target_var: y_shared[idx * BATCH_SIZE:(idx + 1) * BATCH_SIZE],
}

Example #4
l4a = layers.DenseLayer(j3, n_outputs=4096, weights_std=0.001, init_bias_value=0.01, dropout=0.5, nonlinearity=layers.identity)
l4b = layers.FeatureMaxPoolingLayer(l4a, pool_size=2, feature_dim=1, implementation='reshape')
l4c = layers.DenseLayer(l4b, n_outputs=4096, weights_std=0.001, init_bias_value=0.01, dropout=0.5, nonlinearity=layers.identity)
l4 = layers.FeatureMaxPoolingLayer(l4c, pool_size=2, feature_dim=1, implementation='reshape')

# l5 = layers.DenseLayer(l4, n_outputs=37, weights_std=0.01, init_bias_value=0.0, dropout=0.5, nonlinearity=custom.clip_01) #  nonlinearity=layers.identity)
l5 = layers.DenseLayer(l4, n_outputs=37, weights_std=0.01, init_bias_value=0.1, dropout=0.5, nonlinearity=layers.identity)

# l6 = layers.OutputLayer(l5, error_measure='mse')
l6 = custom.OptimisedDivGalaxyOutputLayer(l5) # this incorporates the constraints on the output (probabilities sum to one, weighting, etc.)

train_loss_nonorm = l6.error(normalisation=False)
train_loss = l6.error() # but compute and print this!
valid_loss = l6.error(dropout_active=False)
all_parameters = layers.all_parameters(l6)
all_bias_parameters = layers.all_bias_parameters(l6)

xs_shared = [theano.shared(np.zeros((1,1,1,1), dtype=theano.config.floatX)) for _ in xrange(num_input_representations)]
y_shared = theano.shared(np.zeros((1,1), dtype=theano.config.floatX))

learning_rate = theano.shared(np.array(LEARNING_RATE_SCHEDULE[0], dtype=theano.config.floatX))

idx = T.lscalar('idx')

givens = {
    l0.input_var: xs_shared[0][idx*BATCH_SIZE:(idx+1)*BATCH_SIZE],
    l0_45.input_var: xs_shared[1][idx*BATCH_SIZE:(idx+1)*BATCH_SIZE],
    l6.target_var: y_shared[idx*BATCH_SIZE:(idx+1)*BATCH_SIZE],
}
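
Examples #3 and #4 are excerpts of the same Galaxy Zoo model (#3 is a reformatted copy of #4), and they use the same `idx`/`givens` idiom as the classes above: upload a chunk into a shared variable once, then compile functions parameterized by a scalar batch index. The pattern in isolation, as a self-contained toy with illustrative names:

import numpy as np
import theano
import theano.tensor as T

BATCH_SIZE = 4
x_shared = theano.shared(
    np.arange(16, dtype=theano.config.floatX).reshape(8, 2))

idx = T.lscalar('idx')
x = T.matrix('x')
# givens substitutes an on-device slice of the shared array for the symbolic
# input, so each call transfers only the scalar index to the function.
batch_mean = theano.function(
    [idx], x.mean(),
    givens={x: x_shared[idx * BATCH_SIZE:(idx + 1) * BATCH_SIZE]})

print(batch_mean(0))  # mean of rows 0..3
print(batch_mean(1))  # mean of rows 4..7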
Example #5
    def __init__(self, num_actions, phi_length, width, height,
                 discount, learning_rate, decay, momentum=0,
                 batch_size=32,
                 approximator='none'):
        self._batch_size = batch_size
        self._num_input_features = phi_length
        self._phi_length = phi_length
        self._img_width = width
        self._img_height = height
        self._discount = discount
        self.num_actions = num_actions
        self.learning_rate = learning_rate
        self.decay = decay
        self.momentum = momentum
        self.scale_input_by = 255.0

        # CONSTRUCT THE LAYERS
        self.q_layers = []
        self.q_layers.append(
            layers.Input2DLayer(self._batch_size, self._num_input_features,
                                self._img_height, self._img_width,
                                self.scale_input_by))

        if approximator == 'cuda_conv':
            self.q_layers.append(
                cc_layers.ShuffleBC01ToC01BLayer(self.q_layers[-1]))
            self.q_layers.append(
                cc_layers.CudaConvnetConv2DLayer(self.q_layers[-1],
                                                 n_filters=16,
                                                 filter_size=8,
                                                 stride=4,
                                                 weights_std=.01,
                                                 init_bias_value=0.1))
            self.q_layers.append(
                cc_layers.CudaConvnetConv2DLayer(self.q_layers[-1],
                                                 n_filters=32,
                                                 filter_size=4,
                                                 stride=2,
                                                 weights_std=.01,
                                                 init_bias_value=0.1))
            self.q_layers.append(
                cc_layers.ShuffleC01BToBC01Layer(self.q_layers[-1]))

        elif approximator == 'conv':
            self.q_layers.append(
                layers.StridedConv2DLayer(self.q_layers[-1],
                                          n_filters=16,
                                          filter_width=8,
                                          filter_height=8,
                                          stride_x=4,
                                          stride_y=4,
                                          weights_std=.01,
                                          init_bias_value=0.01))

            self.q_layers.append(
                layers.StridedConv2DLayer(self.q_layers[-1],
                                          n_filters=32,
                                          filter_width=4,
                                          filter_height=4,
                                          stride_x=2,
                                          stride_y=2,
                                          weights_std=.01,
                                          init_bias_value=0.01))
        if approximator in ('cuda_conv', 'conv'):
            self.q_layers.append(layers.DenseLayer(self.q_layers[-1],
                                                   n_outputs=256,
                                                   weights_std=0.01,
                                                   init_bias_value=0.1,
                                                   dropout=0,
                                                   nonlinearity=layers.rectify))

            self.q_layers.append(
                layers.DenseLayer(self.q_layers[-1],
                                  n_outputs=num_actions,
                                  weights_std=0.01,
                                  init_bias_value=0.1,
                                  dropout=0,
                                  nonlinearity=layers.identity))


        if approximator == 'none':
            self.q_layers.append(
                layers.DenseLayerNoBias(self.q_layers[-1],
                                        n_outputs=num_actions,
                                        weights_std=0.00,
                                        dropout=0,
                                        nonlinearity=layers.identity))


        self.q_layers.append(layers.OutputLayer(self.q_layers[-1]))

        for i in range(len(self.q_layers)-1):
            print self.q_layers[i].get_output_shape()


        # Now create a network (using the same weights)
        # for next state q values
        self.next_layers = copy_layers(self.q_layers)
        self.next_layers[0] = layers.Input2DLayer(self._batch_size,
                                                  self._num_input_features,
                                                  self._img_height,
                                                  self._img_width,
                                                  self.scale_input_by)
        self.next_layers[1].input_layer = self.next_layers[0]

        self.rewards = T.col()
        self.actions = T.icol()

        # Build the loss function ...
        q_vals = self.q_layers[-1].predictions()
        next_q_vals = self.next_layers[-1].predictions()
        next_maxes = T.max(next_q_vals, axis=1, keepdims=True)
        target = self.rewards + discount * next_maxes
        target = theano.gradient.consider_constant(target)
        diff = target - q_vals
        # Zero out all entries for actions that were not chosen...
        mask = build_mask(T.zeros_like(diff), self.actions, 1.0)
        diff_masked = diff * mask
        error = T.mean(diff_masked ** 2)
        self._loss = error * diff_masked.shape[1]  # batch mean of per-sample summed squared TD error

        self._parameters = layers.all_parameters(self.q_layers[-1])

        self._idx = T.lscalar('idx')

        # CREATE VARIABLES FOR INPUT AND OUTPUT
        self.states_shared = theano.shared(
            np.zeros((1, 1, 1, 1), dtype=theano.config.floatX))
        self.states_shared_next = theano.shared(
            np.zeros((1, 1, 1, 1), dtype=theano.config.floatX))
        self.rewards_shared = theano.shared(
            np.zeros((1, 1), dtype=theano.config.floatX),
            broadcastable=(False, True))
        self.actions_shared = theano.shared(
            np.zeros((1, 1), dtype='int32'), broadcastable=(False, True))

        self._givens = \
            {self.q_layers[0].input_var:
             self.states_shared[self._idx*self._batch_size:
                                (self._idx+1)*self._batch_size, :, :, :],
             self.next_layers[0].input_var:
             self.states_shared_next[self._idx*self._batch_size:
                                     (self._idx+1)*self._batch_size, :, :, :],

             self.rewards:
             self.rewards_shared[self._idx*self._batch_size:
                                 (self._idx+1)*self._batch_size, :],
             self.actions:
             self.actions_shared[self._idx*self._batch_size:
                                 (self._idx+1)*self._batch_size, :]
             }

        if self.momentum != 0:
            self._updates = layers.gen_updates_rmsprop_and_nesterov_momentum(
                self._loss, self._parameters, learning_rate=self.learning_rate,
                rho=self.decay, momentum=self.momentum, epsilon=1e-6)
        else:
            self._updates = layers.gen_updates_rmsprop(self._loss,
                self._parameters, learning_rate=self.learning_rate,
                rho=self.decay, epsilon=1e-6)

        self._train = theano.function([self._idx], self._loss,
                                      givens=self._givens,
                                      updates=self._updates)
        self._compute_loss = theano.function([self._idx],
                                             self._loss,
                                             givens=self._givens)
        self._compute_q_vals = \
            theano.function([self.q_layers[0].input_var],
                            self.q_layers[-1].predictions(),
                            on_unused_input='ignore')
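
Example #5 generalizes example #2 by exposing `decay` and `momentum` and falling back to plain RMSprop when momentum is zero. The update generators live in the local `layers` module; for reference, one plain RMSprop step in NumPy (the exact variant inside `layers.gen_updates_rmsprop` may differ in detail):

import numpy as np

def rmsprop_step(param, grad, acc, learning_rate=0.01, rho=0.9, epsilon=1e-6):
    # acc is the exponential moving average of squared gradients.
    acc = rho * acc + (1.0 - rho) * grad ** 2
    return param - learning_rate * grad / np.sqrt(acc + epsilon), acc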
Example #6
    def __init__(self, nn_layers, L2_reg=0.0001, learning_rate=0.1, batch_size=32, discrete_target=False):

        self.layers = nn_layers
        #TODO: maybe initialize layers and set all inputs as prev outputs

        self._batch_size = batch_size

        # self._fprop = theano.function(
        #     [self.layers[0].input_var],
        #     self.output()
        # )

        self.parameters = layers.all_parameters(self.layers[-1])
        # self.parameters = [param for layer in nn_layers[1:] for param in layer.params] #nn_layers[5].params + nn_layers[4].params + nn_layers[3].params + nn_layers[2].params + nn_layers[1].params

        self.cost = self.layers[-1].error()
        self.error_rate = self.layers[-1].error_rate()

        self.regularization = sum([(W_or_b ** 2).sum() for W_or_b in self.parameters])

        self.updates = layers.gen_updates_sgd(self.cost + self.regularization * L2_reg, self.parameters, learning_rate) # the last layer must be a layers.OutputLayer
        # self.updates = layers.gen_updates_sgd(cost, self.parameters, learning_rate) # the last layer must be a layers.OutputLayer

        # self.train_model = theano.function(
        #     inputs=[self.layers[0].input_var, self.layers[-1].target_var],
        #     updates=self.updates,
        #     outputs=self.cost
        # )

        self._idx = T.lscalar('idx')
        self.x_shared = theano.shared(
            np.zeros(self.layers[0].get_output_shape(), dtype=theano.config.floatX)
        )
        self.y_shared = theano.shared(
            np.zeros(self.layers[-1].get_output_shape(), dtype=theano.config.floatX)
        )
        self.x_shared_validate = theano.shared(
            np.zeros(self.layers[0].get_output_shape(), dtype=theano.config.floatX)
        )
        self.y_shared_validate = theano.shared(
            np.zeros(self.layers[-1].get_output_shape(), dtype=theano.config.floatX)
        )
        self.x_shared_test = theano.shared(
            np.zeros(self.layers[0].get_output_shape(), dtype=theano.config.floatX)
        )
        self.y_shared_test = theano.shared(
            np.zeros(self.layers[-1].get_output_shape(), dtype=theano.config.floatX)
        )
        self.y_converted = T.cast(self.y_shared, 'int32') if discrete_target else self.y_shared
        self.y_converted_validate = T.cast(self.y_shared_validate, 'int32') if discrete_target else self.y_shared_validate
        self.y_converted_test = T.cast(self.y_shared_test, 'int32') if discrete_target else self.y_shared_test

        self._givens = {
            self.layers[0].input_var: self.x_shared[self._idx * self._batch_size: (self._idx+1)*self._batch_size],
            self.layers[-1].target_var: self.y_converted[self._idx * self._batch_size: (self._idx+1)*self._batch_size],
        }

        self._givens_validate = {
            self.layers[0].input_var: self.x_shared_validate[self._idx * self._batch_size: (self._idx+1)*self._batch_size],
            self.layers[-1].target_var: self.y_converted_validate[self._idx * self._batch_size: (self._idx+1)*self._batch_size],
        }

        self._givens_test = {
            self.layers[0].input_var: self.x_shared_test[self._idx * self._batch_size: (self._idx+1)*self._batch_size],
            self.layers[-1].target_var: self.y_converted_test[self._idx * self._batch_size: (self._idx+1)*self._batch_size],
        }

        self._train_model_batch = theano.function(
            inputs=[self._idx],
            updates=self.updates,
            givens=self._givens,
            outputs=self.cost
        )

        self._validate_model_batch = theano.function(
            inputs=[self._idx],
            givens=self._givens_validate,
            outputs=self.error_rate
        )

        self._test_model_batch = theano.function(
            inputs=[self._idx],
            givens=self._givens_test,
            outputs=self.error_rate
        )

        self._output_model_batch = theano.function(
            inputs=[self._idx],
            givens=self._givens,
            outputs=self.output()
        )
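
A detail worth noting in examples #1 and #6: targets are stored in `floatX` shared variables even when they are integer class labels, because shared variables had to be float32 to live on the GPU with the Theano backends of that era; `T.cast(..., 'int32')` then recovers integer targets inside the graph. A minimal illustration of the pattern:

import numpy as np
import theano
import theano.tensor as T

y_shared = theano.shared(np.zeros((4, 1), dtype=theano.config.floatX))
y_int = T.cast(y_shared, 'int32')  # the cast is part of the symbolic graph

y_shared.set_value(np.array([[0.], [2.], [1.], [2.]],
                            dtype=theano.config.floatX))
print(y_int.eval().ravel())  # -> [0 2 1 2]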