def __init__(self, nn_layers, L2_reg=0.0001, learning_rate=0.1,
             batch_size=32, discrete_target=False):
    self.layers = nn_layers  # TODO: maybe initialize layers and set all inputs as prev outputs
    self._batch_size = batch_size

    # self._fprop = theano.function(
    #     [self.layers[0].input_var],
    #     self.output()
    # )

    self.parameters = layers.all_parameters(self.layers[-1])
    # self.parameters = [param for layer in nn_layers[1:] for param in layer.params]

    self.cost = self.layers[-1].error()
    self.error_rate = self.layers[-1].error_rate()
    # L2 penalty over all weights and biases.
    self.regularization = sum((W_or_b ** 2).sum() for W_or_b in self.parameters)

    # The last layer must be a layers.OutputLayer.
    self.updates = layers.gen_updates_sgd(
        self.cost + self.regularization * L2_reg,
        self.parameters,
        learning_rate)
    # self.updates = layers.gen_updates_sgd(cost, self.parameters, learning_rate)

    # self.train_model = theano.function(
    #     inputs=[self.layers[0].input_var, self.layers[-1].target_var],
    #     updates=self.updates,
    #     outputs=self.cost
    # )

    self._idx = T.lscalar('idx')

    # Shared variables holding the train / validation / test data on the device.
    self.x_shared = theano.shared(
        np.zeros(self.layers[0].get_output_shape(), dtype=theano.config.floatX))
    self.y_shared = theano.shared(
        np.zeros(self.layers[-1].get_output_shape(), dtype=theano.config.floatX))
    self.x_shared_validate = theano.shared(
        np.zeros(self.layers[0].get_output_shape(), dtype=theano.config.floatX))
    self.y_shared_validate = theano.shared(
        np.zeros(self.layers[-1].get_output_shape(), dtype=theano.config.floatX))
    self.x_shared_test = theano.shared(
        np.zeros(self.layers[0].get_output_shape(), dtype=theano.config.floatX))
    self.y_shared_test = theano.shared(
        np.zeros(self.layers[-1].get_output_shape(), dtype=theano.config.floatX))

    # Cast targets to int32 when they are discrete class labels.
    self.y_converted = (T.cast(self.y_shared, 'int32')
                        if discrete_target else self.y_shared)
    self.y_converted_validate = (T.cast(self.y_shared_validate, 'int32')
                                 if discrete_target else self.y_shared_validate)
    self.y_converted_test = (T.cast(self.y_shared_test, 'int32')
                             if discrete_target else self.y_shared_test)

    self._givens = {
        self.layers[0].input_var:
            self.x_shared[self._idx * self._batch_size:
                          (self._idx + 1) * self._batch_size],
        self.layers[-1].target_var:
            self.y_converted[self._idx * self._batch_size:
                             (self._idx + 1) * self._batch_size],
    }
    self._givens_validate = {
        self.layers[0].input_var:
            self.x_shared_validate[self._idx * self._batch_size:
                                   (self._idx + 1) * self._batch_size],
        self.layers[-1].target_var:
            self.y_converted_validate[self._idx * self._batch_size:
                                      (self._idx + 1) * self._batch_size],
    }
    self._givens_test = {
        self.layers[0].input_var:
            self.x_shared_test[self._idx * self._batch_size:
                               (self._idx + 1) * self._batch_size],
        self.layers[-1].target_var:
            self.y_converted_test[self._idx * self._batch_size:
                                  (self._idx + 1) * self._batch_size],
    }

    self._train_model_batch = theano.function(
        inputs=[self._idx],
        updates=self.updates,
        givens=self._givens,
        outputs=self.cost)
    self._validate_model_batch = theano.function(
        inputs=[self._idx],
        givens=self._givens_validate,
        outputs=self.error_rate)
    self._test_model_batch = theano.function(
        inputs=[self._idx],
        givens=self._givens_test,
        outputs=self.error_rate)
    # Note: this function also applies self.updates, so calling it performs a
    # training step in addition to returning the network output.
    self._output_model_batch = theano.function(
        inputs=[self._idx],
        updates=self.updates,
        givens=self._givens,
        outputs=self.output())
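# ---------------------------------------------------------------------------
# Hedged usage sketch (not part of the original source). It assumes the
# enclosing class is called `NeuralNet` and that `train_x` / `train_y` are
# NumPy arrays shaped like the input and output layers; those names are
# illustrative only. The pattern shown is the one the constructor above sets
# up: load data into the shared variables once, then drive the compiled batch
# functions by minibatch index.
#
#   net = NeuralNet([l_in, l_hidden, l_out], L2_reg=1e-4, learning_rate=0.1,
#                   batch_size=32, discrete_target=True)
#   net.x_shared.set_value(train_x.astype(theano.config.floatX))
#   net.y_shared.set_value(train_y.astype(theano.config.floatX))
#   n_batches = train_x.shape[0] // net._batch_size
#   for epoch in xrange(10):
#       costs = [net._train_model_batch(b) for b in xrange(n_batches)]
# ---------------------------------------------------------------------------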
def __init__(self, num_actions, phi_length, width, height, discount=.9,
             learning_rate=.01, batch_size=32, approximator='none'):
    self._batch_size = batch_size
    self._num_input_features = phi_length
    self._phi_length = phi_length
    self._img_width = width
    self._img_height = height
    self._discount = discount
    self.num_actions = num_actions
    self.learning_rate = learning_rate
    self.scale_input_by = 255.0

    print "neural net initialization, lr is: ", self.learning_rate, approximator

    # CONSTRUCT THE LAYERS
    self.q_layers = []
    self.q_layers.append(
        layers.Input2DLayer(self._batch_size,
                            self._num_input_features,
                            self._img_height,
                            self._img_width,
                            self.scale_input_by))

    if approximator == 'cuda_conv':
        self.q_layers.append(
            cc_layers.ShuffleBC01ToC01BLayer(self.q_layers[-1]))
        self.q_layers.append(
            cc_layers.CudaConvnetConv2DLayer(self.q_layers[-1],
                                             n_filters=16,
                                             filter_size=8,
                                             stride=4,
                                             weights_std=.01,
                                             init_bias_value=0.1))
        self.q_layers.append(
            cc_layers.CudaConvnetConv2DLayer(self.q_layers[-1],
                                             n_filters=32,
                                             filter_size=4,
                                             stride=2,
                                             weights_std=.01,
                                             init_bias_value=0.1))
        self.q_layers.append(
            cc_layers.ShuffleC01BToBC01Layer(self.q_layers[-1]))

    elif approximator == 'conv':
        self.q_layers.append(
            layers.StridedConv2DLayer(self.q_layers[-1],
                                      n_filters=16,
                                      filter_width=8,
                                      filter_height=8,
                                      stride_x=4,
                                      stride_y=4,
                                      weights_std=.01,
                                      init_bias_value=0.01))
        self.q_layers.append(
            layers.StridedConv2DLayer(self.q_layers[-1],
                                      n_filters=32,
                                      filter_width=4,
                                      filter_height=4,
                                      stride_x=2,
                                      stride_y=2,
                                      weights_std=.01,
                                      init_bias_value=0.01))

    if approximator == 'cuda_conv' or approximator == 'conv':
        self.q_layers.append(
            layers.DenseLayer(self.q_layers[-1],
                              n_outputs=256,
                              weights_std=0.01,
                              init_bias_value=0.1,
                              dropout=0,
                              nonlinearity=layers.rectify))
        self.q_layers.append(
            layers.DenseLayer(self.q_layers[-1],
                              n_outputs=num_actions,
                              weights_std=0.01,
                              init_bias_value=0.1,
                              dropout=0,
                              nonlinearity=layers.identity))

    if approximator == 'none':
        self.q_layers.append(
            layers.DenseLayerNoBias(self.q_layers[-1],
                                    n_outputs=num_actions,
                                    weights_std=0.00,
                                    dropout=0,
                                    nonlinearity=layers.identity))

    self.q_layers.append(layers.OutputLayer(self.q_layers[-1]))

    for i in range(len(self.q_layers) - 1):
        print self.q_layers[i].get_output_shape()

    # Now create a network (using the same weights)
    # for next state q values
    self.next_layers = copy_layers(self.q_layers)
    self.next_layers[0] = layers.Input2DLayer(self._batch_size,
                                              self._num_input_features,
                                              self._img_height,
                                              self._img_width,
                                              self.scale_input_by)
    self.next_layers[1].input_layer = self.next_layers[0]

    self.rewards = T.col()
    self.actions = T.icol()

    # Build the loss function ...
    print "building loss function"
    q_vals = self.q_layers[-1].predictions()
    next_q_vals = self.next_layers[-1].predictions()
    next_maxes = T.max(next_q_vals, axis=1, keepdims=True)
    target = self.rewards + discount * next_maxes
    # Treat the bootstrapped target as a constant so gradients do not flow
    # through the next-state network.
    target = theano.gradient.consider_constant(target)
    diff = target - q_vals

    # Zero out all entries for actions that were not chosen...
    mask = build_mask(T.zeros_like(diff), self.actions, 1.0)
    diff_masked = diff * mask
    error = T.mean(diff_masked ** 2)
    self._loss = error * diff_masked.shape[1]

    self._parameters = layers.all_parameters(self.q_layers[-1])

    self._idx = T.lscalar('idx')

    # CREATE VARIABLES FOR INPUT AND OUTPUT
    self.states_shared = theano.shared(
        np.zeros((1, 1, 1, 1), dtype=theano.config.floatX))
    self.states_shared_next = theano.shared(
        np.zeros((1, 1, 1, 1), dtype=theano.config.floatX))
    self.rewards_shared = theano.shared(
        np.zeros((1, 1), dtype=theano.config.floatX),
        broadcastable=(False, True))
    self.actions_shared = theano.shared(
        np.zeros((1, 1), dtype='int32'),
        broadcastable=(False, True))

    self._givens = {
        self.q_layers[0].input_var:
            self.states_shared[self._idx * self._batch_size:
                               (self._idx + 1) * self._batch_size, :, :, :],
        self.next_layers[0].input_var:
            self.states_shared_next[self._idx * self._batch_size:
                                    (self._idx + 1) * self._batch_size, :, :, :],
        self.rewards:
            self.rewards_shared[self._idx * self._batch_size:
                                (self._idx + 1) * self._batch_size, :],
        self.actions:
            self.actions_shared[self._idx * self._batch_size:
                                (self._idx + 1) * self._batch_size, :],
    }

    self._updates = layers.gen_updates_rmsprop_and_nesterov_momentum(
        self._loss,
        self._parameters,
        learning_rate=self.learning_rate,
        rho=0.9,
        momentum=0.9,
        epsilon=1e-6)

    self._train = theano.function([self._idx], self._loss,
                                  givens=self._givens,
                                  updates=self._updates)
    self._compute_loss = theano.function([self._idx], self._loss,
                                         givens=self._givens)
    self._compute_q_vals = theano.function([self.q_layers[0].input_var],
                                           self.q_layers[-1].predictions(),
                                           on_unused_input='ignore')
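# ---------------------------------------------------------------------------
# Hedged sketch (not part of the original source): one way the compiled
# `_train` function above might be driven. The method name and the exact
# array shapes are assumptions; the point illustrated is that minibatch data
# is copied into the shared variables and `_train` is then called with a
# minibatch index.
#
#   def train_on_batch(self, states, actions, rewards, next_states):
#       self.states_shared.set_value(states)            # (N, phi, H, W) floatX
#       self.states_shared_next.set_value(next_states)  # (N, phi, H, W) floatX
#       self.actions_shared.set_value(actions)          # (N, 1) int32
#       self.rewards_shared.set_value(rewards)          # (N, 1) floatX
#       return self._train(0)                           # single minibatch at index 0
# ---------------------------------------------------------------------------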
l5 = layers.DenseLayer(l4,
                       n_outputs=37,
                       weights_std=0.01,
                       init_bias_value=0.1,
                       dropout=0.5,
                       nonlinearity=layers.identity)

# l6 = layers.OutputLayer(l5, error_measure='mse')
# This incorporates the constraints on the output
# (probabilities sum to one, weighting, etc.)
l6 = custom.OptimisedDivGalaxyOutputLayer(l5)

train_loss_nonorm = l6.error(normalisation=False)
train_loss = l6.error()  # but compute and print this!
valid_loss = l6.error(dropout_active=False)
all_parameters = layers.all_parameters(l6)
all_bias_parameters = layers.all_bias_parameters(l6)

xs_shared = [theano.shared(np.zeros((1, 1, 1, 1), dtype=theano.config.floatX))
             for _ in xrange(num_input_representations)]
y_shared = theano.shared(np.zeros((1, 1), dtype=theano.config.floatX))

learning_rate = theano.shared(
    np.array(LEARNING_RATE_SCHEDULE[0], dtype=theano.config.floatX))

idx = T.lscalar('idx')

givens = {
    l0.input_var: xs_shared[0][idx * BATCH_SIZE:(idx + 1) * BATCH_SIZE],
l4a = layers.DenseLayer(j3,
                        n_outputs=4096,
                        weights_std=0.001,
                        init_bias_value=0.01,
                        dropout=0.5,
                        nonlinearity=layers.identity)
l4b = layers.FeatureMaxPoolingLayer(l4a, pool_size=2, feature_dim=1,
                                    implementation='reshape')
l4c = layers.DenseLayer(l4b,
                        n_outputs=4096,
                        weights_std=0.001,
                        init_bias_value=0.01,
                        dropout=0.5,
                        nonlinearity=layers.identity)
l4 = layers.FeatureMaxPoolingLayer(l4c, pool_size=2, feature_dim=1,
                                   implementation='reshape')

# l5 = layers.DenseLayer(l4, n_outputs=37, weights_std=0.01, init_bias_value=0.0,
#                        dropout=0.5, nonlinearity=custom.clip_01)  # nonlinearity=layers.identity)
l5 = layers.DenseLayer(l4,
                       n_outputs=37,
                       weights_std=0.01,
                       init_bias_value=0.1,
                       dropout=0.5,
                       nonlinearity=layers.identity)

# l6 = layers.OutputLayer(l5, error_measure='mse')
# This incorporates the constraints on the output
# (probabilities sum to one, weighting, etc.)
l6 = custom.OptimisedDivGalaxyOutputLayer(l5)

train_loss_nonorm = l6.error(normalisation=False)
train_loss = l6.error()  # but compute and print this!
valid_loss = l6.error(dropout_active=False)
all_parameters = layers.all_parameters(l6)
all_bias_parameters = layers.all_bias_parameters(l6)

xs_shared = [theano.shared(np.zeros((1, 1, 1, 1), dtype=theano.config.floatX))
             for _ in xrange(num_input_representations)]
y_shared = theano.shared(np.zeros((1, 1), dtype=theano.config.floatX))

learning_rate = theano.shared(
    np.array(LEARNING_RATE_SCHEDULE[0], dtype=theano.config.floatX))

idx = T.lscalar('idx')

givens = {
    l0.input_var: xs_shared[0][idx * BATCH_SIZE:(idx + 1) * BATCH_SIZE],
    l0_45.input_var: xs_shared[1][idx * BATCH_SIZE:(idx + 1) * BATCH_SIZE],
    l6.target_var: y_shared[idx * BATCH_SIZE:(idx + 1) * BATCH_SIZE],
}
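# ---------------------------------------------------------------------------
# Hedged sketch (not part of the original source): wiring the pieces above
# into a compiled Theano training function. `layers.gen_updates_sgd` is used
# because it appears elsewhere in this codebase and is assumed to accept the
# shared `learning_rate`; the original script may use a momentum-based update
# generator instead.
#
#   updates = layers.gen_updates_sgd(train_loss, all_parameters, learning_rate)
#   train_model = theano.function([idx], train_loss, givens=givens, updates=updates)
# ---------------------------------------------------------------------------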
def __init__(self, num_actions, phi_length, width, height, discount,
             learning_rate, decay, momentum=0, batch_size=32,
             approximator='none'):
    self._batch_size = batch_size
    self._num_input_features = phi_length
    self._phi_length = phi_length
    self._img_width = width
    self._img_height = height
    self._discount = discount
    self.num_actions = num_actions
    self.learning_rate = learning_rate
    self.decay = decay
    self.momentum = momentum
    self.scale_input_by = 255.0

    # CONSTRUCT THE LAYERS
    self.q_layers = []
    self.q_layers.append(
        layers.Input2DLayer(self._batch_size,
                            self._num_input_features,
                            self._img_height,
                            self._img_width,
                            self.scale_input_by))

    if approximator == 'cuda_conv':
        self.q_layers.append(
            cc_layers.ShuffleBC01ToC01BLayer(self.q_layers[-1]))
        self.q_layers.append(
            cc_layers.CudaConvnetConv2DLayer(self.q_layers[-1],
                                             n_filters=16,
                                             filter_size=8,
                                             stride=4,
                                             weights_std=.01,
                                             init_bias_value=0.1))
        self.q_layers.append(
            cc_layers.CudaConvnetConv2DLayer(self.q_layers[-1],
                                             n_filters=32,
                                             filter_size=4,
                                             stride=2,
                                             weights_std=.01,
                                             init_bias_value=0.1))
        self.q_layers.append(
            cc_layers.ShuffleC01BToBC01Layer(self.q_layers[-1]))

    elif approximator == 'conv':
        self.q_layers.append(
            layers.StridedConv2DLayer(self.q_layers[-1],
                                      n_filters=16,
                                      filter_width=8,
                                      filter_height=8,
                                      stride_x=4,
                                      stride_y=4,
                                      weights_std=.01,
                                      init_bias_value=0.01))
        self.q_layers.append(
            layers.StridedConv2DLayer(self.q_layers[-1],
                                      n_filters=32,
                                      filter_width=4,
                                      filter_height=4,
                                      stride_x=2,
                                      stride_y=2,
                                      weights_std=.01,
                                      init_bias_value=0.01))

    if approximator == 'cuda_conv' or approximator == 'conv':
        self.q_layers.append(
            layers.DenseLayer(self.q_layers[-1],
                              n_outputs=256,
                              weights_std=0.01,
                              init_bias_value=0.1,
                              dropout=0,
                              nonlinearity=layers.rectify))
        self.q_layers.append(
            layers.DenseLayer(self.q_layers[-1],
                              n_outputs=num_actions,
                              weights_std=0.01,
                              init_bias_value=0.1,
                              dropout=0,
                              nonlinearity=layers.identity))

    if approximator == 'none':
        self.q_layers.append(
            layers.DenseLayerNoBias(self.q_layers[-1],
                                    n_outputs=num_actions,
                                    weights_std=0.00,
                                    dropout=0,
                                    nonlinearity=layers.identity))

    self.q_layers.append(layers.OutputLayer(self.q_layers[-1]))

    for i in range(len(self.q_layers) - 1):
        print self.q_layers[i].get_output_shape()

    # Now create a network (using the same weights)
    # for next state q values
    self.next_layers = copy_layers(self.q_layers)
    self.next_layers[0] = layers.Input2DLayer(self._batch_size,
                                              self._num_input_features,
                                              self._img_height,
                                              self._img_width,
                                              self.scale_input_by)
    self.next_layers[1].input_layer = self.next_layers[0]

    self.rewards = T.col()
    self.actions = T.icol()

    # Build the loss function ...
    q_vals = self.q_layers[-1].predictions()
    next_q_vals = self.next_layers[-1].predictions()
    next_maxes = T.max(next_q_vals, axis=1, keepdims=True)
    target = self.rewards + discount * next_maxes
    # Treat the bootstrapped target as a constant so gradients do not flow
    # through the next-state network.
    target = theano.gradient.consider_constant(target)
    diff = target - q_vals

    # Zero out all entries for actions that were not chosen...
    mask = build_mask(T.zeros_like(diff), self.actions, 1.0)
    diff_masked = diff * mask
    error = T.mean(diff_masked ** 2)
    self._loss = error * diff_masked.shape[1]

    self._parameters = layers.all_parameters(self.q_layers[-1])

    self._idx = T.lscalar('idx')

    # CREATE VARIABLES FOR INPUT AND OUTPUT
    self.states_shared = theano.shared(
        np.zeros((1, 1, 1, 1), dtype=theano.config.floatX))
    self.states_shared_next = theano.shared(
        np.zeros((1, 1, 1, 1), dtype=theano.config.floatX))
    self.rewards_shared = theano.shared(
        np.zeros((1, 1), dtype=theano.config.floatX),
        broadcastable=(False, True))
    self.actions_shared = theano.shared(
        np.zeros((1, 1), dtype='int32'),
        broadcastable=(False, True))

    self._givens = {
        self.q_layers[0].input_var:
            self.states_shared[self._idx * self._batch_size:
                               (self._idx + 1) * self._batch_size, :, :, :],
        self.next_layers[0].input_var:
            self.states_shared_next[self._idx * self._batch_size:
                                    (self._idx + 1) * self._batch_size, :, :, :],
        self.rewards:
            self.rewards_shared[self._idx * self._batch_size:
                                (self._idx + 1) * self._batch_size, :],
        self.actions:
            self.actions_shared[self._idx * self._batch_size:
                                (self._idx + 1) * self._batch_size, :],
    }

    if self.momentum != 0:
        self._updates = layers.gen_updates_rmsprop_and_nesterov_momentum(
            self._loss,
            self._parameters,
            learning_rate=self.learning_rate,
            rho=self.decay,
            momentum=self.momentum,
            epsilon=1e-6)
    else:
        self._updates = layers.gen_updates_rmsprop(
            self._loss,
            self._parameters,
            learning_rate=self.learning_rate,
            rho=self.decay,
            epsilon=1e-6)

    self._train = theano.function([self._idx], self._loss,
                                  givens=self._givens,
                                  updates=self._updates)
    self._compute_loss = theano.function([self._idx], self._loss,
                                         givens=self._givens)
    self._compute_q_vals = theano.function([self.q_layers[0].input_var],
                                           self.q_layers[-1].predictions(),
                                           on_unused_input='ignore')
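# ---------------------------------------------------------------------------
# Hedged sketch (not part of the original source): selecting a greedy action
# from the compiled `_compute_q_vals` function. The method name, the epsilon
# parameter, and the zero-padded batch are assumptions; `phi` is a stack of
# `phi_length` frames matching the input layer's shape.
#
#   def choose_action(self, phi, epsilon):
#       if np.random.rand() < epsilon:
#           return np.random.randint(0, self.num_actions)
#       states = np.zeros((self._batch_size, self._phi_length,
#                          self._img_height, self._img_width),
#                         dtype=theano.config.floatX)
#       states[0, ...] = phi
#       return int(np.argmax(self._compute_q_vals(states)[0]))
# ---------------------------------------------------------------------------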
def __init__(self, nn_layers, L2_reg=0.0001, learning_rate=0.1,
             batch_size=32, discrete_target=False):
    self.layers = nn_layers  # TODO: maybe initialize layers and set all inputs as prev outputs
    self._batch_size = batch_size

    # self._fprop = theano.function(
    #     [self.layers[0].input_var],
    #     self.output()
    # )

    self.parameters = layers.all_parameters(self.layers[-1])
    # self.parameters = [param for layer in nn_layers[1:] for param in layer.params]

    self.cost = self.layers[-1].error()
    self.error_rate = self.layers[-1].error_rate()
    # L2 penalty over all weights and biases.
    self.regularization = sum((W_or_b ** 2).sum() for W_or_b in self.parameters)

    # The last layer must be a layers.OutputLayer.
    self.updates = layers.gen_updates_sgd(
        self.cost + self.regularization * L2_reg,
        self.parameters,
        learning_rate)
    # self.updates = layers.gen_updates_sgd(cost, self.parameters, learning_rate)

    # self.train_model = theano.function(
    #     inputs=[self.layers[0].input_var, self.layers[-1].target_var],
    #     updates=self.updates,
    #     outputs=self.cost
    # )

    self._idx = T.lscalar('idx')

    # Shared variables holding the train / validation / test data on the device.
    self.x_shared = theano.shared(
        np.zeros(self.layers[0].get_output_shape(), dtype=theano.config.floatX))
    self.y_shared = theano.shared(
        np.zeros(self.layers[-1].get_output_shape(), dtype=theano.config.floatX))
    self.x_shared_validate = theano.shared(
        np.zeros(self.layers[0].get_output_shape(), dtype=theano.config.floatX))
    self.y_shared_validate = theano.shared(
        np.zeros(self.layers[-1].get_output_shape(), dtype=theano.config.floatX))
    self.x_shared_test = theano.shared(
        np.zeros(self.layers[0].get_output_shape(), dtype=theano.config.floatX))
    self.y_shared_test = theano.shared(
        np.zeros(self.layers[-1].get_output_shape(), dtype=theano.config.floatX))

    # Cast targets to int32 when they are discrete class labels.
    self.y_converted = (T.cast(self.y_shared, 'int32')
                        if discrete_target else self.y_shared)
    self.y_converted_validate = (T.cast(self.y_shared_validate, 'int32')
                                 if discrete_target else self.y_shared_validate)
    self.y_converted_test = (T.cast(self.y_shared_test, 'int32')
                             if discrete_target else self.y_shared_test)

    self._givens = {
        self.layers[0].input_var:
            self.x_shared[self._idx * self._batch_size:
                          (self._idx + 1) * self._batch_size],
        self.layers[-1].target_var:
            self.y_converted[self._idx * self._batch_size:
                             (self._idx + 1) * self._batch_size],
    }
    self._givens_validate = {
        self.layers[0].input_var:
            self.x_shared_validate[self._idx * self._batch_size:
                                   (self._idx + 1) * self._batch_size],
        self.layers[-1].target_var:
            self.y_converted_validate[self._idx * self._batch_size:
                                      (self._idx + 1) * self._batch_size],
    }
    self._givens_test = {
        self.layers[0].input_var:
            self.x_shared_test[self._idx * self._batch_size:
                               (self._idx + 1) * self._batch_size],
        self.layers[-1].target_var:
            self.y_converted_test[self._idx * self._batch_size:
                                  (self._idx + 1) * self._batch_size],
    }

    self._train_model_batch = theano.function(
        inputs=[self._idx],
        updates=self.updates,
        givens=self._givens,
        outputs=self.cost)
    self._validate_model_batch = theano.function(
        inputs=[self._idx],
        givens=self._givens_validate,
        outputs=self.error_rate)
    self._test_model_batch = theano.function(
        inputs=[self._idx],
        givens=self._givens_test,
        outputs=self.error_rate)
    # Note: this function also applies self.updates, so calling it performs a
    # training step in addition to returning the network output.
    self._output_model_batch = theano.function(
        inputs=[self._idx],
        updates=self.updates,
        givens=self._givens,
        outputs=self.output())