class ConvNet(object):

    def __init__(self, input_size=[]):
        #: C3 kernel depths: six maps see 3 input maps, nine see 4, one sees all 6
        cov3_core_sizes = [[3, 5, 5]] * 6
        cov3_core_sizes.extend([[4, 5, 5]] * 9)
        cov3_core_sizes.extend([[6, 5, 5]])
        #: connection table mapping each C3 map to its subset of S2 maps
        cov3_mapcombindex = [[0, 1, 2], [1, 2, 3], [2, 3, 4], [3, 4, 5],
                             [4, 5, 0], [5, 0, 1],
                             [0, 1, 2, 3], [1, 2, 3, 4], [2, 3, 4, 5],
                             [3, 4, 5, 0], [4, 5, 0, 1], [5, 0, 1, 2],
                             [0, 1, 3, 4], [1, 2, 4, 5], [0, 2, 3, 5],
                             [0, 1, 2, 3, 4, 5]]
        self.convlay1 = ConvLayer([[28, 28]] * 6, [[1, 5, 5]] * 6)
        self.poollay2 = PoolingLayer([[14, 14]] * 6, [[2, 2]] * 6)
        self.convlay3 = ConvLayer([[10, 10]] * 16, cov3_core_sizes,
                                  cov3_mapcombindex)
        self.poollay4 = PoolingLayer([[5, 5]] * 16, [[2, 2]] * 16)
        self.convlay5 = ConvLayer([[1, 1]] * 120, [[16, 5, 5]] * 120)
        self.fclayer6 = FCLayer(84, 120)
        self.output7 = OutputLayer(10, 84)

    def forward_p(self, pic, label):
        self.convlay1.feedforward(pic)
        self.poollay2.feedforward(self.convlay1.maps)
        self.convlay3.feedforward(self.poollay2.maps, True)
        self.poollay4.feedforward(self.convlay3.maps)
        self.convlay5.feedforward(self.poollay4.maps)
        self.fclayer6.feedforward(self.convlay5.maps)
        self.output7.softmax(self.fclayer6.maps)

    def back_p(self, pic, label, learn_rate):
        #: one-hot target vector for the softmax output layer
        output_error = np.zeros([1, 1, 10])
        output_error[0][0][label] = 1
        #fclayer_error = self.outputlay7.back_propa(self.fclay6.maps, output_error, learn_rate, True)
        fclayer_error = self.output7.back_propa_softmax(
            self.fclayer6.maps, output_error, learn_rate, True)
        conv5_error = self.fclayer6.back_propa(self.convlay5.maps,
                                               fclayer_error, learn_rate, True)
        pool4_error = self.convlay5.back_propa(self.poollay4.maps,
                                               conv5_error, learn_rate, True)
        conv3_error = self.poollay4.back_propa(self.convlay3.maps,
                                               pool4_error, learn_rate, True)
        pool2_error = self.convlay3.back_propa(self.poollay2.maps,
                                               conv3_error, learn_rate, True)
        conv1_error = self.poollay2.back_propa(self.convlay1.maps,
                                               pool2_error, learn_rate, True)
        ilayer_error = self.convlay1.back_propa(pic, conv1_error, learn_rate,
                                                True)

    def print_weights(self):
        pass
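# --- Usage sketch (illustrative, not part of the original module) ---
# The layer sizes above follow LeNet-5: a 32x32 input gives the 28x28 C1 maps
# with 5x5 kernels, down through the 16-map C3 (partial connection table) to
# the 10-way softmax. `dataset` below is a hypothetical iterable of
# ([1, 32, 32] float array, int label) pairs.
def train_convnet_sketch(dataset, learn_rate=0.01):
    net = ConvNet()
    for pic, label in dataset:
        net.forward_p(pic, label)           # populates each layer's .maps
        net.back_p(pic, label, learn_rate)  # backpropagates the one-hot target
    return net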
def train(self, train_input, train_output):
    #: holds our states at every time step (used to construct the output weights);
    #  res_size and update_info_interval are module-level settings
    states_matrix = np.zeros((res_size, len(train_input)))
    #: run the reservoir to collect the states matrix
    for time_t, data_t in enumerate(train_input):
        if time_t % update_info_interval == 0:
            print("on training data idx:", time_t, "out of", len(train_input))
        self.resevoir.run(self.weights_in, data_t)
        states_matrix[:, time_t] = self.resevoir.get_hidden_states()[:, 0]
    #: create our output layer based on our states matrix
    print("creating output layer...")
    self.weights_out = OutputLayer.create(states_matrix, train_output)
    print("output layer found.")
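# --- Sketch of a possible readout fit (an assumption, not the repo's OutputLayer) ---
# `OutputLayer.create` fits the linear readout to the collected reservoir states.
# The standard echo-state-network choice is ridge regression,
# W_out = Y X^T (X X^T + beta*I)^{-1}; a minimal sketch, assuming states_matrix
# is (res_size, T) and train_output is (n_out, T):
import numpy as np

def create_readout_sketch(states_matrix, train_output, beta=1e-6):
    X = np.asarray(states_matrix)
    Y = np.asarray(train_output)
    # Tikhonov-regularized least squares on the hidden states
    return Y @ X.T @ np.linalg.inv(X @ X.T + beta * np.eye(X.shape[0]))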
class NeuralNet:

    def __init__(self, input_shape, filter_shapes, strides, n_hidden, n_out):
        '''
        Initialize a NeuralNet

        @param input_shape: tuple or list of length 4,
            (batch size, num input feature maps, image height, image width)
        @param filter_shapes: list of 2 (one per conv layer) * 4 values
            (number of filters, num input feature maps, filter height, filter width)
        @param strides: list of size 2, stride values for each hidden layer
        @param n_hidden: int, number of neurons in the all-to-all connected hidden layer
        @param n_out: int, number of nodes in the output layer
        '''
        #: create theano variables corresponding to the input batch (x) and the network output (y)
        x = T.ftensor4('x')
        y = T.fmatrix('y')

        #: first hidden layer is convolutional
        self.layer_hidden_conv1 = ConvolutionalLayer(x, filter_shapes[0],
                                                     input_shape, strides[0])

        #: second convolutional hidden layer: the size of its input depends on the output of the
        #  first layer and is defined as (num_batches, num_input_feature_maps,
        #  height_of_input_maps, width_of_input_maps)
        second_conv_input_shape = [input_shape[0], filter_shapes[0][0],
                                   self.layer_hidden_conv1.feature_map_size,
                                   self.layer_hidden_conv1.feature_map_size]
        self.layer_hidden_conv2 = ConvolutionalLayer(self.layer_hidden_conv1.output,
                                                     filter_shapes[1],
                                                     image_shape=second_conv_input_shape,
                                                     stride=2)

        #: output from a convolutional layer is 4D, but a normal hidden layer expects 2D input.
        #  Because of the all-to-all connections, the 3rd hidden layer does not care from which
        #  feature map or from which position the input comes.
        flattened_input = self.layer_hidden_conv2.output.flatten(2)

        #: create third hidden layer
        self.layer_hidden3 = HiddenLayer(flattened_input,
                                         self.layer_hidden_conv2.fan_out, n_hidden)

        #: create output layer
        self.layer_output = OutputLayer(self.layer_hidden3.output, n_hidden, n_out)

        #: the ensemble of parameters of the whole network
        self.params = self.layer_hidden_conv1.params + self.layer_hidden_conv2.params \
            + self.layer_hidden3.params + self.layer_output.params

        #: discount factor
        self.gamma = 0.95

        #: regularization terms; note that only the weights are penalized, not the biases.
        #  Linear (L1) term, useful for driving many weights to zero:
        self.l1 = abs(self.layer_hidden_conv1.W).sum() \
            + abs(self.layer_hidden_conv2.W).sum() \
            + abs(self.layer_hidden3.W).sum() \
            + abs(self.layer_output.W).sum()

        #: squared (L2) term, useful for forcing small weights:
        self.l2_sqr = (self.layer_hidden_conv1.W ** 2).sum() \
            + (self.layer_hidden_conv2.W ** 2).sum() \
            + (self.layer_hidden3.W ** 2).sum() \
            + (self.layer_output.W ** 2).sum()

        #: define the cost function (both regularization weights are currently zero)
        cost = 0.0 * self.l1 + 0.0 * self.l2_sqr + self.layer_output.errors(y)

        #: define gradient calculation
        grads = T.grad(cost, self.params)

        #: define how much we need to change the parameter values
        learning_rate = 0.0001
        updates = []
        for param_i, gparam_i in zip(self.params, grads):
            updates.append((param_i, param_i - learning_rate * gparam_i))

        #: we need another set of theano variables (other than x and y) to use in the
        #  train and predict functions
        temp_x = T.ftensor4('temp_x')
        temp_y = T.fmatrix('temp_y')

        #: define the training operation as applying the updates calculated given temp_x and temp_y
        self.train_model = theano.function(inputs=[temp_x, temp_y],
                                           outputs=[cost, self.params[0][0]],
                                           updates=updates,
                                           givens={x: temp_x, y: temp_y})

        self.predict_rewards = theano.function(
            inputs=[temp_x],
            outputs=[self.layer_output.output],
            givens={x: temp_x})

        self.predict_rewards_and_cost = theano.function(
            inputs=[temp_x, temp_y],
            outputs=[self.layer_output.output, cost],
            givens={x: temp_x, y: temp_y})

    def train(self, minibatch):
        """
        Transform (state, action, reward, state) transitions into (input, expected_output)
        pairs for the neural net and train the network.

        @param minibatch: array of dictionaries, each dictionary contains one transition
            (prestate, action, reward, poststate)
        """
        #: we have a new, better estimate for the Q-value of the action we chose: the sum of
        #  the reward received on transition and the discounted maximum of future rewards.
        #  Q-values for the other actions remain the same.
        for i, transition in enumerate(minibatch):
            estimated_Q = self.predict_rewards([transition['prestate']])[0][0]
            #: the next line prints the output of the network; uncomment it to verify that
            #  different inputs give different outputs (c.f. wiki Basic tests/Issue #10)
            #print "estimated q", estimated_Q
            estimated_Q[transition['action']] = transition['reward'] + self.gamma \
                * np.max(self.predict_rewards([transition['poststate']]))
            #: knowing what estimated_Q looks like, we can train the model
            cost, first_filter = self.train_model([transition['prestate']], [estimated_Q])
            #: the next line prints the first row of the first 8x8 filter in the first conv
            #  layer; uncomment it to make sure the weight values do indeed change as a
            #  result of learning (c.f. wiki Basic tests/Issue #7)
            #print "first line of filter applied to first img of first layer is: \n", first_filter[0][0]

    def predict_best_action(self, state):
        """
        Return the action with the highest Q-value.

        @param state: 4D array, input (game state) for which we want to know the best action
        """
        predicted_values_for_actions = self.predict_rewards(state)[0][0]
        #print "predicted best action", predicted_values_for_actions
        return np.argmax(predicted_values_for_actions)
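# --- Q-learning target, as a plain numpy illustration (hypothetical names) ---
# The heart of train() above: only the chosen action's Q-value is replaced with
# reward + gamma * max future Q, so the squared error is zero for all other
# actions and the network is only pushed on the action actually taken.
import numpy as np

def q_target_sketch(q_pre, q_post, action, reward, gamma=0.95):
    target = q_pre.copy()  # Q-values for the other actions stay fixed
    target[action] = reward + gamma * np.max(q_post)
    return target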
def get_output(self, input, z, training=None, return_spectrogram=False, reuse=True):
    '''
    Creates the symbolic computation graph of the U-Net for a given input batch.

    :param input: Input batch of mixtures, 3D tensor [batch_size, num_samples, num_channels]
    :param reuse: Whether to create new parameter variables or reuse existing ones
    :return: U-Net output: list of source estimates, each a 3D tensor
             [batch_size, num_out_samples, num_channels]
    '''
    with tf.variable_scope("separator", reuse=reuse):
        enc_outputs = list()
        current_layer = input

        # Down-convolution: repeat strided conv
        for i in range(self.num_layers):
            current_layer = tf.layers.conv1d(
                current_layer,
                self.num_initial_filters + (self.num_initial_filters * i),
                self.filter_size,
                strides=1,
                activation=LeakyReLU,
                padding=self.padding)  # out = in - filter + 1
            enc_outputs.append(current_layer)
            current_layer = current_layer[:, ::2, :]  # Decimate by factor of 2: out = (in-1)/2 + 1

        # One more conv here since we need to compute features after the last decimation
        current_layer = tf.layers.conv1d(
            current_layer,
            self.num_initial_filters + (self.num_initial_filters * self.num_layers),
            self.filter_size,
            activation=LeakyReLU,
            padding=self.padding)

        # Condition the bottleneck on z:
        # z: [batch_size, num_sources] -> [batch_size, timestamps, n_filters, num_sources]
        z = tf.tile(z, [current_layer.shape[1], current_layer.shape[2]])
        z = tf.reshape(z, (current_layer.shape.as_list() + [self.num_sources]))

        # Apply multiplicative conditioning
        current_layer = tf.expand_dims(current_layer, axis=-1)
        current_layer = tf.multiply(z, current_layer)
        current_layer = tf.reshape(
            current_layer,
            (current_layer.shape[0], current_layer.shape[1], -1))

        # Up-convolution
        for i in range(self.num_layers):
            # Upsampling
            current_layer = tf.expand_dims(current_layer, axis=1)
            if self.upsampling == 'learned':
                # Learned interpolation between two neighbouring time positions, using a
                # convolution filter of width 2 and inserting the responses in the middle
                # of the two respective inputs
                current_layer = Utils.learned_interpolation_layer(current_layer,
                                                                  self.padding, i)
            else:
                if self.context:
                    current_layer = tf.image.resize_bilinear(
                        current_layer,
                        [1, current_layer.get_shape().as_list()[2] * 2 - 1],
                        align_corners=True)
                    current_layer = tf.cast(current_layer, tf.bfloat16)
                else:
                    current_layer = tf.image.resize_bilinear(
                        current_layer,
                        [1, current_layer.get_shape().as_list()[2] * 2])  # out = in + in - 1
            #current_layer = tf.layers.conv2d_transpose(current_layer, self.num_initial_filters + (16 * (self.num_layers-i-1)), [1, 15], strides=[1, 2], activation=LeakyReLU, padding='same')  # output = input * stride + filter - stride
            current_layer = tf.squeeze(current_layer, axis=1)

            # No cropping should be necessary unless we are using context
            assert (enc_outputs[-i - 1].get_shape().as_list()[1]
                    == current_layer.get_shape().as_list()[1] or self.context)
            current_layer = Utils.crop_and_concat(enc_outputs[-i - 1], current_layer,
                                                  match_feature_dim=False)
            current_layer = tf.layers.conv1d(
                current_layer,
                self.num_initial_filters + (self.num_initial_filters * (self.num_layers - i - 1)),
                self.merge_filter_size,
                activation=LeakyReLU,
                padding=self.padding)  # out = in - filter + 1

        current_layer = Utils.crop_and_concat(input, current_layer,
                                              match_feature_dim=False)

        # Output layer
        if self.output_type == "direct":
            return OutputLayer.independent_outputs(current_layer, self.num_sources,
                                                   self.num_channels)
        elif self.output_type == "difference":
            cropped_input = Utils.crop(input, current_layer.get_shape().as_list(),
                                       match_feature_dim=False)
            return OutputLayer.difference_output(cropped_input, current_layer,
                                                 self.num_sources, self.num_channels)
        else:
            raise NotImplementedError
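# --- Sketch of the "difference" output idea (an assumption, not the repo's code) ---
# OutputLayer.difference_output is not shown here; the usual construction
# estimates num_sources - 1 sources directly and derives the last one as the
# mixture minus their sum, so the estimates add up to the input mixture:
import tensorflow as tf

def difference_output_sketch(mixture, features, num_sources, num_channels):
    direct = [tf.layers.conv1d(features, num_channels, 1, activation=tf.tanh)
              for _ in range(num_sources - 1)]
    residual = mixture - tf.add_n(direct)  # last source closes the sum
    return direct + [residual]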
class NeuralNet:

    def __init__(self, input_shape, filter_shapes, strides, n_hidden, n_out):
        '''
        Initialize a NeuralNet

        @param input_shape: tuple or list of length 4,
            (batch size, num input feature maps, image height, image width)
        @param filter_shapes: list of 2 (one per conv layer) * 4 values
            (number of filters, num input feature maps, filter height, filter width)
        @param strides: list of size 2, stride values for each hidden layer
        @param n_hidden: int, number of neurons in the all-to-all connected hidden layer
        @param n_out: int, number of nodes in the output layer
        '''
        #: create theano variables corresponding to the input batch (x) and the network output (y)
        x = T.ftensor4('x')
        y = T.fmatrix('y')

        #: first hidden layer is convolutional
        self.layer_hidden_conv1 = ConvolutionalLayer(x, filter_shapes[0],
                                                     input_shape, strides[0])

        #: second convolutional hidden layer: the size of its input depends on the output of the
        #  first layer and is defined as (num_batches, num_input_feature_maps,
        #  height_of_input_maps, width_of_input_maps)
        second_conv_input_shape = [input_shape[0], filter_shapes[0][0],
                                   self.layer_hidden_conv1.feature_map_size,
                                   self.layer_hidden_conv1.feature_map_size]
        self.layer_hidden_conv2 = ConvolutionalLayer(
            self.layer_hidden_conv1.output, filter_shapes[1],
            image_shape=second_conv_input_shape, stride=2)  # drops use of strides

        #: output from a convolutional layer is 4D, but a normal hidden layer expects 2D input.
        #  Because of the all-to-all connections, the 3rd hidden layer does not care from which
        #  feature map or from which position the input comes.
        flattened_input = self.layer_hidden_conv2.output.flatten(2)

        #: create third hidden layer
        self.layer_hidden3 = HiddenLayer(flattened_input,
                                         self.layer_hidden_conv2.fan_out, n_hidden)

        #: create output layer
        self.layer_output = OutputLayer(self.layer_hidden3.output, n_hidden, n_out)

        #: the ensemble of parameters of the whole network
        self.params = self.layer_hidden_conv1.params + self.layer_hidden_conv2.params \
            + self.layer_hidden3.params + self.layer_output.params

        #: discount factor
        self.gamma = 0.95

        #: regularization terms; note that only the weights are penalized, not the biases.
        #  Linear (L1) term, useful for driving many weights to zero:
        self.l1 = abs(self.layer_hidden_conv1.W).sum() \
            + abs(self.layer_hidden_conv2.W).sum() \
            + abs(self.layer_hidden3.W).sum() \
            + abs(self.layer_output.W).sum()

        #: squared (L2) term, useful for forcing small weights:
        self.l2_sqr = (self.layer_hidden_conv1.W ** 2).sum() \
            + (self.layer_hidden_conv2.W ** 2).sum() \
            + (self.layer_hidden3.W ** 2).sum() \
            + (self.layer_output.W ** 2).sum()

        #: define the cost function (both regularization weights are currently zero)
        self.cost = 0.0 * self.l1 + 0.0 * self.l2_sqr + self.layer_output.errors(y)
        self.cost_function = theano.function([x, y], [self.cost])

        #: define gradient calculation
        self.grads = T.grad(self.cost, self.params)

        #: the learning rate is a symbolic input, so it can be chosen per call
        self.learning_rate = T.scalar('lr')
        self.updates = []
        for param_i, gparam_i in zip(self.params, self.grads):
            self.updates.append((param_i, param_i - self.learning_rate * gparam_i))

        self.x = x
        self.y = y

        #: we need another set of theano variables (other than x and y) to use in the
        #  train and predict functions
        temp_x = T.ftensor4('temp_x')
        temp_y = T.fmatrix('temp_y')

        #: define the training operation as applying the updates calculated given temp_x and temp_y
        self.train_model = theano.function(
            inputs=[temp_x, temp_y,
                    theano.Param(self.learning_rate, default=0.00001)],
            outputs=[self.cost],
            updates=self.updates,
            givens={x: temp_x, y: temp_y},
            name='train_model')

        #: cloning the cost with the updates applied gives the post-step cost as a
        #  function of the learning rate, usable for a line search
        self.cost_clone = theano.clone(self.cost, replace=self.updates)
        self.line_function = theano.function([x, y, self.learning_rate],
                                             [self.cost_clone])

        self.predict_rewards = theano.function(
            inputs=[temp_x],
            outputs=[self.layer_output.output],
            givens={x: temp_x},
            name='predict_rewards')

        self.predict_rewards_and_cost = theano.function(
            inputs=[temp_x, temp_y],
            outputs=[self.layer_output.output, self.cost],
            givens={x: temp_x, y: temp_y},
            name='predict_rewards_and_cost')

    actual_learning_rate = 1e-5
    learning_rates = []

    def optimal_learning_rate(self, prestates, new_estimated_Q, lr):
        objective = lambda lr: self.line_function(np.array(prestates),
                                                  new_estimated_Q, float(lr))[0]
        res = scipy.optimize.minimize(objective, 0, method='Nelder-Mead',
                                      options={'xtol': 1e-1})
        print('optimization result')
        print(res)
        self.learning_rates.append(max(1e-6, float(res.x)))

    def train(self, minibatch):
        """
        Transform (state, action, reward, state) transitions into (input, expected_output)
        pairs for the neural net and train the network.

        @param minibatch: array of dictionaries, each dictionary contains one transition
            (prestate, action, reward, poststate)
        """
        prestates = [t['prestate'] for t in minibatch]
        initial_estimated_Q = self.predict_rewards(prestates)[0]
        new_estimated_Q = initial_estimated_Q.copy()
        poststates = [t['poststate'] for t in minibatch]
        post_eQ = [self.predict_rewards([s])[0] if s is not None else None
                   for s in poststates]
        actions = [t['action'] for t in minibatch]
        game_end_ps = [t['game_end'] for t in minibatch]
        rewards = np.array([t['reward'] for t in minibatch])

        #: terminal transitions contribute only the immediate reward
        for row, (peQ, action, reward, game_end) in enumerate(
                zip(post_eQ, actions, rewards, game_end_ps)):
            new_estimated_Q[row, action] = reward + (
                0 if game_end else self.gamma * np.max(peQ))

        initial_cost = self.cost_function(prestates, new_estimated_Q)
        optimal_learning_rate = lambda: self.optimal_learning_rate(
            prestates, new_estimated_Q,
            self.learning_rates[-1] if self.learning_rates else self.actual_learning_rate)
        #: recompute the optimal learning rate every 50 minibatches; otherwise reuse the last one
        if (len(self.learning_rates) % 50) == 0:
            print('computing optimal learning rate')
            optimal_learning_rate()
        else:
            self.learning_rates.append(self.learning_rates[-1])
        self.train_model(np.array(prestates), new_estimated_Q, self.learning_rates[-1])
        final_cost = self.cost_function(prestates, new_estimated_Q)
        final_estimated_Q = self.predict_rewards(prestates)[0]
        print('initial_cost', initial_cost, 'final_cost', final_cost)
        print('current rewards',
              (final_estimated_Q - final_estimated_Q.min(axis=0)).mean(axis=0))
        print('current rewards absolute')
        for r, a, s in sorted(zip(rewards, actions, map(list, final_estimated_Q))):
            print(r, a, s)
        if final_cost > initial_cost:
            print('overstepped; computing current optimal learning rate')
            optimal_learning_rate()
        if os.path.exists('/var/tmp/stop'):
            import pdb
            pdb.set_trace()

    def predict_best_action(self, state):
        """
        Return the action with the highest Q-value.

        @param state: 4D array, input (game state) for which we want to know the best action
        """
        predicted_values_for_actions = self.predict_rewards(state)[0][0]
        return np.argmax(predicted_values_for_actions)
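# --- Toy illustration of the cost_clone line search (hypothetical loss) ---
# theano.clone(cost, replace=updates) gives the cost *after* the update as a
# function of the learning rate, so step-size selection becomes a 1-D
# minimization. The same idea on a standalone quadratic f(w) = ||w||^2:
import numpy as np
import scipy.optimize

w = np.array([3.0, -2.0])
grad = 2 * w  # gradient of ||w||^2
line = lambda lr: np.sum((w - float(lr) * grad) ** 2)  # cost after the step
res = scipy.optimize.minimize(line, 0.0, method='Nelder-Mead',
                              options={'xatol': 1e-6})
print('best step size:', float(res.x[0]))  # ~0.5 for this quadratic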
class NeuralNet:

    def __init__(self, input_shape, filter_shapes, strides, n_hidden, n_out):
        x = T.dtensor4('x')
        y = T.dmatrix('y')
        self.layer_hidden_conv1 = ConvolutionalLayer(x, filter_shapes[0],
                                                     input_shape, strides[0])
        second_conv_input_shape = [input_shape[0], filter_shapes[0][0],
                                   self.layer_hidden_conv1.feature_map_size,
                                   self.layer_hidden_conv1.feature_map_size]
        self.layer_hidden_conv2 = ConvolutionalLayer(self.layer_hidden_conv1.output,
                                                     filter_shapes[1],
                                                     image_shape=second_conv_input_shape,
                                                     stride=2)
        flattened_input = self.layer_hidden_conv2.output.flatten(2)
        self.layer_hidden3 = HiddenLayer(flattened_input,
                                         self.layer_hidden_conv2.fan_out, n_hidden)
        self.layer_output = OutputLayer(self.layer_hidden3.output, n_hidden, n_out)
        self.params = self.layer_hidden_conv1.params + self.layer_hidden_conv2.params \
            + self.layer_hidden3.params + self.layer_output.params
        self.gamma = 0.95

        self.L1 = abs(self.layer_hidden_conv1.W).sum() \
            + abs(self.layer_hidden_conv2.W).sum() \
            + abs(self.layer_hidden3.W).sum() \
            + abs(self.layer_output.W).sum()

        # square of the L2 norm; one regularization option is to force the
        # square of the L2 norm to be small
        self.L2_sqr = (self.layer_hidden_conv1.W ** 2).sum() \
            + (self.layer_hidden_conv2.W ** 2).sum() \
            + (self.layer_hidden3.W ** 2).sum() \
            + (self.layer_output.W ** 2).sum()

        cost = 0.0 * self.L1 + 0.0 * self.L2_sqr + self.layer_output.errors(y)
        grads = T.grad(cost, self.params)

        # Define how much we need to change the parameter values
        learning_rate = 0.01
        updates = []
        for param_i, gparam_i in zip(self.params, grads):
            updates.append((param_i, param_i - learning_rate * gparam_i))

        temp1 = T.dtensor4('temp1')
        temp2 = T.dmatrix('temp2')
        self.train_model = theano.function(inputs=[temp1, temp2],
                                           outputs=[cost],
                                           updates=updates,
                                           givens={x: temp1, y: temp2})
        #self.shared_q = theano.shared(np.zeros((32,4)))
        #self.shared_s = theano.shared(np.zeros((32,4,84,84)))
        #self.train_model_shared = theano.function(inputs=[], outputs=[cost],
        #                                          updates=updates,
        #                                          givens={
        #                                              x: self.shared_s,
        #                                              y: self.shared_q
        #                                          })
        self.predict_rewards = theano.function(
            inputs=[temp1],
            outputs=[self.layer_output.output],
            givens={x: temp1})
        self.predict_rewards_and_cost = theano.function(
            inputs=[temp1, temp2],
            outputs=[self.layer_output.output, cost],
            givens={x: temp1, y: temp2})

    def train(self, minibatch):
        states = []
        expected_Qs = []
        states1 = [element['prestate'] for element in minibatch]
        states2 = [element['poststate'] for element in minibatch]
        current_predicted_rewards = self.predict_rewards(states1)[0]
        predicted_future_rewards = self.predict_rewards(states2)[0]
        for i, transition in enumerate(minibatch):
            # NB: this is a view into current_predicted_rewards, mutated in place
            rewards = current_predicted_rewards[i]
            rewards[transition['action']] = transition['reward'] \
                + self.gamma * np.max(predicted_future_rewards[i])
            states.append(transition['prestate'])
            expected_Qs.append(rewards)
        #self.shared_s = theano.shared(states)
        #self.shared_q = theano.shared(expected_Qs)
        #print "expected", expected_Qs[0]
        #print "expected", self.shared_q.eval()[0]
        #print self.predict_rewards_and_cost(self.shared_s.eval(), self.shared_q.eval())[0][0]
        #return self.train_model_shared()
        self.train_model(states, expected_Qs)
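# --- Vectorized form of the target loop above (hypothetical helper) ---
# This version already batches the two forward passes; the per-transition loop
# can be collapsed the same way. Note it also copies q_pre, avoiding the
# in-place mutation of predict_rewards' output that the loop above relies on.
import numpy as np

def batched_q_targets_sketch(q_pre, q_post, actions, rewards, gamma=0.95):
    targets = np.asarray(q_pre).copy()
    targets[np.arange(len(actions)), actions] = \
        np.asarray(rewards) + gamma * np.asarray(q_post).max(axis=1)
    return targets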
def create_output_layer(self):
    output_layer = OutputLayer.OutputLayer(self.class_num)
    return output_layer
class NeuralNet:

    def __init__(self, input_shape, filter_shapes, strides, n_hidden, n_out):
        '''
        Initialize a NeuralNet

        @param input_shape: tuple or list of length 4,
            (batch size, num input feature maps, image height, image width)
        @param filter_shapes: list of 2 (one per conv layer) * 4 values
            (number of filters, num input feature maps, filter height, filter width)
        @param strides: list of size 2, stride values for each hidden layer
        @param n_hidden: int, number of neurons in the all-to-all connected hidden layer
        @param n_out: int, number of nodes in the output layer
        '''
        #: create theano variables corresponding to the input batch (x) and the network output (y)
        x = T.ftensor4('x')
        y = T.fmatrix('y')

        #: first hidden layer is convolutional
        self.layer_hidden_conv1 = ConvolutionalLayer(x, filter_shapes[0],
                                                     input_shape, strides[0])

        #: second convolutional hidden layer: the size of its input depends on the output of the
        #  first layer and is defined as (num_batches, num_input_feature_maps,
        #  height_of_input_maps, width_of_input_maps)
        second_conv_input_shape = [input_shape[0], filter_shapes[0][0],
                                   self.layer_hidden_conv1.feature_map_size,
                                   self.layer_hidden_conv1.feature_map_size]
        self.layer_hidden_conv2 = ConvolutionalLayer(self.layer_hidden_conv1.output,
                                                     filter_shapes[1],
                                                     image_shape=second_conv_input_shape,
                                                     stride=2)

        #: output from a convolutional layer is 4D, but a normal hidden layer expects 2D input.
        #  Because of the all-to-all connections, the 3rd hidden layer does not care from which
        #  feature map or from which position the input comes.
        flattened_input = self.layer_hidden_conv2.output.flatten(2)

        #: create third hidden layer
        self.layer_hidden3 = HiddenLayer(flattened_input,
                                         self.layer_hidden_conv2.fan_out, n_hidden)

        #: create output layer
        self.layer_output = OutputLayer(self.layer_hidden3.output, n_hidden, n_out)

        #: the ensemble of parameters of the whole network
        self.params = self.layer_hidden_conv1.params + self.layer_hidden_conv2.params \
            + self.layer_hidden3.params + self.layer_output.params

        #: discount factor
        self.gamma = 0.95

        #: regularization terms; note that only the weights are penalized, not the biases.
        #  Linear (L1) term, useful for driving many weights to zero:
        self.l1 = abs(self.layer_hidden_conv1.W).sum() \
            + abs(self.layer_hidden_conv2.W).sum() \
            + abs(self.layer_hidden3.W).sum() \
            + abs(self.layer_output.W).sum()

        #: squared (L2) term, useful for forcing small weights:
        self.l2_sqr = (self.layer_hidden_conv1.W ** 2).sum() \
            + (self.layer_hidden_conv2.W ** 2).sum() \
            + (self.layer_hidden3.W ** 2).sum() \
            + (self.layer_output.W ** 2).sum()

        #: define the cost function (both regularization weights are currently zero)
        cost = 0.0 * self.l1 + 0.0 * self.l2_sqr + self.layer_output.errors(y)

        #: define gradient calculation
        grads = T.grad(cost, self.params)

        #: define how much we need to change the parameter values
        learning_rate = 0.0001
        updates = []
        for param_i, gparam_i in zip(self.params, grads):
            updates.append((param_i, param_i - learning_rate * gparam_i))

        #: we need another set of theano variables (other than x and y) to use in the
        #  train and predict functions
        temp_x = T.ftensor4('temp_x')
        temp_y = T.fmatrix('temp_y')

        #: define the training operation as applying the updates calculated given temp_x and temp_y
        self.train_model = theano.function(inputs=[temp_x, temp_y],
                                           outputs=[cost],
                                           updates=updates,
                                           givens={x: temp_x, y: temp_y})

        self.predict_rewards = theano.function(
            inputs=[temp_x],
            outputs=[self.layer_output.output],
            givens={x: temp_x})

        self.predict_rewards_and_cost = theano.function(
            inputs=[temp_x, temp_y],
            outputs=[self.layer_output.output, cost],
            givens={x: temp_x, y: temp_y})

    @profile
    def train(self, minibatch):
        """
        Transform (state, action, reward, state) transitions into (input, expected_output)
        pairs for the neural net and train the network.

        @param minibatch: array of dictionaries, each dictionary contains one transition
            (prestate, action, reward, poststate)
        """
        #: we have a new, better estimate for the Q-value of the action we chose: the sum of
        #  the reward received on transition and the discounted maximum of future rewards.
        #  Q-values for the other actions remain the same.
        for i, transition in enumerate(minibatch):
            estimated_Q = self.predict_rewards([transition['prestate']])[0][0]
            #: the future-reward term is evaluated on the successor (post) state
            estimated_Q[transition['action']] = transition['reward'] + self.gamma \
                * np.max(self.predict_rewards([transition['poststate']]))
            #: knowing what estimated_Q looks like, we can train the model
            self.train_model([transition['prestate']], [estimated_Q])

    @profile
    def predict_best_action(self, state):
        """
        Return the action with the highest Q-value.

        @param state: 4D array, input (game state) for which we want to know the best action
        """
        predicted_values_for_actions = self.predict_rewards(state)[0][0]
        return np.argmax(predicted_values_for_actions)
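# --- Epsilon-greedy wrapper (illustrative; epsilon and n_actions are not part
# of the class above) ---
# predict_best_action is the greedy half of the usual control loop; an agent
# typically mixes it with random exploration:
import numpy as np

def choose_action_sketch(net, state, n_actions, epsilon=0.1, rng=np.random):
    if rng.rand() < epsilon:
        return rng.randint(n_actions)      # explore
    return net.predict_best_action(state)  # exploit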