def test_multi_optimizer(backend_default):
    """MultiOptimizer maps layer classes / names to the right optimizer.

    Builds one layer of each mapped class, asks the MultiOptimizer for its
    internal layer->optimizer mapping, and checks every bucket.
    """
    sgd_momentum = GradientDescentMomentum(learning_rate=0.001, momentum_coef=0.9, wdecay=0.005)
    adadelta = Adadelta()
    adam = Adam()
    rmsprop = RMSProp()
    rmsprop_clipped = RMSProp(gradient_clip_value=5)

    gauss = Gaussian(scale=0.01)
    conv = Conv((11, 11, 64), strides=4, padding=3, init=gauss,
                bias=Constant(0), activation=Rectlin())
    affine = Affine(nout=4096, init=gauss, bias=Constant(1), activation=Rectlin())
    lstm = LSTM(output_size=1000, init=gauss, activation=Logistic(), gate_activation=Tanh())
    gru = GRU(output_size=100, init=gauss, activation=Logistic(), gate_activation=Tanh())

    # Container helpers (Conv/Affine) may expand to lists; flatten one level.
    flat_layers = []
    for entry in [conv, affine, lstm, gru]:
        flat_layers.extend(entry if isinstance(entry, list) else [entry])

    multi_opt = MultiOptimizer({
        'default': sgd_momentum,
        'Bias': adadelta,
        'Convolution': adam,
        'Linear': rmsprop,
        'LSTM': rmsprop_clipped,
        'GRU': rmsprop_clipped,
    })
    mapping = multi_opt._map_optimizers(flat_layers)

    # Every optimizer bucket should hold layers of the class it was keyed on;
    # unmapped Activation layers fall through to 'default'.
    assert mapping[adam][0].__class__.__name__ == 'Convolution'
    assert mapping[adadelta][0].__class__.__name__ == 'Bias'
    assert mapping[rmsprop][0].__class__.__name__ == 'Linear'
    assert mapping[sgd_momentum][0].__class__.__name__ == 'Activation'
    assert mapping[rmsprop_clipped][0].__class__.__name__ == 'LSTM'
    assert mapping[rmsprop_clipped][1].__class__.__name__ == 'GRU'
def __init__(self, num_actions, batch_size=32, discount_rate=0.99,
             history_length=4, cols=64, rows=64, clip_error=1,
             min_reward=-1, max_reward=1, batch_norm=False):
    """Build a DQN (train + target network) on the GPU backend.

    Parameters
    ----------
    num_actions : int -- number of discrete actions (network output width).
    batch_size : int -- minibatch size shared by training and inference.
    discount_rate : float -- discount factor for future rewards.
    history_length : int -- stacked frames per network input.
    cols, rows : int -- board frame dimensions.
    clip_error : symmetric bound used when clipping TD-error deltas.
    min_reward, max_reward : stored reward bounds.
    batch_norm : bool -- enable batch normalization in the layers.
    """
    self.num_actions = num_actions
    self.batch_size = batch_size
    self.discount_rate = discount_rate
    self.history_length = history_length
    self.board_dim = (cols, rows)
    self.clip_error = clip_error
    self.min_reward = min_reward
    self.max_reward = max_reward
    self.batch_norm = batch_norm

    # Backend must exist before any tensor allocation below.
    self.be = gen_backend(backend='gpu',
                          batch_size=self.batch_size,
                          datatype=np.dtype('float32').type)

    # Input tensor is allocated once and reused; neon wants the batch
    # dimension last.
    self.input_shape = (self.history_length, ) + self.board_dim + (
        self.batch_size, )
    self.input = self.be.empty(self.input_shape)
    # hack from simple_dqn: "needed for convolutional networks"
    self.input.lshape = self.input_shape
    self.targets = self.be.empty((self.num_actions, self.batch_size))

    # Training network + cost; initialize() needs the shape without the
    # trailing batch dimension.
    layers = self._createLayers(self.num_actions)
    self.model = Model(layers=layers)
    self.cost = GeneralizedCost(costfunc=SumSquared())
    # for l in self.model.layers.layers:
    #     l.parallelism = 'Disabled'
    self.model.initialize(self.input_shape[:-1], cost=self.cost)
    self.optimizer = RMSProp(learning_rate=0.002,
                             decay_rate=0.95,
                             stochastic_round=True)
    self.train_iterations = 0

    # Separate target network; weights are synced elsewhere
    # (update_target_network).
    self.target_model = Model(layers=self._createLayers(num_actions))
    # for l in self.target_model.layers.layers:
    #     l.parallelism = 'Disabled'
    self.target_model.initialize(self.input_shape[:-1])
    # NOTE(review): callback must be assigned by the caller before train()
    # is invoked — confirm against call sites.
    self.callback = None
def __init__(self, num_actions, args):
    """Build a DQN agent (train + optional target network) from CLI args.

    Parameters
    ----------
    num_actions : int -- number of discrete actions (network output width).
    args : argparse.Namespace-like object carrying backend, model and
        optimizer hyper-parameters.

    Raises
    ------
    ValueError -- when args.optimizer is not one of
        'rmsprop', 'adam', 'adadelta'.
    """
    # remember parameters
    self.num_actions = num_actions
    self.batch_size = args.batch_size
    self.discount_rate = args.discount_rate
    self.history_length = args.history_length
    self.screen_dim = (args.screen_height, args.screen_width)
    self.clip_error = args.clip_error
    self.min_reward = args.min_reward
    self.max_reward = args.max_reward
    self.batch_norm = args.batch_norm

    # create Neon backend (must precede any tensor allocation)
    self.be = gen_backend(backend=args.backend,
                          batch_size=args.batch_size,
                          rng_seed=args.random_seed,
                          device_id=args.device_id,
                          datatype=np.dtype(args.datatype).type,
                          stochastic_round=args.stochastic_round)

    # prepare tensors once and reuse them; batch dimension goes last
    self.input_shape = (self.history_length,) + self.screen_dim + (self.batch_size,)
    self.input = self.be.empty(self.input_shape)
    self.input.lshape = self.input_shape  # HACK: needed for convolutional networks
    self.targets = self.be.empty((self.num_actions, self.batch_size))

    # create model
    layers = self._createLayers(num_actions)
    self.model = Model(layers=layers)
    self.cost = GeneralizedCost(costfunc=SumSquared())
    # Bug fix: disable per-layer parallelism before initialize()
    for l in self.model.layers.layers:
        l.parallelism = 'Disabled'
    self.model.initialize(self.input_shape[:-1], self.cost)

    if args.optimizer == 'rmsprop':
        self.optimizer = RMSProp(learning_rate=args.learning_rate,
                                 decay_rate=args.decay_rate,
                                 stochastic_round=args.stochastic_round)
    elif args.optimizer == 'adam':
        self.optimizer = Adam(learning_rate=args.learning_rate,
                              stochastic_round=args.stochastic_round)
    elif args.optimizer == 'adadelta':
        self.optimizer = Adadelta(decay=args.decay_rate,
                                  stochastic_round=args.stochastic_round)
    else:
        # BUG FIX: was `assert false, ...` which raised NameError (`false`
        # is undefined in Python) instead of reporting the bad option.
        raise ValueError("Unknown optimizer: %s" % args.optimizer)

    # create target model (shares weights with the online model when
    # target_steps is falsy)
    self.train_iterations = 0
    if args.target_steps:
        self.target_model = Model(layers=self._createLayers(num_actions))
        # Bug fix: disable per-layer parallelism before initialize()
        for l in self.target_model.layers.layers:
            l.parallelism = 'Disabled'
        self.target_model.initialize(self.input_shape[:-1])
        self.save_weights_prefix = args.save_weights_prefix
    else:
        self.target_model = self.model

    self.callback = None
def test_multi_optimizer(backend_default_mkl):
    """
    A test for MultiOptimizer: optimize() must bucket layers by class name
    (including the fused 'Convolution_bias' case) under the mapped optimizers.
    """
    opt_gdm = GradientDescentMomentum(
        learning_rate=0.001, momentum_coef=0.9, wdecay=0.005)
    opt_ada = Adadelta()
    opt_adam = Adam()
    opt_rms = RMSProp()
    opt_rms_1 = RMSProp(gradient_clip_value=5)
    init_one = Gaussian(scale=0.01)

    l1 = Conv((11, 11, 64), strides=4, padding=3,
              init=init_one, bias=Constant(0), activation=Rectlin())
    l2 = Affine(nout=4096, init=init_one,
                bias=Constant(1), activation=Rectlin())
    l3 = LSTM(output_size=1000, init=init_one,
              activation=Logistic(), gate_activation=Tanh())
    l4 = GRU(output_size=100, init=init_one,
             activation=Logistic(), gate_activation=Tanh())
    layers = [l1, l2, l3, l4]
    # container helpers may expand into lists of layers; flatten one level
    layer_list = []
    for layer in layers:
        if isinstance(layer, list):
            layer_list.extend(layer)
        else:
            layer_list.append(layer)
    # layers must be configured/allocated before optimize() can touch params
    for l in layer_list:
        l.configure(in_obj=(16, 28, 28))
        l.allocate()
    # separate layer_list into two, the last two recurrent layers and the rest
    layer_list1, layer_list2 = layer_list[:-2], layer_list[-2:]

    opt = MultiOptimizer({'default': opt_gdm,
                          'Bias': opt_ada,
                          'Convolution': opt_adam,
                          'Convolution_bias': opt_adam,
                          'Linear': opt_rms,
                          'LSTM': opt_rms_1,
                          'GRU': opt_rms_1})

    layers_to_optimize1 = [l for l in layer_list1 if isinstance(l, ParameterLayer)]
    layers_to_optimize2 = [l for l in layer_list2 if isinstance(l, ParameterLayer)]

    opt.optimize(layers_to_optimize1, 0)
    # BUG FIX: was `is 'Convolution_bias'` — identity comparison against a
    # string literal is implementation-dependent (SyntaxWarning on CPython
    # 3.8+); use equality.
    assert opt.map_list[opt_adam][0].__class__.__name__ == 'Convolution_bias'
    assert opt.map_list[opt_rms][0].__class__.__name__ == 'Linear'

    opt.optimize(layers_to_optimize2, 0)
    assert opt.map_list[opt_rms_1][0].__class__.__name__ == 'LSTM'
    assert opt.map_list[opt_rms_1][1].__class__.__name__ == 'GRU'
def __init__(self, args, max_action_no, batch_dimension):
    """Build train/target networks for DQN on the GPU backend.

    Parameters
    ----------
    args : options object with train_batch_size, discount_factor,
        use_gpu_replay_mem, optimizer, rms_decay and learning_rate.
    max_action_no : int -- number of discrete actions (output width).
    batch_dimension : 4-element shape of a replay minibatch; axis 0 is the
        batch axis (it is moved last for neon's layout below).
    """
    self.args = args
    self.train_batch_size = args.train_batch_size
    self.discount_factor = args.discount_factor
    self.use_gpu_replay_mem = args.use_gpu_replay_mem

    # Backend must exist before any tensor allocation below.
    self.be = gen_backend(backend='gpu', batch_size=self.train_batch_size)
    # neon wants the batch dimension last: (C/H-like axes..., batch)
    self.input_shape = (batch_dimension[1], batch_dimension[2],
                        batch_dimension[3], batch_dimension[0])
    self.input = self.be.empty(self.input_shape)
    self.input.lshape = self.input_shape  # HACK: needed for convolutional networks
    self.targets = self.be.empty((max_action_no, self.train_batch_size))

    if self.use_gpu_replay_mem:
        # keep the replay history on-device as uint8 to save memory
        self.history_buffer = self.be.zeros(batch_dimension, dtype=np.uint8)
        self.input_uint8 = self.be.empty(self.input_shape, dtype=np.uint8)
    else:
        self.history_buffer = np.zeros(batch_dimension, dtype=np.float32)

    self.train_net = Model(self.create_layers(max_action_no))
    self.cost = GeneralizedCost(costfunc=SumSquared())
    # Bug fix: disable per-layer parallelism before initialize()
    for l in self.train_net.layers.layers:
        l.parallelism = 'Disabled'
    self.train_net.initialize(self.input_shape[:-1], self.cost)

    self.target_net = Model(self.create_layers(max_action_no))
    # Bug fix: disable per-layer parallelism before initialize()
    for l in self.target_net.layers.layers:
        l.parallelism = 'Disabled'
    self.target_net.initialize(self.input_shape[:-1])

    if self.args.optimizer == 'Adam':  # Adam
        # NOTE(review): beta_1 and beta_2 are both fed from rms_decay —
        # looks intentional but confirm; Adam's usual defaults differ.
        self.optimizer = Adam(beta_1=args.rms_decay,
                              beta_2=args.rms_decay,
                              learning_rate=args.learning_rate)
    else:  # Neon RMSProp
        self.optimizer = RMSProp(decay_rate=args.rms_decay,
                                 learning_rate=args.learning_rate)

    self.max_action_no = max_action_no
    self.running = True
def __init__(self, num_actions, args):
    """Build a DQN agent with a fixed RMSProp optimizer from CLI args.

    Parameters
    ----------
    num_actions : int -- number of discrete actions (network output width).
    args : argparse.Namespace-like object with backend/model/optimizer
        settings; target_steps of 0/None means the target network aliases
        the online model.
    """
    # remember parameters
    self.num_actions = num_actions
    self.batch_size = args.batch_size
    self.discount_rate = args.discount_rate
    self.history_length = args.history_length
    self.screen_dim = (args.screen_height, args.screen_width)
    self.clip_error = args.clip_error

    # create Neon backend
    # NOTE(review): uses `default_dtype=` — older neon API; newer versions
    # take `datatype=`. Confirm against the pinned neon version.
    self.be = gen_backend(backend=args.backend,
                          batch_size=args.batch_size,
                          rng_seed=args.random_seed,
                          device_id=args.device_id,
                          default_dtype=np.dtype(args.datatype).type,
                          stochastic_round=args.stochastic_round)

    # prepare tensors once and reuse them; batch dimension goes last
    self.input_shape = (self.history_length, ) + self.screen_dim + (
        self.batch_size, )
    self.tensor = self.be.empty(self.input_shape)
    self.tensor.lshape = self.input_shape  # needed for convolutional networks
    self.targets = self.be.empty((self.num_actions, self.batch_size))

    # create model
    layers = self.createLayers(num_actions)
    self.model = Model(layers=layers)
    self.cost = GeneralizedCost(costfunc=SumSquared())
    self.model.initialize(self.tensor.shape[:-1], self.cost)
    self.optimizer = RMSProp(learning_rate=args.learning_rate,
                             decay_rate=args.rmsprop_decay_rate,
                             stochastic_round=args.stochastic_round)

    # create target model (cloned every target_steps weight updates)
    self.target_steps = args.target_steps
    self.train_iterations = 0
    if self.target_steps:
        self.target_model = Model(layers=self.createLayers(num_actions))
        self.target_model.initialize(self.tensor.shape[:-1])
        self.save_weights_path = args.save_weights_path
    else:
        self.target_model = self.model

    # set by the caller before train() is invoked
    self.callback = None
def test_rmsprop(backend):
    """One RMSProp step on random data matches a NumPy reference update."""
    optimizer = RMSProp()

    weights = np.random.rand(200, 128)
    expected = copy.deepcopy(weights)
    gradient = 0.01 * np.random.rand(200, 128)
    run_avg = [0.01 * np.random.rand(200, 128)]

    # Reference update: w -= lr * g / (sqrt(rho*s + (1-rho)*g^2 + eps) + eps)
    rho = optimizer.decay_rate
    eps = optimizer.epsilon
    scale = np.sqrt(rho * run_avg[0] + (1.0 - rho) * np.square(gradient) + eps) + eps
    expected[:] -= gradient * optimizer.learning_rate / scale

    bundle = [((wrap(weights), wrap(gradient)), [wrap(run_avg[0])])]
    compare_tensors(optimizer, bundle, expected, tol=1e-7)
def __init__(self, state_size, num_steers, num_speeds, args):
    """Build a fully-connected Q-network for a steering+speed action space.

    Parameters
    ----------
    state_size : int -- dimensionality of the flat input state vector.
    num_steers, num_speeds : int -- sizes of the two action groups; the
        network emits num_steers + num_speeds outputs.
    args : argparse.Namespace-like object with backend/model/optimizer
        settings.

    Raises
    ------
    ValueError -- when args.optimizer is not one of
        'rmsprop', 'adam', 'adadelta'.
    """
    # remember parameters
    self.state_size = state_size
    self.num_steers = num_steers
    self.num_speeds = num_speeds
    self.num_actions = num_steers + num_speeds
    self.num_layers = args.hidden_layers
    self.hidden_nodes = args.hidden_nodes
    self.batch_size = args.batch_size
    self.discount_rate = args.discount_rate
    self.clip_error = args.clip_error

    # create Neon backend (must precede any tensor allocation)
    self.be = gen_backend(backend=args.backend,
                          batch_size=args.batch_size,
                          rng_seed=args.random_seed,
                          device_id=args.device_id,
                          datatype=np.dtype(args.datatype).type,
                          stochastic_round=args.stochastic_round)

    # prepare tensors once and reuse them; batch dimension goes last
    self.input_shape = (self.state_size, self.batch_size)
    self.input = self.be.empty(self.input_shape)
    self.targets = self.be.empty((self.num_actions, self.batch_size))

    # create model
    self.model = Model(layers=self._createLayers())
    self.cost = GeneralizedCost(costfunc=SumSquared())
    self.model.initialize(self.input_shape[:-1], self.cost)

    if args.optimizer == 'rmsprop':
        self.optimizer = RMSProp(learning_rate=args.learning_rate,
                                 decay_rate=args.decay_rate,
                                 stochastic_round=args.stochastic_round)
    elif args.optimizer == 'adam':
        self.optimizer = Adam(learning_rate=args.learning_rate,
                              stochastic_round=args.stochastic_round)
    elif args.optimizer == 'adadelta':
        self.optimizer = Adadelta(decay=args.decay_rate,
                                  stochastic_round=args.stochastic_round)
    else:
        # BUG FIX: was `assert false, ...` which raised NameError (`false`
        # is undefined in Python) instead of reporting the bad option.
        raise ValueError("Unknown optimizer: %s" % args.optimizer)

    # create target model (aliases the online model when target_steps is falsy)
    self.target_steps = args.target_steps
    self.train_iterations = 0
    if self.target_steps:
        self.target_model = Model(layers=self._createLayers())
        self.target_model.initialize(self.input_shape[:-1])
        self.save_weights_prefix = args.save_weights_prefix
    else:
        self.target_model = self.model
def test_rmsprop_wclip(backend_default):
    """RMSProp with param_clip_value matches a NumPy reference update."""
    clip = 0.5
    optimizer = RMSProp(param_clip_value=clip)

    weights = np.random.rand(200, 128)
    expected = copy.deepcopy(weights)
    raw_grad = 0.01 * np.random.rand(200, 128)
    # the optimizer averages the gradient over the batch of 128
    scaled_grad = raw_grad / 128.
    run_avg = [0.01 * np.random.rand(200, 128)]

    # Reference update, then clip the parameters to [-clip, clip] in place.
    rho = optimizer.decay_rate
    eps = optimizer.epsilon
    scale = np.sqrt(rho * run_avg[0] + (1.0 - rho) * np.square(scaled_grad) + eps) + eps
    expected[:] -= scaled_grad * float(optimizer.learning_rate) / scale
    np.clip(expected, -clip, clip, expected)

    bundle = [((wrap(weights), wrap(raw_grad)), [wrap(run_avg[0])])]
    compare_tensors(optimizer, bundle, expected, tol=1e-7)
def _set_optimizer(self):
    """Initializes the selected optimization algorithm.

    Reads self.args.optimizer ('rmsprop', 'adam' or 'adadelta') together
    with the matching hyper-parameters from self.args and stores the
    constructed optimizer on self.optimizer.

    Raises
    ------
    ValueError -- when self.args.optimizer names an unknown algorithm.
    """
    _logger.debug("Optimizer = %s" % str(self.args.optimizer))
    if self.args.optimizer == 'rmsprop':
        self.optimizer = RMSProp(
            learning_rate=self.args.learning_rate,
            decay_rate=self.args.decay_rate,
            stochastic_round=self.args.stochastic_round)
    elif self.args.optimizer == 'adam':
        self.optimizer = Adam(
            learning_rate=self.args.learning_rate,
            stochastic_round=self.args.stochastic_round)
    elif self.args.optimizer == 'adadelta':
        self.optimizer = Adadelta(
            decay=self.args.decay_rate,
            stochastic_round=self.args.stochastic_round)
    else:
        # BUG FIX: was `assert false, ...` which raised NameError (`false`
        # is undefined in Python) instead of reporting the bad option.
        raise ValueError("Unknown optimizer: %s" % self.args.optimizer)
def __init__(self, num_actions, args):
    """Build a DQN agent with a fixed RMSProp optimizer from CLI args.

    Parameters
    ----------
    num_actions : int -- number of discrete actions (network output width).
    args : argparse.Namespace-like object with backend/model/optimizer
        settings; target_steps of 0/None means the target network aliases
        the online model.
    """
    # remember parameters
    self.num_actions = num_actions
    self.batch_size = args.batch_size
    self.discount_rate = args.discount_rate
    self.history_length = args.history_length
    self.screen_dim = (args.screen_height, args.screen_width)
    self.clip_error = args.clip_error

    # create Neon backend
    # NOTE(review): uses `default_dtype=` — older neon API; newer versions
    # take `datatype=`. Confirm against the pinned neon version.
    self.be = gen_backend(backend = args.backend,
                          batch_size = args.batch_size,
                          rng_seed = args.random_seed,
                          device_id = args.device_id,
                          default_dtype = np.dtype(args.datatype).type,
                          stochastic_round = args.stochastic_round)

    # prepare tensors once and reuse them; batch dimension goes last
    self.input_shape = (self.history_length,) + self.screen_dim + (self.batch_size,)
    self.tensor = self.be.empty(self.input_shape)
    self.tensor.lshape = self.input_shape  # needed for convolutional networks
    self.targets = self.be.empty((self.num_actions, self.batch_size))

    # create model
    layers = self.createLayers(num_actions)
    self.model = Model(layers = layers)
    self.cost = GeneralizedCost(costfunc = SumSquared())
    self.model.initialize(self.tensor.shape[:-1], self.cost)
    self.optimizer = RMSProp(learning_rate = args.learning_rate,
                             decay_rate = args.rmsprop_decay_rate,
                             stochastic_round = args.stochastic_round)

    # create target model (cloned every target_steps weight updates)
    self.target_steps = args.target_steps
    self.train_iterations = 0
    if self.target_steps:
        self.target_model = Model(layers = self.createLayers(num_actions))
        self.target_model.initialize(self.tensor.shape[:-1])
        self.save_weights_path = args.save_weights_path
    else:
        self.target_model = self.model

    # set by the caller before train() is invoked
    self.callback = None
class DeepQNetwork:
    """Deep Q-Network with a separate target network (board-game variant).

    Holds an online model and a target model on the GPU backend; the target
    model is synced explicitly via update_target_network().
    """

    def __init__(self, num_actions, batch_size=32, discount_rate=0.99,
                 history_length=4, cols=64, rows=64, clip_error=1,
                 min_reward=-1, max_reward=1, batch_norm=False):
        """Build the online and target networks plus reusable tensors.

        Parameters
        ----------
        num_actions : int -- number of discrete actions (output width).
        batch_size : int -- minibatch size for training and inference.
        discount_rate : float -- discount factor for future rewards.
        history_length : int -- stacked frames per network input.
        cols, rows : int -- board frame dimensions.
        clip_error : symmetric bound for clipping TD-error deltas.
        min_reward, max_reward : bounds applied to rewards in train().
        batch_norm : bool -- enable batch normalization in the layers.
        """
        self.num_actions = num_actions
        self.batch_size = batch_size
        self.discount_rate = discount_rate
        self.history_length = history_length
        self.board_dim = (cols, rows)
        self.clip_error = clip_error
        self.min_reward = min_reward
        self.max_reward = max_reward
        self.batch_norm = batch_norm

        # backend must exist before any tensor allocation below
        self.be = gen_backend(backend='gpu',
                              batch_size=self.batch_size,
                              datatype=np.dtype('float32').type)

        # allocate input/target tensors once; batch dimension goes last
        self.input_shape = (self.history_length, ) + self.board_dim + (
            self.batch_size, )
        self.input = self.be.empty(self.input_shape)
        # hack from simple_dqn: "needed for convolutional networks"
        self.input.lshape = self.input_shape
        self.targets = self.be.empty((self.num_actions, self.batch_size))

        layers = self._createLayers(self.num_actions)
        self.model = Model(layers=layers)
        self.cost = GeneralizedCost(costfunc=SumSquared())
        self.model.initialize(self.input_shape[:-1], cost=self.cost)
        self.optimizer = RMSProp(learning_rate=0.002,
                                 decay_rate=0.95,
                                 stochastic_round=True)
        self.train_iterations = 0

        self.target_model = Model(layers=self._createLayers(num_actions))
        self.target_model.initialize(self.input_shape[:-1])
        # assigned by the caller; train() skips reporting while it is None
        self.callback = None

    def _createLayers(self, num_actions):
        """Return the conv/affine layer stack; output width is num_actions."""
        init_xavier_conv = Xavier(local=True)
        init_xavier_affine = Xavier(local=False)
        layers = []
        layers.append(Conv((8, 8, 32), strides=4, init=init_xavier_conv,
                           activation=Rectlin(), batch_norm=self.batch_norm))
        layers.append(Conv((4, 4, 64), strides=2, init=init_xavier_conv,
                           activation=Rectlin(), batch_norm=self.batch_norm))
        layers.append(Conv((2, 2, 128), strides=1, init=init_xavier_conv,
                           activation=Rectlin(), batch_norm=self.batch_norm))
        layers.append(Affine(nout=256, init=init_xavier_affine,
                             activation=Rectlin(), batch_norm=self.batch_norm))
        layers.append(Affine(nout=num_actions, init=init_xavier_affine))
        return layers

    def _setInput(self, states):
        """Copy a batch of states into self.input in neon's layout.

        Moves the batch axis last, then maps values via (x + 1) / 2 —
        presumably normalizing states from [-1, 1] into [0, 1]; confirm
        against the environment's state encoding.
        """
        states = np.transpose(states, axes=(1, 2, 3, 0))
        self.input.set(states.copy())
        self.be.add(self.input, 1, self.input)
        self.be.divide(self.input, 2, self.input)

    def update_target_network(self):
        """Copy the online model's weights (and states) into the target model."""
        pdict = self.model.get_description(get_weights=True, keep_states=True)
        self.target_model.deserialize(pdict, load_states=True)

    def train(self, minibatch, epoch):
        """Run one Q-learning update on a replay minibatch.

        minibatch is (prestates, actions, rewards, poststates, terminals);
        epoch is forwarded to the optimizer schedule.
        """
        prestates, actions, rewards, poststates, terminals = minibatch

        # Q-values of successor states from the frozen target network
        self._setInput(poststates)
        postq = self.target_model.fprop(self.input, inference=True)
        assert postq.shape == (self.num_actions, self.batch_size)
        maxpostq = self.be.max(postq, axis=0).asnumpyarray()
        assert maxpostq.shape == (1, self.batch_size)

        # Q-values of the current states from the online network
        self._setInput(prestates)
        preq = self.model.fprop(self.input, inference=False)
        assert preq.shape == (self.num_actions, self.batch_size)

        # targets start as the network's own predictions so that only the
        # taken action contributes to the error
        targets = preq.asnumpyarray().copy()

        # BUG FIX: reward bounds were hard-coded to (-1, 1), ignoring the
        # min_reward/max_reward constructor parameters (defaults unchanged).
        rewards = np.clip(rewards, self.min_reward, self.max_reward)

        for i, action in enumerate(actions):
            if terminals[i]:
                targets[action, i] = float(rewards[i])
            else:
                targets[action, i] = float(
                    rewards[i]) + self.discount_rate * maxpostq[0, i]

        self.targets.set(targets)
        deltas = self.cost.get_errors(preq, self.targets)
        assert deltas.shape == (self.num_actions, self.batch_size)
        cost = self.cost.get_cost(preq, self.targets)
        assert cost.shape == (1, 1)

        if self.clip_error:
            self.be.clip(deltas, -self.clip_error, self.clip_error, out=deltas)

        self.model.bprop(deltas)
        self.optimizer.optimize(self.model.layers_to_optimize, epoch)
        self.train_iterations += 1

        # BUG FIX: callback is initialized to None in __init__; calling it
        # unconditionally crashed when no callback was registered.
        if self.callback:
            self.callback.on_train(cost[0, 0])

    def predict(self, states):
        """Return Q-values for a full batch of states, shape (batch, actions)."""
        assert states.shape == ((
            self.batch_size,
            self.history_length, ) + self.board_dim)
        self._setInput(states)
        qvalues = self.model.fprop(self.input, inference=True)
        assert qvalues.shape == (self.num_actions, self.batch_size)
        return qvalues.T.asnumpyarray()

    def load_weights(self, load_path):
        """Load online-model parameters from load_path."""
        self.model.load_params(load_path)

    def save_weights(self, save_path):
        """Save online-model parameters to save_path."""
        self.model.save_params(save_path)
class DeepQNetwork:
    """Deep Q-Network (simple_dqn variant) with disk-based target cloning.

    The target network, when enabled, is refreshed by serializing the
    online model to disk and loading it back every target_steps updates.
    """

    def __init__(self, num_actions, args):
        """Build the online (and optionally target) network from CLI args."""
        # remember parameters
        self.num_actions = num_actions
        self.batch_size = args.batch_size
        self.discount_rate = args.discount_rate
        self.history_length = args.history_length
        self.screen_dim = (args.screen_height, args.screen_width)
        self.clip_error = args.clip_error
        # create Neon backend
        # NOTE(review): `default_dtype=` is the older neon kwarg; newer
        # versions use `datatype=` — confirm against the pinned version.
        self.be = gen_backend(backend=args.backend,
                              batch_size=args.batch_size,
                              rng_seed=args.random_seed,
                              device_id=args.device_id,
                              default_dtype=np.dtype(args.datatype).type,
                              stochastic_round=args.stochastic_round)
        # prepare tensors once and reuse them; batch dimension goes last
        self.input_shape = (self.history_length, ) + self.screen_dim + (
            self.batch_size, )
        self.tensor = self.be.empty(self.input_shape)
        self.tensor.lshape = self.input_shape  # needed for convolutional networks
        self.targets = self.be.empty((self.num_actions, self.batch_size))
        # create model
        layers = self.createLayers(num_actions)
        self.model = Model(layers=layers)
        self.cost = GeneralizedCost(costfunc=SumSquared())
        self.model.initialize(self.tensor.shape[:-1], self.cost)
        self.optimizer = RMSProp(learning_rate=args.learning_rate,
                                 decay_rate=args.rmsprop_decay_rate,
                                 stochastic_round=args.stochastic_round)
        # create target model (refreshed every target_steps weight updates)
        self.target_steps = args.target_steps
        self.train_iterations = 0
        if self.target_steps:
            self.target_model = Model(layers=self.createLayers(num_actions))
            self.target_model.initialize(self.tensor.shape[:-1])
            self.save_weights_path = args.save_weights_path
        else:
            self.target_model = self.model
        # set by the caller; train() only reports stats when it is set
        self.callback = None

    def createLayers(self, num_actions):
        """Return the DQN conv/affine stack (Mnih et al. architecture)."""
        # create network
        init_norm = Gaussian(loc=0.0, scale=0.01)
        layers = []
        # The first hidden layer convolves 32 filters of 8x8 with stride 4
        # with the input image and applies a rectifier nonlinearity.
        layers.append(
            Conv((8, 8, 32), strides=4, init=init_norm, activation=Rectlin()))
        # The second hidden layer convolves 64 filters of 4x4 with stride 2,
        # again followed by a rectifier nonlinearity.
        layers.append(
            Conv((4, 4, 64), strides=2, init=init_norm, activation=Rectlin()))
        # This is followed by a third convolutional layer that convolves 64
        # filters of 3x3 with stride 1 followed by a rectifier.
        layers.append(
            Conv((3, 3, 64), strides=1, init=init_norm, activation=Rectlin()))
        # The final hidden layer is fully-connected and consists of 512
        # rectifier units.
        layers.append(Affine(nout=512, init=init_norm, activation=Rectlin()))
        # The output layer is a fully-connected linear layer with a single
        # output for each valid action.
        layers.append(Affine(nout=num_actions, init=init_norm))
        return layers

    def setTensor(self, states):
        """Copy a batch of screens into self.tensor, scaled to [0, 1]."""
        # change order of axes to match what Neon expects
        states = np.transpose(states, axes=(1, 2, 3, 0))
        # copy() shouldn't be necessary here, but Neon doesn't work otherwise
        self.tensor.set(states.copy())
        # normalize network input between 0 and 1
        self.be.divide(self.tensor, 255, self.tensor)

    def train(self, minibatch, epoch):
        """Run one Q-learning update on a replay minibatch."""
        # expand components of minibatch
        prestates, actions, rewards, poststates, terminals = minibatch
        assert len(prestates.shape) == 4
        assert len(poststates.shape) == 4
        assert len(actions.shape) == 1
        assert len(rewards.shape) == 1
        assert len(terminals.shape) == 1
        assert prestates.shape == poststates.shape
        assert prestates.shape[0] == actions.shape[0] == rewards.shape[
            0] == poststates.shape[0] == terminals.shape[0]

        if self.target_steps and self.train_iterations % self.target_steps == 0:
            # HACK: push something through network, so that weights exist
            self.model.fprop(self.tensor)
            # HACK: serialize network to disk and read it back to clone
            filename = os.path.join(self.save_weights_path,
                                    "target_network.pkl")
            save_obj(self.model.serialize(keep_states=False), filename)
            self.target_model.load_weights(filename)

        # feed-forward pass for poststates to get Q-values
        self.setTensor(poststates)
        postq = self.target_model.fprop(self.tensor, inference=True)
        assert postq.shape == (self.num_actions, self.batch_size)

        # calculate max Q-value for each poststate
        maxpostq = self.be.max(postq, axis=0).asnumpyarray()
        assert maxpostq.shape == (1, self.batch_size)

        # feed-forward pass for prestates
        self.setTensor(prestates)
        preq = self.model.fprop(self.tensor, inference=False)
        assert preq.shape == (self.num_actions, self.batch_size)

        # make copy of prestate Q-values as targets
        # NOTE(review): no explicit .copy() here (unlike the sibling
        # implementation) — relies on asnumpyarray() returning host data
        # that is safe to mutate; confirm for this backend.
        targets = preq.asnumpyarray()

        # update Q-value targets for actions taken
        for i, action in enumerate(actions):
            if terminals[i]:
                targets[action, i] = float(rewards[i])
            else:
                targets[action, i] = float(
                    rewards[i]) + self.discount_rate * maxpostq[0, i]

        # copy targets to GPU memory
        self.targets.set(targets)

        # calculate errors
        deltas = self.cost.get_errors(preq, self.targets)
        assert deltas.shape == (self.num_actions, self.batch_size)
        #assert np.count_nonzero(deltas.asnumpyarray()) == 32

        # calculate cost, just in case
        cost = self.cost.get_cost(preq, self.targets)
        assert cost.shape == (1, 1)

        # clip errors
        if self.clip_error:
            self.be.clip(deltas, -self.clip_error, self.clip_error, out=deltas)

        # perform back-propagation of gradients
        self.model.bprop(deltas)

        # perform optimization
        self.optimizer.optimize(self.model.layers_to_optimize, epoch)

        # increase number of weight updates (needed for target clone interval)
        self.train_iterations += 1

        # calculate statistics
        if self.callback:
            self.callback.on_train(cost.asnumpyarray()[0, 0])

    def predict(self, states):
        """Return the greedy action index for the first state in the batch."""
        # minibatch is full size, because Neon doesn't let change the minibatch size
        assert states.shape == ((
            self.batch_size,
            self.history_length, ) + self.screen_dim)

        # calculate Q-values for the states
        self.setTensor(states)
        qvalues = self.model.fprop(self.tensor, inference=True)
        assert qvalues.shape == (self.num_actions, self.batch_size)
        if logger.isEnabledFor(logging.DEBUG):
            logger.debug("Q-values: " + str(qvalues.asnumpyarray()[:, 0]))

        # find the action with highest q-value
        actions = self.be.argmax(qvalues, axis=0)
        assert actions.shape == (1, self.batch_size)

        # take only the first result
        return actions.asnumpyarray()[0, 0]

    def getMeanQ(self, states):
        """Return the mean of the per-state maximum Q-values for a batch."""
        assert states.shape == ((
            self.batch_size,
            self.history_length, ) + self.screen_dim)

        # calculate Q-values for the states
        self.setTensor(states)
        qvalues = self.model.fprop(self.tensor, inference=True)
        assert qvalues.shape == (self.num_actions, self.batch_size)
        # take maximum Q-value for each state
        actions = self.be.max(qvalues, axis=0)
        assert actions.astensor().shape == (1, self.batch_size)
        # calculate mean Q-value of all states
        meanq = self.be.mean(actions, axis=1)
        assert meanq.astensor().shape == (1, 1)
        # return the mean
        return meanq.asnumpyarray()[0, 0]

    def load_weights(self, load_path):
        """Load online-model weights from load_path."""
        self.model.load_weights(load_path)

    def save_weights(self, save_path):
        """Serialize the online model (with states) to save_path."""
        save_obj(self.model.serialize(keep_states=True), save_path)
# weight initialization init_norm = Gaussian(loc=0.0, scale=0.01) # initialize model layers = [] layers.append(Affine(nout=100, init=init_norm, bias=Constant(0), activation=Rectlin())) layers.append(Affine(nout=10, init=init_norm, bias=Constant(0), activation=Logistic(shortcut=True), name='special_linear')) cost = GeneralizedCost(costfunc=CrossEntropyBinary()) mlp = Model(layers=layers) # fit and validate optimizer_one = GradientDescentMomentum(learning_rate=0.1, momentum_coef=0.9) optimizer_two = RMSProp() # all bias layers and the last linear layer will use # optimizer_two. all other layers will use optimizer_one. opt = MultiOptimizer({'default': optimizer_one, 'Bias': optimizer_two, 'special_linear': optimizer_two}) # configure callbacks callbacks = Callbacks(mlp, eval_set=valid_set, **args.callback_args) mlp.fit(train_set, optimizer=opt, num_epochs=args.epochs, cost=cost, callbacks=callbacks)
Affine(train_set.nfeatures, init, bias=init, activation=Identity()) ] else: layers = [ LSTM(hidden, init, activation=Logistic(), gate_activation=Tanh(), reset_cells=True), RecurrentLast(), Affine(train_set.nfeatures, init, bias=init, activation=Identity()) ] model = Model(layers=layers) cost = GeneralizedCost(MeanSquared()) optimizer = RMSProp(stochastic_round=args.rounding) callbacks = Callbacks(model, eval_set=valid_set, **args.callback_args) # fit model model.fit(train_set, optimizer=optimizer, num_epochs=args.epochs, cost=cost, callbacks=callbacks) # =======visualize how the model does on validation set============== # run the trained model on train and valid dataset and see how the outputs match train_output = model.get_outputs(train_set).reshape( -1, train_set.nfeatures) valid_output = model.get_outputs(valid_set).reshape(
MergeMultistream(layers=[image_path, sent_path], merge="recurrent"), Dropout(keep=0.5), LSTM(hidden_size, init, activation=Logistic(), gate_activation=Tanh(), reset_cells=True), Affine(train_set.vocab_size, init, bias=init2, activation=Softmax()) ] cost = GeneralizedCostMask(costfunc=CrossEntropyMulti(usebits=True)) # configure callbacks checkpoint_model_path = "~/image_caption2.pickle" if args.callback_args['save_path'] is None: args.callback_args['save_path'] = checkpoint_model_path if args.callback_args['serialize'] is None: args.callback_args['serialize'] = 1 model = Model(layers=layers) callbacks = Callbacks(model, train_set, **args.callback_args) opt = RMSProp(decay_rate=0.997, learning_rate=0.0005, epsilon=1e-8, gradient_clip_value=1) # train model model.fit(train_set, optimizer=opt, num_epochs=num_epochs, cost=cost, callbacks=callbacks) # load model (if exited) and evaluate bleu score on test set model.load_params(checkpoint_model_path) test_set = ImageCaptionTest(path=data_path) sents, targets = test_set.predict(model) test_set.bleu_score(sents, targets)
# masked cross-entropy cost for variable-length caption sequences
cost = GeneralizedCostMask(costfunc=CrossEntropyMulti(usebits=True))

# serialize a checkpoint after every epoch
checkpoint_model_path = "~/image_caption2.pickle"
checkpoint_schedule = range(num_epochs)

model = Model(layers=layers)
callbacks = Callbacks(model, train_set,
                      output_file=args.output_file,
                      progress_bar=args.progress_bar)
callbacks.add_serialize_callback(checkpoint_schedule, checkpoint_model_path)

# NOTE(review): clip_gradients/gradient_limit is the older neon RMSProp
# API; newer versions use gradient_clip_value — confirm pinned version.
opt = RMSProp(decay_rate=0.997, learning_rate=0.0005, epsilon=1e-8,
              clip_gradients=True, gradient_limit=1.0)

# train model
model.fit(train_set, optimizer=opt, num_epochs=num_epochs, cost=cost,
          callbacks=callbacks)

# load model (if exited) and evaluate bleu score on test set
model.load_weights(checkpoint_model_path)
test_set = ImageCaptionTest(path=data_path)
sents, targets = test_set.predict(model)
test_set.bleu_score(sents, targets)
# setup model layers layers = [ Conv((5, 5, 16), init=init_norm, activation=Rectlin()), Pooling(2), Conv((5, 5, 32), init=init_norm, activation=Rectlin()), Pooling(2), Conv((3, 3, 32), init=init_norm, activation=Rectlin()), Pooling(2), Affine(nout=100, init=init_norm, activation=Rectlin()), Linear(nout=4, init=init_norm) ] model = Model(layers=layers) # cost = GeneralizedCost(costfunc=CrossEntropyBinary()) cost = GeneralizedCost(costfunc=SumSquared()) # fit and validate optimizer = RMSProp() # configure callbacks callbacks = Callbacks(model, eval_set=eval_set, eval_freq=1) model.fit(train_set, cost=cost, optimizer=optimizer, num_epochs=10, callbacks=callbacks) y_test = model.get_outputs(test_set)
class ModelRunnerNeon():
    """Neon-backed DQN model runner.

    Owns the online (train) and target networks, performs forward
    prediction, one-step training on replay minibatches (optionally
    prioritized / double-DQN), and target-network synchronisation.
    """

    def __init__(self, args, max_action_no, batch_dimension):
        self.args = args
        self.train_batch_size = args.train_batch_size
        self.discount_factor = args.discount_factor
        self.use_gpu_replay_mem = args.use_gpu_replay_mem

        self.be = gen_backend(backend='gpu', batch_size=self.train_batch_size)
        # neon expects the batch axis last: (C, H, W, N)
        self.input_shape = (batch_dimension[1], batch_dimension[2],
                            batch_dimension[3], batch_dimension[0])
        self.input = self.be.empty(self.input_shape)
        self.input.lshape = self.input_shape  # HACK: needed for convolutional networks
        self.targets = self.be.empty((max_action_no, self.train_batch_size))

        if self.use_gpu_replay_mem:
            self.history_buffer = self.be.zeros(batch_dimension, dtype=np.uint8)
            self.input_uint8 = self.be.empty(self.input_shape, dtype=np.uint8)
        else:
            self.history_buffer = np.zeros(batch_dimension, dtype=np.float32)

        self.train_net = Model(self.create_layers(max_action_no))
        self.cost = GeneralizedCost(costfunc=SumSquared())
        # Bug fix: disable layer parallelism so initialize() works
        for l in self.train_net.layers.layers:
            l.parallelism = 'Disabled'
        self.train_net.initialize(self.input_shape[:-1], self.cost)

        self.target_net = Model(self.create_layers(max_action_no))
        # Bug fix
        for l in self.target_net.layers.layers:
            l.parallelism = 'Disabled'
        self.target_net.initialize(self.input_shape[:-1])

        if self.args.optimizer == 'Adam':  # Adam
            self.optimizer = Adam(beta_1=args.rms_decay,
                                  beta_2=args.rms_decay,
                                  learning_rate=args.learning_rate)
        else:  # Neon RMSProp
            self.optimizer = RMSProp(decay_rate=args.rms_decay,
                                     learning_rate=args.learning_rate)

        self.max_action_no = max_action_no
        self.running = True

    def get_initializer(self, input_size):
        """Return a weight initializer chosen by args.dnn_initializer."""
        dnnInit = self.args.dnn_initializer
        if dnnInit == 'xavier':
            initializer = Xavier()
        elif dnnInit == 'fan_in':
            # uniform in +/- 1/sqrt(fan_in)
            std_dev = 1.0 / math.sqrt(input_size)
            initializer = Uniform(low=-std_dev, high=std_dev)
        else:
            initializer = Gaussian(0, 0.01)
        return initializer

    def create_layers(self, max_action_no):
        """Build the standard DQN conv stack ending in a Q-value head."""
        layers = []

        initializer = self.get_initializer(input_size=4 * 8 * 8)
        layers.append(Conv(fshape=(8, 8, 32), strides=4, init=initializer,
                           bias=initializer, activation=Rectlin()))

        initializer = self.get_initializer(input_size=32 * 4 * 4)
        layers.append(Conv(fshape=(4, 4, 64), strides=2, init=initializer,
                           bias=initializer, activation=Rectlin()))

        initializer = self.get_initializer(input_size=64 * 3 * 3)
        layers.append(Conv(fshape=(3, 3, 64), strides=1, init=initializer,
                           bias=initializer, activation=Rectlin()))

        initializer = self.get_initializer(input_size=7 * 7 * 64)
        layers.append(Affine(nout=512, init=initializer, bias=initializer,
                             activation=Rectlin()))

        # linear output: one Q-value per action
        initializer = self.get_initializer(input_size=512)
        layers.append(Affine(nout=max_action_no, init=initializer,
                             bias=initializer))
        return layers

    def clip_reward(self, reward):
        """Clamp reward into [clip_reward_low, clip_reward_high]."""
        if reward > self.args.clip_reward_high:
            return self.args.clip_reward_high
        elif reward < self.args.clip_reward_low:
            return self.args.clip_reward_low
        else:
            return reward

    def set_input(self, data):
        """Copy a (N, C, H, W) batch into self.input as (C, H, W, N) scaled to [0, 1]."""
        if self.use_gpu_replay_mem:
            self.be.copy_transpose(data, self.input_uint8, axes=(1, 2, 3, 0))
            self.input[:] = self.input_uint8 / 255
        else:
            self.input.set(data.transpose(1, 2, 3, 0).copy())
            self.be.divide(self.input, 255, self.input)

    def predict(self, history_buffer):
        """Return Q-values for the first sample of the batch as a numpy row."""
        self.set_input(history_buffer)
        output = self.train_net.fprop(self.input, inference=True)
        return output.T.asnumpyarray()[0]

    def print_weights(self):
        pass

    def train(self, minibatch, replay_memory, learning_rate, debug):
        """Run one gradient step on a replay minibatch.

        Supports prioritized replay (importance weights + TD-error updates)
        and double DQN (action selection by the online net).
        """
        if self.args.prioritized_replay == True:
            prestates, actions, rewards, poststates, terminals, \
                replay_indexes, heap_indexes, weights = minibatch
        else:
            prestates, actions, rewards, poststates, terminals = minibatch

        # Get Q*(s, a) with targetNet
        self.set_input(poststates)
        post_qvalue = self.target_net.fprop(self.input,
                                            inference=True).T.asnumpyarray()
        if self.args.double_dqn == True:
            # Get Q*(s, a) with trainNet
            post_qvalue2 = self.train_net.fprop(self.input,
                                                inference=True).T.asnumpyarray()

        # Get Q(s, a) with trainNet
        self.set_input(prestates)
        pre_qvalue = self.train_net.fprop(self.input, inference=False)

        label = pre_qvalue.asnumpyarray().copy()
        for i in range(0, self.train_batch_size):
            if self.args.clip_reward:
                reward = self.clip_reward(rewards[i])
            else:
                reward = rewards[i]
            if terminals[i]:
                label[actions[i], i] = reward
            else:
                if self.args.double_dqn == True:
                    # double DQN: argmax from online net, value from target net
                    max_index = np.argmax(post_qvalue2[i])
                    label[actions[i], i] = reward + \
                        self.discount_factor * post_qvalue[i][max_index]
                else:
                    label[actions[i], i] = reward + \
                        self.discount_factor * np.max(post_qvalue[i])

        # copy targets to GPU memory
        self.targets.set(label)

        delta = self.cost.get_errors(pre_qvalue, self.targets)

        if self.args.prioritized_replay == True:
            delta_value = delta.asnumpyarray()
            for i in range(self.train_batch_size):
                if debug:
                    # FIX: was a Python-2 `print` statement (a syntax error
                    # under Python 3); single-arg print() works on both.
                    print('weight[%s]: %.5f, delta: %.5f, newDelta: %.5f' % (
                        i, weights[i], delta_value[actions[i], i],
                        weights[i] * delta_value[actions[i], i]))
                replay_memory.update_td(heap_indexes[i],
                                        abs(delta_value[actions[i], i]))
                # scale the error by the importance-sampling weight
                delta_value[actions[i], i] = weights[i] * delta_value[actions[i], i]
            delta.set(delta_value.copy())

        if self.args.clip_loss:
            self.be.clip(delta, -1.0, 1.0, out=delta)

        self.train_net.bprop(delta)
        self.optimizer.optimize(self.train_net.layers_to_optimize, epoch=0)

    def update_model(self):
        """Copy online-net parameters (and states) into the target net."""
        # have to serialize also states for batch normalization to work
        pdict = self.train_net.get_description(get_weights=True,
                                               keep_states=True)
        self.target_net.deserialize(pdict, load_states=True)
        #print ('Updated target model')

    def finish_train(self):
        self.running = False

    def load(self, file_name):
        self.train_net.load_params(file_name)
        self.update_model()

    def save(self, file_name):
        self.train_net.save_params(file_name)
# model initialization if rlayer_type == 'lstm': rlayer = LSTM(hidden_size, init, Logistic(), Tanh()) elif rlayer_type == 'gru': rlayer = GRU(hidden_size, init, activation=Tanh(), gate_activation=Logistic()) else: raise NotImplementedError('%s layer not implemented' % rlayer_type) layers = [ rlayer, Affine(len(train_set.vocab), init, bias=init, activation=Softmax()) ] cost = GeneralizedCost(costfunc=CrossEntropyMulti(usebits=True)) model = Model(layers=layers) optimizer = RMSProp(clip_gradients=clip_gradients, stochastic_round=args.rounding) # configure callbacks callbacks = Callbacks(model, train_set, output_file=args.output_file, valid_set=valid_set, valid_freq=args.validation_freq, progress_bar=args.progress_bar) # train model model.fit(train_set, optimizer=optimizer, num_epochs=num_epochs, cost=cost, callbacks=callbacks)
class DeepQNetwork:
    """DQN agent wrapper around a neon Model.

    Builds the online network, an optional periodically-cloned target
    network, and reusable input/target tensors sized for one minibatch.
    """

    def __init__(self, num_actions, args):
        # create Neon backend
        self.be = gen_backend(backend = args.backend,
                     batch_size = args.batch_size,
                     rng_seed = args.random_seed,
                     device_id = args.device_id,
                     default_dtype = np.dtype(args.datatype).type,
                     stochastic_round = args.stochastic_round)

        # create model
        layers = self.createLayers(num_actions)
        self.model = Model(layers = layers)
        self.cost = GeneralizedCost(costfunc = SumSquared())
        self.optimizer = RMSProp(learning_rate = args.learning_rate,
                                 decay_rate = args.rmsprop_decay_rate,
                                 stochastic_round = args.stochastic_round)

        # create target model; when target_steps is 0/None the online model
        # doubles as its own target (no cloning)
        self.target_steps = args.target_steps
        self.train_iterations = 0
        if self.target_steps:
            self.target_model = Model(layers = self.createLayers(num_actions))
            self.save_weights_path = args.save_weights_path
        else:
            self.target_model = self.model

        # remember parameters
        self.num_actions = num_actions
        self.batch_size = args.batch_size
        self.discount_rate = args.discount_rate
        self.history_length = args.history_length
        self.screen_dim = (args.screen_height, args.screen_width)
        self.clip_error = args.clip_error

        # prepare tensors once and reuse them
        self.input_shape = (self.history_length,) + self.screen_dim + (self.batch_size,)
        self.tensor = self.be.empty(self.input_shape)
        self.tensor.lshape = self.input_shape # needed for convolutional networks
        self.targets = self.be.empty((self.num_actions, self.batch_size))

        self.callback = None

    def createLayers(self, num_actions):
        """Build the DQN network from the Mnih et al. architecture."""
        # create network
        init_norm = Gaussian(loc=0.0, scale=0.01)
        layers = []
        # The first hidden layer convolves 32 filters of 8x8 with stride 4 with the input image and applies a rectifier nonlinearity.
        layers.append(Conv((8, 8, 32), strides=4, init=init_norm, activation=Rectlin()))
        # The second hidden layer convolves 64 filters of 4x4 with stride 2, again followed by a rectifier nonlinearity.
        layers.append(Conv((4, 4, 64), strides=2, init=init_norm, activation=Rectlin()))
        # This is followed by a third convolutional layer that convolves 64 filters of 3x3 with stride 1 followed by a rectifier.
        layers.append(Conv((3, 3, 64), strides=1, init=init_norm, activation=Rectlin()))
        # The final hidden layer is fully-connected and consists of 512 rectifier units.
        layers.append(Affine(nout=512, init=init_norm, activation=Rectlin()))
        # The output layer is a fully-connected linear layer with a single output for each valid action.
        layers.append(Affine(nout = num_actions, init = init_norm))
        return layers

    def setTensor(self, states):
        """Load a batch of states into self.tensor, normalized to [0, 1]."""
        # change order of axes to match what Neon expects
        states = np.transpose(states, axes = (1, 2, 3, 0))
        # copy() shouldn't be necessary here, but Neon doesn't work otherwise
        self.tensor.set(states.copy())
        # normalize network input between 0 and 1
        self.be.divide(self.tensor, 255, self.tensor)

    def train(self, minibatch, epoch):
        """Perform one Q-learning update step on a minibatch."""
        # expand components of minibatch
        prestates, actions, rewards, poststates, terminals = minibatch
        assert len(prestates.shape) == 4
        assert len(poststates.shape) == 4
        assert len(actions.shape) == 1
        assert len(rewards.shape) == 1
        assert len(terminals.shape) == 1
        assert prestates.shape == poststates.shape
        assert prestates.shape[0] == actions.shape[0] == rewards.shape[0] == poststates.shape[0] == terminals.shape[0]

        # periodically refresh the target network from the online network
        if self.target_steps and self.train_iterations % self.target_steps == 0:
            # HACK: push something through network, so that weights exist
            self.model.fprop(self.tensor)
            # HACK: serialize network to disk and read it back to clone
            filename = os.path.join(self.save_weights_path, "target_network.pkl")
            save_obj(self.model.serialize(keep_states = False), filename)
            self.target_model.load_weights(filename)

        # feed-forward pass for poststates to get Q-values
        self.setTensor(poststates)
        postq = self.target_model.fprop(self.tensor, inference = True)
        assert postq.shape == (self.num_actions, self.batch_size)

        # calculate max Q-value for each poststate
        maxpostq = self.be.max(postq, axis=0).asnumpyarray()
        assert maxpostq.shape == (1, self.batch_size)

        # feed-forward pass for prestates
        self.setTensor(prestates)
        preq = self.model.fprop(self.tensor, inference = False)
        assert preq.shape == (self.num_actions, self.batch_size)

        # make copy of prestate Q-values as targets
        targets = preq.asnumpyarray()

        # update Q-value targets for actions taken; only the taken action's
        # target differs from the network output, so its error is nonzero
        for i, action in enumerate(actions):
            if terminals[i]:
                targets[action, i] = float(rewards[i])
            else:
                targets[action, i] = float(rewards[i]) + self.discount_rate * maxpostq[0,i]

        # copy targets to GPU memory
        self.targets.set(targets)

        # calculate errors
        deltas = self.cost.get_errors(preq, self.targets)
        assert deltas.shape == (self.num_actions, self.batch_size)
        #assert np.count_nonzero(deltas.asnumpyarray()) == 32

        # calculate cost, just in case
        cost = self.cost.get_cost(preq, self.targets)
        assert cost.shape == (1,1)

        # clip errors
        if self.clip_error:
            self.be.clip(deltas, -self.clip_error, self.clip_error, out = deltas)

        # perform back-propagation of gradients
        self.model.bprop(deltas)

        # perform optimization
        self.optimizer.optimize(self.model.layers_to_optimize, epoch)

        # increase number of weight updates (needed for target clone interval)
        self.train_iterations += 1

        # calculate statistics
        if self.callback:
            self.callback.on_train(cost.asnumpyarray()[0,0])

    def predict(self, states):
        """Return the greedy action index for the first state in the batch."""
        # minibatch is full size, because Neon doesn't let change the minibatch size
        assert states.shape == ((self.batch_size, self.history_length,) + self.screen_dim)

        # calculate Q-values for the states
        self.setTensor(states)
        qvalues = self.model.fprop(self.tensor, inference = True)
        assert qvalues.shape == (self.num_actions, self.batch_size)
        if logger.isEnabledFor(logging.DEBUG):
            logger.debug("Q-values: " + str(qvalues.asnumpyarray()[:,0]))

        # find the action with highest q-value
        actions = self.be.argmax(qvalues, axis = 0)
        assert actions.shape == (1, self.batch_size)

        # take only the first result
        return actions.asnumpyarray()[0,0]

    def getMeanQ(self, states):
        """Return the mean over states of each state's maximum Q-value."""
        assert states.shape == ((self.batch_size, self.history_length,) + self.screen_dim)

        # calculate Q-values for the states
        self.setTensor(states)
        qvalues = self.model.fprop(self.tensor, inference = True)
        assert qvalues.shape == (self.num_actions, self.batch_size)

        # take maximum Q-value for each state
        actions = self.be.max(qvalues, axis = 0)
        assert actions.astensor().shape == (1, self.batch_size)

        # calculate mean Q-value of all states
        meanq = self.be.mean(actions, axis = 1)
        assert meanq.astensor().shape == (1, 1)

        # return the mean
        return meanq.asnumpyarray()[0,0]

    def load_weights(self, load_path):
        self.model.load_weights(load_path)

    def save_weights(self, save_path):
        save_obj(self.model.serialize(keep_states = True), save_path)
from neon.optimizers import RMSProp
from neon.transforms import Misclassification
from neon.callbacks.callbacks import Callbacks
from network import create_network
from data import make_train_loader, make_val_loader

# default config file lives next to this script; fall back to CLI args only
eval_config = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                           'whale_eval.cfg')
config_files = [eval_config] if os.path.exists(eval_config) else []
parser = NeonArgparser(__doc__, default_config_files=config_files)
args = parser.parse_args()

model, cost_obj = create_network()

assert 'train' in args.manifest, "Missing train manifest"
assert 'val' in args.manifest, "Missing val manifest"

train = make_train_loader(args.manifest['train'], args.manifest_root,
                          model.be, noise_file=args.manifest.get('noise'))
neon_logger.display('Performing train and test in validation mode')
val = make_val_loader(args.manifest['val'], args.manifest_root, model.be)
metric = Misclassification()

model.fit(dataset=train, cost=cost_obj,
          optimizer=RMSProp(learning_rate=1e-4),
          num_epochs=args.epochs,
          callbacks=Callbacks(model, eval_set=val, metric=metric,
                              **args.callback_args))

neon_logger.display('Misclassification error = %.1f%%' %
                    (model.eval(val, metric=metric) * 100))
train_set = Text(time_steps, train_path)
# reuse the training vocabulary so validation indices match
valid_set = Text(time_steps, valid_path, vocab=train_set.vocab)

# weight initialization
init = Uniform(low=-0.08, high=0.08)

# model initialization: single LSTM followed by a softmax over the vocabulary
layers = [
    LSTM(hidden_size, init, activation=Logistic(), gate_activation=Tanh()),
    Affine(len(train_set.vocab), init, bias=init, activation=Softmax())
]

model = Model(layers=layers)

cost = GeneralizedCost(costfunc=CrossEntropyMulti(usebits=True))

optimizer = RMSProp(gradient_clip_value=gradient_clip_value,
                    stochastic_round=args.rounding)

# configure callbacks
callbacks = Callbacks(model, eval_set=valid_set, **args.callback_args)

# fit and validate
model.fit(train_set, optimizer=optimizer, num_epochs=args.epochs,
          cost=cost, callbacks=callbacks)


def sample(prob):
    """
    Sample index from probability distribution
init, activation=Tanh(), gate_activation=Logistic())
# NOTE(review): the call above opens before this chunk — presumably the
# rlayer2 recurrent-layer constructor; confirm upstream

layers = [
    rlayer1,
    rlayer2,
    Affine(len(train_set.vocab), init, bias=init, activation=Softmax())
]

cost = GeneralizedCost(costfunc=CrossEntropyMulti(usebits=True))
model = Model(layers=layers)

# decay the learning rate by 0.97 every epoch from epoch 10 onwards
learning_rate_sched = Schedule(list(range(10, args.epochs)), .97)
optimizer = RMSProp(gradient_clip_value=gradient_clip_value,
                    stochastic_round=args.rounding,
                    schedule=learning_rate_sched)

# configure callbacks
callbacks = Callbacks(model, eval_set=valid_set, **args.callback_args)

# train model
model.fit(train_set, optimizer=optimizer, num_epochs=args.epochs,
          cost=cost, callbacks=callbacks)

# get predictions
ypred = model.get_outputs(valid_set)
shape = (valid_set.nbatches, args.batch_size, time_steps)
gen_model=args.gmodel, cost_type='wasserstein',
                 im_size=64, n_chan=3, n_noise=100, n_gen_ftr=64,
                 n_dis_ftr=64, depth=4, n_extra_layers=4, batch_norm=True,
                 dis_iters=5, wgan_param_clamp=0.01, wgan_train_sched=True)
# NOTE(review): the call above opens before this chunk — presumably the
# WGAN model/container constructor; confirm upstream

# setup optimizer
optimizer = RMSProp(learning_rate=5e-5, decay_rate=0.99, epsilon=1e-8)

# setup data provider
train = make_loader(args.manifest['train'], args.manifest_root, model.be,
                    args.subset_pct, random_seed)

# configure callbacks
callbacks = Callbacks(model, **args.callback_args)

# results directory next to this script; filename tagged with a timestamp
fdir = ensure_dirs_exist(
    os.path.join(os.path.dirname(os.path.realpath(__file__)), 'results/'))
fname = os.path.splitext(os.path.basename(__file__))[0] +\
    '_[' + datetime.now().strftime('%Y-%m-%d-%H-%M-%S') + ']'
im_args = dict(filename=os.path.join(fdir, fname), hw=64,
               num_samples=args.batch_size, nchan=3,
class ModelRunnerNeon():
    """Neon-backed DQN model runner (duplicate variant of the class above).

    Owns the online (train) and target networks, performs forward
    prediction, one-step training on replay minibatches (optionally
    prioritized / double-DQN), and target-network synchronisation.
    """

    def __init__(self, args, max_action_no, batch_dimension):
        self.args = args
        self.train_batch_size = args.train_batch_size
        self.discount_factor = args.discount_factor
        self.use_gpu_replay_mem = args.use_gpu_replay_mem

        self.be = gen_backend(backend='gpu', batch_size=self.train_batch_size)
        # neon expects the batch axis last: (C, H, W, N)
        self.input_shape = (batch_dimension[1], batch_dimension[2],
                            batch_dimension[3], batch_dimension[0])
        self.input = self.be.empty(self.input_shape)
        self.input.lshape = self.input_shape  # HACK: needed for convolutional networks
        self.targets = self.be.empty((max_action_no, self.train_batch_size))

        if self.use_gpu_replay_mem:
            self.history_buffer = self.be.zeros(batch_dimension, dtype=np.uint8)
            self.input_uint8 = self.be.empty(self.input_shape, dtype=np.uint8)
        else:
            self.history_buffer = np.zeros(batch_dimension, dtype=np.float32)

        self.train_net = Model(self.create_layers(max_action_no))
        self.cost = GeneralizedCost(costfunc=SumSquared())
        # Bug fix: disable layer parallelism so initialize() works
        for l in self.train_net.layers.layers:
            l.parallelism = 'Disabled'
        self.train_net.initialize(self.input_shape[:-1], self.cost)

        self.target_net = Model(self.create_layers(max_action_no))
        # Bug fix
        for l in self.target_net.layers.layers:
            l.parallelism = 'Disabled'
        self.target_net.initialize(self.input_shape[:-1])

        if self.args.optimizer == 'Adam':  # Adam
            self.optimizer = Adam(beta_1=args.rms_decay,
                                  beta_2=args.rms_decay,
                                  learning_rate=args.learning_rate)
        else:  # Neon RMSProp
            self.optimizer = RMSProp(decay_rate=args.rms_decay,
                                     learning_rate=args.learning_rate)

        self.max_action_no = max_action_no
        self.running = True

    def get_initializer(self, input_size):
        """Return a weight initializer chosen by args.dnn_initializer."""
        dnnInit = self.args.dnn_initializer
        if dnnInit == 'xavier':
            initializer = Xavier()
        elif dnnInit == 'fan_in':
            # uniform in +/- 1/sqrt(fan_in)
            std_dev = 1.0 / math.sqrt(input_size)
            initializer = Uniform(low=-std_dev, high=std_dev)
        else:
            initializer = Gaussian(0, 0.01)
        return initializer

    def create_layers(self, max_action_no):
        """Build the standard DQN conv stack ending in a Q-value head."""
        layers = []

        initializer = self.get_initializer(input_size=4 * 8 * 8)
        layers.append(Conv(fshape=(8, 8, 32), strides=4, init=initializer,
                           bias=initializer, activation=Rectlin()))

        initializer = self.get_initializer(input_size=32 * 4 * 4)
        layers.append(Conv(fshape=(4, 4, 64), strides=2, init=initializer,
                           bias=initializer, activation=Rectlin()))

        initializer = self.get_initializer(input_size=64 * 3 * 3)
        layers.append(Conv(fshape=(3, 3, 64), strides=1, init=initializer,
                           bias=initializer, activation=Rectlin()))

        initializer = self.get_initializer(input_size=7 * 7 * 64)
        layers.append(Affine(nout=512, init=initializer, bias=initializer,
                             activation=Rectlin()))

        # linear output: one Q-value per action
        initializer = self.get_initializer(input_size=512)
        layers.append(Affine(nout=max_action_no, init=initializer,
                             bias=initializer))
        return layers

    def clip_reward(self, reward):
        """Clamp reward into [clip_reward_low, clip_reward_high]."""
        if reward > self.args.clip_reward_high:
            return self.args.clip_reward_high
        elif reward < self.args.clip_reward_low:
            return self.args.clip_reward_low
        else:
            return reward

    def set_input(self, data):
        """Copy a (N, C, H, W) batch into self.input as (C, H, W, N) scaled to [0, 1]."""
        if self.use_gpu_replay_mem:
            self.be.copy_transpose(data, self.input_uint8, axes=(1, 2, 3, 0))
            self.input[:] = self.input_uint8 / 255
        else:
            self.input.set(data.transpose(1, 2, 3, 0).copy())
            self.be.divide(self.input, 255, self.input)

    def predict(self, history_buffer):
        """Return Q-values for the first sample of the batch as a numpy row."""
        self.set_input(history_buffer)
        output = self.train_net.fprop(self.input, inference=True)
        return output.T.asnumpyarray()[0]

    def print_weights(self):
        pass

    def train(self, minibatch, replay_memory, learning_rate, debug):
        """Run one gradient step on a replay minibatch.

        Supports prioritized replay (importance weights + TD-error updates)
        and double DQN (action selection by the online net).
        """
        if self.args.prioritized_replay == True:
            prestates, actions, rewards, poststates, terminals, \
                replay_indexes, heap_indexes, weights = minibatch
        else:
            prestates, actions, rewards, poststates, terminals = minibatch

        # Get Q*(s, a) with targetNet
        self.set_input(poststates)
        post_qvalue = self.target_net.fprop(self.input,
                                            inference=True).T.asnumpyarray()
        if self.args.double_dqn == True:
            # Get Q*(s, a) with trainNet
            post_qvalue2 = self.train_net.fprop(self.input,
                                                inference=True).T.asnumpyarray()

        # Get Q(s, a) with trainNet
        self.set_input(prestates)
        pre_qvalue = self.train_net.fprop(self.input, inference=False)

        label = pre_qvalue.asnumpyarray().copy()
        for i in range(0, self.train_batch_size):
            if self.args.clip_reward:
                reward = self.clip_reward(rewards[i])
            else:
                reward = rewards[i]
            if terminals[i]:
                label[actions[i], i] = reward
            else:
                if self.args.double_dqn == True:
                    # double DQN: argmax from online net, value from target net
                    max_index = np.argmax(post_qvalue2[i])
                    label[actions[i], i] = reward + \
                        self.discount_factor * post_qvalue[i][max_index]
                else:
                    label[actions[i], i] = reward + \
                        self.discount_factor * np.max(post_qvalue[i])

        # copy targets to GPU memory
        self.targets.set(label)

        delta = self.cost.get_errors(pre_qvalue, self.targets)

        if self.args.prioritized_replay == True:
            delta_value = delta.asnumpyarray()
            for i in range(self.train_batch_size):
                if debug:
                    # FIX: was a Python-2 `print` statement (a syntax error
                    # under Python 3); single-arg print() works on both.
                    print('weight[%s]: %.5f, delta: %.5f, newDelta: %.5f' % (
                        i, weights[i], delta_value[actions[i], i],
                        weights[i] * delta_value[actions[i], i]))
                replay_memory.update_td(heap_indexes[i],
                                        abs(delta_value[actions[i], i]))
                # scale the error by the importance-sampling weight
                delta_value[actions[i], i] = weights[i] * delta_value[actions[i], i]
            delta.set(delta_value.copy())

        if self.args.clip_loss:
            self.be.clip(delta, -1.0, 1.0, out=delta)

        self.train_net.bprop(delta)
        self.optimizer.optimize(self.train_net.layers_to_optimize, epoch=0)

    def update_model(self):
        """Copy online-net parameters (and states) into the target net."""
        # have to serialize also states for batch normalization to work
        pdict = self.train_net.get_description(get_weights=True,
                                               keep_states=True)
        self.target_net.deserialize(pdict, load_states=True)
        #print ('Updated target model')

    def finish_train(self):
        self.running = False

    def load(self, file_name):
        self.train_net.load_params(file_name)
        self.update_model()

    def save(self, file_name):
        self.train_net.save_params(file_name)