def __init__(self, num_actions, args): # remember parameters self.num_actions = num_actions self.batch_size = args.batch_size self.discount_rate = args.discount_rate self.history_length = args.history_length self.screen_dim = (args.screen_height, args.screen_width) self.clip_error = args.clip_error self.min_reward = args.min_reward self.max_reward = args.max_reward self.batch_norm = args.batch_norm # create Neon backend self.be = gen_backend(backend = args.backend, batch_size = args.batch_size, rng_seed = args.random_seed, device_id = args.device_id, datatype = np.dtype(args.datatype).type, stochastic_round = args.stochastic_round) # prepare tensors once and reuse them self.input_shape = (self.history_length,) + self.screen_dim + (self.batch_size,) self.input = self.be.empty(self.input_shape) self.input.lshape = self.input_shape # HACK: needed for convolutional networks self.targets = self.be.empty((self.num_actions, self.batch_size)) # create model layers = self._createLayers(num_actions) self.model = Model(layers = layers) self.cost = GeneralizedCost(costfunc = SumSquared()) # Bug fix for l in self.model.layers.layers: l.parallelism = 'Disabled' self.model.initialize(self.input_shape[:-1], self.cost) if args.optimizer == 'rmsprop': self.optimizer = RMSProp(learning_rate = args.learning_rate, decay_rate = args.decay_rate, stochastic_round = args.stochastic_round) elif args.optimizer == 'adam': self.optimizer = Adam(learning_rate = args.learning_rate, stochastic_round = args.stochastic_round) elif args.optimizer == 'adadelta': self.optimizer = Adadelta(decay = args.decay_rate, stochastic_round = args.stochastic_round) else: assert false, "Unknown optimizer" # create target model self.train_iterations = 0 if args.target_steps: self.target_model = Model(layers = self._createLayers(num_actions)) # Bug fix for l in self.target_model.layers.layers: l.parallelism = 'Disabled' self.target_model.initialize(self.input_shape[:-1]) self.save_weights_prefix = args.save_weights_prefix else: self.target_model = self.model self.callback = None
def __init__(self, env, args, rng, name = "DQNNeon"): """ Initializes a network based on the Neon framework. Args: env (AtariEnv): The envirnoment in which the agent actuates. args (argparse.Namespace): All settings either with a default value or set via command line arguments. rng (mtrand.RandomState): initialized Mersenne Twister pseudo-random number generator. name (str): The name of the network object. Note: This function should always call the base class first to initialize the common values for the networks. """ _logger.info("Initializing new object of type " + str(type(self).__name__)) super(DQNNeon, self).__init__(env, args, rng, name) self.input_shape = (self.sequence_length,) + self.frame_dims + (self.batch_size,) self.dummy_batch = np.zeros((self.batch_size, self.sequence_length) + self.frame_dims, dtype=np.uint8) self.batch_norm = args.batch_norm self.be = gen_backend( backend = args.backend, batch_size = args.batch_size, rng_seed = args.random_seed, device_id = args.device_id, datatype = np.dtype(args.datatype).type, stochastic_round = args.stochastic_round) # prepare tensors once and reuse them self.input = self.be.empty(self.input_shape) self.input.lshape = self.input_shape # HACK: needed for convolutional networks self.targets = self.be.empty((self.output_shape, self.batch_size)) # create model layers = self._create_layer() self.model = Model(layers = layers) self.cost_func = GeneralizedCost(costfunc = SumSquared()) # Bug fix for l in self.model.layers.layers: l.parallelism = 'Disabled' self.model.initialize(self.input_shape[:-1], self.cost_func) self._set_optimizer() if not self.args.load_weights == None: self.load_weights(self.args.load_weights) # create target model if self.target_update_frequency: layers = self._create_layer() self.target_model = Model(layers) # Bug fix for l in self.target_model.layers.layers: l.parallelism = 'Disabled' self.target_model.initialize(self.input_shape[:-1]) else: self.target_model = self.model self.callback = None _logger.debug("%s" % self)
def test_sum_squared_derivative(backend_default): outputs = np.array([0.5, 1.0, 0.0, 0.0001]).reshape((4, 1)) targets = np.array(([0.5, 0.0, 1.0, 0.2])).reshape((4, 1)) expected_result = (outputs - targets) / outputs.shape[1] compare_tensors(SumSquared(), outputs, targets, expected_result, deriv=True, tol=1e-8)
def __init__(self, rounding, callback_args, epochs): # setup weight initialization function self.init = Gaussian(loc=0.0, scale=0.01) # setup optimizer self.optimizer = GradientDescentMomentum(learning_rate=0.1, momentum_coef=0.9, stochastic_round=rounding) # setup cost function as CrossEntropy self.cost = GeneralizedCost(costfunc=SumSquared()) self.epochs = epochs self.model = None self.callback_args = callback_args
def __init__(self, state_size, num_steers, num_speeds, args): # remember parameters self.state_size = state_size self.num_steers = num_steers self.num_speeds = num_speeds self.num_actions = num_steers + num_speeds self.num_layers = args.hidden_layers self.hidden_nodes = args.hidden_nodes self.batch_size = args.batch_size self.discount_rate = args.discount_rate self.clip_error = args.clip_error # create Neon backend self.be = gen_backend(backend = args.backend, batch_size = args.batch_size, rng_seed = args.random_seed, device_id = args.device_id, datatype = np.dtype(args.datatype).type, stochastic_round = args.stochastic_round) # prepare tensors once and reuse them self.input_shape = (self.state_size, self.batch_size) self.input = self.be.empty(self.input_shape) self.targets = self.be.empty((self.num_actions, self.batch_size)) # create model self.model = Model(layers = self._createLayers()) self.cost = GeneralizedCost(costfunc = SumSquared()) self.model.initialize(self.input_shape[:-1], self.cost) if args.optimizer == 'rmsprop': self.optimizer = RMSProp(learning_rate = args.learning_rate, decay_rate = args.decay_rate, stochastic_round = args.stochastic_round) elif args.optimizer == 'adam': self.optimizer = Adam(learning_rate = args.learning_rate, stochastic_round = args.stochastic_round) elif args.optimizer == 'adadelta': self.optimizer = Adadelta(decay = args.decay_rate, stochastic_round = args.stochastic_round) else: assert false, "Unknown optimizer" # create target model self.target_steps = args.target_steps self.train_iterations = 0 if self.target_steps: self.target_model = Model(layers = self._createLayers()) self.target_model.initialize(self.input_shape[:-1]) self.save_weights_prefix = args.save_weights_prefix else: self.target_model = self.model
def __init__(self, num_actions, batch_size=32, discount_rate=0.99, history_length=4, cols=64, rows=64, clip_error=1, min_reward=-1, max_reward=1, batch_norm=False): self.num_actions = num_actions self.batch_size = batch_size self.discount_rate = discount_rate self.history_length = history_length self.board_dim = (cols, rows) self.clip_error = clip_error self.min_reward = min_reward self.max_reward = max_reward self.batch_norm = batch_norm self.be = gen_backend(backend='gpu', batch_size=self.batch_size, datatype=np.dtype('float32').type) self.input_shape = (self.history_length, ) + self.board_dim + ( self.batch_size, ) self.input = self.be.empty(self.input_shape) self.input.lshape = self.input_shape # hack from simple_dqn "needed for convolutional networks" self.targets = self.be.empty((self.num_actions, self.batch_size)) layers = self._createLayers(self.num_actions) self.model = Model(layers=layers) self.cost = GeneralizedCost(costfunc=SumSquared()) # for l in self.model.layers.layers: # l.parallelism = 'Disabled' self.model.initialize(self.input_shape[:-1], cost=self.cost) self.optimizer = RMSProp(learning_rate=0.002, decay_rate=0.95, stochastic_round=True) self.train_iterations = 0 self.target_model = Model(layers=self._createLayers(num_actions)) # for l in self.target_model.layers.layers: # l.parallelism = 'Disabled' self.target_model.initialize(self.input_shape[:-1]) self.callback = None
def train_regressor(orig_wordvecs, w2v_W, w2v_vocab): """ Return regressor to map word2vec to RNN word space Function modified from: https://github.com/ryankiros/skip-thoughts/blob/master/training/tools.py """ # Gather all words from word2vec that appear in wordvecs d = defaultdict(lambda: 0) for w in w2v_vocab.keys(): d[w] = 1 shared = OrderedDict() count = 0 for w in list(orig_wordvecs.keys())[:-2]: if d[w] > 0: shared[w] = count count += 1 # Get the vectors for all words in 'shared' w2v = np.zeros((len(shared), 300), dtype='float32') sg = np.zeros((len(shared), 620), dtype='float32') for w in shared.keys(): w2v[shared[w]] = w2v_W[w2v_vocab[w]] sg[shared[w]] = orig_wordvecs[w] train_set = ArrayIterator(X=w2v, y=sg, make_onehot=False) layers = [ Linear(nout=620, init=Gaussian(loc=0.0, scale=0.1)), Bias(init=Constant(0.0)) ] clf = Model(layers=layers) # regression model is trained using default global batch size cost = GeneralizedCost(costfunc=SumSquared()) opt = GradientDescentMomentum(0.1, 0.9, gradient_clip_value=5.0) callbacks = Callbacks(clf) clf.fit(train_set, num_epochs=20, optimizer=opt, cost=cost, callbacks=callbacks) return clf
def __init__(self, num_actions, args): # remember parameters self.num_actions = num_actions self.batch_size = args.batch_size self.discount_rate = args.discount_rate self.history_length = args.history_length self.screen_dim = (args.screen_height, args.screen_width) self.clip_error = args.clip_error # create Neon backend self.be = gen_backend(backend=args.backend, batch_size=args.batch_size, rng_seed=args.random_seed, device_id=args.device_id, default_dtype=np.dtype(args.datatype).type, stochastic_round=args.stochastic_round) # prepare tensors once and reuse them self.input_shape = (self.history_length, ) + self.screen_dim + ( self.batch_size, ) self.tensor = self.be.empty(self.input_shape) self.tensor.lshape = self.input_shape # needed for convolutional networks self.targets = self.be.empty((self.num_actions, self.batch_size)) # create model layers = self.createLayers(num_actions) self.model = Model(layers=layers) self.cost = GeneralizedCost(costfunc=SumSquared()) self.model.initialize(self.tensor.shape[:-1], self.cost) self.optimizer = RMSProp(learning_rate=args.learning_rate, decay_rate=args.rmsprop_decay_rate, stochastic_round=args.stochastic_round) # create target model self.target_steps = args.target_steps self.train_iterations = 0 if self.target_steps: self.target_model = Model(layers=self.createLayers(num_actions)) self.target_model.initialize(self.tensor.shape[:-1]) self.save_weights_path = args.save_weights_path else: self.target_model = self.model self.callback = None
def __init__(self, args, max_action_no, batch_dimension): self.args = args self.train_batch_size = args.train_batch_size self.discount_factor = args.discount_factor self.use_gpu_replay_mem = args.use_gpu_replay_mem self.be = gen_backend(backend='gpu', batch_size=self.train_batch_size) self.input_shape = (batch_dimension[1], batch_dimension[2], batch_dimension[3], batch_dimension[0]) self.input = self.be.empty(self.input_shape) self.input.lshape = self.input_shape # HACK: needed for convolutional networks self.targets = self.be.empty((max_action_no, self.train_batch_size)) if self.use_gpu_replay_mem: self.history_buffer = self.be.zeros(batch_dimension, dtype=np.uint8) self.input_uint8 = self.be.empty(self.input_shape, dtype=np.uint8) else: self.history_buffer = np.zeros(batch_dimension, dtype=np.float32) self.train_net = Model(self.create_layers(max_action_no)) self.cost = GeneralizedCost(costfunc=SumSquared()) # Bug fix for l in self.train_net.layers.layers: l.parallelism = 'Disabled' self.train_net.initialize(self.input_shape[:-1], self.cost) self.target_net = Model(self.create_layers(max_action_no)) # Bug fix for l in self.target_net.layers.layers: l.parallelism = 'Disabled' self.target_net.initialize(self.input_shape[:-1]) if self.args.optimizer == 'Adam': # Adam self.optimizer = Adam(beta_1=args.rms_decay, beta_2=args.rms_decay, learning_rate=args.learning_rate) else: # Neon RMSProp self.optimizer = RMSProp(decay_rate=args.rms_decay, learning_rate=args.learning_rate) self.max_action_no = max_action_no self.running = True
def test_sum_squared_limits(backend_default): outputs = np.array([0.5, 1.0, 0.0, 0.0001]).reshape((4, 1)) targets = np.array(([0.5, 0.0, 1.0, 0.2])).reshape((4, 1)) expected_result = np.sum((outputs - targets) ** 2, axis=0, keepdims=True) / 2. compare_tensors(SumSquared(), outputs, targets, expected_result, tol=1e-7)
# setup model layers layers = [ Conv((5, 5, 16), init=init_norm, activation=Rectlin()), Pooling(2), Conv((5, 5, 32), init=init_norm, activation=Rectlin()), Pooling(2), Conv((3, 3, 32), init=init_norm, activation=Rectlin()), Pooling(2), Affine(nout=100, init=init_norm, activation=Rectlin()), Linear(nout=4, init=init_norm) ] model = Model(layers=layers) # cost = GeneralizedCost(costfunc=CrossEntropyBinary()) cost = GeneralizedCost(costfunc=SumSquared()) # fit and validate optimizer = RMSProp() # configure callbacks callbacks = Callbacks(model, eval_set=eval_set, eval_freq=1) model.fit(train_set, cost=cost, optimizer=optimizer, num_epochs=10, callbacks=callbacks) y_test = model.get_outputs(test_set)
def test_sum_squared(backend): outputs = np.array([0.5, 0.9, 0.1, 0.0001]).reshape((4, 1)) targets = np.array([0.5, 0.99, 0.01, 0.2]).reshape((4, 1)) expected_result = np.sum( (outputs - targets)**2, axis=0, keepdims=True) / 2. compare_tensors(SumSquared(), outputs, targets, expected_result, tol=1e-8)