def test_conv_rnn(backend_default):
    """Smoke test: a convolutional front end feeding each recurrent layer type.

    For every recurrent layer variant, a Conv -> Pooling -> Conv ->
    <recurrent> -> RecurrentMean -> Affine model is built, initialized and
    run through one fprop and one bprop.  No numerical result is checked;
    the test passes when none of those calls raises.
    """
    be = NervanaObject.be
    train_shape = (1, 17, 142)

    # Random input of shape (prod(train_shape), batch) and a random error
    # signal matching the 10 outputs of the final Affine layer.
    inp = be.array(be.rng.randn(np.prod(train_shape), be.bsz))
    delta = be.array(be.rng.randn(10, be.bsz))

    init_norm = Gaussian(loc=0.0, scale=0.01)

    # One instance of every recurrent flavour exercised by this test.
    recurrent_variants = [
        DeepBiLSTM(128, init_norm, activation=Rectlin(),
                   gate_activation=Rectlin(), depth=1, reset_cells=True),
        DeepBiRNN(128, init_norm, activation=Rectlin(),
                  depth=1, reset_cells=True, batch_norm=False),
        DeepBiRNN(128, init_norm, activation=Rectlin(),
                  depth=2, reset_cells=True, batch_norm=False),
        DeepBiRNN(128, init_norm, activation=Rectlin(),
                  depth=1, reset_cells=True, batch_norm=True),
        DeepBiRNN(128, init_norm, activation=Rectlin(),
                  depth=1, reset_cells=True, batch_norm=False, bi_sum=True),
        Recurrent(128, init=init_norm, activation=Rectlin(), reset_cells=True),
        LSTM(128, init_norm, activation=Rectlin(),
             gate_activation=Rectlin(), reset_cells=True),
        GRU(128, init_norm, activation=Rectlin(),
            gate_activation=Rectlin(), reset_cells=True),
    ]

    for recurrent in recurrent_variants:
        # The conv/pool layers are rebuilt for every variant; only the
        # recurrent layer object comes from the list above.
        stack = [
            Conv((2, 2, 4), init=init_norm, activation=Rectlin(),
                 strides=dict(str_h=2, str_w=4)),
            Pooling(2, strides=2),
            Conv((3, 3, 4), init=init_norm, batch_norm=True,
                 activation=Rectlin(), strides=dict(str_h=1, str_w=2)),
            recurrent,
            RecurrentMean(),
            Affine(nout=10, init=init_norm, activation=Rectlin()),
        ]
        model = Model(layers=stack)
        cost = GeneralizedCost(costfunc=CrossEntropyBinary())
        model.initialize(train_shape, cost)
        model.fprop(inp)
        model.bprop(delta)
def test_model_get_outputs(backend):
    """Check that Model.get_outputs agrees with manual batch-by-batch fprop.

    A two-layer MLP is run over three batches of MNIST training images by
    hand; the stacked results must match what get_outputs returns for the
    same (reset) iterator.
    """
    (X_train, _), (_, _), _ = load_mnist()
    train_set = DataIterator(X_train[:backend.bsz * 3])

    init_norm = Gaussian(loc=0.0, scale=0.1)
    hidden = Affine(nout=20, init=init_norm, bias=init_norm,
                    activation=Rectlin())
    readout = Affine(nout=10, init=init_norm,
                     activation=Logistic(shortcut=True))
    mlp = Model(layers=[hidden, readout])

    # Reference result: transpose each batch output so that rows are
    # examples, copy it off the reused buffer, then stack all batches.
    per_batch = []
    for batch, _ in train_set:
        activations = mlp.fprop(batch)
        per_batch.append(activations.get().T.copy())
    ref_output = np.vstack(per_batch)

    train_set.reset()
    output = mlp.get_outputs(train_set)
    assert np.allclose(output, ref_output)
def test_model_get_outputs(backend_default):
    """Check that Model.get_outputs agrees with manual batch-by-batch fprop.

    Same comparison as a plain fprop loop over three batches of MNIST
    training images, with the model explicitly initialized on the iterator
    before any forward pass.
    """
    (X_train, _), (_, _), _ = load_mnist()
    n_examples = backend_default.bsz * 3
    train_set = DataIterator(X_train[:n_examples])

    init_norm = Gaussian(loc=0.0, scale=0.1)
    mlp = Model(layers=[
        Affine(nout=20, init=init_norm, bias=init_norm, activation=Rectlin()),
        Affine(nout=10, init=init_norm, activation=Logistic(shortcut=True)),
    ])
    mlp.initialize(train_set)

    # Reference result: one (examples x outputs) host array per minibatch.
    collected = []
    for batch, _ in train_set:
        collected.append(mlp.fprop(batch).get().T.copy())
    ref_output = np.vstack(collected)

    train_set.reset()
    output = mlp.get_outputs(train_set)
    assert np.allclose(output, ref_output)
def test_model_get_outputs(backend_default, data):
    """Compare Model.get_outputs with manual fprop, then run a benchmark.

    The reference is built from every batch of the MNIST training iterator.
    Only the first ``output.shape[0]`` reference rows are compared, so the
    reference may hold more rows than get_outputs returns.
    """
    train_set = MNIST(path=data).train_iter

    init_norm = Gaussian(loc=0.0, scale=0.1)
    first = Affine(nout=20, init=init_norm, bias=init_norm,
                   activation=Rectlin())
    second = Affine(nout=10, init=init_norm,
                    activation=Logistic(shortcut=True))
    mlp = Model(layers=[first, second])
    mlp.initialize(train_set)

    # Manual forward passes; each batch output is transposed and copied
    # before the next fprop call.
    chunks = []
    for batch, _ in train_set:
        result = mlp.fprop(batch)
        chunks.append(result.get().T.copy())
    ref_output = np.vstack(chunks)

    train_set.reset()
    output = mlp.get_outputs(train_set)
    n_rows = output.shape[0]
    assert allclose_with_out(output, ref_output[:n_rows, :])

    # test model benchmark inference
    mlp.benchmark(train_set, inference=True, niterations=5)
def test_model_get_outputs(backend_default, data):
    """Compare Model.get_outputs with manual fprop, then run a benchmark.

    Uses the first three batches' worth of MNIST training images wrapped in
    an ArrayIterator, and finishes with a five-iteration inference
    benchmark on the same iterator.
    """
    (X_train, _), (_, _), _ = load_mnist(path=data)
    subset = X_train[:backend_default.bsz * 3]
    train_set = ArrayIterator(subset)

    init_norm = Gaussian(loc=0.0, scale=0.1)
    layers = [
        Affine(nout=20, init=init_norm, bias=init_norm, activation=Rectlin()),
        Affine(nout=10, init=init_norm, activation=Logistic(shortcut=True)),
    ]
    mlp = Model(layers=layers)
    mlp.initialize(train_set)

    # Reference outputs gathered one minibatch at a time.
    stacked = []
    for batch, _ in train_set:
        out = mlp.fprop(batch)
        stacked.append(out.get().T.copy())
    ref_output = np.vstack(stacked)

    train_set.reset()
    output = mlp.get_outputs(train_set)
    assert np.allclose(output, ref_output)

    # test model benchmark inference
    mlp.benchmark(train_set, inference=True, niterations=5)
def test_model_get_outputs(backend_default, data):
    """Compare Model.get_outputs with manual fprop, then run a benchmark.

    The reference covers every batch of the MNIST training iterator and is
    truncated to the number of rows get_outputs returns before comparing.
    """
    mnist = MNIST(path=data)
    train_set = mnist.train_iter

    init_norm = Gaussian(loc=0.0, scale=0.1)
    mlp = Model(layers=[
        Affine(nout=20, init=init_norm, bias=init_norm, activation=Rectlin()),
        Affine(nout=10, init=init_norm, activation=Logistic(shortcut=True)),
    ])
    mlp.initialize(train_set)

    # Build the reference by hand: rows are examples after the transpose.
    rows = []
    for batch, _ in train_set:
        fprop_out = mlp.fprop(batch)
        rows.append(fprop_out.get().T.copy())
    ref_output = np.vstack(rows)

    train_set.reset()
    output = mlp.get_outputs(train_set)
    trimmed_ref = ref_output[:output.shape[0], :]
    assert np.allclose(output, trimmed_ref)

    # test model benchmark inference
    mlp.benchmark(train_set, inference=True, niterations=5)
# Score every move stored in an HDF5 file with a saved model and write the
# per-move predictions, the probability given to the actual move, and the
# best probability back into the same file.

# parse the command line arguments
parser = NeonArgparser(__doc__)
parser.add_argument("hdf5")
parser.add_argument("model_pkl")
args = parser.parse_args()

model = Model(args.model_pkl)

# Open read/write explicitly: the result datasets are created inside this
# file below, and h5py >= 3.0 opens files read-only when no mode is given.
h5s = [h5py.File(args.hdf5, "r+")]
try:
    num_moves = sum(h['X'].shape[0] for h in h5s)
    print("Found {} HDF5 files with {} moves".format(len(h5s), num_moves))

    inputs = HDF5Iterator([h['X'] for h in h5s],
                          [h['y'] for h in h5s],
                          ndata=(1024 * 1024))

    # require_dataset reuses an existing dataset of matching shape/dtype or
    # creates it, so re-running the script overwrites earlier results.
    out_predict = h5s[0].require_dataset("predictions", (num_moves, 362),
                                         dtype=np.float32)
    out_score = h5s[0].require_dataset("scores", (num_moves,),
                                       dtype=np.float32)
    out_max = h5s[0].require_dataset("best", (num_moves,),
                                     dtype=np.float32)

    model.initialize(inputs)
    for indata, actual, sl in inputs.predict():
        # NOTE(review): fprop runs with inference=False although this is a
        # prediction pass -- confirm this is intended.
        prediction = model.fprop(indata, inference=False).get().T
        actual = actual.astype(int)
        # Flatten the two label columns into one index (col0 * 19 + col1);
        # negative results are redirected to the last class, index 361.
        actual_idx = actual[:, 0] * 19 + actual[:, 1]
        actual_idx[actual_idx < 0] = 361
        out_predict[sl, :] = prediction
        out_score[sl] = prediction[range(prediction.shape[0]), actual_idx]
        out_max[sl] = prediction.max(axis=1)
        print(sl)
finally:
    # Close the file(s) so the written datasets are flushed to disk even if
    # prediction fails part-way through.
    for h5_file in h5s:
        h5_file.close()
class DeepQNetwork:
    """Deep Q-network on a neon GPU backend with a separate target network.

    Holds two identically structured models: ``model`` is trained, and
    ``target_model`` provides the Q-values of successor states.  The target
    is refreshed from the trained model via ``update_target_network``.
    """

    def __init__(self, num_actions, batch_size=32, discount_rate=0.99,
                 history_length=4, cols=64, rows=64, clip_error=1,
                 min_reward=-1, max_reward=1, batch_norm=False):
        """Create the backend, both networks, the cost and the optimizer.

        Args:
            num_actions: number of outputs of the final layer.
            batch_size: minibatch size used for the backend and all tensors.
            discount_rate: factor applied to the successor-state maximum
                Q-value when building targets.
            history_length: size of the first (leading) input dimension.
            cols, rows: the two board dimensions of the input.
            clip_error: when truthy, errors are clipped to
                ``[-clip_error, clip_error]`` before back-propagation.
            min_reward, max_reward: bounds rewards are clipped to in
                ``train``.
            batch_norm: passed to every hidden layer.
        """
        self.num_actions = num_actions
        self.batch_size = batch_size
        self.discount_rate = discount_rate
        self.history_length = history_length
        self.board_dim = (cols, rows)
        self.clip_error = clip_error
        self.min_reward = min_reward
        self.max_reward = max_reward
        self.batch_norm = batch_norm

        self.be = gen_backend(backend='gpu',
                              batch_size=self.batch_size,
                              datatype=np.dtype('float32').type)

        # Input layout is (history, cols, rows, batch): batch axis last.
        self.input_shape = ((self.history_length,) + self.board_dim +
                            (self.batch_size,))
        self.input = self.be.empty(self.input_shape)
        # hack from simple_dqn "needed for convolutional networks"
        self.input.lshape = self.input_shape
        self.targets = self.be.empty((self.num_actions, self.batch_size))

        layers = self._createLayers(self.num_actions)
        self.model = Model(layers=layers)
        self.cost = GeneralizedCost(costfunc=SumSquared())
        # for l in self.model.layers.layers:
        #     l.parallelism = 'Disabled'
        self.model.initialize(self.input_shape[:-1], cost=self.cost)
        self.optimizer = RMSProp(learning_rate=0.002,
                                 decay_rate=0.95,
                                 stochastic_round=True)

        self.train_iterations = 0
        self.target_model = Model(layers=self._createLayers(num_actions))
        # for l in self.target_model.layers.layers:
        #     l.parallelism = 'Disabled'
        self.target_model.initialize(self.input_shape[:-1])

        # Optional object with an on_train(cost) method; None disables it.
        self.callback = None

    def _createLayers(self, num_actions):
        """Return a fresh layer list: three Conv, one hidden Affine, one
        linear Affine with ``num_actions`` outputs."""
        init_xavier_conv = Xavier(local=True)
        init_xavier_affine = Xavier(local=False)
        layers = []
        layers.append(Conv((8, 8, 32), strides=4, init=init_xavier_conv,
                           activation=Rectlin(), batch_norm=self.batch_norm))
        layers.append(Conv((4, 4, 64), strides=2, init=init_xavier_conv,
                           activation=Rectlin(), batch_norm=self.batch_norm))
        layers.append(Conv((2, 2, 128), strides=1, init=init_xavier_conv,
                           activation=Rectlin(), batch_norm=self.batch_norm))
        layers.append(Affine(nout=256, init=init_xavier_affine,
                             activation=Rectlin(),
                             batch_norm=self.batch_norm))
        # Output layer has no activation argument.
        layers.append(Affine(nout=num_actions, init=init_xavier_affine))
        return layers

    def _setInput(self, states):
        """Load ``states`` into the reusable input tensor.

        The batch axis is moved from first to last position, then every
        value v is replaced by (v + 1) / 2.
        """
        states = np.transpose(states, axes=(1, 2, 3, 0))
        self.input.set(states.copy())
        # presumably maps inputs from [-1, 1] to [0, 1] -- TODO confirm the
        # value range produced by the caller.
        self.be.add(self.input, 1, self.input)
        self.be.divide(self.input, 2, self.input)

    def update_target_network(self):
        """Copy weights and states of the trained model into the target."""
        pdict = self.model.get_description(get_weights=True, keep_states=True)
        self.target_model.deserialize(pdict, load_states=True)

    def train(self, minibatch, epoch):
        """Run one Q-learning update on a minibatch.

        Args:
            minibatch: tuple ``(prestates, actions, rewards, poststates,
                terminals)``.
            epoch: value forwarded to the optimizer.
        """
        prestates, actions, rewards, poststates, terminals = minibatch

        # Q-values of the successor states come from the target network.
        self._setInput(poststates)
        postq = self.target_model.fprop(self.input, inference=True)
        assert postq.shape == (self.num_actions, self.batch_size)

        maxpostq = self.be.max(postq, axis=0).asnumpyarray()
        assert maxpostq.shape == (1, self.batch_size)

        self._setInput(prestates)
        preq = self.model.fprop(self.input, inference=False)
        assert preq.shape == (self.num_actions, self.batch_size)

        # Start from the current predictions so that only the entry of the
        # action actually taken yields a non-zero error.
        targets = preq.asnumpyarray().copy()

        # Honour the configured bounds instead of a hard-coded [-1, 1].
        rewards = np.clip(rewards, self.min_reward, self.max_reward)

        for i, action in enumerate(actions):
            if terminals[i]:
                targets[action, i] = float(rewards[i])
            else:
                targets[action, i] = (float(rewards[i]) +
                                      self.discount_rate * maxpostq[0, i])

        self.targets.set(targets)

        deltas = self.cost.get_errors(preq, self.targets)
        assert deltas.shape == (self.num_actions, self.batch_size)

        cost = self.cost.get_cost(preq, self.targets)
        assert cost.shape == (1, 1)

        if self.clip_error:
            self.be.clip(deltas, -self.clip_error, self.clip_error,
                         out=deltas)

        self.model.bprop(deltas)
        self.optimizer.optimize(self.model.layers_to_optimize, epoch)

        self.train_iterations += 1

        # callback defaults to None, so only report when one is installed.
        if self.callback:
            self.callback.on_train(cost[0, 0])

    def predict(self, states):
        """Return Q-values for a full batch as a (batch, actions) array.

        ``states`` must have shape
        ``(batch_size, history_length) + board_dim``.
        """
        assert states.shape == ((self.batch_size, self.history_length,) +
                                self.board_dim)

        self._setInput(states)
        qvalues = self.model.fprop(self.input, inference=True)
        assert qvalues.shape == (self.num_actions, self.batch_size)

        return qvalues.T.asnumpyarray()

    def load_weights(self, load_path):
        """Load parameters of the trained model from ``load_path``."""
        self.model.load_params(load_path)

    def save_weights(self, save_path):
        """Save parameters of the trained model to ``save_path``."""
        self.model.save_params(save_path)
def test_model_serialize(backend):
    """Round-trip a briefly trained model through serialization.

    A two-branch model is trained for a few minibatches, its inference
    outputs and layer parameter dictionaries are recorded, the model is
    saved and reloaded into a new Model, and the same quantities are
    compared against the recorded ones.
    """
    (X_train, y_train), (_, _), nclass = load_mnist()
    train_set = DataIterator([X_train, X_train], y_train, nclass=nclass,
                             lshape=(1, 28, 28))

    init_norm = Gaussian(loc=0.0, scale=0.01)

    # initialize model
    conv_branch = [
        Conv((5, 5, 16), init=init_norm, bias=Constant(0),
             activation=Rectlin()),
        Pooling(2),
        Affine(nout=20, init=init_norm, bias=init_norm, activation=Rectlin()),
    ]
    dense_branch = [
        Dropout(keep=0.5),
        Affine(nout=20, init=init_norm, bias=init_norm, activation=Rectlin()),
    ]
    layers = [
        MergeConcat([conv_branch, dense_branch]),
        Affine(nout=20, init=init_norm, bias=init_norm, activation=Rectlin()),
        BatchNorm(),
        Affine(nout=10, init=init_norm, activation=Logistic(shortcut=True)),
    ]

    tmp_save = 'test_model_serialize_tmp_save.pickle'
    mlp = Model(layers=layers)
    mlp.optimizer = GradientDescentMomentum(learning_rate=0.1,
                                            momentum_coef=0.9)
    mlp.cost = GeneralizedCost(costfunc=CrossEntropyBinary())

    n_test = 3
    num_epochs = 3

    def inference_outputs(model):
        # Inference-mode outputs of the leading batches; the loop stops
        # after the batch whose index first exceeds n_test.
        collected = []
        for idx, (x, t) in enumerate(train_set):
            collected.append(model.fprop(x, inference=True))
            if idx > n_test:
                break
        return collected

    def assert_entries_close(actual, expected):
        # Entries are either arrays or, in the batch norm case, lists of
        # arrays that are compared element by element.
        if isinstance(actual, list):
            for a, e in zip(actual, expected):
                assert np.allclose(a, e)
        else:
            assert np.allclose(actual, expected)

    # Train model for num_epochs and n_test batches
    for epoch in range(num_epochs):
        for idx, (x, t) in enumerate(train_set):
            y = mlp.fprop(x)
            delta = mlp.cost.get_errors(y, t)
            mlp.bprop(delta)
            mlp.optimizer.optimize(mlp.layers_to_optimize, epoch=epoch)
            if idx > n_test:
                break

    # Get expected outputs of n_test batches and states of all layers
    pdicts_exp = [l.get_params_serialize() for l in mlp.layers_to_optimize]
    outputs_exp = inference_outputs(mlp)

    # Serialize model
    save_obj(mlp.serialize(keep_states=True), tmp_save)

    # Load model
    mlp = Model(layers=layers)
    mlp.load_weights(tmp_save)

    pdicts = [l.get_params_serialize() for l in mlp.layers_to_optimize]
    outputs = inference_outputs(mlp)

    # Check outputs, states, and params are the same
    for output, output_exp in zip(outputs, outputs_exp):
        assert np.allclose(output.get(), output_exp.get())

    for pd, pd_exp in zip(pdicts, pdicts_exp):
        for state, state_exp in zip(pd['states'], pd_exp['states']):
            assert_entries_close(state, state_exp)
        for param, param_exp in zip(pd['params'], pd_exp['params']):
            assert_entries_close(param, param_exp)

    os.remove(tmp_save)
class DeepQNetwork:
    """Deep Q-network on a neon backend, configured from an ``args`` object.

    ``model`` is the network being trained.  When ``args.target_steps`` is
    truthy a separate ``target_model`` is kept and refreshed from ``model``
    every ``target_steps`` training iterations; otherwise both names refer
    to the same network.
    """

    def __init__(self, num_actions, args):
        """Create backend, tensors, network(s), cost and optimizer.

        Args:
            num_actions: number of outputs of the final layer.
            args: object providing the configuration attributes read below
                (batch_size, discount_rate, history_length, screen_height,
                screen_width, clip_error, min_reward, max_reward,
                batch_norm, backend, random_seed, device_id, datatype,
                stochastic_round, optimizer, learning_rate, decay_rate,
                target_steps and, with a target network,
                save_weights_prefix).

        Raises:
            ValueError: if ``args.optimizer`` is not one of 'rmsprop',
                'adam' or 'adadelta'.
        """
        # remember parameters
        self.num_actions = num_actions
        self.batch_size = args.batch_size
        self.discount_rate = args.discount_rate
        self.history_length = args.history_length
        self.screen_dim = (args.screen_height, args.screen_width)
        self.clip_error = args.clip_error
        self.min_reward = args.min_reward
        self.max_reward = args.max_reward
        self.batch_norm = args.batch_norm

        # create Neon backend
        self.be = gen_backend(backend=args.backend,
                              batch_size=args.batch_size,
                              rng_seed=args.random_seed,
                              device_id=args.device_id,
                              datatype=np.dtype(args.datatype).type,
                              stochastic_round=args.stochastic_round)

        # prepare tensors once and reuse them
        self.input_shape = ((self.history_length,) + self.screen_dim +
                            (self.batch_size,))
        self.input = self.be.empty(self.input_shape)
        # HACK: needed for convolutional networks
        self.input.lshape = self.input_shape
        self.targets = self.be.empty((self.num_actions, self.batch_size))

        # create model
        layers = self._createLayers(num_actions)
        self.model = Model(layers=layers)
        self.cost = GeneralizedCost(costfunc=SumSquared())
        # Bug fix
        for l in self.model.layers.layers:
            l.parallelism = 'Disabled'
        self.model.initialize(self.input_shape[:-1], self.cost)

        if args.optimizer == 'rmsprop':
            self.optimizer = RMSProp(learning_rate=args.learning_rate,
                                     decay_rate=args.decay_rate,
                                     stochastic_round=args.stochastic_round)
        elif args.optimizer == 'adam':
            self.optimizer = Adam(learning_rate=args.learning_rate,
                                  stochastic_round=args.stochastic_round)
        elif args.optimizer == 'adadelta':
            self.optimizer = Adadelta(decay=args.decay_rate,
                                      stochastic_round=args.stochastic_round)
        else:
            # The previous "assert false" referenced an undefined name and
            # surfaced as NameError; report the real problem instead.
            raise ValueError("Unknown optimizer: %s" % (args.optimizer,))

        # create target model
        self.target_steps = args.target_steps
        self.train_iterations = 0
        if self.target_steps:
            self.target_model = Model(layers=self._createLayers(num_actions))
            # Bug fix
            for l in self.target_model.layers.layers:
                l.parallelism = 'Disabled'
            self.target_model.initialize(self.input_shape[:-1])
            self.save_weights_prefix = args.save_weights_prefix
        else:
            self.target_model = self.model

        # Optional object with an on_train(cost) method; None disables it.
        self.callback = None

    def _createLayers(self, num_actions):
        """Return a fresh list of layers for one network."""
        # create network
        init_norm = Gaussian(loc=0.0, scale=0.01)
        layers = []
        # The first hidden layer convolves 32 filters of 8x8 with stride 4
        # with the input image and applies a rectifier nonlinearity.
        layers.append(Conv((8, 8, 32), strides=4, init=init_norm,
                           activation=Rectlin(), batch_norm=self.batch_norm))
        # The second hidden layer convolves 64 filters of 4x4 with stride 2,
        # again followed by a rectifier nonlinearity.
        layers.append(Conv((4, 4, 64), strides=2, init=init_norm,
                           activation=Rectlin(), batch_norm=self.batch_norm))
        # This is followed by a third convolutional layer that convolves 64
        # filters of 3x3 with stride 1 followed by a rectifier.
        layers.append(Conv((3, 3, 64), strides=1, init=init_norm,
                           activation=Rectlin(), batch_norm=self.batch_norm))
        # The final hidden layer is fully-connected and consists of 512
        # rectifier units.
        layers.append(Affine(nout=512, init=init_norm, activation=Rectlin(),
                             batch_norm=self.batch_norm))
        # The output layer is a fully-connected linear layer with a single
        # output for each valid action.
        layers.append(Affine(nout=num_actions, init=init_norm))
        return layers

    def _setInput(self, states):
        """Load ``states`` into the reusable input tensor, divided by 255."""
        # change order of axes to match what Neon expects
        states = np.transpose(states, axes=(1, 2, 3, 0))
        # copy() shouldn't be necessary here, but Neon doesn't work otherwise
        self.input.set(states.copy())
        # normalize network input between 0 and 1
        self.be.divide(self.input, 255, self.input)

    def train(self, minibatch, epoch):
        """Run one Q-learning update on a minibatch.

        Args:
            minibatch: tuple ``(prestates, actions, rewards, poststates,
                terminals)``; the states are 4-D arrays, the rest 1-D, all
                with the same leading length.
            epoch: value forwarded to the optimizer.
        """
        # expand components of minibatch
        prestates, actions, rewards, poststates, terminals = minibatch
        assert len(prestates.shape) == 4
        assert len(poststates.shape) == 4
        assert len(actions.shape) == 1
        assert len(rewards.shape) == 1
        assert len(terminals.shape) == 1
        assert prestates.shape == poststates.shape
        assert (prestates.shape[0] == actions.shape[0] == rewards.shape[0] ==
                poststates.shape[0] == terminals.shape[0])

        if self.target_steps and self.train_iterations % self.target_steps == 0:
            # have to serialize also states for batch normalization to work
            pdict = self.model.get_description(get_weights=True,
                                               keep_states=True)
            self.target_model.deserialize(pdict, load_states=True)

        # feed-forward pass for poststates to get Q-values
        self._setInput(poststates)
        postq = self.target_model.fprop(self.input, inference=True)
        assert postq.shape == (self.num_actions, self.batch_size)

        # calculate max Q-value for each poststate
        maxpostq = self.be.max(postq, axis=0).asnumpyarray()
        assert maxpostq.shape == (1, self.batch_size)

        # feed-forward pass for prestates
        self._setInput(prestates)
        preq = self.model.fprop(self.input, inference=False)
        assert preq.shape == (self.num_actions, self.batch_size)

        # make copy of prestate Q-values as targets
        # It seems necessary for cpu backend.
        targets = preq.asnumpyarray().copy()

        # clip rewards between min_reward and max_reward
        rewards = np.clip(rewards, self.min_reward, self.max_reward)

        # update Q-value targets for actions taken
        for i, action in enumerate(actions):
            if terminals[i]:
                targets[action, i] = float(rewards[i])
            else:
                targets[action, i] = (float(rewards[i]) +
                                      self.discount_rate * maxpostq[0, i])

        # copy targets to GPU memory
        self.targets.set(targets)

        # calculate errors
        deltas = self.cost.get_errors(preq, self.targets)
        assert deltas.shape == (self.num_actions, self.batch_size)
        # assert np.count_nonzero(deltas.asnumpyarray()) == 32

        # calculate cost, just in case
        cost = self.cost.get_cost(preq, self.targets)
        assert cost.shape == (1, 1)

        # clip errors
        if self.clip_error:
            self.be.clip(deltas, -self.clip_error, self.clip_error,
                         out=deltas)

        # perform back-propagation of gradients
        self.model.bprop(deltas)

        # perform optimization
        self.optimizer.optimize(self.model.layers_to_optimize, epoch)

        # increase number of weight updates (needed for target clone interval)
        self.train_iterations += 1

        # calculate statistics
        if self.callback:
            self.callback.on_train(cost[0, 0])

    def predict(self, states):
        """Return Q-values for a full batch as a (batch, actions) array."""
        # minibatch is full size, because Neon doesn't let change the
        # minibatch size
        assert states.shape == ((self.batch_size, self.history_length,) +
                                self.screen_dim)

        # calculate Q-values for the states
        self._setInput(states)
        qvalues = self.model.fprop(self.input, inference=True)
        assert qvalues.shape == (self.num_actions, self.batch_size)
        if logger.isEnabledFor(logging.DEBUG):
            logger.debug("Q-values: " + str(qvalues.asnumpyarray()[:, 0]))

        # transpose the result, so that batch size is first dimension
        return qvalues.T.asnumpyarray()

    def load_weights(self, load_path):
        """Load parameters of the trained model from ``load_path``."""
        self.model.load_params(load_path)

    def save_weights(self, save_path):
        """Save parameters of the trained model to ``save_path``."""
        self.model.save_params(save_path)
def test_reshape_layer_model(backend_default, fargs):
    """
    Smoke-test models that use a Reshape layer between layer families.

    test cases:
    - conv before RNNs
    - conv after RNNs
    - conv after LUT

    Each layer stack is wrapped in a Model, initialized, and pushed through
    one fprop and one bprop; nothing is asserted about the values.
    """
    np.random.seed(seed=0)

    nin, nout, bsz = fargs
    be = backend_default
    be.bsz = bsz
    input_size = (nin, be.bsz)

    init = Uniform(-0.1, 0.1)
    g_uni = GlorotUniform()

    # Random input and output-layer error, drawn in this order after seeding.
    inp = be.array(np.random.rand(nin, be.bsz))
    delta = be.array(np.random.rand(nout, be.bsz))

    layers_test = [
        # conv_lut_1: lookup table -> reshape -> conv -> LSTM
        [
            LookupTable(vocab_size=2000, embedding_dim=400, init=init),
            Reshape(reshape=(4, 100, -1)),
            Conv((3, 3, 16), init=init),
            LSTM(64, g_uni, activation=Tanh(), gate_activation=Logistic(),
                 reset_cells=True),
            RecurrentSum(),
            Affine(nout, init, bias=init, activation=Softmax()),
        ],
        # conv_lut_2: lookup table -> reshape -> conv -> pooling
        [
            LookupTable(vocab_size=1000, embedding_dim=400, init=init),
            Reshape(reshape=(4, 50, -1)),
            Conv((3, 3, 16), init=init),
            Pooling(2, strides=2),
            Affine(nout=nout, init=init, bias=init, activation=Softmax()),
        ],
        # conv_rnn_1: LSTM -> reshape -> conv
        [
            LookupTable(vocab_size=2000, embedding_dim=400, init=init),
            LSTM(64, g_uni, activation=Tanh(), gate_activation=Logistic(),
                 reset_cells=True),
            Reshape(reshape=(4, 32, -1)),
            Conv((3, 3, 16), init=init),
            Affine(nout, init, bias=init, activation=Softmax()),
        ],
        # conv_rnn_2: plain recurrent -> reshape -> conv
        [
            LookupTable(vocab_size=2000, embedding_dim=400, init=init),
            Recurrent(64, g_uni, activation=Tanh(), reset_cells=True),
            Reshape(reshape=(4, -1, 32)),
            Conv((3, 3, 16), init=init),
            Affine(nout, init, bias=init, activation=Softmax()),
        ],
        # lut_sum_1: lookup table summed over time, no reshape
        [
            LookupTable(vocab_size=1000, embedding_dim=128, init=init),
            RecurrentSum(),
            Affine(nout=nout, init=init, bias=init, activation=Softmax()),
        ],
        # lut_birnn_1: bidirectional RNN -> reshape -> conv
        [
            LookupTable(vocab_size=1000, embedding_dim=200, init=init),
            DeepBiRNN(32, init=GlorotUniform(), batch_norm=True,
                      activation=Tanh(), reset_cells=True, depth=1),
            Reshape((4, 32, -1)),
            Conv((3, 3, 16), init=init),
            Affine(nout=nout, init=init, bias=init, activation=Softmax()),
        ],
    ]

    for layer_group in layers_test:
        model = Model(layers=layer_group)
        cost = GeneralizedCost(costfunc=CrossEntropyBinary())
        model.initialize(input_size, cost)
        model.fprop(inp)
        model.bprop(delta)
class DeepQNetwork:
    """Deep Q-network on a neon backend that clones its target via disk.

    ``model`` is trained with RMSProp.  When ``args.target_steps`` is truthy
    a separate ``target_model`` is refreshed every ``target_steps`` training
    iterations by serializing ``model`` to a file and loading it back;
    otherwise ``target_model`` is the same object as ``model``.
    """

    def __init__(self, num_actions, args):
        """Create backend, network(s), cost, optimizer and reusable tensors.

        Args:
            num_actions: number of outputs of the final layer.
            args: object providing the configuration attributes read below
                (backend, batch_size, random_seed, device_id, datatype,
                stochastic_round, learning_rate, rmsprop_decay_rate,
                target_steps, discount_rate, history_length, screen_height,
                screen_width, clip_error and, with a target network,
                save_weights_path).
        """
        # create Neon backend
        self.be = gen_backend(backend=args.backend,
                              batch_size=args.batch_size,
                              rng_seed=args.random_seed,
                              device_id=args.device_id,
                              default_dtype=np.dtype(args.datatype).type,
                              stochastic_round=args.stochastic_round)

        # create model
        layers = self.createLayers(num_actions)
        self.model = Model(layers=layers)
        self.cost = GeneralizedCost(costfunc=SumSquared())
        self.optimizer = RMSProp(learning_rate=args.learning_rate,
                                 decay_rate=args.rmsprop_decay_rate,
                                 stochastic_round=args.stochastic_round)

        # create target model
        self.target_steps = args.target_steps
        self.train_iterations = 0
        if self.target_steps:
            self.target_model = Model(layers=self.createLayers(num_actions))
            self.save_weights_path = args.save_weights_path
        else:
            self.target_model = self.model

        # remember parameters
        self.num_actions = num_actions
        self.batch_size = args.batch_size
        self.discount_rate = args.discount_rate
        self.history_length = args.history_length
        self.screen_dim = (args.screen_height, args.screen_width)
        self.clip_error = args.clip_error

        # prepare tensors once and reuse them
        self.input_shape = ((self.history_length,) + self.screen_dim +
                            (self.batch_size,))
        self.tensor = self.be.empty(self.input_shape)
        # needed for convolutional networks
        self.tensor.lshape = self.input_shape
        self.targets = self.be.empty((self.num_actions, self.batch_size))

        # Optional object with an on_train(cost) method; None disables it.
        self.callback = None

    def createLayers(self, num_actions):
        """Return a fresh list of layers for one network."""
        # create network
        init_norm = Gaussian(loc=0.0, scale=0.01)
        layers = []
        # The first hidden layer convolves 32 filters of 8x8 with stride 4
        # with the input image and applies a rectifier nonlinearity.
        layers.append(Conv((8, 8, 32), strides=4, init=init_norm,
                           activation=Rectlin()))
        # The second hidden layer convolves 64 filters of 4x4 with stride 2,
        # again followed by a rectifier nonlinearity.
        layers.append(Conv((4, 4, 64), strides=2, init=init_norm,
                           activation=Rectlin()))
        # This is followed by a third convolutional layer that convolves 64
        # filters of 3x3 with stride 1 followed by a rectifier.
        layers.append(Conv((3, 3, 64), strides=1, init=init_norm,
                           activation=Rectlin()))
        # The final hidden layer is fully-connected and consists of 512
        # rectifier units.
        layers.append(Affine(nout=512, init=init_norm, activation=Rectlin()))
        # The output layer is a fully-connected linear layer with a single
        # output for each valid action.
        layers.append(Affine(nout=num_actions, init=init_norm))
        return layers

    def setTensor(self, states):
        """Load ``states`` into the reusable input tensor, divided by 255."""
        # change order of axes to match what Neon expects
        states = np.transpose(states, axes=(1, 2, 3, 0))
        # copy() shouldn't be necessary here, but Neon doesn't work otherwise
        self.tensor.set(states.copy())
        # normalize network input between 0 and 1
        self.be.divide(self.tensor, 255, self.tensor)

    def train(self, minibatch, epoch):
        """Run one Q-learning update on a minibatch.

        Args:
            minibatch: tuple ``(prestates, actions, rewards, poststates,
                terminals)``; the states are 4-D arrays, the rest 1-D, all
                with the same leading length.
            epoch: value forwarded to the optimizer.
        """
        # expand components of minibatch
        prestates, actions, rewards, poststates, terminals = minibatch
        assert len(prestates.shape) == 4
        assert len(poststates.shape) == 4
        assert len(actions.shape) == 1
        assert len(rewards.shape) == 1
        assert len(terminals.shape) == 1
        assert prestates.shape == poststates.shape
        assert (prestates.shape[0] == actions.shape[0] == rewards.shape[0] ==
                poststates.shape[0] == terminals.shape[0])

        if self.target_steps and self.train_iterations % self.target_steps == 0:
            # HACK: push something through network, so that weights exist
            self.model.fprop(self.tensor)
            # HACK: serialize network to disk and read it back to clone
            filename = os.path.join(self.save_weights_path,
                                    "target_network.pkl")
            save_obj(self.model.serialize(keep_states=False), filename)
            self.target_model.load_weights(filename)

        # feed-forward pass for poststates to get Q-values
        self.setTensor(poststates)
        postq = self.target_model.fprop(self.tensor, inference=True)
        assert postq.shape == (self.num_actions, self.batch_size)

        # calculate max Q-value for each poststate
        maxpostq = self.be.max(postq, axis=0).asnumpyarray()
        assert maxpostq.shape == (1, self.batch_size)

        # feed-forward pass for prestates
        self.setTensor(prestates)
        preq = self.model.fprop(self.tensor, inference=False)
        assert preq.shape == (self.num_actions, self.batch_size)

        # Make a real copy of the prestate Q-values as targets.  The array
        # is edited in place below; if it shared memory with preq the
        # predictions would change too and the errors would collapse to zero.
        targets = preq.asnumpyarray().copy()

        # update Q-value targets for actions taken
        for i, action in enumerate(actions):
            if terminals[i]:
                targets[action, i] = float(rewards[i])
            else:
                targets[action, i] = (float(rewards[i]) +
                                      self.discount_rate * maxpostq[0, i])

        # copy targets to GPU memory
        self.targets.set(targets)

        # calculate errors
        deltas = self.cost.get_errors(preq, self.targets)
        assert deltas.shape == (self.num_actions, self.batch_size)
        # assert np.count_nonzero(deltas.asnumpyarray()) == 32

        # calculate cost, just in case
        cost = self.cost.get_cost(preq, self.targets)
        assert cost.shape == (1, 1)

        # clip errors
        if self.clip_error:
            self.be.clip(deltas, -self.clip_error, self.clip_error,
                         out=deltas)

        # perform back-propagation of gradients
        self.model.bprop(deltas)

        # perform optimization
        self.optimizer.optimize(self.model.layers_to_optimize, epoch)

        # increase number of weight updates (needed for target clone interval)
        self.train_iterations += 1

        # calculate statistics
        if self.callback:
            self.callback.on_train(cost.asnumpyarray()[0, 0])

    def predict(self, states):
        """Return the highest-valued action for the first state in the batch."""
        # minibatch is full size, because Neon doesn't let change the
        # minibatch size
        assert states.shape == ((self.batch_size, self.history_length,) +
                                self.screen_dim)

        # calculate Q-values for the states
        self.setTensor(states)
        qvalues = self.model.fprop(self.tensor, inference=True)
        assert qvalues.shape == (self.num_actions, self.batch_size)
        if logger.isEnabledFor(logging.DEBUG):
            logger.debug("Q-values: " + str(qvalues.asnumpyarray()[:, 0]))

        # find the action with highest q-value
        actions = self.be.argmax(qvalues, axis=0)
        assert actions.shape == (1, self.batch_size)

        # take only the first result
        return actions.asnumpyarray()[0, 0]

    def getMeanQ(self, states):
        """Return the mean over the batch of each state's maximum Q-value."""
        assert states.shape == ((self.batch_size, self.history_length,) +
                                self.screen_dim)

        # calculate Q-values for the states
        self.setTensor(states)
        qvalues = self.model.fprop(self.tensor, inference=True)
        assert qvalues.shape == (self.num_actions, self.batch_size)

        # take maximum Q-value for each state
        actions = self.be.max(qvalues, axis=0)
        assert actions.astensor().shape == (1, self.batch_size)

        # calculate mean Q-value of all states
        meanq = self.be.mean(actions, axis=1)
        assert meanq.astensor().shape == (1, 1)

        # return the mean
        return meanq.asnumpyarray()[0, 0]

    def load_weights(self, load_path):
        """Load weights of the trained model from ``load_path``."""
        self.model.load_weights(load_path)

    def save_weights(self, save_path):
        """Serialize the trained model, including states, to ``save_path``."""
        save_obj(self.model.serialize(keep_states=True), save_path)
callbacks=callbacks) train.reset() # get 1 image for im, l in train: break train.exit_batch_provider() save_obj((im.get(), l.get()), 'im1.pkl') im_save = im.get().copy() if args.resume: (im2, l2) = load_obj('im1.pkl') im.set(im2) l.set(l2) # run fprop and bprop on this minibatch save the results out_fprop = model.fprop(im) out_fprop_save = [x.get() for x in out_fprop] im.set(im_save) out_fprop = model.fprop(im) out_fprop_save2 = [x.get() for x in out_fprop] for x, y in zip(out_fprop_save, out_fprop_save2): assert np.max(np.abs(x - y)) == 0.0, '2 fprop iterations do not match' # run fit fot 1 minibatch # have to do this by hand delta = model.cost.get_errors(im, l) model.bprop(delta) if args.resume: model.optimizer = opt model.optimizer.optimize(model.layers_to_optimize, epoch=model.epoch_index)
class ModelRunnerNeon():
    """DQN runner on a neon GPU backend with a train and a target network.

    ``train_net`` is optimized in ``train``; ``target_net`` supplies the
    Q-values of successor states and is refreshed with ``update_model``.
    """

    def __init__(self, args, max_action_no, batch_dimension):
        """Create backend, reusable tensors, both networks and the optimizer.

        Args:
            args: configuration object; the attributes read by this class
                include train_batch_size, discount_factor,
                use_gpu_replay_mem, optimizer, rms_decay, learning_rate,
                dnn_initializer, clip_reward, clip_reward_high,
                clip_reward_low, prioritized_replay, double_dqn and
                clip_loss.
            max_action_no: number of outputs of the final layer.
            batch_dimension: 4-element shape whose first entry becomes the
                last axis of the network input.
        """
        self.args = args
        self.train_batch_size = args.train_batch_size
        self.discount_factor = args.discount_factor
        self.use_gpu_replay_mem = args.use_gpu_replay_mem

        self.be = gen_backend(backend='gpu', batch_size=self.train_batch_size)

        # Network input puts batch_dimension[0] last.
        self.input_shape = (batch_dimension[1], batch_dimension[2],
                            batch_dimension[3], batch_dimension[0])
        self.input = self.be.empty(self.input_shape)
        # HACK: needed for convolutional networks
        self.input.lshape = self.input_shape
        self.targets = self.be.empty((max_action_no, self.train_batch_size))

        if self.use_gpu_replay_mem:
            self.history_buffer = self.be.zeros(batch_dimension,
                                                dtype=np.uint8)
            self.input_uint8 = self.be.empty(self.input_shape, dtype=np.uint8)
        else:
            self.history_buffer = np.zeros(batch_dimension, dtype=np.float32)

        self.train_net = Model(self.create_layers(max_action_no))
        self.cost = GeneralizedCost(costfunc=SumSquared())
        # Bug fix
        for l in self.train_net.layers.layers:
            l.parallelism = 'Disabled'
        self.train_net.initialize(self.input_shape[:-1], self.cost)

        self.target_net = Model(self.create_layers(max_action_no))
        # Bug fix
        for l in self.target_net.layers.layers:
            l.parallelism = 'Disabled'
        self.target_net.initialize(self.input_shape[:-1])

        if self.args.optimizer == 'Adam':  # Adam
            self.optimizer = Adam(beta_1=args.rms_decay,
                                  beta_2=args.rms_decay,
                                  learning_rate=args.learning_rate)
        else:  # Neon RMSProp
            self.optimizer = RMSProp(decay_rate=args.rms_decay,
                                     learning_rate=args.learning_rate)

        self.max_action_no = max_action_no
        self.running = True

    def get_initializer(self, input_size):
        """Return the weight initializer chosen by ``args.dnn_initializer``.

        'xavier' gives Xavier(); 'fan_in' gives a Uniform over
        +/- 1/sqrt(input_size); anything else gives Gaussian(0, 0.01).
        """
        dnnInit = self.args.dnn_initializer
        if dnnInit == 'xavier':
            initializer = Xavier()
        elif dnnInit == 'fan_in':
            std_dev = 1.0 / math.sqrt(input_size)
            initializer = Uniform(low=-std_dev, high=std_dev)
        else:
            initializer = Gaussian(0, 0.01)
        return initializer

    def create_layers(self, max_action_no):
        """Return a fresh layer list: three Conv, one hidden Affine and a
        linear Affine with ``max_action_no`` outputs.

        Each layer gets its own initializer, requested with the
        ``input_size`` shown on the line above it.
        """
        layers = []

        initializer = self.get_initializer(input_size=4 * 8 * 8)
        layers.append(Conv(fshape=(8, 8, 32), strides=4, init=initializer,
                           bias=initializer, activation=Rectlin()))

        initializer = self.get_initializer(input_size=32 * 4 * 4)
        layers.append(Conv(fshape=(4, 4, 64), strides=2, init=initializer,
                           bias=initializer, activation=Rectlin()))

        initializer = self.get_initializer(input_size=64 * 3 * 3)
        layers.append(Conv(fshape=(3, 3, 64), strides=1, init=initializer,
                           bias=initializer, activation=Rectlin()))

        initializer = self.get_initializer(input_size=7 * 7 * 64)
        layers.append(Affine(nout=512, init=initializer, bias=initializer,
                             activation=Rectlin()))

        initializer = self.get_initializer(input_size=512)
        layers.append(Affine(nout=max_action_no, init=initializer,
                             bias=initializer))

        return layers

    def clip_reward(self, reward):
        """Clamp ``reward`` to [args.clip_reward_low, args.clip_reward_high]."""
        if reward > self.args.clip_reward_high:
            return self.args.clip_reward_high
        elif reward < self.args.clip_reward_low:
            return self.args.clip_reward_low
        else:
            return reward

    def set_input(self, data):
        """Load ``data`` into the input tensor, first axis moved last, / 255."""
        if self.use_gpu_replay_mem:
            # Transpose on the backend into the uint8 staging tensor, then
            # scale into the input tensor.
            self.be.copy_transpose(data, self.input_uint8, axes=(1, 2, 3, 0))
            self.input[:] = self.input_uint8 / 255
        else:
            self.input.set(data.transpose(1, 2, 3, 0).copy())
            self.be.divide(self.input, 255, self.input)

    def predict(self, history_buffer):
        """Return the train network's Q-values for the first batch entry."""
        self.set_input(history_buffer)
        output = self.train_net.fprop(self.input, inference=True)
        return output.T.asnumpyarray()[0]

    def print_weights(self):
        """Do nothing (placeholder)."""
        pass

    def train(self, minibatch, replay_memory, learning_rate, debug):
        """Run one Q-learning update on a minibatch.

        Args:
            minibatch: ``(prestates, actions, rewards, poststates,
                terminals)``, extended by ``(replay_indexes, heap_indexes,
                weights)`` when ``args.prioritized_replay`` is True.
            replay_memory: object whose ``update_td`` receives the absolute
                error of each sample under prioritized replay.
            learning_rate: not used by this method.
            debug: when truthy, per-sample weights and errors are printed
                under prioritized replay.
        """
        if self.args.prioritized_replay == True:
            (prestates, actions, rewards, poststates, terminals,
             replay_indexes, heap_indexes, weights) = minibatch
        else:
            prestates, actions, rewards, poststates, terminals = minibatch

        # Get Q*(s, a) with targetNet
        self.set_input(poststates)
        post_qvalue = self.target_net.fprop(
            self.input, inference=True).T.asnumpyarray()

        if self.args.double_dqn == True:
            # Get Q*(s, a) with trainNet
            post_qvalue2 = self.train_net.fprop(
                self.input, inference=True).T.asnumpyarray()

        # Get Q(s, a) with trainNet
        self.set_input(prestates)
        pre_qvalue = self.train_net.fprop(self.input, inference=False)

        # Start from the current predictions so only the taken action's
        # entry produces a non-zero error.
        label = pre_qvalue.asnumpyarray().copy()
        for i in range(0, self.train_batch_size):
            if self.args.clip_reward:
                reward = self.clip_reward(rewards[i])
            else:
                reward = rewards[i]
            if terminals[i]:
                label[actions[i], i] = reward
            else:
                if self.args.double_dqn == True:
                    # Train net picks the action, target net evaluates it.
                    max_index = np.argmax(post_qvalue2[i])
                    label[actions[i], i] = (
                        reward +
                        self.discount_factor * post_qvalue[i][max_index])
                else:
                    label[actions[i], i] = (
                        reward + self.discount_factor * np.max(post_qvalue[i]))

        # copy targets to GPU memory
        self.targets.set(label)

        delta = self.cost.get_errors(pre_qvalue, self.targets)

        if self.args.prioritized_replay == True:
            delta_value = delta.asnumpyarray()
            for i in range(self.train_batch_size):
                if debug:
                    # Single-argument call form prints the same text on
                    # Python 2 and is valid syntax on Python 3.
                    print('weight[%s]: %.5f, delta: %.5f, newDelta: %.5f' % (
                        i, weights[i], delta_value[actions[i], i],
                        weights[i] * delta_value[actions[i], i]))
                replay_memory.update_td(heap_indexes[i],
                                        abs(delta_value[actions[i], i]))
                delta_value[actions[i], i] = (weights[i] *
                                              delta_value[actions[i], i])
            delta.set(delta_value.copy())

        if self.args.clip_loss:
            self.be.clip(delta, -1.0, 1.0, out=delta)

        self.train_net.bprop(delta)
        self.optimizer.optimize(self.train_net.layers_to_optimize, epoch=0)

    def update_model(self):
        """Copy weights and states of the train network into the target."""
        # have to serialize also states for batch normalization to work
        pdict = self.train_net.get_description(get_weights=True,
                                               keep_states=True)
        self.target_net.deserialize(pdict, load_states=True)
        # print ('Updated target model')

    def finish_train(self):
        """Clear the ``running`` flag."""
        self.running = False

    def load(self, file_name):
        """Load train-network parameters and sync the target network."""
        self.train_net.load_params(file_name)
        self.update_model()

    def save(self, file_name):
        """Save train-network parameters to ``file_name``."""
        self.train_net.save_params(file_name)
def test_model_serialize(backend_default, data):
    """
    Train a small multistream model, save it, reload it and check that the
    reloaded model reproduces the outputs, states and params of the original.

    Arguments:
        backend_default: backend fixture (not referenced directly in the body).
        data: path handed to the MNIST dataset loader.
    """
    dataset = MNIST(path=data)
    (X_train, y_train), (X_test, y_test), nclass = dataset.load_data()
    train_set = ArrayIterator([X_train, X_train], y_train, nclass=nclass,
                              lshape=(1, 28, 28))

    init_norm = Gaussian(loc=0.0, scale=0.01)

    # initialize model
    path1 = Sequential([
        Conv((5, 5, 16), init=init_norm, bias=Constant(0), activation=Rectlin()),
        Pooling(2),
        Affine(nout=20, init=init_norm, bias=init_norm, activation=Rectlin())
    ])
    path2 = Sequential([
        Affine(nout=100, init=init_norm, bias=Constant(0), activation=Rectlin()),
        Dropout(keep=0.5),
        Affine(nout=20, init=init_norm, bias=init_norm, activation=Rectlin())
    ])
    layers = [
        MergeMultistream(layers=[path1, path2], merge="stack"),
        Affine(nout=20, init=init_norm, batch_norm=True, activation=Rectlin()),
        Affine(nout=10, init=init_norm, activation=Logistic(shortcut=True))
    ]

    tmp_save = 'test_model_serialize_tmp_save.pickle'
    mlp = Model(layers=layers)
    mlp.optimizer = GradientDescentMomentum(learning_rate=0.1, momentum_coef=0.9)
    mlp.cost = GeneralizedCost(costfunc=CrossEntropyBinary())
    mlp.initialize(train_set, cost=mlp.cost)
    n_test = 3
    num_epochs = 3

    # Train model for num_epochs and n_test batches
    # (the break is checked after the batch has been processed)
    for epoch in range(num_epochs):
        for i, (x, t) in enumerate(train_set):
            x = mlp.fprop(x)
            delta = mlp.cost.get_errors(x, t)
            mlp.bprop(delta)
            mlp.optimizer.optimize(mlp.layers_to_optimize, epoch=epoch)
            if i > n_test:
                break

    # Get expected outputs of n_test batches and states of all layers
    outputs_exp = []
    pdicts_exp = [layer.get_params_serialize() for layer in mlp.layers_to_optimize]
    for i, (x, t) in enumerate(train_set):
        outputs_exp.append(mlp.fprop(x, inference=True))
        if i > n_test:
            break

    # Everything that touches the temporary file lives inside try/finally so
    # the pickle is removed even when one of the assertions below fails.
    try:
        # Serialize model
        mlp.save_params(tmp_save, keep_states=True)

        # Load model
        mlp = Model(tmp_save)
        mlp.initialize(train_set)

        outputs = []
        pdicts = [layer.get_params_serialize() for layer in mlp.layers_to_optimize]
        for i, (x, t) in enumerate(train_set):
            outputs.append(mlp.fprop(x, inference=True))
            if i > n_test:
                break

        # Check outputs, states, and params are the same
        for output, output_exp in zip(outputs, outputs_exp):
            assert allclose_with_out(output.get(), output_exp.get())

        for pd, pd_exp in zip(pdicts, pdicts_exp):
            for s, s_e in zip(pd['states'], pd_exp['states']):
                if isinstance(s, list):  # this is the batch norm case
                    for _s, _s_e in zip(s, s_e):
                        assert allclose_with_out(_s, _s_e)
                else:
                    assert allclose_with_out(s, s_e)
            for p, p_e in zip(pd['params'], pd_exp['params']):
                assert type(p) is type(p_e)
                if isinstance(p, list):  # this is the batch norm case
                    for _p, _p_e in zip(p, p_e):
                        assert allclose_with_out(_p, _p_e)
                elif isinstance(p, np.ndarray):
                    assert allclose_with_out(p, p_e)
                else:
                    assert p == p_e
    finally:
        # save_params may have failed before creating the file
        if os.path.exists(tmp_save):
            os.remove(tmp_save)
# Number of characters to generate after the seed has been consumed.
num_predict = 1000

layers = [
    LSTM(hidden_size, init, Logistic(), Tanh()),
    Affine(len(train_set.vocab), init, bias=init, activation=Softmax())
]
model = Model(layers=layers)
model.load_weights(args.save_path)

# Generate text
text = []
seed_tokens = list('ROMEO:')

x = be.zeros((len(train_set.vocab), time_steps))

# Feed the seed one token at a time; only column 0 of x is ever set.
for s in seed_tokens:
    x.fill(0)
    x[train_set.token_to_index[s], 0] = 1
    y = model.fprop(x)

for i in range(num_predict):
    # Take last prediction and feed into next fprop
    pred = sample(y.get()[:, -1])
    text.append(train_set.index_to_token[int(pred)])

    x.fill(0)
    x[pred, 0] = 1
    y = model.fprop(x)

# Parenthesised single-argument form prints the same on Python 2 and 3.
print(''.join(seed_tokens + text))
class DQNNeon(Learner):
    """
    This class is an implementation of the DQN network based on Neon.

    The modules that interact with the agent, the replay memory and the
    statistic calls are implemented here, taking the individual requirements
    of the Neon framework into account. The code is adapted from:
    https://github.com/tambetm/simple_dqn

    Attributes:
        input_shape (tuple[int]): Dimension of the network input.
        dummy_batch (numpy.ndarray): Dummy batch used to calculate Q-values for single states.
        batch_norm (bool): Indicates if normalization is wanted for a certain layer (default=False).
        be (neon.backends.nervanagpu.NervanaGPU): Describes the backend for the Neon implementation.
        input (neon.backends.nervanagpu.GPUTensor): Definition of network input shape.
        targets(neon.backends.nervanagpu.GPUTensor): Definition of network output shape.
        model (neon.models.model.Model): Generated Neon model.
        target_model (neon.models.model.Model): Generated target Neon model.
        cost_func (neon.layers.layer.GeneralizedCost): Cost function for model training.
        callback (Statistics): Hook for the statistics object to pass train and test information.

    Note:
        More attributes of this class are defined in the base class Learner.
    """

    def __init__(self, env, args, rng, name = "DQNNeon"):
        """
        Initializes a network based on the Neon framework.

        Args:
            env (AtariEnv): The environment in which the agent actuates.
            args (argparse.Namespace): All settings either with a default value or set via command line arguments.
            rng (mtrand.RandomState): initialized Mersenne Twister pseudo-random number generator.
            name (str): The name of the network object.

        Note:
            This function should always call the base class first to initialize
            the common values for the networks.
        """
        _logger.info("Initializing new object of type " + str(type(self).__name__))
        super(DQNNeon, self).__init__(env, args, rng, name)
        self.input_shape = (self.sequence_length,) + self.frame_dims + (self.batch_size,)
        self.dummy_batch = np.zeros((self.batch_size, self.sequence_length) + self.frame_dims,
                                    dtype=np.uint8)
        self.batch_norm = args.batch_norm

        self.be = gen_backend(
            backend = args.backend,
            batch_size = args.batch_size,
            rng_seed = args.random_seed,
            device_id = args.device_id,
            datatype = np.dtype(args.datatype).type,
            stochastic_round = args.stochastic_round)

        # prepare tensors once and reuse them
        self.input = self.be.empty(self.input_shape)
        self.input.lshape = self.input_shape  # HACK: needed for convolutional networks
        self.targets = self.be.empty((self.output_shape, self.batch_size))

        # create model
        layers = self._create_layer()
        self.model = Model(layers = layers)
        self.cost_func = GeneralizedCost(costfunc = SumSquared())
        self._disable_parallelism(self.model)
        self.model.initialize(self.input_shape[:-1], self.cost_func)

        self._set_optimizer()

        if self.args.load_weights is not None:
            self.load_weights(self.args.load_weights)

        # create target model
        # NOTE(review): the target model is not synchronised with self.model
        # here (not even after load_weights); it only receives the weights on
        # the first _copy_theta() call in train(). Confirm this is intended.
        if self.target_update_frequency:
            layers = self._create_layer()
            self.target_model = Model(layers)
            self._disable_parallelism(self.target_model)
            self.target_model.initialize(self.input_shape[:-1])
        else:
            self.target_model = self.model

        self.callback = None
        _logger.debug("%s" % self)

    @staticmethod
    def _disable_parallelism(model):
        """
        Sets ``parallelism = 'Disabled'`` on every layer of ``model``.

        Carried over from the original code, where it was only labelled
        "Bug fix"; it has to run before ``model.initialize``.
        """
        for layer in model.layers.layers:
            layer.parallelism = 'Disabled'

    def _create_layer(self):
        """
        Build a network consistent with the DeepMind Nature paper.

        Returns:
            layers (list): Three convolutional layers followed by two affine layers.
        """
        _logger.debug("Output shape = %d" % self.output_shape)
        # create network
        init_norm = Gaussian(loc=0.0, scale=0.01)
        layers = []
        # The first hidden layer convolves 32 filters of 8x8 with stride 4
        # with the input image and applies a rectifier nonlinearity.
        layers.append(
            Conv((8, 8, 32),
                 strides=4,
                 init=init_norm,
                 activation=Rectlin(),
                 batch_norm=self.batch_norm))
        # The second hidden layer convolves 64 filters of 4x4 with stride 2,
        # again followed by a rectifier nonlinearity.
        layers.append(
            Conv((4, 4, 64),
                 strides=2,
                 init=init_norm,
                 activation=Rectlin(),
                 batch_norm=self.batch_norm))
        # This is followed by a third convolutional layer that convolves
        # 64 filters of 3x3 with stride 1 followed by a rectifier.
        layers.append(
            Conv((3, 3, 64),
                 strides=1,
                 init=init_norm,
                 activation=Rectlin(),
                 batch_norm=self.batch_norm))
        # The final hidden layer is fully-connected and consists of 512 rectifier units.
        layers.append(
            Affine(
                nout=512,
                init=init_norm,
                activation=Rectlin(),
                batch_norm=self.batch_norm))
        # The output layer is a fully-connected linear layer with a single
        # output for each valid action.
        layers.append(
            Affine(
                nout= self.output_shape,
                init = init_norm))
        return layers

    def _set_optimizer(self):
        """
        Initializes the selected optimization algorithm.

        Raises:
            ValueError: If ``args.optimizer`` is not 'rmsprop', 'adam' or 'adadelta'.
        """
        _logger.debug("Optimizer = %s" % str(self.args.optimizer))
        if self.args.optimizer == 'rmsprop':
            self.optimizer = RMSProp(
                learning_rate = self.args.learning_rate,
                decay_rate = self.args.decay_rate,
                stochastic_round = self.args.stochastic_round)
        elif self.args.optimizer == 'adam':
            self.optimizer = Adam(
                learning_rate = self.args.learning_rate,
                stochastic_round = self.args.stochastic_round)
        elif self.args.optimizer == 'adadelta':
            self.optimizer = Adadelta(
                decay = self.args.decay_rate,
                stochastic_round = self.args.stochastic_round)
        else:
            # The original `assert false, ...` raised NameError (`false` is
            # undefined) and would vanish under -O; fail explicitly instead.
            raise ValueError("Unknown optimizer: %r" % (self.args.optimizer,))

    def _prepare_network_input(self, states):
        """
        Transforms and normalizes the states from one minibatch.

        Args:
            states (): a set of states with the size of minibatch
        """
        _logger.debug("Normalizing and transforming input")
        # change order of axes to match what Neon expects
        states = np.transpose(states, axes = (1, 2, 3, 0))
        # copy() shouldn't be necessary here, but Neon doesn't work otherwise
        self.input.set(states.copy())
        # normalize network input between 0 and 1
        self.be.divide(self.input, self.grayscales, self.input)

    def train(self, minibatch, epoch):
        """
        Prepare, perform and document a complete train step for one minibatch.

        Args:
            minibatch (numpy.ndarray): Mini-batch of states, shape=(batch_size,sequence_length,frame_width,frame_height)
            epoch (int): Current train epoch
        """
        _logger.debug("Complete training step for one minibatch")
        prestates, actions, rewards, poststates, terminals = minibatch
        assert len(prestates.shape) == 4
        assert len(poststates.shape) == 4
        assert len(actions.shape) == 1
        assert len(rewards.shape) == 1
        assert len(terminals.shape) == 1
        assert prestates.shape == poststates.shape
        assert prestates.shape[0] == actions.shape[0] == rewards.shape[0] == poststates.shape[0] == terminals.shape[0]

        # feed-forward pass for poststates to get Q-values
        self._prepare_network_input(poststates)
        postq = self.target_model.fprop(self.input, inference = True)
        assert postq.shape == (self.output_shape, self.batch_size)

        # calculate max Q-value for each poststate
        maxpostq = self.be.max(postq, axis=0).asnumpyarray()
        assert maxpostq.shape == (1, self.batch_size)
        # average maxpostq for stats
        maxpostq_avg = maxpostq.mean()

        # feed-forward pass for prestates
        self._prepare_network_input(prestates)
        preq = self.model.fprop(self.input, inference = False)
        assert preq.shape == (self.output_shape, self.batch_size)

        # make copy of prestate Q-values as targets; the explicit copy()
        # guarantees that editing `targets` below can never write through to
        # `preq` should a backend hand back a view of its buffer
        # (presumably the CPU backend does - TODO confirm).
        targets = preq.asnumpyarray().copy()

        # clip rewards between -1 and 1
        rewards = np.clip(rewards, self.min_reward, self.max_reward)

        # update Q-value targets for each state only at actions taken
        for i, action in enumerate(actions):
            if terminals[i]:
                targets[action, i] = float(rewards[i])
            else:
                targets[action, i] = float(rewards[i]) + self.discount_rate * maxpostq[0,i]

        # copy targets to GPU memory
        self.targets.set(targets)

        # calculate errors
        errors = self.cost_func.get_errors(preq, self.targets)
        assert errors.shape == (self.output_shape, self.batch_size)

        # average error where there is a error (should be 1 in every row)
        #TODO: errors_avg = np.sum(errors)/np.size(errors[errors>0.])

        # clip errors
        if self.clip_error:
            self.be.clip(errors, -self.clip_error, self.clip_error, out = errors)

        # calculate cost, just in case
        cost = self.cost_func.get_cost(preq, self.targets)
        assert cost.shape == (1,1)

        # perform back-propagation of gradients
        self.model.bprop(errors)

        # perform optimization
        self.optimizer.optimize(self.model.layers_to_optimize, epoch)

        # increase number of weight updates (needed for target clone interval)
        self.update_iterations += 1
        if self.target_update_frequency and self.update_iterations % self.target_update_frequency == 0:
            self._copy_theta()
            _logger.info("Network update #%d: Cost = %s, Avg Max Q-value = %s" % (self.update_iterations, str(cost.asnumpyarray()[0][0]), str(maxpostq_avg)))

        # update statistics
        if self.callback:
            self.callback.from_learner(cost.asnumpyarray()[0,0], maxpostq_avg)

    def get_Q(self, state):
        """
        Calculates the Q-values for one mini-batch.

        Args:
            state(numpy.ndarray): Single state, shape=(sequence_length,frame_width,frame_height).

        Returns:
            q_values (numpy.ndarray): Results for first element of mini-batch from one forward pass through the network, shape=(self.output_shape,)
        """
        _logger.debug("State shape = %s" % str(state.shape))
        # minibatch is full size, because Neon doesn't let change the minibatch size
        # so we need to run 32 forward steps to get the one we actually want
        self.dummy_batch[0] = state
        states = self.dummy_batch
        assert states.shape == ((self.batch_size, self.sequence_length,) + self.frame_dims)
        # calculate Q-values for the states
        self._prepare_network_input(states)
        qvalues = self.model.fprop(self.input, inference = True)
        assert qvalues.shape == (self.output_shape, self.batch_size)
        _logger.debug("Qvalues: %s" % (str(qvalues.asnumpyarray()[:,0])))
        return qvalues.asnumpyarray()[:,0]

    def _copy_theta(self):
        """
        Copies the weights of the current network to the target network.
        """
        _logger.debug("Copying weights")
        pdict = self.model.get_description(get_weights=True, keep_states=True)
        self.target_model.deserialize(pdict, load_states=True)

    def save_weights(self, target_dir, epoch):
        """
        Saves the current network parameters to disk.

        Args:
            target_dir (str): Directory where the network parameters are stored for each episode.
            epoch (int): Current epoch.
        """
        filename = "%s_%s_%s_%d.prm" % (str(self.args.game.lower()), str(self.args.net_type.lower()), str(self.args.optimizer.lower()), (epoch + 1))
        self.model.save_params(os.path.join(target_dir, filename))

    def load_weights(self, source_file):
        """
        Loads the network parameters from a given file.

        Args:
            source_file (str): Complete path to a file with network parameters.
        """
        self.model.load_params(source_file)
class Vectorizer:
    """
    Turns images into attribute vectors by running them through a stored
    Neon model and reading the activations of one chosen layer.
    """

    model = None
    layer = None
    patch_width = None
    patch_height = None
    backend = None
    cores = None
    generated_backend_object= None

    def __init__(self, prm_path=default_prm_path, layer=-4, backend='gpu', cores=32):
        """
        Generate the backend, load the model file and initialise the model.

        :param prm_path: path of the serialized model file
        :param layer: index of the layer whose outputs are used as the vector
        :param backend: backend name passed to gen_backend
        :param cores: batch size; forced to 1 when backend is 'cpu'
        :raise Exception -- when prm_path is not an existing file
        """
        print('Log::Vectorizer:: Initialising Vectorizer')
        self.layer = layer
        if not os.path.isfile(prm_path):
            raise Exception('FileNotFound: Cannot find the file %s' % prm_path)

        print('Log::Vectorizer:: Generating backend, backend: {}'.format(backend))
        if backend == 'cpu':
            cores = 1
        self.cores = cores
        self.generated_backend_object = gen_backend(batch_size=self.cores, backend=backend)
        self.backend = backend

        print('Log::Vectorizer:: Loading model from %s' % prm_path)
        model_dict = load_obj(prm_path)

        print('Log::Vectorizer:: Generating model with loaded file')
        self.model = Model(model_dict)

        # now we are going to extract the middle patch from the image,
        # based on the size used to train the model
        self.patch_height = model_dict['train_input_shape'][1]
        self.patch_width = model_dict['train_input_shape'][2]

        print('Log::Vectorizer:: Initialising Model')
        # initialise the model so that internally the arrays are allocated to the correct size
        self.model.initialize(model_dict['train_input_shape'])
        print('Log::Vectorizer:: DONE!')

    def _new_host_buffer(self):
        """Return a zeroed host buffer with one column per batch item."""
        return np.zeros((3 * self.patch_height * self.patch_width, self.model.be.bsz))

    def _layer_activations(self, host_buf):
        """
        Copy host_buf to the device, run a forward pass and return the
        outputs of layer ``self.layer`` as a numpy array (one column per
        batch item).
        """
        # make buffer on the device
        dev_buf = self.model.be.zeros((3 * self.patch_height * self.patch_width, self.model.be.bsz))
        # copy host buffer to device buffer
        dev_buf[:] = host_buf
        # Send through the network; only the side effect (filling the layer
        # outputs) is needed, the returned predictions are not used.
        self.model.fprop(dev_buf, True)
        # Note: model.layers represents a SingleOutputTree when using GoogLeNet;
        # during inference only the main branch (index 0) outputs are considered
        return self.model.layers.layers[0].layers[self.layer].outputs.asnumpyarray()

    def _split_local_images(self, img_path_array, limit, skip_message):
        """
        Keep the first ``limit`` locally available images; record every other
        path in a dict together with the reason it was dropped.
        """
        failed_images = {}
        local_images = []
        for img_path in img_path_array:
            if image_is_local(img_path):
                local_images.append(img_path)
            else:
                failed_images[img_path] = 'Image not found locally'
        for excess_img in local_images[limit:]:
            failed_images[excess_img] = skip_message
        return local_images[:limit], failed_images

    def get_attribute_vector(self, img_path):
        """
        Return the attribute vector of a single image.

        :param img_path: path of the image
        :return: first column of the chosen layer's outputs, or -1 when the
                 image is not available locally
        """
        if not image_is_local(img_path):
            print('Error::Vectorizer:: File Not Found: The image at %s does not exist' % img_path)
            return -1

        im = imread(img_path).astype(float)
        # Fix the image into a flat array organised as [RRRRR..GGGGG..BBBB]
        patch_array = self.patch_image(im)

        # make an image buffer on host, pad out to batch size
        host_buf = self._new_host_buffer()
        # set the first image to be the image data loaded above
        host_buf[:, 0] = patch_array.copy()

        # there is one column for each item in the batch; as we only put data
        # in the first item, we only want the first column
        return self._layer_activations(host_buf)[:, 0]

    def get_batch_attribute_vectors(self, img_path_array):
        """
        Given you are running on a GPU, you batch process self.cores number of images at a time.
        :param img_path_array: An array of self.cores number of images
        :return: A dict of the paths and their respective attribute vectors,
                 and a dict of the images that were not processed
        :raise GpuNotSupportedException -- This method will only work when your `self.backend == 'gpu'`
        """
        # Ensure that a cpu user is not accessing a GPU command.
        # Compare by value: `is not` on strings only works when both happen to
        # be the same interned object.
        if self.backend != 'gpu':
            raise GpuNotSupportedException(self.backend)

        imgs_to_process, failed_images = self.get_images_to_process(img_path_array)

        # make an image buffer on host, pad out to batch size
        # Note: self.model.be.bsz == self.cores
        host_buf = self._new_host_buffer()
        for img_index, img_path in enumerate(imgs_to_process):
            im = imread(img_path).astype(float)
            # Fix the image into a flat array organised as [RRRRR..GGGGG..BBBB]
            host_buf[:, img_index] = self.patch_image(im).copy()

        # one device-to-host copy for the whole batch instead of one per image
        activations = self._layer_activations(host_buf)

        img_vector_dict = {}
        for img_index, img_path in enumerate(imgs_to_process):
            img_vector_dict[img_path] = activations[:, img_index].tolist()

        return img_vector_dict, failed_images

    def get_NSEW_batch_attribute_vectors(self, img_path_array):
        """
        Given you are running on a GPU, you batch process the nine NSEW patches
        of up to self.cores // 9 images at a time.
        :param img_path_array: An array of images
        :return: A dict keyed by path + direction suffix (e.g. '#NW') holding
                 the respective attribute vectors, and a dict of the images
                 that were not processed
        :raise GpuNotSupportedException -- This method will only work when your `self.backend == 'gpu'`
        """
        # Ensure that a cpu user is not accessing a GPU command
        if self.backend != 'gpu':
            raise GpuNotSupportedException(self.backend)

        # Ensure there are enough (& not too many images) that are also available locally
        imgs_to_process, failed_images = self.get_images_to_process_for_NSEW(img_path_array)

        # Shift the focus around the 9 locations with the image
        imgs_urls_with_direction = []
        patches = []
        for local_img_url in imgs_to_process:
            im_data = imread(local_img_url).astype(float)
            # Get each patch around the image: NW, N, NE, W, mid, E, SW, S, SE
            # Fix the image into a flat array organised as [RRRRR..GGGGG..BBBB]
            for direction, patch in self.make_NSEW_image_patches(im_data).items():
                imgs_urls_with_direction.append(local_img_url+direction)
                patches.append(patch)

        # make an image buffer on host, pad out to batch size
        # Note: self.model.be.bsz == self.cores
        # Then fill it with each image patch
        host_buf = self._new_host_buffer()
        for img_index, patch in enumerate(patches):
            host_buf[:, img_index] = patch.copy()

        activations = self._layer_activations(host_buf)

        img_vector_dict = {}
        for img_index, img_path in enumerate(imgs_urls_with_direction):
            img_vector_dict[img_path] = activations[:, img_index].tolist()

        return img_vector_dict, failed_images

    # Expects 256x256
    def patch_image(self, im):
        """
        Crop the centre patch_height x patch_width patch out of ``im`` and
        return it as a flat, channel-major array.
        """
        # convert to BGR
        im = im[:, :, ::-1]

        # approximately mean-centre it
        im = im - [128, 128, 128]

        # Finding the co-ordinates for each corner of the centre patch
        padY = int(self.patch_height / 2.0)
        padX = int(self.patch_width / 2.0)

        y = im.shape[0] - 2 * padY
        x = im.shape[1] - 2 * padX

        # floor division keeps the Python 2 integer semantics
        col = x // 2
        row = y // 2

        right = col + self.patch_width
        left = col
        top = row
        bottom = row + self.patch_height

        # Cropping the image
        patch = im[top:bottom, left:right, :]

        # Neon wants the data as a flat array organised as [RRRRR..GGGGG..BBBB]
        patch_array = patch.transpose((2, 0, 1)).flatten()
        return patch_array

    # Expects 256x256
    def make_NSEW_image_patches(self, im):
        """
        Cut nine patches (NW, N, NE, W, mid, E, SW, S, SE) out of ``im``.

        :return: dict mapping a direction suffix such as '#NW' to the flat,
                 channel-major patch array
        """
        # convert to BGR
        im = im[:, :, ::-1]

        # approximately mean-centre it
        im = im - [128, 128, 128]

        # Determine the region of the image not used to process
        image_width = im.shape[1]
        image_height = im.shape[0]
        x_remainder = image_width - 2 * int(self.patch_width / 2.0)  # makes it even
        y_remainder = image_height - 2 * int(self.patch_height / 2.0)

        # Determine the width of the horiz and vertical borders.
        # Floor division: the values are used as slice indices below, so they
        # must stay integers on Python 3 as well.
        west_x = 0
        mid_x = x_remainder // 2
        east_x = image_width - self.patch_width
        north_y = 0
        mid_y = y_remainder // 2
        south_y = image_height - self.patch_height

        # Describe the position of the image (based on its top left point of the patch)
        # Assuming [0,0] is the top left point in the whole image
        factors = {
            # North Level
            '#NW': [west_x, north_y],
            '#N': [mid_x, north_y],
            '#NE': [east_x, north_y],
            # Mid Level
            '#W': [west_x, mid_y],
            '#mid': [mid_x, mid_y],
            '#E': [east_x, mid_y],
            # South Level
            '#SW': [west_x, south_y],
            '#S': [mid_x, south_y],
            '#SE': [east_x, south_y],
        }

        patch_array_dict = {}
        for direction, coord in factors.items():
            top = coord[1]
            left = coord[0]
            bottom = top + self.patch_height
            right = left + self.patch_width

            # Cropping the image
            patch = im[top:bottom, left:right, :]

            # Neon wants the data as a flat array organised as [RRRRR..GGGGG..BBBB]
            patch_array_dict[direction] = patch.transpose((2, 0, 1)).flatten()

        return patch_array_dict

    def get_images_to_process(self, img_path_array):
        """
        Keeps the first self.cores number of images that can be found locally.
        :param img_path_array:
        :return:
            passed_images (array<string>) -- Up to self.cores number of images that it can process
            failed_images (dict<string,string>) -- The images that could not be found or excess images
        """
        # If there are any more than self.cores number of images, ignore them
        return self._split_local_images(
            img_path_array,
            self.cores,
            'Skipped, more than {} images'.format(self.cores))

    def get_images_to_process_for_NSEW(self, img_path_array):
        """
        Keeps the first number of images that can be found locally.
        Given that there is enough for 9 of them to be processed.
        :param img_path_array:
        :return:
            passed_images (array<string>) -- Up to self.cores // 9 images that it can process
            failed_images (dict<string,string>) -- The images that could not be found or excess images
        """
        # Each image needs 9 batch slots; floor division keeps the limit an
        # integer so it can be used as a slice bound on Python 3.
        max_local_images = self.cores // 9
        return self._split_local_images(
            img_path_array,
            max_local_images,
            'Skipped, more than {} images, as ({}+1) > {}'
            .format(max_local_images, max_local_images, self.cores))
# preprocess sentence to one hot sentence = [ len(vocab) + 1 if t not in vocab else vocab[t] for t in tokens ] sentence = [start] + [w + index_from for w in sentence] sentence = [oov if w >= vocab_size else w for w in sentence] # truncate and padding trunc = sentence[ -sentence_length:] # take the last sentence_length words input_numpy[:] = 0 # fill with zeros input_numpy[-len(trunc):, 0] = trunc # place the input into the numpy array y_pred = model.fprop( input_numpy, inference=True) # run the forward pass through the model if (y_pred.get()[1] <= 0.5): neg_predictions = neg_predictions + 1 else: pos_predictions = pos_predictions + 1 print '[neg = ' + str(neg_predictions) + '] [pos = ' + str( pos_predictions) + ']' + '\r', neg_files = neg_files + 1 if (how_many_inputs != 0 and neg_files >= how_many_inputs): break elapsed_time = time.time() - start_time
class ModelRunnerNeon():
    """
    DQN model runner built on Neon: owns a train network and a target
    network of identical layout plus the optimizer and cost used to update
    the train network.
    """

    def __init__(self, args, max_action_no, batch_dimension):
        """
        Arguments:
            args: settings object; the attributes read by this class include
                train_batch_size, discount_factor, use_gpu_replay_mem,
                optimizer, rms_decay and learning_rate.
            max_action_no: number of outputs of the last layer.
            batch_dimension: 4-item shape with the batch axis first; the
                network input uses the same axes with the batch axis last.
        """
        self.args = args
        self.train_batch_size = args.train_batch_size
        self.discount_factor = args.discount_factor
        self.use_gpu_replay_mem = args.use_gpu_replay_mem

        self.be = gen_backend(backend='gpu', batch_size=self.train_batch_size)

        self.input_shape = (batch_dimension[1], batch_dimension[2],
                            batch_dimension[3], batch_dimension[0])
        self.input = self.be.empty(self.input_shape)
        self.input.lshape = self.input_shape  # HACK: needed for convolutional networks
        self.targets = self.be.empty((max_action_no, self.train_batch_size))

        if self.use_gpu_replay_mem:
            self.history_buffer = self.be.zeros(batch_dimension, dtype=np.uint8)
            self.input_uint8 = self.be.empty(self.input_shape, dtype=np.uint8)
        else:
            self.history_buffer = np.zeros(batch_dimension, dtype=np.float32)

        self.train_net = Model(self.create_layers(max_action_no))
        self.cost = GeneralizedCost(costfunc=SumSquared())
        self._disable_parallelism(self.train_net)
        self.train_net.initialize(self.input_shape[:-1], self.cost)

        self.target_net = Model(self.create_layers(max_action_no))
        self._disable_parallelism(self.target_net)
        self.target_net.initialize(self.input_shape[:-1])

        if self.args.optimizer == 'Adam':  # Adam
            self.optimizer = Adam(beta_1=args.rms_decay,
                                  beta_2=args.rms_decay,
                                  learning_rate=args.learning_rate)
        else:  # Neon RMSProp
            self.optimizer = RMSProp(decay_rate=args.rms_decay,
                                     learning_rate=args.learning_rate)

        self.max_action_no = max_action_no
        self.running = True

    @staticmethod
    def _disable_parallelism(net):
        """
        Set ``parallelism = 'Disabled'`` on every layer of ``net``.

        Carried over from the original code, where it was only labelled
        "Bug fix"; it has to run before ``net.initialize``.
        """
        for layer in net.layers.layers:
            layer.parallelism = 'Disabled'

    def get_initializer(self, input_size):
        """
        Return the weight initializer selected by ``args.dnn_initializer``.

        'xavier' gives Xavier(), 'fan_in' a Uniform in +/- 1/sqrt(input_size),
        anything else Gaussian(0, 0.01).
        """
        dnnInit = self.args.dnn_initializer
        if dnnInit == 'xavier':
            initializer = Xavier()
        elif dnnInit == 'fan_in':
            std_dev = 1.0 / math.sqrt(input_size)
            initializer = Uniform(low=-std_dev, high=std_dev)
        else:
            initializer = Gaussian(0, 0.01)
        return initializer

    def create_layers(self, max_action_no):
        """
        Build the layer list: three Conv layers, one 512-unit Affine layer
        and a linear Affine layer with ``max_action_no`` outputs.
        """
        layers = []

        initializer = self.get_initializer(input_size = 4 * 8 * 8)
        layers.append(Conv(fshape=(8, 8, 32), strides=4, init=initializer,
                           bias=initializer, activation=Rectlin()))

        initializer = self.get_initializer(input_size = 32 * 4 * 4)
        layers.append(Conv(fshape=(4, 4, 64), strides=2, init=initializer,
                           bias=initializer, activation=Rectlin()))

        initializer = self.get_initializer(input_size = 64 * 3 * 3)
        layers.append(Conv(fshape=(3, 3, 64), strides=1, init=initializer,
                           bias=initializer, activation=Rectlin()))

        initializer = self.get_initializer(input_size = 7 * 7 * 64)
        layers.append(Affine(nout=512, init=initializer, bias=initializer,
                             activation=Rectlin()))

        initializer = self.get_initializer(input_size = 512)
        layers.append(Affine(nout=max_action_no, init=initializer, bias=initializer))

        return layers

    def clip_reward(self, reward):
        """Clamp ``reward`` to [args.clip_reward_low, args.clip_reward_high]."""
        if reward > self.args.clip_reward_high:
            return self.args.clip_reward_high
        elif reward < self.args.clip_reward_low:
            return self.args.clip_reward_low
        else:
            return reward

    def set_input(self, data):
        """
        Load ``data`` (batch axis first) into ``self.input`` (batch axis last)
        and divide it by 255.
        """
        if self.use_gpu_replay_mem:
            # data already lives on the device: transpose there, then scale
            self.be.copy_transpose(data, self.input_uint8, axes=(1, 2, 3, 0))
            self.input[:] = self.input_uint8 / 255
        else:
            self.input.set(data.transpose(1, 2, 3, 0).copy())
            self.be.divide(self.input, 255, self.input)

    def predict(self, history_buffer):
        """
        Run the train network in inference mode on ``history_buffer`` and
        return row 0 of the transposed output as a numpy array.
        """
        self.set_input(history_buffer)
        output = self.train_net.fprop(self.input, inference=True)
        return output.T.asnumpyarray()[0]

    def print_weights(self):
        """No-op."""
        pass

    def train(self, minibatch, replay_memory, learning_rate, debug):
        """
        Perform one update of the train network.

        Arguments:
            minibatch: (prestates, actions, rewards, poststates, terminals),
                followed by (replay_indexes, heap_indexes, weights) when
                args.prioritized_replay is set.
            replay_memory: object whose update_td(heap_index, td) is called
                for every sample when prioritized replay is on.
            learning_rate: not used by this method.
            debug: when true, print the per-sample weights and deltas
                (prioritized replay only).
        """
        # NOTE: the `== True` comparisons are kept on purpose; the types of
        # these args are not visible here, and plain truthiness would change
        # behaviour for truthy non-bool values (e.g. a non-empty string).
        if self.args.prioritized_replay == True:
            prestates, actions, rewards, poststates, terminals, replay_indexes, heap_indexes, weights = minibatch
        else:
            prestates, actions, rewards, poststates, terminals = minibatch

        # Get Q*(s, a) with targetNet
        self.set_input(poststates)
        post_qvalue = self.target_net.fprop(self.input, inference=True).T.asnumpyarray()

        if self.args.double_dqn == True:
            # Get Q*(s, a) with trainNet
            post_qvalue2 = self.train_net.fprop(self.input, inference=True).T.asnumpyarray()

        # Get Q(s, a) with trainNet
        self.set_input(prestates)
        pre_qvalue = self.train_net.fprop(self.input, inference=False)

        # Targets equal the current predictions everywhere except at the
        # action that was actually taken.
        label = pre_qvalue.asnumpyarray().copy()
        for i in range(0, self.train_batch_size):
            if self.args.clip_reward:
                reward = self.clip_reward(rewards[i])
            else:
                reward = rewards[i]
            if terminals[i]:
                label[actions[i], i] = reward
            else:
                if self.args.double_dqn == True:
                    # action chosen by the train net, value taken from the target net
                    max_index = np.argmax(post_qvalue2[i])
                    label[actions[i], i] = reward + self.discount_factor * post_qvalue[i][max_index]
                else:
                    label[actions[i], i] = reward + self.discount_factor * np.max(post_qvalue[i])

        # copy targets to GPU memory
        self.targets.set(label)

        delta = self.cost.get_errors(pre_qvalue, self.targets)

        if self.args.prioritized_replay == True:
            delta_value = delta.asnumpyarray()
            for i in range(self.train_batch_size):
                if debug:
                    # parenthesised form prints the same on Python 2 and 3
                    print('weight[%s]: %.5f, delta: %.5f, newDelta: %.5f' % (
                        i, weights[i], delta_value[actions[i], i],
                        weights[i] * delta_value[actions[i], i]))
                replay_memory.update_td(heap_indexes[i], abs(delta_value[actions[i], i]))
                delta_value[actions[i], i] = weights[i] * delta_value[actions[i], i]
            delta.set(delta_value.copy())

        if self.args.clip_loss:
            self.be.clip(delta, -1.0, 1.0, out = delta)

        self.train_net.bprop(delta)
        self.optimizer.optimize(self.train_net.layers_to_optimize, epoch=0)

    def update_model(self):
        """Copy weights and states from the train network to the target network."""
        # have to serialize also states for batch normalization to work
        pdict = self.train_net.get_description(get_weights=True, keep_states=True)
        self.target_net.deserialize(pdict, load_states=True)
        #print ('Updated target model')

    def finish_train(self):
        """Clear the ``running`` flag."""
        self.running = False

    def load(self, file_name):
        """Load train network params from ``file_name`` and sync the target network."""
        self.train_net.load_params(file_name)
        self.update_model()

    def save(self, file_name):
        """Save the train network params to ``file_name``."""
        self.train_net.save_params(file_name)
model_desc = ModelDescription(load_obj(args.save_model_file)) for layer in segnet_model.layers_to_optimize: name = layer.name trained_layer = model_desc.getlayer(name) layer.load_weights(trained_layer) fig = plt.figure() if args.display: plt.ion() im1 = None im2 = None cnt = 1 for x, t in test_set: z = segnet_model.fprop(x).get() z = np.argmax(z.reshape((c, h, w)), axis=0) t = np.argmax(t.get().reshape((c, h, w)), axis=0) # calculate the misclass rate acc = (np.where(z == t)[0].size / float(z.size))*100.0 plt.subplot(2,1,1); if im1 is None: im1 = plt.imshow(t);plt.title('Truth') else: im1.set_data(t) plt.subplot(2,1,2); if im2 is None:
metric = Accuracy()

##########################################################################

model = Model(layers=layers)
optimizer = Adagrad(learning_rate=0.01, clip_gradients=clip_gradients)
callbacks = Callbacks(model, train_set, args, eval_set=valid_set)

# Evaluate a previously trained model instead of training from scratch.
model.load_weights(os.path.join(args.data_dir, '128128_49_model_e2.pkl'))

# "%s %s" reproduces what the Python 2 statement `print a, b` emitted
# (items joined by one space) and also parses on Python 3.
print("%s %s" % ("Test Accuracy - ", 100 * model.eval(valid_set, metric=metric)))
print("%s %s" % ("Train Accuracy - ", 100 * model.eval(train_set, metric=metric)))

# output result directly (first validation batch only)
for x, y in valid_set:
    x = model.fprop(x, inference=True)
    print(x.get())
    print(y.get())
    break

#########################################################################
# continue training
# optimizer = Adagrad(learning_rate=0.01, clip_gradients=clip_gradients)
# callbacks = Callbacks(model, train_set, args, eval_set=valid_set)
# import ipdb; ipdb.set_trace()
# re-allocate output memories for each layer
# model.initialized = False
# model.initialize(train_set, cost=cost)
def test_conv_rnn(backend_default):
    """Smoke test: fprop/bprop through conv -> recurrent -> affine stacks.

    Every recurrent flavour (bidirectional LSTM/RNN variants, plain RNN,
    LSTM, GRU) is placed behind the same convolutional front end; the test
    passes when initialization, fprop and bprop complete without raising.
    """
    input_shape = (1, 17, 142)
    backend = backend_default

    # random input activations and random error signal for the 10 outputs
    fprop_input = backend.array(
        backend.rng.randn(np.prod(input_shape), backend.bsz))
    bprop_error = backend.array(backend.rng.randn(10, backend.bsz))

    w_init = Gaussian(loc=0.0, scale=0.01)

    recurrent_variants = [
        DeepBiLSTM(128, w_init, activation=Rectlin(),
                   gate_activation=Rectlin(), depth=1, reset_cells=True),
        DeepBiRNN(128, w_init, activation=Rectlin(), depth=1,
                  reset_cells=True, batch_norm=False),
        DeepBiRNN(128, w_init, activation=Rectlin(), depth=2,
                  reset_cells=True, batch_norm=False),
        DeepBiRNN(128, w_init, activation=Rectlin(), depth=1,
                  reset_cells=True, batch_norm=True),
        DeepBiRNN(128, w_init, activation=Rectlin(), depth=1,
                  reset_cells=True, batch_norm=False, bi_sum=True),
        Recurrent(128, init=w_init, activation=Rectlin(), reset_cells=True),
        LSTM(128, w_init, activation=Rectlin(), gate_activation=Rectlin(),
             reset_cells=True),
        GRU(128, w_init, activation=Rectlin(), gate_activation=Rectlin(),
            reset_cells=True),
    ]

    def build_stack(recurrent_layer):
        # fresh conv/pool/affine layers for every recurrent variant
        return [
            Conv((2, 2, 4), init=w_init, activation=Rectlin(),
                 strides=dict(str_h=2, str_w=4)),
            Pooling(2, strides=2),
            Conv((3, 3, 4), init=w_init, batch_norm=True,
                 activation=Rectlin(), strides=dict(str_h=1, str_w=2)),
            recurrent_layer,
            RecurrentMean(),
            Affine(nout=10, init=w_init, activation=Rectlin()),
        ]

    for recurrent_layer in recurrent_variants:
        net = Model(layers=build_stack(recurrent_layer))
        loss = GeneralizedCost(costfunc=CrossEntropyBinary())
        net.initialize(input_shape, loss)
        net.fprop(fprop_input)
        net.bprop(bprop_error)
end = drv.Event() num_iterations = config.num_warmup_iters + config.num_timing_iters forward_time = np.zeros(config.num_timing_iters) backward_time = np.zeros(config.num_timing_iters) iter = 0 flag = True while flag: for (x, t) in data: iter += 1 if iter > num_iterations: flag = False break if iter > config.num_warmup_iters: # time it if config.backend == 'cpu': s = time.time() * 1000 x = network.fprop(x) cost_iter = network.cost.get_cost(x, t) e = time.time() * 1000 # in milliseconds forward_time[iter - config.num_warmup_iters - 1] = e - s s = time.time() * 1000 delta = network.cost.get_errors(x, t) # gradient of the cost network.bprop(delta) e = time.time() * 1000 backward_time[iter - config.num_warmup_iters - 1] = e - s else: start.record() x = network.fprop(x) cost_iter = network.cost.get_cost(x, t) end.record() end.synchronize() forward_time[iter - config.num_warmup_iters - 1] \
LSTM(hidden, init, activation=Logistic(), gate_activation=Tanh(), reset_cells=False), RecurrentLast(), Affine(train_set.nfeatures, init, bias=init, activation=Identity()) ] model_new = Model(layers=layers) model_new.load_params(args.save_path) model_new.initialize(dataset=(train_set.nfeatures, seq_len)) output = np.zeros((train_set.nfeatures, num_predict)) seed = time_series.train[:seed_seq_len] x = model_new.be.empty((train_set.nfeatures, seq_len)) for s_in in seed: x.set(s_in.reshape(train_set.nfeatures, seq_len)) y = model_new.fprop(x, inference=False) for i in range(num_predict): # Take last prediction and feed into next fprop pred = y.get()[:, -1] output[:, i] = pred x[:] = pred.reshape(train_set.nfeatures, seq_len) y = model_new.fprop(x, inference=False) output_seq = np.vstack([seed, output.T]) if do_plots: plt.figure() plt.plot(output_seq[:, 0], output_seq[:, 1], 'b.-', label='generated sequence') plt.plot(seed[:, 0], seed[:, 1], 'r.', label='seed sequence') plt.legend()
# Grab a single minibatch, persist it, and run one manual training step so the
# results of a fresh run and a resumed run can be compared.
train.reset()

# get 1 image
for im, l in train:
    break
train.exit_batch_provider()

# Pickle data is a byte stream: the file must be opened in binary mode.
# Text mode ('w'/'r') fails on Python 3 and can corrupt data on Windows.
with open('im1.pkl', 'wb') as fid:
    pickle.dump((im.get(), l.get()), fid)
im_save = im.get().copy()
if args.resume:
    # replace the freshly drawn minibatch with the one stored on disk
    with open('im1.pkl', 'rb') as fid:
        (im2, l2) = pickle.load(fid)
    im.set(im2)
    l.set(l2)

# run fprop and bprop on this minibatch save the results
out_fprop = model.fprop(im)
out_fprop_save = [x.get() for x in out_fprop]

# restore the input and fprop again: both passes must agree exactly
im.set(im_save)
out_fprop = model.fprop(im)
out_fprop_save2 = [x.get() for x in out_fprop]
for x, y in zip(out_fprop_save, out_fprop_save2):
    assert np.max(np.abs(x-y)) == 0.0, '2 fprop iterations do not match'

# run fit for 1 minibatch
# have to do this by hand
# NOTE(review): the errors are computed from the input tensor `im`, not from
# the fprop output -- kept as in the original, confirm this is intended.
delta = model.cost.get_errors(im, l)
model.bprop(delta)
if args.resume:
    model.optimizer = opt
model.optimizer.optimize(model.layers_to_optimize, epoch=model.epoch_index)
reset_cells=False), RecurrentLast(), Affine(train_set.nfeatures, init, bias=init, activation=Identity()) ] model_new = Model(layers=layers) model_new.load_params(args.save_path) model_new.initialize(dataset=(train_set.nfeatures, seq_len)) output = np.zeros((train_set.nfeatures, num_predict)) seed = time_series.train[:seed_seq_len] x = model_new.be.empty((train_set.nfeatures, seq_len)) for s_in in seed: x.set(s_in.reshape(train_set.nfeatures, seq_len)) y = model_new.fprop(x, inference=False) for i in range(num_predict): # Take last prediction and feed into next fprop pred = y.get()[:, -1] output[:, i] = pred x[:] = pred.reshape(train_set.nfeatures, seq_len) y = model_new.fprop(x, inference=False) output_seq = np.vstack([seed, output.T]) if do_plots: plt.figure() plt.plot(output_seq[:, 0], output_seq[:, 1], 'b.-',
def _fprop_one_hot(net, buf, token_index):
    """Clear `buf`, set row `token_index` of column 0 to 1 and fprop it."""
    buf.fill(0)
    buf[token_index, 0] = 1
    return net.fprop(buf)


# Rebuild the network used for training and load the saved parameters into it.
layers = [
    LSTM(hidden_size, init, activation=Logistic(), gate_activation=Tanh()),
    Affine(len(train_set.vocab), init, bias=init, activation=Softmax()),
]
model_new = Model(layers=layers)
model_new.load_params(args.save_path)
model_new.initialize(dataset=(train_set.shape[0], time_steps))

# Generate text
text = []
seed_tokens = list('ROMEO:')

x = model_new.be.zeros((len(train_set.vocab), time_steps))

# Feed the seed one token at a time; only the output of the last seed token
# is used below.
for s in seed_tokens:
    y = _fprop_one_hot(model_new, x, train_set.token_to_index[s])

for i in range(num_predict):
    # Take last prediction and feed into next fprop
    pred = sample(y.get()[:, -1])
    pred_index = int(pred)
    text.append(train_set.index_to_token[pred_index])
    y = _fprop_one_hot(model_new, x, pred_index)

neon_logger.display(''.join(seed_tokens + text))
class DQNNeon(Learner):
    """
    Implementation of the DQN network based on the Neon framework.

    The modules that interact with the agent, the replay memory and the
    statistic calls are implemented here, taking the individual requirements
    of the Neon framework into account. The code is adapted from:
    https://github.com/tambetm/simple_dqn

    Attributes:
        input_shape (tuple[int]): Dimension of the network input.
        dummy_batch (numpy.ndarray): Dummy batch used to calculate Q-values for single states.
        batch_norm (bool): Indicates if normalization is wanted for a certain layer (default=False).
        be (neon.backends.nervanagpu.NervanaGPU): Describes the backend for the Neon implementation.
        input (neon.backends.nervanagpu.GPUTensor): Definition of network input shape.
        targets(neon.backends.nervanagpu.GPUTensor): Definition of network output shape.
        model (neon.models.model.Model): Generated Neon model.
        target_model (neon.models.model.Model): Generated target Neon model.
        cost_func (neon.layers.layer.GeneralizedCost): Cost function for model training.
        callback (Statistics): Hook for the statistics object to pass train and test information.

    Note:
        More attributes of this class are defined in the base class Learner.
    """

    def __init__(self, env, args, rng, name = "DQNNeon"):
        """
        Initializes a network based on the Neon framework.

        Args:
            env (AtariEnv): The environment in which the agent actuates.
            args (argparse.Namespace): All settings either with a default value or set via command line arguments.
            rng (mtrand.RandomState): initialized Mersenne Twister pseudo-random number generator.
            name (str): The name of the network object.

        Note:
            This function should always call the base class first to initialize
            the common values for the networks.
        """
        _logger.info("Initializing new object of type " + str(type(self).__name__))
        super(DQNNeon, self).__init__(env, args, rng, name)
        self.input_shape = (self.sequence_length,) + self.frame_dims + (self.batch_size,)
        self.dummy_batch = np.zeros((self.batch_size, self.sequence_length) + self.frame_dims, dtype=np.uint8)
        self.batch_norm = args.batch_norm

        self.be = gen_backend(
                backend = args.backend,
                batch_size = args.batch_size,
                rng_seed = args.random_seed,
                device_id = args.device_id,
                datatype = np.dtype(args.datatype).type,
                stochastic_round = args.stochastic_round)

        # prepare tensors once and reuse them
        self.input = self.be.empty(self.input_shape)
        self.input.lshape = self.input_shape # HACK: needed for convolutional networks
        self.targets = self.be.empty((self.output_shape, self.batch_size))

        # create model
        layers = self._create_layer()
        self.model = Model(layers = layers)
        self.cost_func = GeneralizedCost(costfunc = SumSquared())
        # Bug fix
        for l in self.model.layers.layers:
            l.parallelism = 'Disabled'
        self.model.initialize(self.input_shape[:-1], self.cost_func)

        self._set_optimizer()

        # identity check: `== None` can be hijacked by a custom __eq__
        if self.args.load_weights is not None:
            self.load_weights(self.args.load_weights)

        # create target model
        if self.target_update_frequency:
            layers = self._create_layer()
            self.target_model = Model(layers)
            # Bug fix
            for l in self.target_model.layers.layers:
                l.parallelism = 'Disabled'
            self.target_model.initialize(self.input_shape[:-1])
        else:
            # no separate target network: bootstrap from the online model
            self.target_model = self.model

        self.callback = None
        _logger.debug("%s" % self)

    def _create_layer(self):
        """
        Build a network consistent with the DeepMind Nature paper.

        Returns:
            list: Freshly constructed Neon layers (three convolutions followed by two affine layers).
        """
        _logger.debug("Output shape = %d" % self.output_shape)
        # create network
        init_norm = Gaussian(loc=0.0, scale=0.01)
        layers = []
        # The first hidden layer convolves 32 filters of 8x8 with stride 4 with the input image and applies a rectifier nonlinearity.
        layers.append(
                Conv((8, 8, 32),
                strides=4,
                init=init_norm,
                activation=Rectlin(),
                batch_norm=self.batch_norm))
        # The second hidden layer convolves 64 filters of 4x4 with stride 2, again followed by a rectifier nonlinearity.
        layers.append(
                Conv((4, 4, 64),
                strides=2,
                init=init_norm,
                activation=Rectlin(),
                batch_norm=self.batch_norm))
        # This is followed by a third convolutional layer that convolves 64 filters of 3x3 with stride 1 followed by a rectifier.
        layers.append(
                Conv((3, 3, 64),
                strides=1,
                init=init_norm,
                activation=Rectlin(),
                batch_norm=self.batch_norm))
        # The final hidden layer is fully-connected and consists of 512 rectifier units.
        layers.append(
                Affine(
                    nout=512,
                    init=init_norm,
                    activation=Rectlin(),
                    batch_norm=self.batch_norm))
        # The output layer is a fully-connected linear layer with a single output for each valid action.
        layers.append(
                Affine(
                    nout= self.output_shape,
                    init = init_norm))
        return layers

    def _set_optimizer(self):
        """
        Initializes the selected optimization algorithm.

        Raises:
            ValueError: If ``args.optimizer`` is not 'rmsprop', 'adam' or 'adadelta'.
        """
        _logger.debug("Optimizer = %s" % str(self.args.optimizer))
        if self.args.optimizer == 'rmsprop':
            self.optimizer = RMSProp(
                    learning_rate = self.args.learning_rate,
                    decay_rate = self.args.decay_rate,
                    stochastic_round = self.args.stochastic_round)
        elif self.args.optimizer == 'adam':
            self.optimizer = Adam(
                    learning_rate = self.args.learning_rate,
                    stochastic_round = self.args.stochastic_round)
        elif self.args.optimizer == 'adadelta':
            self.optimizer = Adadelta(
                    decay = self.args.decay_rate,
                    stochastic_round = self.args.stochastic_round)
        else:
            # The former `assert false, ...` referenced an undefined name and
            # therefore raised NameError; an explicit raise also survives -O.
            raise ValueError("Unknown optimizer")

    def _prepare_network_input(self, states):
        """
        Transforms and normalizes the states from one minibatch.

        The result is written into ``self.input``; nothing is returned.

        Args:
            states (numpy.ndarray): a set of states with the size of minibatch, batch axis first
        """
        _logger.debug("Normalizing and transforming input")
        # change order of axes to match what Neon expects (batch axis last)
        states = np.transpose(states, axes = (1, 2, 3, 0))
        # copy() shouldn't be necessary here, but Neon doesn't work otherwise
        self.input.set(states.copy())
        # normalize network input between 0 and 1
        self.be.divide(self.input, self.grayscales, self.input)

    def train(self, minibatch, epoch):
        """
        Prepare, perform and document a complete train step for one minibatch.

        Args:
            minibatch (tuple): ``(prestates, actions, rewards, poststates, terminals)``;
                the state arrays are 4-dimensional, the remaining ones 1-dimensional,
                all sharing the same first dimension.
            epoch (int): Current train epoch
        """
        _logger.debug("Complete training step for one minibatch")
        prestates, actions, rewards, poststates, terminals = minibatch
        assert len(prestates.shape) == 4
        assert len(poststates.shape) == 4
        assert len(actions.shape) == 1
        assert len(rewards.shape) == 1
        assert len(terminals.shape) == 1
        assert prestates.shape == poststates.shape
        assert prestates.shape[0] == actions.shape[0] == rewards.shape[0] == poststates.shape[0] == terminals.shape[0]
        # feed-forward pass for poststates to get Q-values
        self._prepare_network_input(poststates)
        postq = self.target_model.fprop(self.input, inference = True)
        assert postq.shape == (self.output_shape, self.batch_size)
        # calculate max Q-value for each poststate
        maxpostq = self.be.max(postq, axis=0).asnumpyarray()
        assert maxpostq.shape == (1, self.batch_size)
        # average maxpostq for stats
        maxpostq_avg = maxpostq.mean()
        # feed-forward pass for prestates
        self._prepare_network_input(prestates)
        preq = self.model.fprop(self.input, inference = False)
        assert preq.shape == (self.output_shape, self.batch_size)
        # make copy of prestate Q-values as targets; the explicit copy keeps
        # the edits below from leaking into `preq` should the backend hand
        # out a view of its own buffer (which would zero the errors)
        targets = preq.asnumpyarray().copy()
        # clip rewards between -1 and 1
        rewards = np.clip(rewards, self.min_reward, self.max_reward)
        # update Q-value targets for each state only at actions taken
        for i, action in enumerate(actions):
            if terminals[i]:
                targets[action, i] = float(rewards[i])
            else:
                targets[action, i] = float(rewards[i]) + self.discount_rate * maxpostq[0,i]
        # copy targets to GPU memory
        self.targets.set(targets)
        # calculate errors
        errors = self.cost_func.get_errors(preq, self.targets)
        assert errors.shape == (self.output_shape, self.batch_size)
        # average error where there is a error (should be 1 in every row)
        #TODO: errors_avg = np.sum(errors)/np.size(errors[errors>0.])
        # clip errors
        if self.clip_error:
            self.be.clip(errors, -self.clip_error, self.clip_error, out = errors)
        # calculate cost, just in case
        cost = self.cost_func.get_cost(preq, self.targets)
        assert cost.shape == (1,1)
        # perform back-propagation of gradients
        self.model.bprop(errors)
        # perform optimization
        self.optimizer.optimize(self.model.layers_to_optimize, epoch)
        # increase number of weight updates (needed for target clone interval)
        self.update_iterations += 1
        if self.target_update_frequency and self.update_iterations % self.target_update_frequency == 0:
            self._copy_theta()
        # the cost may arrive as a numpy array or as a backend tensor;
        # extract the scalar once for both logging and statistics
        if isinstance(cost, np.ndarray):
            cost_value = cost[0, 0]
        else:
            cost_value = cost.asnumpyarray()[0, 0]
        _logger.info("Network update #%d: Cost = %s, Avg Max Q-value = %s" % (self.update_iterations, str(cost_value), str(maxpostq_avg)))
        # update statistics
        if self.callback:
            self.callback.from_learner(cost_value, maxpostq_avg)

    def get_Q(self, state):
        """
        Calculates the Q-values for one mini-batch.

        Args:
            state(numpy.ndarray): Single state, shape=(sequence_length,frame_width,frame_height).

        Returns:
            q_values (numpy.ndarray): Results for first element of mini-batch from one forward pass through the network, shape=(self.output_shape,)
        """
        _logger.debug("State shape = %s" % str(state.shape))
        # minibatch is full size, because Neon doesn't let change the minibatch size
        # so we need to run a full batch of forward steps to get the one we actually want
        self.dummy_batch[0] = state
        states = self.dummy_batch
        assert states.shape == ((self.batch_size, self.sequence_length,) + self.frame_dims)
        # calculate Q-values for the states
        self._prepare_network_input(states)
        qvalues = self.model.fprop(self.input, inference = True)
        assert qvalues.shape == (self.output_shape, self.batch_size)
        # single device-to-host conversion, reused for logging and the result
        first_qvalues = qvalues.asnumpyarray()[:,0]
        _logger.debug("Qvalues: %s" % (str(first_qvalues)))
        return first_qvalues

    def _copy_theta(self):
        """
        Copies the weights of the current network to the target network.
        """
        _logger.debug("Copying weights")
        # states are serialized along with the weights (keep_states/load_states)
        pdict = self.model.get_description(get_weights=True, keep_states=True)
        self.target_model.deserialize(pdict, load_states=True)

    def save_weights(self, target_dir, epoch):
        """
        Saves the current network parameters to disk.

        Args:
            target_dir (str): Directory where the network parameters are stored for each episode.
            epoch (int): Current epoch.
        """
        filename = "%s_%s_%s_%d.prm" % (str(self.args.game.lower()), str(self.args.learner_type.lower()), str(self.args.optimizer.lower()), (epoch + 1))
        self.model.save_params(os.path.join(target_dir, filename))

    def load_weights(self, source_file):
        """
        Loads the network parameters from a given file.

        Args:
            source_file (str): Complete path to a file with network parameters.
        """
        self.model.load_params(source_file)
# setup buffers before accepting reviews
xdev = be.zeros((sentence_length, 1), dtype=np.int32)  # bsz is 1, feature size
xbuf = np.zeros((1, sentence_length), dtype=np.int32)
oov = 2
start = 1
index_from = 3
pad_char = 0

# Close the vocabulary file deterministically instead of leaking the handle.
# NOTE(review): pickle.load executes arbitrary code from the file; only point
# --vocab_file at trusted data.
with open(args.vocab_file, 'rb') as vocab_fid:
    vocab, rev_vocab = pickle.load(vocab_fid)

# Interactive loop: read a review, encode it, run inference, print the result.
while True:
    line = input('Enter a Review from testData.tsv file \n')

    # clean the input
    tokens = clean_string(line).strip().split()

    # check for oov and add start
    sent = [len(vocab) + 1 if t not in vocab else vocab[t] for t in tokens]
    sent = [start] + [w + index_from for w in sent]
    sent = [oov if w >= vocab_size else w for w in sent]

    # pad sentences: clear the buffer, then right-align the (truncated) review
    xbuf[:] = pad_char
    trunc = sent[-sentence_length:]
    xbuf[0, -len(trunc):] = trunc
    xdev[:] = xbuf.T.copy()
    y_pred = model_new.fprop(xdev, inference=True)  # inference flag dropout

    print("Sent - {0}".format(xbuf))
    print("Pred - {0} ".format(y_pred.get().T))
    print('-' * 100)
def test_model_serialize(backend_default, data):
    """Round-trip a briefly trained multistream model through save/load.

    The model is trained for a few minibatches, its inference outputs and
    per-layer serialized parameters are recorded, the model is saved with
    its states and rebuilt from the file, and the rebuilt model must
    reproduce outputs, states and params. The temporary file is removed
    even when an assertion fails.
    """
    dataset = MNIST(path=data)
    (X_train, y_train), (X_test, y_test), nclass = dataset.load_data()
    train_set = ArrayIterator(
        [X_train, X_train], y_train, nclass=nclass, lshape=(1, 28, 28))

    init_norm = Gaussian(loc=0.0, scale=0.01)

    # initialize model
    path1 = Sequential([Conv((5, 5, 16), init=init_norm, bias=Constant(0), activation=Rectlin()),
                        Pooling(2),
                        Affine(nout=20, init=init_norm, bias=init_norm, activation=Rectlin())])
    path2 = Sequential([Affine(nout=100, init=init_norm, bias=Constant(0), activation=Rectlin()),
                        Dropout(keep=0.5),
                        Affine(nout=20, init=init_norm, bias=init_norm, activation=Rectlin())])
    layers = [MergeMultistream(layers=[path1, path2], merge="stack"),
              Affine(nout=20, init=init_norm, batch_norm=True, activation=Rectlin()),
              Affine(nout=10, init=init_norm, activation=Logistic(shortcut=True))]

    tmp_save = 'test_model_serialize_tmp_save.pickle'
    mlp = Model(layers=layers)
    mlp.optimizer = GradientDescentMomentum(learning_rate=0.1, momentum_coef=0.9)
    mlp.cost = GeneralizedCost(costfunc=CrossEntropyBinary())
    mlp.initialize(train_set, cost=mlp.cost)
    n_test = 3
    num_epochs = 3
    # Train model for num_epochs and n_test batches
    for epoch in range(num_epochs):
        for i, (x, t) in enumerate(train_set):
            x = mlp.fprop(x)
            delta = mlp.cost.get_errors(x, t)
            mlp.bprop(delta)
            mlp.optimizer.optimize(mlp.layers_to_optimize, epoch=epoch)
            if i > n_test:
                break

    # Get expected outputs of n_test batches and states of all layers
    # NOTE(review): the tensors returned by fprop are stored as-is; if the
    # backend reuses its output buffer, every entry aliases the last batch --
    # confirm, and snapshot with .get() at append time if so.
    outputs_exp = []
    pdicts_exp = [l.get_params_serialize() for l in mlp.layers_to_optimize]
    for i, (x, t) in enumerate(train_set):
        outputs_exp.append(mlp.fprop(x, inference=True))
        if i > n_test:
            break

    try:
        # Serialize model
        mlp.save_params(tmp_save, keep_states=True)

        # Load model
        mlp = Model(tmp_save)
        mlp.initialize(train_set)

        outputs = []
        pdicts = [l.get_params_serialize() for l in mlp.layers_to_optimize]
        for i, (x, t) in enumerate(train_set):
            outputs.append(mlp.fprop(x, inference=True))
            if i > n_test:
                break

        # Check outputs, states, and params are the same
        for output, output_exp in zip(outputs, outputs_exp):
            assert allclose_with_out(output.get(), output_exp.get())

        for pd, pd_exp in zip(pdicts, pdicts_exp):
            for s, s_e in zip(pd['states'], pd_exp['states']):
                if isinstance(s, list):  # this is the batch norm case
                    for _s, _s_e in zip(s, s_e):
                        assert allclose_with_out(_s, _s_e)
                else:
                    assert allclose_with_out(s, s_e)
            for p, p_e in zip(pd['params'], pd_exp['params']):
                assert type(p) == type(p_e)
                if isinstance(p, list):  # this is the batch norm case
                    for _p, _p_e in zip(p, p_e):
                        assert allclose_with_out(_p, _p_e)
                elif isinstance(p, np.ndarray):
                    assert allclose_with_out(p, p_e)
                else:
                    assert p == p_e
    finally:
        # clean up even when saving or an assertion above fails
        if os.path.exists(tmp_save):
            os.remove(tmp_save)
def test_model_serialize(backend):
    """Round-trip a briefly trained merged-path model through serialization.

    The model is trained for a few minibatches, its inference outputs and
    per-layer serialized parameters are recorded, the model is written with
    ``save_obj`` and its weights are loaded into a new ``Model``, which must
    reproduce outputs, states and params. The temporary file is removed even
    when an assertion fails.
    """
    (X_train, y_train), (X_test, y_test), nclass = load_mnist()
    train_set = DataIterator([X_train, X_train], y_train, nclass=nclass, lshape=(1, 28, 28))

    init_norm = Gaussian(loc=0.0, scale=0.01)

    # initialize model
    path1 = [
        Conv((5, 5, 16), init=init_norm, bias=Constant(0), activation=Rectlin()),
        Pooling(2),
        Affine(nout=20, init=init_norm, bias=init_norm, activation=Rectlin())
    ]
    path2 = [
        Dropout(keep=0.5),
        Affine(nout=20, init=init_norm, bias=init_norm, activation=Rectlin())
    ]
    layers = [
        MergeConcat([path1, path2]),
        Affine(nout=20, init=init_norm, bias=init_norm, activation=Rectlin()),
        BatchNorm(),
        Affine(nout=10, init=init_norm, activation=Logistic(shortcut=True))
    ]

    tmp_save = 'test_model_serialize_tmp_save.pickle'
    mlp = Model(layers=layers)
    mlp.optimizer = GradientDescentMomentum(learning_rate=0.1, momentum_coef=0.9)
    mlp.cost = GeneralizedCost(costfunc=CrossEntropyBinary())

    n_test = 3
    num_epochs = 3
    # Train model for num_epochs and n_test batches
    for epoch in range(num_epochs):
        for i, (x, t) in enumerate(train_set):
            x = mlp.fprop(x)
            delta = mlp.cost.get_errors(x, t)
            mlp.bprop(delta)
            mlp.optimizer.optimize(mlp.layers_to_optimize, epoch=epoch)
            if i > n_test:
                break

    # Get expected outputs of n_test batches and states of all layers
    # NOTE(review): the tensors returned by fprop are stored as-is; if the
    # backend reuses its output buffer, every entry aliases the last batch --
    # confirm, and snapshot with .get() at append time if so.
    outputs_exp = []
    pdicts_exp = [l.get_params_serialize() for l in mlp.layers_to_optimize]
    for i, (x, t) in enumerate(train_set):
        outputs_exp.append(mlp.fprop(x, inference=True))
        if i > n_test:
            break

    try:
        # Serialize model
        save_obj(mlp.serialize(keep_states=True), tmp_save)

        # Load model
        mlp = Model(layers=layers)
        mlp.load_weights(tmp_save)

        outputs = []
        pdicts = [l.get_params_serialize() for l in mlp.layers_to_optimize]
        for i, (x, t) in enumerate(train_set):
            outputs.append(mlp.fprop(x, inference=True))
            if i > n_test:
                break

        # Check outputs, states, and params are the same
        for output, output_exp in zip(outputs, outputs_exp):
            assert np.allclose(output.get(), output_exp.get())

        for pd, pd_exp in zip(pdicts, pdicts_exp):
            for s, s_e in zip(pd['states'], pd_exp['states']):
                if isinstance(s, list):  # this is the batch norm case
                    for _s, _s_e in zip(s, s_e):
                        assert np.allclose(_s, _s_e)
                else:
                    assert np.allclose(s, s_e)
            for p, p_e in zip(pd['params'], pd_exp['params']):
                if isinstance(p, list):  # this is the batch norm case
                    for _p, _p_e in zip(p, p_e):
                        assert np.allclose(_p, _p_e)
                else:
                    assert np.allclose(p, p_e)
    finally:
        # clean up even when serialization or an assertion above fails
        if os.path.exists(tmp_save):
            os.remove(tmp_save)
class DeepQNetwork:
    """Small fully-connected Q-network on Neon for vector-valued states.

    Holds an online model, an optional separately cloned target model, the
    optimizer selected by ``args.optimizer`` and pre-allocated backend
    tensors for the network input and the Q-value targets.
    """

    def __init__(self, state_size, num_actions, args):
        """Create backend, tensors, online/target models and the optimizer.

        Args:
            state_size: Length of one state vector.
            num_actions: Number of network outputs (one Q-value per action).
            args: Settings namespace; the attributes read here are
                batch_size, discount_rate, clip_error, backend, random_seed,
                device_id, datatype, stochastic_round, optimizer,
                learning_rate, decay_rate, target_steps and (only when
                target_steps is truthy) save_weights_prefix.
        """
        # remember parameters
        self.state_size = state_size
        self.num_actions = num_actions
        self.batch_size = args.batch_size
        self.discount_rate = args.discount_rate
        self.clip_error = args.clip_error
        # Per-action sample counter. It used to be fixed at 21 slots, which
        # raised IndexError in train() for larger action sets; it is never
        # smaller than before, so existing readers of the array still work.
        self.action_count = np.zeros(max(21, num_actions))

        # create Neon backend
        self.be = gen_backend(backend = args.backend,
                              batch_size = args.batch_size,
                              rng_seed = args.random_seed,
                              device_id = args.device_id,
                              datatype = np.dtype(args.datatype).type,
                              stochastic_round = args.stochastic_round)

        # prepare tensors once and reuse them
        self.input_shape = (self.state_size, self.batch_size)
        self.input = self.be.empty(self.input_shape)
        self.targets = self.be.empty((self.num_actions, self.batch_size))

        # create model
        layers = self._createLayers(num_actions)
        self.model = Model(layers = layers)
        self.cost = GeneralizedCost(costfunc = SumSquared())
        self.model.initialize(self.input_shape[:-1], self.cost)
        if args.optimizer == 'rmsprop':
            self.optimizer = RMSProp(learning_rate = args.learning_rate,
                                     decay_rate = args.decay_rate,
                                     stochastic_round = args.stochastic_round)
        elif args.optimizer == 'adam':
            self.optimizer = Adam(learning_rate = args.learning_rate,
                                  stochastic_round = args.stochastic_round)
        elif args.optimizer == 'adadelta':
            self.optimizer = Adadelta(decay = args.decay_rate,
                                      stochastic_round = args.stochastic_round)
        else:
            assert False, "Unknown optimizer"

        # create target model
        self.target_steps = args.target_steps
        self.train_iterations = 0
        if self.target_steps:
            self.target_model = Model(layers = self._createLayers(num_actions))
            self.target_model.initialize(self.input_shape[:-1])
            self.save_weights_prefix = args.save_weights_prefix
        else:
            # no separate target network: bootstrap from the online model
            self.target_model = self.model

    def _createLayers(self, num_actions):
        """Return a fresh layer list: one hidden layer and a linear output."""
        # create network
        init_norm = Gaussian(loc=0.0, scale=0.01)
        layers = []
        # The hidden layer is fully-connected and consists of 64 rectifier units.
        layers.append(Affine(nout=64, init=init_norm, bias=init_norm, activation=Rectlin()))
        # The output layer is a fully-connected linear layer with a single output for each valid action.
        layers.append(Affine(nout=num_actions, init=init_norm, bias=init_norm))
        return layers

    def _setInput(self, states):
        """Copy a (batch_size, state_size) array into the input tensor."""
        # change order of axes to match what Neon expects
        states = np.transpose(states)
        # copy() shouldn't be necessary here, but Neon doesn't work otherwise
        self.input.set(states.copy())
        # input normalization is intentionally disabled for this network:
        # self.be.divide(self.input, 255, self.input)

    def train(self, minibatch, epoch):
        """Run one Q-learning update on a minibatch.

        Args:
            minibatch: Tuple ``(prestates, actions, speed_actions, rewards,
                poststates, terminals)``. State arrays are 2-dimensional, the
                others 1-dimensional, all sharing the first dimension.
                ``speed_actions`` is unpacked but not used here.
            epoch: Current epoch, forwarded to the optimizer.
        """
        # expand components of minibatch
        prestates, actions, speed_actions, rewards, poststates, terminals = minibatch
        assert len(prestates.shape) == 2
        assert len(poststates.shape) == 2
        assert len(actions.shape) == 1
        assert len(rewards.shape) == 1
        assert len(terminals.shape) == 1
        assert prestates.shape == poststates.shape
        assert prestates.shape[0] == actions.shape[0] == rewards.shape[0] == poststates.shape[0] == terminals.shape[0]

        if self.target_steps and self.train_iterations % self.target_steps == 0:
            # HACK: serialize network to disk and read it back to clone
            filename = self.save_weights_prefix + "_target.pkl"
            save_obj(self.model.serialize(keep_states = False), filename)
            self.target_model.load_weights(filename)

        # feed-forward pass for poststates to get Q-values
        self._setInput(poststates)
        postq = self.target_model.fprop(self.input, inference = True)
        assert postq.shape == (self.num_actions, self.batch_size)

        # calculate max Q-value for each poststate
        postq = postq.asnumpyarray()
        maxpostq = np.max(postq, axis=0)
        assert maxpostq.shape == (self.batch_size,)

        # feed-forward pass for prestates
        self._setInput(prestates)
        preq = self.model.fprop(self.input, inference = False)
        assert preq.shape == (self.num_actions, self.batch_size)

        # make copy of prestate Q-values as targets
        targets = preq.asnumpyarray().copy()

        # update Q-value targets for actions taken
        for i, action in enumerate(actions):
            self.action_count[action] += 1
            if terminals[i]:
                targets[action, i] = float(rewards[i])
                if rewards[i] == -1000:
                    # Single-argument print(...) emits the same text under
                    # Python 2 and 3; the doubled space reproduces the
                    # separator of the former print statement.
                    print("######################### action  %s should never be sampled again" % (action,))
                    # NOTE(review): nesting reconstructed from a collapsed
                    # source line -- confirm this message belongs to the
                    # -1000 branch rather than to every terminal sample.
                    print("sampled_terminal")
            else:
                targets[action, i] = float(rewards[i]) + self.discount_rate * maxpostq[i]

        # copy targets to GPU memory
        self.targets.set(targets)

        # calculate errors
        deltas = self.cost.get_errors(preq, self.targets)
        assert deltas.shape == (self.num_actions, self.batch_size)
        print("nonzero deltas %s" % (np.count_nonzero(deltas.asnumpyarray()),))

        # calculate cost, just in case
        cost = self.cost.get_cost(preq, self.targets)
        assert cost.shape == (1,1)
        print("cost: %s" % (cost.asnumpyarray(),))

        # error clipping is currently disabled (self.clip_error is unused):
        #if self.clip_error:
        #  self.be.clip(deltas, -self.clip_error, self.clip_error, out = deltas)

        # perform back-propagation of gradients
        self.model.bprop(deltas)

        # perform optimization
        self.optimizer.optimize(self.model.layers_to_optimize, epoch)

        # increase number of weight updates (needed for target clone interval)
        self.train_iterations += 1

    def predict(self, states):
        """Return Q-values for a full batch of states.

        Args:
            states: Array of shape (batch_size, state_size).

        Returns:
            numpy array of the transposed network output, i.e. with the
            batch as first dimension.
        """
        # minibatch is full size, because Neon doesn't let change the minibatch size
        assert states.shape == (self.batch_size, self.state_size)

        # calculate Q-values for the states
        self._setInput(states)
        qvalues = self.model.fprop(self.input, inference = True)
        assert qvalues.shape == (self.num_actions, self.batch_size)
        # guard avoids the device-to-host copy unless debug logging is on
        if logger.isEnabledFor(logging.DEBUG):
            logger.debug("Q-values: " + str(qvalues.asnumpyarray()[:,0]))

        # transpose the result, so that batch size is first dimension
        return qvalues.T.asnumpyarray()

    def load_weights(self, load_path):
        """Load online-model weights from ``load_path``."""
        self.model.load_weights(load_path)

    def save_weights(self, save_path):
        """Serialize the online model, including states, to ``save_path``."""
        save_obj(self.model.serialize(keep_states = True), save_path)
class DeepQNetwork:
    """Convolutional deep Q-network built on the Neon backend.

    The class owns an online model, a target model (a separate clone when
    ``args.target_steps`` is truthy, otherwise the online model itself), a
    sum-squared cost and an RMSProp optimizer. All methods operate on
    full-size minibatches because the tensors are allocated once for
    ``args.batch_size`` and reused.
    """

    def __init__(self, num_actions, args):
        """Create the backend, the reusable tensors and the model(s).

        Args:
            num_actions: number of outputs of the final layer.
            args: object providing ``batch_size``, ``discount_rate``,
                ``history_length``, ``screen_height``, ``screen_width``,
                ``clip_error``, ``backend``, ``random_seed``, ``device_id``,
                ``datatype``, ``stochastic_round``, ``learning_rate``,
                ``rmsprop_decay_rate``, ``target_steps`` and, when
                ``target_steps`` is truthy, ``save_weights_path``.
        """
        # remember parameters
        self.num_actions = num_actions
        self.batch_size = args.batch_size
        self.discount_rate = args.discount_rate
        self.history_length = args.history_length
        self.screen_dim = (args.screen_height, args.screen_width)
        self.clip_error = args.clip_error

        # create Neon backend
        self.be = gen_backend(backend=args.backend,
                              batch_size=args.batch_size,
                              rng_seed=args.random_seed,
                              device_id=args.device_id,
                              default_dtype=np.dtype(args.datatype).type,
                              stochastic_round=args.stochastic_round)

        # prepare tensors once and reuse them
        self.input_shape = ((self.history_length,) + self.screen_dim +
                            (self.batch_size,))
        self.tensor = self.be.empty(self.input_shape)
        # needed for convolutional networks
        self.tensor.lshape = self.input_shape
        self.targets = self.be.empty((self.num_actions, self.batch_size))

        # create model
        layers = self.createLayers(num_actions)
        self.model = Model(layers=layers)
        self.cost = GeneralizedCost(costfunc=SumSquared())
        self.model.initialize(self.tensor.shape[:-1], self.cost)
        self.optimizer = RMSProp(learning_rate=args.learning_rate,
                                 decay_rate=args.rmsprop_decay_rate,
                                 stochastic_round=args.stochastic_round)

        # create target model
        self.target_steps = args.target_steps
        self.train_iterations = 0
        if self.target_steps:
            self.target_model = Model(layers=self.createLayers(num_actions))
            self.target_model.initialize(self.tensor.shape[:-1])
            self.save_weights_path = args.save_weights_path
        else:
            # without a clone interval the online model doubles as target
            self.target_model = self.model

        # optional statistics sink; train() calls callback.on_train(cost)
        self.callback = None

    def createLayers(self, num_actions):
        """Return a fresh list of layers: three Conv, two Affine.

        Args:
            num_actions: number of units in the linear output layer.
        """
        init_norm = Gaussian(loc=0.0, scale=0.01)
        layers = []
        # The first hidden layer convolves 32 filters of 8x8 with stride 4
        # with the input image and applies a rectifier nonlinearity.
        layers.append(
            Conv((8, 8, 32), strides=4, init=init_norm, activation=Rectlin()))
        # The second hidden layer convolves 64 filters of 4x4 with stride 2,
        # again followed by a rectifier nonlinearity.
        layers.append(
            Conv((4, 4, 64), strides=2, init=init_norm, activation=Rectlin()))
        # This is followed by a third convolutional layer that convolves 64
        # filters of 3x3 with stride 1 followed by a rectifier.
        layers.append(
            Conv((3, 3, 64), strides=1, init=init_norm, activation=Rectlin()))
        # The final hidden layer is fully-connected and consists of 512
        # rectifier units.
        layers.append(Affine(nout=512, init=init_norm, activation=Rectlin()))
        # The output layer is a fully-connected linear layer with a single
        # output for each valid action.
        layers.append(Affine(nout=num_actions, init=init_norm))
        return layers

    def setTensor(self, states):
        """Load ``states`` into the reusable input tensor and divide by 255.

        Args:
            states: 4-D array whose first axis is moved to the last position
                before it is copied into ``self.tensor``.
        """
        # change order of axes to match what Neon expects
        states = np.transpose(states, axes=(1, 2, 3, 0))
        # copy() shouldn't be necessary here, but Neon doesn't work otherwise
        self.tensor.set(states.copy())
        # normalize network input between 0 and 1
        self.be.divide(self.tensor, 255, self.tensor)

    def _computeTargets(self, preq, actions, rewards, terminals, maxpostq):
        """Return Q-value targets as a new array; ``preq`` is left untouched.

        Only the entry of the action taken is replaced in each column: by
        the reward for terminal transitions, otherwise by the reward plus
        the discounted maximum post-state Q-value.

        Args:
            preq: 2-D array indexed ``[action, sample]`` with the Q-values
                predicted for the pre-states.
            actions: per-sample index of the action taken.
            rewards: per-sample reward.
            terminals: per-sample truthy flag marking terminal transitions.
            maxpostq: 2-D array indexed ``[0, sample]`` with the maximum
                post-state Q-value.
        """
        # The copy is essential: asnumpyarray() may hand back the backend's
        # own buffer, and editing that in place would make the targets equal
        # to the predictions (zero error signal).
        targets = preq.copy()
        for i, action in enumerate(actions):
            if terminals[i]:
                targets[action, i] = float(rewards[i])
            else:
                targets[action, i] = (float(rewards[i]) +
                                      self.discount_rate * maxpostq[0, i])
        return targets

    def train(self, minibatch, epoch):
        """Run one Q-learning update on a full minibatch.

        Args:
            minibatch: tuple ``(prestates, actions, rewards, poststates,
                terminals)``; the states are 4-D arrays, the others 1-D
                arrays with the same leading size.
            epoch: passed through to ``optimizer.optimize``.
        """
        # expand components of minibatch
        prestates, actions, rewards, poststates, terminals = minibatch
        assert len(prestates.shape) == 4
        assert len(poststates.shape) == 4
        assert len(actions.shape) == 1
        assert len(rewards.shape) == 1
        assert len(terminals.shape) == 1
        assert prestates.shape == poststates.shape
        assert (prestates.shape[0] == actions.shape[0] == rewards.shape[0] ==
                poststates.shape[0] == terminals.shape[0])

        if (self.target_steps and
                self.train_iterations % self.target_steps == 0):
            # HACK: push something through network, so that weights exist
            self.model.fprop(self.tensor)
            # HACK: serialize network to disk and read it back to clone
            filename = os.path.join(self.save_weights_path,
                                    "target_network.pkl")
            save_obj(self.model.serialize(keep_states=False), filename)
            self.target_model.load_weights(filename)

        # feed-forward pass for poststates to get Q-values
        self.setTensor(poststates)
        postq = self.target_model.fprop(self.tensor, inference=True)
        assert postq.shape == (self.num_actions, self.batch_size)

        # calculate max Q-value for each poststate
        maxpostq = self.be.max(postq, axis=0).asnumpyarray()
        assert maxpostq.shape == (1, self.batch_size)

        # feed-forward pass for prestates
        self.setTensor(prestates)
        preq = self.model.fprop(self.tensor, inference=False)
        assert preq.shape == (self.num_actions, self.batch_size)

        # build targets from a private copy of the prestate Q-values
        targets = self._computeTargets(preq.asnumpyarray(), actions, rewards,
                                       terminals, maxpostq)

        # copy targets to GPU memory
        self.targets.set(targets)

        # calculate errors
        deltas = self.cost.get_errors(preq, self.targets)
        assert deltas.shape == (self.num_actions, self.batch_size)

        # calculate cost, just in case
        cost = self.cost.get_cost(preq, self.targets)
        assert cost.shape == (1, 1)

        # clip errors
        if self.clip_error:
            self.be.clip(deltas, -self.clip_error, self.clip_error,
                         out=deltas)

        # perform back-propagation of gradients
        self.model.bprop(deltas)

        # perform optimization
        self.optimizer.optimize(self.model.layers_to_optimize, epoch)

        # increase number of weight updates (needed for target clone interval)
        self.train_iterations += 1

        # calculate statistics
        if self.callback:
            self.callback.on_train(cost.asnumpyarray()[0, 0])

    def predict(self, states):
        """Return the index of the highest-valued action for ``states[0]``.

        Args:
            states: array of shape ``(batch_size, history_length) +
                screen_dim``; only the result for the first state is
                returned.
        """
        # minibatch is full size, because Neon doesn't let change the
        # minibatch size
        assert states.shape == ((self.batch_size, self.history_length,) +
                                self.screen_dim)

        # calculate Q-values for the states
        self.setTensor(states)
        qvalues = self.model.fprop(self.tensor, inference=True)
        assert qvalues.shape == (self.num_actions, self.batch_size)
        if logger.isEnabledFor(logging.DEBUG):
            logger.debug("Q-values: " + str(qvalues.asnumpyarray()[:, 0]))

        # find the action with highest q-value
        actions = self.be.argmax(qvalues, axis=0)
        assert actions.shape == (1, self.batch_size)

        # take only the first result
        return actions.asnumpyarray()[0, 0]

    def getMeanQ(self, states):
        """Return the mean over the batch of each state's maximum Q-value.

        Args:
            states: array of shape ``(batch_size, history_length) +
                screen_dim``.
        """
        assert states.shape == ((self.batch_size, self.history_length,) +
                                self.screen_dim)

        # calculate Q-values for the states
        self.setTensor(states)
        qvalues = self.model.fprop(self.tensor, inference=True)
        assert qvalues.shape == (self.num_actions, self.batch_size)

        # take maximum Q-value for each state
        actions = self.be.max(qvalues, axis=0)
        assert actions.astensor().shape == (1, self.batch_size)

        # calculate mean Q-value of all states
        meanq = self.be.mean(actions, axis=1)
        assert meanq.astensor().shape == (1, 1)

        # return the mean
        return meanq.asnumpyarray()[0, 0]

    def load_weights(self, load_path):
        """Load the online model's weights from ``load_path``."""
        self.model.load_weights(load_path)

    def save_weights(self, save_path):
        """Serialize the online model, including states, to ``save_path``."""
        save_obj(self.model.serialize(keep_states=True), save_path)
class DeepQNetwork:
    """Fully-connected deep Q-network with separate steer and speed outputs.

    The output layer has ``num_steers + num_speeds`` units: rows
    ``[0, num_steers)`` are steer Q-values and the remaining rows are speed
    Q-values. A target model is a separate clone when ``args.target_steps``
    is truthy, otherwise the online model itself.
    """

    def __init__(self, state_size, num_steers, num_speeds, args):
        """Create the backend, the reusable tensors, the model(s) and optimizer.

        Args:
            state_size: length of one flat state vector.
            num_steers: number of steer actions.
            num_speeds: number of speed actions.
            args: object providing ``hidden_layers``, ``hidden_nodes``,
                ``batch_size``, ``discount_rate``, ``clip_error``,
                ``backend``, ``random_seed``, ``device_id``, ``datatype``,
                ``stochastic_round``, ``optimizer``, ``learning_rate``,
                ``decay_rate``, ``target_steps`` and, when ``target_steps``
                is truthy, ``save_weights_prefix``.

        Raises:
            ValueError: if ``args.optimizer`` is not one of ``'rmsprop'``,
                ``'adam'`` or ``'adadelta'``.
        """
        # remember parameters
        self.state_size = state_size
        self.num_steers = num_steers
        self.num_speeds = num_speeds
        self.num_actions = num_steers + num_speeds
        self.num_layers = args.hidden_layers
        self.hidden_nodes = args.hidden_nodes
        self.batch_size = args.batch_size
        self.discount_rate = args.discount_rate
        self.clip_error = args.clip_error

        # create Neon backend
        self.be = gen_backend(backend=args.backend,
                              batch_size=args.batch_size,
                              rng_seed=args.random_seed,
                              device_id=args.device_id,
                              datatype=np.dtype(args.datatype).type,
                              stochastic_round=args.stochastic_round)

        # prepare tensors once and reuse them
        self.input_shape = (self.state_size, self.batch_size)
        self.input = self.be.empty(self.input_shape)
        self.targets = self.be.empty((self.num_actions, self.batch_size))

        # create model
        self.model = Model(layers=self._createLayers())
        self.cost = GeneralizedCost(costfunc=SumSquared())
        self.model.initialize(self.input_shape[:-1], self.cost)
        self.optimizer = self._createOptimizer(args)

        # create target model
        self.target_steps = args.target_steps
        self.train_iterations = 0
        if self.target_steps:
            self.target_model = Model(layers=self._createLayers())
            self.target_model.initialize(self.input_shape[:-1])
            self.save_weights_prefix = args.save_weights_prefix
        else:
            # without a clone interval the online model doubles as target
            self.target_model = self.model

    def _createOptimizer(self, args):
        """Build the optimizer named by ``args.optimizer``.

        Raises:
            ValueError: for an unrecognized optimizer name. An explicit
                exception is used instead of ``assert`` so the check
                survives ``python -O``.
        """
        if args.optimizer == 'rmsprop':
            return RMSProp(learning_rate=args.learning_rate,
                           decay_rate=args.decay_rate,
                           stochastic_round=args.stochastic_round)
        if args.optimizer == 'adam':
            return Adam(learning_rate=args.learning_rate,
                        stochastic_round=args.stochastic_round)
        if args.optimizer == 'adadelta':
            return Adadelta(decay=args.decay_rate,
                            stochastic_round=args.stochastic_round)
        raise ValueError("Unknown optimizer: %s" % (args.optimizer,))

    def _createLayers(self):
        """Return ``num_layers`` rectified hidden layers plus a linear output."""
        init_norm = Gaussian(loc=0.0, scale=0.01)
        layers = []
        for _ in range(self.num_layers):
            layers.append(Affine(nout=self.hidden_nodes, init=init_norm,
                                 activation=Rectlin()))
        layers.append(Affine(nout=self.num_actions, init=init_norm))
        return layers

    def _setInput(self, states):
        """Copy the transposed ``states`` into the reusable input tensor.

        Args:
            states: 2-D array; it is transposed before being stored. No
                normalization is applied.
        """
        # change order of axes to match what Neon expects
        states = np.transpose(states)
        # copy() shouldn't be necessary here, but Neon doesn't work otherwise
        self.input.set(states.copy())

    def train(self, minibatch, epoch=0):
        """Run one Q-learning update on a full minibatch.

        The steer and speed outputs are updated independently: each uses the
        maximum post-state Q-value taken over its own group of rows.

        Args:
            minibatch: tuple ``(prestates, steers, speeds, rewards,
                poststates, terminals)``; the states are 2-D arrays, the
                others 1-D arrays with the same leading size.
            epoch: passed through to ``optimizer.optimize``.
        """
        # expand components of minibatch
        prestates, steers, speeds, rewards, poststates, terminals = minibatch
        assert len(prestates.shape) == 2
        assert len(poststates.shape) == 2
        assert len(steers.shape) == 1
        assert len(speeds.shape) == 1
        assert len(rewards.shape) == 1
        assert len(terminals.shape) == 1
        assert prestates.shape == poststates.shape
        assert (prestates.shape[0] == steers.shape[0] == speeds.shape[0] ==
                rewards.shape[0] == poststates.shape[0] == terminals.shape[0])

        if (self.target_steps and
                self.train_iterations % self.target_steps == 0):
            # HACK: serialize network to disk and read it back to clone
            filename = self.save_weights_prefix + "_target.pkl"
            save_obj(self.model.serialize(keep_states=False), filename)
            self.target_model.load_weights(filename)

        # feed-forward pass for poststates to get Q-values
        self._setInput(poststates)
        postq = self.target_model.fprop(self.input, inference=True)
        assert postq.shape == (self.num_actions, self.batch_size)

        # calculate max Q-value for each poststate, per output group
        postq = postq.asnumpyarray()
        maxsteerq = np.max(postq[:self.num_steers, :], axis=0)
        assert maxsteerq.shape == (self.batch_size,), \
            "size: %s" % str(maxsteerq.shape)
        maxspeedq = np.max(postq[-self.num_speeds:, :], axis=0)
        assert maxspeedq.shape == (self.batch_size,)

        # feed-forward pass for prestates
        self._setInput(prestates)
        preq = self.model.fprop(self.input, inference=False)
        assert preq.shape == (self.num_actions, self.batch_size)

        # make copy of prestate Q-values as targets
        # HACK: copy() was needed to make it work on CPU
        targets = preq.asnumpyarray().copy()

        # update Q-value targets for actions taken
        for i, (steer, speed) in enumerate(zip(steers, speeds)):
            # speed rows follow the steer rows in the output layer
            speed_row = self.num_steers + speed
            if terminals[i]:
                targets[steer, i] = float(rewards[i])
                targets[speed_row, i] = float(rewards[i])
            else:
                targets[steer, i] = (float(rewards[i]) +
                                     self.discount_rate * maxsteerq[i])
                targets[speed_row, i] = (float(rewards[i]) +
                                         self.discount_rate * maxspeedq[i])

        # copy targets to GPU memory
        self.targets.set(targets)

        # calculate errors
        deltas = self.cost.get_errors(preq, self.targets)
        assert deltas.shape == (self.num_actions, self.batch_size)

        # calculate cost, just in case
        cost = self.cost.get_cost(preq, self.targets)
        assert cost.shape == (1, 1)

        # clip errors
        if self.clip_error:
            self.be.clip(deltas, -self.clip_error, self.clip_error,
                         out=deltas)

        # perform back-propagation of gradients
        self.model.bprop(deltas)

        # perform optimization
        self.optimizer.optimize(self.model.layers_to_optimize, epoch)

        # increase number of weight updates (needed for target clone interval)
        self.train_iterations += 1

    def predict(self, states):
        """Return Q-values for ``states`` with the batch as first dimension.

        Args:
            states: array of shape ``(batch_size, state_size)``.

        Returns:
            The transposed model output converted to a numpy array.
        """
        # minibatch is full size, because Neon doesn't let change the
        # minibatch size
        assert states.shape == (self.batch_size, self.state_size)

        # calculate Q-values for the states
        self._setInput(states)
        qvalues = self.model.fprop(self.input, inference=True)
        assert qvalues.shape == (self.num_actions, self.batch_size)
        if logger.isEnabledFor(logging.DEBUG):
            logger.debug("Q-values: " + str(qvalues.asnumpyarray()[:, 0]))

        # transpose the result, so that batch size is first dimension
        return qvalues.T.asnumpyarray()

    def load_weights(self, load_path):
        """Load the online model's weights from ``load_path``."""
        self.model.load_weights(load_path)

    def save_weights(self, save_path):
        """Serialize the online model, including states, to ``save_path``."""
        save_obj(self.model.serialize(keep_states=True), save_path)
class DeepQNetwork:
    """Convolutional deep Q-network with optional batch normalization.

    Rewards are clipped to ``[args.min_reward, args.max_reward]`` during
    training. The target model is a separate network when
    ``args.target_steps`` is truthy (refreshed explicitly through
    ``update_target_network``), otherwise the online model itself.
    """

    def __init__(self, num_actions, args):
        """Create the backend, the reusable tensors, the model(s) and optimizer.

        Args:
            num_actions: number of outputs of the final layer.
            args: object providing ``batch_size``, ``discount_rate``,
                ``history_length``, ``screen_height``, ``screen_width``,
                ``clip_error``, ``min_reward``, ``max_reward``,
                ``batch_norm``, ``backend``, ``random_seed``, ``device_id``,
                ``datatype``, ``stochastic_round``, ``optimizer``,
                ``learning_rate``, ``decay_rate``, ``target_steps`` and, when
                ``target_steps`` is truthy, ``save_weights_prefix``.

        Raises:
            ValueError: if ``args.optimizer`` is not one of ``'rmsprop'``,
                ``'adam'`` or ``'adadelta'``.
        """
        # remember parameters
        self.num_actions = num_actions
        self.batch_size = args.batch_size
        self.discount_rate = args.discount_rate
        self.history_length = args.history_length
        self.screen_dim = (args.screen_height, args.screen_width)
        self.clip_error = args.clip_error
        self.min_reward = args.min_reward
        self.max_reward = args.max_reward
        self.batch_norm = args.batch_norm

        # create Neon backend
        self.be = gen_backend(backend=args.backend,
                              batch_size=args.batch_size,
                              rng_seed=args.random_seed,
                              device_id=args.device_id,
                              datatype=np.dtype(args.datatype).type,
                              stochastic_round=args.stochastic_round)

        # prepare tensors once and reuse them
        self.input_shape = ((self.history_length,) + self.screen_dim +
                            (self.batch_size,))
        self.input = self.be.empty(self.input_shape)
        # HACK: needed for convolutional networks
        self.input.lshape = self.input_shape
        self.targets = self.be.empty((self.num_actions, self.batch_size))

        # create model
        layers = self._createLayers(num_actions)
        self.model = Model(layers=layers)
        self.cost = GeneralizedCost(costfunc=SumSquared())
        # Bug fix
        for layer in self.model.layers.layers:
            layer.parallelism = 'Disabled'
        self.model.initialize(self.input_shape[:-1], self.cost)
        self.optimizer = self._createOptimizer(args)

        # create target model
        self.train_iterations = 0
        if args.target_steps:
            self.target_model = Model(layers=self._createLayers(num_actions))
            # Bug fix
            for layer in self.target_model.layers.layers:
                layer.parallelism = 'Disabled'
            self.target_model.initialize(self.input_shape[:-1])
            self.save_weights_prefix = args.save_weights_prefix
        else:
            # without a clone interval the online model doubles as target
            self.target_model = self.model

        # optional statistics sink; train() calls callback.on_train(cost)
        self.callback = None

    def _createOptimizer(self, args):
        """Build the optimizer named by ``args.optimizer``.

        Raises:
            ValueError: for an unrecognized optimizer name. An explicit
                exception is used instead of ``assert`` so the check
                survives ``python -O``.
        """
        if args.optimizer == 'rmsprop':
            return RMSProp(learning_rate=args.learning_rate,
                           decay_rate=args.decay_rate,
                           stochastic_round=args.stochastic_round)
        if args.optimizer == 'adam':
            return Adam(learning_rate=args.learning_rate,
                        stochastic_round=args.stochastic_round)
        if args.optimizer == 'adadelta':
            return Adadelta(decay=args.decay_rate,
                            stochastic_round=args.stochastic_round)
        raise ValueError("Unknown optimizer: %s" % (args.optimizer,))

    def _createLayers(self, num_actions):
        """Return a fresh list of layers: three Conv, two Affine.

        Hidden layers receive ``batch_norm=self.batch_norm``; the output
        layer does not.

        Args:
            num_actions: number of units in the linear output layer.
        """
        init_xavier_conv = Xavier(local=True)
        init_xavier_affine = Xavier(local=False)
        layers = []
        # The first hidden layer convolves 32 filters of 8x8 with stride 4
        # with the input image and applies a rectifier nonlinearity.
        layers.append(Conv((8, 8, 32), strides=4, init=init_xavier_conv,
                           activation=Rectlin(), batch_norm=self.batch_norm))
        # The second hidden layer convolves 64 filters of 4x4 with stride 2,
        # again followed by a rectifier nonlinearity.
        layers.append(Conv((4, 4, 64), strides=2, init=init_xavier_conv,
                           activation=Rectlin(), batch_norm=self.batch_norm))
        # This is followed by a third convolutional layer that convolves 64
        # filters of 3x3 with stride 1 followed by a rectifier.
        layers.append(Conv((3, 3, 64), strides=1, init=init_xavier_conv,
                           activation=Rectlin(), batch_norm=self.batch_norm))
        # The final hidden layer is fully-connected and consists of 512
        # rectifier units.
        layers.append(Affine(nout=512, init=init_xavier_affine,
                             activation=Rectlin(),
                             batch_norm=self.batch_norm))
        # The output layer is a fully-connected linear layer with a single
        # output for each valid action.
        layers.append(Affine(nout=num_actions, init=init_xavier_affine))
        return layers

    def _setInput(self, states):
        """Load ``states`` into the reusable input tensor and divide by 255.

        Args:
            states: 4-D array whose first axis is moved to the last position
                before it is copied into ``self.input``.
        """
        # change order of axes to match what Neon expects
        states = np.transpose(states, axes=(1, 2, 3, 0))
        # copy() shouldn't be necessary here, but Neon doesn't work otherwise
        self.input.set(states.copy())
        # normalize network input between 0 and 1
        self.be.divide(self.input, 255, self.input)

    def update_target_network(self):
        """Copy the online model's weights and states into the target model."""
        # have to serialize also states for batch normalization to work
        pdict = self.model.get_description(get_weights=True, keep_states=True)
        self.target_model.deserialize(pdict, load_states=True)

    def train(self, minibatch, epoch):
        """Run one Q-learning update on a full minibatch.

        Args:
            minibatch: tuple ``(prestates, actions, rewards, poststates,
                terminals)``; the states are 4-D arrays, the others 1-D
                arrays with the same leading size.
            epoch: passed through to ``optimizer.optimize``.
        """
        # expand components of minibatch
        prestates, actions, rewards, poststates, terminals = minibatch
        assert len(prestates.shape) == 4
        assert len(poststates.shape) == 4
        assert len(actions.shape) == 1
        assert len(rewards.shape) == 1
        assert len(terminals.shape) == 1
        assert prestates.shape == poststates.shape
        assert (prestates.shape[0] == actions.shape[0] == rewards.shape[0] ==
                poststates.shape[0] == terminals.shape[0])

        # feed-forward pass for poststates to get Q-values
        self._setInput(poststates)
        postq = self.target_model.fprop(self.input, inference=True)
        assert postq.shape == (self.num_actions, self.batch_size)

        # calculate max Q-value for each poststate
        maxpostq = self.be.max(postq, axis=0).asnumpyarray()
        assert maxpostq.shape == (1, self.batch_size)

        # feed-forward pass for prestates
        self._setInput(prestates)
        preq = self.model.fprop(self.input, inference=False)
        assert preq.shape == (self.num_actions, self.batch_size)

        # make copy of prestate Q-values as targets
        targets = preq.asnumpyarray().copy()

        # clip rewards to the configured [min_reward, max_reward] range
        rewards = np.clip(rewards, self.min_reward, self.max_reward)

        # update Q-value targets for actions taken
        for i, action in enumerate(actions):
            if terminals[i]:
                targets[action, i] = float(rewards[i])
            else:
                targets[action, i] = (float(rewards[i]) +
                                      self.discount_rate * maxpostq[0, i])

        # copy targets to GPU memory
        self.targets.set(targets)

        # calculate errors
        deltas = self.cost.get_errors(preq, self.targets)
        assert deltas.shape == (self.num_actions, self.batch_size)

        # calculate cost, just in case
        cost = self.cost.get_cost(preq, self.targets)
        assert cost.shape == (1, 1)

        # clip errors
        if self.clip_error:
            self.be.clip(deltas, -self.clip_error, self.clip_error,
                         out=deltas)

        # perform back-propagation of gradients
        self.model.bprop(deltas)

        # perform optimization
        self.optimizer.optimize(self.model.layers_to_optimize, epoch)

        # increase number of weight updates (needed for stats callback)
        self.train_iterations += 1

        # calculate statistics
        if self.callback:
            self.callback.on_train(cost[0, 0])

    def predict(self, states):
        """Return Q-values for ``states`` with the batch as first dimension.

        Args:
            states: array of shape ``(batch_size, history_length) +
                screen_dim``.

        Returns:
            The transposed model output converted to a numpy array.
        """
        # minibatch is full size, because Neon doesn't let change the
        # minibatch size
        assert states.shape == ((self.batch_size, self.history_length,) +
                                self.screen_dim)

        # calculate Q-values for the states
        self._setInput(states)
        qvalues = self.model.fprop(self.input, inference=True)
        assert qvalues.shape == (self.num_actions, self.batch_size)
        if logger.isEnabledFor(logging.DEBUG):
            logger.debug("Q-values: " + str(qvalues.asnumpyarray()[:, 0]))

        # transpose the result, so that batch size is first dimension
        return qvalues.T.asnumpyarray()

    def load_weights(self, load_path):
        """Load the online model's parameters from ``load_path``."""
        self.model.load_params(load_path)

    def save_weights(self, save_path):
        """Save the online model's parameters to ``save_path``."""
        self.model.save_params(save_path)
model_desc = ModelDescription(load_obj(args.save_model_file)) for layer in segnet_model.layers_to_optimize: name = layer.name trained_layer = model_desc.getlayer(name) layer.load_weights(trained_layer) fig = plt.figure() if args.display: plt.ion() im1 = None im2 = None cnt = 1 for x, t in test_set: z = segnet_model.fprop(x).get() z = np.argmax(z.reshape((c, h, w)), axis=0) t = np.argmax(t.get().reshape((c, h, w)), axis=0) # calculate the misclass rate acc = (np.where(z == t)[0].size / float(z.size)) * 100.0 plt.subplot(2, 1, 1) if im1 is None: im1 = plt.imshow(t) plt.title('Truth') else: im1.set_data(t) plt.subplot(2, 1, 2)
end = drv.Event() num_iterations = config.num_warmup_iters + config.num_timing_iters forward_time = np.zeros(config.num_timing_iters) backward_time = np.zeros(config.num_timing_iters) iter = 0 flag = True while flag: for (x, t) in data: iter += 1 if iter > num_iterations: flag = False break if iter > config.num_warmup_iters: # time it if config.backend == 'cpu': s = time.time()*1000 x = network.fprop(x) cost_iter = network.cost.get_cost(x, t) e = time.time()*1000 # in milliseconds forward_time[iter - config.num_warmup_iters - 1] = e - s s = time.time()*1000 delta = network.cost.get_errors(x, t) # gradient of the cost network.bprop(delta) e = time.time()*1000 backward_time[iter - config.num_warmup_iters - 1] = e - s else: start.record() x = network.fprop(x) cost_iter = network.cost.get_cost(x, t) end.record() end.synchronize() forward_time[iter - config.num_warmup_iters - 1] \
def test_reshape_layer_model(backend_default, fargs):
    """
    Smoke test: fprop and bprop must run through every layer stack below.

    Stacks covered:
    - conv after LUT
    - conv after RNNs
    - conv before RNNs
    """
    np.random.seed(seed=0)
    nin, nout, bsz = fargs

    be = backend_default
    be.bsz = bsz
    input_size = (nin, be.bsz)

    init = Uniform(-0.1, 0.1)
    g_uni = GlorotUniform()

    # draw host data (inputs first, then deltas) before moving it to the
    # backend, so the random stream is consumed in a fixed order
    host_inputs = np.random.rand(nin, be.bsz)
    host_deltas = np.random.rand(nout, be.bsz)
    inp = be.array(host_inputs)
    delta = be.array(host_deltas)

    stacks = []

    # LUT -> reshape -> conv -> LSTM
    stacks.append([
        LookupTable(vocab_size=2000, embedding_dim=400, init=init),
        Reshape(reshape=(4, 100, -1)),
        Conv((3, 3, 16), init=init),
        LSTM(64, g_uni, activation=Tanh(),
             gate_activation=Logistic(), reset_cells=True),
        RecurrentSum(),
        Affine(nout, init, bias=init, activation=Softmax())
    ])

    # LUT -> reshape -> conv -> pooling
    stacks.append([
        LookupTable(vocab_size=1000, embedding_dim=400, init=init),
        Reshape(reshape=(4, 50, -1)),
        Conv((3, 3, 16), init=init),
        Pooling(2, strides=2),
        Affine(nout=nout, init=init, bias=init, activation=Softmax()),
    ])

    # LUT -> LSTM -> reshape -> conv
    stacks.append([
        LookupTable(vocab_size=2000, embedding_dim=400, init=init),
        LSTM(64, g_uni, activation=Tanh(),
             gate_activation=Logistic(), reset_cells=True),
        Reshape(reshape=(4, 32, -1)),
        Conv((3, 3, 16), init=init),
        Affine(nout, init, bias=init, activation=Softmax())
    ])

    # LUT -> plain RNN -> reshape (free middle axis) -> conv
    stacks.append([
        LookupTable(vocab_size=2000, embedding_dim=400, init=init),
        Recurrent(64, g_uni, activation=Tanh(), reset_cells=True),
        Reshape(reshape=(4, -1, 32)),
        Conv((3, 3, 16), init=init),
        Affine(nout, init, bias=init, activation=Softmax())
    ])

    # LUT -> recurrent sum (no reshape at all)
    stacks.append([
        LookupTable(vocab_size=1000, embedding_dim=128, init=init),
        RecurrentSum(),
        Affine(nout=nout, init=init, bias=init, activation=Softmax()),
    ])

    # LUT -> bidirectional RNN with batch norm -> reshape -> conv
    stacks.append([
        LookupTable(vocab_size=1000, embedding_dim=200, init=init),
        DeepBiRNN(32, init=GlorotUniform(), batch_norm=True,
                  activation=Tanh(), reset_cells=True, depth=1),
        Reshape((4, 32, -1)),
        Conv((3, 3, 16), init=init),
        Affine(nout=nout, init=init, bias=init, activation=Softmax())
    ])

    for stack in stacks:
        model = Model(layers=stack)
        cost = GeneralizedCost(costfunc=CrossEntropyBinary())
        model.initialize(input_size, cost)
        model.fprop(inp)
        model.bprop(delta)
# Cropping the image patch = im[top:bottom, left:right, :] # Neon wants the data as a flat array organised as [RRRRR...GGGGG...BBBB] patch_array = patch.transpose((2, 0, 1)).flatten() # make an image buffer on host, pad out to batch size host_buf = np.zeros((3*patch_height*patch_width, model.be.bsz)) # set the first image to be the image data loaded above host_buf[:, 0] = patch_array.copy() # make buffer on the device dev_buf = model.be.zeros((3*patch_height*patch_width, model.be.bsz)) # copy host buffer to device buffer dev_buf[:] = host_buf # Send through the network. Note that in the returned array there # will be one column for each item in the batch; as we only put data # in the first item, we only want the first column predictions = model.fprop(dev_buf, True).asnumpyarray()[:,0] # print predictions # Print the activations of the 4th layer from the end of the model # Note 1: model.layers represents a SingleOutputTree when using GoogLeNet; # during inference only the main branch (index 0) outputs are considered # Note 2: in the returned array there will be one column for each item # in the batch; as we only put data in the first item, we only want the # first column ndprint(model.layers.layers[0].layers[int(args.layer_index)].outputs.asnumpyarray()[:,0])