class MostCommonWordSense:
    """
    Small feed-forward Neon classifier: a 100-unit rectified-linear hidden
    layer followed by a 2-unit softmax output layer.

    Args:
        rounding: value forwarded as ``stochastic_round`` to the optimizer
        callback_args (dict): keyword arguments forwarded to ``Callbacks``
        epochs: number of epochs passed to ``Model.fit``
    """

    def __init__(self, rounding, callback_args, epochs):
        # Gaussian(0, 0.01) initializer, used for both weights and biases
        self.init = Gaussian(loc=0.0, scale=0.01)
        # gradient descent with momentum
        self.optimizer = GradientDescentMomentum(
            learning_rate=0.1,
            momentum_coef=0.9,
            stochastic_round=rounding,
        )
        # sum-of-squares cost
        self.cost = GeneralizedCost(costfunc=SumSquared())
        # no network exists until build() or load() is called
        self.model = None
        self.epochs = epochs
        self.callback_args = callback_args

    def build(self):
        """Create the two-layer network and store it in ``self.model``."""
        hidden = Affine(nout=100, init=self.init, bias=self.init,
                        activation=Rectlin())
        output = Affine(nout=2, init=self.init, bias=self.init,
                        activation=Softmax())
        self.model = Model(layers=[hidden, output])

    def fit(self, valid_set, train_set):
        """
        Train ``self.model`` on ``train_set``.

        Args:
            valid_set: dataset handed to ``Callbacks`` as ``eval_set``
            train_set: dataset the model is fitted on
        """
        fit_callbacks = Callbacks(self.model, eval_set=valid_set,
                                  **self.callback_args)
        self.model.fit(
            train_set,
            optimizer=self.optimizer,
            num_epochs=self.epochs,
            cost=self.cost,
            callbacks=fit_callbacks,
        )

    def save(self, save_path):
        """Write the model parameters to ``save_path``."""
        self.model.save_params(save_path)

    def load(self, model_path):
        """Replace ``self.model`` with a model constructed from ``model_path``."""
        self.model = Model(model_path)

    def eval(self, valid_set):
        """Return the ``Misclassification`` metric of the model on ``valid_set``."""
        return self.model.eval(valid_set, metric=Misclassification())

    def get_outputs(self, valid_set):
        """Return the model outputs for ``valid_set``."""
        return self.model.get_outputs(valid_set)
class DQNNeon(Learner):
    """
    This class is an implementation of the DQN network based on Neon.

    The modules that interact with the agent, the replay memory and the
    statistic calls are implemented here, taking the individual requirements
    of the Neon framework into account. The code is adapted from:
    https://github.com/tambetm/simple_dqn

    Attributes:
        input_shape (tuple[int]): Dimension of the network input.
        dummy_batch (numpy.ndarray): Dummy batch used to calculate Q-values for single states.
        batch_norm (bool): Indicates if normalization is wanted for a certain layer (default=False).
        be (neon.backends.nervanagpu.NervanaGPU): Describes the backend for the Neon implementation.
        input (neon.backends.nervanagpu.GPUTensor): Definition of network input shape.
        targets (neon.backends.nervanagpu.GPUTensor): Definition of network output shape.
        model (neon.models.model.Model): Generated Neon model.
        target_model (neon.models.model.Model): Generated target Neon model.
        cost_func (neon.layers.layer.GeneralizedCost): Cost function for model training.
        callback (Statistics): Hook for the statistics object to pass train and test information.

    Note:
        More attributes of this class are defined in the base class Learner.
    """

    def __init__(self, env, args, rng, name = "DQNNeon"):
        """
        Initializes a network based on the Neon framework.

        Args:
            env (AtariEnv): The environment in which the agent actuates.
            args (argparse.Namespace): All settings either with a default value or set via command line arguments.
            rng (mtrand.RandomState): Initialized Mersenne Twister pseudo-random number generator.
            name (str): The name of the network object.

        Note:
            This function should always call the base class first to initialize
            the common values for the networks.
        """
        _logger.info("Initializing new object of type " + str(type(self).__name__))
        super(DQNNeon, self).__init__(env, args, rng, name)
        self.input_shape = (self.sequence_length,) + self.frame_dims + (self.batch_size,)
        self.dummy_batch = np.zeros((self.batch_size, self.sequence_length) + self.frame_dims, dtype=np.uint8)
        self.batch_norm = args.batch_norm

        self.be = gen_backend(
                backend = args.backend,
                batch_size = args.batch_size,
                rng_seed = args.random_seed,
                device_id = args.device_id,
                datatype = np.dtype(args.datatype).type,
                stochastic_round = args.stochastic_round)

        # prepare tensors once and reuse them
        self.input = self.be.empty(self.input_shape)
        self.input.lshape = self.input_shape # HACK: needed for convolutional networks
        self.targets = self.be.empty((self.output_shape, self.batch_size))

        # create model
        layers = self._create_layer()
        self.model = Model(layers = layers)
        self.cost_func = GeneralizedCost(costfunc = SumSquared())
        self._disable_layer_parallelism(self.model)
        self.model.initialize(self.input_shape[:-1], self.cost_func)

        self._set_optimizer()

        # identity test: "== None" can be hijacked by objects overriding __eq__
        if self.args.load_weights is not None:
            self.load_weights(self.args.load_weights)

        # create target model; without an update frequency the online model
        # doubles as the target model
        if self.target_update_frequency:
            layers = self._create_layer()
            self.target_model = Model(layers)
            self._disable_layer_parallelism(self.target_model)
            self.target_model.initialize(self.input_shape[:-1])
        else:
            self.target_model = self.model

        self.callback = None
        _logger.debug("%s" % self)

    @staticmethod
    def _disable_layer_parallelism(model):
        """ Sets ``parallelism = 'Disabled'`` on every layer of ``model``. """
        # Workaround kept from the original code (it was only labelled
        # "Bug fix"); NOTE(review): the underlying Neon issue is not visible
        # from here - confirm before removing.
        for layer in model.layers.layers:
            layer.parallelism = 'Disabled'

    def _create_layer(self):
        """
        Build a network consistent with the DeepMind Nature paper.

        Returns:
            layers (list): Three convolutional layers, one fully-connected hidden layer and the linear output layer.
        """
        _logger.debug("Output shape = %d" % self.output_shape)
        # create network
        init_norm = Gaussian(loc=0.0, scale=0.01)
        layers = []
        # The first hidden layer convolves 32 filters of 8x8 with stride 4 with the input image and applies a rectifier nonlinearity.
        layers.append(
                Conv((8, 8, 32),
                strides=4,
                init=init_norm,
                activation=Rectlin(),
                batch_norm=self.batch_norm))
        # The second hidden layer convolves 64 filters of 4x4 with stride 2, again followed by a rectifier nonlinearity.
        layers.append(
                Conv((4, 4, 64),
                strides=2,
                init=init_norm,
                activation=Rectlin(),
                batch_norm=self.batch_norm))
        # This is followed by a third convolutional layer that convolves 64 filters of 3x3 with stride 1 followed by a rectifier.
        layers.append(
                Conv((3, 3, 64),
                strides=1,
                init=init_norm,
                activation=Rectlin(),
                batch_norm=self.batch_norm))
        # The final hidden layer is fully-connected and consists of 512 rectifier units.
        layers.append(
                Affine(
                    nout=512,
                    init=init_norm,
                    activation=Rectlin(),
                    batch_norm=self.batch_norm))
        # The output layer is a fully-connected linear layer with a single output for each valid action.
        layers.append(
                Affine(
                    nout= self.output_shape,
                    init = init_norm))
        return layers

    def _set_optimizer(self):
        """
        Initializes the selected optimization algorithm.

        Raises:
            ValueError: If ``args.optimizer`` is not 'rmsprop', 'adam' or 'adadelta'.
        """
        _logger.debug("Optimizer = %s" % str(self.args.optimizer))
        if self.args.optimizer == 'rmsprop':
            self.optimizer = RMSProp(
                    learning_rate = self.args.learning_rate,
                    decay_rate = self.args.decay_rate,
                    stochastic_round = self.args.stochastic_round)
        elif self.args.optimizer == 'adam':
            self.optimizer = Adam(
                    learning_rate = self.args.learning_rate,
                    stochastic_round = self.args.stochastic_round)
        elif self.args.optimizer == 'adadelta':
            self.optimizer = Adadelta(
                    decay = self.args.decay_rate,
                    stochastic_round = self.args.stochastic_round)
        else:
            # The previous "assert false, ..." raised NameError ("false" is
            # not a Python name) and vanished entirely under "python -O".
            raise ValueError("Unknown optimizer: %s" % str(self.args.optimizer))

    def _prepare_network_input(self, states):
        """
        Transforms and normalizes the states from one minibatch.

        Args:
            states (numpy.ndarray): A set of states with the size of a minibatch, batch axis first.
        """
        _logger.debug("Normalizing and transforming input")
        # change order of axes to match what Neon expects (batch axis last)
        states = np.transpose(states, axes = (1, 2, 3, 0))
        # copy() shouldn't be necessary here, but Neon doesn't work otherwise
        self.input.set(states.copy())
        # normalize network input between 0 and 1
        self.be.divide(self.input, self.grayscales, self.input)

    def train(self, minibatch, epoch):
        """
        Prepare, perform and document a complete train step for one minibatch.

        Args:
            minibatch (tuple): (prestates, actions, rewards, poststates, terminals); the states are
                4-dimensional arrays, the remaining entries 1-dimensional arrays of the same batch length.
            epoch (int): Current train epoch
        """
        _logger.debug("Complete trainig step for one minibatch")
        prestates, actions, rewards, poststates, terminals = minibatch
        assert len(prestates.shape) == 4
        assert len(poststates.shape) == 4
        assert len(actions.shape) == 1
        assert len(rewards.shape) == 1
        assert len(terminals.shape) == 1
        assert prestates.shape == poststates.shape
        assert prestates.shape[0] == actions.shape[0] == rewards.shape[0] == poststates.shape[0] == terminals.shape[0]
        # feed-forward pass for poststates to get Q-values
        self._prepare_network_input(poststates)
        postq = self.target_model.fprop(self.input, inference = True)
        assert postq.shape == (self.output_shape, self.batch_size)
        # calculate max Q-value for each poststate
        maxpostq = self.be.max(postq, axis=0).asnumpyarray()
        assert maxpostq.shape == (1, self.batch_size)
        # average maxpostq for stats
        maxpostq_avg = maxpostq.mean()
        # feed-forward pass for prestates
        self._prepare_network_input(prestates)
        preq = self.model.fprop(self.input, inference = False)
        assert preq.shape == (self.output_shape, self.batch_size)
        # make copy of prestate Q-values as targets
        targets = preq.asnumpyarray()
        # clip rewards between min_reward and max_reward
        rewards = np.clip(rewards, self.min_reward, self.max_reward)
        # update Q-value targets for each state only at actions taken
        for i, action in enumerate(actions):
            if terminals[i]:
                targets[action, i] = float(rewards[i])
            else:
                targets[action, i] = float(rewards[i]) + self.discount_rate * maxpostq[0,i]
        # copy targets to GPU memory
        self.targets.set(targets)
        # calculate errors
        errors = self.cost_func.get_errors(preq, self.targets)
        assert errors.shape == (self.output_shape, self.batch_size)
        # average error where there is a error (should be 1 in every row)
        #TODO: errors_avg = np.sum(errors)/np.size(errors[errors>0.])
        # clip errors
        if self.clip_error:
            self.be.clip(errors, -self.clip_error, self.clip_error, out = errors)
        # calculate cost, just in case
        cost = self.cost_func.get_cost(preq, self.targets)
        assert cost.shape == (1,1)
        # perform back-propagation of gradients
        self.model.bprop(errors)
        # perform optimization
        self.optimizer.optimize(self.model.layers_to_optimize, epoch)
        # increase number of weight updates (needed for target clone interval)
        self.update_iterations += 1
        if self.target_update_frequency and self.update_iterations % self.target_update_frequency == 0:
            self._copy_theta()
        # the cost may arrive as a numpy array or as a backend tensor;
        # reduce it to a scalar once for both logging and statistics
        if isinstance(cost, np.ndarray):
            cost_value = cost[0, 0]
        else:
            cost_value = cost.asnumpyarray()[0, 0]
        _logger.info("Network update #%d: Cost = %s, Avg Max Q-value = %s" % (self.update_iterations, str(cost_value), str(maxpostq_avg)))
        # update statistics
        if self.callback:
            self.callback.from_learner(cost_value, maxpostq_avg)

    def get_Q(self, state):
        """
        Calculates the Q-values for one state.

        Args:
            state (numpy.ndarray): Single state, shape=(sequence_length,frame_width,frame_height).

        Returns:
            q_values (numpy.ndarray): Results for first element of mini-batch from one forward pass through the network, shape=(self.output_shape,)
        """
        _logger.debug("State shape = %s" % str(state.shape))
        # minibatch is full size, because Neon doesn't let change the minibatch size,
        # so a whole batch is pushed through and only its first entry is used
        self.dummy_batch[0] = state
        states = self.dummy_batch
        assert states.shape == ((self.batch_size, self.sequence_length,) + self.frame_dims)
        # calculate Q-values for the states
        self._prepare_network_input(states)
        qvalues = self.model.fprop(self.input, inference = True)
        assert qvalues.shape == (self.output_shape, self.batch_size)
        _logger.debug("Qvalues: %s" % (str(qvalues.asnumpyarray()[:,0])))
        return qvalues.asnumpyarray()[:,0]

    def _copy_theta(self):
        """ Copies the weights of the current network to the target network. """
        _logger.debug("Copying weights")
        pdict = self.model.get_description(get_weights=True, keep_states=True)
        self.target_model.deserialize(pdict, load_states=True)

    def save_weights(self, target_dir, epoch):
        """
        Saves the current network parameters to disk.

        Args:
            target_dir (str): Directory where the network parameters are stored for each episode.
            epoch (int): Current epoch (the file name uses epoch + 1).
        """
        filename = "%s_%s_%s_%d.prm" % (str(self.args.game.lower()), str(self.args.learner_type.lower()), str(self.args.optimizer.lower()), (epoch + 1))
        self.model.save_params(os.path.join(target_dir, filename))

    def load_weights(self, source_file):
        """
        Loads the network parameters from a given file.

        Args:
            source_file (str): Complete path to a file with network parameters.
        """
        self.model.load_params(source_file)
momentum_coef=0.9, wdecay=0.0005) opt_b = GradientDescentMomentum(learning_rate=0.01, momentum_coef=0.9) opt = MultiOptimizer({'default': opt_w, 'Bias': opt_b}, name='multiopt') # configure callbacks callbacks = Callbacks(model, eval_set=valid_set, metric=Misclassification(), **args.callback_args) callbacks.add_callback( TrainByStageCallback(model, valid_set, Misclassification(), max_patience=5)) num_prune = [5, 10, 25, 10] callbacks.add_callback( FuzzyPruneCallback(num_states=100, num_prune=num_prune, model=model)) print('Original Accuracy = %.2f%%' % (100. - model.eval(valid_set, metric=Misclassification()) * 100)) logger.info('Training ...') model.fit(train_set, optimizer=opt, num_epochs=250, cost=cost, callbacks=callbacks) print('Accuracy = %.2f%%' % (100. - model.eval(valid_set, metric=Misclassification()) * 100)) model.save_params('./models/mnist/mnistfp.pkl')
def test_model_serialize(backend_default, data):
    """
    Round-trip a trained multistream model through save_params/Model(path)
    and check that outputs, layer states and layer params are unchanged.

    Args:
        backend_default: backend fixture (not referenced in the body)
        data: path handed to ``load_mnist``
    """
    (X_train, y_train), (X_test, y_test), nclass = load_mnist(path=data)

    train_set = ArrayIterator(
        [X_train, X_train], y_train, nclass=nclass, lshape=(1, 28, 28))

    init_norm = Gaussian(loc=0.0, scale=0.01)

    # initialize model
    path1 = Sequential([
        Conv((5, 5, 16), init=init_norm, bias=Constant(0), activation=Rectlin()),
        Pooling(2),
        Affine(nout=20, init=init_norm, bias=init_norm, activation=Rectlin())
    ])
    path2 = Sequential([
        Affine(nout=100, init=init_norm, bias=Constant(0), activation=Rectlin()),
        Dropout(keep=0.5),
        Affine(nout=20, init=init_norm, bias=init_norm, activation=Rectlin())
    ])
    layers = [
        MergeMultistream(layers=[path1, path2], merge="stack"),
        Affine(nout=20, init=init_norm, batch_norm=True, activation=Rectlin()),
        Affine(nout=10, init=init_norm, activation=Logistic(shortcut=True))
    ]

    tmp_save = 'test_model_serialize_tmp_save.pickle'
    mlp = Model(layers=layers)
    mlp.optimizer = GradientDescentMomentum(learning_rate=0.1, momentum_coef=0.9)
    mlp.cost = GeneralizedCost(costfunc=CrossEntropyBinary())
    mlp.initialize(train_set, cost=mlp.cost)
    n_test = 3
    num_epochs = 3

    # Train for num_epochs; each epoch stops once the batch index exceeds n_test
    for epoch in range(num_epochs):
        for i, (x, t) in enumerate(train_set):
            x = mlp.fprop(x)
            delta = mlp.cost.get_errors(x, t)
            mlp.bprop(delta)
            mlp.optimizer.optimize(mlp.layers_to_optimize, epoch=epoch)
            if i > n_test:
                break

    # Get expected outputs of the first batches and states of all layers
    outputs_exp = []
    pdicts_exp = [layer.get_params_serialize() for layer in mlp.layers_to_optimize]
    for i, (x, t) in enumerate(train_set):
        outputs_exp.append(mlp.fprop(x, inference=True))
        if i > n_test:
            break

    # Everything from the save onwards runs under try/finally so the
    # temporary file is removed even when one of the assertions fails.
    try:
        # Serialize model
        mlp.save_params(tmp_save, keep_states=True)

        # Load model
        mlp = Model(tmp_save)
        mlp.initialize(train_set)

        outputs = []
        pdicts = [layer.get_params_serialize() for layer in mlp.layers_to_optimize]
        for i, (x, t) in enumerate(train_set):
            outputs.append(mlp.fprop(x, inference=True))
            if i > n_test:
                break

        # Check outputs, states, and params are the same
        for output, output_exp in zip(outputs, outputs_exp):
            assert np.allclose(output.get(), output_exp.get())

        for pd, pd_exp in zip(pdicts, pdicts_exp):
            for s, s_e in zip(pd['states'], pd_exp['states']):
                if isinstance(s, list):  # this is the batch norm case
                    for _s, _s_e in zip(s, s_e):
                        assert np.allclose(_s, _s_e)
                else:
                    assert np.allclose(s, s_e)
            for p, p_e in zip(pd['params'], pd_exp['params']):
                assert type(p) == type(p_e)
                if isinstance(p, list):  # this is the batch norm case
                    for _p, _p_e in zip(p, p_e):
                        assert np.allclose(_p, _p_e)
                elif isinstance(p, np.ndarray):
                    assert np.allclose(p, p_e)
                else:
                    assert p == p_e
    finally:
        if os.path.exists(tmp_save):
            os.remove(tmp_save)
class NpSemanticSegClassifier:
    """
    NP Semantic Segmentation classifier model (based on Neon framework).

    The cost is always ``GeneralizedCost(costfunc=CrossEntropyBinary())``.

    Args:
        num_epochs(int): number of epochs to train the model
        callback_args (dict): keyword arguments used to init the Callbacks
            object for the model
        optimizer (:obj:`neon.optimizers`): the model's optimizer. When omitted, a new
            'neon.optimizers.GradientDescentMomentum(0.07, momentum_coef=0.9)' is created
    """

    def __init__(self, num_epochs, callback_args, optimizer=None):
        """
        Args:
            num_epochs(int): number of epochs to train the model
            callback_args (dict): keyword arguments used to init the
                Callbacks object for the model
            optimizer (:obj:`neon.optimizers`): the model's optimizer. When omitted, a new
                `neon.optimizers.GradientDescentMomentum(0.07, momentum_coef=0.9)` is created
        """
        # Build the default per instance: an optimizer object placed in the
        # signature would be created once at definition time and shared by
        # every classifier constructed without an explicit optimizer.
        if optimizer is None:
            optimizer = GradientDescentMomentum(0.07, momentum_coef=0.9)
        self.model = None
        self.cost = GeneralizedCost(costfunc=CrossEntropyBinary())
        self.optimizer = optimizer
        self.epochs = num_epochs
        self.callback_args = callback_args

    def build(self):
        """
        Build the model's layers
        """
        first_layer_dens = 64
        second_layer_dens = 64
        output_layer_dens = 2
        # setup weight initialization function
        init_norm = Gaussian(scale=0.01)
        # setup model layers
        layers = [Affine(nout=first_layer_dens, init=init_norm, activation=Rectlin()),
                  Affine(nout=second_layer_dens, init=init_norm, activation=Rectlin()),
                  Affine(nout=output_layer_dens, init=init_norm,
                         activation=Logistic(shortcut=True))]
        # initialize model object
        self.model = Model(layers=layers)

    def fit(self, test_set, train_set):
        """
        Train and fit the model on the datasets

        Args:
            test_set (:obj:`neon.data.ArrayIterators`): The test set
                (passed to Callbacks as ``eval_set``)
            train_set (:obj:`neon.data.ArrayIterators`): The train set
        """
        # configure callbacks
        callbacks = Callbacks(self.model, eval_set=test_set, **self.callback_args)
        self.model.fit(train_set, optimizer=self.optimizer, num_epochs=self.epochs,
                       cost=self.cost, callbacks=callbacks)

    def save(self, model_path):
        """
        Save the model's prm file in model_path location

        Args:
            model_path(str): local path for saving the model
        """
        self.model.save_params(model_path)

    def load(self, model_path):
        """
        Load pre-trained model's .prm file to NpSemanticSegClassifier object

        Args:
            model_path(str): local path for loading the model
        """
        self.model = Model(model_path)

    def eval(self, test_set):
        """
        Evaluate the model's test_set on error_rate, test_accuracy_rate and precision_recall_rate

        Args:
            test_set (ArrayIterator): The test set

        Returns:
            tuple: error_rate, test_accuracy_rate and precision_recall_rate,
            each exactly as returned by ``Model.eval`` for its metric
        """
        error_rate = self.model.eval(test_set, metric=Misclassification())
        test_accuracy_rate = self.model.eval(test_set, metric=Accuracy())
        precision_recall_rate = self.model.eval(test_set, metric=PrecisionRecall(2))
        return error_rate, test_accuracy_rate, precision_recall_rate

    def get_outputs(self, test_set):
        """
        Classify the dataset on the model

        Args:
            test_set (:obj:`neon.data.ArrayIterators`): The test set

        Returns:
            the model's predictions, as returned by ``Model.get_outputs``
        """
        return self.model.get_outputs(test_set)
Affine(nout=4, init=init_uni, activation=Softmax())] cost = GeneralizedCost(costfunc=CrossEntropyMulti()) # Create model mlp = Model(layers=layers) callbacks = Callbacks(mlp, eval_set=test) # Track cost function # Train model mlp.fit(train, optimizer=opt_gdm, num_epochs=num_epochs, cost=cost, callbacks=callbacks) # Check performance print 'Misclassification error = %.1f%%' % (mlp.eval(test, metric=Misclassification())*100) # Save trained model mlp.save_params(param_file_name) # Sanity check from PIL import Image import numpy as np from neon.data.dataiterator import ArrayIterator W = img_size H = img_size L = W*H*3 size = H, W x_new = np.zeros((128, L), dtype=np.float32) def load_sample(test_file_name):
def ShiftAdaMax_with_Scale(LR=1):
    """Return a ShiftAdaMax optimizer whose learning rate is LR_start scaled by LR."""
    scaled_rate = LR_start * LR
    shift_schedule = ShiftSchedule(2, shift_size=1)
    return ShiftAdaMax(learning_rate=scaled_rate, schedule=shift_schedule)


# one optimizer per named layer; layers not listed fall back to 'default'
_per_layer_optimizers = {'default': ShiftAdaMax_with_Scale()}
for _layer_name, _lr_scale in (('BinaryLinear_0', 57.038),
                               ('BinaryLinear_1', 73.9008),
                               ('BinaryLinear_2', 73.9008),
                               ('BinaryLinear_3', 52.3195)):
    _per_layer_optimizers[_layer_name] = ShiftAdaMax_with_Scale(_lr_scale)
optimizer = MultiOptimizer(_per_layer_optimizers)

# build the model from the previously assembled layers
bnn = Model(layers=layers)

# callbacks evaluate on the validation set
callbacks = Callbacks(bnn, eval_set=val_set, **args.callback_args)

# train
bnn.fit(train_set,
        optimizer=optimizer,
        num_epochs=args.epochs,
        cost=cost,
        callbacks=callbacks)

# report the validation misclassification rate as a percentage
_error_rate = bnn.eval(val_set, metric=Misclassification())
print('Misclassification error = %.1f%%' % (_error_rate * 100))

# persist the trained parameters
bnn.save_params("bin_model/final_model.prm")
class ModelRunnerNeon():
    """
    DQN model runner on Neon: owns a training network, a target network of
    the same architecture, the optimizer and the reusable device tensors.
    """

    def __init__(self, args, max_action_no, batch_dimension):
        """
        Args:
            args: settings object; the attributes read here are train_batch_size,
                discount_factor, use_gpu_replay_mem, optimizer, rms_decay and
                learning_rate (other methods read further attributes)
            max_action_no (int): number of network outputs (one per action)
            batch_dimension: 4-element shape with the batch axis first; the
                network input shape is the same shape with the batch axis last
        """
        self.args = args
        self.train_batch_size = args.train_batch_size
        self.discount_factor = args.discount_factor
        self.use_gpu_replay_mem = args.use_gpu_replay_mem

        self.be = gen_backend(backend='gpu', batch_size=self.train_batch_size)

        self.input_shape = (batch_dimension[1], batch_dimension[2], batch_dimension[3], batch_dimension[0])
        self.input = self.be.empty(self.input_shape)
        self.input.lshape = self.input_shape # HACK: needed for convolutional networks
        self.targets = self.be.empty((max_action_no, self.train_batch_size))

        if self.use_gpu_replay_mem:
            self.history_buffer = self.be.zeros(batch_dimension, dtype=np.uint8)
            self.input_uint8 = self.be.empty(self.input_shape, dtype=np.uint8)
        else:
            self.history_buffer = np.zeros(batch_dimension, dtype=np.float32)

        self.train_net = Model(self.create_layers(max_action_no))
        self.cost = GeneralizedCost(costfunc=SumSquared())
        # Bug fix (workaround kept from the original code)
        for l in self.train_net.layers.layers:
            l.parallelism = 'Disabled'
        self.train_net.initialize(self.input_shape[:-1], self.cost)

        self.target_net = Model(self.create_layers(max_action_no))
        # Bug fix (workaround kept from the original code)
        for l in self.target_net.layers.layers:
            l.parallelism = 'Disabled'
        self.target_net.initialize(self.input_shape[:-1])

        if self.args.optimizer == 'Adam':        # Adam
            self.optimizer = Adam(beta_1=args.rms_decay,
                                  beta_2=args.rms_decay,
                                  learning_rate=args.learning_rate)
        else:        # Neon RMSProp
            self.optimizer = RMSProp(decay_rate=args.rms_decay,
                                     learning_rate=args.learning_rate)

        self.max_action_no = max_action_no
        self.running = True

    def get_initializer(self, input_size):
        """
        Return the weight initializer selected by ``args.dnn_initializer``.

        'xavier' gives Xavier(); 'fan_in' gives Uniform over
        +/- 1/sqrt(input_size); anything else gives Gaussian(0, 0.01).
        """
        dnnInit = self.args.dnn_initializer
        if dnnInit == 'xavier':
            initializer = Xavier()
        elif dnnInit == 'fan_in':
            std_dev = 1.0 / math.sqrt(input_size)
            initializer = Uniform(low=-std_dev, high=std_dev)
        else:
            initializer = Gaussian(0, 0.01)
        return initializer

    def create_layers(self, max_action_no):
        """
        Build the layer list: three convolutions, a 512-unit hidden layer and
        a linear output layer with ``max_action_no`` units.
        """
        layers = []

        initializer = self.get_initializer(input_size=4 * 8 * 8)
        layers.append(
            Conv(fshape=(8, 8, 32), strides=4, init=initializer, bias=initializer, activation=Rectlin()))

        initializer = self.get_initializer(input_size=32 * 4 * 4)
        layers.append(
            Conv(fshape=(4, 4, 64), strides=2, init=initializer, bias=initializer, activation=Rectlin()))

        initializer = self.get_initializer(input_size=64 * 3 * 3)
        layers.append(
            Conv(fshape=(3, 3, 64), strides=1, init=initializer, bias=initializer, activation=Rectlin()))

        initializer = self.get_initializer(input_size=7 * 7 * 64)
        layers.append(
            Affine(nout=512, init=initializer, bias=initializer, activation=Rectlin()))

        initializer = self.get_initializer(input_size=512)
        layers.append(
            Affine(nout=max_action_no, init=initializer, bias=initializer))

        return layers

    def clip_reward(self, reward):
        """Clamp ``reward`` to [args.clip_reward_low, args.clip_reward_high]."""
        if reward > self.args.clip_reward_high:
            return self.args.clip_reward_high
        elif reward < self.args.clip_reward_low:
            return self.args.clip_reward_low
        else:
            return reward

    def set_input(self, data):
        """
        Load ``data`` (batch axis first) into ``self.input`` with the batch
        axis moved last and the values divided by 255.
        """
        if self.use_gpu_replay_mem:
            self.be.copy_transpose(data, self.input_uint8, axes=(1, 2, 3, 0))
            self.input[:] = self.input_uint8 / 255
        else:
            self.input.set(data.transpose(1, 2, 3, 0).copy())
            self.be.divide(self.input, 255, self.input)

    def predict(self, history_buffer):
        """Return the training network's outputs for the first batch entry."""
        self.set_input(history_buffer)
        output = self.train_net.fprop(self.input, inference=True)
        return output.T.asnumpyarray()[0]

    def print_weights(self):
        """No-op."""
        pass

    def train(self, minibatch, replay_memory, learning_rate, debug):
        """
        Run one training step on ``minibatch``.

        Args:
            minibatch (tuple): (prestates, actions, rewards, poststates, terminals),
                followed by (replay_indexes, heap_indexes, weights) when
                ``args.prioritized_replay`` is enabled
            replay_memory: receives ``update_td`` calls when prioritized
                replay is enabled
            learning_rate: not used by this method
            debug (bool): print per-sample weights and errors (prioritized
                replay only)
        """
        prioritized = self.args.prioritized_replay == True
        double_dqn = self.args.double_dqn == True

        if prioritized:
            prestates, actions, rewards, poststates, terminals, replay_indexes, heap_indexes, weights = minibatch
        else:
            prestates, actions, rewards, poststates, terminals = minibatch

        # Get Q*(s, a) with targetNet
        self.set_input(poststates)
        post_qvalue = self.target_net.fprop(self.input, inference=True).T.asnumpyarray()

        if double_dqn:
            # Get Q*(s, a) with trainNet
            post_qvalue2 = self.train_net.fprop(self.input, inference=True).T.asnumpyarray()

        # Get Q(s, a) with trainNet
        self.set_input(prestates)
        pre_qvalue = self.train_net.fprop(self.input, inference=False)

        # start from the current predictions so that only the entry of the
        # action actually taken contributes an error
        label = pre_qvalue.asnumpyarray().copy()
        for i in range(self.train_batch_size):
            if self.args.clip_reward:
                reward = self.clip_reward(rewards[i])
            else:
                reward = rewards[i]
            if terminals[i]:
                label[actions[i], i] = reward
            else:
                if double_dqn:
                    # action chosen by the training net, valued by the target net
                    max_index = np.argmax(post_qvalue2[i])
                    label[actions[i], i] = reward + self.discount_factor * post_qvalue[i][max_index]
                else:
                    label[actions[i], i] = reward + self.discount_factor * np.max(post_qvalue[i])

        # copy targets to GPU memory
        self.targets.set(label)

        delta = self.cost.get_errors(pre_qvalue, self.targets)

        if prioritized:
            delta_value = delta.asnumpyarray()
            for i in range(self.train_batch_size):
                td_error = delta_value[actions[i], i]
                if debug:
                    # single-argument print(...) is valid on Python 2 and 3
                    print('weight[%s]: %.5f, delta: %.5f, newDelta: %.5f' % (
                        i, weights[i], td_error, weights[i] * td_error))
                replay_memory.update_td(heap_indexes[i], abs(td_error))
                delta_value[actions[i], i] = weights[i] * td_error
            delta.set(delta_value.copy())

        if self.args.clip_loss:
            self.be.clip(delta, -1.0, 1.0, out=delta)

        self.train_net.bprop(delta)
        self.optimizer.optimize(self.train_net.layers_to_optimize, epoch=0)

    def update_model(self):
        """Copy weights and states from the training net to the target net."""
        # have to serialize also states for batch normalization to work
        pdict = self.train_net.get_description(get_weights=True, keep_states=True)
        self.target_net.deserialize(pdict, load_states=True)
        #print ('Updated target model')

    def finish_train(self):
        """Clear the ``running`` flag."""
        self.running = False

    def load(self, file_name):
        """Load training-net parameters from ``file_name`` and sync the target net."""
        self.train_net.load_params(file_name)
        self.update_model()

    def save(self, file_name):
        """Save the training-net parameters to ``file_name``."""
        self.train_net.save_params(file_name)
lunaModel.load_params(args.model_file)

# evaluate every epoch unless a frequency was given explicitly
callback_settings = args.callback_args
if callback_settings['eval_freq'] is None:
    callback_settings['eval_freq'] = 1

# callbacks evaluate on the validation set and keep the best model state
callbacks = Callbacks(lunaModel, eval_set=valid_set, **callback_settings)
callbacks.add_save_best_state_callback(
    'LUNA16_VGG_model_no_batch_sigmoid_pretrained.prm')

if args.deconv:
    callbacks.add_deconv_callback(train_set, valid_set)

# train, then store the final parameters
lunaModel.fit(
    train_set,
    optimizer=opt,
    num_epochs=num_epochs,
    cost=cost,
    callbacks=callbacks)
lunaModel.save_params('LUNA16_VGG_model_no_batch_sigmoid_pretrained.prm')

# report metrics on the test set
neon_logger.display(
    'Calculating metrics on the test set. This could take a while...')

test_error = lunaModel.eval(test_set, metric=Misclassification())[0]
neon_logger.display(
    'Misclassification error (test) = {:.2f}%'.format(test_error * 100))

test_precision_recall = lunaModel.eval(
    test_set, metric=PrecisionRecall(num_classes=2))
neon_logger.display(
    'Precision/recall (test) = {}'.format(test_precision_recall))
if args.model_file:
    import os
    # Explicit check instead of assert: assertions are stripped under
    # "python -O", which would let a missing file reach load_params().
    if not os.path.exists(args.model_file):
        raise IOError('%s not found' % args.model_file)
    lunaModel.load_params(args.model_file)

# configure callbacks; the validation set is scored with Misclassification
callbacks = Callbacks(lunaModel, eval_set=valid_set,
                      metric=Misclassification(), **args.callback_args)

if args.deconv:
    callbacks.add_deconv_callback(train_set, valid_set)

# train, then store the final parameters
lunaModel.fit(train_set, optimizer=opt_gdm, num_epochs=num_epochs,
              cost=cost, callbacks=callbacks)

lunaModel.save_params('LUNA16_simpleCNN_model.prm')

# report metrics on the validation set
neon_logger.display(
    'Finished training. Calculating error on the validation set...')
neon_logger.display('Misclassification error = {:.2f}%'.format(
    lunaModel.eval(valid_set, metric=Misclassification())[0] * 100))

neon_logger.display('Precision/recall = {}'.format(
    lunaModel.eval(valid_set, metric=PrecisionRecall(num_classes=2))))
# evaluate every epoch unless a frequency was given explicitly
if args.callback_args['eval_freq'] is None:
    args.callback_args['eval_freq'] = 1

# configure callbacks: early stopping plus saving of the best model state
callbacks = Callbacks(mlp, eval_set=valid_set, **args.callback_args)
callbacks.add_early_stop_callback(stop_func)
callbacks.add_save_best_state_callback(os.path.join(args.data_dir, "early_stop-best_state.pkl"))

# run fit
mlp.fit(train_set, optimizer=optimizer, num_epochs=args.epochs, cost=cost, callbacks=callbacks)

# evaluate model
print('Evaluation Error = %.4f'%(mlp.eval(valid_set, metric=SmoothL1Metric())))
print('Test set error = %.4f'%(mlp.eval(test_set, metric=SmoothL1Metric())))

# Saving the model
# print(...) with a single argument works on Python 2 and Python 3 alike;
# the former print statement was a syntax error on Python 3.
print('Saving model parameters!')
mlp.save_params("jobwait_model.prm")

# Reloading saved model
# This should go in run.py
mlp=Model("jobwait_model.prm")
print('Test set error = %.4f'%(mlp.eval(test_set, metric=SmoothL1Metric())))

# save the preprocessor vectors:
np.savez("jobwait_preproc", mean=std_scale.mean_, std=std_scale.scale_)
# labels are shifted down by one
Y = data.train_label - 1
X_test = data.test_data
Y_test = data.test_label - 1

# iterators over single-channel 200x200 inputs with 11 classes;
# the test iterator is built without labels
train_set = ArrayIterator(X=X, y=Y, nclass=11, lshape=(1, 200, 200))
test_set = ArrayIterator(X_test, None, nclass=11, lshape=(1, 200, 200))

init_uni = Uniform(low=-0.1, high=0.1)

# three 4x4 convolution + 2x2 pooling stages, then two fully-connected layers
layers = []
for n_filters in (16, 32, 32):
    layers.append(Conv(fshape=(4, 4, n_filters), init=init_uni, activation=Rectlin()))
    layers.append(Pooling(fshape=2, strides=2))
layers.append(Affine(nout=500, init=init_uni, activation=Rectlin()))
layers.append(Affine(nout=11, init=init_uni, activation=Softmax()))

model = Model(layers)

cost = GeneralizedCost(costfunc=CrossEntropyMulti())
optimizer = GradientDescentMomentum(learning_rate=0.005, momentum_coef=0.9)
callbacks = Callbacks(model, train_set)

# train for 40 epochs and store the parameters
model.fit(
    dataset=train_set,
    cost=cost,
    optimizer=optimizer,
    num_epochs=40,
    callbacks=callbacks)
model.save_params('model.pkl')

# Disabled prediction step, kept for reference:
# out = model.get_outputs(test_set)
# row = len(Y_test)
# result = np.zeros((row,1))
# i=0
# while i<row:
#     result[i] = out[i].argmax()
#     i=i+1
# np.save('result.npy', result)
cost = GeneralizedCost(costfunc=CrossEntropyMulti())

from neon.optimizers import GradientDescentMomentum, RMSProp
optimizer = GradientDescentMomentum(learning_rate=0.005, momentum_coef=0.9)

# Set up callbacks. By default sets up a progress bar
from neon.callbacks.callbacks import Callbacks
callbacks = Callbacks(model, train_set)

# Train, then store the parameters
model.fit(dataset=train_set, cost=cost, optimizer=optimizer, num_epochs=num_epochs, callbacks=callbacks)

model.save_params("cifar10_model.prm")

# Evaluate performance
from neon.transforms import Misclassification
error_pct = 100 * model.eval(test_set, metric=Misclassification())
# print(...) with a single argument works on Python 2 and Python 3 alike;
# the former print statement was a syntax error on Python 3.
print('Misclassification error = %.1f%%' % error_pct)

# Sanity check 1
# an image of a frog from wikipedia
# img_source = "https://upload.wikimedia.org/wikipedia/commons/thumb/5/55/Atelopus_zeteki1.jpg/440px-Atelopus_zeteki1.jpg"

# import urllib
# urllib.urlretrieve(img_source, filename="image.jpg")

from PIL import Image
import numpy as np
class NpSemanticSegClassifier:
    """
    NP Semantic Segmentation classifier model (based on Neon framework).

    The cost function is always ``GeneralizedCost(CrossEntropyBinary())``.

    Args:
        num_epochs (int): number of epochs to train the model
        callback_args (dict): keyword arguments used to init the model's
            ``Callbacks``
        optimizer (:obj:`neon.optimizers`, optional): the model's optimizer.
            When omitted, a new
            ``GradientDescentMomentum(0.07, momentum_coef=0.9)`` is created
            for this instance.
    """

    def __init__(self, num_epochs, callback_args, optimizer=None):
        """
        Args:
            num_epochs (int): number of epochs to train the model
            callback_args (dict): keyword arguments used to init the model's
                ``Callbacks``
            optimizer (:obj:`neon.optimizers`, optional): the model's
                optimizer. ``None`` selects
                ``GradientDescentMomentum(0.07, momentum_coef=0.9)``.
        """
        # Build the default here instead of in the signature: a default
        # expression is evaluated once, at definition time, so every instance
        # would otherwise share one optimizer object created at import.
        if optimizer is None:
            optimizer = GradientDescentMomentum(0.07, momentum_coef=0.9)
        self.model = None
        self.cost = GeneralizedCost(costfunc=CrossEntropyBinary())
        self.optimizer = optimizer
        self.epochs = num_epochs
        self.callback_args = callback_args

    def build(self):
        """
        Build the model's layers: two 64-unit rectified-linear layers followed
        by a 2-unit logistic output layer.
        """
        first_layer_dens = 64
        second_layer_dens = 64
        output_layer_dens = 2
        # setup weight initialization function
        init_norm = Gaussian(scale=0.01)
        # setup model layers
        layers = [
            Affine(nout=first_layer_dens, init=init_norm,
                   activation=Rectlin()),
            Affine(nout=second_layer_dens, init=init_norm,
                   activation=Rectlin()),
            Affine(nout=output_layer_dens, init=init_norm,
                   activation=Logistic(shortcut=True)),
        ]
        # initialize model object
        self.model = Model(layers=layers)

    def fit(self, test_set, train_set):
        """
        Train and fit the model on the datasets.

        Note the argument order: the evaluation set comes first.

        Args:
            test_set (:obj:`neon.data.ArrayIterators`): The test set, used as
                the callbacks' evaluation set
            train_set (:obj:`neon.data.ArrayIterators`): The train set
        """
        # configure callbacks
        callbacks = Callbacks(self.model, eval_set=test_set,
                              **self.callback_args)
        self.model.fit(train_set, optimizer=self.optimizer,
                       num_epochs=self.epochs, cost=self.cost,
                       callbacks=callbacks)

    def save(self, model_path):
        """
        Save the model's prm file in model_path location

        Args:
            model_path(str): local path for saving the model
        """
        self.model.save_params(model_path)

    def load(self, model_path):
        """
        Load pre-trained model's .prm file to NpSemanticSegClassifier object

        Args:
            model_path(str): local path for loading the model
        """
        self.model = Model(model_path)

    def eval(self, test_set):
        """
        Evaluate the model on test_set with the Misclassification, Accuracy
        and PrecisionRecall(2) metrics.

        Args:
            test_set (ArrayIterator): The test set

        Returns:
            tuple: error_rate, test_accuracy_rate and precision_recall_rate,
            each exactly as returned by ``Model.eval`` for its metric
        """
        error_rate = self.model.eval(test_set, metric=Misclassification())
        test_accuracy_rate = self.model.eval(test_set, metric=Accuracy())
        precision_recall_rate = self.model.eval(test_set,
                                                metric=PrecisionRecall(2))
        return error_rate, test_accuracy_rate, precision_recall_rate

    def get_outputs(self, test_set):
        """
        Classify the dataset on the model

        Args:
            test_set (:obj:`neon.data.ArrayIterators`): The test set

        Returns:
            the model's predictions, as returned by ``Model.get_outputs``
        """
        return self.model.get_outputs(test_set)
class DeepQNetwork:
    """Neon-based deep Q-network with a separate target network.

    The online network (``model``) is trained by ``train``; the target
    network (``target_model``) supplies the bootstrap Q-values and is only
    refreshed when ``update_target_network`` is called.

    Args:
        num_actions (int): number of outputs of the final layer.
        batch_size (int): minibatch size the backend and tensors are built for.
        discount_rate (float): factor applied to the max post-state Q-value.
        history_length (int): first dimension of a single network input.
        cols (int), rows (int): remaining two input dimensions.
        clip_error (number): when truthy, errors are clipped to
            ``[-clip_error, clip_error]`` before back-propagation.
        min_reward (number), max_reward (number): bounds rewards are clipped
            to in ``train``.
        batch_norm (bool): forwarded to every hidden layer.
    """

    def __init__(self, num_actions, batch_size=32, discount_rate=0.99,
                 history_length=4, cols=64, rows=64, clip_error=1,
                 min_reward=-1, max_reward=1, batch_norm=False):
        self.num_actions = num_actions
        self.batch_size = batch_size
        self.discount_rate = discount_rate
        self.history_length = history_length
        self.board_dim = (cols, rows)
        self.clip_error = clip_error
        self.min_reward = min_reward
        self.max_reward = max_reward
        self.batch_norm = batch_norm

        self.be = gen_backend(backend='gpu',
                              batch_size=self.batch_size,
                              datatype=np.dtype('float32').type)

        # Input tensor layout: (history, cols, rows, batch).
        self.input_shape = ((self.history_length, ) + self.board_dim +
                            (self.batch_size, ))
        self.input = self.be.empty(self.input_shape)
        # hack from simple_dqn "needed for convolutional networks"
        self.input.lshape = self.input_shape
        self.targets = self.be.empty((self.num_actions, self.batch_size))

        layers = self._createLayers(self.num_actions)
        self.model = Model(layers=layers)
        self.cost = GeneralizedCost(costfunc=SumSquared())
        # for l in self.model.layers.layers:
        #     l.parallelism = 'Disabled'
        self.model.initialize(self.input_shape[:-1], cost=self.cost)
        self.optimizer = RMSProp(learning_rate=0.002,
                                 decay_rate=0.95,
                                 stochastic_round=True)

        self.train_iterations = 0
        self.target_model = Model(layers=self._createLayers(num_actions))
        # for l in self.target_model.layers.layers:
        #     l.parallelism = 'Disabled'
        self.target_model.initialize(self.input_shape[:-1])

        # Optional statistics hook; train() reports the cost to it when set.
        self.callback = None

    def _createLayers(self, num_actions):
        """Return a fresh layer list: three conv layers, a 256-unit affine
        layer and a linear output layer with ``num_actions`` units."""
        init_xavier_conv = Xavier(local=True)
        init_xavier_affine = Xavier(local=False)
        layers = []
        layers.append(
            Conv((8, 8, 32), strides=4, init=init_xavier_conv,
                 activation=Rectlin(), batch_norm=self.batch_norm))
        layers.append(
            Conv((4, 4, 64), strides=2, init=init_xavier_conv,
                 activation=Rectlin(), batch_norm=self.batch_norm))
        layers.append(
            Conv((2, 2, 128), strides=1, init=init_xavier_conv,
                 activation=Rectlin(), batch_norm=self.batch_norm))
        layers.append(
            Affine(nout=256, init=init_xavier_affine, activation=Rectlin(),
                   batch_norm=self.batch_norm))
        layers.append(Affine(nout=num_actions, init=init_xavier_affine))
        return layers

    def _setInput(self, states):
        """Copy ``states`` into the input tensor and rescale it as (x + 1) / 2.

        ``states`` is transposed from batch-first to batch-last order first.
        """
        states = np.transpose(states, axes=(1, 2, 3, 0))
        self.input.set(states.copy())
        # NOTE(review): presumably maps board values in [-1, 1] to [0, 1].
        self.be.add(self.input, 1, self.input)
        self.be.divide(self.input, 2, self.input)

    def update_target_network(self):
        """Copy weights and layer states from the online to the target model."""
        pdict = self.model.get_description(get_weights=True, keep_states=True)
        self.target_model.deserialize(pdict, load_states=True)

    def train(self, minibatch, epoch):
        """Run one Q-learning update on a minibatch.

        Args:
            minibatch: tuple ``(prestates, actions, rewards, poststates,
                terminals)``.
            epoch: epoch number forwarded to the optimizer.
        """
        prestates, actions, rewards, poststates, terminals = minibatch

        # Bootstrap values come from the target network.
        self._setInput(poststates)
        postq = self.target_model.fprop(self.input, inference=True)
        assert postq.shape == (self.num_actions, self.batch_size)
        maxpostq = self.be.max(postq, axis=0).asnumpyarray()
        assert maxpostq.shape == (1, self.batch_size)

        self._setInput(prestates)
        preq = self.model.fprop(self.input, inference=False)
        assert preq.shape == (self.num_actions, self.batch_size)

        # Start from the current predictions so that only the taken action
        # contributes a non-zero error.
        targets = preq.asnumpyarray().copy()
        # Honour the configured reward bounds (defaults are -1 and 1).
        rewards = np.clip(rewards, self.min_reward, self.max_reward)
        for i, action in enumerate(actions):
            if terminals[i]:
                targets[action, i] = float(rewards[i])
            else:
                targets[action, i] = (float(rewards[i]) +
                                      self.discount_rate * maxpostq[0, i])
        self.targets.set(targets)

        deltas = self.cost.get_errors(preq, self.targets)
        assert deltas.shape == (self.num_actions, self.batch_size)
        cost = self.cost.get_cost(preq, self.targets)
        assert cost.shape == (1, 1)

        if self.clip_error:
            self.be.clip(deltas, -self.clip_error, self.clip_error,
                         out=deltas)

        self.model.bprop(deltas)
        self.optimizer.optimize(self.model.layers_to_optimize, epoch)
        self.train_iterations += 1

        # callback stays None unless a statistics hook was attached.
        if self.callback:
            self.callback.on_train(cost[0, 0])

    def predict(self, states):
        """Return Q-values for a full batch of states, batch-first.

        ``states`` must have shape ``(batch_size, history_length, cols, rows)``.
        """
        assert states.shape == ((self.batch_size, self.history_length, ) +
                                self.board_dim)
        self._setInput(states)
        qvalues = self.model.fprop(self.input, inference=True)
        assert qvalues.shape == (self.num_actions, self.batch_size)
        return qvalues.T.asnumpyarray()

    def load_weights(self, load_path):
        """Load the online model's parameters from ``load_path``."""
        self.model.load_params(load_path)

    def save_weights(self, save_path):
        """Save the online model's parameters to ``save_path``."""
        self.model.save_params(save_path)
class WordseqRegressor():
    """Word-sequence regressor: LookupTable -> Dropout -> BiLSTM ->
    RecurrentMean -> single linear output, built on Neon.

    Args:
        pickle_model (str): path of the saved model parameters. The fitted
            WordBatch object lives next to it at ``pickle_model + ".wb"``.
        datadir (str, optional): directory of JSON-lines review files. When
            given, the model is trained from it; when ``None``, parameters
            and the WordBatch object are loaded from ``pickle_model``.
    """

    def __init__(self, pickle_model="", datadir=None):
        self.maxlen = 100
        self.n_words = 100000
        parser = NeonArgparser(__doc__)
        self.args = parser.parse_args()
        self.args.batch_size = self.batch_size = 2048
        # self.args.deterministic = None
        self.args.rng_seed = 0
        backend_args = extract_valid_args(self.args, gen_backend)
        print(backend_args)
        self.be = gen_backend(**backend_args)

        embedding_dim = 100
        init_emb = Uniform(-0.1 / embedding_dim, 0.1 / embedding_dim)
        init_glorot = GlorotUniform()
        self.layers = [
            LookupTable(vocab_size=self.n_words, embedding_dim=embedding_dim,
                        init=init_emb, pad_idx=0, update=True,
                        name="LookupTable"),
            Dropout(keep=0.5),
            BiLSTM(100, init=init_glorot, activation=Tanh(),
                   gate_activation=Logistic(), reset_cells=True,
                   split_inputs=False, name="BiLSTM"),
            RecurrentMean(),
            Affine(1, init_glorot, bias=init_glorot, activation=Identity(),
                   name="Affine"),
        ]

        self.wordbatch = wordbatch.WordBatch(
            normalize_text, n_words=self.n_words,
            extractors=[(wordbatch.WordSeq, {"seq_maxlen": self.maxlen})])

        if datadir is None:
            self.model = Model(self.layers)
            self.model.load_params(pickle_model)
            # NOTE(security): unpickling executes arbitrary code; only load
            # .wb files from a trusted source.
            with gzip.open(pickle_model + ".wb", 'rb') as wb_file:
                self.wordbatch = pkl.load(wb_file)
        else:
            self.train(datadir, pickle_model)

    def remove_unks(self, x):
        """Replace every index >= ``n_words`` with ``n_words``.

        NOTE(review): the LookupTable is built with vocab_size=n_words, so
        index n_words itself may be out of range -- confirm against WordSeq.
        """
        return [[self.n_words if w >= self.n_words else w for w in sen]
                for sen in x]

    def format_texts(self, texts):
        """Transform raw texts with the WordBatch object and cap indices."""
        return self.remove_unks(self.wordbatch.transform(texts))

    class ThreadWithReturnValue(Thread):
        """Thread whose ``join()`` returns the target's return value.

        The target and its arguments are stored on the instance rather than
        read from Thread's private attributes, whose names differ between
        Python 2 and 3. ``Verbose`` is accepted for backward compatibility
        and is not used.
        """

        def __init__(self, group=None, target=None, name=None, args=(),
                     kwargs=None, Verbose=None):
            Thread.__init__(self, group=group, name=name)
            self._call_target = target
            self._call_args = args
            # A fresh dict per thread instead of a shared mutable default.
            self._call_kwargs = {} if kwargs is None else kwargs
            self._return = None

        def run(self):
            if self._call_target is not None:
                self._return = self._call_target(*self._call_args,
                                                 **self._call_kwargs)

        def join(self):
            Thread.join(self)
            return self._return

    def train(self, datadir, pickle_model=""):
        """Train on the review files in ``datadir``; optionally save.

        Every 8th review that has an "Overall" rating is kept. Texts are
        transformed in batches of 100000 on a background thread while
        reading continues.

        Args:
            datadir (str): directory whose files hold one JSON object per
                line, each with a "Reviews" list.
            pickle_model (str): when non-empty, parameters are saved there
                and the WordBatch object to ``pickle_model + ".wb"``.
        """
        texts = []
        labels = []
        rcount = 0
        texts2 = []
        batchsize = 100000
        t = None
        for jsonfile in os.listdir(datadir):
            with open(os.path.join(datadir, jsonfile), 'r') as inputfile:
                for line in inputfile:
                    # if rcount > 1000000: break
                    try:
                        line = json.loads(line.strip())
                    except ValueError:
                        # Skip lines that are not valid JSON.
                        continue
                    for review in line["Reviews"]:
                        rcount += 1
                        if rcount % 100000 == 0:
                            print(rcount)
                        if rcount % 8 != 0:
                            continue
                        if "Overall" not in review["Ratings"]:
                            continue
                        texts.append(review["Content"])
                        # Centre the rating on 3 and halve it (presumably
                        # maps a 1-5 scale onto [-1, 1]).
                        labels.append(
                            (float(review["Ratings"]["Overall"]) - 3) * 0.5)
                        if len(texts) % batchsize == 0:
                            # Collect the previous batch before starting the
                            # next one so results stay in reading order.
                            if t is not None:
                                texts2.append(t.join())
                            t = self.ThreadWithReturnValue(
                                target=self.wordbatch.transform,
                                args=(texts, ))
                            t.start()
                            texts = []
        # No thread exists when fewer than `batchsize` texts were collected.
        if t is not None:
            texts2.append(t.join())
        texts2.append(self.wordbatch.transform(texts))
        del texts
        texts = sp.vstack(texts2)

        self.wordbatch.dictionary_freeze = True

        features = np.asarray(texts, dtype='int32')
        targets = np.asanyarray(labels, dtype='float32')
        targets.shape = (targets.shape[0], 1)

        num_epochs = 10
        cost = GeneralizedCost(costfunc=SumSquared())
        self.model = Model(layers=self.layers)
        optimizer = Adam(learning_rate=0.01)

        # Shuffle features and targets with the same permutation.
        index_shuf = list(range(len(features)))
        random.shuffle(index_shuf)
        features = np.asarray(features[index_shuf], dtype='int32')
        targets = np.asarray(targets[index_shuf], dtype='float32')

        train_iter = ArrayIterator(features, targets, nclass=1,
                                   make_onehot=False)
        self.model.fit(train_iter, optimizer=optimizer, num_epochs=num_epochs,
                       cost=cost,
                       callbacks=Callbacks(self.model,
                                           **self.args.callback_args))

        if pickle_model != "":
            self.model.save_params(pickle_model)
            with gzip.open(pickle_model + ".wb", 'wb') as model_file:
                pkl.dump(self.wordbatch, model_file, protocol=2)

    def predict_batch(self, texts):
        """Return one predicted value per text in ``texts``.

        ``texts`` may be any sized sequence; the zero array only fills the
        iterator's target slot.
        """
        features = np.array(self.format_texts(texts))
        placeholder = np.zeros((len(texts), 1))
        test = ArrayIterator(features, placeholder, nclass=1,
                             make_onehot=False)
        return [row[0] for row in self.model.get_outputs(test)]
from neon.layers import GeneralizedCost
from neon.transforms import CrossEntropyMulti
# Multi-class cross-entropy cost for the classifier.
cost = GeneralizedCost(costfunc=CrossEntropyMulti())

from neon.optimizers import GradientDescentMomentum, RMSProp
optimizer = GradientDescentMomentum(learning_rate=0.005, momentum_coef=0.9)

# Set up callbacks. By default sets up a progress bar
from neon.callbacks.callbacks import Callbacks
callbacks = Callbacks(model, train_set)

# Train, then save the learned parameters to disk.
model.fit(dataset=train_set, cost=cost, optimizer=optimizer,
          num_epochs=num_epochs, callbacks=callbacks)
model.save_params("cifar10_model.prm")

# Evaluate performance
from neon.transforms import Misclassification
error_pct = 100 * model.eval(test_set, metric=Misclassification())
# Parenthesized single-argument form prints identically on Python 2 and 3.
print('Misclassification error = %.1f%%' % error_pct)

# Sanity check 1
# an image of a frog from wikipedia
# img_source = "https://upload.wikimedia.org/wikipedia/commons/thumb/5/55/Atelopus_zeteki1.jpg/440px-Atelopus_zeteki1.jpg"
# import urllib
# urllib.urlretrieve(img_source, filename="image.jpg")
from PIL import Image
import numpy as np
class SequenceChunker(object):
    """
    Sequence chunker model (Neon based)

    Args:
        sentence_length (int): max sentence length
        token_vocab_size (int): word vocabulary size
        pos_vocab_size (int, optional): POS vocabulary size
        char_vocab_size (int, optional): characters vocabulary size
        max_char_word_length (int, optional): max word length in characters
        token_embedding_size (int, optional): word embedding dims
        pos_embedding_size (int, optional): POS embedding dims
        char_embedding_size (int, optional): character embedding dims
        num_labels (int, optional): number of output labels possible per token
        lstm_hidden_size (int, optional): LSTM hidden size
        num_lstm_layers (int, optional): number of LSTM layers
        use_external_embedding (bool, optional): input is provided as
            external word embedding; any falsy value (``None``, ``False``)
            selects the trainable token lookup table instead
        dropout (float, optional): value passed to ``Dropout(keep=...)``,
            i.e. the keep probability
    """

    def __init__(self, sentence_length, token_vocab_size,
                 pos_vocab_size=None, char_vocab_size=None,
                 max_char_word_length=20, token_embedding_size=None,
                 pos_embedding_size=None, char_embedding_size=None,
                 num_labels=None, lstm_hidden_size=100, num_lstm_layers=1,
                 use_external_embedding=None, dropout=0.5):
        init = GlorotUniform()

        # Token stream: external embeddings arrive as data, otherwise a
        # lookup table is learned. Truthiness (not `is None`) is tested so
        # that an explicit False also selects the lookup table.
        tokens = []
        if not use_external_embedding:
            tokens.append(LookupTable(vocab_size=token_vocab_size,
                                      embedding_dim=token_embedding_size,
                                      init=init,
                                      pad_idx=0))
        else:
            tokens.append(DataInput())
        tokens.append(Reshape((-1, sentence_length)))
        f_layers = [tokens]

        # add POS tag input
        if pos_vocab_size is not None and pos_embedding_size is not None:
            f_layers.append([
                LookupTable(vocab_size=pos_vocab_size,
                            embedding_dim=pos_embedding_size,
                            init=init,
                            pad_idx=0),
                Reshape((-1, sentence_length)),
            ])

        # add Character RNN input
        if char_vocab_size is not None and char_embedding_size is not None:
            char_lut_layer = LookupTable(vocab_size=char_vocab_size,
                                         embedding_dim=char_embedding_size,
                                         init=init,
                                         pad_idx=0)
            # NOTE(review): activation=Logistic / gate_activation=Tanh is the
            # reverse of the usual LSTM pairing -- confirm this is intended.
            char_nn = [
                char_lut_layer,
                TimeDistBiLSTM(char_embedding_size, init,
                               activation=Logistic(),
                               gate_activation=Tanh(),
                               reset_cells=True,
                               reset_freq=max_char_word_length),
                TimeDistributedRecurrentLast(timesteps=max_char_word_length),
                Reshape((-1, sentence_length)),
            ]
            f_layers.append(char_nn)

        layers = []
        if len(f_layers) == 1:
            # Single stream: use its input layer directly; the reshape is
            # appended below.
            layers.append(f_layers[0][0])
        else:
            layers.append(MergeMultistream(layers=f_layers, merge="stack"))
        layers.append(Reshape((-1, sentence_length)))
        layers += [
            DeepBiLSTM(lstm_hidden_size, init,
                       activation=Logistic(),
                       gate_activation=Tanh(),
                       reset_cells=True,
                       depth=num_lstm_layers),
            Dropout(keep=dropout),
            Affine(num_labels, init, bias=init, activation=Softmax()),
        ]
        self._model = Model(layers=layers)

    def fit(self, dataset, optimizer, cost, callbacks, epochs=10):
        """
        fit a model

        Args:
            dataset: train/test set of CONLL2000 dataset
            optimizer: optimizer (Neon based)
            cost: cost function (Neon based)
            callbacks: callbacks (Neon based)
            epochs (int, optional): number of epochs to train
        """
        self._model.fit(dataset,
                        optimizer=optimizer,
                        num_epochs=epochs,
                        cost=cost,
                        callbacks=callbacks)

    def predict(self, dataset):
        """
        predict output of given dataset

        Args:
            dataset: Neon based iterator

        Returns:
            prediction on given dataset
        """
        return self._model.get_outputs(dataset)

    def save(self, path):
        """
        Save model weights to path

        Args:
            path (str): path to weights file
        """
        self._model.save_params(path)

    def get_model(self):
        """
        Get model

        Returns:
            Neon model object
        """
        return self._model
class SequenceChunker(object):
    """
    Sequence chunker model (Neon based)

    Args:
        sentence_length (int): max sentence length
        token_vocab_size (int): word vocabulary size
        pos_vocab_size (int, optional): POS vocabulary size
        char_vocab_size (int, optional): characters vocabulary size
        max_char_word_length (int, optional): max word length in characters
        token_embedding_size (int, optional): word embedding dims
        pos_embedding_size (int, optional): POS embedding dims
        char_embedding_size (int, optional): character embedding dims
        num_labels (int, optional): number of output labels possible per token
        lstm_hidden_size (int, optional): LSTM hidden size
        num_lstm_layers (int, optional): number of LSTM layers
        use_external_embedding (bool, optional): input is provided as
            external word embedding; any falsy value (``None``, ``False``)
            selects the trainable token lookup table instead
        dropout (float, optional): value passed to ``Dropout(keep=...)``,
            i.e. the keep probability
    """

    def __init__(self,
                 sentence_length,
                 token_vocab_size,
                 pos_vocab_size=None,
                 char_vocab_size=None,
                 max_char_word_length=20,
                 token_embedding_size=None,
                 pos_embedding_size=None,
                 char_embedding_size=None,
                 num_labels=None,
                 lstm_hidden_size=100,
                 num_lstm_layers=1,
                 use_external_embedding=None,
                 dropout=0.5):
        init = GlorotUniform()

        # Token stream: external embeddings arrive as data, otherwise a
        # lookup table is learned. Truthiness (not `is None`) is tested so
        # that an explicit False also selects the lookup table.
        tokens = []
        if not use_external_embedding:
            tokens.append(
                LookupTable(vocab_size=token_vocab_size,
                            embedding_dim=token_embedding_size,
                            init=init,
                            pad_idx=0))
        else:
            tokens.append(DataInput())
        tokens.append(Reshape((-1, sentence_length)))
        f_layers = [tokens]

        # add POS tag input
        if pos_vocab_size is not None and pos_embedding_size is not None:
            f_layers.append([
                LookupTable(vocab_size=pos_vocab_size,
                            embedding_dim=pos_embedding_size,
                            init=init,
                            pad_idx=0),
                Reshape((-1, sentence_length)),
            ])

        # add Character RNN input
        if char_vocab_size is not None and char_embedding_size is not None:
            char_lut_layer = LookupTable(vocab_size=char_vocab_size,
                                         embedding_dim=char_embedding_size,
                                         init=init,
                                         pad_idx=0)
            # NOTE(review): activation=Logistic / gate_activation=Tanh is the
            # reverse of the usual LSTM pairing -- confirm this is intended.
            char_nn = [
                char_lut_layer,
                TimeDistBiLSTM(char_embedding_size,
                               init,
                               activation=Logistic(),
                               gate_activation=Tanh(),
                               reset_cells=True,
                               reset_freq=max_char_word_length),
                TimeDistributedRecurrentLast(timesteps=max_char_word_length),
                Reshape((-1, sentence_length)),
            ]
            f_layers.append(char_nn)

        layers = []
        if len(f_layers) == 1:
            # Single stream: use its input layer directly; the reshape is
            # appended below.
            layers.append(f_layers[0][0])
        else:
            layers.append(MergeMultistream(layers=f_layers, merge="stack"))
        layers.append(Reshape((-1, sentence_length)))
        layers += [
            DeepBiLSTM(lstm_hidden_size,
                       init,
                       activation=Logistic(),
                       gate_activation=Tanh(),
                       reset_cells=True,
                       depth=num_lstm_layers),
            Dropout(keep=dropout),
            Affine(num_labels, init, bias=init, activation=Softmax()),
        ]
        self._model = Model(layers=layers)

    def fit(self, dataset, optimizer, cost, callbacks, epochs=10):
        """
        fit a model

        Args:
            dataset: train/test set of CONLL2000 dataset
            optimizer: optimizer (Neon based)
            cost: cost function (Neon based)
            callbacks: callbacks (Neon based)
            epochs (int, optional): number of epochs to train
        """
        self._model.fit(dataset,
                        optimizer=optimizer,
                        num_epochs=epochs,
                        cost=cost,
                        callbacks=callbacks)

    def predict(self, dataset):
        """
        predict output of given dataset

        Args:
            dataset: Neon based iterator

        Returns:
            prediction on given dataset
        """
        return self._model.get_outputs(dataset)

    def save(self, path):
        """
        Save model weights to path

        Args:
            path (str): path to weights file
        """
        self._model.save_params(path)

    def get_model(self):
        """
        Get model

        Returns:
            Neon model object
        """
        return self._model
validation=False, remove_history=False, minimal_set=False, next_N=3) valid = HDF5Iterator(filenames, ndata=(16 * 2014), validation=True, remove_history=False, minimal_set=False, next_N=1) out1, out2, out3 = model.layers.get_terminal() cost = Multicost(costs=[GeneralizedCost(costfunc=CrossEntropyMulti(usebits=True)), GeneralizedCost(costfunc=CrossEntropyMulti(usebits=True)), GeneralizedCost(costfunc=CrossEntropyMulti(usebits=True))]) schedule = ExpSchedule(decay=(1.0 / 50)) # halve the learning rate every 50 epochs opt_gdm = GradientDescentMomentum(learning_rate=0.01, momentum_coef=0.9, stochastic_round=args.rounding, gradient_clip_value=1, gradient_clip_norm=5, wdecay=0.0001, schedule=schedule) callbacks = Callbacks(model, eval_set=valid, metric=TopKMisclassification(5), **args.callback_args) callbacks.add_save_best_state_callback(os.path.join(args.workspace_dir, "best_state_h5resnet.pkl")) model.fit(train, optimizer=opt_gdm, num_epochs=num_epochs, cost=cost, callbacks=callbacks) model.save_params(os.path.join(args.workspace_dir, "final_state_h5resnet.pkl"))
] model = Model(layers=layers) cost = GeneralizedCost(costfunc=CrossEntropyMulti()) # define optimizer opt_w = GradientDescentMomentum(learning_rate=0.01, momentum_coef=0.9, wdecay=0.0005) opt_b = GradientDescentMomentum(learning_rate=0.01, momentum_coef=0.9) opt = MultiOptimizer({'default': opt_w, 'Bias': opt_b}, name='multiopt') # configure callbacks callbacks = Callbacks(model, eval_set=valid_set, metric=Misclassification(), **args.callback_args) callbacks.add_callback( TrainByStageCallback(model, valid_set, Misclassification(), max_patience=5)) logger.info('Training ...') model.fit(train_set, optimizer=opt, num_epochs=250, cost=cost, callbacks=callbacks) print('Accuracy = %.2f%%' % (100. - model.eval(valid_set, metric=Misclassification()) * 100)) model.save_params('./models/mnist/mnist_cnn.pkl')
mlp = Model(layers=layers) callbacks = Callbacks(mlp, eval_set=test) # Track cost function # Train model mlp.fit(train, optimizer=opt_gdm, num_epochs=num_epochs, cost=cost, callbacks=callbacks) # Check performance print 'Misclassification error = %.1f%%' % ( mlp.eval(test, metric=Misclassification()) * 100) # Save trained model mlp.save_params(param_file_name) # Sanity check 1 from PIL import Image import numpy as np from neon.data.dataiterator import ArrayIterator W = img_size H = img_size L = W * H * 3 size = H, W x_new = np.zeros((128, L), dtype=np.float32) def load_sample(test_file_name): image = Image.open(test_file_name)
lunaModel = Model(layers=vgg_layers)

# Optionally resume from previously saved parameters.
if args.model_file:
    import os
    # Explicit check instead of `assert`, which is stripped under `python -O`
    # and would let a missing file reach load_params.
    if not os.path.exists(args.model_file):
        raise IOError('%s not found' % args.model_file)
    lunaModel.load_params(args.model_file)

# configure callbacks
#callbacks = Callbacks(lunaModel, eval_set=valid_set, **args.callback_args)
callbacks = Callbacks(lunaModel, eval_set=valid_set,
                      metric=Misclassification(), **args.callback_args)

if args.deconv:
    callbacks.add_deconv_callback(train_set, valid_set)

lunaModel.fit(train_set, optimizer=opt, num_epochs=num_epochs, cost=cost,
              callbacks=callbacks)

lunaModel.save_params('LUNA16_VGG_model_no_batch.prm')

# neon_logger.display('Finished training. Calculating error on the validation set...')
# neon_logger.display('Misclassification error (validation) = {:.2f}%'.format(lunaModel.eval(valid_set, metric=Misclassification())[0] * 100))
# neon_logger.display('Precision/recall (validation) = {}'.format(lunaModel.eval(valid_set, metric=PrecisionRecall(num_classes=2))))
# neon_logger.display('Calculating metrics on the test set. This could take a while...')
# neon_logger.display('Misclassification error (test) = {:.2f}%'.format(lunaModel.eval(test_set, metric=Misclassification())[0] * 100))
# neon_logger.display('Precision/recall (test) = {}'.format(lunaModel.eval(test_set, metric=PrecisionRecall(num_classes=2))))
class DeepQNetwork:
    """Neon-based deep Q-network configured from a parsed-arguments object.

    Args:
        num_actions (int): number of outputs of the final layer.
        args: settings object; the attributes read are batch_size,
            discount_rate, history_length, screen_height, screen_width,
            clip_error, min_reward, max_reward, batch_norm, backend,
            random_seed, device_id, datatype, stochastic_round, optimizer,
            learning_rate, decay_rate, target_steps and (only when
            target_steps is truthy) save_weights_prefix.

    Raises:
        ValueError: if ``args.optimizer`` is not 'rmsprop', 'adam' or
            'adadelta'.
    """

    def __init__(self, num_actions, args):
        # remember parameters
        self.num_actions = num_actions
        self.batch_size = args.batch_size
        self.discount_rate = args.discount_rate
        self.history_length = args.history_length
        self.screen_dim = (args.screen_height, args.screen_width)
        self.clip_error = args.clip_error
        self.min_reward = args.min_reward
        self.max_reward = args.max_reward
        self.batch_norm = args.batch_norm

        # create Neon backend
        self.be = gen_backend(backend=args.backend,
                              batch_size=args.batch_size,
                              rng_seed=args.random_seed,
                              device_id=args.device_id,
                              datatype=np.dtype(args.datatype).type,
                              stochastic_round=args.stochastic_round)

        # prepare tensors once and reuse them
        self.input_shape = ((self.history_length,) + self.screen_dim +
                            (self.batch_size,))
        self.input = self.be.empty(self.input_shape)
        # HACK: needed for convolutional networks
        self.input.lshape = self.input_shape
        self.targets = self.be.empty((self.num_actions, self.batch_size))

        # create model
        layers = self._createLayers(num_actions)
        self.model = Model(layers=layers)
        self.cost = GeneralizedCost(costfunc=SumSquared())
        # Bug fix
        for layer in self.model.layers.layers:
            layer.parallelism = 'Disabled'
        self.model.initialize(self.input_shape[:-1], self.cost)

        if args.optimizer == 'rmsprop':
            self.optimizer = RMSProp(learning_rate=args.learning_rate,
                                     decay_rate=args.decay_rate,
                                     stochastic_round=args.stochastic_round)
        elif args.optimizer == 'adam':
            self.optimizer = Adam(learning_rate=args.learning_rate,
                                  stochastic_round=args.stochastic_round)
        elif args.optimizer == 'adadelta':
            self.optimizer = Adadelta(decay=args.decay_rate,
                                      stochastic_round=args.stochastic_round)
        else:
            # Previously `assert false, ...`, which raised NameError because
            # `false` is not a Python name.
            raise ValueError("Unknown optimizer: %r" % (args.optimizer,))

        # create target model
        self.target_steps = args.target_steps
        self.train_iterations = 0
        if self.target_steps:
            self.target_model = Model(layers=self._createLayers(num_actions))
            # Bug fix
            for layer in self.target_model.layers.layers:
                layer.parallelism = 'Disabled'
            self.target_model.initialize(self.input_shape[:-1])
            self.save_weights_prefix = args.save_weights_prefix
        else:
            # Without a clone interval the online model doubles as target.
            self.target_model = self.model

        # Optional statistics hook; train() reports the cost to it when set.
        self.callback = None

    def _createLayers(self, num_actions):
        """Return a fresh list of layers for one network."""
        # create network
        init_norm = Gaussian(loc=0.0, scale=0.01)
        layers = []
        # The first hidden layer convolves 32 filters of 8x8 with stride 4
        # with the input image and applies a rectifier nonlinearity.
        layers.append(Conv((8, 8, 32), strides=4, init=init_norm,
                           activation=Rectlin(), batch_norm=self.batch_norm))
        # The second hidden layer convolves 64 filters of 4x4 with stride 2,
        # again followed by a rectifier nonlinearity.
        layers.append(Conv((4, 4, 64), strides=2, init=init_norm,
                           activation=Rectlin(), batch_norm=self.batch_norm))
        # This is followed by a third convolutional layer that convolves 64
        # filters of 3x3 with stride 1 followed by a rectifier.
        layers.append(Conv((3, 3, 64), strides=1, init=init_norm,
                           activation=Rectlin(), batch_norm=self.batch_norm))
        # The final hidden layer is fully-connected and consists of 512
        # rectifier units.
        layers.append(Affine(nout=512, init=init_norm, activation=Rectlin(),
                             batch_norm=self.batch_norm))
        # The output layer is a fully-connected linear layer with a single
        # output for each valid action.
        layers.append(Affine(nout=num_actions, init=init_norm))
        return layers

    def _setInput(self, states):
        """Copy batch-first ``states`` into the input tensor, divided by 255."""
        # change order of axes to match what Neon expects
        states = np.transpose(states, axes=(1, 2, 3, 0))
        # copy() shouldn't be necessary here, but Neon doesn't work otherwise
        self.input.set(states.copy())
        # normalize network input between 0 and 1
        self.be.divide(self.input, 255, self.input)

    def train(self, minibatch, epoch):
        """Run one Q-learning update on a minibatch.

        Args:
            minibatch: tuple ``(prestates, actions, rewards, poststates,
                terminals)``; the states are 4-D arrays, the others 1-D,
                all with the same first dimension.
            epoch: epoch number forwarded to the optimizer.
        """
        # expand components of minibatch
        prestates, actions, rewards, poststates, terminals = minibatch
        assert len(prestates.shape) == 4
        assert len(poststates.shape) == 4
        assert len(actions.shape) == 1
        assert len(rewards.shape) == 1
        assert len(terminals.shape) == 1
        assert prestates.shape == poststates.shape
        assert (prestates.shape[0] == actions.shape[0] == rewards.shape[0]
                == poststates.shape[0] == terminals.shape[0])

        if (self.target_steps
                and self.train_iterations % self.target_steps == 0):
            # have to serialize also states for batch normalization to work
            pdict = self.model.get_description(get_weights=True,
                                               keep_states=True)
            self.target_model.deserialize(pdict, load_states=True)

        # feed-forward pass for poststates to get Q-values
        self._setInput(poststates)
        postq = self.target_model.fprop(self.input, inference=True)
        assert postq.shape == (self.num_actions, self.batch_size)

        # calculate max Q-value for each poststate
        maxpostq = self.be.max(postq, axis=0).asnumpyarray()
        assert maxpostq.shape == (1, self.batch_size)

        # feed-forward pass for prestates
        self._setInput(prestates)
        preq = self.model.fprop(self.input, inference=False)
        assert preq.shape == (self.num_actions, self.batch_size)

        # make copy of prestate Q-values as targets
        # It seems necessary for cpu backend.
        targets = preq.asnumpyarray().copy()

        # clip rewards between min_reward and max_reward
        rewards = np.clip(rewards, self.min_reward, self.max_reward)

        # update Q-value targets for actions taken
        for i, action in enumerate(actions):
            if terminals[i]:
                targets[action, i] = float(rewards[i])
            else:
                targets[action, i] = (float(rewards[i]) +
                                      self.discount_rate * maxpostq[0, i])

        # copy targets to GPU memory
        self.targets.set(targets)

        # calculate errors
        deltas = self.cost.get_errors(preq, self.targets)
        assert deltas.shape == (self.num_actions, self.batch_size)
        #assert np.count_nonzero(deltas.asnumpyarray()) == 32

        # calculate cost, just in case
        cost = self.cost.get_cost(preq, self.targets)
        assert cost.shape == (1, 1)

        # clip errors
        if self.clip_error:
            self.be.clip(deltas, -self.clip_error, self.clip_error,
                         out=deltas)

        # perform back-propagation of gradients
        self.model.bprop(deltas)

        # perform optimization
        self.optimizer.optimize(self.model.layers_to_optimize, epoch)

        # increase number of weight updates (needed for target clone interval)
        self.train_iterations += 1

        # calculate statistics
        if self.callback:
            self.callback.on_train(cost[0, 0])

    def predict(self, states):
        """Return Q-values for a full batch of states, batch-first.

        ``states`` must have shape
        ``(batch_size, history_length, screen_height, screen_width)``.
        """
        # minibatch is full size, because Neon doesn't let change the
        # minibatch size
        assert states.shape == ((self.batch_size, self.history_length,) +
                                self.screen_dim)

        # calculate Q-values for the states
        self._setInput(states)
        qvalues = self.model.fprop(self.input, inference=True)
        assert qvalues.shape == (self.num_actions, self.batch_size)
        if logger.isEnabledFor(logging.DEBUG):
            logger.debug("Q-values: " + str(qvalues.asnumpyarray()[:, 0]))

        # transpose the result, so that batch size is first dimension
        return qvalues.T.asnumpyarray()

    def load_weights(self, load_path):
        """Load the online model's parameters from ``load_path``."""
        self.model.load_params(load_path)

    def save_weights(self, save_path):
        """Save the online model's parameters to ``save_path``."""
        self.model.save_params(save_path)
# Training and validation iterators read the 'X' and 'y' entries of every
# open HDF5 handle.
train = HDF5Iterator(filenames,
                     [h['X'] for h in h5s],
                     [h['y'] for h in h5s],
                     ndata=(256 * 1024),
                     validation=False,
                     remove_history=True)
valid = HDF5Iterator(filenames,
                     [h['X'] for h in h5s],
                     [h['y'] for h in h5s],
                     ndata=1024,
                     validation=True,
                     remove_history=True)

cost = GeneralizedCost(costfunc=CrossEntropyBinary())
opt_gdm = GradientDescentMomentum(learning_rate=0.01,
                                  momentum_coef=0.9,
                                  stochastic_round=args.rounding)
callbacks = Callbacks(model,
                      eval_set=valid,
                      metric=TopKMisclassification(5),
                      **args.callback_args)

# Start from whatever parameters the server currently holds.
old_params = get_model_params(args.server_address)

# Endless round trip: load the server parameters, train one epoch, save a
# checkpoint, send the deltas back and continue from the server's reply.
num_iterations = 1
while True:
    update_model(model, old_params)
    model.fit(train,
              optimizer=opt_gdm,
              num_epochs=1,
              cost=cost,
              callbacks=callbacks)
    model.save_params(
        os.path.join(args.workspace_dir,
                     "iter_{}.pkl".format(num_iterations)))
    deltas = compute_deltas(old_params, model)
    old_params = put_deltas(args.server_address, deltas)
    num_iterations += 1
class DeepQNetwork:
    """Deep Q-network (three conv layers + two affine layers) on a Neon backend.

    The network works on fixed-size minibatches: inputs are laid out as
    ``(history_length, screen_height, screen_width, batch_size)`` and outputs
    as ``(num_actions, batch_size)``.  When ``args.target_steps`` is set, a
    separate target network is kept and refreshed from the online network
    every ``target_steps`` training iterations; otherwise the online network
    is used as its own target.
    """

    def __init__(self, num_actions, args):
        """Build backend, tensors, online/target models and the optimizer.

        Args:
            num_actions: number of outputs of the final layer.
            args: settings namespace; the attributes read here are
                batch_size, discount_rate, history_length, screen_height,
                screen_width, clip_error, min_reward, max_reward, batch_norm,
                backend, random_seed, device_id, datatype, stochastic_round,
                optimizer, learning_rate, decay_rate, target_steps and
                (only when target_steps is set) save_weights_prefix.

        Raises:
            ValueError: if ``args.optimizer`` is not a supported name.
        """
        # remember parameters
        self.num_actions = num_actions
        self.batch_size = args.batch_size
        self.discount_rate = args.discount_rate
        self.history_length = args.history_length
        self.screen_dim = (args.screen_height, args.screen_width)
        self.clip_error = args.clip_error
        self.min_reward = args.min_reward
        self.max_reward = args.max_reward
        self.batch_norm = args.batch_norm

        # create Neon backend
        self.be = gen_backend(backend=args.backend,
                              batch_size=args.batch_size,
                              rng_seed=args.random_seed,
                              device_id=args.device_id,
                              datatype=np.dtype(args.datatype).type,
                              stochastic_round=args.stochastic_round)

        # prepare tensors once and reuse them
        self.input_shape = (self.history_length,) + self.screen_dim + (self.batch_size,)
        self.input = self.be.empty(self.input_shape)
        self.input.lshape = self.input_shape  # HACK: needed for convolutional networks
        self.targets = self.be.empty((self.num_actions, self.batch_size))

        # create model
        layers = self._createLayers(num_actions)
        self.model = Model(layers=layers)
        self.cost = GeneralizedCost(costfunc=SumSquared())
        # Bug fix (kept from the original code): disable parallelism per layer
        for l in self.model.layers.layers:
            l.parallelism = 'Disabled'
        self.model.initialize(self.input_shape[:-1], self.cost)
        self.optimizer = self._createOptimizer(args)

        # create target model
        self.target_steps = args.target_steps
        self.train_iterations = 0
        if self.target_steps:
            self.target_model = Model(layers=self._createLayers(num_actions))
            # Bug fix (kept from the original code): disable parallelism per layer
            for l in self.target_model.layers.layers:
                l.parallelism = 'Disabled'
            self.target_model.initialize(self.input_shape[:-1])
            self.save_weights_prefix = args.save_weights_prefix
        else:
            self.target_model = self.model

        self.callback = None

    def _createOptimizer(self, args):
        """Return the optimizer selected by ``args.optimizer``.

        Raises:
            ValueError: for any name other than 'rmsprop', 'adam', 'adadelta'.
                (The previous ``assert false`` raised NameError instead of a
                meaningful error and would be skipped entirely under ``-O``.)
        """
        if args.optimizer == 'rmsprop':
            return RMSProp(learning_rate=args.learning_rate,
                           decay_rate=args.decay_rate,
                           stochastic_round=args.stochastic_round)
        if args.optimizer == 'adam':
            return Adam(learning_rate=args.learning_rate,
                        stochastic_round=args.stochastic_round)
        if args.optimizer == 'adadelta':
            return Adadelta(decay=args.decay_rate,
                            stochastic_round=args.stochastic_round)
        raise ValueError("Unknown optimizer: %r" % (args.optimizer,))

    def _createLayers(self, num_actions):
        """Return a fresh list of layers ending in ``num_actions`` linear outputs."""
        # create network
        init_norm = Gaussian(loc=0.0, scale=0.01)
        layers = []
        # The first hidden layer convolves 32 filters of 8x8 with stride 4 with
        # the input image and applies a rectifier nonlinearity.
        layers.append(Conv((8, 8, 32), strides=4, init=init_norm,
                           activation=Rectlin(), batch_norm=self.batch_norm))
        # The second hidden layer convolves 64 filters of 4x4 with stride 2,
        # again followed by a rectifier nonlinearity.
        layers.append(Conv((4, 4, 64), strides=2, init=init_norm,
                           activation=Rectlin(), batch_norm=self.batch_norm))
        # This is followed by a third convolutional layer that convolves 64
        # filters of 3x3 with stride 1 followed by a rectifier.
        layers.append(Conv((3, 3, 64), strides=1, init=init_norm,
                           activation=Rectlin(), batch_norm=self.batch_norm))
        # The final hidden layer is fully-connected and consists of 512
        # rectifier units.
        layers.append(Affine(nout=512, init=init_norm,
                             activation=Rectlin(), batch_norm=self.batch_norm))
        # The output layer is a fully-connected linear layer with a single
        # output for each valid action.
        layers.append(Affine(nout=num_actions, init=init_norm))
        return layers

    def _setInput(self, states):
        """Copy a batch-first ``states`` array into ``self.input`` and divide by 255."""
        # change order of axes to match what Neon expects
        states = np.transpose(states, axes=(1, 2, 3, 0))
        # copy() shouldn't be necessary here, but Neon doesn't work otherwise
        self.input.set(states.copy())
        # normalize network input between 0 and 1
        self.be.divide(self.input, 255, self.input)

    def train(self, minibatch, epoch):
        """Run one Q-learning update on a minibatch.

        Args:
            minibatch: tuple ``(prestates, actions, rewards, poststates,
                terminals)``; the state arrays are 4-D, the others 1-D, and
                all share the same first dimension.
            epoch: value forwarded to the optimizer's ``optimize`` call.
        """
        # expand components of minibatch
        prestates, actions, rewards, poststates, terminals = minibatch
        assert len(prestates.shape) == 4
        assert len(poststates.shape) == 4
        assert len(actions.shape) == 1
        assert len(rewards.shape) == 1
        assert len(terminals.shape) == 1
        assert prestates.shape == poststates.shape
        assert prestates.shape[0] == actions.shape[0] == rewards.shape[0] == poststates.shape[0] == terminals.shape[0]

        if self.target_steps and self.train_iterations % self.target_steps == 0:
            # have to serialize also states for batch normalization to work
            pdict = self.model.get_description(get_weights=True, keep_states=True)
            self.target_model.deserialize(pdict, load_states=True)

        # feed-forward pass for poststates to get Q-values
        self._setInput(poststates)
        postq = self.target_model.fprop(self.input, inference=True)
        assert postq.shape == (self.num_actions, self.batch_size)

        # calculate max Q-value for each poststate
        maxpostq = self.be.max(postq, axis=0).asnumpyarray()
        assert maxpostq.shape == (1, self.batch_size)

        # feed-forward pass for prestates
        self._setInput(prestates)
        preq = self.model.fprop(self.input, inference=False)
        assert preq.shape == (self.num_actions, self.batch_size)

        # Make a real copy of the prestate Q-values as targets.  Without the
        # copy, a backend whose asnumpyarray() hands back its own buffer would
        # have preq overwritten by the loop below, making every error zero.
        targets = preq.asnumpyarray().copy()

        # clip rewards to [min_reward, max_reward]
        rewards = np.clip(rewards, self.min_reward, self.max_reward)

        # update Q-value targets for actions taken
        for i, action in enumerate(actions):
            if terminals[i]:
                targets[action, i] = float(rewards[i])
            else:
                targets[action, i] = float(rewards[i]) + self.discount_rate * maxpostq[0, i]

        # copy targets to GPU memory
        self.targets.set(targets)

        # calculate errors
        deltas = self.cost.get_errors(preq, self.targets)
        assert deltas.shape == (self.num_actions, self.batch_size)

        # calculate cost, just in case
        cost = self.cost.get_cost(preq, self.targets)
        assert cost.shape == (1, 1)

        # clip errors
        if self.clip_error:
            self.be.clip(deltas, -self.clip_error, self.clip_error, out=deltas)

        # perform back-propagation of gradients
        self.model.bprop(deltas)

        # perform optimization
        self.optimizer.optimize(self.model.layers_to_optimize, epoch)

        # increase number of weight updates (needed for target clone interval)
        self.train_iterations += 1

        # calculate statistics
        if self.callback:
            self.callback.on_train(cost.asnumpyarray()[0, 0])

    def predict(self, states):
        """Return Q-values, shape ``(batch_size, num_actions)``, for a full batch."""
        # minibatch is full size, because Neon doesn't let change the minibatch size
        assert states.shape == ((self.batch_size, self.history_length,) + self.screen_dim)

        # calculate Q-values for the states
        self._setInput(states)
        qvalues = self.model.fprop(self.input, inference=True)
        assert qvalues.shape == (self.num_actions, self.batch_size)
        if logger.isEnabledFor(logging.DEBUG):
            logger.debug("Q-values: " + str(qvalues.asnumpyarray()[:, 0]))

        # transpose the result, so that batch size is first dimension
        return qvalues.T.asnumpyarray()

    def load_weights(self, load_path):
        """Load the online model's parameters from ``load_path``."""
        self.model.load_params(load_path)

    def save_weights(self, save_path):
        """Save the online model's parameters to ``save_path``."""
        self.model.save_params(save_path)
lunaModel = Model(layers=vgg_layers)

# Optionally start from a previously saved parameter file.
if args.model_file:
    import os
    assert os.path.exists(args.model_file), '%s not found' % args.model_file
    lunaModel.load_params(args.model_file)

# configure callbacks (misclassification is tracked on the validation set)
callbacks = Callbacks(
    lunaModel,
    eval_set=valid_set,
    metric=Misclassification(),
    **args.callback_args)

if args.deconv:
    callbacks.add_deconv_callback(train_set, valid_set)

lunaModel.fit(
    train_set,
    optimizer=opt,
    num_epochs=num_epochs,
    cost=cost,
    callbacks=callbacks)

lunaModel.save_params('LUNA16_VGG_model.prm')

# Report metrics on the validation set ...
neon_logger.display('Finished training. Calculating error on the validation set...')
valid_misclass_pct = lunaModel.eval(valid_set, metric=Misclassification())[0] * 100
neon_logger.display('Misclassification error (validation) = {:.2f}%'.format(valid_misclass_pct))
valid_prec_recall = lunaModel.eval(valid_set, metric=PrecisionRecall(num_classes=2))
neon_logger.display('Precision/recall (validation) = {}'.format(valid_prec_recall))

# ... and then on the test set.
neon_logger.display('Calculating metrics on the test set. This could take a while...')
test_misclass_pct = lunaModel.eval(test_set, metric=Misclassification())[0] * 100
neon_logger.display('Misclassification error (test) = {:.2f}%'.format(test_misclass_pct))
test_prec_recall = lunaModel.eval(test_set, metric=PrecisionRecall(num_classes=2))
neon_logger.display('Precision/recall (test) = {}'.format(test_prec_recall))
def _load_scaled_split(csv_path, scale, num_feat):
    """Read one CSV split, scale its first ``num_feat`` columns, return (X, y).

    Column 0 (after the index column) is the ground truth and becomes ``y``
    with shape (n_rows, 1); the remaining columns become ``X``.
    """
    # read_csv(index_col=0, parse_dates=True) is the documented replacement
    # for the removed DataFrame.from_csv; np.array() gives a private, writable
    # copy in place of the removed as_matrix().
    frame = pd.read_csv(csv_path, index_col=0, parse_dates=True)
    data = np.array(frame.values)
    data[:, :num_feat] = scale(data[:, :num_feat])
    return data[:, 1:], data[:, 0].reshape(-1, 1)


def train_mlp():
    """
    Train data and save scaling and network weights and biases to file
    to be used by forward prop phase on test data

    Reads data/train.csv, data/validate.csv and data/test.csv, fits the
    scaler on the training split only, trains the MLP with early stopping,
    prints validation/test errors and writes the model parameters and the
    scaler's mean/scale vectors under model/.

    Returns:
        int: always 1.
    """
    parser = NeonArgparser(__doc__)
    args = parser.parse_args()

    logging.getLogger().setLevel(args.log_thresh)

    # preprocessor
    std_scale = preprocessing.StandardScaler(with_mean=True, with_std=True)

    # number of non one-hot encoded features, including ground truth
    num_feat = 4

    # load data from csv-files and rescale; the scaler is fitted on the
    # training split and only applied to the validation and test splits
    X_train, y_train = _load_scaled_split('data/train.csv', std_scale.fit_transform, num_feat)
    X_valid, y_valid = _load_scaled_split('data/validate.csv', std_scale.transform, num_feat)
    X_test, y_test = _load_scaled_split('data/test.csv', std_scale.transform, num_feat)

    # setup a training set iterator
    train_set = CustomDataIterator(X_train, lshape=(X_train.shape[1]), y_c=y_train)
    # setup a validation data set iterator
    valid_set = CustomDataIterator(X_valid, lshape=(X_valid.shape[1]), y_c=y_valid)
    # setup a test data set iterator
    test_set = CustomDataIterator(X_test, lshape=(X_test.shape[1]), y_c=y_test)

    # setup weight initialization function
    init_norm = Xavier()

    # setup model layers; floor division keeps nout an integer on Python 3
    n_inputs = X_train.shape[1]
    layers = [Affine(nout=n_inputs, init=init_norm, activation=Rectlin()),
              Dropout(keep=0.5),
              Affine(nout=n_inputs // 2, init=init_norm, activation=Rectlin()),
              Linear(nout=1, init=init_norm)]

    # setup cost function as smooth L1 loss
    cost = GeneralizedCost(costfunc=SmoothL1Loss())

    # setup optimizer
    optimizer = Adam(learning_rate=0.0001, beta_1=0.9, beta_2=0.999, epsilon=1.e-8)

    # initialize model object
    mlp = Model(layers=layers)

    # evaluate every epoch unless an evaluation frequency was requested
    if args.callback_args['eval_freq'] is None:
        args.callback_args['eval_freq'] = 1

    # configure callbacks
    callbacks = Callbacks(mlp, eval_set=valid_set, **args.callback_args)
    callbacks.add_early_stop_callback(stop_func)
    callbacks.add_save_best_state_callback(os.path.join(args.data_dir, "early_stop-best_state.pkl"))

    # run fit
    mlp.fit(train_set, optimizer=optimizer, num_epochs=args.epochs, cost=cost, callbacks=callbacks)

    # evaluate model
    print('Evaluation Error = %.4f' % (mlp.eval(valid_set, metric=SmoothL1Metric())))
    print('Test set error = %.4f' % (mlp.eval(test_set, metric=SmoothL1Metric())))

    # Saving the model
    print('Saving model parameters!')
    mlp.save_params("model/homeapp_model.prm")

    # Reloading saved model
    # This should go in run.py
    mlp = Model("model/homeapp_model.prm")
    print('Test set error = %.4f' % (mlp.eval(test_set, metric=SmoothL1Metric())))

    # save the preprocessor vectors:
    np.savez("model/homeapp_preproc", mean=std_scale.mean_, std=std_scale.scale_)

    return 1
class DQNNeon(Learner):
    """
    This class is an implementation of the DQN network based on Neon.

    The modules that interact with the agent, the replay memory and the
    statistic calls are implemented here, taking the individual requirements
    of the Neon framework into account. The code is adapted from:
    https://github.com/tambetm/simple_dqn

    Attributes:
        input_shape (tuple[int]): Dimension of the network input.
        dummy_batch (numpy.ndarray): Dummy batch used to calculate Q-values for single states.
        batch_norm (bool): Indicates if normalization is wanted for a certain layer (default=False).
        be (neon.backends.nervanagpu.NervanaGPU): Describes the backend for the Neon implementation.
        input (neon.backends.nervanagpu.GPUTensor): Definition of network input shape.
        targets(neon.backends.nervanagpu.GPUTensor): Definition of network output shape.
        model (neon.models.model.Model): Generated Neon model.
        target_model (neon.models.model.Model): Generated target Neon model.
        cost_func (neon.layers.layer.GeneralizedCost): Cost function for model training.
        callback (Statistics): Hook for the statistics object to pass train and test information.

    Note:
        More attributes of this class are defined in the base class Learner.
    """

    def __init__(self, env, args, rng, name = "DQNNeon"):
        """
        Initializes a network based on the Neon framework.

        Args:
            env (AtariEnv): The environment in which the agent actuates.
            args (argparse.Namespace): All settings either with a default value or set via command line arguments.
            rng (mtrand.RandomState): initialized Mersenne Twister pseudo-random number generator.
            name (str): The name of the network object.

        Raises:
            ValueError: If args.optimizer names an unsupported optimizer.

        Note:
            This function should always call the base class first to initialize
            the common values for the networks.
        """
        _logger.info("Initializing new object of type " + str(type(self).__name__))
        super(DQNNeon, self).__init__(env, args, rng, name)
        self.input_shape = (self.sequence_length,) + self.frame_dims + (self.batch_size,)
        self.dummy_batch = np.zeros((self.batch_size, self.sequence_length) + self.frame_dims, dtype=np.uint8)
        self.batch_norm = args.batch_norm

        self.be = gen_backend(
                backend = args.backend,
                batch_size = args.batch_size,
                rng_seed = args.random_seed,
                device_id = args.device_id,
                datatype = np.dtype(args.datatype).type,
                stochastic_round = args.stochastic_round)

        # prepare tensors once and reuse them
        self.input = self.be.empty(self.input_shape)
        self.input.lshape = self.input_shape # HACK: needed for convolutional networks
        self.targets = self.be.empty((self.output_shape, self.batch_size))

        # create model
        layers = self._create_layer()
        self.model = Model(layers = layers)
        self.cost_func = GeneralizedCost(costfunc = SumSquared())
        # Bug fix (kept from the original code): disable parallelism per layer
        for l in self.model.layers.layers:
            l.parallelism = 'Disabled'
        self.model.initialize(self.input_shape[:-1], self.cost_func)

        self._set_optimizer()

        if self.args.load_weights is not None:
            self.load_weights(self.args.load_weights)

        # create target model
        if self.target_update_frequency:
            layers = self._create_layer()
            self.target_model = Model(layers)
            # Bug fix (kept from the original code): disable parallelism per layer
            for l in self.target_model.layers.layers:
                l.parallelism = 'Disabled'
            self.target_model.initialize(self.input_shape[:-1])
        else:
            self.target_model = self.model

        self.callback = None
        _logger.debug("%s" % self)

    def _create_layer(self):
        """
        Build a network consistent with the DeepMind Nature paper.

        Returns:
            list: Freshly created layers; the last one has self.output_shape outputs.
        """
        _logger.debug("Output shape = %d" % self.output_shape)
        # create network
        init_norm = Gaussian(loc=0.0, scale=0.01)
        layers = []
        # The first hidden layer convolves 32 filters of 8x8 with stride 4 with the input image and applies a rectifier nonlinearity.
        layers.append(
                Conv((8, 8, 32),
                strides=4,
                init=init_norm,
                activation=Rectlin(),
                batch_norm=self.batch_norm))
        # The second hidden layer convolves 64 filters of 4x4 with stride 2, again followed by a rectifier nonlinearity.
        layers.append(
                Conv((4, 4, 64),
                strides=2,
                init=init_norm,
                activation=Rectlin(),
                batch_norm=self.batch_norm))
        # This is followed by a third convolutional layer that convolves 64 filters of 3x3 with stride 1 followed by a rectifier.
        layers.append(
                Conv((3, 3, 64),
                strides=1,
                init=init_norm,
                activation=Rectlin(),
                batch_norm=self.batch_norm))
        # The final hidden layer is fully-connected and consists of 512 rectifier units.
        layers.append(
                Affine(
                    nout=512,
                    init=init_norm,
                    activation=Rectlin(),
                    batch_norm=self.batch_norm))
        # The output layer is a fully-connected linear layer with a single output for each valid action.
        layers.append(
                Affine(
                    nout= self.output_shape,
                    init = init_norm))
        return layers

    def _set_optimizer(self):
        """
        Initializes the selected optimization algorithm.

        Raises:
            ValueError: If self.args.optimizer is not 'rmsprop', 'adam' or
                'adadelta'. (The former ``assert false`` raised NameError and
                would have been skipped under ``python -O``.)
        """
        _logger.debug("Optimizer = %s" % str(self.args.optimizer))
        if self.args.optimizer == 'rmsprop':
            self.optimizer = RMSProp(
                    learning_rate = self.args.learning_rate,
                    decay_rate = self.args.decay_rate,
                    stochastic_round = self.args.stochastic_round)
        elif self.args.optimizer == 'adam':
            self.optimizer = Adam(
                    learning_rate = self.args.learning_rate,
                    stochastic_round = self.args.stochastic_round)
        elif self.args.optimizer == 'adadelta':
            self.optimizer = Adadelta(
                    decay = self.args.decay_rate,
                    stochastic_round = self.args.stochastic_round)
        else:
            raise ValueError("Unknown optimizer: %r" % (self.args.optimizer,))

    def _prepare_network_input(self, states):
        """
        Transforms and normalizes the states from one minibatch.

        Args:
            states (numpy.ndarray): a set of states with the size of minibatch, batch dimension first
        """
        _logger.debug("Normalizing and transforming input")
        # change order of axes to match what Neon expects
        states = np.transpose(states, axes = (1, 2, 3, 0))
        # copy() shouldn't be necessary here, but Neon doesn't work otherwise
        self.input.set(states.copy())
        # normalize network input by dividing through self.grayscales
        self.be.divide(self.input, self.grayscales, self.input)

    def train(self, minibatch, epoch):
        """
        Prepare, perform and document a complete train step for one minibatch.

        Args:
            minibatch (tuple): (prestates, actions, rewards, poststates, terminals);
                state arrays are 4-dimensional, the others 1-dimensional, all
                with the same first dimension.
            epoch (int): Current train epoch
        """
        _logger.debug("Complete training step for one minibatch")
        prestates, actions, rewards, poststates, terminals = minibatch
        assert len(prestates.shape) == 4
        assert len(poststates.shape) == 4
        assert len(actions.shape) == 1
        assert len(rewards.shape) == 1
        assert len(terminals.shape) == 1
        assert prestates.shape == poststates.shape
        assert prestates.shape[0] == actions.shape[0] == rewards.shape[0] == poststates.shape[0] == terminals.shape[0]
        # feed-forward pass for poststates to get Q-values
        self._prepare_network_input(poststates)
        postq = self.target_model.fprop(self.input, inference = True)
        assert postq.shape == (self.output_shape, self.batch_size)
        # calculate max Q-value for each poststate
        maxpostq = self.be.max(postq, axis=0).asnumpyarray()
        assert maxpostq.shape == (1, self.batch_size)
        # average maxpostq for stats
        maxpostq_avg = maxpostq.mean()
        # feed-forward pass for prestates
        self._prepare_network_input(prestates)
        preq = self.model.fprop(self.input, inference = False)
        assert preq.shape == (self.output_shape, self.batch_size)
        # Make a real copy of the prestate Q-values as targets.  Without the
        # copy, a backend whose asnumpyarray() hands back its own buffer would
        # have preq overwritten by the loop below, making every error zero.
        targets = preq.asnumpyarray().copy()
        # clip rewards to [min_reward, max_reward]
        rewards = np.clip(rewards, self.min_reward, self.max_reward)
        # update Q-value targets for each state only at actions taken
        for i, action in enumerate(actions):
            if terminals[i]:
                targets[action, i] = float(rewards[i])
            else:
                targets[action, i] = float(rewards[i]) + self.discount_rate * maxpostq[0,i]
        # copy targets to GPU memory
        self.targets.set(targets)
        # calculate errors
        errors = self.cost_func.get_errors(preq, self.targets)
        assert errors.shape == (self.output_shape, self.batch_size)
        # average error where there is a error (should be 1 in every row)
        #TODO: errors_avg = np.sum(errors)/np.size(errors[errors>0.])
        # clip errors
        if self.clip_error:
            self.be.clip(errors, -self.clip_error, self.clip_error, out = errors)
        # calculate cost, just in case
        cost = self.cost_func.get_cost(preq, self.targets)
        assert cost.shape == (1,1)
        # perform back-propagation of gradients
        self.model.bprop(errors)
        # perform optimization
        self.optimizer.optimize(self.model.layers_to_optimize, epoch)
        # increase number of weight updates (needed for target clone interval)
        self.update_iterations += 1
        if self.target_update_frequency and self.update_iterations % self.target_update_frequency == 0:
            self._copy_theta()
            # fetch the scalar cost from the backend once and reuse it below
            cost_value = cost.asnumpyarray()[0, 0]
            _logger.info("Network update #%d: Cost = %s, Avg Max Q-value = %s",
                         self.update_iterations, cost_value, maxpostq_avg)
        # update statistics
        if self.callback:
            self.callback.from_learner(cost.asnumpyarray()[0,0], maxpostq_avg)

    def get_Q(self, state):
        """
        Calculates the Q-values for one mini-batch.

        Args:
            state(numpy.ndarray): Single state, shape=(sequence_length,frame_width,frame_height).

        Returns:
            q_values (numpy.ndarray): Results for first element of mini-batch from one forward pass through the network, shape=(self.output_shape,)
        """
        _logger.debug("State shape = %s" % str(state.shape))
        # minibatch is full size, because Neon doesn't let change the minibatch size
        # so the state is placed in slot 0 of a full dummy batch and only that
        # slot's result is returned
        self.dummy_batch[0] = state
        states = self.dummy_batch
        assert states.shape == ((self.batch_size, self.sequence_length,) + self.frame_dims)
        # calculate Q-values for the states
        self._prepare_network_input(states)
        qvalues = self.model.fprop(self.input, inference = True)
        assert qvalues.shape == (self.output_shape, self.batch_size)
        _logger.debug("Qvalues: %s" % (str(qvalues.asnumpyarray()[:,0])))
        return qvalues.asnumpyarray()[:,0]

    def _copy_theta(self):
        """
        Copies the weights of the current network to the target network.
        """
        _logger.debug("Copying weights")
        # states are serialized as well (keep_states / load_states)
        pdict = self.model.get_description(get_weights=True, keep_states=True)
        self.target_model.deserialize(pdict, load_states=True)

    def save_weights(self, target_dir, epoch):
        """
        Saves the current network parameters to disk.

        Args:
            target_dir (str): Directory where the network parameters are stored for each episode.
            epoch (int): Current epoch.
        """
        filename = "%s_%s_%s_%d.prm" % (str(self.args.game.lower()), str(self.args.net_type.lower()), str(self.args.optimizer.lower()), (epoch + 1))
        self.model.save_params(os.path.join(target_dir, filename))

    def load_weights(self, source_file):
        """
        Loads the network parameters from a given file.

        Args:
            source_file (str): Complete path to a file with network parameters.
        """
        self.model.load_params(source_file)
def test_model_serialize(backend_default, data):
    """Check that a model survives a save_params / Model(path) round trip.

    A two-stream model is trained for a few minibatches, its inference
    outputs and per-layer serialized params/states are recorded, and the same
    quantities are recomputed after reloading the model from disk and
    compared.  The temporary file is removed even when an assertion fails.
    """
    dataset = MNIST(path=data)
    (X_train, y_train), _, nclass = dataset.load_data()
    train_set = ArrayIterator(
        [X_train, X_train], y_train, nclass=nclass, lshape=(1, 28, 28))

    init_norm = Gaussian(loc=0.0, scale=0.01)

    # initialize model
    path1 = Sequential([Conv((5, 5, 16), init=init_norm, bias=Constant(0), activation=Rectlin()),
                        Pooling(2),
                        Affine(nout=20, init=init_norm, bias=init_norm, activation=Rectlin())])
    path2 = Sequential([Affine(nout=100, init=init_norm, bias=Constant(0), activation=Rectlin()),
                        Dropout(keep=0.5),
                        Affine(nout=20, init=init_norm, bias=init_norm, activation=Rectlin())])
    layers = [MergeMultistream(layers=[path1, path2], merge="stack"),
              Affine(nout=20, init=init_norm, batch_norm=True, activation=Rectlin()),
              Affine(nout=10, init=init_norm, activation=Logistic(shortcut=True))]

    tmp_save = 'test_model_serialize_tmp_save.pickle'
    mlp = Model(layers=layers)
    mlp.optimizer = GradientDescentMomentum(learning_rate=0.1, momentum_coef=0.9)
    mlp.cost = GeneralizedCost(costfunc=CrossEntropyBinary())
    mlp.initialize(train_set, cost=mlp.cost)

    n_test = 3
    num_epochs = 3

    def inference_outputs(model):
        # Inference-mode outputs for the leading minibatches of train_set;
        # the loop stops after the first batch whose index exceeds n_test.
        collected = []
        for i, (x, _t) in enumerate(train_set):
            collected.append(model.fprop(x, inference=True))
            if i > n_test:
                break
        return collected

    # Train model for num_epochs on the leading minibatches
    for epoch in range(num_epochs):
        for i, (x, t) in enumerate(train_set):
            x = mlp.fprop(x)
            delta = mlp.cost.get_errors(x, t)
            mlp.bprop(delta)
            mlp.optimizer.optimize(mlp.layers_to_optimize, epoch=epoch)
            if i > n_test:
                break

    # Get expected outputs and states of all layers
    pdicts_exp = [l.get_params_serialize() for l in mlp.layers_to_optimize]
    outputs_exp = inference_outputs(mlp)

    try:
        # Serialize model
        mlp.save_params(tmp_save, keep_states=True)

        # Load model
        mlp = Model(tmp_save)
        mlp.initialize(train_set)

        pdicts = [l.get_params_serialize() for l in mlp.layers_to_optimize]
        outputs = inference_outputs(mlp)

        # Check outputs, states, and params are the same
        for output, output_exp in zip(outputs, outputs_exp):
            assert allclose_with_out(output.get(), output_exp.get())

        for pd, pd_exp in zip(pdicts, pdicts_exp):
            for s, s_e in zip(pd['states'], pd_exp['states']):
                if isinstance(s, list):  # this is the batch norm case
                    for _s, _s_e in zip(s, s_e):
                        assert allclose_with_out(_s, _s_e)
                else:
                    assert allclose_with_out(s, s_e)
            for p, p_e in zip(pd['params'], pd_exp['params']):
                assert type(p) == type(p_e)
                if isinstance(p, list):  # this is the batch norm case
                    for _p, _p_e in zip(p, p_e):
                        assert allclose_with_out(_p, _p_e)
                elif isinstance(p, np.ndarray):
                    assert allclose_with_out(p, p_e)
                else:
                    assert p == p_e
    finally:
        # Do not leave the pickle behind when a check above fails.
        if os.path.exists(tmp_save):
            os.remove(tmp_save)
class ModelRunnerNeon():
    """DQN train/target network pair running on the Neon GPU backend.

    ``train_net`` is optimized in :meth:`train`; ``target_net`` supplies the
    post-state Q-values and is refreshed from ``train_net`` by
    :meth:`update_model`.
    """

    def __init__(self, args, max_action_no, batch_dimension):
        """Create backend, reusable tensors, both networks and the optimizer.

        Args:
            args: settings namespace; attributes read in this class include
                train_batch_size, discount_factor, use_gpu_replay_mem,
                optimizer, rms_decay, learning_rate, dnn_initializer,
                clip_reward, clip_reward_high, clip_reward_low,
                prioritized_replay, double_dqn and clip_loss.
            max_action_no: number of outputs of the final layer.
            batch_dimension: 4-tuple with the batch dimension first; the
                network input uses the same sizes with the batch moved last.
        """
        self.args = args
        self.train_batch_size = args.train_batch_size
        self.discount_factor = args.discount_factor
        self.use_gpu_replay_mem = args.use_gpu_replay_mem

        self.be = gen_backend(backend='gpu', batch_size=self.train_batch_size)

        self.input_shape = (batch_dimension[1], batch_dimension[2], batch_dimension[3], batch_dimension[0])
        self.input = self.be.empty(self.input_shape)
        self.input.lshape = self.input_shape  # HACK: needed for convolutional networks
        self.targets = self.be.empty((max_action_no, self.train_batch_size))

        if self.use_gpu_replay_mem:
            self.history_buffer = self.be.zeros(batch_dimension, dtype=np.uint8)
            self.input_uint8 = self.be.empty(self.input_shape, dtype=np.uint8)
        else:
            self.history_buffer = np.zeros(batch_dimension, dtype=np.float32)

        self.train_net = Model(self.create_layers(max_action_no))
        self.cost = GeneralizedCost(costfunc=SumSquared())
        # Bug fix (kept from the original code): disable parallelism per layer
        for l in self.train_net.layers.layers:
            l.parallelism = 'Disabled'
        self.train_net.initialize(self.input_shape[:-1], self.cost)

        self.target_net = Model(self.create_layers(max_action_no))
        # Bug fix (kept from the original code): disable parallelism per layer
        for l in self.target_net.layers.layers:
            l.parallelism = 'Disabled'
        self.target_net.initialize(self.input_shape[:-1])

        if self.args.optimizer == 'Adam':  # Adam
            self.optimizer = Adam(beta_1=args.rms_decay,
                                  beta_2=args.rms_decay,
                                  learning_rate=args.learning_rate)
        else:  # Neon RMSProp
            self.optimizer = RMSProp(decay_rate=args.rms_decay,
                                     learning_rate=args.learning_rate)

        self.max_action_no = max_action_no
        self.running = True

    def get_initializer(self, input_size):
        """Return the weight initializer selected by ``args.dnn_initializer``.

        'xavier' gives Xavier(); 'fan_in' gives a uniform distribution in
        +/- 1/sqrt(input_size); anything else gives Gaussian(0, 0.01).
        """
        dnnInit = self.args.dnn_initializer
        if dnnInit == 'xavier':
            initializer = Xavier()
        elif dnnInit == 'fan_in':
            std_dev = 1.0 / math.sqrt(input_size)
            initializer = Uniform(low=-std_dev, high=std_dev)
        else:
            initializer = Gaussian(0, 0.01)
        return initializer

    def create_layers(self, max_action_no):
        """Return a fresh layer list: three conv layers, Affine(512), Affine(max_action_no)."""
        layers = []

        initializer = self.get_initializer(input_size = 4 * 8 * 8)
        layers.append(Conv(fshape=(8, 8, 32), strides=4, init=initializer, bias=initializer, activation=Rectlin()))

        initializer = self.get_initializer(input_size = 32 * 4 * 4)
        layers.append(Conv(fshape=(4, 4, 64), strides=2, init=initializer, bias=initializer, activation=Rectlin()))

        initializer = self.get_initializer(input_size = 64 * 3 * 3)
        layers.append(Conv(fshape=(3, 3, 64), strides=1, init=initializer, bias=initializer, activation=Rectlin()))

        initializer = self.get_initializer(input_size = 7 * 7 * 64)
        layers.append(Affine(nout=512, init=initializer, bias=initializer, activation=Rectlin()))

        initializer = self.get_initializer(input_size = 512)
        layers.append(Affine(nout=max_action_no, init=initializer, bias=initializer))

        return layers

    def clip_reward(self, reward):
        """Clamp ``reward`` to [args.clip_reward_low, args.clip_reward_high]."""
        if reward > self.args.clip_reward_high:
            return self.args.clip_reward_high
        elif reward < self.args.clip_reward_low:
            return self.args.clip_reward_low
        else:
            return reward

    def set_input(self, data):
        """Load batch-first ``data`` into ``self.input`` (batch last) divided by 255."""
        if self.use_gpu_replay_mem:
            # data already lives on the backend: transpose there, then scale
            self.be.copy_transpose(data, self.input_uint8, axes=(1, 2, 3, 0))
            self.input[:] = self.input_uint8 / 255
        else:
            self.input.set(data.transpose(1, 2, 3, 0).copy())
            self.be.divide(self.input, 255, self.input)

    def predict(self, history_buffer):
        """Return the train network's outputs for the first sample of the batch."""
        self.set_input(history_buffer)
        output = self.train_net.fprop(self.input, inference=True)
        return output.T.asnumpyarray()[0]

    def print_weights(self):
        """No-op placeholder."""
        pass

    def train(self, minibatch, replay_memory, learning_rate, debug):
        """Run one Q-learning update on ``minibatch``.

        Args:
            minibatch: ``(prestates, actions, rewards, poststates, terminals)``
                and, with prioritized replay, additionally
                ``(replay_indexes, heap_indexes, weights)``.
            replay_memory: only used with prioritized replay, to report the
                absolute error of each sample via ``update_td``.
            learning_rate: accepted for interface compatibility; not used here.
            debug: when true (and prioritized replay is on) print the
                per-sample weighting.
        """
        if self.args.prioritized_replay == True:
            prestates, actions, rewards, poststates, terminals, replay_indexes, heap_indexes, weights = minibatch
        else:
            prestates, actions, rewards, poststates, terminals = minibatch

        # Get Q*(s, a) with targetNet
        self.set_input(poststates)
        post_qvalue = self.target_net.fprop(self.input, inference=True).T.asnumpyarray()

        if self.args.double_dqn == True:
            # Get Q*(s, a) with trainNet
            post_qvalue2 = self.train_net.fprop(self.input, inference=True).T.asnumpyarray()

        # Get Q(s, a) with trainNet
        self.set_input(prestates)
        pre_qvalue = self.train_net.fprop(self.input, inference=False)

        # Targets start as a copy of the current outputs, so only the entry of
        # the action actually taken produces a non-zero error.
        label = pre_qvalue.asnumpyarray().copy()
        for i in range(0, self.train_batch_size):
            if self.args.clip_reward:
                reward = self.clip_reward(rewards[i])
            else:
                reward = rewards[i]
            if terminals[i]:
                label[actions[i], i] = reward
            else:
                if self.args.double_dqn == True:
                    # action chosen by the train net, valued by the target net
                    max_index = np.argmax(post_qvalue2[i])
                    label[actions[i], i] = reward + self.discount_factor * post_qvalue[i][max_index]
                else:
                    label[actions[i], i] = reward + self.discount_factor * np.max(post_qvalue[i])

        # copy targets to GPU memory
        self.targets.set(label)

        delta = self.cost.get_errors(pre_qvalue, self.targets)

        if self.args.prioritized_replay == True:
            delta_value = delta.asnumpyarray()
            for i in range(self.train_batch_size):
                if debug:
                    # print() call with one pre-formatted string: same output
                    # on Python 2, valid syntax on Python 3
                    print('weight[%s]: %.5f, delta: %.5f, newDelta: %.5f' % (i, weights[i], delta_value[actions[i], i], weights[i] * delta_value[actions[i], i]))
                replay_memory.update_td(heap_indexes[i], abs(delta_value[actions[i], i]))
                delta_value[actions[i], i] = weights[i] * delta_value[actions[i], i]
            delta.set(delta_value.copy())

        if self.args.clip_loss:
            self.be.clip(delta, -1.0, 1.0, out = delta)

        self.train_net.bprop(delta)
        self.optimizer.optimize(self.train_net.layers_to_optimize, epoch=0)

    def update_model(self):
        """Copy weights and states from ``train_net`` into ``target_net``."""
        # have to serialize also states for batch normalization to work
        pdict = self.train_net.get_description(get_weights=True, keep_states=True)
        self.target_net.deserialize(pdict, load_states=True)

    def finish_train(self):
        """Clear the ``running`` flag."""
        self.running = False

    def load(self, file_name):
        """Load ``train_net`` parameters from ``file_name`` and sync the target net."""
        self.train_net.load_params(file_name)
        self.update_model()

    def save(self, file_name):
        """Save ``train_net`` parameters to ``file_name``."""
        self.train_net.save_params(file_name)