示例#1
0
class MostCommonWordSense:
    """
    Thin wrapper around a two-layer Neon MLP with a 2-unit Softmax output.

    The wrapper owns the weight initializer, optimizer and cost object and
    exposes build / fit / save / load / eval / get_outputs helpers.
    """

    def __init__(self, rounding, callback_args, epochs):
        """
        Store the training configuration; the network is not created here.

        Args:
            rounding: forwarded to the optimizer as ``stochastic_round``
            callback_args (dict): keyword arguments expanded into ``Callbacks`` in ``fit``
            epochs (int): number of epochs passed to ``Model.fit``
        """
        self.epochs = epochs
        self.callback_args = callback_args
        # the network is created later by build() or load()
        self.model = None
        # Gaussian weight initializer, also used for the biases in build()
        self.init = Gaussian(loc=0.0, scale=0.01)
        # gradient descent with momentum
        self.optimizer = GradientDescentMomentum(
            learning_rate=0.1, momentum_coef=0.9, stochastic_round=rounding)
        # sum-of-squares cost wrapped in a GeneralizedCost
        self.cost = GeneralizedCost(costfunc=SumSquared())

    def build(self):
        """Create the network: a 100-unit Rectlin layer followed by a 2-unit Softmax layer."""
        hidden_layer = Affine(nout=100, init=self.init, bias=self.init, activation=Rectlin())
        output_layer = Affine(nout=2, init=self.init, bias=self.init, activation=Softmax())
        self.model = Model(layers=[hidden_layer, output_layer])

    def fit(self, valid_set, train_set):
        """
        Train the model on ``train_set`` for ``self.epochs`` epochs.

        Args:
            valid_set: dataset handed to the callbacks as ``eval_set``
            train_set: dataset the model is fitted on
        """
        training_callbacks = Callbacks(self.model, eval_set=valid_set, **self.callback_args)
        self.model.fit(train_set,
                       optimizer=self.optimizer,
                       num_epochs=self.epochs,
                       cost=self.cost,
                       callbacks=training_callbacks)

    def save(self, save_path):
        """Write the model parameters to ``save_path``."""
        self.model.save_params(save_path)

    def load(self, model_path):
        """Replace the current model with one constructed from ``model_path``."""
        self.model = Model(model_path)

    def eval(self, valid_set):
        """Return the result of evaluating ``valid_set`` with the Misclassification metric."""
        return self.model.eval(valid_set, metric=Misclassification())

    def get_outputs(self, valid_set):
        """Return the model outputs for ``valid_set``."""
        return self.model.get_outputs(valid_set)
示例#2
0
class DQNNeon(Learner):
    """ This class is an implementation of the DQN network based on Neon.

    The modules that interact with the agent, the replay memory and the
    statistic calls are implemented here, taking the individual requirements
    of the Neon framework into account. The code is adapted from:
    https://github.com/tambetm/simple_dqn

    Attributes:
        input_shape (tuple[int]): Dimension of the network input.
        dummy_batch (numpy.ndarray): Dummy batch used to calculate Q-values for single states.
        batch_norm (bool): Indicates if normalization is wanted for a certain layer (default=False).
        be (neon.backends.nervanagpu.NervanaGPU): Describes the backend for the Neon implementation.
        input (neon.backends.nervanagpu.GPUTensor): Definition of network input shape.
        targets(neon.backends.nervanagpu.GPUTensor): Definition of network output shape.
        model (neon.models.model.Model): Generated Neon model.
        target_model (neon.models.model.Model): Generated target Neon model.
        cost_func (neon.layers.layer.GeneralizedCost): Cost function for model training.
        callback (Statistics): Hook for the statistics object to pass train and test information.

    Note:
        More attributes of this class are defined in the base class Learner.

    """

    def __init__(self, env, args, rng, name = "DQNNeon"):
        """ Initializes a network based on the Neon framework.

        Args:
            env (AtariEnv): The environment in which the agent actuates.
            args (argparse.Namespace): All settings either with a default value or set via command line arguments.
            rng (mtrand.RandomState): initialized Mersenne Twister pseudo-random number generator.
            name (str): The name of the network object.

        Note:
            This function should always call the base class first to initialize
            the common values for the networks.
        """
        _logger.info("Initializing new object of type " + str(type(self).__name__))
        super(DQNNeon, self).__init__(env, args, rng, name)
        # Neon expects the batch dimension last
        self.input_shape = (self.sequence_length,) + self.frame_dims + (self.batch_size,)
        self.dummy_batch = np.zeros((self.batch_size, self.sequence_length) + self.frame_dims, dtype=np.uint8)
        self.batch_norm = args.batch_norm

        self.be = gen_backend(
                backend = args.backend,
                batch_size = args.batch_size,
                rng_seed = args.random_seed,
                device_id = args.device_id,
                datatype = np.dtype(args.datatype).type,
                stochastic_round = args.stochastic_round)

        # prepare tensors once and reuse them
        self.input = self.be.empty(self.input_shape)
        self.input.lshape = self.input_shape # HACK: needed for convolutional networks
        self.targets = self.be.empty((self.output_shape, self.batch_size))

        # create model
        self.model = Model(layers = self._create_layer())
        self.cost_func = GeneralizedCost(costfunc = SumSquared())
        self._disable_parallelism(self.model)
        self.model.initialize(self.input_shape[:-1], self.cost_func)

        self._set_optimizer()

        if self.args.load_weights is not None:
            self.load_weights(self.args.load_weights)

        # create target model; without a target update frequency the online
        # model doubles as its own target
        if self.target_update_frequency:
            self.target_model = Model(self._create_layer())
            self._disable_parallelism(self.target_model)
            self.target_model.initialize(self.input_shape[:-1])
        else:
            self.target_model = self.model

        self.callback = None
        _logger.debug("%s" % self)

    @staticmethod
    def _disable_parallelism(model):
        """ Sets parallelism to 'Disabled' on every layer of the model (upstream bug workaround). """
        for layer in model.layers.layers:
            layer.parallelism = 'Disabled'

    @staticmethod
    def _cost_to_scalar(cost):
        """ Returns element [0, 0] of the cost, converting a backend tensor to numpy first. """
        if not isinstance(cost, np.ndarray):
            cost = cost.asnumpyarray()
        return cost[0, 0]

    def _create_layer(self):
        """ Build a network consistent with the DeepMind Nature paper.

        Returns:
            list: Three convolutional layers, a 512 unit fully-connected layer
            and a linear output layer with one unit per action.
        """
        _logger.debug("Output shape = %d" % self.output_shape)
        init_norm = Gaussian(loc=0.0, scale=0.01)
        # (filter shape, stride) for the three convolutional layers:
        # 32 filters of 8x8 with stride 4, 64 filters of 4x4 with stride 2,
        # 64 filters of 3x3 with stride 1, each followed by a rectifier.
        conv_specs = (((8, 8, 32), 4), ((4, 4, 64), 2), ((3, 3, 64), 1))
        layers = [
            Conv(fshape,
                 strides=stride,
                 init=init_norm,
                 activation=Rectlin(),
                 batch_norm=self.batch_norm)
            for fshape, stride in conv_specs]
        # The final hidden layer is fully-connected and consists of 512 rectifier units.
        layers.append(
                Affine(
                    nout=512,
                    init=init_norm,
                    activation=Rectlin(),
                    batch_norm=self.batch_norm))
        # The output layer is a fully-connected linear layer with a single output for each valid action.
        layers.append(
                Affine(
                    nout=self.output_shape,
                    init=init_norm))
        return layers

    def _set_optimizer(self):
        """ Initializes the selected optimization algorithm.

        Raises:
            ValueError: If args.optimizer is not 'rmsprop', 'adam' or 'adadelta'.
        """
        _logger.debug("Optimizer = %s" % str(self.args.optimizer))
        if self.args.optimizer == 'rmsprop':
            self.optimizer = RMSProp(
                    learning_rate = self.args.learning_rate,
                    decay_rate = self.args.decay_rate,
                    stochastic_round = self.args.stochastic_round)
        elif self.args.optimizer == 'adam':
            self.optimizer = Adam(
                    learning_rate = self.args.learning_rate,
                    stochastic_round = self.args.stochastic_round)
        elif self.args.optimizer == 'adadelta':
            self.optimizer = Adadelta(
                    decay = self.args.decay_rate,
                    stochastic_round = self.args.stochastic_round)
        else:
            # the previous `assert false` raised NameError and vanished under -O
            raise ValueError("Unknown optimizer: %s" % str(self.args.optimizer))

    def _prepare_network_input(self, states):
        """ Transforms and normalizes the states from one minibatch.

        Args:
            states (numpy.ndarray): a set of states with the size of minibatch, batch axis first
        """
        _logger.debug("Normalizing and transforming input")
        # change order of axes to match what Neon expects
        states = np.transpose(states, axes = (1, 2, 3, 0))
        # copy() shouldn't be necessary here, but Neon doesn't work otherwise
        self.input.set(states.copy())
        # normalize network input between 0 and 1
        self.be.divide(self.input, self.grayscales, self.input)

    def train(self, minibatch, epoch):
        """ Prepare, perform and document a complete train step for one minibatch.

        Args:
            minibatch (tuple): (prestates, actions, rewards, poststates, terminals);
                the state arrays are 4-dimensional, the others 1-dimensional.
            epoch (int): Current train epoch
        """
        _logger.debug("Complete trainig step for one minibatch")
        prestates, actions, rewards, poststates, terminals = minibatch
        assert len(prestates.shape) == 4
        assert len(poststates.shape) == 4
        assert len(actions.shape) == 1
        assert len(rewards.shape) == 1
        assert len(terminals.shape) == 1
        assert prestates.shape == poststates.shape
        assert prestates.shape[0] == actions.shape[0] == rewards.shape[0] == poststates.shape[0] == terminals.shape[0]
        # feed-forward pass for poststates to get Q-values
        self._prepare_network_input(poststates)
        postq = self.target_model.fprop(self.input, inference = True)
        assert postq.shape == (self.output_shape, self.batch_size)
        # calculate max Q-value for each poststate
        maxpostq = self.be.max(postq, axis=0).asnumpyarray()
        assert maxpostq.shape == (1, self.batch_size)
        # average maxpostq for stats
        maxpostq_avg = maxpostq.mean()
        # feed-forward pass for prestates
        self._prepare_network_input(prestates)
        preq = self.model.fprop(self.input, inference = False)
        assert preq.shape == (self.output_shape, self.batch_size)
        # make copy of prestate Q-values as targets
        targets = preq.asnumpyarray()
        # clip rewards to [min_reward, max_reward]
        rewards = np.clip(rewards, self.min_reward, self.max_reward)
        # update Q-value targets for each state only at actions taken
        for i, action in enumerate(actions):
            if terminals[i]:
                targets[action, i] = float(rewards[i])
            else:
                targets[action, i] = float(rewards[i]) + self.discount_rate * maxpostq[0,i]
        # copy targets to GPU memory
        self.targets.set(targets)
        # calculate errors
        errors = self.cost_func.get_errors(preq, self.targets)
        assert errors.shape == (self.output_shape, self.batch_size)
        # average error where there is a error (should be 1 in every row)
        #TODO: errors_avg = np.sum(errors)/np.size(errors[errors>0.])
        # clip errors
        if self.clip_error:
            self.be.clip(errors, -self.clip_error, self.clip_error, out = errors)
        # calculate cost, just in case
        cost = self.cost_func.get_cost(preq, self.targets)
        assert cost.shape == (1,1)
        # perform back-propagation of gradients
        self.model.bprop(errors)
        # perform optimization
        self.optimizer.optimize(self.model.layers_to_optimize, epoch)
        # increase number of weight updates (needed for target clone interval)
        self.update_iterations += 1
        if self.target_update_frequency and self.update_iterations % self.target_update_frequency == 0:
            self._copy_theta()
            _logger.info("Network update #%d: Cost = %s, Avg Max Q-value = %s" % (
                self.update_iterations, str(self._cost_to_scalar(cost)), str(maxpostq_avg)))
        # update statistics
        if self.callback:
            self.callback.from_learner(self._cost_to_scalar(cost), maxpostq_avg)

    def get_Q(self, state):
        """ Calculates the Q-values for a single state.

        Args:
            state(numpy.ndarray): Single state, shape=(sequence_length,frame_width,frame_height).

        Returns:
            q_values (numpy.ndarray): Results for first element of mini-batch from one forward pass through the network, shape=(self.output_shape,)
        """
        _logger.debug("State shape = %s" % str(state.shape))
        # minibatch is full size, because Neon doesn't let change the minibatch size,
        # so a whole batch is pushed through to get the one result we actually want
        self.dummy_batch[0] = state
        states = self.dummy_batch
        assert states.shape == ((self.batch_size, self.sequence_length,) + self.frame_dims)
        # calculate Q-values for the states
        self._prepare_network_input(states)
        qvalues = self.model.fprop(self.input, inference = True)
        assert qvalues.shape == (self.output_shape, self.batch_size)
        # convert once and reuse for both logging and the return value
        first_q = qvalues.asnumpyarray()[:,0]
        _logger.debug("Qvalues: %s" % (str(first_q)))
        return first_q

    def _copy_theta(self):
        """ Copies the weights of the current network to the target network. """
        _logger.debug("Copying weights")
        pdict = self.model.get_description(get_weights=True, keep_states=True)
        self.target_model.deserialize(pdict, load_states=True)

    def save_weights(self, target_dir, epoch):
        """ Saves the current network parameters to disk.

        Args:
            target_dir (str): Directory where the network parameters are stored for each episode.
            epoch (int): Current epoch; the file name uses epoch + 1.
        """
        filename = "%s_%s_%s_%d.prm" % (str(self.args.game.lower()), str(self.args.learner_type.lower()), str(self.args.optimizer.lower()), (epoch + 1))
        self.model.save_params(os.path.join(target_dir, filename))

    def load_weights(self, source_file):
        """ Loads the network parameters from a given file.

        Args:
            source_file (str): Complete path to a file with network parameters.
        """
        self.model.load_params(source_file)
示例#3
0
                                momentum_coef=0.9,
                                wdecay=0.0005)
# Separate optimizer for the 'Bias' entry of the MultiOptimizer mapping;
# unlike the statement ending just above, no wdecay is passed here.
opt_b = GradientDescentMomentum(learning_rate=0.01, momentum_coef=0.9)
# 'default' covers everything that is not matched by the 'Bias' key.
opt = MultiOptimizer({'default': opt_w, 'Bias': opt_b}, name='multiopt')

# configure callbacks: evaluate valid_set with the Misclassification metric
callbacks = Callbacks(model,
                      eval_set=valid_set,
                      metric=Misclassification(),
                      **args.callback_args)
# NOTE(review): TrainByStageCallback is defined outside this view; presumably
# it advances training stages based on validation error with a patience of 5.
callbacks.add_callback(
    TrainByStageCallback(model, valid_set, Misclassification(),
                         max_patience=5))
# NOTE(review): presumably one pruning count per prunable layer - confirm
# against FuzzyPruneCallback.
num_prune = [5, 10, 25, 10]
callbacks.add_callback(
    FuzzyPruneCallback(num_states=100, num_prune=num_prune, model=model))

# Baseline accuracy before any further training (100% minus misclassification).
print('Original Accuracy = %.2f%%' %
      (100. - model.eval(valid_set, metric=Misclassification()) * 100))

logger.info('Training ...')
model.fit(train_set,
          optimizer=opt,
          num_epochs=250,
          cost=cost,
          callbacks=callbacks)
# Accuracy after training, computed the same way as the baseline above.
print('Accuracy = %.2f%%' %
      (100. - model.eval(valid_set, metric=Misclassification()) * 100))

# Persist the trained parameters.
model.save_params('./models/mnist/mnistfp.pkl')
示例#4
0
def test_model_serialize(backend_default, data):
    """
    Check that a model saved with ``save_params(keep_states=True)`` and loaded
    again yields the same outputs, layer states and layer parameters.

    Args:
        backend_default: backend fixture (not referenced directly in the body)
        data: path handed to ``load_mnist``
    """
    (X_train, y_train), (X_test, y_test), nclass = load_mnist(path=data)

    # the same images are fed twice, one copy per branch of the merge layer
    train_set = ArrayIterator([X_train, X_train],
                              y_train,
                              nclass=nclass,
                              lshape=(1, 28, 28))

    init_norm = Gaussian(loc=0.0, scale=0.01)

    # initialize model
    path1 = Sequential([
        Conv((5, 5, 16),
             init=init_norm,
             bias=Constant(0),
             activation=Rectlin()),
        Pooling(2),
        Affine(nout=20, init=init_norm, bias=init_norm, activation=Rectlin())
    ])
    path2 = Sequential([
        Affine(nout=100,
               init=init_norm,
               bias=Constant(0),
               activation=Rectlin()),
        Dropout(keep=0.5),
        Affine(nout=20, init=init_norm, bias=init_norm, activation=Rectlin())
    ])
    layers = [
        MergeMultistream(layers=[path1, path2], merge="stack"),
        Affine(nout=20, init=init_norm, batch_norm=True, activation=Rectlin()),
        Affine(nout=10, init=init_norm, activation=Logistic(shortcut=True))
    ]

    tmp_save = 'test_model_serialize_tmp_save.pickle'
    mlp = Model(layers=layers)
    mlp.optimizer = GradientDescentMomentum(learning_rate=0.1,
                                            momentum_coef=0.9)
    mlp.cost = GeneralizedCost(costfunc=CrossEntropyBinary())
    mlp.initialize(train_set, cost=mlp.cost)
    n_test = 3
    num_epochs = 3
    # Train model for num_epochs, stopping each epoch once i exceeds n_test
    for epoch in range(num_epochs):
        for i, (x, t) in enumerate(train_set):
            x = mlp.fprop(x)
            delta = mlp.cost.get_errors(x, t)
            mlp.bprop(delta)
            mlp.optimizer.optimize(mlp.layers_to_optimize, epoch=epoch)
            if i > n_test:
                break

    # Get expected outputs of the first batches and states of all layers
    outputs_exp = []
    pdicts_exp = [layer.get_params_serialize() for layer in mlp.layers_to_optimize]
    for i, (x, t) in enumerate(train_set):
        outputs_exp.append(mlp.fprop(x, inference=True))
        if i > n_test:
            break

    try:
        # Serialize model
        mlp.save_params(tmp_save, keep_states=True)

        # Load model
        mlp = Model(tmp_save)

        mlp.initialize(train_set)
        outputs = []
        pdicts = [layer.get_params_serialize() for layer in mlp.layers_to_optimize]
        for i, (x, t) in enumerate(train_set):
            outputs.append(mlp.fprop(x, inference=True))
            if i > n_test:
                break

        # Check outputs, states, and params are the same
        for output, output_exp in zip(outputs, outputs_exp):
            assert np.allclose(output.get(), output_exp.get())

        for pd, pd_exp in zip(pdicts, pdicts_exp):
            for s, s_e in zip(pd['states'], pd_exp['states']):
                if isinstance(s, list):  # this is the batch norm case
                    for _s, _s_e in zip(s, s_e):
                        assert np.allclose(_s, _s_e)
                else:
                    assert np.allclose(s, s_e)
            for p, p_e in zip(pd['params'], pd_exp['params']):
                assert type(p) is type(p_e)
                if isinstance(p, list):  # this is the batch norm case
                    for _p, _p_e in zip(p, p_e):
                        assert np.allclose(_p, _p_e)
                elif isinstance(p, np.ndarray):
                    assert np.allclose(p, p_e)
                else:
                    assert p == p_e
    finally:
        # always clean up the temporary file, even when an assertion fails
        if os.path.exists(tmp_save):
            os.remove(tmp_save)
class NpSemanticSegClassifier:
    """
    NP Semantic Segmentation classifier model (based on Neon framework).

    The cost function is fixed to a ``GeneralizedCost`` wrapping
    ``CrossEntropyBinary``.

    Args:
        num_epochs(int): number of epochs to train the model
        callback_args (dict): keyword arguments used to init a Callback for the model
        optimizer (:obj:`neon.optimizers`): the model's optimizer. When omitted, a new
        'neon.optimizers.GradientDescentMomentum(0.07, momentum_coef=0.9)' is created
    """

    def __init__(self, num_epochs, callback_args, optimizer=None):
        """

        Args:
            num_epochs(int): number of epochs to train the model
            callback_args (dict): keyword arguments used to init Callback for the model
            optimizer (:obj:`neon.optimizers`): the model's optimizer. When omitted (None),
            a new `neon.optimizers.GradientDescentMomentum(0.07, momentum_coef=0.9)` is
            created for this instance
        """
        # Build the default per instance: a default evaluated in the signature
        # is created once and shared by every classifier.
        if optimizer is None:
            optimizer = GradientDescentMomentum(0.07, momentum_coef=0.9)
        self.model = None
        self.cost = GeneralizedCost(costfunc=CrossEntropyBinary())
        self.optimizer = optimizer
        self.epochs = num_epochs
        self.callback_args = callback_args

    def build(self):
        """
        Build the model's layers
        """
        first_layer_dens = 64
        second_layer_dens = 64
        output_layer_dens = 2
        # setup weight initialization function
        init_norm = Gaussian(scale=0.01)
        # setup model layers: two Rectlin hidden layers and a Logistic output layer
        layers = [Affine(nout=first_layer_dens, init=init_norm,
                         activation=Rectlin()),
                  Affine(nout=second_layer_dens, init=init_norm,
                         activation=Rectlin()),
                  Affine(nout=output_layer_dens, init=init_norm,
                         activation=Logistic(shortcut=True))]

        # initialize model object
        self.model = Model(layers=layers)

    def fit(self, test_set, train_set):
        """
        Train and fit the model on the datasets

        Args:
            test_set (:obj:`neon.data.ArrayIterators`): The test set, used as the callbacks' eval set
            train_set (:obj:`neon.data.ArrayIterators`): The train set
        """
        # configure callbacks
        callbacks = Callbacks(self.model, eval_set=test_set, **self.callback_args)
        self.model.fit(train_set, optimizer=self.optimizer, num_epochs=self.epochs, cost=self.cost,
                       callbacks=callbacks)

    def save(self, model_path):
        """
        Save the model's prm file in model_path location

        Args:
            model_path(str): local path for saving the model
        """
        self.model.save_params(model_path)

    def load(self, model_path):
        """
        Load pre-trained model's .prm file to NpSemanticSegClassifier object

        Args:
            model_path(str): local path for loading the model
        """
        self.model = Model(model_path)

    def eval(self, test_set):
        """
        Evaluate the model's test_set on error_rate, test_accuracy_rate and precision_recall_rate

        Args:
            test_set (ArrayIterator): The test set

        Returns:
            tuple: error_rate, test_accuracy_rate and precision_recall_rate, each as
            returned by ``Model.eval`` for the corresponding metric
        """
        error_rate = self.model.eval(test_set, metric=Misclassification())
        test_accuracy_rate = self.model.eval(test_set, metric=Accuracy())
        precision_recall_rate = self.model.eval(test_set, metric=PrecisionRecall(2))
        return error_rate, test_accuracy_rate, precision_recall_rate

    def get_outputs(self, test_set):
        """
        Classify the dataset on the model

        Args:
            test_set (:obj:`neon.data.ArrayIterators`): The test set

        Returns:
            the model's predictions, as returned by ``Model.get_outputs``
        """
        return self.model.get_outputs(test_set)
示例#6
0
文件: trainbot.py 项目: oomwoo/ubuntu
          Affine(nout=4, init=init_uni, activation=Softmax())]

# Multi-class cross-entropy cost for the Softmax output layer defined above
cost = GeneralizedCost(costfunc=CrossEntropyMulti())

# Create model
mlp = Model(layers=layers)
callbacks = Callbacks(mlp, eval_set=test)  # Track cost function

# Train model
mlp.fit(train, optimizer=opt_gdm, num_epochs=num_epochs, cost=cost, callbacks=callbacks)

# Check performance
# print() with a single pre-formatted string works on both Python 2 and 3
print('Misclassification error = %.1f%%' % (mlp.eval(test, metric=Misclassification())*100))

# Save trained model
mlp.save_params(param_file_name)


# Sanity check
from PIL import Image
import numpy as np
from neon.data.dataiterator import ArrayIterator

# Square images: width and height both come from img_size.
W = img_size
H = img_size
# Flattened length of one image; the factor 3 is presumably the number of
# colour channels - TODO confirm against the image loading code.
L = W*H*3
size = H, W
# Zero-filled float32 buffer with 128 rows of L values each
# (128 presumably matches the backend batch size - verify).
x_new = np.zeros((128, L), dtype=np.float32)


def load_sample(test_file_name):
示例#7
0
def ShiftAdaMax_with_Scale(LR=1):
    """
    Create a ShiftAdaMax optimizer whose learning rate is ``LR_start`` scaled by ``LR``.

    Args:
        LR: multiplier applied to the module-level ``LR_start`` (default 1)

    Returns:
        ShiftAdaMax configured with ``ShiftSchedule(2, shift_size=1)``
    """
    scaled_rate = LR_start * LR
    shift_schedule = ShiftSchedule(2, shift_size=1)
    return ShiftAdaMax(learning_rate=scaled_rate, schedule=shift_schedule)


# One optimizer per key: the four 'BinaryLinear_*' entries get their own
# learning-rate multiplier, everything else uses the unscaled default.
# NOTE(review): the origin of the multiplier constants is not visible here.
optimizer = MultiOptimizer({
    'default': ShiftAdaMax_with_Scale(),
    'BinaryLinear_0': ShiftAdaMax_with_Scale(57.038),
    'BinaryLinear_1': ShiftAdaMax_with_Scale(73.9008),
    'BinaryLinear_2': ShiftAdaMax_with_Scale(73.9008),
    'BinaryLinear_3': ShiftAdaMax_with_Scale(52.3195)
})

# initialize model object
bnn = Model(layers=layers)

# configure callbacks
callbacks = Callbacks(bnn, eval_set=val_set, **args.callback_args)

# run fit
bnn.fit(train_set,
        optimizer=optimizer,
        num_epochs=args.epochs,
        cost=cost,
        callbacks=callbacks)
# report the misclassification rate on val_set as a percentage
print('Misclassification error = %.1f%%' %
      (bnn.eval(val_set, metric=Misclassification()) * 100))

# persist the trained parameters
bnn.save_params("bin_model/final_model.prm")
示例#8
0
class ModelRunnerNeon():
    """
    DQN model runner built on Neon: owns a train network, a target network,
    the optimizer and the reusable backend tensors for inputs and targets.
    """

    def __init__(self, args, max_action_no, batch_dimension):
        """
        Create the backend, the input/target tensors, both networks and the optimizer.

        Args:
            args: settings object; the attributes read by this class include
                train_batch_size, discount_factor, use_gpu_replay_mem, optimizer,
                rms_decay, learning_rate and dnn_initializer
            max_action_no (int): number of outputs of the final layer
            batch_dimension (tuple): 4-element shape with the batch axis first;
                it is reordered so the batch axis comes last for Neon
        """
        self.args = args
        self.train_batch_size = args.train_batch_size
        self.discount_factor = args.discount_factor
        self.use_gpu_replay_mem = args.use_gpu_replay_mem

        self.be = gen_backend(backend='gpu', batch_size=self.train_batch_size)

        self.input_shape = (batch_dimension[1], batch_dimension[2],
                            batch_dimension[3], batch_dimension[0])
        self.input = self.be.empty(self.input_shape)
        self.input.lshape = self.input_shape  # HACK: needed for convolutional networks
        self.targets = self.be.empty((max_action_no, self.train_batch_size))

        if self.use_gpu_replay_mem:
            # history lives on the backend as uint8, plus a uint8 staging tensor
            self.history_buffer = self.be.zeros(batch_dimension,
                                                dtype=np.uint8)
            self.input_uint8 = self.be.empty(self.input_shape, dtype=np.uint8)
        else:
            self.history_buffer = np.zeros(batch_dimension, dtype=np.float32)

        self.train_net = Model(self.create_layers(max_action_no))
        self.cost = GeneralizedCost(costfunc=SumSquared())
        # Bug fix
        for layer in self.train_net.layers.layers:
            layer.parallelism = 'Disabled'
        self.train_net.initialize(self.input_shape[:-1], self.cost)

        self.target_net = Model(self.create_layers(max_action_no))
        # Bug fix
        for layer in self.target_net.layers.layers:
            layer.parallelism = 'Disabled'
        self.target_net.initialize(self.input_shape[:-1])

        if self.args.optimizer == 'Adam':  # Adam
            self.optimizer = Adam(beta_1=args.rms_decay,
                                  beta_2=args.rms_decay,
                                  learning_rate=args.learning_rate)
        else:  # Neon RMSProp
            self.optimizer = RMSProp(decay_rate=args.rms_decay,
                                     learning_rate=args.learning_rate)

        self.max_action_no = max_action_no
        self.running = True

    def get_initializer(self, input_size):
        """
        Return the weight initializer selected by ``args.dnn_initializer``.

        Args:
            input_size (int): fan-in used for the 'fan_in' uniform range

        Returns:
            Xavier for 'xavier', Uniform(+-1/sqrt(input_size)) for 'fan_in',
            otherwise Gaussian(0, 0.01)
        """
        dnnInit = self.args.dnn_initializer
        if dnnInit == 'xavier':
            initializer = Xavier()
        elif dnnInit == 'fan_in':
            std_dev = 1.0 / math.sqrt(input_size)
            initializer = Uniform(low=-std_dev, high=std_dev)
        else:
            initializer = Gaussian(0, 0.01)
        return initializer

    def create_layers(self, max_action_no):
        """
        Build the layer list: three Rectlin conv layers, a 512-unit Rectlin
        layer and a linear output layer with ``max_action_no`` units.

        Args:
            max_action_no (int): number of units of the output layer

        Returns:
            list: the Neon layers in order
        """
        layers = []

        initializer = self.get_initializer(input_size=4 * 8 * 8)
        layers.append(
            Conv(fshape=(8, 8, 32),
                 strides=4,
                 init=initializer,
                 bias=initializer,
                 activation=Rectlin()))

        initializer = self.get_initializer(input_size=32 * 4 * 4)
        layers.append(
            Conv(fshape=(4, 4, 64),
                 strides=2,
                 init=initializer,
                 bias=initializer,
                 activation=Rectlin()))

        initializer = self.get_initializer(input_size=64 * 3 * 3)
        layers.append(
            Conv(fshape=(3, 3, 64),
                 strides=1,
                 init=initializer,
                 bias=initializer,
                 activation=Rectlin()))

        initializer = self.get_initializer(input_size=7 * 7 * 64)
        layers.append(
            Affine(nout=512,
                   init=initializer,
                   bias=initializer,
                   activation=Rectlin()))

        initializer = self.get_initializer(input_size=512)
        layers.append(
            Affine(nout=max_action_no, init=initializer, bias=initializer))

        return layers

    def clip_reward(self, reward):
        """Clamp ``reward`` to [args.clip_reward_low, args.clip_reward_high]."""
        if reward > self.args.clip_reward_high:
            return self.args.clip_reward_high
        elif reward < self.args.clip_reward_low:
            return self.args.clip_reward_low
        else:
            return reward

    def set_input(self, data):
        """
        Load ``data`` (batch axis first) into ``self.input`` with the batch
        axis moved last and the values divided by 255.
        """
        if self.use_gpu_replay_mem:
            self.be.copy_transpose(data, self.input_uint8, axes=(1, 2, 3, 0))
            self.input[:] = self.input_uint8 / 255
        else:
            self.input.set(data.transpose(1, 2, 3, 0).copy())
            self.be.divide(self.input, 255, self.input)

    def predict(self, history_buffer):
        """Run the train network in inference mode and return the outputs for the first batch entry."""
        self.set_input(history_buffer)
        output = self.train_net.fprop(self.input, inference=True)
        return output.T.asnumpyarray()[0]

    def print_weights(self):
        """No-op placeholder."""
        pass

    def train(self, minibatch, replay_memory, learning_rate, debug):
        """
        Perform one training step on a minibatch.

        Args:
            minibatch (tuple): (prestates, actions, rewards, poststates, terminals),
                followed by (replay_indexes, heap_indexes, weights) when
                ``args.prioritized_replay`` is enabled
            replay_memory: receives ``update_td`` calls when prioritized replay is enabled
            learning_rate: not used by this implementation
            debug (bool): print per-sample weights and deltas (prioritized replay only)
        """
        # NOTE: the explicit `== True` comparisons are kept so that non-bool
        # settings behave exactly as before.
        if self.args.prioritized_replay == True:
            prestates, actions, rewards, poststates, terminals, replay_indexes, heap_indexes, weights = minibatch
        else:
            prestates, actions, rewards, poststates, terminals = minibatch

        # Get Q*(s, a) with targetNet
        self.set_input(poststates)
        post_qvalue = self.target_net.fprop(self.input,
                                            inference=True).T.asnumpyarray()

        if self.args.double_dqn == True:
            # Get Q*(s, a) with trainNet
            post_qvalue2 = self.train_net.fprop(
                self.input, inference=True).T.asnumpyarray()

        # Get Q(s, a) with trainNet
        self.set_input(prestates)
        pre_qvalue = self.train_net.fprop(self.input, inference=False)

        # targets start as a copy of the predictions; only the taken action changes
        label = pre_qvalue.asnumpyarray().copy()
        for i in range(self.train_batch_size):
            if self.args.clip_reward:
                reward = self.clip_reward(rewards[i])
            else:
                reward = rewards[i]
            if terminals[i]:
                label[actions[i], i] = reward
            elif self.args.double_dqn == True:
                # action chosen by the train net, value taken from the target net
                max_index = np.argmax(post_qvalue2[i])
                label[actions[i], i] = (
                    reward + self.discount_factor * post_qvalue[i][max_index])
            else:
                label[actions[i], i] = (
                    reward + self.discount_factor * np.max(post_qvalue[i]))

        # copy targets to GPU memory
        self.targets.set(label)

        delta = self.cost.get_errors(pre_qvalue, self.targets)

        if self.args.prioritized_replay == True:
            delta_value = delta.asnumpyarray()
            for i in range(self.train_batch_size):
                td_error = delta_value[actions[i], i]
                weighted_error = weights[i] * td_error
                if debug:
                    # print() with one pre-formatted string works on Python 2 and 3
                    print('weight[%s]: %.5f, delta: %.5f, newDelta: %.5f' % (
                        i, weights[i], td_error, weighted_error))
                replay_memory.update_td(heap_indexes[i], abs(td_error))
                delta_value[actions[i], i] = weighted_error
            delta.set(delta_value.copy())

        if self.args.clip_loss:
            self.be.clip(delta, -1.0, 1.0, out=delta)

        self.train_net.bprop(delta)
        self.optimizer.optimize(self.train_net.layers_to_optimize, epoch=0)

    def update_model(self):
        """Copy weights and states from the train network to the target network."""
        # have to serialize also states for batch normalization to work
        pdict = self.train_net.get_description(get_weights=True,
                                               keep_states=True)
        self.target_net.deserialize(pdict, load_states=True)

    def finish_train(self):
        """Clear the ``running`` flag."""
        self.running = False

    def load(self, file_name):
        """Load train-network parameters from ``file_name`` and sync the target network."""
        self.train_net.load_params(file_name)
        self.update_model()

    def save(self, file_name):
        """Save the train-network parameters to ``file_name``."""
        self.train_net.save_params(file_name)
示例#9
0
    lunaModel.load_params(args.model_file)

# default eval_freq to 1 when no value was supplied
if args.callback_args['eval_freq'] is None:
    args.callback_args['eval_freq'] = 1

# configure callbacks
callbacks = Callbacks(lunaModel, eval_set=valid_set, **args.callback_args)
# add a callback that saves the best model state
callbacks.add_save_best_state_callback(
    'LUNA16_VGG_model_no_batch_sigmoid_pretrained.prm')

# optional deconvolution callback, enabled via args.deconv
if args.deconv:
    callbacks.add_deconv_callback(train_set, valid_set)

# train the model
lunaModel.fit(train_set,
              optimizer=opt,
              num_epochs=num_epochs,
              cost=cost,
              callbacks=callbacks)

# NOTE(review): this writes the final parameters to the same file name the
# best-state callback uses, so the best state may be overwritten - confirm
# that this is intended.
lunaModel.save_params('LUNA16_VGG_model_no_batch_sigmoid_pretrained.prm')

neon_logger.display(
    'Calculating metrics on the test set. This could take a while...')
# element [0] of the eval result is reported, scaled to a percentage
neon_logger.display('Misclassification error (test) = {:.2f}%'.format(
    lunaModel.eval(test_set, metric=Misclassification())[0] * 100))

# precision/recall for two classes on the test set
neon_logger.display('Precision/recall (test) = {}'.format(
    lunaModel.eval(test_set, metric=PrecisionRecall(num_classes=2))))
示例#10
0
# Optionally start from previously saved parameters.
if args.model_file:
    import os
    assert os.path.exists(args.model_file), '%s not found' % args.model_file
    lunaModel.load_params(args.model_file)

# configure callbacks
# (earlier variant without an explicit metric, kept for reference)
#callbacks = Callbacks(lunaModel, eval_set=valid_set, **args.callback_args)
callbacks = Callbacks(lunaModel,
                      eval_set=valid_set,
                      metric=Misclassification(),
                      **args.callback_args)

# optional deconvolution callback, enabled via args.deconv
if args.deconv:
    callbacks.add_deconv_callback(train_set, valid_set)

# train the model
lunaModel.fit(train_set,
              optimizer=opt_gdm,
              num_epochs=num_epochs,
              cost=cost,
              callbacks=callbacks)

# persist the trained parameters
lunaModel.save_params('LUNA16_simpleCNN_model.prm')

neon_logger.display(
    'Finished training. Calculating error on the validation set...')
# element [0] of the eval result is reported, scaled to a percentage
neon_logger.display('Misclassification error = {:.2f}%'.format(
    lunaModel.eval(valid_set, metric=Misclassification())[0] * 100))

# precision/recall for two classes on the validation set
neon_logger.display('Precision/recall = {}'.format(
    lunaModel.eval(valid_set, metric=PrecisionRecall(num_classes=2))))
示例#11
0
# Default the callback evaluation frequency to 1 when none was supplied.
if args.callback_args['eval_freq'] is None:
    args.callback_args['eval_freq'] = 1

# Configure callbacks: evaluate on the validation set, stop early according
# to stop_func, and keep the best state seen so far on disk.
callbacks = Callbacks(mlp, eval_set=valid_set, **args.callback_args)
callbacks.add_early_stop_callback(stop_func)
callbacks.add_save_best_state_callback(
    os.path.join(args.data_dir, "early_stop-best_state.pkl"))

# Run fit.
mlp.fit(train_set, optimizer=optimizer, num_epochs=args.epochs, cost=cost,
        callbacks=callbacks)

# Evaluate the model on the validation and test sets.
print('Evaluation Error = %.4f' % (mlp.eval(valid_set, metric=SmoothL1Metric())))
print('Test set error = %.4f' % (mlp.eval(test_set, metric=SmoothL1Metric())))

# Save the model. The call form of print runs on both Python 2 and 3 and
# matches the other print calls in this script.
print('Saving model parameters!')
mlp.save_params("jobwait_model.prm")

# Reload the saved model and re-evaluate it on the test set.
# This should go in run.py
mlp = Model("jobwait_model.prm")
print('Test set error = %.4f' % (mlp.eval(test_set, metric=SmoothL1Metric())))

# Save the preprocessor vectors (mean and scale of the fitted scaler).
np.savez("jobwait_preproc", mean=std_scale.mean_, std=std_scale.scale_)
# Labels are shifted down by one before use
# (presumably from 1-based to 0-based class ids - confirm against the data).
Y = data.train_label-1
X_test = data.test_data
Y_test = data.test_label-1
# Both iterators declare 11 classes and a local shape of (1, 200, 200).
train_set = ArrayIterator(X=X, y=Y, nclass=11, lshape=(1,200,200))
# The test iterator is built without labels (y=None).
test_set = ArrayIterator(X_test, None, nclass=11, lshape=(1,200,200))
init_uni = Uniform(low=-0.1, high=0.1)
# Three Conv + Pooling stages followed by two Affine layers; the final layer
# has 11 outputs with a Softmax activation.
layers = [Conv(fshape=(4,4,16), init=init_uni, activation=Rectlin()),
          Pooling(fshape=2, strides=2),
          Conv(fshape=(4,4,32), init=init_uni, activation=Rectlin()),
          Pooling(fshape=2, strides=2),
          Conv(fshape=(4,4,32), init=init_uni, activation=Rectlin()),
          Pooling(fshape=2, strides=2),
          Affine(nout=500, init=init_uni, activation=Rectlin()),
          Affine(nout=11, init=init_uni, activation=Softmax())]

model = Model(layers)
cost = GeneralizedCost(costfunc=CrossEntropyMulti())
optimizer = GradientDescentMomentum(learning_rate=0.005,
                                    momentum_coef=0.9)
callbacks = Callbacks(model, train_set)

# Train for 40 epochs and persist the parameters.
model.fit(dataset=train_set, cost=cost, optimizer=optimizer,  num_epochs=40, callbacks=callbacks)
model.save_params('model.pkl')
# Disabled: write the arg-max class of every test output to result.npy.
# out = model.get_outputs(test_set)
# row = len(Y_test)
# result = np.zeros((row,1))
# i=0
# while i<row:
# 	result[i] = out[i].argmax()
# 	i=i+1
# np.save('result.npy', result)
示例#13
0
# Multi-class cross-entropy cost.
cost = GeneralizedCost(costfunc=CrossEntropyMulti())

from neon.optimizers import GradientDescentMomentum, RMSProp
optimizer = GradientDescentMomentum(learning_rate=0.005, momentum_coef=0.9)

# Set up callbacks. By default sets up a progress bar
from neon.callbacks.callbacks import Callbacks
callbacks = Callbacks(model, train_set)

# Train the model.
model.fit(dataset=train_set,
          cost=cost,
          optimizer=optimizer,
          num_epochs=num_epochs,
          callbacks=callbacks)

model.save_params("cifar10_model.prm")

# Evaluate performance
from neon.transforms import Misclassification
error_pct = 100 * model.eval(test_set, metric=Misclassification())
# The call form of print produces the same output on Python 2 and also runs
# on Python 3, where the statement form is a syntax error.
print('Misclassification error = %.1f%%' % error_pct)

# Sanity check 1
# an image of a frog from wikipedia
# img_source = "https://upload.wikimedia.org/wikipedia/commons/thumb/5/55/Atelopus_zeteki1.jpg/440px-Atelopus_zeteki1.jpg"
# import urllib
# urllib.urlretrieve(img_source, filename="image.jpg")

from PIL import Image
import numpy as np
class NpSemanticSegClassifier:
    """
    NP Semantic Segmentation classifier model (based on Neon framework).

    The cost function is fixed to ``GeneralizedCost(CrossEntropyBinary())``.

    Args:
        num_epochs(int): number of epochs to train the model
        callback_args (dict): keyword arguments forwarded to the ``Callbacks``
            object created in :meth:`fit`
        optimizer (:obj:`neon.optimizers`, optional): the model's optimizer. When
            omitted, a new ``GradientDescentMomentum(0.07, momentum_coef=0.9)``
            is created for this instance.
    """
    def __init__(self,
                 num_epochs,
                 callback_args,
                 optimizer=None):
        """
        Store the training configuration; the network is created by :meth:`build`.

        Args:
            num_epochs(int): number of epochs to train the model
            callback_args (dict): keyword arguments forwarded to ``Callbacks``
            optimizer (:obj:`neon.optimizers`, optional): the model's optimizer.
                Defaults to a new
                ``GradientDescentMomentum(0.07, momentum_coef=0.9)``.
        """
        # Build the default optimizer per instance: an optimizer object used
        # as a default argument value would be created once, at definition
        # time, and then shared by every classifier.
        if optimizer is None:
            optimizer = GradientDescentMomentum(0.07, momentum_coef=0.9)
        self.model = None
        self.cost = GeneralizedCost(costfunc=CrossEntropyBinary())
        self.optimizer = optimizer
        self.epochs = num_epochs
        self.callback_args = callback_args

    def build(self):
        """
        Build the model's layers: two 64-unit Rectlin layers and a 2-unit
        Logistic output layer, all initialized with Gaussian(scale=0.01).
        """
        first_layer_dens = 64
        second_layer_dens = 64
        output_layer_dens = 2
        # setup weight initialization function
        init_norm = Gaussian(scale=0.01)
        # setup model layers
        layers = [
            Affine(nout=first_layer_dens, init=init_norm,
                   activation=Rectlin()),
            Affine(nout=second_layer_dens,
                   init=init_norm,
                   activation=Rectlin()),
            Affine(nout=output_layer_dens,
                   init=init_norm,
                   activation=Logistic(shortcut=True))
        ]

        # initialize model object
        self.model = Model(layers=layers)

    def fit(self, test_set, train_set):
        """
        Train and fit the model on the datasets

        Note the argument order: the evaluation set comes first.

        Args:
            test_set (:obj:`neon.data.ArrayIterators`): The test set, used as
                the callbacks' evaluation set
            train_set (:obj:`neon.data.ArrayIterators`): The train set
        """
        # configure callbacks
        callbacks = Callbacks(self.model,
                              eval_set=test_set,
                              **self.callback_args)
        self.model.fit(train_set,
                       optimizer=self.optimizer,
                       num_epochs=self.epochs,
                       cost=self.cost,
                       callbacks=callbacks)

    def save(self, model_path):
        """
        Save the model's prm file in model_path location

        Args:
            model_path(str): local path for saving the model
        """
        self.model.save_params(model_path)

    def load(self, model_path):
        """
        Load pre-trained model's .prm file to NpSemanticSegClassifier object

        Args:
            model_path(str): local path for loading the model
        """
        self.model = Model(model_path)

    def eval(self, test_set):
        """
        Evaluate the model's test_set on error_rate, test_accuracy_rate and precision_recall_rate

        Args:
            test_set (ArrayIterator): The test set

        Returns:
            tuple: error_rate, test_accuracy_rate and precision_recall_rate,
            each exactly as returned by ``Model.eval`` for the
            Misclassification, Accuracy and PrecisionRecall(2) metrics
        """
        error_rate = self.model.eval(test_set, metric=Misclassification())
        test_accuracy_rate = self.model.eval(test_set, metric=Accuracy())
        precision_recall_rate = self.model.eval(test_set,
                                                metric=PrecisionRecall(2))
        return error_rate, test_accuracy_rate, precision_recall_rate

    def get_outputs(self, test_set):
        """
        Classify the dataset on the model

        Args:
            test_set (:obj:`neon.data.ArrayIterators`): The test set

        Returns:
            the model's predictions, as returned by ``Model.get_outputs``
        """
        return self.model.get_outputs(test_set)
示例#15
0
class DeepQNetwork:
    """Deep Q-network with a separate target network, built on neon.

    Two identically structured models are created: ``model`` is trained and
    ``target_model`` supplies the post-state Q-values; call
    :meth:`update_target_network` to copy the former into the latter.

    Args:
        num_actions (int): number of outputs of the final layer.
        batch_size (int): minibatch size; also the backend batch size.
        discount_rate (float): factor applied to the best post-state Q-value.
        history_length (int): leading dimension of a single input state.
        cols (int): first board dimension.
        rows (int): second board dimension.
        clip_error: when truthy, deltas are clipped to
            ``[-clip_error, clip_error]`` before back-propagation.
        min_reward: lower bound used to clip rewards in :meth:`train`.
        max_reward: upper bound used to clip rewards in :meth:`train`.
        batch_norm (bool): passed to every Conv layer and the hidden Affine layer.
    """

    def __init__(self,
                 num_actions,
                 batch_size=32,
                 discount_rate=0.99,
                 history_length=4,
                 cols=64,
                 rows=64,
                 clip_error=1,
                 min_reward=-1,
                 max_reward=1,
                 batch_norm=False):
        self.num_actions = num_actions
        self.batch_size = batch_size
        self.discount_rate = discount_rate
        self.history_length = history_length
        self.board_dim = (cols, rows)
        self.clip_error = clip_error
        self.min_reward = min_reward
        self.max_reward = max_reward
        self.batch_norm = batch_norm

        self.be = gen_backend(backend='gpu',
                              batch_size=self.batch_size,
                              datatype=np.dtype('float32').type)

        # Input buffer layout: (history, cols, rows, batch).
        self.input_shape = (self.history_length, ) + self.board_dim + (
            self.batch_size, )
        self.input = self.be.empty(self.input_shape)
        self.input.lshape = self.input_shape  # hack from simple_dqn "needed for convolutional networks"
        self.targets = self.be.empty((self.num_actions, self.batch_size))

        layers = self._createLayers(self.num_actions)
        self.model = Model(layers=layers)
        self.cost = GeneralizedCost(costfunc=SumSquared())
        # The model is initialized with the per-sample shape (batch axis dropped).
        self.model.initialize(self.input_shape[:-1], cost=self.cost)
        self.optimizer = RMSProp(learning_rate=0.002,
                                 decay_rate=0.95,
                                 stochastic_round=True)

        self.train_iterations = 0
        self.target_model = Model(layers=self._createLayers(num_actions))
        self.target_model.initialize(self.input_shape[:-1])

        # Optional observer; when set, its ``on_train(cost)`` is called after
        # every training step.
        self.callback = None

    def _createLayers(self, num_actions):
        """Return a fresh list of layers: three Conv layers, one 256-unit
        Affine layer and a final Affine layer with ``num_actions`` outputs."""
        init_xavier_conv = Xavier(local=True)
        init_xavier_affine = Xavier(local=False)
        layers = []
        layers.append(
            Conv((8, 8, 32),
                 strides=4,
                 init=init_xavier_conv,
                 activation=Rectlin(),
                 batch_norm=self.batch_norm))
        layers.append(
            Conv((4, 4, 64),
                 strides=2,
                 init=init_xavier_conv,
                 activation=Rectlin(),
                 batch_norm=self.batch_norm))
        layers.append(
            Conv((2, 2, 128),
                 strides=1,
                 init=init_xavier_conv,
                 activation=Rectlin(),
                 batch_norm=self.batch_norm))
        layers.append(
            Affine(nout=256,
                   init=init_xavier_affine,
                   activation=Rectlin(),
                   batch_norm=self.batch_norm))
        layers.append(Affine(nout=num_actions, init=init_xavier_affine))
        return layers

    def _setInput(self, states):
        """Load ``states`` into the input buffer.

        The first axis is moved to the end (batch-first to batch-last), then
        every value ``x`` is replaced by ``(x + 1) / 2``.
        NOTE(review): presumably this rescales inputs from [-1, 1] to [0, 1] -
        confirm against the state encoding.
        """
        states = np.transpose(states, axes=(1, 2, 3, 0))
        self.input.set(states.copy())
        self.be.add(self.input, 1, self.input)
        self.be.divide(self.input, 2, self.input)

    def update_target_network(self):
        """Copy weights and layer states from ``model`` into ``target_model``."""
        pdict = self.model.get_description(get_weights=True, keep_states=True)
        self.target_model.deserialize(pdict, load_states=True)

    def train(self, minibatch, epoch):
        """Run one training step on a minibatch.

        Args:
            minibatch: tuple ``(prestates, actions, rewards, poststates,
                terminals)``.
            epoch: epoch value forwarded to the optimizer.
        """
        prestates, actions, rewards, poststates, terminals = minibatch

        # Best achievable Q-value of every post-state, from the target network.
        self._setInput(poststates)
        postq = self.target_model.fprop(self.input, inference=True)
        assert postq.shape == (self.num_actions, self.batch_size)

        maxpostq = self.be.max(postq, axis=0).asnumpyarray()
        assert maxpostq.shape == (1, self.batch_size)

        # Current Q-values of the pre-states, from the trained network.
        self._setInput(prestates)
        preq = self.model.fprop(self.input, inference=False)
        assert preq.shape == (self.num_actions, self.batch_size)

        # Start from the current predictions so only the taken action
        # contributes an error.
        targets = preq.asnumpyarray().copy()
        # Honor the configured reward bounds (the defaults are -1 and 1).
        rewards = np.clip(rewards, self.min_reward, self.max_reward)

        for i, action in enumerate(actions):
            if terminals[i]:
                targets[action, i] = float(rewards[i])
            else:
                targets[action, i] = float(
                    rewards[i]) + self.discount_rate * maxpostq[0, i]

        self.targets.set(targets)

        deltas = self.cost.get_errors(preq, self.targets)
        assert deltas.shape == (self.num_actions, self.batch_size)

        cost = self.cost.get_cost(preq, self.targets)
        assert cost.shape == (1, 1)

        if self.clip_error:
            self.be.clip(deltas, -self.clip_error, self.clip_error, out=deltas)

        self.model.bprop(deltas)
        self.optimizer.optimize(self.model.layers_to_optimize, epoch)

        self.train_iterations += 1
        # ``callback`` starts out as None, so only notify when one is attached.
        if self.callback is not None:
            self.callback.on_train(cost[0, 0])

    def predict(self, states):
        """Return the Q-values for a full batch of states.

        Args:
            states: array of shape ``(batch_size, history_length, cols, rows)``.

        Returns:
            The transposed network output converted with ``asnumpyarray()``;
            the untransposed output has shape ``(num_actions, batch_size)``.
        """
        assert states.shape == ((
            self.batch_size,
            self.history_length,
        ) + self.board_dim)

        self._setInput(states)
        qvalues = self.model.fprop(self.input, inference=True)
        assert qvalues.shape == (self.num_actions, self.batch_size)

        return qvalues.T.asnumpyarray()

    def load_weights(self, load_path):
        """Load the trained network's parameters from ``load_path``."""
        self.model.load_params(load_path)

    def save_weights(self, save_path):
        """Save the trained network's parameters to ``save_path``."""
        self.model.save_params(save_path)
示例#16
0
class WordseqRegressor():
    """Word-sequence regressor (LookupTable -> Dropout -> BiLSTM -> mean -> Affine).

    Args:
        pickle_model (str): path of the neon parameter file. The fitted
            WordBatch object is stored next to it with a ``.wb`` suffix.
        datadir (str, optional): directory of training files. When given, the
            model is trained from it (and saved if ``pickle_model`` is
            non-empty); when ``None``, the model is loaded from ``pickle_model``.
    """

    def __init__(self, pickle_model="", datadir=None):
        self.maxlen = 100
        self.n_words = 100000
        parser = NeonArgparser(__doc__)
        self.args = parser.parse_args()
        self.args.batch_size = self.batch_size = 2048  #
        self.args.deterministic = None
        self.args.rng_seed = 0
        print(extract_valid_args(self.args, gen_backend))
        self.be = gen_backend(**extract_valid_args(self.args, gen_backend))

        embedding_dim = 100
        init_emb = Uniform(-0.1 / embedding_dim, 0.1 / embedding_dim)
        init_glorot = GlorotUniform()
        self.layers = [
            LookupTable(vocab_size=self.n_words,
                        embedding_dim=embedding_dim,
                        init=init_emb,
                        pad_idx=0,
                        update=True,
                        name="LookupTable"),
            Dropout(keep=0.5),
            BiLSTM(100,
                   init=init_glorot,
                   activation=Tanh(),
                   gate_activation=Logistic(),
                   reset_cells=True,
                   split_inputs=False,
                   name="BiLSTM"),
            RecurrentMean(),
            Affine(1,
                   init_glorot,
                   bias=init_glorot,
                   activation=Identity(),
                   name="Affine")
        ]

        self.wordbatch = wordbatch.WordBatch(normalize_text,
                                             n_words=self.n_words,
                                             extractors=[(wordbatch.WordSeq, {
                                                 "seq_maxlen":
                                                 self.maxlen
                                             })])

        if datadir is None:
            self.model = Model(self.layers)
            self.model.load_params(pickle_model)
            # NOTE(review): unpickling runs arbitrary code; only load ".wb"
            # files from a trusted source.
            with gzip.open(pickle_model + ".wb", 'rb') as wb_file:
                self.wordbatch = pkl.load(wb_file)
        else:
            self.train(datadir, pickle_model)

    def remove_unks(self, x):
        """Replace every index >= ``n_words`` with ``n_words`` in each sequence of ``x``."""
        return [[self.n_words if w >= self.n_words else w for w in sen]
                for sen in x]

    def format_texts(self, texts):
        """Transform raw texts with the WordBatch object and cap out-of-range indices."""
        return self.remove_unks(self.wordbatch.transform(texts))

    class ThreadWithReturnValue(Thread):
        """Thread whose ``join()`` returns the value produced by ``target``."""

        def __init__(self,
                     group=None,
                     target=None,
                     name=None,
                     args=(),
                     kwargs=None,
                     Verbose=None):
            # ``Verbose`` is accepted for backward compatibility only; it is
            # not forwarded because Python 3's Thread has no such parameter.
            kwargs = {} if kwargs is None else kwargs
            Thread.__init__(self,
                            group=group,
                            target=target,
                            name=name,
                            args=args,
                            kwargs=kwargs)
            # Keep our own references instead of relying on the private,
            # version-specific attributes of Thread.
            self._call_target = target
            self._call_args = args
            self._call_kwargs = kwargs
            self._return = None

        def run(self):
            if self._call_target is not None:
                self._return = self._call_target(*self._call_args,
                                                 **self._call_kwargs)

        def join(self):
            """Wait for the thread to finish and return the target's result."""
            Thread.join(self)
            return self._return

    def train(self, datadir, pickle_model=""):
        """Train the model on the review files in ``datadir``.

        Every file is read line by line; each line that parses as JSON must
        carry a ``"Reviews"`` list. Every 8th review that has an ``"Overall"``
        rating is kept: its ``"Content"`` is the text and
        ``(rating - 3) * 0.5`` the regression label. Texts are transformed in
        batches of 100000, each batch on a background thread while reading
        continues.

        Args:
            datadir (str): directory containing the training files.
            pickle_model (str): when non-empty, the trained parameters are
                saved there and the WordBatch object to ``pickle_model + ".wb"``.
        """
        texts = []
        labels = []
        training_data = os.listdir(datadir)
        rcount = 0
        texts2 = []
        batchsize = 100000

        t = None
        for jsonfile in training_data:
            with open(os.path.join(datadir, jsonfile), 'r') as inputfile:
                for line in inputfile:
                    #if rcount > 1000000: break
                    try:
                        line = json.loads(line.strip())
                    except ValueError:
                        # Skip lines that are not valid JSON.
                        continue
                    for review in line["Reviews"]:
                        rcount += 1
                        if rcount % 100000 == 0:
                            print(rcount)
                        if rcount % 8 != 0:
                            continue
                        if "Overall" not in review["Ratings"]:
                            continue
                        texts.append(review["Content"])
                        labels.append(
                            (float(review["Ratings"]["Overall"]) - 3) * 0.5)
                        if len(texts) % batchsize == 0:
                            # Collect the previous batch before starting the
                            # next one so results stay in input order.
                            if t is not None:
                                texts2.append(t.join())
                            t = self.ThreadWithReturnValue(
                                target=self.wordbatch.transform,
                                args=(texts, ))
                            t.start()
                            texts = []
        # No background batch exists when fewer than ``batchsize`` texts were
        # collected in total.
        if t is not None:
            texts2.append(t.join())
        texts2.append(self.wordbatch.transform(texts))
        del texts
        texts = sp.vstack(texts2)

        self.wordbatch.dictionary_freeze = True

        train = [
            np.asarray(texts, dtype='int32'),
            np.asanyarray(labels, dtype='float32')
        ]
        # Labels become a column vector.
        train[1].shape = (train[1].shape[0], 1)

        num_epochs = 10
        cost = GeneralizedCost(costfunc=SumSquared())
        self.model = Model(layers=self.layers)
        optimizer = Adam(learning_rate=0.01)

        # Shuffle inputs and labels with the same permutation.
        index_shuf = list(range(len(train[0])))
        random.shuffle(index_shuf)
        train[0] = np.asarray([train[0][x] for x in index_shuf], dtype='int32')
        train[1] = np.asarray([train[1][x] for x in index_shuf],
                              dtype='float32')
        train_iter = ArrayIterator(train[0],
                                   train[1],
                                   nclass=1,
                                   make_onehot=False)
        self.model.fit(train_iter,
                       optimizer=optimizer,
                       num_epochs=num_epochs,
                       cost=cost,
                       callbacks=Callbacks(self.model,
                                           **self.args.callback_args))

        if pickle_model != "":
            self.model.save_params(pickle_model)
            with gzip.open(pickle_model + ".wb", 'wb') as model_file:
                pkl.dump(self.wordbatch, model_file, protocol=2)

    def predict_batch(self, texts):
        """Return one predicted value per text.

        Args:
            texts: sized collection of raw texts.

        Returns:
            list: the first element of every output row of the model.
        """
        features = np.array(self.format_texts(texts))
        # Placeholder targets; only the number of rows matters here.
        placeholder = np.zeros((len(texts), 1))
        test = ArrayIterator(features, placeholder, nclass=1,
                             make_onehot=False)
        return [row[0] for row in self.model.get_outputs(test)]
示例#17
0
from neon.layers import GeneralizedCost
from neon.transforms import CrossEntropyMulti
# Multi-class cross-entropy cost.
cost = GeneralizedCost(costfunc=CrossEntropyMulti())

from neon.optimizers import GradientDescentMomentum, RMSProp
optimizer = GradientDescentMomentum(learning_rate=0.005,
                                    momentum_coef=0.9)

# Set up callbacks. By default sets up a progress bar
from neon.callbacks.callbacks import Callbacks
callbacks = Callbacks(model, train_set)

# Train the model.
model.fit(dataset=train_set, cost=cost, optimizer=optimizer,
          num_epochs=num_epochs, callbacks=callbacks)

model.save_params("cifar10_model.prm")

# Evaluate performance
from neon.transforms import Misclassification
error_pct = 100 * model.eval(test_set, metric=Misclassification())
# The call form of print produces the same output on Python 2 and also runs
# on Python 3, where the statement form is a syntax error.
print('Misclassification error = %.1f%%' % error_pct)


# Sanity check 1
# an image of a frog from wikipedia
# img_source = "https://upload.wikimedia.org/wikipedia/commons/thumb/5/55/Atelopus_zeteki1.jpg/440px-Atelopus_zeteki1.jpg"
# import urllib
# urllib.urlretrieve(img_source, filename="image.jpg")

from PIL import Image
import numpy as np
示例#18
0
class SequenceChunker(object):
    """
    Neon-based sequence chunking model.

    The network is assembled from up to three input streams (tokens, POS tags,
    characters) followed by a deep bi-directional LSTM, dropout and a softmax
    output layer.

    Args:
        sentence_length (int): max sentence length
        token_vocab_size (int): word vocabulary size
        pos_vocab_size (int, optional): POS vocabulary size
        char_vocab_size (int, optional): characters vocabulary size
        max_char_word_length (int, optional): max word length in characters
        token_embedding_size (int, optional): word embedding dims
        pos_embedding_size (int, optional): POS embedding dims
        char_embedding_size (int, optional): character embedding dims
        num_labels (int, optional): number of output labels possible per token
        lstm_hidden_size (int, optional): LSTM hidden size
        num_lstm_layers (int, optional): number of LSTM layers
        use_external_embedding (bool, optional): input is provided as external
            word embedding; any value other than ``None`` enables it
        dropout (float, optional): value passed as ``keep`` to the Dropout layer
    """

    def __init__(self, sentence_length,
                 token_vocab_size,
                 pos_vocab_size=None,
                 char_vocab_size=None,
                 max_char_word_length=20,
                 token_embedding_size=None,
                 pos_embedding_size=None,
                 char_embedding_size=None,
                 num_labels=None,
                 lstm_hidden_size=100,
                 num_lstm_layers=1,
                 use_external_embedding=None,
                 dropout=0.5
                 ):
        glorot = GlorotUniform()

        # Token stream: a trainable lookup table, or raw data input when
        # external embeddings are supplied.
        if use_external_embedding is None:
            token_input = LookupTable(vocab_size=token_vocab_size,
                                      embedding_dim=token_embedding_size,
                                      init=glorot,
                                      pad_idx=0)
        else:
            token_input = DataInput()
        streams = [[token_input, Reshape((-1, sentence_length))]]

        # Optional POS tag stream.
        if pos_vocab_size is not None and pos_embedding_size is not None:
            pos_stream = [
                LookupTable(vocab_size=pos_vocab_size,
                            embedding_dim=pos_embedding_size,
                            init=glorot,
                            pad_idx=0),
                Reshape((-1, sentence_length)),
            ]
            streams.append(pos_stream)

        # Optional character RNN stream.
        if char_vocab_size is not None and char_embedding_size is not None:
            char_stream = [
                LookupTable(vocab_size=char_vocab_size,
                            embedding_dim=char_embedding_size,
                            init=glorot,
                            pad_idx=0),
                TimeDistBiLSTM(char_embedding_size, glorot,
                               activation=Logistic(),
                               gate_activation=Tanh(),
                               reset_cells=True,
                               reset_freq=max_char_word_length),
                TimeDistributedRecurrentLast(timesteps=max_char_word_length),
                Reshape((-1, sentence_length)),
            ]
            streams.append(char_stream)

        # A single stream contributes only its first layer; several streams
        # are stacked and reshaped.
        if len(streams) == 1:
            network = [streams[0][0]]
        else:
            network = [MergeMultistream(layers=streams, merge="stack"),
                       Reshape((-1, sentence_length))]

        network.extend([
            DeepBiLSTM(lstm_hidden_size, glorot,
                       activation=Logistic(),
                       gate_activation=Tanh(),
                       reset_cells=True,
                       depth=num_lstm_layers),
            Dropout(keep=dropout),
            Affine(num_labels, glorot, bias=glorot, activation=Softmax()),
        ])
        self._model = Model(layers=network)

    def fit(self, dataset, optimizer, cost, callbacks, epochs=10):
        """
        Train the underlying model.

        Args:
            dataset: train/test set of CONLL2000 dataset
            optimizer: optimizer (Neon based)
            cost: cost function (Neon based)
            callbacks: callbacks (Neon based)
            epochs (int, optional): number of epochs to train
        """
        fit_options = dict(optimizer=optimizer,
                           num_epochs=epochs,
                           cost=cost,
                           callbacks=callbacks)
        self._model.fit(dataset, **fit_options)

    def predict(self, dataset):
        """
        Run the model on a dataset.

        Args:
            dataset: Neon based iterator

        Returns:
            prediction on given dataset
        """
        outputs = self._model.get_outputs(dataset)
        return outputs

    def save(self, path):
        """
        Write the model weights to a file.

        Args:
            path (str): path to weights file
        """
        self._model.save_params(path)

    def get_model(self):
        """
        Return the wrapped model.

        Returns:
            Neon model object
        """
        return self._model
示例#19
0
class SequenceChunker(object):
    """
    Neon-based sequence chunking model.

    Up to three input streams (tokens, POS tags, characters) feed a deep
    bi-directional LSTM that is followed by dropout and a softmax output layer.

    Args:
        sentence_length (int): max sentence length
        token_vocab_size (int): word vocabulary size
        pos_vocab_size (int, optional): POS vocabulary size
        char_vocab_size (int, optional): characters vocabulary size
        max_char_word_length (int, optional): max word length in characters
        token_embedding_size (int, optional): word embedding dims
        pos_embedding_size (int, optional): POS embedding dims
        char_embedding_size (int, optional): character embedding dims
        num_labels (int, optional): number of output labels possible per token
        lstm_hidden_size (int, optional): LSTM hidden size
        num_lstm_layers (int, optional): number of LSTM layers
        use_external_embedding (bool, optional): input is provided as external
            word embedding; any value other than ``None`` enables it
        dropout (float, optional): value passed as ``keep`` to the Dropout layer
    """
    def __init__(self,
                 sentence_length,
                 token_vocab_size,
                 pos_vocab_size=None,
                 char_vocab_size=None,
                 max_char_word_length=20,
                 token_embedding_size=None,
                 pos_embedding_size=None,
                 char_embedding_size=None,
                 num_labels=None,
                 lstm_hidden_size=100,
                 num_lstm_layers=1,
                 use_external_embedding=None,
                 dropout=0.5):
        weight_init = GlorotUniform()
        per_sentence = (-1, sentence_length)

        # Token stream: learned embeddings unless external ones are provided.
        if use_external_embedding is None:
            first_token_layer = LookupTable(vocab_size=token_vocab_size,
                                            embedding_dim=token_embedding_size,
                                            init=weight_init,
                                            pad_idx=0)
        else:
            first_token_layer = DataInput()
        branches = [[first_token_layer, Reshape(per_sentence)]]

        # POS tag stream, only when both of its sizes are given.
        if pos_vocab_size is not None and pos_embedding_size is not None:
            pos_lookup = LookupTable(vocab_size=pos_vocab_size,
                                     embedding_dim=pos_embedding_size,
                                     init=weight_init,
                                     pad_idx=0)
            branches.append([pos_lookup, Reshape(per_sentence)])

        # Character RNN stream, only when both of its sizes are given.
        if char_vocab_size is not None and char_embedding_size is not None:
            char_lookup = LookupTable(vocab_size=char_vocab_size,
                                      embedding_dim=char_embedding_size,
                                      init=weight_init,
                                      pad_idx=0)
            char_rnn = TimeDistBiLSTM(char_embedding_size,
                                      weight_init,
                                      activation=Logistic(),
                                      gate_activation=Tanh(),
                                      reset_cells=True,
                                      reset_freq=max_char_word_length)
            char_last = TimeDistributedRecurrentLast(
                timesteps=max_char_word_length)
            branches.append(
                [char_lookup, char_rnn, char_last, Reshape(per_sentence)])

        # One branch: use just its first layer. Several: stack and reshape.
        if len(branches) == 1:
            stack = [branches[0][0]]
        else:
            stack = [MergeMultistream(layers=branches, merge="stack"),
                     Reshape(per_sentence)]

        stack.append(DeepBiLSTM(lstm_hidden_size,
                                weight_init,
                                activation=Logistic(),
                                gate_activation=Tanh(),
                                reset_cells=True,
                                depth=num_lstm_layers))
        stack.append(Dropout(keep=dropout))
        stack.append(Affine(num_labels, weight_init, bias=weight_init,
                            activation=Softmax()))
        self._model = Model(layers=stack)

    def fit(self, dataset, optimizer, cost, callbacks, epochs=10):
        """
        Train the underlying model.

        Args:
            dataset: train/test set of CONLL2000 dataset
            optimizer: optimizer (Neon based)
            cost: cost function (Neon based)
            callbacks: callbacks (Neon based)
            epochs (int, optional): number of epochs to train
        """
        training_kwargs = {
            "optimizer": optimizer,
            "num_epochs": epochs,
            "cost": cost,
            "callbacks": callbacks,
        }
        self._model.fit(dataset, **training_kwargs)

    def predict(self, dataset):
        """
        Run the model on a dataset.

        Args:
            dataset: Neon based iterator

        Returns:
            prediction on given dataset
        """
        predictions = self._model.get_outputs(dataset)
        return predictions

    def save(self, path):
        """
        Write the model weights to a file.

        Args:
            path (str): path to weights file
        """
        self._model.save_params(path)

    def get_model(self):
        """
        Return the wrapped model.

        Returns:
            Neon model object
        """
        return self._model
示例#20
0
文件: train.py 项目: thouis/go_policy
                     validation=False,
                     remove_history=False,
                     minimal_set=False,
                     next_N=3)
# Validation iterator over the same files.
# NOTE(review): 2014 in the ndata expression may be a typo (e.g. for 2048) - confirm.
valid = HDF5Iterator(filenames,
                     ndata=(16 * 2014),
                     validation=True,
                     remove_history=False,
                     minimal_set=False,
                     next_N=1)

# The model has three terminal layers; these names are not used again in this fragment.
out1, out2, out3 = model.layers.get_terminal()

# One cross-entropy cost per terminal output.
cost = Multicost(costs=[GeneralizedCost(costfunc=CrossEntropyMulti(usebits=True)),
                        GeneralizedCost(costfunc=CrossEntropyMulti(usebits=True)),
                        GeneralizedCost(costfunc=CrossEntropyMulti(usebits=True))])

# NOTE(review): the original note read "halve the learning rate every 50 epochs";
# verify that against the formula ExpSchedule actually implements for decay=1/50.
schedule = ExpSchedule(decay=(1.0 / 50))
opt_gdm = GradientDescentMomentum(learning_rate=0.01,
                                  momentum_coef=0.9,
                                  stochastic_round=args.rounding,
                                  gradient_clip_value=1,
                                  gradient_clip_norm=5,
                                  wdecay=0.0001,
                                  schedule=schedule)

# Evaluate on the validation iterator with TopKMisclassification(5) and keep
# the best state on disk while training.
callbacks = Callbacks(model, eval_set=valid, metric=TopKMisclassification(5), **args.callback_args)
callbacks.add_save_best_state_callback(os.path.join(args.workspace_dir, "best_state_h5resnet.pkl"))
model.fit(train, optimizer=opt_gdm, num_epochs=num_epochs, cost=cost, callbacks=callbacks)
# Save the final parameters to a separate file from the best state.
model.save_params(os.path.join(args.workspace_dir, "final_state_h5resnet.pkl"))
示例#21
0
]
# Wrap the layer list in a model and use a multi-class cross-entropy cost.
model = Model(layers=layers)
cost = GeneralizedCost(costfunc=CrossEntropyMulti())

# Define the optimizers: layers named 'Bias' use opt_b (no weight decay);
# everything else uses opt_w (weight decay 0.0005).
opt_w = GradientDescentMomentum(learning_rate=0.01,
                                momentum_coef=0.9,
                                wdecay=0.0005)
opt_b = GradientDescentMomentum(learning_rate=0.01, momentum_coef=0.9)
opt = MultiOptimizer({'default': opt_w, 'Bias': opt_b}, name='multiopt')

# Configure callbacks; the validation set is scored with Misclassification.
callbacks = Callbacks(model,
                      eval_set=valid_set,
                      metric=Misclassification(),
                      **args.callback_args)
# Additional project-specific callback, configured with max_patience=5.
callbacks.add_callback(
    TrainByStageCallback(model, valid_set, Misclassification(),
                         max_patience=5))

logger.info('Training ...')
model.fit(train_set,
          optimizer=opt,
          num_epochs=250,
          cost=cost,
          callbacks=callbacks)
# Accuracy is reported as 100 minus the misclassification percentage.
print('Accuracy = %.2f%%' %
      (100. - model.eval(valid_set, metric=Misclassification()) * 100))

model.save_params('./models/mnist/mnist_cnn.pkl')
示例#22
0
mlp = Model(layers=layers)
callbacks = Callbacks(mlp, eval_set=test)  # Track cost function

# Train model
mlp.fit(train,
        optimizer=opt_gdm,
        num_epochs=num_epochs,
        cost=cost,
        callbacks=callbacks)

# Check performance. The call form of print produces the same output on
# Python 2 and also runs on Python 3, where the statement form is a syntax
# error.
print('Misclassification error = %.1f%%' % (
    mlp.eval(test, metric=Misclassification()) * 100))

# Save trained model
mlp.save_params(param_file_name)

# Sanity check 1
from PIL import Image
import numpy as np
from neon.data.dataiterator import ArrayIterator

W = img_size
H = img_size
L = W * H * 3
size = H, W
x_new = np.zeros((128, L), dtype=np.float32)


def load_sample(test_file_name):
    """Open the image stored at ``test_file_name`` with PIL.

    NOTE(review): only the ``Image.open`` call is visible in this excerpt.
    The opened image is bound to a local and nothing is returned, so the
    remainder of this helper appears to be missing -- confirm against the
    full source before relying on it.
    """
    image = Image.open(test_file_name)
示例#23
0
lunaModel = Model(layers=vgg_layers)

# When a parameter file was given, it must exist and is loaded before training.
saved_params = args.model_file
if saved_params:
    import os
    assert os.path.exists(saved_params), '%s not found' % saved_params
    lunaModel.load_params(saved_params)

# Callbacks evaluate Misclassification() on the validation set.
callbacks = Callbacks(lunaModel,
                      eval_set=valid_set,
                      metric=Misclassification(),
                      **args.callback_args)

if args.deconv:
    callbacks.add_deconv_callback(train_set, valid_set)

lunaModel.fit(train_set,
              optimizer=opt,
              num_epochs=num_epochs,
              cost=cost,
              callbacks=callbacks)

lunaModel.save_params('LUNA16_VGG_model_no_batch.prm')

# neon_logger.display('Finished training. Calculating error on the validation set...')
# neon_logger.display('Misclassification error (validation) = {:.2f}%'.format(lunaModel.eval(valid_set, metric=Misclassification())[0] * 100))

# neon_logger.display('Precision/recall (validation) = {}'.format(lunaModel.eval(valid_set, metric=PrecisionRecall(num_classes=2))))

# neon_logger.display('Calculating metrics on the test set. This could take a while...')
# neon_logger.display('Misclassification error (test) = {:.2f}%'.format(lunaModel.eval(test_set, metric=Misclassification())[0] * 100))

# neon_logger.display('Precision/recall (test) = {}'.format(lunaModel.eval(test_set, metric=PrecisionRecall(num_classes=2))))


示例#24
0
class DeepQNetwork:
  """Deep Q-network built on the Neon backend.

  Holds an online model that is trained and, when ``args.target_steps`` is
  truthy, a separate target model that is periodically overwritten with the
  online model's serialized description and used to evaluate post-states.
  Otherwise the target model is the online model itself.

  The input and target tensors are allocated once and reused, so ``train``
  and ``predict`` only accept batches of exactly ``batch_size`` states.
  """

  def __init__(self, num_actions, args):
    """Create backend, reusable tensors, model(s), cost and optimizer.

    Args:
      num_actions: Number of units of the output layer (one per action).
      args: Settings object. Attributes read: ``batch_size``,
        ``discount_rate``, ``history_length``, ``screen_height``,
        ``screen_width``, ``clip_error``, ``min_reward``, ``max_reward``,
        ``batch_norm``, ``backend``, ``random_seed``, ``device_id``,
        ``datatype``, ``stochastic_round``, ``optimizer``, ``target_steps``,
        the optimizer hyper-parameters used by ``_createOptimizer`` and,
        when ``target_steps`` is truthy, ``save_weights_prefix``.

    Raises:
      ValueError: If ``args.optimizer`` names an unsupported optimizer.
    """
    # remember parameters
    self.num_actions = num_actions
    self.batch_size = args.batch_size
    self.discount_rate = args.discount_rate
    self.history_length = args.history_length
    self.screen_dim = (args.screen_height, args.screen_width)
    self.clip_error = args.clip_error
    self.min_reward = args.min_reward
    self.max_reward = args.max_reward
    self.batch_norm = args.batch_norm

    # create Neon backend
    self.be = gen_backend(backend=args.backend,
                          batch_size=args.batch_size,
                          rng_seed=args.random_seed,
                          device_id=args.device_id,
                          datatype=np.dtype(args.datatype).type,
                          stochastic_round=args.stochastic_round)

    # prepare tensors once and reuse them; the batch is the LAST axis
    self.input_shape = (self.history_length,) + self.screen_dim + (self.batch_size,)
    self.input = self.be.empty(self.input_shape)
    self.input.lshape = self.input_shape  # HACK: needed for convolutional networks
    self.targets = self.be.empty((self.num_actions, self.batch_size))

    # create model
    layers = self._createLayers(num_actions)
    self.model = Model(layers=layers)
    self.cost = GeneralizedCost(costfunc=SumSquared())
    # Bug fix
    for layer in self.model.layers.layers:
      layer.parallelism = 'Disabled'
    self.model.initialize(self.input_shape[:-1], self.cost)
    self.optimizer = self._createOptimizer(args)

    # create target model
    self.target_steps = args.target_steps
    self.train_iterations = 0
    if self.target_steps:
      self.target_model = Model(layers=self._createLayers(num_actions))
      # Bug fix
      for layer in self.target_model.layers.layers:
        layer.parallelism = 'Disabled'
      self.target_model.initialize(self.input_shape[:-1])
      self.save_weights_prefix = args.save_weights_prefix
    else:
      self.target_model = self.model

    # Optional statistics hook; when set, train() calls its on_train(cost).
    self.callback = None

  def _createOptimizer(self, args):
    """Return the optimizer selected by ``args.optimizer``.

    Args:
      args: Settings object providing ``optimizer`` and, depending on the
        choice, ``learning_rate``, ``decay_rate`` and ``stochastic_round``.

    Returns:
      An ``RMSProp``, ``Adam`` or ``Adadelta`` instance.

    Raises:
      ValueError: If ``args.optimizer`` is not 'rmsprop', 'adam' or
        'adadelta'. An explicit raise is used so the check is not removed
        when Python runs with assertions disabled.
    """
    if args.optimizer == 'rmsprop':
      return RMSProp(learning_rate=args.learning_rate,
                     decay_rate=args.decay_rate,
                     stochastic_round=args.stochastic_round)
    elif args.optimizer == 'adam':
      return Adam(learning_rate=args.learning_rate,
                  stochastic_round=args.stochastic_round)
    elif args.optimizer == 'adadelta':
      return Adadelta(decay=args.decay_rate,
                      stochastic_round=args.stochastic_round)
    raise ValueError("Unknown optimizer")

  def _createLayers(self, num_actions):
    """Build a fresh list of layers: three conv layers and two affine layers.

    Args:
      num_actions: Number of units of the final, activation-free layer.

    Returns:
      list: Newly constructed layer objects (not shared between calls).
    """
    # create network
    init_norm = Gaussian(loc=0.0, scale=0.01)
    layers = []
    # The first hidden layer convolves 32 filters of 8x8 with stride 4 with the input image and applies a rectifier nonlinearity.
    layers.append(Conv((8, 8, 32), strides=4, init=init_norm, activation=Rectlin(), batch_norm=self.batch_norm))
    # The second hidden layer convolves 64 filters of 4x4 with stride 2, again followed by a rectifier nonlinearity.
    layers.append(Conv((4, 4, 64), strides=2, init=init_norm, activation=Rectlin(), batch_norm=self.batch_norm))
    # This is followed by a third convolutional layer that convolves 64 filters of 3x3 with stride 1 followed by a rectifier.
    layers.append(Conv((3, 3, 64), strides=1, init=init_norm, activation=Rectlin(), batch_norm=self.batch_norm))
    # The final hidden layer is fully-connected and consists of 512 rectifier units.
    layers.append(Affine(nout=512, init=init_norm, activation=Rectlin(), batch_norm=self.batch_norm))
    # The output layer is a fully-connected linear layer with a single output for each valid action.
    layers.append(Affine(nout=num_actions, init=init_norm))
    return layers

  def _setInput(self, states):
    """Load ``states`` into the reusable input tensor and divide it by 255.

    Args:
      states: 4-D array with the batch on axis 0; it is transposed so the
        batch becomes the last axis before being copied into ``self.input``.
    """
    # change order of axes to match what Neon expects
    states = np.transpose(states, axes=(1, 2, 3, 0))
    # copy() shouldn't be necessary here, but Neon doesn't work otherwise
    self.input.set(states.copy())
    # normalize network input between 0 and 1
    self.be.divide(self.input, 255, self.input)

  def train(self, minibatch, epoch):
    """Run one Q-learning update of the online model on a minibatch.

    Args:
      minibatch: Tuple ``(prestates, actions, rewards, poststates,
        terminals)``. The two state arrays are 4-D with identical shape,
        the other three are 1-D, and all share the same first-axis length.
      epoch: Forwarded unchanged to the optimizer's ``optimize`` call.
    """
    # expand components of minibatch
    prestates, actions, rewards, poststates, terminals = minibatch
    assert len(prestates.shape) == 4
    assert len(poststates.shape) == 4
    assert len(actions.shape) == 1
    assert len(rewards.shape) == 1
    assert len(terminals.shape) == 1
    assert prestates.shape == poststates.shape
    assert prestates.shape[0] == actions.shape[0] == rewards.shape[0] == poststates.shape[0] == terminals.shape[0]

    # Refresh the target model every target_steps updates (including the
    # very first call, because train_iterations starts at 0).
    if self.target_steps and self.train_iterations % self.target_steps == 0:
      # have to serialize also states for batch normalization to work
      pdict = self.model.get_description(get_weights=True, keep_states=True)
      self.target_model.deserialize(pdict, load_states=True)

    # feed-forward pass for poststates to get Q-values
    self._setInput(poststates)
    postq = self.target_model.fprop(self.input, inference=True)
    assert postq.shape == (self.num_actions, self.batch_size)

    # calculate max Q-value for each poststate
    maxpostq = self.be.max(postq, axis=0).asnumpyarray()
    assert maxpostq.shape == (1, self.batch_size)

    # feed-forward pass for prestates
    self._setInput(prestates)
    preq = self.model.fprop(self.input, inference=False)
    assert preq.shape == (self.num_actions, self.batch_size)

    # make copy of prestate Q-values as targets
    # It seems necessary for cpu backend.
    targets = preq.asnumpyarray().copy()

    # clip rewards to the configured [min_reward, max_reward] range
    rewards = np.clip(rewards, self.min_reward, self.max_reward)

    # update Q-value targets for actions taken; every other entry keeps the
    # network's own prediction
    for i, action in enumerate(actions):
      if terminals[i]:
        targets[action, i] = float(rewards[i])
      else:
        targets[action, i] = float(rewards[i]) + self.discount_rate * maxpostq[0, i]

    # copy targets to GPU memory
    self.targets.set(targets)

    # calculate errors
    deltas = self.cost.get_errors(preq, self.targets)
    assert deltas.shape == (self.num_actions, self.batch_size)
    #assert np.count_nonzero(deltas.asnumpyarray()) == 32

    # calculate cost, just in case
    cost = self.cost.get_cost(preq, self.targets)
    assert cost.shape == (1, 1)

    # clip errors
    if self.clip_error:
      self.be.clip(deltas, -self.clip_error, self.clip_error, out=deltas)

    # perform back-propagation of gradients
    self.model.bprop(deltas)

    # perform optimization
    self.optimizer.optimize(self.model.layers_to_optimize, epoch)

    # increase number of weight updates (needed for target clone interval)
    self.train_iterations += 1

    # calculate statistics
    if self.callback:
      self.callback.on_train(cost[0, 0])

  def predict(self, states):
    """Return the online model's Q-values for a full batch of states.

    Args:
      states: Array of shape ``(batch_size, history_length) + screen_dim``.

    Returns:
      numpy array with the batch on the first axis and one column per
      action (the transpose of the network output).
    """
    # minibatch is full size, because Neon doesn't let change the minibatch size
    assert states.shape == ((self.batch_size, self.history_length,) + self.screen_dim)

    # calculate Q-values for the states
    self._setInput(states)
    qvalues = self.model.fprop(self.input, inference=True)
    assert qvalues.shape == (self.num_actions, self.batch_size)
    if logger.isEnabledFor(logging.DEBUG):
      logger.debug("Q-values: " + str(qvalues.asnumpyarray()[:, 0]))

    # transpose the result, so that batch size is first dimension
    return qvalues.T.asnumpyarray()

  def load_weights(self, load_path):
    """Load the online model's parameters from ``load_path``."""
    self.model.load_params(load_path)

  def save_weights(self, save_path):
    """Save the online model's parameters to ``save_path``."""
    self.model.save_params(save_path)
示例#25
0
# Training iterator over the 'X' and 'y' datasets of every handle in h5s.
# NOTE(review): the meaning of ndata / validation / remove_history is defined
# by HDF5Iterator, which is not visible here -- confirm before changing them.
train = HDF5Iterator(filenames,
                     [h['X'] for h in h5s],
                     [h['y'] for h in h5s],
                     ndata=(256 * 1024),
                     validation=False,
                     remove_history=True)
# Validation iterator over the same files, built with validation=True and a
# much smaller ndata.
valid = HDF5Iterator(filenames,
                     [h['X'] for h in h5s],
                     [h['y'] for h in h5s],
                     ndata=1024,
                     validation=True,
                     remove_history=True)

cost = GeneralizedCost(costfunc=CrossEntropyBinary())
opt_gdm = GradientDescentMomentum(learning_rate=0.01,
                                  momentum_coef=0.9,
                                  stochastic_round=args.rounding)

callbacks = Callbacks(model, eval_set=valid, metric=TopKMisclassification(5), **args.callback_args)

# Parameter-server style loop.  It has no exit condition: it runs until the
# process is stopped or one of the calls raises.
old_params = get_model_params(args.server_address)
num_iterations = 1
while True:
    # Presumably loads the parameters received from the server into the
    # local model -- verify against update_model.
    update_model(model, old_params)
    # One local epoch per round, then a numbered snapshot in the workspace.
    model.fit(train, optimizer=opt_gdm, num_epochs=1, cost=cost, callbacks=callbacks)
    model.save_params(os.path.join(args.workspace_dir, "iter_{}.pkl".format(num_iterations)))

    # Send the change relative to the parameters this round started from;
    # the value returned by put_deltas seeds the next round.
    deltas = compute_deltas(old_params, model)
    old_params = put_deltas(args.server_address, deltas)
    num_iterations += 1
示例#26
0
class DeepQNetwork:
    """Deep Q-network built on the Neon backend.

    Holds an online model that is trained and, when ``args.target_steps`` is
    truthy, a separate target model that is periodically overwritten with the
    online model's serialized description and used to evaluate post-states.
    Otherwise the target model is the online model itself.

    The input and target tensors are allocated once and reused, so ``train``
    and ``predict`` only accept batches of exactly ``batch_size`` states.
    """

    def __init__(self, num_actions, args):
        """Create backend, reusable tensors, model(s), cost and optimizer.

        Args:
            num_actions: Number of units of the output layer (one per action).
            args: Settings object. Attributes read: ``batch_size``,
                ``discount_rate``, ``history_length``, ``screen_height``,
                ``screen_width``, ``clip_error``, ``min_reward``,
                ``max_reward``, ``batch_norm``, ``backend``, ``random_seed``,
                ``device_id``, ``datatype``, ``stochastic_round``,
                ``optimizer``, ``target_steps``, the optimizer
                hyper-parameters used by ``_createOptimizer`` and, when
                ``target_steps`` is truthy, ``save_weights_prefix``.

        Raises:
            ValueError: If ``args.optimizer`` names an unsupported optimizer.
        """
        # remember parameters
        self.num_actions = num_actions
        self.batch_size = args.batch_size
        self.discount_rate = args.discount_rate
        self.history_length = args.history_length
        self.screen_dim = (args.screen_height, args.screen_width)
        self.clip_error = args.clip_error
        self.min_reward = args.min_reward
        self.max_reward = args.max_reward
        self.batch_norm = args.batch_norm

        # create Neon backend
        self.be = gen_backend(backend=args.backend,
                              batch_size=args.batch_size,
                              rng_seed=args.random_seed,
                              device_id=args.device_id,
                              datatype=np.dtype(args.datatype).type,
                              stochastic_round=args.stochastic_round)

        # prepare tensors once and reuse them; the batch is the LAST axis
        self.input_shape = (self.history_length, ) + self.screen_dim + (
            self.batch_size, )
        self.input = self.be.empty(self.input_shape)
        self.input.lshape = self.input_shape  # HACK: needed for convolutional networks
        self.targets = self.be.empty((self.num_actions, self.batch_size))

        # create model
        layers = self._createLayers(num_actions)
        self.model = Model(layers=layers)
        self.cost = GeneralizedCost(costfunc=SumSquared())
        # Bug fix
        for layer in self.model.layers.layers:
            layer.parallelism = 'Disabled'
        self.model.initialize(self.input_shape[:-1], self.cost)
        self.optimizer = self._createOptimizer(args)

        # create target model
        self.target_steps = args.target_steps
        self.train_iterations = 0
        if self.target_steps:
            self.target_model = Model(layers=self._createLayers(num_actions))
            # Bug fix
            for layer in self.target_model.layers.layers:
                layer.parallelism = 'Disabled'
            self.target_model.initialize(self.input_shape[:-1])
            self.save_weights_prefix = args.save_weights_prefix
        else:
            self.target_model = self.model

        # Optional statistics hook; when set, train() calls its on_train(cost).
        self.callback = None

    def _createOptimizer(self, args):
        """Return the optimizer selected by ``args.optimizer``.

        Args:
            args: Settings object providing ``optimizer`` and, depending on
                the choice, ``learning_rate``, ``decay_rate`` and
                ``stochastic_round``.

        Returns:
            An ``RMSProp``, ``Adam`` or ``Adadelta`` instance.

        Raises:
            ValueError: If ``args.optimizer`` is not 'rmsprop', 'adam' or
                'adadelta'. An explicit raise is used so the check is not
                removed when Python runs with assertions disabled.
        """
        if args.optimizer == 'rmsprop':
            return RMSProp(learning_rate=args.learning_rate,
                           decay_rate=args.decay_rate,
                           stochastic_round=args.stochastic_round)
        elif args.optimizer == 'adam':
            return Adam(learning_rate=args.learning_rate,
                        stochastic_round=args.stochastic_round)
        elif args.optimizer == 'adadelta':
            return Adadelta(decay=args.decay_rate,
                            stochastic_round=args.stochastic_round)
        raise ValueError("Unknown optimizer")

    def _createLayers(self, num_actions):
        """Build a fresh list of layers: three conv layers and two affine layers.

        Args:
            num_actions: Number of units of the final, activation-free layer.

        Returns:
            list: Newly constructed layer objects (not shared between calls).
        """
        # create network
        init_norm = Gaussian(loc=0.0, scale=0.01)
        layers = []
        # The first hidden layer convolves 32 filters of 8x8 with stride 4 with the input image and applies a rectifier nonlinearity.
        layers.append(
            Conv((8, 8, 32),
                 strides=4,
                 init=init_norm,
                 activation=Rectlin(),
                 batch_norm=self.batch_norm))
        # The second hidden layer convolves 64 filters of 4x4 with stride 2, again followed by a rectifier nonlinearity.
        layers.append(
            Conv((4, 4, 64),
                 strides=2,
                 init=init_norm,
                 activation=Rectlin(),
                 batch_norm=self.batch_norm))
        # This is followed by a third convolutional layer that convolves 64 filters of 3x3 with stride 1 followed by a rectifier.
        layers.append(
            Conv((3, 3, 64),
                 strides=1,
                 init=init_norm,
                 activation=Rectlin(),
                 batch_norm=self.batch_norm))
        # The final hidden layer is fully-connected and consists of 512 rectifier units.
        layers.append(
            Affine(nout=512,
                   init=init_norm,
                   activation=Rectlin(),
                   batch_norm=self.batch_norm))
        # The output layer is a fully-connected linear layer with a single output for each valid action.
        layers.append(Affine(nout=num_actions, init=init_norm))
        return layers

    def _setInput(self, states):
        """Load ``states`` into the reusable input tensor and divide it by 255.

        Args:
            states: 4-D array with the batch on axis 0; it is transposed so
                the batch becomes the last axis before being copied into
                ``self.input``.
        """
        # change order of axes to match what Neon expects
        states = np.transpose(states, axes=(1, 2, 3, 0))
        # copy() shouldn't be necessary here, but Neon doesn't work otherwise
        self.input.set(states.copy())
        # normalize network input between 0 and 1
        self.be.divide(self.input, 255, self.input)

    def train(self, minibatch, epoch):
        """Run one Q-learning update of the online model on a minibatch.

        Args:
            minibatch: Tuple ``(prestates, actions, rewards, poststates,
                terminals)``. The two state arrays are 4-D with identical
                shape, the other three are 1-D, and all share the same
                first-axis length.
            epoch: Forwarded unchanged to the optimizer's ``optimize`` call.
        """
        # expand components of minibatch
        prestates, actions, rewards, poststates, terminals = minibatch
        assert len(prestates.shape) == 4
        assert len(poststates.shape) == 4
        assert len(actions.shape) == 1
        assert len(rewards.shape) == 1
        assert len(terminals.shape) == 1
        assert prestates.shape == poststates.shape
        assert prestates.shape[0] == actions.shape[0] == rewards.shape[
            0] == poststates.shape[0] == terminals.shape[0]

        # Refresh the target model every target_steps updates (including the
        # very first call, because train_iterations starts at 0).
        if self.target_steps and self.train_iterations % self.target_steps == 0:
            # have to serialize also states for batch normalization to work
            pdict = self.model.get_description(get_weights=True,
                                               keep_states=True)
            self.target_model.deserialize(pdict, load_states=True)

        # feed-forward pass for poststates to get Q-values
        self._setInput(poststates)
        postq = self.target_model.fprop(self.input, inference=True)
        assert postq.shape == (self.num_actions, self.batch_size)

        # calculate max Q-value for each poststate
        maxpostq = self.be.max(postq, axis=0).asnumpyarray()
        assert maxpostq.shape == (1, self.batch_size)

        # feed-forward pass for prestates
        self._setInput(prestates)
        preq = self.model.fprop(self.input, inference=False)
        assert preq.shape == (self.num_actions, self.batch_size)

        # make copy of prestate Q-values as targets
        # The explicit copy() keeps the edits below from touching preq when
        # asnumpyarray() hands back the tensor's own buffer (reported as
        # necessary on the cpu backend); elsewhere it is merely an extra copy.
        targets = preq.asnumpyarray().copy()

        # clip rewards to the configured [min_reward, max_reward] range
        rewards = np.clip(rewards, self.min_reward, self.max_reward)

        # update Q-value targets for actions taken; every other entry keeps
        # the network's own prediction
        for i, action in enumerate(actions):
            if terminals[i]:
                targets[action, i] = float(rewards[i])
            else:
                targets[action, i] = float(
                    rewards[i]) + self.discount_rate * maxpostq[0, i]

        # copy targets to GPU memory
        self.targets.set(targets)

        # calculate errors
        deltas = self.cost.get_errors(preq, self.targets)
        assert deltas.shape == (self.num_actions, self.batch_size)
        #assert np.count_nonzero(deltas.asnumpyarray()) == 32

        # calculate cost, just in case
        cost = self.cost.get_cost(preq, self.targets)
        assert cost.shape == (1, 1)

        # clip errors
        if self.clip_error:
            self.be.clip(deltas, -self.clip_error, self.clip_error, out=deltas)

        # perform back-propagation of gradients
        self.model.bprop(deltas)

        # perform optimization
        self.optimizer.optimize(self.model.layers_to_optimize, epoch)

        # increase number of weight updates (needed for target clone interval)
        self.train_iterations += 1

        # calculate statistics
        if self.callback:
            self.callback.on_train(cost.asnumpyarray()[0, 0])

    def predict(self, states):
        """Return the online model's Q-values for a full batch of states.

        Args:
            states: Array of shape
                ``(batch_size, history_length) + screen_dim``.

        Returns:
            numpy array with the batch on the first axis and one column per
            action (the transpose of the network output).
        """
        # minibatch is full size, because Neon doesn't let change the minibatch size
        assert states.shape == ((
            self.batch_size,
            self.history_length,
        ) + self.screen_dim)

        # calculate Q-values for the states
        self._setInput(states)
        qvalues = self.model.fprop(self.input, inference=True)
        assert qvalues.shape == (self.num_actions, self.batch_size)
        if logger.isEnabledFor(logging.DEBUG):
            logger.debug("Q-values: " + str(qvalues.asnumpyarray()[:, 0]))

        # transpose the result, so that batch size is first dimension
        return qvalues.T.asnumpyarray()

    def load_weights(self, load_path):
        """Load the online model's parameters from ``load_path``."""
        self.model.load_params(load_path)

    def save_weights(self, save_path):
        """Save the online model's parameters to ``save_path``."""
        self.model.save_params(save_path)
示例#27
0
lunaModel = Model(layers=vgg_layers)

# When a parameter file was given, it must exist and is loaded before training.
saved_params = args.model_file
if saved_params:
    import os
    assert os.path.exists(saved_params), '%s not found' % saved_params
    lunaModel.load_params(saved_params)

# Callbacks evaluate Misclassification() on the validation set.
callbacks = Callbacks(lunaModel,
                      eval_set=valid_set,
                      metric=Misclassification(),
                      **args.callback_args)

if args.deconv:
    callbacks.add_deconv_callback(train_set, valid_set)

lunaModel.fit(train_set,
              optimizer=opt,
              num_epochs=num_epochs,
              cost=cost,
              callbacks=callbacks)

lunaModel.save_params('LUNA16_VGG_model.prm')

# Validation metrics: each value is computed right before it is reported.
neon_logger.display('Finished training. Calculating error on the validation set...')
valid_error_pct = lunaModel.eval(valid_set, metric=Misclassification())[0] * 100
neon_logger.display('Misclassification error (validation) = {:.2f}%'.format(valid_error_pct))

valid_pr = lunaModel.eval(valid_set, metric=PrecisionRecall(num_classes=2))
neon_logger.display('Precision/recall (validation) = {}'.format(valid_pr))

# Same two metrics on the test set.
neon_logger.display('Calculating metrics on the test set. This could take a while...')
test_error_pct = lunaModel.eval(test_set, metric=Misclassification())[0] * 100
neon_logger.display('Misclassification error (test) = {:.2f}%'.format(test_error_pct))

test_pr = lunaModel.eval(test_set, metric=PrecisionRecall(num_classes=2))
neon_logger.display('Precision/recall (test) = {}'.format(test_pr))

示例#28
0
def train_mlp():
    """Train the regression MLP and save its weights and input scaling.

    Reads ``data/train.csv``, ``data/validate.csv`` and ``data/test.csv``,
    standardizes the first ``num_feat`` columns with statistics fitted on the
    training file only, trains the network with early stopping, prints the
    validation and test errors, and writes ``model/homeapp_model.prm`` plus
    the scaler's mean/scale vectors (``model/homeapp_preproc``) for use by a
    later forward-prop phase on test data.

    Returns:
        int: Always 1.
    """
    parser = NeonArgparser(__doc__)
    args = parser.parse_args()

    logger = logging.getLogger()
    logger.setLevel(args.log_thresh)

    # preprocessor, fitted on the training split and reused for the others
    std_scale = preprocessing.StandardScaler(with_mean=True, with_std=True)

    # number of non one-hot encoded features, including ground truth
    num_feat = 4

    def _load_split(csv_path, fit=False):
        """Return ``(features, targets)`` of one CSV file after scaling."""
        # read_csv(index_col=0, parse_dates=True) is the documented
        # replacement for the removed DataFrame.from_csv(csv_path).
        frame = pd.read_csv(csv_path, index_col=0, parse_dates=True)
        # Writable array copy of the frame (replaces the removed as_matrix()).
        data = np.array(frame.values)
        rescale = std_scale.fit_transform if fit else std_scale.transform
        data[:, :num_feat] = rescale(data[:, :num_feat])
        # Column 0 is the target; every remaining column is a model input.
        return data[:, 1:], np.reshape(data[:, 0], (data.shape[0], 1))

    # load data from csv-files and rescale
    X_train, y_train = _load_split('data/train.csv', fit=True)
    X_valid, y_valid = _load_split('data/validate.csv')
    X_test, y_test = _load_split('data/test.csv')

    # NOTE(review): ``(X.shape[1])`` is a plain int, not a 1-tuple; kept as
    # in the original -- confirm what CustomDataIterator expects for lshape.
    # setup a training set iterator
    train_set = CustomDataIterator(X_train, lshape=(X_train.shape[1]), y_c=y_train)
    # setup a validation data set iterator
    valid_set = CustomDataIterator(X_valid, lshape=(X_valid.shape[1]), y_c=y_valid)
    # setup a test data set iterator
    test_set = CustomDataIterator(X_test, lshape=(X_test.shape[1]), y_c=y_test)

    # setup weight initialization function
    init_norm = Xavier()

    # setup model layers; ``//`` keeps the hidden size an integer on
    # Python 3 (``/`` already floored for ints on Python 2)
    layers = [Affine(nout=X_train.shape[1], init=init_norm, activation=Rectlin()),
              Dropout(keep=0.5),
              Affine(nout=X_train.shape[1] // 2, init=init_norm, activation=Rectlin()),
              Linear(nout=1, init=init_norm)]

    # setup cost function (smooth L1 loss)
    cost = GeneralizedCost(costfunc=SmoothL1Loss())

    # setup optimizer
    optimizer = Adam(learning_rate=0.0001, beta_1=0.9, beta_2=0.999, epsilon=1.e-8)

    # initialize model object
    mlp = Model(layers=layers)

    # evaluate every epoch unless an evaluation frequency was given
    if args.callback_args['eval_freq'] is None:
        args.callback_args['eval_freq'] = 1

    # configure callbacks
    callbacks = Callbacks(mlp, eval_set=valid_set, **args.callback_args)

    callbacks.add_early_stop_callback(stop_func)
    callbacks.add_save_best_state_callback(os.path.join(args.data_dir, "early_stop-best_state.pkl"))

    # run fit
    mlp.fit(train_set, optimizer=optimizer, num_epochs=args.epochs, cost=cost, callbacks=callbacks)

    # evaluate model
    print('Evaluation Error = %.4f' % (mlp.eval(valid_set, metric=SmoothL1Metric())))
    print('Test set error = %.4f' % (mlp.eval(test_set, metric=SmoothL1Metric())))

    # Saving the model
    print('Saving model parameters!')
    mlp.save_params("model/homeapp_model.prm")

    # Reloading saved model
    # This should go in run.py
    mlp = Model("model/homeapp_model.prm")
    print('Test set error = %.4f' % (mlp.eval(test_set, metric=SmoothL1Metric())))

    # save the preprocessor vectors:
    np.savez("model/homeapp_preproc", mean=std_scale.mean_, std=std_scale.scale_)

    return 1
示例#29
0
class DQNNeon(Learner):
    """ This class is an implementation of the DQN network based on Neon.

    The modules that interact with the agent, the replay memory and the
    statistic calls are implemented here, taking the individual requirements
    of the Neon framework into account. The code is adapted from:
    https://github.com/tambetm/simple_dqn

    Attributes:
        input_shape (tuple[int]): Dimension of the network input.
        dummy_batch (numpy.ndarray): Dummy batch used to calculate Q-values for single states.
        batch_norm (bool): Indicates if normalization is wanted for a certain layer (default=False).
        be (neon.backends.nervanagpu.NervanaGPU): Describes the backend for the Neon implementation.
        input (neon.backends.nervanagpu.GPUTensor): Definition of network input shape.
        targets(neon.backends.nervanagpu.GPUTensor): Definition of network output shape.
        model (neon.models.model.Model): Generated Neon model.
        target_model (neon.models.model.Model): Generated target Neon model.
        cost_func (neon.layers.layer.GeneralizedCost): Cost function for model training.
        callback (Statistics): Hook for the statistics object to pass train and test information.

    Note:
        More attributes of this class are defined in the base class Learner.
    """

    def __init__(self, env, args, rng, name = "DQNNeon"):
        """ Initializes a network based on the Neon framework.

        Creates the Neon backend, the reusable input/target tensors, the
        model with its cost function and optimizer, optionally loads saved
        weights, and finally sets up the target model.

        Args:
            env (AtariEnv): The environment in which the agent actuates.
            args (argparse.Namespace): All settings either with a default value or set via command line arguments.
            rng (mtrand.RandomState): initialized Mersenne Twister pseudo-random number generator.
            name (str): The name of the network object.

        Note:
            This function should always call the base class first to initialize
            the common values for the networks. Attributes such as
            ``sequence_length``, ``frame_dims``, ``batch_size``,
            ``output_shape``, ``target_update_frequency`` and ``args`` are
            read here but not assigned here; presumably the base class
            provides them.
        """
        _logger.info("Initializing new object of type " + str(type(self).__name__))
        super(DQNNeon, self).__init__(env, args, rng, name)
        # Neon layout: the batch is the last axis of the network input.
        self.input_shape = (self.sequence_length,) + self.frame_dims + (self.batch_size,)
        self.dummy_batch = np.zeros((self.batch_size, self.sequence_length) + self.frame_dims, dtype=np.uint8)
        self.batch_norm = args.batch_norm

        self.be = gen_backend(
                backend = args.backend,
                batch_size = args.batch_size,
                rng_seed = args.random_seed,
                device_id = args.device_id,
                datatype = np.dtype(args.datatype).type,
                stochastic_round = args.stochastic_round)

        # prepare tensors once and reuse them
        self.input = self.be.empty(self.input_shape)
        self.input.lshape = self.input_shape # HACK: needed for convolutional networks
        self.targets = self.be.empty((self.output_shape, self.batch_size))

        # create model
        layers = self._create_layer()
        self.model = Model(layers = layers)
        self.cost_func = GeneralizedCost(costfunc = SumSquared())
        # Bug fix
        for l in self.model.layers.layers:
            l.parallelism = 'Disabled'
        self.model.initialize(self.input_shape[:-1], self.cost_func)

        self._set_optimizer()

        # Identity comparison is the idiomatic (and __eq__-proof) None check.
        if self.args.load_weights is not None:
            self.load_weights(self.args.load_weights)

        # create target model
        if self.target_update_frequency:
            layers = self._create_layer()
            self.target_model = Model(layers)
            # Bug fix
            for l in self.target_model.layers.layers:
                l.parallelism = 'Disabled'
            self.target_model.initialize(self.input_shape[:-1])
        else:
            # Without a target update frequency the online model is its own target.
            self.target_model = self.model

        self.callback = None
        _logger.debug("%s" % self)

    def _create_layer(self):
        """ Assemble the layer stack following the DeepMind Nature paper.

        Returns:
            list: Three convolutional layers, a 512-unit affine layer and an
            affine output layer with ``self.output_shape`` units, all freshly
            constructed on every call.
        """
        _logger.debug("Output shape = %d" % self.output_shape)
        weight_init = Gaussian(loc=0.0, scale=0.01)
        return [
            # First hidden layer: 32 filters of 8x8, stride 4, rectifier.
            Conv((8, 8, 32), strides=4, init=weight_init,
                 activation=Rectlin(), batch_norm=self.batch_norm),
            # Second hidden layer: 64 filters of 4x4, stride 2, rectifier.
            Conv((4, 4, 64), strides=2, init=weight_init,
                 activation=Rectlin(), batch_norm=self.batch_norm),
            # Third hidden layer: 64 filters of 3x3, stride 1, rectifier.
            Conv((3, 3, 64), strides=1, init=weight_init,
                 activation=Rectlin(), batch_norm=self.batch_norm),
            # Final hidden layer: fully connected, 512 rectifier units.
            Affine(nout=512, init=weight_init,
                   activation=Rectlin(), batch_norm=self.batch_norm),
            # Output layer: fully connected, no activation, one unit per output.
            Affine(nout=self.output_shape, init=weight_init),
        ]

    def _set_optimizer(self):
        """ Initializes the selected optimization algorithm.

        Reads ``self.args.optimizer`` and stores the matching optimizer in
        ``self.optimizer``.

        Raises:
            ValueError: If the optimizer name is not 'rmsprop', 'adam' or
                'adadelta'. An explicit raise is used so the check still
                fires when Python runs with assertions disabled.
        """
        _logger.debug("Optimizer = %s" % str(self.args.optimizer))
        if self.args.optimizer == 'rmsprop':
            self.optimizer = RMSProp(
                    learning_rate = self.args.learning_rate,
                    decay_rate = self.args.decay_rate,
                    stochastic_round = self.args.stochastic_round)
        elif self.args.optimizer == 'adam':
            self.optimizer = Adam(
                    learning_rate = self.args.learning_rate,
                    stochastic_round = self.args.stochastic_round)
        elif self.args.optimizer == 'adadelta':
            self.optimizer = Adadelta(
                    decay = self.args.decay_rate,
                    stochastic_round = self.args.stochastic_round)
        else:
            raise ValueError("Unknown optimizer")

    def _prepare_network_input(self, states):
        """ Copy one minibatch of states into the input tensor and scale it.

        Args:
            states (numpy.ndarray): 4-D batch of states with the batch on
                axis 0; that axis is moved to the end before the copy.
        """
        _logger.debug("Normalizing and transforming input")
        # Neon expects the batch on the last axis.
        batch_last = np.transpose(states, axes = (1, 2, 3, 0))
        network_input = self.input
        # copy() shouldn't be necessary here, but Neon doesn't work otherwise
        network_input.set(batch_last.copy())
        # Divide in place by self.grayscales to normalize the input.
        self.be.divide(network_input, self.grayscales, network_input)

    def train(self, minibatch, epoch):
        """ Prepare, perform and document a complete train step for one minibatch.

        The Q-value targets are built from the target network's outputs for
        the poststates, the errors against the current network's outputs for
        the prestates are back-propagated and the optimizer is run once.
        Every ``self.target_update_frequency`` updates the weights are copied
        to the target network.

        Args:
            minibatch (tuple): The five arrays (prestates, actions, rewards,
                poststates, terminals). prestates and poststates are 4-D and
                of identical shape; actions, rewards and terminals are 1-D;
                all five share the same first dimension.
            epoch (int): Current train epoch, handed on to the optimizer.
        """
        _logger.debug("Complete trainig step for one minibatch")
        prestates, actions, rewards, poststates, terminals = minibatch
        # sanity checks on the ranks and the common batch dimension
        assert len(prestates.shape) == 4
        assert len(poststates.shape) == 4
        assert len(actions.shape) == 1
        assert len(rewards.shape) == 1
        assert len(terminals.shape) == 1
        assert prestates.shape == poststates.shape
        assert prestates.shape[0] == actions.shape[0] == rewards.shape[0] == poststates.shape[0] == terminals.shape[0]
        # feed-forward pass for poststates to get Q-values
        # NOTE: self.input is reused below for the prestates, so the
        # poststate results have to be pulled out before that happens
        self._prepare_network_input(poststates)
        postq = self.target_model.fprop(self.input, inference = True)
        assert postq.shape == (self.output_shape, self.batch_size)
        # calculate max Q-value for each poststate
        maxpostq = self.be.max(postq, axis=0).asnumpyarray()
        assert maxpostq.shape == (1, self.batch_size)
        # average maxpostq for stats
        maxpostq_avg = maxpostq.mean()
        # feed-forward pass for prestates
        self._prepare_network_input(prestates)
        preq = self.model.fprop(self.input, inference = False)
        assert preq.shape == (self.output_shape, self.batch_size)
        # make copy of prestate Q-values as targets
        targets = preq.asnumpyarray()
        # clip rewards to the range [self.min_reward, self.max_reward]
        rewards = np.clip(rewards, self.min_reward, self.max_reward)
        # update Q-value targets for each state only at actions taken;
        # all other entries keep the predicted value, i.e. zero error
        for i, action in enumerate(actions):
            if terminals[i]:
                # terminal transition: no future reward to bootstrap from
                targets[action, i] = float(rewards[i])
            else:
                targets[action, i] = float(rewards[i]) + self.discount_rate * maxpostq[0,i]
        # copy targets into the backend tensor
        self.targets.set(targets)
        # calculate errors
        errors = self.cost_func.get_errors(preq, self.targets)
        assert errors.shape == (self.output_shape, self.batch_size)
        # average error where there is a error (should be 1 in every row)
        #TODO: errors_avg = np.sum(errors)/np.size(errors[errors>0.])
        # clip errors (skipped when self.clip_error is falsy)
        if self.clip_error:
            self.be.clip(errors, -self.clip_error, self.clip_error, out = errors)
        # calculate cost, just in case (only used for logging and statistics)
        cost = self.cost_func.get_cost(preq, self.targets)
        assert cost.shape == (1,1)
        # perform back-propagation of gradients
        self.model.bprop(errors)
        # perform optimization
        self.optimizer.optimize(self.model.layers_to_optimize, epoch)
        # increase number of weight updates (needed for target clone interval)
        self.update_iterations += 1
        # refresh the target network every target_update_frequency updates;
        # the info line is only logged on those updates
        if self.target_update_frequency and self.update_iterations % self.target_update_frequency == 0:
            self._copy_theta()
            _logger.info("Network update #%d: Cost = %s, Avg Max Q-value = %s" % (self.update_iterations, str(cost.asnumpyarray()[0][0]), str(maxpostq_avg)))
        # update statistics
        if self.callback:
            self.callback.from_learner(cost.asnumpyarray()[0,0], maxpostq_avg)

    def get_Q(self, state):
        """ Calculates the Q-values for a single state.

        The state is written into slot 0 of ``self.dummy_batch`` and the whole
        batch is pushed through the network; only the first column of the
        result is returned.

        Args:
            state(numpy.ndarray): Single state, shape=(sequence_length,frame_width,frame_height).

        Returns:
            q_values (numpy.ndarray): Results for first element of mini-batch from one forward pass through the network, shape=(self.output_shape,)
        """
        _logger.debug("State shape = %s" % str(state.shape))
        # minibatch is full size, because Neon doesn't let change the minibatch size
        # so a whole batch of self.batch_size states is forwarded to get the
        # single result we actually want
        self.dummy_batch[0] = state
        states = self.dummy_batch
        assert states.shape == ((self.batch_size, self.sequence_length,) + self.frame_dims)
        # calculate Q-values for the states
        self._prepare_network_input(states)
        qvalues = self.model.fprop(self.input, inference = True)
        assert qvalues.shape == (self.output_shape, self.batch_size)
        # convert the output tensor to a numpy array only once and reuse it
        # for both the debug message and the return value
        state_qvalues = qvalues.asnumpyarray()[:,0]
        _logger.debug("Qvalues: %s" % (str(state_qvalues)))
        return state_qvalues

    def _copy_theta(self):
        """ Synchronizes the target network with the current network.

        The description of ``self.model`` (weights and states included) is
        handed straight to ``self.target_model.deserialize``.
        """
        _logger.debug("Copying weights")
        self.target_model.deserialize(
            self.model.get_description(get_weights=True, keep_states=True),
            load_states=True)

    def save_weights(self, target_dir, epoch):
        """ Writes the current network parameters to a file in ``target_dir``.

        The file is named ``<game>_<net_type>_<optimizer>_<epoch + 1>.prm``
        with the three names taken in lower case from ``self.args``.

        Args:
            target_dir (str): Directory that receives the parameter file.
            epoch (int): Current epoch; the file name carries ``epoch + 1``.
        """
        settings = self.args
        name_parts = (
            str(settings.game.lower()),
            str(settings.net_type.lower()),
            str(settings.optimizer.lower()),
            (epoch + 1),
        )
        target_path = os.path.join(target_dir, "%s_%s_%s_%d.prm" % name_parts)
        self.model.save_params(target_path)

    def load_weights(self, source_file):
        """ Loads the network parameters from a given file.

        Only ``self.model`` is updated here; the target network is not
        touched by this method.

        Args:
            source_file (str): Complete path to a file with network parameters.
        """
        self.model.load_params(source_file)
示例#30
0
def test_model_serialize(backend_default, data):
    """Check that a model survives a save/load round trip unchanged.

    A two-branch network is trained for a few batches on MNIST, its
    inference outputs and per-layer serialized parameters are recorded, the
    model is saved to a temporary file, reloaded, and the same quantities are
    compared against the recorded ones.

    Args:
        backend_default: Backend fixture; not referenced in the body.
        data: Path handed to the MNIST dataset loader.
    """
    dataset = MNIST(path=data)
    (X_train, y_train), (X_test, y_test), nclass = dataset.load_data()
    train_set = ArrayIterator(
        [X_train, X_train], y_train, nclass=nclass, lshape=(1, 28, 28))

    init_norm = Gaussian(loc=0.0, scale=0.01)

    # initialize model
    path1 = Sequential([Conv((5, 5, 16), init=init_norm, bias=Constant(0), activation=Rectlin()),
                        Pooling(2),
                        Affine(nout=20, init=init_norm, bias=init_norm, activation=Rectlin())])
    path2 = Sequential([Affine(nout=100, init=init_norm, bias=Constant(0), activation=Rectlin()),
                        Dropout(keep=0.5),
                        Affine(nout=20, init=init_norm, bias=init_norm, activation=Rectlin())])
    layers = [MergeMultistream(layers=[path1, path2], merge="stack"),
              Affine(nout=20, init=init_norm, batch_norm=True, activation=Rectlin()),
              Affine(nout=10, init=init_norm, activation=Logistic(shortcut=True))]

    tmp_save = 'test_model_serialize_tmp_save.pickle'
    mlp = Model(layers=layers)
    mlp.optimizer = GradientDescentMomentum(learning_rate=0.1, momentum_coef=0.9)
    mlp.cost = GeneralizedCost(costfunc=CrossEntropyBinary())
    mlp.initialize(train_set, cost=mlp.cost)
    n_test = 3
    num_epochs = 3

    def collect_outputs(model):
        # Inference outputs for the leading batches of train_set. The break
        # fires after the batch with index n_test + 1 has been processed.
        collected = []
        for i, (x, t) in enumerate(train_set):
            collected.append(model.fprop(x, inference=True))
            if i > n_test:
                break
        return collected

    # Train model for num_epochs on the leading batches of each epoch
    for epoch in range(num_epochs):
        for i, (x, t) in enumerate(train_set):
            x = mlp.fprop(x)
            delta = mlp.cost.get_errors(x, t)
            mlp.bprop(delta)
            mlp.optimizer.optimize(mlp.layers_to_optimize, epoch=epoch)
            if i > n_test:
                break

    # Get expected states of all layers and expected outputs
    pdicts_exp = [layer.get_params_serialize() for layer in mlp.layers_to_optimize]
    outputs_exp = collect_outputs(mlp)

    try:
        # Serialize model
        mlp.save_params(tmp_save, keep_states=True)

        # Load model
        mlp = Model(tmp_save)

        mlp.initialize(train_set)
        pdicts = [layer.get_params_serialize() for layer in mlp.layers_to_optimize]
        outputs = collect_outputs(mlp)

        # zip() stops at the shorter sequence, so make sure nothing is
        # silently left out of the comparisons below
        assert len(outputs) == len(outputs_exp)
        assert len(pdicts) == len(pdicts_exp)

        # Check outputs, states, and params are the same
        for output, output_exp in zip(outputs, outputs_exp):
            assert allclose_with_out(output.get(), output_exp.get())

        for pd, pd_exp in zip(pdicts, pdicts_exp):
            for s, s_e in zip(pd['states'], pd_exp['states']):
                if isinstance(s, list):  # this is the batch norm case
                    for _s, _s_e in zip(s, s_e):
                        assert allclose_with_out(_s, _s_e)
                else:
                    assert allclose_with_out(s, s_e)
            for p, p_e in zip(pd['params'], pd_exp['params']):
                assert type(p) == type(p_e)
                if isinstance(p, list):  # this is the batch norm case
                    for _p, _p_e in zip(p, p_e):
                        assert allclose_with_out(_p, _p_e)
                elif isinstance(p, np.ndarray):
                    assert allclose_with_out(p, p_e)
                else:
                    assert p == p_e
    finally:
        # always clean up the temporary file, also when an assertion failed;
        # the existence check keeps a failed save from masking its own error
        if os.path.exists(tmp_save):
            os.remove(tmp_save)
示例#31
0
class ModelRunnerNeon():
    def __init__(self, args,  max_action_no, batch_dimension):
        """ Set up the backend, the I/O buffers, both networks and the optimizer.

        Args:
            args: Settings object. Read here: train_batch_size,
                discount_factor, use_gpu_replay_mem, optimizer, rms_decay and
                learning_rate (other attributes are read by other methods).
            max_action_no: Number of network outputs; also the first
                dimension of the targets buffer.
            batch_dimension: Indexable with four entries; entry 0 becomes the
                last axis of the network input shape (presumably the batch
                size -- confirm against the caller).
        """
        self.args = args
        self.train_batch_size = args.train_batch_size
        self.discount_factor = args.discount_factor
        self.use_gpu_replay_mem = args.use_gpu_replay_mem
        
        # the backend is hard-wired to 'gpu'
        self.be = gen_backend(backend='gpu',             
                         batch_size=self.train_batch_size)

        # network input layout: axes 1..3 of batch_dimension first, axis 0 last
        self.input_shape = (batch_dimension[1], batch_dimension[2], batch_dimension[3], batch_dimension[0])
        self.input = self.be.empty(self.input_shape)
        self.input.lshape = self.input_shape # HACK: needed for convolutional networks
        self.targets = self.be.empty((max_action_no, self.train_batch_size))

        if self.use_gpu_replay_mem:
            # history lives on the backend as uint8, plus a uint8 staging
            # buffer in network input layout (used by set_input)
            self.history_buffer = self.be.zeros(batch_dimension, dtype=np.uint8)
            self.input_uint8 = self.be.empty(self.input_shape, dtype=np.uint8)
        else:
            # history lives on the host as a float32 numpy array
            self.history_buffer = np.zeros(batch_dimension, dtype=np.float32)

        # network that is trained
        self.train_net = Model(self.create_layers(max_action_no))
        self.cost = GeneralizedCost(costfunc=SumSquared())
        # Bug fix: mark every layer's parallelism as 'Disabled' before
        # initializing (workaround; the underlying issue is not described here)
        for l in self.train_net.layers.layers:
            l.parallelism = 'Disabled'
        self.train_net.initialize(self.input_shape[:-1], self.cost)
        
        # second network with the same layout, initialized without a cost
        self.target_net = Model(self.create_layers(max_action_no))
        # Bug fix: same workaround as for the training network
        for l in self.target_net.layers.layers:
            l.parallelism = 'Disabled'
        self.target_net.initialize(self.input_shape[:-1])

        # NOTE: for Adam, args.rms_decay is used for both beta_1 and beta_2
        if self.args.optimizer == 'Adam':        # Adam
            self.optimizer = Adam(beta_1=args.rms_decay,
                                            beta_2=args.rms_decay,
                                            learning_rate=args.learning_rate)
        else:		# Neon RMSProp
            self.optimizer = RMSProp(decay_rate=args.rms_decay,
                                            learning_rate=args.learning_rate)

        self.max_action_no = max_action_no
        # flag that finish_train() sets to False
        self.running = True

    def get_initializer(self, input_size):
        """ Return the weight initializer selected by ``args.dnn_initializer``.

        'xavier' yields ``Xavier()``; 'fan_in' yields a uniform initializer
        on [-1/sqrt(input_size), 1/sqrt(input_size)]; any other value yields
        ``Gaussian(0, 0.01)``.

        Args:
            input_size: Value whose square root scales the 'fan_in' range;
                ignored by the other schemes.
        """
        scheme = self.args.dnn_initializer
        if scheme == 'xavier':
            return Xavier()
        if scheme == 'fan_in':
            bound = 1.0 / math.sqrt(input_size)
            return Uniform(low=-bound, high=bound)
        # fallback for every other setting
        return Gaussian(0, 0.01)
            
    def create_layers(self, max_action_no):
        """ Build the layer list: three conv layers and two affine layers.

        Every layer gets its own initializer from ``get_initializer`` and
        uses it for both weights and bias. The last affine layer has
        ``max_action_no`` outputs and no activation argument.

        Args:
            max_action_no: Number of outputs of the final layer.

        Returns:
            list: The five layers in network order.
        """
        # (input_size for the initializer, filter shape, stride) per conv layer
        conv_specs = (
            (4 * 8 * 8, (8, 8, 32), 4),
            (32 * 4 * 4, (4, 4, 64), 2),
            (64 * 3 * 3, (3, 3, 64), 1),
        )

        net = []
        for fan_in, filter_shape, stride in conv_specs:
            conv_init = self.get_initializer(input_size = fan_in)
            net.append(Conv(fshape=filter_shape, strides=stride, init=conv_init,
                            bias=conv_init, activation=Rectlin()))

        hidden_init = self.get_initializer(input_size = 7 * 7 * 64)
        net.append(Affine(nout=512, init=hidden_init, bias=hidden_init, activation=Rectlin()))

        output_init = self.get_initializer(input_size = 512)
        net.append(Affine(nout=max_action_no, init=output_init, bias=output_init))

        return net
        
    def clip_reward(self, reward):
        if reward > self.args.clip_reward_high:
            return self.args.clip_reward_high
        elif reward < self.args.clip_reward_low:
            return self.args.clip_reward_low
        else:
            return reward

    def set_input(self, data):
        """ Fill ``self.input`` with ``data`` in batch-last layout, divided by 255.

        Args:
            data: Batch with axis order matching ``batch_dimension``; a
                backend tensor when the GPU replay memory is in use,
                otherwise an object offering ``transpose`` (presumably a
                numpy array -- confirm against the caller).
        """
        if not self.use_gpu_replay_mem:
            # host path: transpose and copy, upload, then divide in place
            batch_last = data.transpose(1, 2, 3, 0).copy()
            self.input.set(batch_last)
            self.be.divide(self.input, 255, self.input)
            return

        # backend path: transpose into the uint8 staging buffer, then divide
        # while assigning into the input buffer
        self.be.copy_transpose(data, self.input_uint8, axes=(1, 2, 3, 0))
        self.input[:] = self.input_uint8 / 255

    def predict(self, history_buffer):
        """ Run the training network in inference mode on ``history_buffer``.

        Args:
            history_buffer: Batch passed to ``set_input``.

        Returns:
            The first row of the transposed network output as a host array,
            i.e. the outputs belonging to the first entry of the batch.
        """
        self.set_input(history_buffer)
        return self.train_net.fprop(self.input, inference=True).T.asnumpyarray()[0]

    def print_weights(self):
        """ Placeholder; this implementation does nothing. """
        pass

    def train(self, minibatch, replay_memory, learning_rate, debug):
        """ Perform one training step on ``minibatch``.

        Builds the Q-value targets from the target network (optionally with
        double-DQN action selection by the training network), computes the
        errors against the training network's outputs, optionally weights
        them for prioritized replay and clips them, then back-propagates and
        runs the optimizer.

        Args:
            minibatch: Tuple (prestates, actions, rewards, poststates,
                terminals); with ``args.prioritized_replay`` it additionally
                carries (replay_indexes, heap_indexes, weights).
            replay_memory: Only used with prioritized replay, where
                ``update_td`` is called with the absolute error per sample.
            learning_rate: Not used by this method.
            debug: When truthy and prioritized replay is on, the weighted
                errors are printed per sample.
        """
        if self.args.prioritized_replay == True:
            prestates, actions, rewards, poststates, terminals, replay_indexes, heap_indexes, weights = minibatch
        else:
            prestates, actions, rewards, poststates, terminals = minibatch

        # Get Q*(s, a) with targetNet
        self.set_input(poststates)
        post_qvalue = self.target_net.fprop(self.input, inference=True).T.asnumpyarray()

        if self.args.double_dqn == True:
            # Get Q*(s, a) with trainNet (self.input still holds the poststates)
            post_qvalue2 = self.train_net.fprop(self.input, inference=True).T.asnumpyarray()

        # Get Q(s, a) with trainNet
        self.set_input(prestates)
        pre_qvalue = self.train_net.fprop(self.input, inference=False)

        # start from the predictions so that only the taken actions
        # contribute a non-zero error
        label = pre_qvalue.asnumpyarray().copy()
        for i in range(self.train_batch_size):
            if self.args.clip_reward:
                reward = self.clip_reward(rewards[i])
            else:
                reward = rewards[i]
            if terminals[i]:
                # terminal transition: no future value to add
                label[actions[i], i] = reward
            else:
                if self.args.double_dqn == True:
                    # action chosen by the training net, valued by the target net
                    max_index = np.argmax(post_qvalue2[i])
                    label[actions[i], i] = reward + self.discount_factor* post_qvalue[i][max_index]
                else:
                    label[actions[i], i] = reward + self.discount_factor* np.max(post_qvalue[i])

        # copy targets into the backend tensor
        self.targets.set(label)

        delta = self.cost.get_errors(pre_qvalue, self.targets)

        if self.args.prioritized_replay == True:
            delta_value = delta.asnumpyarray()
            for i in range(self.train_batch_size):
                if debug:
                    # single parenthesised argument: prints the same text on
                    # Python 2 and Python 3
                    print('weight[%s]: %.5f, delta: %.5f, newDelta: %.5f' % (i, weights[i], delta_value[actions[i], i], weights[i] * delta_value[actions[i], i]))
                # the priority is updated with the unweighted absolute error
                replay_memory.update_td(heap_indexes[i], abs(delta_value[actions[i], i]))
                delta_value[actions[i], i] = weights[i] * delta_value[actions[i], i]
            delta.set(delta_value.copy())

        if self.args.clip_loss:
            self.be.clip(delta, -1.0, 1.0, out = delta)

        self.train_net.bprop(delta)
        # the optimizer is always called with epoch 0
        self.optimizer.optimize(self.train_net.layers_to_optimize, epoch=0)

    def update_model(self):
        """ Copy the training network's weights and states to the target network. """
        # the states are included as well; batch normalization needs them
        self.target_net.deserialize(
            self.train_net.get_description(get_weights=True, keep_states=True),
            load_states=True)

    def finish_train(self):
        """ Clear the ``running`` flag that ``__init__`` set to True. """
        self.running = False
    
    def load(self, file_name):
        """ Load parameters from ``file_name`` into the training network and
        propagate them to the target network.

        Args:
            file_name: Path of the parameter file to read.
        """
        self.train_net.load_params(file_name)
        # keep the target network in sync with the freshly loaded weights
        self.update_model()
        
    def save(self, file_name):
        """ Save the training network's parameters to ``file_name``.

        Args:
            file_name: Path of the parameter file to write.
        """
        self.train_net.save_params(file_name)