Exemplo n.º 1
0
  def __init__(self, num_actions, args):
    # remember parameters
    self.num_actions = num_actions
    self.batch_size = args.batch_size
    self.discount_rate = args.discount_rate
    self.history_length = args.history_length
    self.screen_dim = (args.screen_height, args.screen_width)
    self.clip_error = args.clip_error
    self.min_reward = args.min_reward
    self.max_reward = args.max_reward
    self.batch_norm = args.batch_norm

    # create Neon backend
    self.be = gen_backend(backend = args.backend,
                 batch_size = args.batch_size,
                 rng_seed = args.random_seed,
                 device_id = args.device_id,
                 datatype = np.dtype(args.datatype).type,
                 stochastic_round = args.stochastic_round)

    # prepare tensors once and reuse them
    self.input_shape = (self.history_length,) + self.screen_dim + (self.batch_size,)
    self.input = self.be.empty(self.input_shape)
    self.input.lshape = self.input_shape # HACK: needed for convolutional networks
    self.targets = self.be.empty((self.num_actions, self.batch_size))

    # create model
    layers = self._createLayers(num_actions)
    self.model = Model(layers = layers)
    self.cost = GeneralizedCost(costfunc = SumSquared())
    # Bug fix
    for l in self.model.layers.layers:
      l.parallelism = 'Disabled'
    self.model.initialize(self.input_shape[:-1], self.cost)
    if args.optimizer == 'rmsprop':
      self.optimizer = RMSProp(learning_rate = args.learning_rate, 
          decay_rate = args.decay_rate, 
          stochastic_round = args.stochastic_round)
    elif args.optimizer == 'adam':
      self.optimizer = Adam(learning_rate = args.learning_rate, 
          stochastic_round = args.stochastic_round)
    elif args.optimizer == 'adadelta':
      self.optimizer = Adadelta(decay = args.decay_rate, 
          stochastic_round = args.stochastic_round)
    else:
      assert false, "Unknown optimizer"

    # create target model
    self.train_iterations = 0
    if args.target_steps:
      self.target_model = Model(layers = self._createLayers(num_actions))
      # Bug fix
      for l in self.target_model.layers.layers:
        l.parallelism = 'Disabled'
      self.target_model.initialize(self.input_shape[:-1])
      self.save_weights_prefix = args.save_weights_prefix
    else:
      self.target_model = self.model

    self.callback = None
Exemplo n.º 2
0
  def __init__(self, state_size, num_steers, num_speeds, args):
    # remember parameters
    self.state_size = state_size
    self.num_steers = num_steers
    self.num_speeds = num_speeds
    self.num_actions = num_steers + num_speeds
    self.num_layers = args.hidden_layers
    self.hidden_nodes = args.hidden_nodes
    self.batch_size = args.batch_size
    self.discount_rate = args.discount_rate
    self.clip_error = args.clip_error

    # create Neon backend
    self.be = gen_backend(backend = args.backend,
                 batch_size = args.batch_size,
                 rng_seed = args.random_seed,
                 device_id = args.device_id,
                 datatype = np.dtype(args.datatype).type,
                 stochastic_round = args.stochastic_round)

    # prepare tensors once and reuse them
    self.input_shape = (self.state_size, self.batch_size)
    self.input = self.be.empty(self.input_shape)
    self.targets = self.be.empty((self.num_actions, self.batch_size))

    # create model
    self.model = Model(layers = self._createLayers())
    self.cost = GeneralizedCost(costfunc = SumSquared())
    self.model.initialize(self.input_shape[:-1], self.cost)
    if args.optimizer == 'rmsprop':
      self.optimizer = RMSProp(learning_rate = args.learning_rate, 
          decay_rate = args.decay_rate, 
          stochastic_round = args.stochastic_round)
    elif args.optimizer == 'adam':
      self.optimizer = Adam(learning_rate = args.learning_rate, 
          stochastic_round = args.stochastic_round)
    elif args.optimizer == 'adadelta':
      self.optimizer = Adadelta(decay = args.decay_rate, 
          stochastic_round = args.stochastic_round)
    else:
      assert false, "Unknown optimizer"

    # create target model
    self.target_steps = args.target_steps
    self.train_iterations = 0
    if self.target_steps:
      self.target_model = Model(layers = self._createLayers())
      self.target_model.initialize(self.input_shape[:-1])
      self.save_weights_prefix = args.save_weights_prefix
    else:
      self.target_model = self.model
Exemplo n.º 3
0
def run(train, test):
    init = Gaussian(scale=0.01)
    layers = [
        Conv((3, 3, 128),
             init=init,
             activation=Rectlin(),
             strides=dict(str_h=1, str_w=2)),
        Conv((3, 3, 256), init=init, batch_norm=True, activation=Rectlin()),
        Pooling(2, strides=2),
        Conv((2, 2, 512), init=init, batch_norm=True, activation=Rectlin()),
        DeepBiRNN(256,
                  init=init,
                  activation=Rectlin(),
                  reset_cells=True,
                  depth=3),
        RecurrentLast(),
        Affine(32, init=init, batch_norm=True, activation=Rectlin()),
        Affine(nout=common['nclasses'], init=init, activation=Softmax())
    ]

    model = Model(layers=layers)
    opt = Adadelta()
    metric = Misclassification()
    callbacks = Callbacks(model,
                          eval_set=test,
                          metric=metric,
                          **args.callback_args)
    cost = GeneralizedCost(costfunc=CrossEntropyBinary())

    model.fit(train,
              optimizer=opt,
              num_epochs=args.epochs,
              cost=cost,
              callbacks=callbacks)
    return model
Exemplo n.º 4
0
def test_adadelta_wclip(backend_default):
    wclip = 0.5
    ada = Adadelta(param_clip_value=wclip)
    param = np.random.rand(200, 128)
    param2 = copy.deepcopy(param)
    grad = 0.01 * np.random.rand(200, 128)
    grad2 = grad / 128.
    states = [
        0.01 * np.random.rand(200, 128), 0.01 * np.random.rand(200, 128),
        0.01 * np.random.rand(200, 128)
    ]
    states2 = [
        copy.deepcopy(states[0]),
        copy.deepcopy(states[1]),
        copy.deepcopy(states[2])
    ]
    decay = ada.decay
    states2[0][:] = states2[0] * decay + (1. - decay) * grad2 * grad2
    states2[2][:] = np.sqrt(
        (states2[1] + float(ada.epsilon)) / (states2[0] + ada.epsilon)) * grad2
    states2[1][:] = states2[1] * decay + (1. - decay) * states2[2] * states2[2]
    param2[:] -= states2[2]
    np.clip(param2, -wclip, wclip, param2)
    param_list = [((wrap(param), wrap(grad)),
                   [wrap(states[0]),
                    wrap(states[1]),
                    wrap(states[2])])]
    compare_tensors(ada, param_list, param2, tol=1e-7)
Exemplo n.º 5
0
def main():
    # parse the command line arguments
    parser = NeonArgparser(__doc__)

    args = parser.parse_args()

    logger = logging.getLogger()
    logger.setLevel(args.log_thresh)

    #Set up batch iterator for training images
    train = ImgMaster(repo_dir='dataTmp_optFlow_BW',
                      set_name='train',
                      inner_size=240,
                      subset_pct=100)
    val = ImgMaster(repo_dir='dataTmp_optFlow_BW',
                    set_name='train',
                    inner_size=240,
                    subset_pct=100,
                    do_transforms=False)
    test = ImgMaster(repo_dir='dataTestTmp_optFlow_BW',
                     set_name='train',
                     inner_size=240,
                     subset_pct=100,
                     do_transforms=False)

    train.init_batch_provider()
    val.init_batch_provider()
    test.init_batch_provider()

    print "Constructing network..."
    #Create AlexNet architecture
    model = constuct_network()

    # Optimzer for model
    opt = Adadelta()

    # configure callbacks
    valmetric = TopKMisclassification(k=5)
    callbacks = Callbacks(model,
                          train,
                          eval_set=test,
                          metric=valmetric,
                          **args.callback_args)

    cost = GeneralizedCost(costfunc=CrossEntropyMulti())

    #flag = input("Press Enter if you want to begin training process.")
    print "Training network..."
    model.fit(train,
              optimizer=opt,
              num_epochs=args.epochs,
              cost=cost,
              callbacks=callbacks)
    mets = model.eval(test, metric=valmetric)

    print 'Validation set metrics:'
    print 'LogLoss: %.2f, Accuracy: %.1f %%0 (Top-1), %.1f %% (Top-5)' % (
        mets[0], (1.0 - mets[1]) * 100, (1.0 - mets[2]) * 100)
    return
Exemplo n.º 6
0
 def _set_optimizer(self):
     """ Initializes the selected optimization algorithm. """
     _logger.debug("Optimizer = %s" % str(self.args.optimizer))
     if self.args.optimizer == 'rmsprop':
         self.optimizer = RMSProp(
                 learning_rate = self.args.learning_rate,
                 decay_rate = self.args.decay_rate,
                 stochastic_round = self.args.stochastic_round)
     elif self.args.optimizer == 'adam':
         self.optimizer = Adam(
                 learning_rate = self.args.learning_rate,
                 stochastic_round = self.args.stochastic_round)
     elif self.args.optimizer == 'adadelta':
         self.optimizer = Adadelta(
                 decay = self.args.decay_rate,
                 stochastic_round = self.args.stochastic_round)
     else:
         assert false, "Unknown optimizer"
Exemplo n.º 7
0
 def _set_optimizer(self):
     """ Initializes the selected optimization algorithm. """
     _logger.debug("Optimizer = %s" % str(self.args.optimizer))
     if self.args.optimizer == 'rmsprop':
         self.optimizer = RMSProp(
                 learning_rate = self.args.learning_rate,
                 decay_rate = self.args.decay_rate,
                 stochastic_round = self.args.stochastic_round)
     elif self.args.optimizer == 'adam':
         self.optimizer = Adam(
                 learning_rate = self.args.learning_rate,
                 stochastic_round = self.args.stochastic_round)
     elif self.args.optimizer == 'adadelta':
         self.optimizer = Adadelta(
                 decay = self.args.decay_rate,
                 stochastic_round = self.args.stochastic_round)
     else:
         assert false, "Unknown optimizer"
Exemplo n.º 8
0
def test_multi_optimizer(backend_default):
    opt_gdm = GradientDescentMomentum(learning_rate=0.001,
                                      momentum_coef=0.9,
                                      wdecay=0.005)
    opt_ada = Adadelta()
    opt_adam = Adam()
    opt_rms = RMSProp()
    opt_rms_1 = RMSProp(gradient_clip_value=5)
    init_one = Gaussian(scale=0.01)

    l1 = Conv((11, 11, 64),
              strides=4,
              padding=3,
              init=init_one,
              bias=Constant(0),
              activation=Rectlin())
    l2 = Affine(nout=4096,
                init=init_one,
                bias=Constant(1),
                activation=Rectlin())
    l3 = LSTM(output_size=1000,
              init=init_one,
              activation=Logistic(),
              gate_activation=Tanh())
    l4 = GRU(output_size=100,
             init=init_one,
             activation=Logistic(),
             gate_activation=Tanh())
    layers = [l1, l2, l3, l4]
    layer_list = []
    for layer in layers:
        if isinstance(layer, list):
            layer_list.extend(layer)
        else:
            layer_list.append(layer)

    opt = MultiOptimizer({
        'default': opt_gdm,
        'Bias': opt_ada,
        'Convolution': opt_adam,
        'Linear': opt_rms,
        'LSTM': opt_rms_1,
        'GRU': opt_rms_1
    })

    map_list = opt._map_optimizers(layer_list)
    assert map_list[opt_adam][0].__class__.__name__ == 'Convolution'
    assert map_list[opt_ada][0].__class__.__name__ == 'Bias'
    assert map_list[opt_rms][0].__class__.__name__ == 'Linear'
    assert map_list[opt_gdm][0].__class__.__name__ == 'Activation'
    assert map_list[opt_rms_1][0].__class__.__name__ == 'LSTM'
    assert map_list[opt_rms_1][1].__class__.__name__ == 'GRU'
Exemplo n.º 9
0
def test_multi_optimizer(backend_default_mkl):
    """
    A test for MultiOptimizer.
    """
    opt_gdm = GradientDescentMomentum(
        learning_rate=0.001, momentum_coef=0.9, wdecay=0.005)
    opt_ada = Adadelta()
    opt_adam = Adam()
    opt_rms = RMSProp()
    opt_rms_1 = RMSProp(gradient_clip_value=5)
    init_one = Gaussian(scale=0.01)

    l1 = Conv((11, 11, 64), strides=4, padding=3,
              init=init_one, bias=Constant(0), activation=Rectlin())
    l2 = Affine(nout=4096, init=init_one,
                bias=Constant(1), activation=Rectlin())
    l3 = LSTM(output_size=1000, init=init_one, activation=Logistic(), gate_activation=Tanh())
    l4 = GRU(output_size=100, init=init_one, activation=Logistic(), gate_activation=Tanh())
    layers = [l1, l2, l3, l4]
    layer_list = []
    for layer in layers:
        if isinstance(layer, list):
            layer_list.extend(layer)
        else:
            layer_list.append(layer)
    for l in layer_list:
        l.configure(in_obj=(16, 28, 28))
        l.allocate()
    # separate layer_list into two, the last two recurrent layers and the rest
    layer_list1, layer_list2 = layer_list[:-2], layer_list[-2:]
    opt = MultiOptimizer({'default': opt_gdm,
                          'Bias': opt_ada,
                          'Convolution': opt_adam,
                          'Convolution_bias': opt_adam,
                          'Linear': opt_rms,
                          'LSTM': opt_rms_1,
                          'GRU': opt_rms_1})
    layers_to_optimize1 = [l for l in layer_list1 if isinstance(l, ParameterLayer)]
    layers_to_optimize2 = [l for l in layer_list2 if isinstance(l, ParameterLayer)]
    opt.optimize(layers_to_optimize1, 0)
    assert opt.map_list[opt_adam][0].__class__.__name__ is 'Convolution_bias'
    assert opt.map_list[opt_rms][0].__class__.__name__ == 'Linear'
    opt.optimize(layers_to_optimize2, 0)
    assert opt.map_list[opt_rms_1][0].__class__.__name__ == 'LSTM'
    assert opt.map_list[opt_rms_1][1].__class__.__name__ == 'GRU'
Exemplo n.º 10
0
def test_adadelta(backend):
    ada = Adadelta()
    param = np.random.rand(200, 128)
    param2 = copy.deepcopy(param)
    grad = 0.01 * np.random.rand(200, 128)
    states = [0.01 * np.random.rand(200, 128),
              0.01 * np.random.rand(200, 128),
              0.01 * np.random.rand(200, 128)]
    states2 = [copy.deepcopy(states[0]),
               copy.deepcopy(states[1]),
               copy.deepcopy(states[2])]
    decay = ada.decay
    states2[0][:] = states2[0] * decay + (1. - decay) * grad * grad
    states2[2][:] = np.sqrt(
        (states2[1] + ada.epsilon) / (states2[0] + ada.epsilon)) * grad
    states2[1][:] = states2[1] * decay + (1. - decay) * states2[2] * states2[2]
    param2[:] -= states2[2]
    param_list = [
        ((wrap(param), wrap(grad)), [wrap(states[0]), wrap(states[1]), wrap(states[2])])]
    compare_tensors(ada, param_list, param2, tol=1e-7)
Exemplo n.º 11
0
layers = [
    Conv((5, 5, 64),
         init=init,
         activation=Rectlin(),
         strides=dict(str_h=2, str_w=4)),
    Pooling(2, strides=2),
    Conv((5, 5, 64),
         init=init,
         batch_norm=True,
         activation=Rectlin(),
         strides=dict(str_h=1, str_w=2)),
    BiRNN(256, init=init, activation=Rectlin(), reset_cells=True),
    RecurrentMean(),
    Affine(128, init=init, batch_norm=True, activation=Rectlin()),
    Affine(nout=common['nclasses'], init=init, activation=Softmax())
]

model = Model(layers=layers)
opt = Adadelta()
metric = Misclassification()
callbacks = Callbacks(model, eval_set=val, metric=metric, **args.callback_args)
cost = GeneralizedCost(costfunc=CrossEntropyMulti())

model.fit(train,
          optimizer=opt,
          num_epochs=args.epochs,
          cost=cost,
          callbacks=callbacks)
print('Misclassification error = %.1f%%' %
      (model.eval(val, metric=metric) * 100))
Exemplo n.º 12
0
class DQNNeon(Learner):
    """ This class is an implementation of the DQN network based on Neon.

    The modules that interact with the agent, the replay memory and the
    statistic calls are implemented here, taking the individual requirements
    of the Lasagne framework into account. The code is adapted from:
    https://github.com/tambetm/simple_dqn

    Attributes:
        input_shape (tuple[int]): Dimension of the network input.
        dummy_batch (numpy.ndarray): Dummy batche used to calculate Q-values for single states.
        batch_norm (bool): Indicates if normalization is wanted for a certain layer (default=False).
        be (neon.backends.nervanagpu.NervanaGPU): Describes the backend for the Neon implementation.
        input (neon.backends.nervanagpu.GPUTensor): Definition of network input shape.
        targets(neon.backends.nervanagpu.GPUTensor): Definition of network output shape.
        model (neon.models.model.Model): Generated Neon model.
        target_model (neon.models.model.Model): Generated target Neon model.
        cost_func (neon.layers.layer.GeneralizedCost): Cost function for model training.
        callback (Statistics): Hook for the statistics object to pass train and test information.

    Note:
        More attributes of this class are defined in the base class Learner.
    """

    def __init__(self, env, args, rng, name = "DQNNeon"):
        """ Initializes a network based on the Neon framework.

        Args:
            env (AtariEnv): The envirnoment in which the agent actuates.
            args (argparse.Namespace): All settings either with a default value or set via command line arguments.
            rng (mtrand.RandomState): initialized Mersenne Twister pseudo-random number generator.
            name (str): The name of the network object.

        Note:
            This function should always call the base class first to initialize
            the common values for the networks.
        """
        _logger.info("Initializing new object of type " + str(type(self).__name__))
        super(DQNNeon, self).__init__(env, args, rng, name)
        self.input_shape = (self.sequence_length,) + self.frame_dims + (self.batch_size,)
        self.dummy_batch = np.zeros((self.batch_size, self.sequence_length) + self.frame_dims, dtype=np.uint8)
        self.batch_norm = args.batch_norm

        self.be = gen_backend(
                backend = args.backend,
                batch_size = args.batch_size,
                rng_seed = args.random_seed,
                device_id = args.device_id,
                datatype = np.dtype(args.datatype).type,
                stochastic_round = args.stochastic_round)

        # prepare tensors once and reuse them
        self.input = self.be.empty(self.input_shape)
        self.input.lshape = self.input_shape # HACK: needed for convolutional networks
        self.targets = self.be.empty((self.output_shape, self.batch_size))

        # create model
        layers = self._create_layer()
        self.model = Model(layers = layers)
        self.cost_func = GeneralizedCost(costfunc = SumSquared())
        # Bug fix
        for l in self.model.layers.layers:
            l.parallelism = 'Disabled'
        self.model.initialize(self.input_shape[:-1], self.cost_func)

        self._set_optimizer()

        if not self.args.load_weights == None:
            self.load_weights(self.args.load_weights)

        # create target model
        if self.target_update_frequency:
            layers = self._create_layer()
            self.target_model = Model(layers)
            # Bug fix
            for l in self.target_model.layers.layers:
                l.parallelism = 'Disabled'
            self.target_model.initialize(self.input_shape[:-1])
        else:
            self.target_model = self.model

        self.callback = None
        _logger.debug("%s" % self)

    def _create_layer(self):
        """ Build a network consistent with the DeepMind Nature paper. """
        _logger.debug("Output shape = %d" % self.output_shape)
        # create network
        init_norm = Gaussian(loc=0.0, scale=0.01)
        layers = []
        # The first hidden layer convolves 32 filters of 8x8 with stride 4 with the input image and applies a rectifier nonlinearity.
        layers.append(
                Conv((8, 8, 32),
                strides=4,
                init=init_norm,
                activation=Rectlin(),
                batch_norm=self.batch_norm))
        # The second hidden layer convolves 64 filters of 4x4 with stride 2, again followed by a rectifier nonlinearity.
        layers.append(
                Conv((4, 4, 64),
                strides=2,
                init=init_norm,
                activation=Rectlin(),
                batch_norm=self.batch_norm))
        # This is followed by a third convolutional layer that convolves 64 filters of 3x3 with stride 1 followed by a rectifier.
        layers.append(
                Conv((3, 3, 64),
                strides=1,
                init=init_norm,
                activation=Rectlin(),
                batch_norm=self.batch_norm))
        # The final hidden layer is fully-connected and consists of 512 rectifier units.
        layers.append(
                Affine(
                    nout=512,
                    init=init_norm,
                    activation=Rectlin(),
                    batch_norm=self.batch_norm))
        # The output layer is a fully-connected linear layer with a single output for each valid action.
        layers.append(
                Affine(
                    nout= self.output_shape,
                    init = init_norm))
        return layers

    def _set_optimizer(self):
        """ Initializes the selected optimization algorithm. """
        _logger.debug("Optimizer = %s" % str(self.args.optimizer))
        if self.args.optimizer == 'rmsprop':
            self.optimizer = RMSProp(
                    learning_rate = self.args.learning_rate,
                    decay_rate = self.args.decay_rate,
                    stochastic_round = self.args.stochastic_round)
        elif self.args.optimizer == 'adam':
            self.optimizer = Adam(
                    learning_rate = self.args.learning_rate,
                    stochastic_round = self.args.stochastic_round)
        elif self.args.optimizer == 'adadelta':
            self.optimizer = Adadelta(
                    decay = self.args.decay_rate,
                    stochastic_round = self.args.stochastic_round)
        else:
            assert false, "Unknown optimizer"

    def _prepare_network_input(self, states):
        """ Transforms and normalizes the states from one minibatch.

        Args:
            states (): a set of states with the size of minibatch
        """
        _logger.debug("Normalizing and transforming input")
        # change order of axes to match what Neon expects
        states = np.transpose(states, axes = (1, 2, 3, 0))
        # copy() shouldn't be necessary here, but Neon doesn't work otherwise
        self.input.set(states.copy())
        # normalize network input between 0 and 1
        self.be.divide(self.input, self.grayscales, self.input)

    def train(self, minibatch, epoch):
        """ Prepare, perform and document a complete train step for one minibatch.

        Args:
            minibatch (numpy.ndarray): Mini-batch of states, shape=(batch_size,sequence_length,frame_width,frame_height)
            epoch (int): Current train epoch
        """
        _logger.debug("Complete trainig step for one minibatch")
        prestates, actions, rewards, poststates, terminals = minibatch
        assert len(prestates.shape) == 4
        assert len(poststates.shape) == 4
        assert len(actions.shape) == 1
        assert len(rewards.shape) == 1
        assert len(terminals.shape) == 1
        assert prestates.shape == poststates.shape
        assert prestates.shape[0] == actions.shape[0] == rewards.shape[0] == poststates.shape[0] == terminals.shape[0]
        # feed-forward pass for poststates to get Q-values
        self._prepare_network_input(poststates)
        postq = self.target_model.fprop(self.input, inference = True)
        assert postq.shape == (self.output_shape, self.batch_size)
        # calculate max Q-value for each poststate
        maxpostq = self.be.max(postq, axis=0).asnumpyarray()
        assert maxpostq.shape == (1, self.batch_size)
        # average maxpostq for stats
        maxpostq_avg = maxpostq.mean()
        # feed-forward pass for prestates
        self._prepare_network_input(prestates)
        preq = self.model.fprop(self.input, inference = False)
        assert preq.shape == (self.output_shape, self.batch_size)
        # make copy of prestate Q-values as targets
        targets = preq.asnumpyarray()
        # clip rewards between -1 and 1
        rewards = np.clip(rewards, self.min_reward, self.max_reward)
        # update Q-value targets for each state only at actions taken
        for i, action in enumerate(actions):
            if terminals[i]:
                targets[action, i] = float(rewards[i])
            else:
                targets[action, i] = float(rewards[i]) + self.discount_rate * maxpostq[0,i]
        # copy targets to GPU memory
        self.targets.set(targets)
        # calculate errors
        errors = self.cost_func.get_errors(preq, self.targets)
        assert errors.shape == (self.output_shape, self.batch_size)
        # average error where there is a error (should be 1 in every row)
        #TODO: errors_avg = np.sum(errors)/np.size(errors[errors>0.])
        # clip errors
        if self.clip_error:
            self.be.clip(errors, -self.clip_error, self.clip_error, out = errors)
        # calculate cost, just in case
        cost = self.cost_func.get_cost(preq, self.targets)
        assert cost.shape == (1,1)
        # perform back-propagation of gradients
        self.model.bprop(errors)
        # perform optimization
        self.optimizer.optimize(self.model.layers_to_optimize, epoch)
        # increase number of weight updates (needed for target clone interval)
        self.update_iterations += 1
        if self.target_update_frequency and self.update_iterations % self.target_update_frequency == 0:
            self._copy_theta()
            _logger.info("Network update #%d: Cost = %s, Avg Max Q-value = %s" % (self.update_iterations, str(cost.asnumpyarray()[0][0]), str(maxpostq_avg)))
        # update statistics
        if self.callback:
            self.callback.from_learner(cost.asnumpyarray()[0,0], maxpostq_avg)

    def get_Q(self, state):
        """ Calculates the Q-values for one mini-batch.

        Args:
            state(numpy.ndarray): Single state, shape=(sequence_length,frame_width,frame_height).

        Returns:
            q_values (numpy.ndarray): Results for first element of mini-batch from one forward pass through the network, shape=(self.output_shape,)
        """
        _logger.debug("State shape = %s" % str(state.shape))
        # minibatch is full size, because Neon doesn't let change the minibatch size
        # so we need to run 32 forward steps to get the one we actually want
        self.dummy_batch[0] = state
        states = self.dummy_batch
        assert states.shape == ((self.batch_size, self.sequence_length,) + self.frame_dims)
        # calculate Q-values for the states
        self._prepare_network_input(states)
        qvalues = self.model.fprop(self.input, inference = True)
        assert qvalues.shape == (self.output_shape, self.batch_size)
        _logger.debug("Qvalues: %s" % (str(qvalues.asnumpyarray()[:,0])))
        return qvalues.asnumpyarray()[:,0]

    def _copy_theta(self):
        """ Copies the weights of the current network to the target network. """
        _logger.debug("Copying weights")
        pdict = self.model.get_description(get_weights=True, keep_states=True)
        self.target_model.deserialize(pdict, load_states=True)

    def save_weights(self, target_dir, epoch):
        """ Saves the current network parameters to disk.

        Args:
            target_dir (str): Directory where the network parameters are stored for each episode.
            epoch (int): Current epoch.
        """
        filename = "%s_%s_%s_%d.prm" % (str(self.args.game.lower()), str(self.args.net_type.lower()), str(self.args.optimizer.lower()), (epoch + 1))
        self.model.save_params(os.path.join(target_dir, filename))

    def load_weights(self, source_file):
        """ Loads the network parameters from a given file.

        Args:
            source_file (str): Complete path to a file with network parameters.
        """
        self.model.load_params(source_file)
Exemplo n.º 13
0
class DQNNeon(Learner):
    """ This class is an implementation of the DQN network based on Neon.

    The modules that interact with the agent, the replay memory and the
    statistic calls are implemented here, taking the individual requirements
    of the Lasagne framework into account. The code is adapted from:
    https://github.com/tambetm/simple_dqn

    Attributes:
        input_shape (tuple[int]): Dimension of the network input.
        dummy_batch (numpy.ndarray): Dummy batche used to calculate Q-values for single states.
        batch_norm (bool): Indicates if normalization is wanted for a certain layer (default=False).
        be (neon.backends.nervanagpu.NervanaGPU): Describes the backend for the Neon implementation.
        input (neon.backends.nervanagpu.GPUTensor): Definition of network input shape.
        targets(neon.backends.nervanagpu.GPUTensor): Definition of network output shape.
        model (neon.models.model.Model): Generated Neon model.
        target_model (neon.models.model.Model): Generated target Neon model.
        cost_func (neon.layers.layer.GeneralizedCost): Cost function for model training.
        callback (Statistics): Hook for the statistics object to pass train and test information.

    Note:
        More attributes of this class are defined in the base class Learner.

    """

    def __init__(self, env, args, rng, name = "DQNNeon"):
        """ Initializes a network based on the Neon framework.

        Args:
            env (AtariEnv): The envirnoment in which the agent actuates.
            args (argparse.Namespace): All settings either with a default value or set via command line arguments.
            rng (mtrand.RandomState): initialized Mersenne Twister pseudo-random number generator.
            name (str): The name of the network object.

        Note:
            This function should always call the base class first to initialize
            the common values for the networks.
        """
        _logger.info("Initializing new object of type " + str(type(self).__name__))
        super(DQNNeon, self).__init__(env, args, rng, name)
        self.input_shape = (self.sequence_length,) + self.frame_dims + (self.batch_size,)
        self.dummy_batch = np.zeros((self.batch_size, self.sequence_length) + self.frame_dims, dtype=np.uint8)
        self.batch_norm = args.batch_norm

        self.be = gen_backend(
                backend = args.backend,
                batch_size = args.batch_size,
                rng_seed = args.random_seed,
                device_id = args.device_id,
                datatype = np.dtype(args.datatype).type,
                stochastic_round = args.stochastic_round)

        # prepare tensors once and reuse them
        self.input = self.be.empty(self.input_shape)
        self.input.lshape = self.input_shape # HACK: needed for convolutional networks
        self.targets = self.be.empty((self.output_shape, self.batch_size))

        # create model
        layers = self._create_layer()
        self.model = Model(layers = layers)
        self.cost_func = GeneralizedCost(costfunc = SumSquared())
        # Bug fix
        for l in self.model.layers.layers:
            l.parallelism = 'Disabled'
        self.model.initialize(self.input_shape[:-1], self.cost_func)

        self._set_optimizer()

        if not self.args.load_weights == None:
            self.load_weights(self.args.load_weights)

        # create target model
        if self.target_update_frequency:
            layers = self._create_layer()
            self.target_model = Model(layers)
            # Bug fix
            for l in self.target_model.layers.layers:
                l.parallelism = 'Disabled'
            self.target_model.initialize(self.input_shape[:-1])
        else:
            self.target_model = self.model

        self.callback = None
        _logger.debug("%s" % self)

    def _create_layer(self):
        """ Build a network consistent with the DeepMind Nature paper. """
        _logger.debug("Output shape = %d" % self.output_shape)
        # create network
        init_norm = Gaussian(loc=0.0, scale=0.01)
        layers = []
        # The first hidden layer convolves 32 filters of 8x8 with stride 4 with the input image and applies a rectifier nonlinearity.
        layers.append(
                Conv((8, 8, 32),
                strides=4,
                init=init_norm,
                activation=Rectlin(),
                batch_norm=self.batch_norm))
        # The second hidden layer convolves 64 filters of 4x4 with stride 2, again followed by a rectifier nonlinearity.
        layers.append(
                Conv((4, 4, 64),
                strides=2,
                init=init_norm,
                activation=Rectlin(),
                batch_norm=self.batch_norm))
        # This is followed by a third convolutional layer that convolves 64 filters of 3x3 with stride 1 followed by a rectifier.
        layers.append(
                Conv((3, 3, 64),
                strides=1,
                init=init_norm,
                activation=Rectlin(),
                batch_norm=self.batch_norm))
        # The final hidden layer is fully-connected and consists of 512 rectifier units.
        layers.append(
                Affine(
                    nout=512,
                    init=init_norm,
                    activation=Rectlin(),
                    batch_norm=self.batch_norm))
        # The output layer is a fully-connected linear layer with a single output for each valid action.
        layers.append(
                Affine(
                    nout= self.output_shape,
                    init = init_norm))
        return layers

    def _set_optimizer(self):
        """ Initializes the selected optimization algorithm. """
        _logger.debug("Optimizer = %s" % str(self.args.optimizer))
        if self.args.optimizer == 'rmsprop':
            self.optimizer = RMSProp(
                    learning_rate = self.args.learning_rate,
                    decay_rate = self.args.decay_rate,
                    stochastic_round = self.args.stochastic_round)
        elif self.args.optimizer == 'adam':
            self.optimizer = Adam(
                    learning_rate = self.args.learning_rate,
                    stochastic_round = self.args.stochastic_round)
        elif self.args.optimizer == 'adadelta':
            self.optimizer = Adadelta(
                    decay = self.args.decay_rate,
                    stochastic_round = self.args.stochastic_round)
        else:
            assert false, "Unknown optimizer"

    def _prepare_network_input(self, states):
        """ Transforms and normalizes the states from one minibatch.

        Args:
            states (): a set of states with the size of minibatch
        """
        _logger.debug("Normalizing and transforming input")
        # change order of axes to match what Neon expects
        states = np.transpose(states, axes = (1, 2, 3, 0))
        # copy() shouldn't be necessary here, but Neon doesn't work otherwise
        self.input.set(states.copy())
        # normalize network input between 0 and 1
        self.be.divide(self.input, self.grayscales, self.input)

    def train(self, minibatch, epoch):
        """ Prepare, perform and document a complete train step for one minibatch.

        Args:
            minibatch (numpy.ndarray): Mini-batch of states, shape=(batch_size,sequence_length,frame_width,frame_height)
            epoch (int): Current train epoch
        """
        _logger.debug("Complete trainig step for one minibatch")
        prestates, actions, rewards, poststates, terminals = minibatch
        assert len(prestates.shape) == 4
        assert len(poststates.shape) == 4
        assert len(actions.shape) == 1
        assert len(rewards.shape) == 1
        assert len(terminals.shape) == 1
        assert prestates.shape == poststates.shape
        assert prestates.shape[0] == actions.shape[0] == rewards.shape[0] == poststates.shape[0] == terminals.shape[0]
        # feed-forward pass for poststates to get Q-values
        self._prepare_network_input(poststates)
        postq = self.target_model.fprop(self.input, inference = True)
        assert postq.shape == (self.output_shape, self.batch_size)
        # calculate max Q-value for each poststate
        maxpostq = self.be.max(postq, axis=0).asnumpyarray()
        assert maxpostq.shape == (1, self.batch_size)
        # average maxpostq for stats
        maxpostq_avg = maxpostq.mean()
        # feed-forward pass for prestates
        self._prepare_network_input(prestates)
        preq = self.model.fprop(self.input, inference = False)
        assert preq.shape == (self.output_shape, self.batch_size)
        # make copy of prestate Q-values as targets
        targets = preq.asnumpyarray()
        # clip rewards between -1 and 1
        rewards = np.clip(rewards, self.min_reward, self.max_reward)
        # update Q-value targets for each state only at actions taken
        for i, action in enumerate(actions):
            if terminals[i]:
                targets[action, i] = float(rewards[i])
            else:
                targets[action, i] = float(rewards[i]) + self.discount_rate * maxpostq[0,i]
        # copy targets to GPU memory
        self.targets.set(targets)
        # calculate errors
        errors = self.cost_func.get_errors(preq, self.targets)
        assert errors.shape == (self.output_shape, self.batch_size)
        # average error where there is a error (should be 1 in every row)
        #TODO: errors_avg = np.sum(errors)/np.size(errors[errors>0.])
        # clip errors
        if self.clip_error:
            self.be.clip(errors, -self.clip_error, self.clip_error, out = errors)
        # calculate cost, just in case
        cost = self.cost_func.get_cost(preq, self.targets)
        assert cost.shape == (1,1)
        # perform back-propagation of gradients
        self.model.bprop(errors)
        # perform optimization
        self.optimizer.optimize(self.model.layers_to_optimize, epoch)
        # increase number of weight updates (needed for target clone interval)
        self.update_iterations += 1
        if self.target_update_frequency and self.update_iterations % self.target_update_frequency == 0:
            self._copy_theta()
            if isinstance(cost, np.ndarray):
                _logger.info("Network update #%d: Cost = %s, Avg Max Q-value = %s" % (self.update_iterations, str(cost[0][0]), str(maxpostq_avg)))
            else:
                _logger.info("Network update #%d: Cost = %s, Avg Max Q-value = %s" % (self.update_iterations, str(cost.asnumpyarray()[0][0]), str(maxpostq_avg)))
        # update statistics
        if self.callback:
            if isinstance(cost, np.ndarray):
                self.callback.from_learner(cost[0,0], maxpostq_avg)
            else:
                self.callback.from_learner(cost.asnumpyarray()[0,0], maxpostq_avg)

    def get_Q(self, state):
        """ Calculates the Q-values for one mini-batch.

        Args:
            state(numpy.ndarray): Single state, shape=(sequence_length,frame_width,frame_height).

        Returns:
            q_values (numpy.ndarray): Results for first element of mini-batch from one forward pass through the network, shape=(self.output_shape,)
        """
        _logger.debug("State shape = %s" % str(state.shape))
        # minibatch is full size, because Neon doesn't let change the minibatch size
        # so we need to run 32 forward steps to get the one we actually want
        self.dummy_batch[0] = state
        states = self.dummy_batch
        assert states.shape == ((self.batch_size, self.sequence_length,) + self.frame_dims)
        # calculate Q-values for the states
        self._prepare_network_input(states)
        qvalues = self.model.fprop(self.input, inference = True)
        assert qvalues.shape == (self.output_shape, self.batch_size)
        _logger.debug("Qvalues: %s" % (str(qvalues.asnumpyarray()[:,0])))
        return qvalues.asnumpyarray()[:,0]

    def _copy_theta(self):
        """ Copies the weights of the current network to the target network. """
        _logger.debug("Copying weights")
        pdict = self.model.get_description(get_weights=True, keep_states=True)
        self.target_model.deserialize(pdict, load_states=True)

    def save_weights(self, target_dir, epoch):
        """ Saves the current network parameters to disk.

        Args:
            target_dir (str): Directory where the network parameters are stored for each episode.
            epoch (int): Current epoch.
        """
        filename = "%s_%s_%s_%d.prm" % (str(self.args.game.lower()), str(self.args.learner_type.lower()), str(self.args.optimizer.lower()), (epoch + 1))
        self.model.save_params(os.path.join(target_dir, filename))

    def load_weights(self, source_file):
        """ Loads the network parameters from a given file.

        Args:
            source_file (str): Complete path to a file with network parameters.
        """
        self.model.load_params(source_file)
Exemplo n.º 14
0
    Affine(2, init=init_uni, activation=Softmax())
]

cost = GeneralizedCost(costfunc=CrossEntropyBinary())

lunaModel = Model(layers=layers)

modelFileName = 'LUNA16_CADIMI_subset{}.prm'.format(SUBSET)

# If model file exists, then load the it and start from there.
# if (os.path.isfile(modelFileName)):
#   lunaModel = Model(modelFileName)

# Nesterov accelerated gradient descent with a learning rate of 0.01, a decay of 10^-3 and a momentum of 0.9
#opt = GradientDescentMomentum(0.01, 0.9, wdecay=0.001, nesterov=True)
opt = Adadelta(decay=0.95, epsilon=1e-6)

# configure callbacks
if args.callback_args['eval_freq'] is None:
    args.callback_args['eval_freq'] = 1

# configure callbacks
callbacks = Callbacks(lunaModel, eval_set=valid_set, **args.callback_args)
# add a callback that saves the best model state
callbacks.add_save_best_state_callback(modelFileName)

lunaModel.fit(train_set,
              optimizer=opt,
              num_epochs=num_epochs,
              cost=cost,
              callbacks=callbacks)
Exemplo n.º 15
0
class DeepQNetwork:
    def __init__(self, num_actions, args):
        # remember parameters
        self.num_actions = num_actions
        self.batch_size = args.batch_size
        self.discount_rate = args.discount_rate
        self.history_length = args.history_length
        self.screen_dim = (args.screen_height, args.screen_width)
        self.clip_error = args.clip_error
        self.min_reward = args.min_reward
        self.max_reward = args.max_reward
        self.batch_norm = args.batch_norm

        # create Neon backend
        self.be = gen_backend(backend=args.backend,
                              batch_size=args.batch_size,
                              rng_seed=args.random_seed,
                              device_id=args.device_id,
                              datatype=np.dtype(args.datatype).type,
                              stochastic_round=args.stochastic_round)

        # prepare tensors once and reuse them
        self.input_shape = (self.history_length, ) + self.screen_dim + (
            self.batch_size, )
        self.input = self.be.empty(self.input_shape)
        self.input.lshape = self.input_shape  # HACK: needed for convolutional networks
        self.targets = self.be.empty((self.num_actions, self.batch_size))

        # create model
        layers = self._createLayers(num_actions)
        self.model = Model(layers=layers)
        self.cost = GeneralizedCost(costfunc=SumSquared())
        # Bug fix
        for l in self.model.layers.layers:
            l.parallelism = 'Disabled'
        self.model.initialize(self.input_shape[:-1], self.cost)
        if args.optimizer == 'rmsprop':
            self.optimizer = RMSProp(learning_rate=args.learning_rate,
                                     decay_rate=args.decay_rate,
                                     stochastic_round=args.stochastic_round)
        elif args.optimizer == 'adam':
            self.optimizer = Adam(learning_rate=args.learning_rate,
                                  stochastic_round=args.stochastic_round)
        elif args.optimizer == 'adadelta':
            self.optimizer = Adadelta(decay=args.decay_rate,
                                      stochastic_round=args.stochastic_round)
        else:
            assert false, "Unknown optimizer"

        # create target model
        self.target_steps = args.target_steps
        self.train_iterations = 0
        if self.target_steps:
            self.target_model = Model(layers=self._createLayers(num_actions))
            # Bug fix
            for l in self.target_model.layers.layers:
                l.parallelism = 'Disabled'
            self.target_model.initialize(self.input_shape[:-1])
            self.save_weights_prefix = args.save_weights_prefix
        else:
            self.target_model = self.model

        self.callback = None

    def _createLayers(self, num_actions):
        # create network
        init_norm = Gaussian(loc=0.0, scale=0.01)
        layers = []
        # The first hidden layer convolves 32 filters of 8x8 with stride 4 with the input image and applies a rectifier nonlinearity.
        layers.append(
            Conv((8, 8, 32),
                 strides=4,
                 init=init_norm,
                 activation=Rectlin(),
                 batch_norm=self.batch_norm))
        # The second hidden layer convolves 64 filters of 4x4 with stride 2, again followed by a rectifier nonlinearity.
        layers.append(
            Conv((4, 4, 64),
                 strides=2,
                 init=init_norm,
                 activation=Rectlin(),
                 batch_norm=self.batch_norm))
        # This is followed by a third convolutional layer that convolves 64 filters of 3x3 with stride 1 followed by a rectifier.
        layers.append(
            Conv((3, 3, 64),
                 strides=1,
                 init=init_norm,
                 activation=Rectlin(),
                 batch_norm=self.batch_norm))
        # The final hidden layer is fully-connected and consists of 512 rectifier units.
        layers.append(
            Affine(nout=512,
                   init=init_norm,
                   activation=Rectlin(),
                   batch_norm=self.batch_norm))
        # The output layer is a fully-connected linear layer with a single output for each valid action.
        layers.append(Affine(nout=num_actions, init=init_norm))
        return layers

    def _setInput(self, states):
        # change order of axes to match what Neon expects
        states = np.transpose(states, axes=(1, 2, 3, 0))
        # copy() shouldn't be necessary here, but Neon doesn't work otherwise
        self.input.set(states.copy())
        # normalize network input between 0 and 1
        self.be.divide(self.input, 255, self.input)

    def train(self, minibatch, epoch):
        # expand components of minibatch
        prestates, actions, rewards, poststates, terminals = minibatch
        assert len(prestates.shape) == 4
        assert len(poststates.shape) == 4
        assert len(actions.shape) == 1
        assert len(rewards.shape) == 1
        assert len(terminals.shape) == 1
        assert prestates.shape == poststates.shape
        assert prestates.shape[0] == actions.shape[0] == rewards.shape[
            0] == poststates.shape[0] == terminals.shape[0]

        if self.target_steps and self.train_iterations % self.target_steps == 0:
            # have to serialize also states for batch normalization to work
            pdict = self.model.get_description(get_weights=True,
                                               keep_states=True)
            self.target_model.deserialize(pdict, load_states=True)

        # feed-forward pass for poststates to get Q-values
        self._setInput(poststates)
        postq = self.target_model.fprop(self.input, inference=True)
        assert postq.shape == (self.num_actions, self.batch_size)

        # calculate max Q-value for each poststate
        maxpostq = self.be.max(postq, axis=0).asnumpyarray()
        assert maxpostq.shape == (1, self.batch_size)

        # feed-forward pass for prestates
        self._setInput(prestates)
        preq = self.model.fprop(self.input, inference=False)
        assert preq.shape == (self.num_actions, self.batch_size)

        # make copy of prestate Q-values as targets
        targets = preq.asnumpyarray()

        # clip rewards between -1 and 1
        rewards = np.clip(rewards, self.min_reward, self.max_reward)

        # update Q-value targets for actions taken
        for i, action in enumerate(actions):
            if terminals[i]:
                targets[action, i] = float(rewards[i])
            else:
                targets[action, i] = float(
                    rewards[i]) + self.discount_rate * maxpostq[0, i]

        # copy targets to GPU memory
        self.targets.set(targets)

        # calculate errors
        deltas = self.cost.get_errors(preq, self.targets)
        assert deltas.shape == (self.num_actions, self.batch_size)
        #assert np.count_nonzero(deltas.asnumpyarray()) == 32

        # calculate cost, just in case
        cost = self.cost.get_cost(preq, self.targets)
        assert cost.shape == (1, 1)

        # clip errors
        if self.clip_error:
            self.be.clip(deltas, -self.clip_error, self.clip_error, out=deltas)

        # perform back-propagation of gradients
        self.model.bprop(deltas)

        # perform optimization
        self.optimizer.optimize(self.model.layers_to_optimize, epoch)

        # increase number of weight updates (needed for target clone interval)
        self.train_iterations += 1

        # calculate statistics
        if self.callback:
            self.callback.on_train(cost.asnumpyarray()[0, 0])

    def predict(self, states):
        # minibatch is full size, because Neon doesn't let change the minibatch size
        assert states.shape == ((
            self.batch_size,
            self.history_length,
        ) + self.screen_dim)

        # calculate Q-values for the states
        self._setInput(states)
        qvalues = self.model.fprop(self.input, inference=True)
        assert qvalues.shape == (self.num_actions, self.batch_size)
        if logger.isEnabledFor(logging.DEBUG):
            logger.debug("Q-values: " + str(qvalues.asnumpyarray()[:, 0]))

        # transpose the result, so that batch size is first dimension
        return qvalues.T.asnumpyarray()

    def load_weights(self, load_path):
        self.model.load_params(load_path)

    def save_weights(self, save_path):
        self.model.save_params(save_path)
Exemplo n.º 16
0
class DeepQNetwork:
  def __init__(self, num_actions, args):
    # remember parameters
    self.num_actions = num_actions
    self.batch_size = args.batch_size
    self.discount_rate = args.discount_rate
    self.history_length = args.history_length
    self.screen_dim = (args.screen_height, args.screen_width)
    self.clip_error = args.clip_error
    self.min_reward = args.min_reward
    self.max_reward = args.max_reward
    self.batch_norm = args.batch_norm

    # create Neon backend
    self.be = gen_backend(backend = args.backend,
                 batch_size = args.batch_size,
                 rng_seed = args.random_seed,
                 device_id = args.device_id,
                 datatype = np.dtype(args.datatype).type,
                 stochastic_round = args.stochastic_round)

    # prepare tensors once and reuse them
    self.input_shape = (self.history_length,) + self.screen_dim + (self.batch_size,)
    self.input = self.be.empty(self.input_shape)
    self.input.lshape = self.input_shape # HACK: needed for convolutional networks
    self.targets = self.be.empty((self.num_actions, self.batch_size))

    # create model
    layers = self._createLayers(num_actions)
    self.model = Model(layers = layers)
    self.cost = GeneralizedCost(costfunc = SumSquared())
    # Bug fix
    for l in self.model.layers.layers:
      l.parallelism = 'Disabled'
    self.model.initialize(self.input_shape[:-1], self.cost)
    if args.optimizer == 'rmsprop':
      self.optimizer = RMSProp(learning_rate = args.learning_rate,
          decay_rate = args.decay_rate,
          stochastic_round = args.stochastic_round)
    elif args.optimizer == 'adam':
      self.optimizer = Adam(learning_rate = args.learning_rate,
          stochastic_round = args.stochastic_round)
    elif args.optimizer == 'adadelta':
      self.optimizer = Adadelta(decay = args.decay_rate,
          stochastic_round = args.stochastic_round)
    else:
      assert false, "Unknown optimizer"

    # create target model
    self.target_steps = args.target_steps
    self.train_iterations = 0
    if self.target_steps:
      self.target_model = Model(layers = self._createLayers(num_actions))
      # Bug fix
      for l in self.target_model.layers.layers:
        l.parallelism = 'Disabled'
      self.target_model.initialize(self.input_shape[:-1])
      self.save_weights_prefix = args.save_weights_prefix
    else:
      self.target_model = self.model

    self.callback = None

  def _createLayers(self, num_actions):
    # create network
    init_norm = Gaussian(loc=0.0, scale=0.01)
    layers = []
    # The first hidden layer convolves 32 filters of 8x8 with stride 4 with the input image and applies a rectifier nonlinearity.
    layers.append(Conv((8, 8, 32), strides=4, init=init_norm, activation=Rectlin(), batch_norm=self.batch_norm))
    # The second hidden layer convolves 64 filters of 4x4 with stride 2, again followed by a rectifier nonlinearity.
    layers.append(Conv((4, 4, 64), strides=2, init=init_norm, activation=Rectlin(), batch_norm=self.batch_norm))
    # This is followed by a third convolutional layer that convolves 64 filters of 3x3 with stride 1 followed by a rectifier.
    layers.append(Conv((3, 3, 64), strides=1, init=init_norm, activation=Rectlin(), batch_norm=self.batch_norm))
    # The final hidden layer is fully-connected and consists of 512 rectifier units.
    layers.append(Affine(nout=512, init=init_norm, activation=Rectlin(), batch_norm=self.batch_norm))
    # The output layer is a fully-connected linear layer with a single output for each valid action.
    layers.append(Affine(nout=num_actions, init = init_norm))
    return layers

  def _setInput(self, states):
    # change order of axes to match what Neon expects
    states = np.transpose(states, axes = (1, 2, 3, 0))
    # copy() shouldn't be necessary here, but Neon doesn't work otherwise
    self.input.set(states.copy())
    # normalize network input between 0 and 1
    self.be.divide(self.input, 255, self.input)

  def train(self, minibatch, epoch):
    # expand components of minibatch
    prestates, actions, rewards, poststates, terminals = minibatch
    assert len(prestates.shape) == 4
    assert len(poststates.shape) == 4
    assert len(actions.shape) == 1
    assert len(rewards.shape) == 1
    assert len(terminals.shape) == 1
    assert prestates.shape == poststates.shape
    assert prestates.shape[0] == actions.shape[0] == rewards.shape[0] == poststates.shape[0] == terminals.shape[0]

    if self.target_steps and self.train_iterations % self.target_steps == 0:
      # have to serialize also states for batch normalization to work
      pdict = self.model.get_description(get_weights=True, keep_states=True)
      self.target_model.deserialize(pdict, load_states=True)

    # feed-forward pass for poststates to get Q-values
    self._setInput(poststates)
    postq = self.target_model.fprop(self.input, inference = True)
    assert postq.shape == (self.num_actions, self.batch_size)

    # calculate max Q-value for each poststate
    maxpostq = self.be.max(postq, axis=0).asnumpyarray()
    assert maxpostq.shape == (1, self.batch_size)

    # feed-forward pass for prestates
    self._setInput(prestates)
    preq = self.model.fprop(self.input, inference = False)
    assert preq.shape == (self.num_actions, self.batch_size)

    # make copy of prestate Q-values as targets
    # It seems neccessary for cpu backend.
    targets = preq.asnumpyarray().copy()

    # clip rewards between -1 and 1
    rewards = np.clip(rewards, self.min_reward, self.max_reward)

    # update Q-value targets for actions taken
    for i, action in enumerate(actions):
      if terminals[i]:
        targets[action, i] = float(rewards[i])
      else:
        targets[action, i] = float(rewards[i]) + self.discount_rate * maxpostq[0,i]

    # copy targets to GPU memory
    self.targets.set(targets)

    # calculate errors
    deltas = self.cost.get_errors(preq, self.targets)
    assert deltas.shape == (self.num_actions, self.batch_size)
    #assert np.count_nonzero(deltas.asnumpyarray()) == 32

    # calculate cost, just in case
    cost = self.cost.get_cost(preq, self.targets)
    assert cost.shape == (1,1)

    # clip errors
    if self.clip_error:
      self.be.clip(deltas, -self.clip_error, self.clip_error, out = deltas)

    # perform back-propagation of gradients
    self.model.bprop(deltas)

    # perform optimization
    self.optimizer.optimize(self.model.layers_to_optimize, epoch)

    # increase number of weight updates (needed for target clone interval)
    self.train_iterations += 1

    # calculate statistics
    if self.callback:
      self.callback.on_train(cost[0,0])

  def predict(self, states):
    # minibatch is full size, because Neon doesn't let change the minibatch size
    assert states.shape == ((self.batch_size, self.history_length,) + self.screen_dim)

    # calculate Q-values for the states
    self._setInput(states)
    qvalues = self.model.fprop(self.input, inference = True)
    assert qvalues.shape == (self.num_actions, self.batch_size)
    if logger.isEnabledFor(logging.DEBUG):
      logger.debug("Q-values: " + str(qvalues.asnumpyarray()[:,0]))

    # transpose the result, so that batch size is first dimension
    return qvalues.T.asnumpyarray()

  def load_weights(self, load_path):
    self.model.load_params(load_path)

  def save_weights(self, save_path):
    self.model.save_params(save_path)
Exemplo n.º 17
0
subm_config = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                           'whale_subm.cfg')
config_files = [subm_config] if os.path.exists(subm_config) else []
parser = NeonArgparser(__doc__, default_config_files=config_files)
parser.add_argument('--submission_file',
                    help='where to write prediction output')
args = parser.parse_args()

model, cost_obj = create_network()

assert 'all' in args.manifest, "Missing train manifest"
assert 'test' in args.manifest, "Missing test manifest"
assert args.submission_file is not None, "Must supply a submission file to output scores to"

neon_logger.display('Performing train and test in submission mode')
train = make_train_loader(args.manifest['all'],
                          args.manifest_root,
                          model.be,
                          noise_file=args.manifest.get('noise'))
test = make_test_loader(args.manifest['test'], args.manifest_root, model.be)

model.fit(dataset=train,
          cost=cost_obj,
          optimizer=Adadelta(),
          num_epochs=args.epochs,
          callbacks=Callbacks(model, **args.callback_args))

preds = model.get_outputs(test)
np.savetxt(args.submission_file, preds[:, 1], fmt='%.5f')
Exemplo n.º 18
0
parser = NeonArgparser(__doc__)
parser.add_argument('-tw',
                    '--test_data_dir',
                    default='',
                    help='directory in which to find test images')
parser.add_argument('-iw', '--image_width', default=384, help='image width')
args = parser.parse_args()
imwidth = int(args.image_width)

train = ClassifierLoader(repo_dir=args.data_dir,
                         inner_size=imwidth,
                         set_name='train',
                         do_transforms=False)
train.init_batch_provider()
init = Gaussian(scale=0.01)
opt = Adadelta(decay=0.9)
common = dict(init=init, batch_norm=True, activation=Rectlin())

layers = []
nchan = 64
layers.append(Conv((2, 2, nchan), strides=2, **common))
for idx in range(5):
    if nchan > 1024:
        nchan = 1024
    layers.append(Conv((3, 3, nchan), strides=1, **common))
    layers.append(Pooling(2, strides=2))
    nchan *= 2
#layers.append(Affine(nout=4096, init=init, activation=Rectlin(), batch_norm=True))
layers.append(DropoutBinary(keep=0.2))
layers.append(Affine(nout=447, init=init, activation=Softmax()))
Exemplo n.º 19
0
class DeepQNetwork:
  def __init__(self, state_size, num_actions, args):
    # remember parameters
    self.state_size = state_size
    self.num_actions = num_actions
    self.batch_size = args.batch_size
    self.discount_rate = args.discount_rate
    self.clip_error = args.clip_error
    self.action_count = np.zeros(21)

    # create Neon backend
    self.be = gen_backend(backend = args.backend,
                 batch_size = args.batch_size,
                 rng_seed = args.random_seed,
                 device_id = args.device_id,
                 datatype = np.dtype(args.datatype).type,
                 stochastic_round = args.stochastic_round)

    # prepare tensors once and reuse them
    self.input_shape = (self.state_size, self.batch_size)
    self.input = self.be.empty(self.input_shape)
    self.targets = self.be.empty((self.num_actions, self.batch_size))

    # create model
    layers = self._createLayers(num_actions)
    self.model = Model(layers = layers)
    self.cost = GeneralizedCost(costfunc = SumSquared())
    self.model.initialize(self.input_shape[:-1], self.cost)
    if args.optimizer == 'rmsprop':
      self.optimizer = RMSProp(learning_rate = args.learning_rate, 
          decay_rate = args.decay_rate, 
          stochastic_round = args.stochastic_round)
    elif args.optimizer == 'adam':
      self.optimizer = Adam(learning_rate = args.learning_rate, 
          stochastic_round = args.stochastic_round)
    elif args.optimizer == 'adadelta':
      self.optimizer = Adadelta(decay = args.decay_rate, 
          stochastic_round = args.stochastic_round)
    else:
      assert False, "Unknown optimizer"

    # create target model
    self.target_steps = args.target_steps
    self.train_iterations = 0
    if self.target_steps:
      self.target_model = Model(layers = self._createLayers(num_actions))
      self.target_model.initialize(self.input_shape[:-1])
      self.save_weights_prefix = args.save_weights_prefix
    else:
      self.target_model = self.model

  def _createLayers(self, num_actions):
    # create network
    init_norm = Gaussian(loc=0.0, scale=0.01)
    layers = []
    # The final hidden layer is fully-connected and consists of 512 rectifier units.
    layers.append(Affine(nout=64, init=init_norm, bias=init_norm, activation=Rectlin()))
    # The output layer is a fully-connected linear layer with a single output for each valid action.
    layers.append(Affine(nout=num_actions, init=init_norm, bias=init_norm))
    return layers

  def _setInput(self, states):
    # change order of axes to match what Neon expects
    states = np.transpose(states)
    # copy() shouldn't be necessary here, but Neon doesn't work otherwise
    self.input.set(states.copy())
    # normalize network input between 0 and 1
    # self.be.divide(self.input, 255, self.input)

  def train(self, minibatch, epoch):
    # expand components of minibatch
    prestates, actions, speed_actions, rewards, poststates, terminals = minibatch
    assert len(prestates.shape) == 2
    assert len(poststates.shape) == 2
    assert len(actions.shape) == 1
    assert len(rewards.shape) == 1
    assert len(terminals.shape) == 1
    assert prestates.shape == poststates.shape
    assert prestates.shape[0] == actions.shape[0] == rewards.shape[0] == poststates.shape[0] == terminals.shape[0]
    #print "WE ARE ACTUALLY TRAINING IN HERE"
    if self.target_steps and self.train_iterations % self.target_steps == 0:
      # HACK: serialize network to disk and read it back to clone
      filename = self.save_weights_prefix + "_target.pkl"
      save_obj(self.model.serialize(keep_states = False), filename)
      self.target_model.load_weights(filename)

    # feed-forward pass for poststates to get Q-values
    self._setInput(poststates)
    postq = self.target_model.fprop(self.input, inference = True)
    assert postq.shape == (self.num_actions, self.batch_size)

    # calculate max Q-value for each poststate
    postq = postq.asnumpyarray()
    maxpostq = np.max(postq, axis=0)
    #print maxpostq.shape
    assert maxpostq.shape == (self.batch_size,)

    # feed-forward pass for prestates
    self._setInput(prestates)
    preq = self.model.fprop(self.input, inference = False)
    assert preq.shape == (self.num_actions, self.batch_size)

    # make copy of prestate Q-values as targets
    targets = preq.asnumpyarray().copy()

    # update Q-value targets for actions taken
    for i, action in enumerate(actions):
      self.action_count[action] += 1
      if terminals[i]:
        targets[action, i] = float(rewards[i])
        if rewards[i] == -1000:
            print "######################### action ", action, "should never be sampled again"
        print "sampled_terminal"
      else:
        targets[action, i] = float(rewards[i]) + self.discount_rate * maxpostq[i]
        #targets[i,action] = float(rewards[i]) + self.discount_rate * maxpostq[i]
    #print "action count", self.action_count
    # copy targets to GPU memory
    self.targets.set(targets)

    # calculate errors
    deltas = self.cost.get_errors(preq, self.targets)
    assert deltas.shape == (self.num_actions, self.batch_size)
    #assert np.count_nonzero(deltas.asnumpyarray()) == 32
    print "nonzero deltas", np.count_nonzero(deltas.asnumpyarray())

    # calculate cost, just in case
    cost = self.cost.get_cost(preq, self.targets)
    assert cost.shape == (1,1)
    print "cost:", cost.asnumpyarray()

    # clip errors
    #if self.clip_error:
    #  self.be.clip(deltas, -self.clip_error, self.clip_error, out = deltas)

    # perform back-propagation of gradients
    self.model.bprop(deltas)

    # perform optimization
    self.optimizer.optimize(self.model.layers_to_optimize, epoch)

    # increase number of weight updates (needed for target clone interval)
    self.train_iterations += 1

  def predict(self, states):
    # minibatch is full size, because Neon doesn't let change the minibatch size
    assert states.shape == (self.batch_size, self.state_size)

    # calculate Q-values for the states
    self._setInput(states)
    qvalues = self.model.fprop(self.input, inference = True)
    assert qvalues.shape == (self.num_actions, self.batch_size)
    if logger.isEnabledFor(logging.DEBUG):
      logger.debug("Q-values: " + str(qvalues.asnumpyarray()[:,0]))

    # transpose the result, so that batch size is first dimension
    return qvalues.T.asnumpyarray()

  def load_weights(self, load_path):
    self.model.load_weights(load_path)

  def save_weights(self, save_path):
    save_obj(self.model.serialize(keep_states = True), save_path)
Exemplo n.º 20
0
class DeepQNetwork:
  def __init__(self, state_size, num_steers, num_speeds, args):
    # remember parameters
    self.state_size = state_size
    self.num_steers = num_steers
    self.num_speeds = num_speeds
    self.num_actions = num_steers + num_speeds
    self.num_layers = args.hidden_layers
    self.hidden_nodes = args.hidden_nodes
    self.batch_size = args.batch_size
    self.discount_rate = args.discount_rate
    self.clip_error = args.clip_error

    # create Neon backend
    self.be = gen_backend(backend = args.backend,
                 batch_size = args.batch_size,
                 rng_seed = args.random_seed,
                 device_id = args.device_id,
                 datatype = np.dtype(args.datatype).type,
                 stochastic_round = args.stochastic_round)

    # prepare tensors once and reuse them
    self.input_shape = (self.state_size, self.batch_size)
    self.input = self.be.empty(self.input_shape)
    self.targets = self.be.empty((self.num_actions, self.batch_size))

    # create model
    self.model = Model(layers = self._createLayers())
    self.cost = GeneralizedCost(costfunc = SumSquared())
    self.model.initialize(self.input_shape[:-1], self.cost)
    if args.optimizer == 'rmsprop':
      self.optimizer = RMSProp(learning_rate = args.learning_rate, 
          decay_rate = args.decay_rate, 
          stochastic_round = args.stochastic_round)
    elif args.optimizer == 'adam':
      self.optimizer = Adam(learning_rate = args.learning_rate, 
          stochastic_round = args.stochastic_round)
    elif args.optimizer == 'adadelta':
      self.optimizer = Adadelta(decay = args.decay_rate, 
          stochastic_round = args.stochastic_round)
    else:
      assert false, "Unknown optimizer"

    # create target model
    self.target_steps = args.target_steps
    self.train_iterations = 0
    if self.target_steps:
      self.target_model = Model(layers = self._createLayers())
      self.target_model.initialize(self.input_shape[:-1])
      self.save_weights_prefix = args.save_weights_prefix
    else:
      self.target_model = self.model

  def _createLayers(self):
    # create network
    init_norm = Gaussian(loc=0.0, scale=0.01)
    layers = []
    for i in xrange(self.num_layers):
        layers.append(Affine(nout=self.hidden_nodes, init=init_norm, activation=Rectlin()))
    layers.append(Affine(nout=self.num_actions, init = init_norm))
    return layers

  def _setInput(self, states):
    # change order of axes to match what Neon expects
    states = np.transpose(states)
    # copy() shouldn't be necessary here, but Neon doesn't work otherwise
    self.input.set(states.copy())
    # normalize network input between 0 and 1
    #self.be.divide(self.input, 200, self.input)

  def train(self, minibatch, epoch = 0):
    # expand components of minibatch
    prestates, steers, speeds, rewards, poststates, terminals = minibatch
    assert len(prestates.shape) == 2
    assert len(poststates.shape) == 2
    assert len(steers.shape) == 1
    assert len(speeds.shape) == 1
    assert len(rewards.shape) == 1
    assert len(terminals.shape) == 1
    assert prestates.shape == poststates.shape
    assert prestates.shape[0] == steers.shape[0] == speeds.shape[0] == rewards.shape[0] == poststates.shape[0] == terminals.shape[0]

    if self.target_steps and self.train_iterations % self.target_steps == 0:
      # HACK: serialize network to disk and read it back to clone
      filename = self.save_weights_prefix + "_target.pkl"
      save_obj(self.model.serialize(keep_states = False), filename)
      self.target_model.load_weights(filename)

    # feed-forward pass for poststates to get Q-values
    self._setInput(poststates)
    postq = self.target_model.fprop(self.input, inference = True)
    assert postq.shape == (self.num_actions, self.batch_size)

    # calculate max Q-value for each poststate
    postq = postq.asnumpyarray()
    maxsteerq = np.max(postq[:self.num_steers,:], axis=0)
    assert maxsteerq.shape == (self.batch_size,), "size: %s" % str(maxsteerq.shape)
    maxspeedq = np.max(postq[-self.num_speeds:,:], axis=0)
    assert maxspeedq.shape == (self.batch_size,)

    # feed-forward pass for prestates
    self._setInput(prestates)
    preq = self.model.fprop(self.input, inference = False)
    assert preq.shape == (self.num_actions, self.batch_size)

    # make copy of prestate Q-values as targets
    # HACK: copy() was needed to make it work on CPU
    targets = preq.asnumpyarray().copy()

    # update Q-value targets for actions taken
    for i, (steer, speed) in enumerate(zip(steers, speeds)):
      if terminals[i]:
        targets[steer, i] = float(rewards[i])
        targets[self.num_steers + speed, i] = float(rewards[i])
      else:
        targets[steer, i] = float(rewards[i]) + self.discount_rate * maxsteerq[i]
        targets[self.num_steers + speed, i] = float(rewards[i]) + self.discount_rate * maxspeedq[i]

    # copy targets to GPU memory
    self.targets.set(targets)

    # calculate errors
    deltas = self.cost.get_errors(preq, self.targets)
    assert deltas.shape == (self.num_actions, self.batch_size)
    #assert np.count_nonzero(deltas.asnumpyarray()) == 2 * self.batch_size, str(np.count_nonzero(deltas.asnumpyarray()))

    # calculate cost, just in case
    cost = self.cost.get_cost(preq, self.targets)
    assert cost.shape == (1,1)
    #print "cost:", cost.asnumpyarray()

    # clip errors
    if self.clip_error:
      self.be.clip(deltas, -self.clip_error, self.clip_error, out = deltas)

    # perform back-propagation of gradients
    self.model.bprop(deltas)

    # perform optimization
    self.optimizer.optimize(self.model.layers_to_optimize, epoch)

    '''
    if np.any(rewards < 0):
        preqq = preq.asnumpyarray().copy()
        self._setInput(prestates)
        qvalues = self.model.fprop(self.input, inference = True).asnumpyarray().copy()
        indexes = rewards < 0
        print "indexes:", indexes
        print "preq:", preqq[:, indexes].T
        print "preq':", qvalues[:, indexes].T
        print "diff:", (qvalues[:, indexes]-preqq[:, indexes]).T
        print "steers:", steers[indexes]
        print "speeds:", speeds[indexes]
        print "rewards:", rewards[indexes]
        print "terminals:", terminals[indexes]
        print "preq[0]:", preqq[:, 0]
        print "preq[0]':", qvalues[:, 0]
        print "diff:", qvalues[:, 0] - preqq[:, 0]
        print "deltas:", deltas.asnumpyarray()[:, indexes].T
        raw_input("Press Enter to continue...")
    '''

    # increase number of weight updates (needed for target clone interval)
    self.train_iterations += 1

  def predict(self, states):
    # minibatch is full size, because Neon doesn't let change the minibatch size
    assert states.shape == (self.batch_size, self.state_size)

    # calculate Q-values for the states
    self._setInput(states)
    qvalues = self.model.fprop(self.input, inference = True)
    assert qvalues.shape == (self.num_actions, self.batch_size)
    if logger.isEnabledFor(logging.DEBUG):
      logger.debug("Q-values: " + str(qvalues.asnumpyarray()[:,0]))

    # transpose the result, so that batch size is first dimension
    return qvalues.T.asnumpyarray()

  def load_weights(self, load_path):
    self.model.load_weights(load_path)

  def save_weights(self, save_path):
    save_obj(self.model.serialize(keep_states = True), save_path)