Example #1
def test_multi_optimizer(backend_default):
    opt_gdm = GradientDescentMomentum(learning_rate=0.001,
                                      momentum_coef=0.9,
                                      wdecay=0.005)
    opt_ada = Adadelta()
    opt_adam = Adam()
    opt_rms = RMSProp()
    opt_rms_1 = RMSProp(gradient_clip_value=5)
    init_one = Gaussian(scale=0.01)

    l1 = Conv((11, 11, 64),
              strides=4,
              padding=3,
              init=init_one,
              bias=Constant(0),
              activation=Rectlin())
    l2 = Affine(nout=4096,
                init=init_one,
                bias=Constant(1),
                activation=Rectlin())
    l3 = LSTM(output_size=1000,
              init=init_one,
              activation=Logistic(),
              gate_activation=Tanh())
    l4 = GRU(output_size=100,
             init=init_one,
             activation=Logistic(),
             gate_activation=Tanh())
    layers = [l1, l2, l3, l4]
    layer_list = []
    for layer in layers:
        if isinstance(layer, list):
            layer_list.extend(layer)
        else:
            layer_list.append(layer)

    opt = MultiOptimizer({
        'default': opt_gdm,
        'Bias': opt_ada,
        'Convolution': opt_adam,
        'Linear': opt_rms,
        'LSTM': opt_rms_1,
        'GRU': opt_rms_1
    })

    map_list = opt._map_optimizers(layer_list)
    assert map_list[opt_adam][0].__class__.__name__ == 'Convolution'
    assert map_list[opt_ada][0].__class__.__name__ == 'Bias'
    assert map_list[opt_rms][0].__class__.__name__ == 'Linear'
    assert map_list[opt_gdm][0].__class__.__name__ == 'Activation'
    assert map_list[opt_rms_1][0].__class__.__name__ == 'LSTM'
    assert map_list[opt_rms_1][1].__class__.__name__ == 'GRU'
Example #2
    def __init__(self,
                 num_actions,
                 batch_size=32,
                 discount_rate=0.99,
                 history_length=4,
                 cols=64,
                 rows=64,
                 clip_error=1,
                 min_reward=-1,
                 max_reward=1,
                 batch_norm=False):
        self.num_actions = num_actions
        self.batch_size = batch_size
        self.discount_rate = discount_rate
        self.history_length = history_length
        self.board_dim = (cols, rows)
        self.clip_error = clip_error
        self.min_reward = min_reward
        self.max_reward = max_reward
        self.batch_norm = batch_norm

        self.be = gen_backend(backend='gpu',
                              batch_size=self.batch_size,
                              datatype=np.dtype('float32').type)

        self.input_shape = (self.history_length, ) + self.board_dim + (
            self.batch_size, )
        self.input = self.be.empty(self.input_shape)
        self.input.lshape = self.input_shape  # hack from simple_dqn "needed for convolutional networks"
        self.targets = self.be.empty((self.num_actions, self.batch_size))

        layers = self._createLayers(self.num_actions)
        self.model = Model(layers=layers)
        self.cost = GeneralizedCost(costfunc=SumSquared())
        # for l in self.model.layers.layers:
        # 	l.parallelism = 'Disabled'
        self.model.initialize(self.input_shape[:-1], cost=self.cost)
        self.optimizer = RMSProp(learning_rate=0.002,
                                 decay_rate=0.95,
                                 stochastic_round=True)

        self.train_iterations = 0
        self.target_model = Model(layers=self._createLayers(num_actions))
        # for l in self.target_model.layers.layers:
        # 	l.parallelism = 'Disabled'
        self.target_model.initialize(self.input_shape[:-1])

        self.callback = None
Example #3
  def __init__(self, num_actions, args):
    # remember parameters
    self.num_actions = num_actions
    self.batch_size = args.batch_size
    self.discount_rate = args.discount_rate
    self.history_length = args.history_length
    self.screen_dim = (args.screen_height, args.screen_width)
    self.clip_error = args.clip_error
    self.min_reward = args.min_reward
    self.max_reward = args.max_reward
    self.batch_norm = args.batch_norm

    # create Neon backend
    self.be = gen_backend(backend = args.backend,
                 batch_size = args.batch_size,
                 rng_seed = args.random_seed,
                 device_id = args.device_id,
                 datatype = np.dtype(args.datatype).type,
                 stochastic_round = args.stochastic_round)

    # prepare tensors once and reuse them
    self.input_shape = (self.history_length,) + self.screen_dim + (self.batch_size,)
    self.input = self.be.empty(self.input_shape)
    self.input.lshape = self.input_shape # HACK: needed for convolutional networks
    self.targets = self.be.empty((self.num_actions, self.batch_size))

    # create model
    layers = self._createLayers(num_actions)
    self.model = Model(layers = layers)
    self.cost = GeneralizedCost(costfunc = SumSquared())
    # Bug fix
    for l in self.model.layers.layers:
      l.parallelism = 'Disabled'
    self.model.initialize(self.input_shape[:-1], self.cost)
    if args.optimizer == 'rmsprop':
      self.optimizer = RMSProp(learning_rate = args.learning_rate, 
          decay_rate = args.decay_rate, 
          stochastic_round = args.stochastic_round)
    elif args.optimizer == 'adam':
      self.optimizer = Adam(learning_rate = args.learning_rate, 
          stochastic_round = args.stochastic_round)
    elif args.optimizer == 'adadelta':
      self.optimizer = Adadelta(decay = args.decay_rate, 
          stochastic_round = args.stochastic_round)
    else:
      assert False, "Unknown optimizer"

    # create target model
    self.train_iterations = 0
    if args.target_steps:
      self.target_model = Model(layers = self._createLayers(num_actions))
      # Bug fix
      for l in self.target_model.layers.layers:
        l.parallelism = 'Disabled'
      self.target_model.initialize(self.input_shape[:-1])
      self.save_weights_prefix = args.save_weights_prefix
    else:
      self.target_model = self.model

    self.callback = None
Example #4
def test_multi_optimizer(backend_default_mkl):
    """
    A test for MultiOptimizer.
    """
    opt_gdm = GradientDescentMomentum(
        learning_rate=0.001, momentum_coef=0.9, wdecay=0.005)
    opt_ada = Adadelta()
    opt_adam = Adam()
    opt_rms = RMSProp()
    opt_rms_1 = RMSProp(gradient_clip_value=5)
    init_one = Gaussian(scale=0.01)

    l1 = Conv((11, 11, 64), strides=4, padding=3,
              init=init_one, bias=Constant(0), activation=Rectlin())
    l2 = Affine(nout=4096, init=init_one,
                bias=Constant(1), activation=Rectlin())
    l3 = LSTM(output_size=1000, init=init_one, activation=Logistic(), gate_activation=Tanh())
    l4 = GRU(output_size=100, init=init_one, activation=Logistic(), gate_activation=Tanh())
    layers = [l1, l2, l3, l4]
    layer_list = []
    for layer in layers:
        if isinstance(layer, list):
            layer_list.extend(layer)
        else:
            layer_list.append(layer)
    for l in layer_list:
        l.configure(in_obj=(16, 28, 28))
        l.allocate()
    # separate layer_list into two, the last two recurrent layers and the rest
    layer_list1, layer_list2 = layer_list[:-2], layer_list[-2:]
    opt = MultiOptimizer({'default': opt_gdm,
                          'Bias': opt_ada,
                          'Convolution': opt_adam,
                          'Convolution_bias': opt_adam,
                          'Linear': opt_rms,
                          'LSTM': opt_rms_1,
                          'GRU': opt_rms_1})
    layers_to_optimize1 = [l for l in layer_list1 if isinstance(l, ParameterLayer)]
    layers_to_optimize2 = [l for l in layer_list2 if isinstance(l, ParameterLayer)]
    opt.optimize(layers_to_optimize1, 0)
    assert opt.map_list[opt_adam][0].__class__.__name__ == 'Convolution_bias'
    assert opt.map_list[opt_rms][0].__class__.__name__ == 'Linear'
    opt.optimize(layers_to_optimize2, 0)
    assert opt.map_list[opt_rms_1][0].__class__.__name__ == 'LSTM'
    assert opt.map_list[opt_rms_1][1].__class__.__name__ == 'GRU'
Example #5
    def __init__(self, args, max_action_no, batch_dimension):
        self.args = args
        self.train_batch_size = args.train_batch_size
        self.discount_factor = args.discount_factor
        self.use_gpu_replay_mem = args.use_gpu_replay_mem

        self.be = gen_backend(backend='gpu', batch_size=self.train_batch_size)

        self.input_shape = (batch_dimension[1], batch_dimension[2],
                            batch_dimension[3], batch_dimension[0])
        self.input = self.be.empty(self.input_shape)
        self.input.lshape = self.input_shape  # HACK: needed for convolutional networks
        self.targets = self.be.empty((max_action_no, self.train_batch_size))

        if self.use_gpu_replay_mem:
            self.history_buffer = self.be.zeros(batch_dimension,
                                                dtype=np.uint8)
            self.input_uint8 = self.be.empty(self.input_shape, dtype=np.uint8)
        else:
            self.history_buffer = np.zeros(batch_dimension, dtype=np.float32)

        self.train_net = Model(self.create_layers(max_action_no))
        self.cost = GeneralizedCost(costfunc=SumSquared())
        # Bug fix
        for l in self.train_net.layers.layers:
            l.parallelism = 'Disabled'
        self.train_net.initialize(self.input_shape[:-1], self.cost)

        self.target_net = Model(self.create_layers(max_action_no))
        # Bug fix
        for l in self.target_net.layers.layers:
            l.parallelism = 'Disabled'
        self.target_net.initialize(self.input_shape[:-1])

        if self.args.optimizer == 'Adam':  # Adam
            self.optimizer = Adam(beta_1=args.rms_decay,
                                  beta_2=args.rms_decay,
                                  learning_rate=args.learning_rate)
        else:  # Neon RMSProp
            self.optimizer = RMSProp(decay_rate=args.rms_decay,
                                     learning_rate=args.learning_rate)

        self.max_action_no = max_action_no
        self.running = True
Example #6
    def __init__(self, num_actions, args):
        # remember parameters
        self.num_actions = num_actions
        self.batch_size = args.batch_size
        self.discount_rate = args.discount_rate
        self.history_length = args.history_length
        self.screen_dim = (args.screen_height, args.screen_width)
        self.clip_error = args.clip_error

        # create Neon backend
        self.be = gen_backend(backend=args.backend,
                              batch_size=args.batch_size,
                              rng_seed=args.random_seed,
                              device_id=args.device_id,
                              default_dtype=np.dtype(args.datatype).type,
                              stochastic_round=args.stochastic_round)

        # prepare tensors once and reuse them
        self.input_shape = (self.history_length, ) + self.screen_dim + (
            self.batch_size, )
        self.tensor = self.be.empty(self.input_shape)
        self.tensor.lshape = self.input_shape  # needed for convolutional networks
        self.targets = self.be.empty((self.num_actions, self.batch_size))

        # create model
        layers = self.createLayers(num_actions)
        self.model = Model(layers=layers)
        self.cost = GeneralizedCost(costfunc=SumSquared())
        self.model.initialize(self.tensor.shape[:-1], self.cost)
        self.optimizer = RMSProp(learning_rate=args.learning_rate,
                                 decay_rate=args.rmsprop_decay_rate,
                                 stochastic_round=args.stochastic_round)

        # create target model
        self.target_steps = args.target_steps
        self.train_iterations = 0
        if self.target_steps:
            self.target_model = Model(layers=self.createLayers(num_actions))
            self.target_model.initialize(self.tensor.shape[:-1])
            self.save_weights_path = args.save_weights_path
        else:
            self.target_model = self.model

        self.callback = None
Example #7
    def __init__(self, args,  max_action_no, batch_dimension):
        self.args = args
        self.train_batch_size = args.train_batch_size
        self.discount_factor = args.discount_factor
        self.use_gpu_replay_mem = args.use_gpu_replay_mem
        
        self.be = gen_backend(backend='gpu',
                              batch_size=self.train_batch_size)

        self.input_shape = (batch_dimension[1], batch_dimension[2], batch_dimension[3], batch_dimension[0])
        self.input = self.be.empty(self.input_shape)
        self.input.lshape = self.input_shape # HACK: needed for convolutional networks
        self.targets = self.be.empty((max_action_no, self.train_batch_size))

        if self.use_gpu_replay_mem:
            self.history_buffer = self.be.zeros(batch_dimension, dtype=np.uint8)
            self.input_uint8 = self.be.empty(self.input_shape, dtype=np.uint8)
        else:
            self.history_buffer = np.zeros(batch_dimension, dtype=np.float32)

        self.train_net = Model(self.create_layers(max_action_no))
        self.cost = GeneralizedCost(costfunc=SumSquared())
        # Bug fix
        for l in self.train_net.layers.layers:
            l.parallelism = 'Disabled'
        self.train_net.initialize(self.input_shape[:-1], self.cost)
        
        self.target_net = Model(self.create_layers(max_action_no))
        # Bug fix
        for l in self.target_net.layers.layers:
            l.parallelism = 'Disabled'
        self.target_net.initialize(self.input_shape[:-1])

        if self.args.optimizer == 'Adam':  # Adam
            self.optimizer = Adam(beta_1=args.rms_decay,
                                  beta_2=args.rms_decay,
                                  learning_rate=args.learning_rate)
        else:  # Neon RMSProp
            self.optimizer = RMSProp(decay_rate=args.rms_decay,
                                     learning_rate=args.learning_rate)

        self.max_action_no = max_action_no
        self.running = True
Example #8
def test_rmsprop(backend):
    rms = RMSProp()
    param = np.random.rand(200, 128)
    param2 = copy.deepcopy(param)
    grad = 0.01 * np.random.rand(200, 128)
    states = [0.01 * np.random.rand(200, 128)]
    state = states[0]
    decay = rms.decay_rate
    denom = np.sqrt(decay * state + np.square(grad) * (1.0 - decay) + rms.epsilon) + rms.epsilon
    param2[:] -= grad * rms.learning_rate / denom
    param_list = [((wrap(param), wrap(grad)), [wrap(states[0])])]
    compare_tensors(rms, param_list, param2, tol=1e-7)
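
For reference, the quantity this test recomputes by hand is the standard RMSProp step: keep a decaying average of the squared gradients and divide the learning step by its square root. A minimal standalone NumPy sketch of that update (the helper name and default hyperparameters here are illustrative, not Neon's API):

import numpy as np

def rmsprop_step(param, grad, state, lr=2e-3, decay=0.95, eps=1e-6):
    # running average of squared gradients, as in the test's `denom` above
    state[:] = decay * state + (1.0 - decay) * np.square(grad)
    # scale the step by the root of that average; eps keeps the division stable
    param[:] -= lr * grad / (np.sqrt(state + eps) + eps)
    return param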
Example #9
  def __init__(self, state_size, num_steers, num_speeds, args):
    # remember parameters
    self.state_size = state_size
    self.num_steers = num_steers
    self.num_speeds = num_speeds
    self.num_actions = num_steers + num_speeds
    self.num_layers = args.hidden_layers
    self.hidden_nodes = args.hidden_nodes
    self.batch_size = args.batch_size
    self.discount_rate = args.discount_rate
    self.clip_error = args.clip_error

    # create Neon backend
    self.be = gen_backend(backend = args.backend,
                 batch_size = args.batch_size,
                 rng_seed = args.random_seed,
                 device_id = args.device_id,
                 datatype = np.dtype(args.datatype).type,
                 stochastic_round = args.stochastic_round)

    # prepare tensors once and reuse them
    self.input_shape = (self.state_size, self.batch_size)
    self.input = self.be.empty(self.input_shape)
    self.targets = self.be.empty((self.num_actions, self.batch_size))

    # create model
    self.model = Model(layers = self._createLayers())
    self.cost = GeneralizedCost(costfunc = SumSquared())
    self.model.initialize(self.input_shape[:-1], self.cost)
    if args.optimizer == 'rmsprop':
      self.optimizer = RMSProp(learning_rate = args.learning_rate, 
          decay_rate = args.decay_rate, 
          stochastic_round = args.stochastic_round)
    elif args.optimizer == 'adam':
      self.optimizer = Adam(learning_rate = args.learning_rate, 
          stochastic_round = args.stochastic_round)
    elif args.optimizer == 'adadelta':
      self.optimizer = Adadelta(decay = args.decay_rate, 
          stochastic_round = args.stochastic_round)
    else:
      assert False, "Unknown optimizer"

    # create target model
    self.target_steps = args.target_steps
    self.train_iterations = 0
    if self.target_steps:
      self.target_model = Model(layers = self._createLayers())
      self.target_model.initialize(self.input_shape[:-1])
      self.save_weights_prefix = args.save_weights_prefix
    else:
      self.target_model = self.model
Example #10
def test_rmsprop_wclip(backend_default):
    wclip = 0.5
    rms = RMSProp(param_clip_value=wclip)
    param = np.random.rand(200, 128)
    param2 = copy.deepcopy(param)
    grad = 0.01 * np.random.rand(200, 128)
    grad2 = grad / 128.
    states = [0.01 * np.random.rand(200, 128)]
    state = states[0]
    decay = rms.decay_rate
    denom = np.sqrt(decay * state + np.square(grad2) * (1.0 - decay) + rms.epsilon) + rms.epsilon
    param2[:] -= grad2 * float(rms.learning_rate) / denom
    np.clip(param2, -wclip, wclip, param2)
    param_list = [((wrap(param), wrap(grad)), [wrap(states[0])])]
    compare_tensors(rms, param_list, param2, tol=1e-7)
Example #11
    def _set_optimizer(self):
        """ Initializes the selected optimization algorithm. """
        _logger.debug("Optimizer = %s" % str(self.args.optimizer))
        if self.args.optimizer == 'rmsprop':
            self.optimizer = RMSProp(
                    learning_rate = self.args.learning_rate,
                    decay_rate = self.args.decay_rate,
                    stochastic_round = self.args.stochastic_round)
        elif self.args.optimizer == 'adam':
            self.optimizer = Adam(
                    learning_rate = self.args.learning_rate,
                    stochastic_round = self.args.stochastic_round)
        elif self.args.optimizer == 'adadelta':
            self.optimizer = Adadelta(
                    decay = self.args.decay_rate,
                    stochastic_round = self.args.stochastic_round)
        else:
            assert False, "Unknown optimizer"
Example #12
  def __init__(self, num_actions, args):
    # remember parameters
    self.num_actions = num_actions
    self.batch_size = args.batch_size
    self.discount_rate = args.discount_rate
    self.history_length = args.history_length
    self.screen_dim = (args.screen_height, args.screen_width)
    self.clip_error = args.clip_error

    # create Neon backend
    self.be = gen_backend(backend = args.backend,
                 batch_size = args.batch_size,
                 rng_seed = args.random_seed,
                 device_id = args.device_id,
                 default_dtype = np.dtype(args.datatype).type,
                 stochastic_round = args.stochastic_round)

    # prepare tensors once and reuse them
    self.input_shape = (self.history_length,) + self.screen_dim + (self.batch_size,)
    self.tensor = self.be.empty(self.input_shape)
    self.tensor.lshape = self.input_shape # needed for convolutional networks
    self.targets = self.be.empty((self.num_actions, self.batch_size))

    # create model
    layers = self.createLayers(num_actions)
    self.model = Model(layers = layers)
    self.cost = GeneralizedCost(costfunc = SumSquared())
    self.model.initialize(self.tensor.shape[:-1], self.cost)
    self.optimizer = RMSProp(learning_rate = args.learning_rate, 
        decay_rate = args.rmsprop_decay_rate, 
        stochastic_round = args.stochastic_round)

    # create target model
    self.target_steps = args.target_steps
    self.train_iterations = 0
    if self.target_steps:
      self.target_model = Model(layers = self.createLayers(num_actions))
      self.target_model.initialize(self.tensor.shape[:-1])
      self.save_weights_path = args.save_weights_path
    else:
      self.target_model = self.model

    self.callback = None
Example #13
class DeepQNetwork:
    def __init__(self,
                 num_actions,
                 batch_size=32,
                 discount_rate=0.99,
                 history_length=4,
                 cols=64,
                 rows=64,
                 clip_error=1,
                 min_reward=-1,
                 max_reward=1,
                 batch_norm=False):
        self.num_actions = num_actions
        self.batch_size = batch_size
        self.discount_rate = discount_rate
        self.history_length = history_length
        self.board_dim = (cols, rows)
        self.clip_error = clip_error
        self.min_reward = min_reward
        self.max_reward = max_reward
        self.batch_norm = batch_norm

        self.be = gen_backend(backend='gpu',
                              batch_size=self.batch_size,
                              datatype=np.dtype('float32').type)

        self.input_shape = (self.history_length, ) + self.board_dim + (
            self.batch_size, )
        self.input = self.be.empty(self.input_shape)
        self.input.lshape = self.input_shape  # hack from simple_dqn "needed for convolutional networks"
        self.targets = self.be.empty((self.num_actions, self.batch_size))

        layers = self._createLayers(self.num_actions)
        self.model = Model(layers=layers)
        self.cost = GeneralizedCost(costfunc=SumSquared())
        # for l in self.model.layers.layers:
        # 	l.parallelism = 'Disabled'
        self.model.initialize(self.input_shape[:-1], cost=self.cost)
        self.optimizer = RMSProp(learning_rate=0.002,
                                 decay_rate=0.95,
                                 stochastic_round=True)

        self.train_iterations = 0
        self.target_model = Model(layers=self._createLayers(num_actions))
        # for l in self.target_model.layers.layers:
        # 	l.parallelism = 'Disabled'
        self.target_model.initialize(self.input_shape[:-1])

        self.callback = None

    def _createLayers(self, num_actions):
        init_xavier_conv = Xavier(local=True)
        init_xavier_affine = Xavier(local=False)
        layers = []
        layers.append(
            Conv((8, 8, 32),
                 strides=4,
                 init=init_xavier_conv,
                 activation=Rectlin(),
                 batch_norm=self.batch_norm))
        layers.append(
            Conv((4, 4, 64),
                 strides=2,
                 init=init_xavier_conv,
                 activation=Rectlin(),
                 batch_norm=self.batch_norm))
        layers.append(
            Conv((2, 2, 128),
                 strides=1,
                 init=init_xavier_conv,
                 activation=Rectlin(),
                 batch_norm=self.batch_norm))
        layers.append(
            Affine(nout=256,
                   init=init_xavier_affine,
                   activation=Rectlin(),
                   batch_norm=self.batch_norm))
        layers.append(Affine(nout=num_actions, init=init_xavier_affine))
        return layers

    def _setInput(self, states):
        states = np.transpose(states, axes=(1, 2, 3, 0))
        self.input.set(states.copy())
        self.be.add(self.input, 1, self.input)
        self.be.divide(self.input, 2, self.input)

    def update_target_network(self):
        pdict = self.model.get_description(get_weights=True, keep_states=True)
        self.target_model.deserialize(pdict, load_states=True)

    def train(self, minibatch, epoch):
        prestates, actions, rewards, poststates, terminals = minibatch

        self._setInput(poststates)
        postq = self.target_model.fprop(self.input, inference=True)
        assert postq.shape == (self.num_actions, self.batch_size)

        maxpostq = self.be.max(postq, axis=0).asnumpyarray()
        assert maxpostq.shape == (1, self.batch_size)

        self._setInput(prestates)
        preq = self.model.fprop(self.input, inference=False)
        assert preq.shape == (self.num_actions, self.batch_size)

        targets = preq.asnumpyarray().copy()
        rewards = np.clip(rewards, -1, 1)

        for i, action in enumerate(actions):
            if terminals[i]:
                targets[action, i] = float(rewards[i])
            else:
                targets[action, i] = float(
                    rewards[i]) + self.discount_rate * maxpostq[0, i]

        self.targets.set(targets)

        deltas = self.cost.get_errors(preq, self.targets)
        assert deltas.shape == (self.num_actions, self.batch_size)

        cost = self.cost.get_cost(preq, self.targets)
        assert cost.shape == (1, 1)

        if self.clip_error:
            self.be.clip(deltas, -self.clip_error, self.clip_error, out=deltas)

        self.model.bprop(deltas)
        self.optimizer.optimize(self.model.layers_to_optimize, epoch)

        self.train_iterations += 1
        self.callback.on_train(cost[0, 0])

    def predict(self, states):
        assert states.shape == ((
            self.batch_size,
            self.history_length,
        ) + self.board_dim)

        self._setInput(states)
        qvalues = self.model.fprop(self.input, inference=True)
        assert qvalues.shape == (self.num_actions, self.batch_size)

        return qvalues.T.asnumpyarray()

    def load_weights(self, load_path):
        self.model.load_params(load_path)

    def save_weights(self, save_path):
        self.model.save_params(save_path)
Example #14
class DeepQNetwork:
    def __init__(self, num_actions, args):
        # remember parameters
        self.num_actions = num_actions
        self.batch_size = args.batch_size
        self.discount_rate = args.discount_rate
        self.history_length = args.history_length
        self.screen_dim = (args.screen_height, args.screen_width)
        self.clip_error = args.clip_error

        # create Neon backend
        self.be = gen_backend(backend=args.backend,
                              batch_size=args.batch_size,
                              rng_seed=args.random_seed,
                              device_id=args.device_id,
                              default_dtype=np.dtype(args.datatype).type,
                              stochastic_round=args.stochastic_round)

        # prepare tensors once and reuse them
        self.input_shape = (self.history_length, ) + self.screen_dim + (
            self.batch_size, )
        self.tensor = self.be.empty(self.input_shape)
        self.tensor.lshape = self.input_shape  # needed for convolutional networks
        self.targets = self.be.empty((self.num_actions, self.batch_size))

        # create model
        layers = self.createLayers(num_actions)
        self.model = Model(layers=layers)
        self.cost = GeneralizedCost(costfunc=SumSquared())
        self.model.initialize(self.tensor.shape[:-1], self.cost)
        self.optimizer = RMSProp(learning_rate=args.learning_rate,
                                 decay_rate=args.rmsprop_decay_rate,
                                 stochastic_round=args.stochastic_round)

        # create target model
        self.target_steps = args.target_steps
        self.train_iterations = 0
        if self.target_steps:
            self.target_model = Model(layers=self.createLayers(num_actions))
            self.target_model.initialize(self.tensor.shape[:-1])
            self.save_weights_path = args.save_weights_path
        else:
            self.target_model = self.model

        self.callback = None

    def createLayers(self, num_actions):
        # create network
        init_norm = Gaussian(loc=0.0, scale=0.01)
        layers = []
        # The first hidden layer convolves 32 filters of 8x8 with stride 4 with the input image and applies a rectifier nonlinearity.
        layers.append(
            Conv((8, 8, 32), strides=4, init=init_norm, activation=Rectlin()))
        # The second hidden layer convolves 64 filters of 4x4 with stride 2, again followed by a rectifier nonlinearity.
        layers.append(
            Conv((4, 4, 64), strides=2, init=init_norm, activation=Rectlin()))
        # This is followed by a third convolutional layer that convolves 64 filters of 3x3 with stride 1 followed by a rectifier.
        layers.append(
            Conv((3, 3, 64), strides=1, init=init_norm, activation=Rectlin()))
        # The final hidden layer is fully-connected and consists of 512 rectifier units.
        layers.append(Affine(nout=512, init=init_norm, activation=Rectlin()))
        # The output layer is a fully-connected linear layer with a single output for each valid action.
        layers.append(Affine(nout=num_actions, init=init_norm))
        return layers

    def setTensor(self, states):
        # change order of axes to match what Neon expects
        states = np.transpose(states, axes=(1, 2, 3, 0))
        # copy() shouldn't be necessary here, but Neon doesn't work otherwise
        self.tensor.set(states.copy())
        # normalize network input between 0 and 1
        self.be.divide(self.tensor, 255, self.tensor)

    def train(self, minibatch, epoch):
        # expand components of minibatch
        prestates, actions, rewards, poststates, terminals = minibatch
        assert len(prestates.shape) == 4
        assert len(poststates.shape) == 4
        assert len(actions.shape) == 1
        assert len(rewards.shape) == 1
        assert len(terminals.shape) == 1
        assert prestates.shape == poststates.shape
        assert prestates.shape[0] == actions.shape[0] == rewards.shape[
            0] == poststates.shape[0] == terminals.shape[0]

        if self.target_steps and self.train_iterations % self.target_steps == 0:
            # HACK: push something through network, so that weights exist
            self.model.fprop(self.tensor)
            # HACK: serialize network to disk and read it back to clone
            filename = os.path.join(self.save_weights_path,
                                    "target_network.pkl")
            save_obj(self.model.serialize(keep_states=False), filename)
            self.target_model.load_weights(filename)

        # feed-forward pass for poststates to get Q-values
        self.setTensor(poststates)
        postq = self.target_model.fprop(self.tensor, inference=True)
        assert postq.shape == (self.num_actions, self.batch_size)

        # calculate max Q-value for each poststate
        maxpostq = self.be.max(postq, axis=0).asnumpyarray()
        assert maxpostq.shape == (1, self.batch_size)

        # feed-forward pass for prestates
        self.setTensor(prestates)
        preq = self.model.fprop(self.tensor, inference=False)
        assert preq.shape == (self.num_actions, self.batch_size)

        # make copy of prestate Q-values as targets
        targets = preq.asnumpyarray()

        # update Q-value targets for actions taken
        for i, action in enumerate(actions):
            if terminals[i]:
                targets[action, i] = float(rewards[i])
            else:
                targets[action, i] = float(
                    rewards[i]) + self.discount_rate * maxpostq[0, i]

        # copy targets to GPU memory
        self.targets.set(targets)

        # calculate errors
        deltas = self.cost.get_errors(preq, self.targets)
        assert deltas.shape == (self.num_actions, self.batch_size)
        #assert np.count_nonzero(deltas.asnumpyarray()) == 32

        # calculate cost, just in case
        cost = self.cost.get_cost(preq, self.targets)
        assert cost.shape == (1, 1)

        # clip errors
        if self.clip_error:
            self.be.clip(deltas, -self.clip_error, self.clip_error, out=deltas)

        # perform back-propagation of gradients
        self.model.bprop(deltas)

        # perform optimization
        self.optimizer.optimize(self.model.layers_to_optimize, epoch)

        # increase number of weight updates (needed for target clone interval)
        self.train_iterations += 1

        # calculate statistics
        if self.callback:
            self.callback.on_train(cost.asnumpyarray()[0, 0])

    def predict(self, states):
        # minibatch is full size, because Neon doesn't allow changing the minibatch size
        assert states.shape == ((
            self.batch_size,
            self.history_length,
        ) + self.screen_dim)

        # calculate Q-values for the states
        self.setTensor(states)
        qvalues = self.model.fprop(self.tensor, inference=True)
        assert qvalues.shape == (self.num_actions, self.batch_size)
        if logger.isEnabledFor(logging.DEBUG):
            logger.debug("Q-values: " + str(qvalues.asnumpyarray()[:, 0]))

        # find the action with highest q-value
        actions = self.be.argmax(qvalues, axis=0)
        assert actions.shape == (1, self.batch_size)

        # take only the first result
        return actions.asnumpyarray()[0, 0]

    def getMeanQ(self, states):
        assert states.shape == ((
            self.batch_size,
            self.history_length,
        ) + self.screen_dim)

        # calculate Q-values for the states
        self.setTensor(states)
        qvalues = self.model.fprop(self.tensor, inference=True)
        assert qvalues.shape == (self.num_actions, self.batch_size)

        # take maximum Q-value for each state
        actions = self.be.max(qvalues, axis=0)
        assert actions.astensor().shape == (1, self.batch_size)

        # calculate mean Q-value of all states
        meanq = self.be.mean(actions, axis=1)
        assert meanq.astensor().shape == (1, 1)

        # return the mean
        return meanq.asnumpyarray()[0, 0]

    def load_weights(self, load_path):
        self.model.load_weights(load_path)

    def save_weights(self, save_path):
        save_obj(self.model.serialize(keep_states=True), save_path)
Example #15
# weight initialization
init_norm = Gaussian(loc=0.0, scale=0.01)

# initialize model
layers = []
layers.append(Affine(nout=100, init=init_norm, bias=Constant(0),
                     activation=Rectlin()))
layers.append(Affine(nout=10, init=init_norm, bias=Constant(0),
                     activation=Logistic(shortcut=True),
                     name='special_linear'))

cost = GeneralizedCost(costfunc=CrossEntropyBinary())
mlp = Model(layers=layers)

# fit and validate
optimizer_one = GradientDescentMomentum(learning_rate=0.1, momentum_coef=0.9)
optimizer_two = RMSProp()

# all bias layers and the last linear layer will use
# optimizer_two. all other layers will use optimizer_one.
opt = MultiOptimizer({'default': optimizer_one,
                      'Bias': optimizer_two,
                      'special_linear': optimizer_two})

# configure callbacks
callbacks = Callbacks(mlp, eval_set=valid_set, **args.callback_args)

mlp.fit(train_set, optimizer=opt, num_epochs=args.epochs,
        cost=cost, callbacks=callbacks)
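
The MultiOptimizer mapping above is keyed first by a layer's own name (e.g. 'special_linear') and then by its class name (e.g. 'Bias'), with 'default' as the fallback. A minimal sketch of that lookup convention (pick_optimizer is a hypothetical helper for illustration, not Neon's internal implementation):

def pick_optimizer(layer, optimizer_map):
    # prefer an entry matching the layer's given name, then its class name,
    # and fall back to the 'default' optimizer otherwise
    name = getattr(layer, 'name', None)
    if name in optimizer_map:
        return optimizer_map[name]
    return optimizer_map.get(layer.__class__.__name__, optimizer_map['default'])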
Example #16
            Affine(train_set.nfeatures, init, bias=init, activation=Identity())
        ]
    else:
        layers = [
            LSTM(hidden,
                 init,
                 activation=Logistic(),
                 gate_activation=Tanh(),
                 reset_cells=True),
            RecurrentLast(),
            Affine(train_set.nfeatures, init, bias=init, activation=Identity())
        ]

    model = Model(layers=layers)
    cost = GeneralizedCost(MeanSquared())
    optimizer = RMSProp(stochastic_round=args.rounding)

    callbacks = Callbacks(model, eval_set=valid_set, **args.callback_args)

    # fit model
    model.fit(train_set,
              optimizer=optimizer,
              num_epochs=args.epochs,
              cost=cost,
              callbacks=callbacks)

    # =======visualize how the model does on validation set==============
    # run the trained model on train and valid dataset and see how the outputs match
    train_output = model.get_outputs(train_set).reshape(
        -1, train_set.nfeatures)
    valid_output = model.get_outputs(valid_set).reshape(
Example #17
    MergeMultistream(layers=[image_path, sent_path], merge="recurrent"),
    Dropout(keep=0.5),
    LSTM(hidden_size, init, activation=Logistic(), gate_activation=Tanh(), reset_cells=True),
    Affine(train_set.vocab_size, init, bias=init2, activation=Softmax())
]

cost = GeneralizedCostMask(costfunc=CrossEntropyMulti(usebits=True))

# configure callbacks
checkpoint_model_path = "~/image_caption2.pickle"
if args.callback_args['save_path'] is None:
    args.callback_args['save_path'] = checkpoint_model_path

if args.callback_args['serialize'] is None:
    args.callback_args['serialize'] = 1

model = Model(layers=layers)

callbacks = Callbacks(model, train_set, **args.callback_args)

opt = RMSProp(decay_rate=0.997, learning_rate=0.0005, epsilon=1e-8, gradient_clip_value=1)

# train model
model.fit(train_set, optimizer=opt, num_epochs=num_epochs, cost=cost, callbacks=callbacks)

# load model (if exited) and evaluate bleu score on test set
model.load_params(checkpoint_model_path)
test_set = ImageCaptionTest(path=data_path)
sents, targets = test_set.predict(model)
test_set.bleu_score(sents, targets)
Example #18
cost = GeneralizedCostMask(costfunc=CrossEntropyMulti(usebits=True))
checkpoint_model_path = "~/image_caption2.pickle"
checkpoint_schedule = range(num_epochs)

model = Model(layers=layers)

callbacks = Callbacks(model,
                      train_set,
                      output_file=args.output_file,
                      progress_bar=args.progress_bar)
callbacks.add_serialize_callback(checkpoint_schedule, checkpoint_model_path)

opt = RMSProp(decay_rate=0.997,
              learning_rate=0.0005,
              epsilon=1e-8,
              clip_gradients=True,
              gradient_limit=1.0)

# train model
model.fit(train_set,
          optimizer=opt,
          num_epochs=num_epochs,
          cost=cost,
          callbacks=callbacks)

# load model (if exited) and evaluate bleu score on test set
model.load_weights(checkpoint_model_path)
test_set = ImageCaptionTest(path=data_path)
sents, targets = test_set.predict(model)
test_set.bleu_score(sents, targets)
Example #19
# setup model layers

layers = [
    Conv((5, 5, 16), init=init_norm, activation=Rectlin()),
    Pooling(2),
    Conv((5, 5, 32), init=init_norm, activation=Rectlin()),
    Pooling(2),
    Conv((3, 3, 32), init=init_norm, activation=Rectlin()),
    Pooling(2),
    Affine(nout=100, init=init_norm, activation=Rectlin()),
    Linear(nout=4, init=init_norm)
]

model = Model(layers=layers)

# cost = GeneralizedCost(costfunc=CrossEntropyBinary())
cost = GeneralizedCost(costfunc=SumSquared())
# fit and validate
optimizer = RMSProp()

# configure callbacks
callbacks = Callbacks(model, eval_set=eval_set, eval_freq=1)

model.fit(train_set,
          cost=cost,
          optimizer=optimizer,
          num_epochs=10,
          callbacks=callbacks)
y_test = model.get_outputs(test_set)
Example #20
class ModelRunnerNeon():
    def __init__(self, args, max_action_no, batch_dimension):
        self.args = args
        self.train_batch_size = args.train_batch_size
        self.discount_factor = args.discount_factor
        self.use_gpu_replay_mem = args.use_gpu_replay_mem

        self.be = gen_backend(backend='gpu', batch_size=self.train_batch_size)

        self.input_shape = (batch_dimension[1], batch_dimension[2],
                            batch_dimension[3], batch_dimension[0])
        self.input = self.be.empty(self.input_shape)
        self.input.lshape = self.input_shape  # HACK: needed for convolutional networks
        self.targets = self.be.empty((max_action_no, self.train_batch_size))

        if self.use_gpu_replay_mem:
            self.history_buffer = self.be.zeros(batch_dimension,
                                                dtype=np.uint8)
            self.input_uint8 = self.be.empty(self.input_shape, dtype=np.uint8)
        else:
            self.history_buffer = np.zeros(batch_dimension, dtype=np.float32)

        self.train_net = Model(self.create_layers(max_action_no))
        self.cost = GeneralizedCost(costfunc=SumSquared())
        # Bug fix
        for l in self.train_net.layers.layers:
            l.parallelism = 'Disabled'
        self.train_net.initialize(self.input_shape[:-1], self.cost)

        self.target_net = Model(self.create_layers(max_action_no))
        # Bug fix
        for l in self.target_net.layers.layers:
            l.parallelism = 'Disabled'
        self.target_net.initialize(self.input_shape[:-1])

        if self.args.optimizer == 'Adam':  # Adam
            self.optimizer = Adam(beta_1=args.rms_decay,
                                  beta_2=args.rms_decay,
                                  learning_rate=args.learning_rate)
        else:  # Neon RMSProp
            self.optimizer = RMSProp(decay_rate=args.rms_decay,
                                     learning_rate=args.learning_rate)

        self.max_action_no = max_action_no
        self.running = True

    def get_initializer(self, input_size):
        dnnInit = self.args.dnn_initializer
        if dnnInit == 'xavier':
            initializer = Xavier()
        elif dnnInit == 'fan_in':
            std_dev = 1.0 / math.sqrt(input_size)
            initializer = Uniform(low=-std_dev, high=std_dev)
        else:
            initializer = Gaussian(0, 0.01)
        return initializer

    def create_layers(self, max_action_no):
        layers = []

        initializer = self.get_initializer(input_size=4 * 8 * 8)
        layers.append(
            Conv(fshape=(8, 8, 32),
                 strides=4,
                 init=initializer,
                 bias=initializer,
                 activation=Rectlin()))

        initializer = self.get_initializer(input_size=32 * 4 * 4)
        layers.append(
            Conv(fshape=(4, 4, 64),
                 strides=2,
                 init=initializer,
                 bias=initializer,
                 activation=Rectlin()))

        initializer = self.get_initializer(input_size=64 * 3 * 3)
        layers.append(
            Conv(fshape=(3, 3, 64),
                 strides=1,
                 init=initializer,
                 bias=initializer,
                 activation=Rectlin()))

        initializer = self.get_initializer(input_size=7 * 7 * 64)
        layers.append(
            Affine(nout=512,
                   init=initializer,
                   bias=initializer,
                   activation=Rectlin()))

        initializer = self.get_initializer(input_size=512)
        layers.append(
            Affine(nout=max_action_no, init=initializer, bias=initializer))

        return layers

    def clip_reward(self, reward):
        if reward > self.args.clip_reward_high:
            return self.args.clip_reward_high
        elif reward < self.args.clip_reward_low:
            return self.args.clip_reward_low
        else:
            return reward

    def set_input(self, data):
        if self.use_gpu_replay_mem:
            self.be.copy_transpose(data, self.input_uint8, axes=(1, 2, 3, 0))
            self.input[:] = self.input_uint8 / 255
        else:
            self.input.set(data.transpose(1, 2, 3, 0).copy())
            self.be.divide(self.input, 255, self.input)

    def predict(self, history_buffer):
        self.set_input(history_buffer)
        output = self.train_net.fprop(self.input, inference=True)
        return output.T.asnumpyarray()[0]

    def print_weights(self):
        pass

    def train(self, minibatch, replay_memory, learning_rate, debug):
        if self.args.prioritized_replay == True:
            prestates, actions, rewards, poststates, terminals, replay_indexes, heap_indexes, weights = minibatch
        else:
            prestates, actions, rewards, poststates, terminals = minibatch

        # Get Q*(s, a) with targetNet
        self.set_input(poststates)
        post_qvalue = self.target_net.fprop(self.input,
                                            inference=True).T.asnumpyarray()

        if self.args.double_dqn == True:
            # Get Q*(s, a) with trainNet
            post_qvalue2 = self.train_net.fprop(
                self.input, inference=True).T.asnumpyarray()

        # Get Q(s, a) with trainNet
        self.set_input(prestates)
        pre_qvalue = self.train_net.fprop(self.input, inference=False)

        label = pre_qvalue.asnumpyarray().copy()
        for i in range(0, self.train_batch_size):
            if self.args.clip_reward:
                reward = self.clip_reward(rewards[i])
            else:
                reward = rewards[i]
            if terminals[i]:
                label[actions[i], i] = reward
            else:
                if self.args.double_dqn == True:
                    max_index = np.argmax(post_qvalue2[i])
                    label[actions[i],
                          i] = reward + self.discount_factor * post_qvalue[i][
                              max_index]
                else:
                    label[actions[i],
                          i] = reward + self.discount_factor * np.max(
                              post_qvalue[i])

        # copy targets to GPU memory
        self.targets.set(label)

        delta = self.cost.get_errors(pre_qvalue, self.targets)

        if self.args.prioritized_replay == True:
            delta_value = delta.asnumpyarray()
            for i in range(self.train_batch_size):
                if debug:
                    print('weight[%s]: %.5f, delta: %.5f, newDelta: %.5f' % (
                        i, weights[i], delta_value[actions[i], i],
                        weights[i] * delta_value[actions[i], i]))
                replay_memory.update_td(heap_indexes[i],
                                        abs(delta_value[actions[i], i]))
                delta_value[actions[i],
                            i] = weights[i] * delta_value[actions[i], i]
            delta.set(delta_value.copy())

        if self.args.clip_loss:
            self.be.clip(delta, -1.0, 1.0, out=delta)

        self.train_net.bprop(delta)
        self.optimizer.optimize(self.train_net.layers_to_optimize, epoch=0)

    def update_model(self):
        # have to serialize also states for batch normalization to work
        pdict = self.train_net.get_description(get_weights=True,
                                               keep_states=True)
        self.target_net.deserialize(pdict, load_states=True)
        #print ('Updated target model')

    def finish_train(self):
        self.running = False

    def load(self, file_name):
        self.train_net.load_params(file_name)
        self.update_model()

    def save(self, file_name):
        self.train_net.save_params(file_name)
Example #21
# model initialization
if rlayer_type == 'lstm':
    rlayer = LSTM(hidden_size, init, Logistic(), Tanh())
elif rlayer_type == 'gru':
    rlayer = GRU(hidden_size, init, activation=Tanh(), gate_activation=Logistic())
else:
    raise NotImplementedError('%s layer not implemented' % rlayer_type)

layers = [
    rlayer,
    Affine(len(train_set.vocab), init, bias=init, activation=Softmax())
]

cost = GeneralizedCost(costfunc=CrossEntropyMulti(usebits=True))

model = Model(layers=layers)

optimizer = RMSProp(clip_gradients=clip_gradients, stochastic_round=args.rounding)

# configure callbacks
callbacks = Callbacks(model, train_set, output_file=args.output_file,
                      valid_set=valid_set, valid_freq=args.validation_freq,
                      progress_bar=args.progress_bar)

# train model
model.fit(train_set,
          optimizer=optimizer,
          num_epochs=num_epochs,
          cost=cost,
          callbacks=callbacks)
Example #22
class DeepQNetwork:
  def __init__(self, num_actions, args):
    # create Neon backend
    self.be = gen_backend(backend = args.backend,
                 batch_size = args.batch_size,
                 rng_seed = args.random_seed,
                 device_id = args.device_id,
                 default_dtype = np.dtype(args.datatype).type,
                 stochastic_round = args.stochastic_round)

    # create model
    layers = self.createLayers(num_actions)
    self.model = Model(layers = layers)
    self.cost = GeneralizedCost(costfunc = SumSquared())
    self.optimizer = RMSProp(learning_rate = args.learning_rate, 
        decay_rate = args.rmsprop_decay_rate, 
        stochastic_round = args.stochastic_round)

    # create target model
    self.target_steps = args.target_steps
    self.train_iterations = 0
    if self.target_steps:
      self.target_model = Model(layers = self.createLayers(num_actions))
      self.save_weights_path = args.save_weights_path
    else:
      self.target_model = self.model

    # remember parameters
    self.num_actions = num_actions
    self.batch_size = args.batch_size
    self.discount_rate = args.discount_rate
    self.history_length = args.history_length
    self.screen_dim = (args.screen_height, args.screen_width)
    self.clip_error = args.clip_error

    # prepare tensors once and reuse them
    self.input_shape = (self.history_length,) + self.screen_dim + (self.batch_size,)
    self.tensor = self.be.empty(self.input_shape)
    self.tensor.lshape = self.input_shape # needed for convolutional networks
    self.targets = self.be.empty((self.num_actions, self.batch_size))

    self.callback = None

  def createLayers(self, num_actions):
    # create network
    init_norm = Gaussian(loc=0.0, scale=0.01)
    layers = []
    # The first hidden layer convolves 32 filters of 8x8 with stride 4 with the input image and applies a rectifier nonlinearity.
    layers.append(Conv((8, 8, 32), strides=4, init=init_norm, activation=Rectlin()))
    # The second hidden layer convolves 64 filters of 4x4 with stride 2, again followed by a rectifier nonlinearity.
    layers.append(Conv((4, 4, 64), strides=2, init=init_norm, activation=Rectlin()))
    # This is followed by a third convolutional layer that convolves 64 filters of 3x3 with stride 1 followed by a rectifier.
    layers.append(Conv((3, 3, 64), strides=1, init=init_norm, activation=Rectlin()))
    # The final hidden layer is fully-connected and consists of 512 rectifier units.
    layers.append(Affine(nout=512, init=init_norm, activation=Rectlin()))
    # The output layer is a fully-connected linear layer with a single output for each valid action.
    layers.append(Affine(nout = num_actions, init = init_norm))
    return layers

  def setTensor(self, states):
    # change order of axes to match what Neon expects
    states = np.transpose(states, axes = (1, 2, 3, 0))
    # copy() shouldn't be necessary here, but Neon doesn't work otherwise
    self.tensor.set(states.copy())
    # normalize network input between 0 and 1
    self.be.divide(self.tensor, 255, self.tensor)

  def train(self, minibatch, epoch):
    # expand components of minibatch
    prestates, actions, rewards, poststates, terminals = minibatch
    assert len(prestates.shape) == 4
    assert len(poststates.shape) == 4
    assert len(actions.shape) == 1
    assert len(rewards.shape) == 1
    assert len(terminals.shape) == 1
    assert prestates.shape == poststates.shape
    assert prestates.shape[0] == actions.shape[0] == rewards.shape[0] == poststates.shape[0] == terminals.shape[0]

    if self.target_steps and self.train_iterations % self.target_steps == 0:
      # HACK: push something through network, so that weights exist
      self.model.fprop(self.tensor)
      # HACK: serialize network to disk and read it back to clone
      filename = os.path.join(self.save_weights_path, "target_network.pkl")
      save_obj(self.model.serialize(keep_states = False), filename)
      self.target_model.load_weights(filename)

    # feed-forward pass for poststates to get Q-values
    self.setTensor(poststates)
    postq = self.target_model.fprop(self.tensor, inference = True)
    assert postq.shape == (self.num_actions, self.batch_size)

    # calculate max Q-value for each poststate
    maxpostq = self.be.max(postq, axis=0).asnumpyarray()
    assert maxpostq.shape == (1, self.batch_size)

    # feed-forward pass for prestates
    self.setTensor(prestates)
    preq = self.model.fprop(self.tensor, inference = False)
    assert preq.shape == (self.num_actions, self.batch_size)

    # make copy of prestate Q-values as targets
    targets = preq.asnumpyarray()

    # update Q-value targets for actions taken
    for i, action in enumerate(actions):
      if terminals[i]:
        targets[action, i] = float(rewards[i])
      else:
        targets[action, i] = float(rewards[i]) + self.discount_rate * maxpostq[0,i]

    # copy targets to GPU memory
    self.targets.set(targets)

    # calculate errors
    deltas = self.cost.get_errors(preq, self.targets)
    assert deltas.shape == (self.num_actions, self.batch_size)
    #assert np.count_nonzero(deltas.asnumpyarray()) == 32

    # calculate cost, just in case
    cost = self.cost.get_cost(preq, self.targets)
    assert cost.shape == (1,1)

    # clip errors
    if self.clip_error:
      self.be.clip(deltas, -self.clip_error, self.clip_error, out = deltas)

    # perform back-propagation of gradients
    self.model.bprop(deltas)

    # perform optimization
    self.optimizer.optimize(self.model.layers_to_optimize, epoch)

    # increase number of weight updates (needed for target clone interval)
    self.train_iterations += 1

    # calculate statistics
    if self.callback:
      self.callback.on_train(cost.asnumpyarray()[0,0])

  def predict(self, states):
    # minibatch is full size, because Neon doesn't allow changing the minibatch size
    assert states.shape == ((self.batch_size, self.history_length,) + self.screen_dim)

    # calculate Q-values for the states
    self.setTensor(states)
    qvalues = self.model.fprop(self.tensor, inference = True)
    assert qvalues.shape == (self.num_actions, self.batch_size)
    if logger.isEnabledFor(logging.DEBUG):
      logger.debug("Q-values: " + str(qvalues.asnumpyarray()[:,0]))

    # find the action with highest q-value
    actions = self.be.argmax(qvalues, axis = 0)
    assert actions.shape == (1, self.batch_size)

    # take only the first result
    return actions.asnumpyarray()[0,0]

  def getMeanQ(self, states):
    assert states.shape == ((self.batch_size, self.history_length,) + self.screen_dim)

    # calculate Q-values for the states
    self.setTensor(states)
    qvalues = self.model.fprop(self.tensor, inference = True)
    assert qvalues.shape == (self.num_actions, self.batch_size)
    
    # take maximum Q-value for each state
    actions = self.be.max(qvalues, axis = 0)
    assert actions.astensor().shape == (1, self.batch_size)
    
    # calculate mean Q-value of all states
    meanq = self.be.mean(actions, axis = 1)
    assert meanq.astensor().shape == (1, 1)

    # return the mean
    return meanq.asnumpyarray()[0,0]

  def load_weights(self, load_path):
    self.model.load_weights(load_path)

  def save_weights(self, save_path):
    save_obj(self.model.serialize(keep_states = True), save_path)
Example #23
from neon.optimizers import RMSProp
from neon.transforms import Misclassification
from neon.callbacks.callbacks import Callbacks
from network import create_network
from data import make_train_loader, make_val_loader

eval_config = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'whale_eval.cfg')
config_files = [eval_config] if os.path.exists(eval_config) else []
parser = NeonArgparser(__doc__, default_config_files=config_files)
args = parser.parse_args()

model, cost_obj = create_network()

assert 'train' in args.manifest, "Missing train manifest"
assert 'val' in args.manifest, "Missing val manifest"

train = make_train_loader(args.manifest['train'], args.manifest_root, model.be,
                          noise_file=args.manifest.get('noise'))

neon_logger.display('Performing train and test in validation mode')
val = make_val_loader(args.manifest['val'], args.manifest_root, model.be)
metric = Misclassification()

model.fit(dataset=train,
          cost=cost_obj,
          optimizer=RMSProp(learning_rate=1e-4),
          num_epochs=args.epochs,
          callbacks=Callbacks(model, eval_set=val, metric=metric, **args.callback_args))

neon_logger.display('Misclassification error = %.1f%%' % (model.eval(val, metric=metric) * 100))
Example #24
train_set = Text(time_steps, train_path)
valid_set = Text(time_steps, valid_path, vocab=train_set.vocab)

# weight initialization
init = Uniform(low=-0.08, high=0.08)

# model initialization
layers = [
    LSTM(hidden_size, init, activation=Logistic(), gate_activation=Tanh()),
    Affine(len(train_set.vocab), init, bias=init, activation=Softmax())
]
model = Model(layers=layers)

cost = GeneralizedCost(costfunc=CrossEntropyMulti(usebits=True))

optimizer = RMSProp(gradient_clip_value=gradient_clip_value,
                    stochastic_round=args.rounding)

# configure callbacks
callbacks = Callbacks(model, eval_set=valid_set, **args.callback_args)

# fit and validate
model.fit(train_set,
          optimizer=optimizer,
          num_epochs=args.epochs,
          cost=cost,
          callbacks=callbacks)


def sample(prob):
    """
    Sample index from probability distribution
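    """
    # NOTE: the original body was truncated at this point; the lines below are a
    # plausible reconstruction only, assuming numpy is imported as np in the
    # elided header of this example. Renormalize the distribution slightly to
    # guard against rounding, then draw a single index from it.
    prob = prob / (prob.sum() + 1e-6)
    return np.argmax(np.random.multinomial(1, prob, 1))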
Example no. 25
                  init,
                  activation=Tanh(),
                  gate_activation=Logistic())

layers = [
    rlayer1, rlayer2,
    Affine(len(train_set.vocab), init, bias=init, activation=Softmax())
]

cost = GeneralizedCost(costfunc=CrossEntropyMulti(usebits=True))

model = Model(layers=layers)

# multiply the learning rate by 0.97 at each epoch from epoch 10 onward
learning_rate_sched = Schedule(list(range(10, args.epochs)), 0.97)
optimizer = RMSProp(gradient_clip_value=gradient_clip_value,
                    stochastic_round=args.rounding,
                    schedule=learning_rate_sched)

# configure callbacks
callbacks = Callbacks(model, eval_set=valid_set, **args.callback_args)

# train model
model.fit(train_set,
          optimizer=optimizer,
          num_epochs=args.epochs,
          cost=cost,
          callbacks=callbacks)

# get predictions
ypred = model.get_outputs(valid_set)
shape = (valid_set.nbatches, args.batch_size, time_steps)
Example no. 26
                           gen_model=args.gmodel,
                           cost_type='wasserstein',
                           im_size=64,
                           n_chan=3,
                           n_noise=100,
                           n_gen_ftr=64,
                           n_dis_ftr=64,
                           depth=4,
                           n_extra_layers=4,
                           batch_norm=True,
                           dis_iters=5,
                           wgan_param_clamp=0.01,
                           wgan_train_sched=True)

# setup optimizer
optimizer = RMSProp(learning_rate=5e-5, decay_rate=0.99, epsilon=1e-8)

# setup data provider
train = make_loader(args.manifest['train'], args.manifest_root, model.be,
                    args.subset_pct, random_seed)

# configure callbacks
callbacks = Callbacks(model, **args.callback_args)
fdir = ensure_dirs_exist(
    os.path.join(os.path.dirname(os.path.realpath(__file__)), 'results/'))
fname = os.path.splitext(os.path.basename(__file__))[0] +\
    '_[' + datetime.now().strftime('%Y-%m-%d-%H-%M-%S') + ']'
im_args = dict(filename=os.path.join(fdir, fname),
               hw=64,
               num_samples=args.batch_size,
               nchan=3,
Example no. 27
class ModelRunnerNeon():
    def __init__(self, args,  max_action_no, batch_dimension):
        self.args = args
        self.train_batch_size = args.train_batch_size
        self.discount_factor = args.discount_factor
        self.use_gpu_replay_mem = args.use_gpu_replay_mem
        
        self.be = gen_backend(backend='gpu',
                              batch_size=self.train_batch_size)

        self.input_shape = (batch_dimension[1], batch_dimension[2], batch_dimension[3], batch_dimension[0])
        self.input = self.be.empty(self.input_shape)
        self.input.lshape = self.input_shape # HACK: needed for convolutional networks
        self.targets = self.be.empty((max_action_no, self.train_batch_size))

        if self.use_gpu_replay_mem:
            self.history_buffer = self.be.zeros(batch_dimension, dtype=np.uint8)
            self.input_uint8 = self.be.empty(self.input_shape, dtype=np.uint8)
        else:
            self.history_buffer = np.zeros(batch_dimension, dtype=np.float32)

        self.train_net = Model(self.create_layers(max_action_no))
        self.cost = GeneralizedCost(costfunc=SumSquared())
        # workaround: disable layer parallelism
        for l in self.train_net.layers.layers:
            l.parallelism = 'Disabled'
        self.train_net.initialize(self.input_shape[:-1], self.cost)
        
        self.target_net = Model(self.create_layers(max_action_no))
        # workaround: disable layer parallelism
        for l in self.target_net.layers.layers:
            l.parallelism = 'Disabled'
        self.target_net.initialize(self.input_shape[:-1])

        if self.args.optimizer == 'Adam':
            self.optimizer = Adam(beta_1=args.rms_decay,
                                  beta_2=args.rms_decay,
                                  learning_rate=args.learning_rate)
        else:  # Neon RMSProp
            self.optimizer = RMSProp(decay_rate=args.rms_decay,
                                     learning_rate=args.learning_rate)

        self.max_action_no = max_action_no
        self.running = True

    def get_initializer(self, input_size):
        dnnInit = self.args.dnn_initializer
        if dnnInit == 'xavier':
            initializer = Xavier()
        elif dnnInit == 'fan_in':
            # fan-in scaling: uniform in [-1/sqrt(n_in), 1/sqrt(n_in)]
            std_dev = 1.0 / math.sqrt(input_size)
            initializer = Uniform(low=-std_dev, high=std_dev)
        else:
            initializer = Gaussian(0, 0.01)
        return initializer
            
    def create_layers(self, max_action_no):
        layers = []

        initializer = self.get_initializer(input_size = 4 * 8 * 8)
        layers.append(Conv(fshape=(8, 8, 32), strides=4, init=initializer, bias=initializer, activation=Rectlin()))

        initializer = self.get_initializer(input_size = 32 * 4 * 4)
        layers.append(Conv(fshape=(4, 4, 64), strides=2, init=initializer, bias=initializer, activation=Rectlin()))
        
        initializer = self.get_initializer(input_size = 64 * 3 * 3)
        layers.append(Conv(fshape=(3, 3, 64), strides=1, init=initializer, bias=initializer, activation=Rectlin()))
        
        initializer = self.get_initializer(input_size = 7 * 7 * 64)
        layers.append(Affine(nout=512, init=initializer, bias=initializer, activation=Rectlin()))
        
        initializer = self.get_initializer(input_size = 512)
        layers.append(Affine(nout=max_action_no, init=initializer, bias=initializer))
        
        return layers        
        
    def clip_reward(self, reward):
        if reward > self.args.clip_reward_high:
            return self.args.clip_reward_high
        elif reward < self.args.clip_reward_low:
            return self.args.clip_reward_low
        else:
            return reward

    def set_input(self, data):
        if self.use_gpu_replay_mem:
            # frames already live on the GPU: transpose so the batch dimension is
            # last (Neon's layout) and scale uint8 pixels to [0, 1]
            self.be.copy_transpose(data, self.input_uint8, axes=(1, 2, 3, 0))
            self.input[:] = self.input_uint8 / 255
        else:
            # frames live in host memory: transpose/copy to the device, then scale
            self.input.set(data.transpose(1, 2, 3, 0).copy())
            self.be.divide(self.input, 255, self.input)

    def predict(self, history_buffer):
        self.set_input(history_buffer)
        output  = self.train_net.fprop(self.input, inference=True)
        return output.T.asnumpyarray()[0]            

    def print_weights(self):
        pass

    def train(self, minibatch, replay_memory, learning_rate, debug):
        if self.args.prioritized_replay:
            prestates, actions, rewards, poststates, terminals, replay_indexes, heap_indexes, weights = minibatch
        else:
            prestates, actions, rewards, poststates, terminals = minibatch
        
        # Get Q(s', a) for all actions of the poststates with the target net
        self.set_input(poststates)
        post_qvalue = self.target_net.fprop(self.input, inference=True).T.asnumpyarray()
        
        if self.args.double_dqn:
            # Double DQN: the online (train) net selects the greedy action for s'
            post_qvalue2 = self.train_net.fprop(self.input, inference=True).T.asnumpyarray()
        
        # Get Q(s, a) with trainNet
        self.set_input(prestates)
        pre_qvalue = self.train_net.fprop(self.input, inference=False)
        
        label = pre_qvalue.asnumpyarray().copy()
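        # The loop below builds the Q-learning targets y_i for each transition:
        #   terminal:     y_i = r_i
        #   DQN:          y_i = r_i + gamma * max_a Q_target(s'_i, a)
        #   double DQN:   y_i = r_i + gamma * Q_target(s'_i, argmax_a Q_online(s'_i, a))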
        for i in range(0, self.train_batch_size):
            if self.args.clip_reward:
                reward = self.clip_reward(rewards[i])
            else:
                reward = rewards[i]
            if terminals[i]:
                label[actions[i], i] = reward
            else:
                if self.args.double_dqn:
                    max_index = np.argmax(post_qvalue2[i])
                    label[actions[i], i] = reward + self.discount_factor * post_qvalue[i][max_index]
                else:
                    label[actions[i], i] = reward + self.discount_factor * np.max(post_qvalue[i])

        # copy targets to GPU memory
        self.targets.set(label)
    
        delta = self.cost.get_errors(pre_qvalue, self.targets)
        
        if self.args.prioritized_replay:
            # importance sampling: update TD errors in the replay memory and re-weight deltas
            delta_value = delta.asnumpyarray()
            for i in range(self.train_batch_size):
                if debug:
                    print('weight[%s]: %.5f, delta: %.5f, newDelta: %.5f' %
                          (i, weights[i], delta_value[actions[i], i],
                           weights[i] * delta_value[actions[i], i]))
                replay_memory.update_td(heap_indexes[i], abs(delta_value[actions[i], i]))
                delta_value[actions[i], i] = weights[i] * delta_value[actions[i], i]
            delta.set(delta_value.copy())
          
        if self.args.clip_loss:
            self.be.clip(delta, -1.0, 1.0, out=delta)
                
        self.train_net.bprop(delta)
        self.optimizer.optimize(self.train_net.layers_to_optimize, epoch=0)

    def update_model(self):
        # states must also be serialized for batch normalization to keep working after the copy
        pdict = self.train_net.get_description(get_weights=True, keep_states=True)
        self.target_net.deserialize(pdict, load_states=True)
        #print ('Updated target model')

    def finish_train(self):
        self.running = False
    
    def load(self, file_name):
        self.train_net.load_params(file_name)
        self.update_model()
        
    def save(self, file_name):
        self.train_net.save_params(file_name)