Example #1
  def __init__(self, num_actions, args):
    # remember parameters
    self.num_actions = num_actions
    self.batch_size = args.batch_size
    self.discount_rate = args.discount_rate
    self.history_length = args.history_length
    self.screen_dim = (args.screen_height, args.screen_width)
    self.clip_error = args.clip_error
    self.min_reward = args.min_reward
    self.max_reward = args.max_reward
    self.batch_norm = args.batch_norm

    # create Neon backend
    self.be = gen_backend(backend = args.backend,
                 batch_size = args.batch_size,
                 rng_seed = args.random_seed,
                 device_id = args.device_id,
                 datatype = np.dtype(args.datatype).type,
                 stochastic_round = args.stochastic_round)

    # prepare tensors once and reuse them
    self.input_shape = (self.history_length,) + self.screen_dim + (self.batch_size,)
    self.input = self.be.empty(self.input_shape)
    self.input.lshape = self.input_shape # HACK: needed for convolutional networks
    self.targets = self.be.empty((self.num_actions, self.batch_size))

    # create model
    layers = self._createLayers(num_actions)
    self.model = Model(layers = layers)
    self.cost = GeneralizedCost(costfunc = SumSquared())
    # Bug fix: disable per-layer parallelism before initialize()
    for l in self.model.layers.layers:
      l.parallelism = 'Disabled'
    self.model.initialize(self.input_shape[:-1], self.cost)
    if args.optimizer == 'rmsprop':
      self.optimizer = RMSProp(learning_rate = args.learning_rate, 
          decay_rate = args.decay_rate, 
          stochastic_round = args.stochastic_round)
    elif args.optimizer == 'adam':
      self.optimizer = Adam(learning_rate = args.learning_rate, 
          stochastic_round = args.stochastic_round)
    elif args.optimizer == 'adadelta':
      self.optimizer = Adadelta(decay = args.decay_rate, 
          stochastic_round = args.stochastic_round)
    else:
      assert False, "Unknown optimizer"

    # create target model
    self.train_iterations = 0
    if args.target_steps:
      self.target_model = Model(layers = self._createLayers(num_actions))
      # Bug fix: disable per-layer parallelism before initialize()
      for l in self.target_model.layers.layers:
        l.parallelism = 'Disabled'
      self.target_model.initialize(self.input_shape[:-1])
      self.save_weights_prefix = args.save_weights_prefix
    else:
      self.target_model = self.model

    self.callback = None
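
The _createLayers helper referenced above is not part of this snippet. Below is a minimal sketch of what such a helper could look like, assuming the standard DQN convolutional architecture and Neon's Conv/Affine layer API; the layer shapes and initializer are illustrative assumptions, not the author's code.

# Hypothetical sketch of a _createLayers helper; layer shapes follow the
# standard DQN architecture and are assumptions, not the author's code.
from neon.initializers import Gaussian
from neon.layers import Affine, Conv
from neon.transforms import Rectlin

def _createLayers(self, num_actions):
    init = Gaussian(scale=0.01)
    return [
        Conv((8, 8, 32), strides=4, init=init, activation=Rectlin()),
        Conv((4, 4, 64), strides=2, init=init, activation=Rectlin()),
        Conv((3, 3, 64), strides=1, init=init, activation=Rectlin()),
        Affine(nout=512, init=init, activation=Rectlin()),
        Affine(nout=num_actions, init=init),
    ]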
Example #2
    def __init__(self, env, args, rng, name = "DQNNeon"):
        """ Initializes a network based on the Neon framework.

        Args:
            env (AtariEnv): The environment in which the agent acts.
            args (argparse.Namespace): All settings either with a default value or set via command line arguments.
            rng (mtrand.RandomState): Initialized Mersenne Twister pseudo-random number generator.
            name (str): The name of the network object.

        Note:
            This function should always call the base class first to initialize
            the common values for the networks.
        """
        _logger.info("Initializing new object of type " + str(type(self).__name__))
        super(DQNNeon, self).__init__(env, args, rng, name)
        self.input_shape = (self.sequence_length,) + self.frame_dims + (self.batch_size,)
        self.dummy_batch = np.zeros((self.batch_size, self.sequence_length) + self.frame_dims, dtype=np.uint8)
        self.batch_norm = args.batch_norm

        self.be = gen_backend(
                backend = args.backend,
                batch_size = args.batch_size,
                rng_seed = args.random_seed,
                device_id = args.device_id,
                datatype = np.dtype(args.datatype).type,
                stochastic_round = args.stochastic_round)

        # prepare tensors once and reuse them
        self.input = self.be.empty(self.input_shape)
        self.input.lshape = self.input_shape # HACK: needed for convolutional networks
        self.targets = self.be.empty((self.output_shape, self.batch_size))

        # create model
        layers = self._create_layer()
        self.model = Model(layers = layers)
        self.cost_func = GeneralizedCost(costfunc = SumSquared())
        # Bug fix: disable per-layer parallelism before initialize()
        for l in self.model.layers.layers:
            l.parallelism = 'Disabled'
        self.model.initialize(self.input_shape[:-1], self.cost_func)

        self._set_optimizer()

        if self.args.load_weights is not None:
            self.load_weights(self.args.load_weights)

        # create target model
        if self.target_update_frequency:
            layers = self._create_layer()
            self.target_model = Model(layers)
            # Bug fix: disable per-layer parallelism before initialize()
            for l in self.target_model.layers.layers:
                l.parallelism = 'Disabled'
            self.target_model.initialize(self.input_shape[:-1])
        else:
            self.target_model = self.model

        self.callback = None
        _logger.debug("%s" % self)
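
The _set_optimizer helper is not shown here. A plausible sketch, mirroring the optimizer selection logic in Example #1 (this is an assumption, not the project's actual code):

# Hypothetical _set_optimizer, mirroring Example #1's selection logic.
from neon.optimizers import Adadelta, Adam, RMSProp

def _set_optimizer(self):
    if self.args.optimizer == 'rmsprop':
        self.optimizer = RMSProp(learning_rate=self.args.learning_rate,
                                 decay_rate=self.args.decay_rate,
                                 stochastic_round=self.args.stochastic_round)
    elif self.args.optimizer == 'adam':
        self.optimizer = Adam(learning_rate=self.args.learning_rate,
                              stochastic_round=self.args.stochastic_round)
    elif self.args.optimizer == 'adadelta':
        self.optimizer = Adadelta(decay=self.args.decay_rate,
                                  stochastic_round=self.args.stochastic_round)
    else:
        raise ValueError("Unknown optimizer: %s" % self.args.optimizer)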
Example #3
def test_sum_squared_derivative(backend_default):
    outputs = np.array([0.5, 1.0, 0.0, 0.0001]).reshape((4, 1))
    targets = np.array(([0.5, 0.0, 1.0, 0.2])).reshape((4, 1))
    expected_result = (outputs - targets) / outputs.shape[1]
    compare_tensors(SumSquared(),
                    outputs,
                    targets,
                    expected_result,
                    deriv=True,
                    tol=1e-8)
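
Read together with Examples #10 and #12 below, the test expectations indicate that SumSquared computes, per batch column (with N the batch size, i.e. outputs.shape[1]):

    L(y, t) = \frac{1}{2} \sum_i (y_i - t_i)^2, \qquad \frac{\partial L}{\partial y} = \frac{y - t}{N}

which is why this derivative test divides by outputs.shape[1] (here N = 1): the per-example gradient is averaged over the batch.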
Example #4
 def __init__(self, rounding, callback_args, epochs):
     # setup weight initialization function
     self.init = Gaussian(loc=0.0, scale=0.01)
     # setup optimizer
     self.optimizer = GradientDescentMomentum(learning_rate=0.1, momentum_coef=0.9,
                                              stochastic_round=rounding)
     # setup cost function as CrossEntropy
     self.cost = GeneralizedCost(costfunc=SumSquared())
     self.epochs = epochs
     self.model = None
     self.callback_args = callback_args
Example #5
  def __init__(self, state_size, num_steers, num_speeds, args):
    # remember parameters
    self.state_size = state_size
    self.num_steers = num_steers
    self.num_speeds = num_speeds
    self.num_actions = num_steers + num_speeds
    self.num_layers = args.hidden_layers
    self.hidden_nodes = args.hidden_nodes
    self.batch_size = args.batch_size
    self.discount_rate = args.discount_rate
    self.clip_error = args.clip_error

    # create Neon backend
    self.be = gen_backend(backend = args.backend,
                 batch_size = args.batch_size,
                 rng_seed = args.random_seed,
                 device_id = args.device_id,
                 datatype = np.dtype(args.datatype).type,
                 stochastic_round = args.stochastic_round)

    # prepare tensors once and reuse them
    self.input_shape = (self.state_size, self.batch_size)
    self.input = self.be.empty(self.input_shape)
    self.targets = self.be.empty((self.num_actions, self.batch_size))

    # create model
    self.model = Model(layers = self._createLayers())
    self.cost = GeneralizedCost(costfunc = SumSquared())
    self.model.initialize(self.input_shape[:-1], self.cost)
    if args.optimizer == 'rmsprop':
      self.optimizer = RMSProp(learning_rate = args.learning_rate, 
          decay_rate = args.decay_rate, 
          stochastic_round = args.stochastic_round)
    elif args.optimizer == 'adam':
      self.optimizer = Adam(learning_rate = args.learning_rate, 
          stochastic_round = args.stochastic_round)
    elif args.optimizer == 'adadelta':
      self.optimizer = Adadelta(decay = args.decay_rate, 
          stochastic_round = args.stochastic_round)
    else:
      assert False, "Unknown optimizer"

    # create target model
    self.target_steps = args.target_steps
    self.train_iterations = 0
    if self.target_steps:
      self.target_model = Model(layers = self._createLayers())
      self.target_model.initialize(self.input_shape[:-1])
      self.save_weights_prefix = args.save_weights_prefix
    else:
      self.target_model = self.model
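
None of these snippets show how the target network is refreshed from the online model. A minimal sketch using neon's Model serialization API (the method name is illustrative; keep_states matters when batch normalization is used):

# Hypothetical target-network update: copy weights (and layer states)
# from the online model into the target model every target_steps steps.
def _update_target_network(self):
    pdict = self.model.get_description(get_weights=True, keep_states=True)
    self.target_model.deserialize(pdict, load_states=True)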
Example #6
    def __init__(self,
                 num_actions,
                 batch_size=32,
                 discount_rate=0.99,
                 history_length=4,
                 cols=64,
                 rows=64,
                 clip_error=1,
                 min_reward=-1,
                 max_reward=1,
                 batch_norm=False):
        self.num_actions = num_actions
        self.batch_size = batch_size
        self.discount_rate = discount_rate
        self.history_length = history_length
        self.board_dim = (cols, rows)
        self.clip_error = clip_error
        self.min_reward = min_reward
        self.max_reward = max_reward
        self.batch_norm = batch_norm

        self.be = gen_backend(backend='gpu',
                              batch_size=self.batch_size,
                              datatype=np.dtype('float32').type)

        self.input_shape = (self.history_length, ) + self.board_dim + (
            self.batch_size, )
        self.input = self.be.empty(self.input_shape)
        self.input.lshape = self.input_shape  # hack from simple_dqn "needed for convolutional networks"
        self.targets = self.be.empty((self.num_actions, self.batch_size))

        layers = self._createLayers(self.num_actions)
        self.model = Model(layers=layers)
        self.cost = GeneralizedCost(costfunc=SumSquared())
        # for l in self.model.layers.layers:
        # 	l.parallelism = 'Disabled'
        self.model.initialize(self.input_shape[:-1], cost=self.cost)
        self.optimizer = RMSProp(learning_rate=0.002,
                                 decay_rate=0.95,
                                 stochastic_round=True)

        self.train_iterations = 0
        self.target_model = Model(layers=self._createLayers(num_actions))
        # for l in self.target_model.layers.layers:
        # 	l.parallelism = 'Disabled'
        self.target_model.initialize(self.input_shape[:-1])

        self.callback = None
Example #7
File: util.py  Project: yw774/neon
def train_regressor(orig_wordvecs, w2v_W, w2v_vocab):
    """
    Return regressor to map word2vec to RNN word space

    Function modified from:
    https://github.com/ryankiros/skip-thoughts/blob/master/training/tools.py
    """
    # Gather all words from word2vec that appear in wordvecs
    d = defaultdict(lambda: 0)
    for w in w2v_vocab.keys():
        d[w] = 1
    shared = OrderedDict()
    count = 0

    for w in list(orig_wordvecs.keys())[:-2]:
        if d[w] > 0:
            shared[w] = count
            count += 1

    # Get the vectors for all words in 'shared'
    w2v = np.zeros((len(shared), 300), dtype='float32')
    sg = np.zeros((len(shared), 620), dtype='float32')
    for w in shared.keys():
        w2v[shared[w]] = w2v_W[w2v_vocab[w]]
        sg[shared[w]] = orig_wordvecs[w]

    train_set = ArrayIterator(X=w2v, y=sg, make_onehot=False)

    layers = [
        Linear(nout=620, init=Gaussian(loc=0.0, scale=0.1)),
        Bias(init=Constant(0.0))
    ]
    clf = Model(layers=layers)

    # regression model is trained using default global batch size
    cost = GeneralizedCost(costfunc=SumSquared())
    opt = GradientDescentMomentum(0.1, 0.9, gradient_clip_value=5.0)
    callbacks = Callbacks(clf)

    clf.fit(train_set,
            num_epochs=20,
            optimizer=opt,
            cost=cost,
            callbacks=callbacks)
    return clf
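
Once trained, the regressor can be applied to held-out word2vec vectors with Model.get_outputs; the input array below is a placeholder, not data from the project:

# Hypothetical usage of the returned regressor: map new 300-d word2vec
# vectors into the 620-d RNN word space. Input data is illustrative.
import numpy as np
from neon.data import ArrayIterator

new_w2v = np.random.rand(128, 300).astype('float32')  # placeholder vectors
pred_set = ArrayIterator(X=new_w2v, make_onehot=False)
embeddings = clf.get_outputs(pred_set)  # one 620-d row per input vector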
Example #8
    def __init__(self, num_actions, args):
        # remember parameters
        self.num_actions = num_actions
        self.batch_size = args.batch_size
        self.discount_rate = args.discount_rate
        self.history_length = args.history_length
        self.screen_dim = (args.screen_height, args.screen_width)
        self.clip_error = args.clip_error

        # create Neon backend
        self.be = gen_backend(backend=args.backend,
                              batch_size=args.batch_size,
                              rng_seed=args.random_seed,
                              device_id=args.device_id,
                              default_dtype=np.dtype(args.datatype).type,
                              stochastic_round=args.stochastic_round)

        # prepare tensors once and reuse them
        self.input_shape = (self.history_length, ) + self.screen_dim + (
            self.batch_size, )
        self.tensor = self.be.empty(self.input_shape)
        self.tensor.lshape = self.input_shape  # needed for convolutional networks
        self.targets = self.be.empty((self.num_actions, self.batch_size))

        # create model
        layers = self.createLayers(num_actions)
        self.model = Model(layers=layers)
        self.cost = GeneralizedCost(costfunc=SumSquared())
        self.model.initialize(self.tensor.shape[:-1], self.cost)
        self.optimizer = RMSProp(learning_rate=args.learning_rate,
                                 decay_rate=args.rmsprop_decay_rate,
                                 stochastic_round=args.stochastic_round)

        # create target model
        self.target_steps = args.target_steps
        self.train_iterations = 0
        if self.target_steps:
            self.target_model = Model(layers=self.createLayers(num_actions))
            self.target_model.initialize(self.tensor.shape[:-1])
            self.save_weights_path = args.save_weights_path
        else:
            self.target_model = self.model

        self.callback = None
Example #9
    def __init__(self, args, max_action_no, batch_dimension):
        self.args = args
        self.train_batch_size = args.train_batch_size
        self.discount_factor = args.discount_factor
        self.use_gpu_replay_mem = args.use_gpu_replay_mem

        self.be = gen_backend(backend='gpu', batch_size=self.train_batch_size)

        self.input_shape = (batch_dimension[1], batch_dimension[2],
                            batch_dimension[3], batch_dimension[0])
        self.input = self.be.empty(self.input_shape)
        self.input.lshape = self.input_shape  # HACK: needed for convolutional networks
        self.targets = self.be.empty((max_action_no, self.train_batch_size))

        if self.use_gpu_replay_mem:
            self.history_buffer = self.be.zeros(batch_dimension,
                                                dtype=np.uint8)
            self.input_uint8 = self.be.empty(self.input_shape, dtype=np.uint8)
        else:
            self.history_buffer = np.zeros(batch_dimension, dtype=np.float32)

        self.train_net = Model(self.create_layers(max_action_no))
        self.cost = GeneralizedCost(costfunc=SumSquared())
        # Bug fix: disable per-layer parallelism before initialize()
        for l in self.train_net.layers.layers:
            l.parallelism = 'Disabled'
        self.train_net.initialize(self.input_shape[:-1], self.cost)

        self.target_net = Model(self.create_layers(max_action_no))
        # Bug fix: disable per-layer parallelism before initialize()
        for l in self.target_net.layers.layers:
            l.parallelism = 'Disabled'
        self.target_net.initialize(self.input_shape[:-1])

        if self.args.optimizer == 'Adam':  # Adam
            self.optimizer = Adam(beta_1=args.rms_decay,
                                  beta_2=args.rms_decay,
                                  learning_rate=args.learning_rate)
        else:  # Neon RMSProp
            self.optimizer = RMSProp(decay_rate=args.rms_decay,
                                     learning_rate=args.learning_rate)

        self.max_action_no = max_action_no
        self.running = True
Example #10
def test_sum_squared_limits(backend_default):
    outputs = np.array([0.5, 1.0, 0.0, 0.0001]).reshape((4, 1))
    targets = np.array(([0.5, 0.0, 1.0, 0.2])).reshape((4, 1))
    expected_result = np.sum((outputs - targets) ** 2, axis=0, keepdims=True) / 2.
    compare_tensors(SumSquared(), outputs, targets, expected_result, tol=1e-7)
Example #11
# setup model layers

layers = [
    Conv((5, 5, 16), init=init_norm, activation=Rectlin()),
    Pooling(2),
    Conv((5, 5, 32), init=init_norm, activation=Rectlin()),
    Pooling(2),
    Conv((3, 3, 32), init=init_norm, activation=Rectlin()),
    Pooling(2),
    Affine(nout=100, init=init_norm, activation=Rectlin()),
    Linear(nout=4, init=init_norm)
]

model = Model(layers=layers)

# cost = GeneralizedCost(costfunc=CrossEntropyBinary())
cost = GeneralizedCost(costfunc=SumSquared())
# fit and validate
optimizer = RMSProp()

# configure callbacks
callbacks = Callbacks(model, eval_set=eval_set, eval_freq=1)

model.fit(train_set,
          cost=cost,
          optimizer=optimizer,
          num_epochs=10,
          callbacks=callbacks)
y_test = model.get_outputs(test_set)
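
This script leaves init_norm, the backend, and the data iterators undefined. A minimal preamble that would make it runnable, assuming 3x64x64 image inputs and 4-dimensional regression targets (both assumptions):

# Hypothetical preamble for the script above; data shapes are assumptions.
import numpy as np
from neon.backends import gen_backend
from neon.data import ArrayIterator
from neon.initializers import Gaussian

be = gen_backend(backend='cpu', batch_size=32)
init_norm = Gaussian(loc=0.0, scale=0.01)

X = np.random.rand(512, 3 * 64 * 64).astype('float32')
y = np.random.rand(512, 4).astype('float32')
train_set = ArrayIterator(X=X[:384], y=y[:384], make_onehot=False,
                          lshape=(3, 64, 64))
eval_set = ArrayIterator(X=X[384:], y=y[384:], make_onehot=False,
                         lshape=(3, 64, 64))
test_set = eval_set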
Example #12
def test_sum_squared(backend):
    outputs = np.array([0.5, 0.9, 0.1, 0.0001]).reshape((4, 1))
    targets = np.array([0.5, 0.99, 0.01, 0.2]).reshape((4, 1))
    expected_result = np.sum(
        (outputs - targets)**2, axis=0, keepdims=True) / 2.
    compare_tensors(SumSquared(), outputs, targets, expected_result, tol=1e-8)