Example No. 1
def default(task, config):
    # Standard Atari preprocessing pipeline; `config` overrides these defaults.
    defaults = dict(subsample=2,
                    frame_skip=4,
                    history=4,
                    delta=False,
                    frame_max=2,
                    noop_max=30)
    config = mp.utility.use_attrdicts(mp.utility.merge_dicts(defaults, config))
    policy = mp.Sequential(task)
    policy.add(mp.step.Image)
    if config.noop_max:
        # Start each episode with a random number of no-op actions.
        policy.add(mp.step.RandomStart, config.noop_max)
    if config.frame_skip > 1:
        # Repeat each action for several frames.
        policy.add(mp.step.Skip, config.frame_skip)
    if config.frame_max:
        # Take the pixel-wise maximum over consecutive frames to reduce flicker.
        policy.add(mp.step.Maximum, config.frame_max)
    if config.history > 1:
        # When frames will be stacked later, convert to grayscale first, using
        # luminance weights for however many channels the observation has.
        channels = policy.above_task.observs.shape[-1]
        policy.add(mp.step.Grayscale, (0.299, 0.587, 0.114)[:channels])
    if config.subsample > 1:
        sub = config.subsample
        # Grayscale observations are 2D; color observations keep a channel axis.
        amount = (sub, sub) if config.history > 1 else (sub, sub, 1)
        policy.add(mp.step.Subsample, amount)
    if config.delta:
        # Use differences between consecutive frames instead of raw frames.
        policy.add(mp.step.Delta)
    if config.history > 1:
        # Stack the most recent frames into a single observation.
        policy.add(mp.step.History, config.history)
    policy.add(mp.step.Normalize)
    policy.add(mp.step.ClampReward)
    return policy
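To make the pattern in these snippets concrete without depending on the library, here is a minimal, self-contained sketch of a sequential pipeline of preprocessing steps using plain NumPy. The Grayscale, Subsample, and Sequential classes below are simplified stand-ins written for illustration, not the `mp` library's implementations.

import numpy as np


class Grayscale:
    """Convert an RGB frame to grayscale using the given channel weights."""

    def __init__(self, weights=(0.299, 0.587, 0.114)):
        self._weights = np.asarray(weights)

    def __call__(self, frame):
        return (frame * self._weights).sum(axis=-1)


class Subsample:
    """Keep every n-th pixel along each axis."""

    def __init__(self, amount):
        self._amount = amount

    def __call__(self, frame):
        return frame[tuple(slice(None, None, a) for a in self._amount)]


class Sequential:
    """Apply preprocessing steps in the order they were added."""

    def __init__(self):
        self._steps = []

    def add(self, step):
        self._steps.append(step)

    def __call__(self, frame):
        for step in self._steps:
            frame = step(frame)
        return frame


pipeline = Sequential()
pipeline.add(Grayscale())
pipeline.add(Subsample((2, 2)))
frame = np.random.randint(0, 256, (210, 160, 3)).astype(np.float32)
print(pipeline(frame).shape)  # prints (105, 80)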
Example No. 2
def train_policies(self):
    # Build one training policy per head; each gets its own preprocessing
    # stack with a Head step on top.
    heads = []
    for _ in range(self.config.heads):
        policy = mp.Sequential(self.task)
        policy.add(self._create_preprocess())
        policy.add(Head, self)
        heads.append(policy)
    return heads
Example No. 3
def dqn_2015(task, config=None):
    # Preprocessing chain matching the DQN (2015) Atari setup: random no-ops,
    # action repeat, frame maximum, grayscale, downsampling, frame stacking,
    # normalization, and reward clamping.
    policy = mp.Sequential(task)
    policy.add(mp.step.Image)
    policy.add(mp.step.RandomStart, 30)
    policy.add(mp.step.Skip, 4)
    policy.add(mp.step.Maximum, 2)
    policy.add(mp.step.Grayscale, (0.299, 0.587, 0.114))
    policy.add(mp.step.Subsample, (2, 2))
    policy.add(mp.step.History, 4)
    policy.add(mp.step.Normalize)
    policy.add(mp.step.ClampReward)
    return policy
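As a rough sanity check of the observation shape such a chain produces for a 210x160 RGB Atari frame (grayscale, 2x subsampling, a history of four frames), the following self-contained NumPy sketch reproduces the shape arithmetic only; the stacking axis and the normalization constant are assumptions here, not taken from the `mp` steps.

import numpy as np
from collections import deque


def preprocess_frame(frame):
    # Grayscale via luminance weights, keep every second pixel, scale to [0, 1].
    gray = (frame * np.array([0.299, 0.587, 0.114])).sum(axis=-1)
    return gray[::2, ::2] / 255.0


history = deque(maxlen=4)
for _ in range(4):
    raw = np.random.randint(0, 256, (210, 160, 3)).astype(np.float32)
    history.append(preprocess_frame(raw))

observation = np.stack(history, axis=-1)
print(observation.shape)  # (105, 80, 4)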
Example No. 4
def train_policies(self):
    # Build one training policy per learner; each learner gets its own model
    # initialized from the shared model's weights.
    trainers = []
    for _ in range(self.config.learners):
        config = mp.utility.AttrDict(self.config.copy())
        # TODO: Use single model to share RMSProp statistics. Does RMSProp
        # use statistics in compute_gradients() or apply_gradients()?
        model = mp.model.Model(self._create_network, threads=1)
        model.weights = self.model.weights
        policy = mp.Sequential(self.task)
        policy.add(self._create_preprocess())
        policy.add(Train, config, self, model)
        trainers.append(policy)
    return trainers
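The loop above gives every learner its own model that starts from the shared weights. A toy, self-contained sketch of that weight-sharing pattern follows; the Model class here is a stand-in written for illustration, not mp.model.Model.

import numpy as np


class Model:
    # Toy stand-in for a parameterized model with a flat weight vector.
    def __init__(self, size):
        self.weights = np.zeros(size)


shared = Model(4)
shared.weights = np.array([1.0, 2.0, 3.0, 4.0])

learners = []
for _ in range(3):
    learner = Model(4)
    # Each learner starts from a copy of the shared parameters, mirroring the
    # `model.weights = self.model.weights` assignment in the snippet above.
    learner.weights = shared.weights.copy()
    learners.append(learner)

print([learner.weights.tolist() for learner in learners])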
Example No. 5
def policy(self):
    # Combine the preprocessing stack and this behavior into a single policy.
    # TODO: Why doesn't self.task work here?
    policy = mp.Sequential(self._preprocess.task)
    policy.add(self._preprocess)
    policy.add(self)
    return policy
Example No. 6
def _create_preprocess(self):
    # Resolve the configured preprocess constructor by name and add an
    # epsilon-greedy exploration step on top.
    policy = mp.Sequential(self.task)
    preprocess = getattr(mp.part.preprocess, self.config.preprocess)
    policy.add(preprocess, self.config.preprocess_config)
    policy.add(mp.step.EpsilonGreedy, **self.config.epsilon)
    return policy
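The EpsilonGreedy step added at the end implements the usual exploration rule. A minimal, self-contained sketch of epsilon-greedy action selection, independent of the `mp` step's actual interface:

import random


def epsilon_greedy(q_values, epsilon):
    # With probability epsilon pick a random action, otherwise the greedy one.
    if random.random() < epsilon:
        return random.randrange(len(q_values))
    return max(range(len(q_values)), key=lambda action: q_values[action])


print(epsilon_greedy([0.1, 0.5, 0.2], epsilon=0.05))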
Example No. 7
def _create_preprocess(self):
    # Resolve the configured preprocess constructor by name; unlike the
    # variant above, no exploration step is added.
    policy = mp.Sequential(self.task)
    preprocess = getattr(mp.part.preprocess, self.config.preprocess)
    policy.add(preprocess, self.config.preprocess_config)
    return policy
Example No. 8
def test_policy(self):
    # Evaluation policy: the shared preprocessing followed by a Test step
    # that uses the current model.
    policy = mp.Sequential(self.task)
    policy.add(self._preprocess)
    policy.add(Test, self.model, self)
    return policy
Example No. 9
def _prepend_score_step(self, policy):
    # Wrap the given policy in a new Sequential with a Score step in front.
    combined = mp.Sequential(policy.task)
    combined.add(mp.step.Score)
    combined.add(policy)
    return combined