Example #1
from atari_wrappers import make_atari, wrap_deepmind, wrap_pytorch


def create_env(flags):
    # DeepMind-style Atari preprocessing, then HWC -> CHW for PyTorch.
    return wrap_pytorch(
        wrap_deepmind(
            make_atari(flags.env),
            clip_rewards=False,
            frame_stack=True,
            scale=False,
        ))
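
A hypothetical call site, for illustration only: all `create_env` needs from `flags` is an `env` attribute holding a Gym Atari id, so a bare argparse namespace is enough.

import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--env", default="PongNoFrameskip-v4")
flags = parser.parse_args()

env = create_env(flags)  # frame-stacked, unclipped, unscaled Pong environment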
Example #2
import atari_wrappers


def create_deepmind_env(flags):
    # Same preprocessing as above, using module-qualified names.
    return atari_wrappers.wrap_pytorch(
        atari_wrappers.wrap_deepmind(
            atari_wrappers.make_atari(flags.env),
            clip_rewards=False,
            frame_stack=True,
            scale=False,
        ))
Example #3
    # NoisyNet: resample exploration noise in both networks after each update.
    current_model.reset_noise()
    target_model.reset_noise()

    return loss

def plot(frame_idx, rewards, losses):
    # Report the mean reward over the last 10 episodes.
    print('frame %s. reward: %s' % (frame_idx, np.mean(rewards[-10:])))

import numpy as np
import torch.nn as nn

from atari_wrappers import make_atari, wrap_deepmind, wrap_pytorch


env_id = "PongNoFrameskip-v4"
env    = make_atari(env_id)
env    = wrap_deepmind(env)
env    = wrap_pytorch(env)
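
# Hypothetical sanity check (not in the source): the wrapped env emits
# channel-first 84x84 grayscale frames, ready for a PyTorch CNN.
state = env.reset()
print(state.shape)  # (1, 84, 84) with the default wrap_deepmind settings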

class RainbowCnnDQN(nn.Module):
    def __init__(self, input_shape, num_actions, num_atoms, Vmin, Vmax):
        super(RainbowCnnDQN, self).__init__()
        
        self.input_shape = input_shape
        self.num_actions = num_actions
        self.num_atoms   = num_atoms
        self.Vmin        = Vmin
        self.Vmax        = Vmax
        
        # Standard Nature-DQN convolutional feature extractor.
        self.features = nn.Sequential(
            nn.Conv2d(input_shape[0], 32, kernel_size=8, stride=4),
            nn.ReLU(),
            nn.Conv2d(32, 64, kernel_size=4, stride=2),
            nn.ReLU(),
            nn.Conv2d(64, 64, kernel_size=3, stride=1),
            nn.ReLU(),
        )
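
A minimal usage sketch (not from the source), assuming the Rainbow defaults of 51 atoms with the value support on [-10, 10]; the constructor arguments come from the wrapped environment above.

num_atoms  = 51
Vmin, Vmax = -10, 10

# Online and target networks share the same architecture.
current_model = RainbowCnnDQN(env.observation_space.shape, env.action_space.n,
                              num_atoms, Vmin, Vmax)
target_model  = RainbowCnnDQN(env.observation_space.shape, env.action_space.n,
                              num_atoms, Vmin, Vmax)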
Example #4
import atari_wrappers


def create_env(flags):
    # wrap_interp is presumably a project-specific alternative to wrap_deepmind.
    return atari_wrappers.wrap_pytorch(
        atari_wrappers.wrap_interp(atari_wrappers.make_atari(flags.env), ))