Пример #1
0
    '%Y-%m-%dT%H-%M-%S')
# Create the run directory together with any missing parent directories;
# a directory that already exists raises FileExistsError.
save_dir.mkdir(parents=True, exist_ok=False)

# No checkpoint file is selected for this run. To use one, assign a path
# instead, e.g. Path('checkpoints/2020-10-21T18-25-27/mario.chkpt').
checkpoint = None

# Select the compute device: the first CUDA GPU when PyTorch reports one as
# available, otherwise the CPU. The choice is echoed to stdout. (BM)
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print("Using GPU!" if device.type == "cuda" else "Using CPU!")

# Build the agent from the environment's action count, the run directory,
# the selected device and the checkpoint (which may be None).
# NOTE(review): state_dim=(4, 84, 84) presumably mirrors the observation shape
# produced by the environment wrappers, which are outside this excerpt — confirm.
mario = Mario(
    state_dim=(4, 84, 84),
    action_dim=env.action_space.n,
    save_dir=save_dir,
    device=device,
    checkpoint=checkpoint,
)

# Metric logger for this run; it is handed the same run directory as the agent.
logger = MetricLogger(save_dir)

# The number of training episodes is taken from the first command-line
# argument when one is given; without it a short 10-episode run is used. (BM)
# NOTE(review): print_e is presumably the reporting interval in episodes —
# its use is outside this excerpt, confirm.
episodes = 10
print_e = 1
if len(sys.argv) > 1:
    try:
        episodes = int(sys.argv[1])
    except ValueError:
        # A non-integer argument previously aborted the script with a raw
        # traceback; warn on stderr and keep the defaults instead.
        print("Ignoring invalid episode count {!r}; using {} episodes".format(
            sys.argv[1], episodes), file=sys.stderr)
    else:
        # An explicit episode count switches to the sparser print_e value.
        print_e = 20

### Loop that trains the model for `episodes` episodes by playing the game
for e in range(episodes):
Пример #2
0
    ['right', 'A']]
)

# Wrap the environment step by step. Each wrapper receives the result of the
# previous one, so the order of these statements matters.
# NOTE(review): the wrapper classes are defined/imported outside this excerpt;
# the "presumably" notes below are inferred from names and arguments — confirm.
env = SkipFrame(env, skip=4)  # presumably advances 4 frames per agent step
env = GrayScaleObservation(env, keep_dim=False)  # presumably grayscale without a channel axis
env = ResizeObservation(env, shape=84)  # presumably resizes each frame to 84x84
env = TransformObservation(env, f=lambda x: x / 255.)  # divides every observation by 255
env = FrameStack(env, num_stack=4)  # presumably stacks the 4 most recent observations

# Reset the wrapped environment once up front; the returned observation is
# discarded here.
env.reset()

# Each run gets its own directory under checkpoints/, named after the local
# start time at one-second resolution.
save_dir = Path('checkpoints', datetime.datetime.now().strftime('%Y-%m-%dT%H-%M-%S'))
# Missing parents are created; an already existing run directory raises
# FileExistsError.
save_dir.mkdir(parents=True, exist_ok=False)

# Saved agent state that is handed to Mario below.
checkpoint = Path('checkpoints/trained_mario.chkpt')

# state_dim mirrors the num_stack=4 and shape=84 settings applied to env above.
mario = Mario(
    state_dim=(4, 84, 84),
    action_dim=env.action_space.n,
    save_dir=save_dir,
    checkpoint=checkpoint,
)
# Pin the exploration rate to the agent's configured minimum for this run.
mario.exploration_rate = mario.exploration_rate_min

# Metric logger for this run; it is handed the same run directory as the agent.
logger = MetricLogger(save_dir)

# Number of episodes to play.
episodes = 100

for e in range(episodes):

    state = env.reset()

    while True:

        env.render()

        action = mario.act(state)