Exemplo n.º 1
0
import torch
from neural_net import NeuralNet
import gym

# Replay a trained policy on LunarLander-v2 for 10 rendered episodes.
#
# Rebuild the network and restore its weights from the "savedmodel" checkpoint.
# map_location="cpu" lets a checkpoint that was saved on a GPU machine load on
# a CPU-only one; load_state_dict copies the values into the model's own
# parameters either way.
model = NeuralNet()
model.load_state_dict(torch.load("savedmodel", map_location="cpu"))
# Inference only: put layers such as dropout / batch-norm (if NeuralNet has
# any) into evaluation mode. Assumes NeuralNet is a torch.nn.Module, which its
# use of load_state_dict() and being called like a function suggests.
model.eval()

env = gym.make("LunarLander-v2")

# try/finally guarantees the environment (and its render window) is released
# even if an episode fails part-way or the run is interrupted.
try:
    for _ in range(10):
        observation = env.reset()
        # The model input is the observation plus the most recent reward, so
        # take one random step first to obtain an initial reward value.
        action = env.action_space.sample()
        observation, reward, done, info = env.step(action)
        for t in range(1000):
            # render the environment
            env.render()
            data = observation.tolist()
            data.append(reward)
            # No gradients are needed when only evaluating the policy.
            with torch.no_grad():
                scores = model(torch.tensor(data).float())
            # Greedy policy: pick the index of the highest-scoring action
            # (first index on ties). Assumes the model returns a 1-D tensor
            # with one score per discrete action.
            action = int(torch.argmax(scores))
            observation, reward, done, info = env.step(action)
            print(action, observation, reward, info)
            if done:
                print("Episode finished after {} timestep".format(t + 1))
                break
finally:
    env.close()