Exemplo n.º 1
0
def main():
    """Load a trained agent and roll out a single rendered episode, printing the score."""
    agent = Agent()
    agent.load()

    total_reward = 0
    obs = env.reset()
    env.render()
    # Hard cap on episode length in case `done` is never signalled.
    for _ in range(10000):
        obs, reward, done, _ = env.step(agent.predict(obs))
        total_reward += reward
        env.render()
        if not done:
            continue
        print(f'total_reward: {total_reward}')
        env.close()
        break
Exemplo n.º 2
0
                               ]  # a, b = [deque([]), deque([])]
# Per-agent rollout state: latest observation, previous observation, and the
# last action taken (2 is the initial/default action id).
agent_obs = [None] * flags.num_agents  # [None, None]
agent_obs_buffer = [None] * flags.num_agents
agent_action_buffer = [2] * flags.num_agents
max_steps = flags.episode_length
start_time = time.time()

# Load an RL agent and initialize it from checkpoint if necessary
# Independent DQN/PPO --> each agent has its own obs, but all share one model.
if flags.agent_type == "dqn":
    agent = DQN_Agent(state_size, action_size, flags.num_agents)
elif flags.agent_type == "ppo":
    agent = PPO_Agent(state_size, action_size, flags.num_agents)

# Resume from a checkpoint (returns the episode to resume at and the epsilon
# to continue with); otherwise start from episode 0 with full exploration.
if flags.load_model:
    start, eps = agent.load(project_root / 'checkpoints', 0, 1.0)
else:
    start, eps = 0, 1

# Pure evaluation: disable exploration entirely.
if not flags.train:
    eps = 0.0

# Discrete action ids mapped to their movement semantics.
ACTIONS = {0: "up", 1: "right", 2: "down", 3: "left", 4: "stop"}


def obs_wrapper(obss):
    """Convert a list of observations into a single numpy array."""
    stacked = np.array(obss)
    return stacked
Exemplo n.º 3
0
# Pixel region of the raw frame kept by preprocess().
crop_start = (15, 30)
crop_end = (200, 125)
# Resuming a trained model: mostly exploit; fresh model: explore fully.
starting_epsilon = 0.05 if LOAD_MODEL else 1.0

env = gym.make('SpaceInvaders-v0')
# FIX: epsilon was hard-coded to 0.05, leaving `starting_epsilon` unused and
# giving a freshly-initialized agent almost no exploration.
brain = Agent(gamma=0.95,
              epsilon=starting_epsilon,
              lr=0.003,
              input_dims=input_dims,
              batch_size=batch_size,
              n_actions=n_actions,
              max_mem_size=5000,
              save_path='models/')

if LOAD_MODEL:
    brain.load()
else:
    # Seed the replay buffer with random-policy transitions until it is full.
    while brain.mem_cntr < brain.mem_size:
        observation = env.reset()
        observation = preprocess(observation, crop_start, crop_end)
        done = False
        while not done:
            # 0 no action, 1 fire, 2 move right, 3 move left, 4 move right fire, 5 move left fire
            action = env.action_space.sample()
            observation_, reward, done, info = env.step(action)
            observation_ = preprocess(observation_, crop_start, crop_end)
            # Penalize losing the final life so terminal states carry a strong signal.
            if done and info['ale.lives'] == 0:
                reward = -100
            brain.store_transition(observation, action, reward, observation_,
                                   int(done))
Exemplo n.º 4
0
        env.render()

        while True:
            action = self.agent.act(states)
            states, _, done, _ = env.step(action)
            env.render()
            if done:
                break
            time.sleep(0.01)

seed = 3721
# Single-vessel LNG trading environment; observations are normalized.
# NOTE(review): parameter semantics (price_sigma vs price_daily_vol etc.)
# are defined by SingleLngEnv — confirm against its implementation.
env = SingleLngEnv(
    n_loc=10,
    n_steps=1000,
    fuel_cost=0.1,
    price_sigma=0.1,
    price_daily_vol=0.02,
    price_theta=0.01,
    max_distance=30.0,
    normalize=True
)

# from ddpg import Agent
from dqn import Agent
# State: 3 features per location plus 2 global features; one action per location.
agent = Agent(state_size=env.n_loc * 3 + 2, action_size=env.n_loc, random_seed=seed)
agent.load()
agent.train(env, 1000, 1000)

solver = DdpgLngSolver(env, agent, 3721)
solver.solve()