示例#1
0
    # Run configuration: every value below is attached to `config`, which is
    # handed to MultiAgent at the end of this section.
    config.n_episodes = 40000
    config.max_t = 1000
    config.buffer_size = 100000
    config.batch_size = 200
    config.gamma = 0.99
    config.tau = 0.001
    config.lr_actor = 0.0001
    config.lr_critic = 0.0001
    config.n_agents = n_agents
    config.state_size = state_size * state_multiplier
    config.action_size = action_size
    config.learn_start = 10000
    config.max_action = 1  # maximum value allowed for each action
    config.memory = ExperienceReplayMemory(config.buffer_size, seed)
    config.update_every = 2
    # Use the first CUDA device when one is available, otherwise the CPU.
    config.device = torch.device(
        "cuda:0" if torch.cuda.is_available() else "cpu")

    # Snapshot the configuration as pretty-printed JSON in
    # <log_dir>/config.json. jsonpickle produces the raw JSON text; the
    # json.loads/json.dumps round trip only re-formats it (4-space indent,
    # sorted keys).
    # The text is built *before* the file is opened so that a serialization
    # error cannot leave a truncated config.json behind, and the `with` block
    # guarantees the handle is closed even if the write fails.
    config_json = json.dumps(
        json.loads(
            jsonpickle.encode(config, unpicklable=False, max_depth=1)),
        indent=4,
        sort_keys=True)
    with open(os.path.join(log_dir, "config.json"), "w+") as config_file:
        config_file.write(config_json)

    # Score bookkeeping; the window keeps at most the 100 most recent entries.
    scores = []
    scores_std = []
    scores_avg = []
    scores_window = deque(maxlen=100)

    agent = MultiAgent(config)
示例#2
0
    # Run configuration for a short run (10 episodes) that starts from a saved
    # checkpoint; every value below is attached to `config` before the agent
    # is built.
    config.n_episodes = 10
    config.max_t = 1000
    config.buffer_size = 100000
    config.batch_size = 200
    config.gamma = 0.99
    config.tau = 0.001
    config.lr_actor = 0.0001
    config.lr_critic = 0.001
    config.n_agents = n_agents
    config.state_size = state_size * state_multiplier
    config.action_size = action_size
    config.learn_start = 3000
    config.max_action = 1
    # NOTE(review): `rand_seed` is read here but is only assigned (to 0) a few
    # lines further down. Unless it is already bound before this excerpt, this
    # line fails with NameError/UnboundLocalError — confirm and, if so, move
    # the assignment above this line.
    config.memory = ExperienceReplayMemory(config.buffer_size, rand_seed)
    config.update_every = 2
    # `device` comes from outside this excerpt.
    config.device = device
    rand_seed = 0
    # Score bookkeeping; the window keeps at most the 100 most recent entries.
    scores = []
    scores_std = []
    scores_avg = []
    scores_window = deque(maxlen=100)

    agent = MultiAgent(config)
    # Restore the agent from a previously saved checkpoint (presumably trained
    # weights — what exactly is restored depends on MultiAgent.load).
    agent.load("./save/checkpoint_success.pth")
    global_steps = 0
    # NOTE(review): `config.noise_scheduler` is not assigned anywhere in the
    # visible lines; assumed to be set before this excerpt — confirm.
    noise_scheduler = config.noise_scheduler
    # One iteration per episode; the loop body continues past this excerpt.
    for i_episode in range(config.n_episodes):
        # Reset the environment with train_mode=False and keep the info entry
        # for the first brain name.
        env_info = env.reset(train_mode=False)[env.brain_names[0]]
        # Wrap the observations in a float tensor placed on `device`.
        states = torch.tensor(env_info.vector_observations,
                              dtype=torch.float,
                              device=device)