Example #1
0
    score_book = {}
    value_loss_book = {}
    actor_loss_book = {}
    critic_1_loss_book = {}
    critic_2_loss_book = {}
    critic_loss_book = {}

    for trial_num in range(n_trials):
        print('\nTrial num:', trial_num + 1)
        agent = Agent(input_dims=env.observation_space.shape,
                      layer1_size=200,
                      layer2_size=200,
                      env=env,
                      n_actions=env.action_space.shape[0],
                      alpha=alpha,
                      beta=beta,
                      gamma=gamma,
                      max_size=max_size,
                      tau=tau,
                      ent_alpha=ent_alpha,
                      batch_size=batch_size,
                      reward_scale=reward_scale,
                      chkpt_dir=chkpt_dir)
        score_history = []
        loss = []
        value_loss = []
        actor_loss = []
        critic_1_loss = []
        critic_2_loss = []
        critic_loss = []

        if load_checkpoint:
Example #2
0
    perturbations = [0.3, 0.5, 0.7, 0.9]
    
    for perturbation in perturbations:
        reward_history = []    
        score_book = {}
        value_loss_book = {}
        actor_loss_book = {}
        critic_1_loss_book = {}
        critic_2_loss_book = {}
        critic_loss_book = {}

        print("Perturbation amount: {}".format(perturbation))
        for trial_num in range(n_trials):
            print('\nTrial num:', trial_num+1)
            agent = Agent(input_dims=env.observation_space.shape, layer1_size=256, layer2_size=256,
                        env=env, n_actions=env.action_space.n, alpha=alpha, beta=beta, 
                        gamma=gamma, max_size=max_size, tau=tau, ent_alpha=ent_alpha, batch_size=batch_size,
                        reward_scale = reward_scale, chkpt_dir=chkpt_dir)
            score_history = []
            loss = []
            value_loss = []
            actor_loss = []
            critic_1_loss = []
            critic_2_loss = []
            critic_loss = []

            if load_checkpoint:
                agent.load_models()

            for i in tqdm(range(n_games)):
                
                if render:
Example #3
0
if __name__ == '__main__':
    #=========================Enable GPU usage===========================
    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    tf.config.experimental.set_memory_growth(physical_devices[0], True)

    #========Operational Instructions=========
    render = True
    debugging = False
    training = True
    load_checkpoint = True

    #============================Constants================
    n_episodes = 7000
    env = CarRacing(obstacles=False)
    agent = Agent(env)
    total_steps = 0
    if load_checkpoint:
        total_rewards = list(agent.load_rewards('total'))
        avg_rewards = list(agent.load_rewards('avg'))
        episode_offset = len(total_rewards)
        agent.load_models()
        agent.load_buffer()
    else:
        total_rewards = []
        avg_rewards = []

    #================Main loop===============================
    for episode in range(n_episodes):
        if load_checkpoint: episode += episode_offset
        env.reset()