Exemplo n.º 1
0
def main():
    """Evaluate a trained model on either Pong-2p-v0 or prey_predator.

    Parses CLI args, builds the model(s) inside a TF session, runs
    ``arglist.episodes`` episodes (forced to 1 in render mode), and prints
    per-episode rewards (render mode) or aggregate mean/std statistics.
    """
    arglist = parse_args()
    N_episode = arglist.episodes
    if arglist.render_mode:
        # When rendering, only watch a single episode.
        N_episode = 1
    reward_list = []
    prey_reward_list = []
    predator_reward_list = []
    if arglist.game == "Pong-2p-v0":
        with U.single_threaded_session():
            model = make_model(arglist,
                               action_space=1,
                               scope="pong",
                               model_path=arglist.model_path,
                               load_model=arglist.use_model)
            print('model size', model.param_count)
            env = make_env(arglist)
            U.initialize()
            for i in range(N_episode):
                reward, steps_taken = pong_simulate(model, env, arglist)
                if arglist.render_mode:
                    print("terminal reward", reward, "average steps taken",
                          np.mean(steps_taken) + 1)
                reward_list.append(reward[0])
            if not arglist.render_mode:
                print("seed", arglist.seed, "average_reward",
                      np.mean(reward_list), "stdev", np.std(reward_list))

    if arglist.game == "prey_predator":
        with U.single_threaded_session():
            prey_model = make_model(arglist,
                                    action_space=2,
                                    scope="prey",
                                    model_path=arglist.model_path,
                                    load_model=arglist.use_model)
            predator_model = make_model(arglist,
                                        action_space=2,
                                        scope="predator",
                                        model_path=arglist.model_path,
                                        load_model=arglist.use_model)
            print('model size', prey_model.param_count,
                  predator_model.param_count)
            env = make_env(arglist)
            U.initialize()
            for i in range(N_episode):
                prey_reward, predator_reward, steps_taken = pp_simulate(
                    [prey_model, predator_model], env, arglist)
                if arglist.render_mode:
                    # Bug fix: the original referenced an undefined name
                    # `rewards` here (NameError in render mode); report the
                    # two per-agent rewards that pp_simulate returned.
                    print("terminal reward", (prey_reward, predator_reward),
                          "average steps taken", np.mean(steps_taken) + 1)
                prey_reward_list.append(prey_reward)
                predator_reward_list.append(predator_reward)
            if not arglist.render_mode:
                print("Seed", arglist.seed, "prey average_reward",
                      np.mean(prey_reward_list), "predator average_reward",
                      np.mean(predator_reward_list))
Exemplo n.º 2
0
def python_env(env=None, merge_env=None):
    """Return an environment dict whose PATH/PYTHONPATH include the bundled
    site-packages directories (libdeno build dir and third_party packages).

    The paths contributed by site.addsitedir() are computed once and cached
    in the module-global `python_site_env`, so the site scan runs only on
    the first call.
    """
    merge_env = {} if merge_env is None else merge_env
    global python_site_env

    if python_site_env is None:
        # Blank out PATH and sys.path so that after the addsitedir() calls
        # they contain exactly the entries those directories contribute.
        # PATH matters too: on Windows it lets the pywin32 DLLs be found.
        saved = os.environ["PATH"], sys.path
        os.environ["PATH"], sys.path = "", []
        # Both calls modify PATH and sys.path as a side effect.
        site.addsitedir(os.path.join(libdeno_path, "build"))
        site.addsitedir(python_packages_path)
        python_site_env = {
            "PATH": os.environ["PATH"],
            "PYTHONPATH": sys.path,
        }
        # Restore the process-wide state we clobbered above.
        os.environ["PATH"], sys.path = saved

    # Build a fresh environment object and graft the cached site paths on.
    env = make_env(env=env, merge_env=merge_env)
    add_env_path(python_site_env["PATH"], env=env, key="PATH")
    add_env_path(python_site_env["PYTHONPATH"], env=env, key="PYTHONPATH")

    return env
Exemplo n.º 3
0
def google_env(env=None, merge_env=None, depot_tools_path=depot_tools_path):
    """Return an environment dict suitable for running depot_tools commands.

    Ensures depot_tools is at the front of PATH (before Python) and, on
    Windows, opts out of Google's internal toolchain infrastructure.
    """
    # Bug fix: the original used a mutable default argument (merge_env={}),
    # which is shared across calls; use the None sentinel as python_env does.
    if merge_env is None:
        merge_env = {}
    env = make_env(env=env, merge_env=merge_env)
    # Depot_tools to be in the PATH, before Python.
    path_prefix = depot_tools_path + os.path.pathsep
    if not env['PATH'].startswith(path_prefix):
        env['PATH'] = path_prefix + env['PATH']
    # We're not using Google's internal infrastructure.
    if os.name == 'nt' and 'DEPOT_TOOLS_WIN_TOOLCHAIN' not in env:
        env['DEPOT_TOOLS_WIN_TOOLCHAIN'] = "0"
    return env
Exemplo n.º 4
0
def python_env(env=None, merge_env=None):
    """Return an environment dict whose PATH/PYTHONPATH include the bundled
    third_party/python_packages directory, treated as a site-packages dir.

    The paths that site.addsitedir() would contribute are computed once and
    cached in the module-global `python_site_env`.
    """
    merge_env = {} if merge_env is None else merge_env
    global python_site_env

    if python_site_env is None:
        # Blank out PATH and sys.path so that after addsitedir() they hold
        # exactly what python_packages_path contributes. PATH matters too:
        # on Windows it lets the pywin32 DLLs be found.
        saved = os.environ["PATH"], sys.path
        os.environ["PATH"], sys.path = "", []
        site.addsitedir(python_packages_path)  # mutates PATH and sys.path
        python_site_env = {
            "PATH": os.environ["PATH"],
            "PYTHONPATH": sys.path,
        }
        # Restore the process-wide state we clobbered above.
        os.environ["PATH"], sys.path = saved

    # Build a fresh environment object and graft the cached site paths on.
    env = make_env(env=env, merge_env=merge_env)
    add_env_path(python_site_env["PATH"], env=env, key="PATH")
    add_env_path(python_site_env["PYTHONPATH"], env=env, key="PYTHONPATH")

    return env
Exemplo n.º 5
0
def google_env(env=None, merge_env=None, depot_tools_path=depot_tools_path):
    """Return an environment dict suitable for running depot_tools commands.

    Ensures depot_tools is at the front of PATH (before Python) and applies
    Windows-only tweaks for the GN/ninja build.
    """
    # Bug fix: the original used a mutable default argument (merge_env={}),
    # which is shared across calls; use the None sentinel as python_env does.
    if merge_env is None:
        merge_env = {}
    env = make_env(env=env, merge_env=merge_env)
    # Depot_tools to be in the PATH, before Python.
    path_prefix = depot_tools_path + os.path.pathsep
    if not env['PATH'].startswith(path_prefix):
        env['PATH'] = path_prefix + env['PATH']

    if os.name == "nt":  # Windows-only environment tweaks.
        # We're not using Google's internal infrastructure.
        # (Dropped the original's redundant re-check of os.name == "nt";
        # we are already inside that branch.)
        if "DEPOT_TOOLS_WIN_TOOLCHAIN" not in env:
            env["DEPOT_TOOLS_WIN_TOOLCHAIN"] = "0"

        # The 'setup_toolchain.py' script does a good job finding the Windows
        # SDK. Unfortunately, if any of the environment variables below are set
        # (as vcvarsall.bat typically would), setup_toolchain absorbs them too,
        # adding multiple identical -imsvc<path> items to CFLAGS.
        # This small variation has no effect on compiler output, but it
        # makes ninja rebuild everything, and causes sccache cache misses.
        # TODO(piscisaureus): fix this upstream.
        env["INCLUDE"] = ""
        env["LIB"] = ""
        env["LIBPATH"] = ""

    return env
Exemplo n.º 6
0

if __name__ == '__main__':

    # Data-collection entry point: roll out a (randomly initialized) model
    # on Pong-2p-v0 and record observations/actions to .npz files under
    # arglist.data_dir.
    # NOTE(review): this snippet appears truncated by the extraction — the
    # final statement is cut off mid-expression; recover the rest from the
    # original source before use.
    arglist = parse_args()
    if not os.path.exists(arglist.data_dir):
        os.makedirs(arglist.data_dir)
    total_frames = 0
    if arglist.game == "Pong-2p-v0":
        with U.single_threaded_session():
            model = make_model(arglist,
                               action_space=1,
                               scope="pong",
                               model_path=arglist.model_path,
                               load_model=arglist.use_model)
            # full_episode=True: presumably disables early termination so
            # whole episodes are recorded — TODO confirm against make_env.
            env = make_env(arglist, full_episode=True)
            model.render_mode = arglist.render_mode
            U.initialize()
            for trial in range(arglist.max_trials):  # 200 trials per worker
                try:
                    # Seed everything from one random int so the recording
                    # filename identifies a reproducible episode.
                    random_generated_int = random.randint(0, 2**31 - 1)
                    filename = arglist.data_dir + "/" + str(
                        random_generated_int) + ".npz"
                    recording_obs = []
                    recording_action = []
                    recording_oppo_action = []

                    np.random.seed(random_generated_int)
                    env.seed(random_generated_int)
                    # random policy
                    model.init_random_model_params(stdev=np.random.rand() *
Exemplo n.º 7
0
import numpy as np
from dqn_agent import DQNAgent
from util import make_env, plot_learning_curve
from gym import wrappers

if __name__ == "__main__":
    env = make_env('PongNoFrameskip-v4')
    best_score = -np.inf
    load_checkpoint = True
    n_games = 5
    
    agent = DQNAgent(gamma=0.99, epsilon=0.1, lr=0.0001,
                    input_dims=(env.observation_space.shape),  # we have modified the default env settings to get env.observation_space.shape
                    n_actions=env.action_space.n, mem_size=50000, eps_min=0.1,
                    batch_size=32, replace=1000, eps_dec=1e-5, 
                    chkpt_dir='.\\models\\', algo='DQNAgent',
                    env_name='PongNoFrameskip-v4')
    if load_checkpoint:
        agent.load_models()

    #env = wrappers.Monitor(env, '.\\video\\', video_callable=lambda episode_id: True, force=True)
    fname= agent.algo+'_'+agent.env_name+'_lr'+str(agent.lr)+'_'+str(n_games)+'games'
    figure_file = '.\\plots\\'+fname+'.png'

    n_steps= 0
    scores, eps_history, steps_array = [], [], []
    for i in range(n_games):
        done = False
        score = 0
        observation = env.reset()  # reset() method is overloaded in class StackFrames(gym.ObservationWrapper) , observation.shape = (4, 84, 84)
                                   # here (1, 84, 84) == (2, 84, 84) == (3, 84, 84) == (4, 84, 84)