Example #1
def benchmark_score_from_local(benchmark_id, training_dir):
    spec = gym.benchmark_spec(benchmark_id)

    directories = []
    for name, _, files in os.walk(training_dir):
        manifests = gym.monitoring.detect_training_manifests(name, files=files)
        if manifests:
            directories.append(name)

    benchmark_results = defaultdict(list)
    for training_dir in directories:
        results = gym.monitoring.load_results(training_dir)

        env_id = results['env_info']['env_id']
        benchmark_result = spec.score_evaluation(
            env_id, results['data_sources'],
            results['initial_reset_timestamps'], results['episode_lengths'],
            results['episode_rewards'], results['episode_types'],
            results['timestamps'])
        # from pprint import pprint
        # pprint(benchmark_result)
        benchmark_results[env_id].append(benchmark_result)

    return gym.benchmarks.scoring.benchmark_aggregate_score(
        spec, benchmark_results)
Example #2
def q1_run(num_timesteps):
    # Get Atari games.
    benchmark = gym.benchmark_spec('Atari40M')

    # Change the index to select a different game.
    task = benchmark.tasks[3]

    # Run training
    seed = 0  # Use a seed of zero (you may want to randomize the seed!)
    env = get_env(task, seed, expt_dir='tmp/gym-results2')

    optimizer_spec = OptimizerSpec(
        constructor=optim.RMSprop,
        kwargs=dict(lr=LEARNING_RATE, alpha=ALPHA, eps=EPS),
    )

    exploration_schedule = LinearSchedule(1000000, 0.1)

    dqn_learning(
        env=env,
        q_func=DQN,
        runname="normal_run",
        optimizer_spec=optimizer_spec,
        exploration=exploration_schedule,
        stopping_criterion=stopping_criterion2(num_timesteps),
        replay_buffer_size=REPLAY_BUFFER_SIZE,
        batch_size=BATCH_SIZE,
        gamma=GAMMA,
        learning_starts=LEARNING_STARTS,
        learning_freq=LEARNING_FREQ,
        frame_history_len=FRAME_HISTORY_LEN,
        target_update_freq=TARGET_UPDATE_FREQ
    )
Example #3
def main():
    # Games that we'll be testing.
    game_to_ID = {'BeamRider':0,
                  'Breakout':1,
                  'Enduro':2,
                  'Pong':3,
                  'Qbert':4}

    # Get some arguments here. Note: num_timesteps default uses tasks default.
    parser = argparse.ArgumentParser()
    parser.add_argument('--game', type=str, default='Pong')
    parser.add_argument('--seed', type=int, default=0)
    parser.add_argument('--num_timesteps', type=int, default=40000000)
    args = parser.parse_args()

    # Choose the game to play and set log file.
    benchmark = gym.benchmark_spec('Atari40M')
    task = benchmark.tasks[game_to_ID[args.game]]
    log_name = args.game+"_s"+str(args.seed).zfill(3)+".pkl"

    # Run training. Should change the seed if possible!
    # Also, the actual # of iterations run is _roughly_ num_timesteps/4.
    seed = args.seed
    env = get_env(task, seed)
    session = get_session()
    print("task = {}".format(task))
    atari_learn(env, 
                session, 
                num_timesteps=args.num_timesteps,
                log_file=log_name)
Example #4
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--lr_multiplier', type=float, default=1.0)
    parser.add_argument('--target_update_freq', type=float, default=10000)
    parser.add_argument('--exp_name', type=str, default='Knapsack')
    parser.add_argument('--boltzmann_exploration', action='store_true')
    args = parser.parse_args()

    # Get Atari games.
    benchmark = gym.benchmark_spec('Atari40M')

    # Change the index to select a different game.
    task = benchmark.tasks[3]

    # Run training
    seed = 0  # Use a seed of zero (you may want to randomize the seed!)
    # env = get_env(task, seed)
    env = Knapsack(10, 3)
    # test_env = Knapsack(5, 1)
    # session = get_session()
    knapsack_learn(env,
                   None,
                   num_timesteps=task.max_timesteps,
                   lr_multiplier=args.lr_multiplier,
                   target_update_freq=args.target_update_freq,
                   exp_name=args.exp_name,
                   boltzmann_exploration=args.boltzmann_exploration)
Example #5
def main():
    # os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
    # os.environ["CUDA_VISIBLE_DEVICES"] = "4"
    # Get Atari games.
    benchmark = gym.benchmark_spec('Atari40M')

    # Change the index to select a different game.
    task = benchmark.tasks[6]
    id2game = {
        'QbertNoFrameskip-v4': 'qbert',
        'SpaceInvadersNoFrameskip-v4': 'spaceinvaders'
    }
    g = id2game[task.env_id]
    # task = 'SpaceInvadersNoFrameskip-v4'

    # Run training
    seed = 0  # Use a seed of zero (you may want to randomize the seed!)
    env = get_env(task, seed)
    session = get_session()
    log_dir = os.path.join(
        './logs', env.spec.id,
        datetime.datetime.now().strftime("%Y-%m-%d-%H-%M-%S"))
    if not os.path.exists(log_dir):
        os.makedirs(log_dir)
    atari_learn(env,
                g,
                session,
                num_timesteps=task.max_timesteps,
                log_dir=log_dir,
                double_q=True,
                soft_q=True,
                use_expert=False)
Example #6
def main(_):
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True

    with tf.Session(config=config) as sess:
 
        global_step = tf.Variable(0, name='global_step', trainable=False)

        benchmark = gym.benchmark_spec('Atari40M')

        # Change the index to select a different game.
        task = benchmark.tasks[3]

        # Run training
        seed = 0 # Use a seed of zero (you may want to randomize the seed!)
        env = get_env(task, seed)

        np.random.seed(RANDOM_SEED)
        tf.set_random_seed(RANDOM_SEED)

        # state_dim = np.prod(env.observation_space.shape)
        state_dim = env.reset().shape
        print('state_dim:', state_dim)
        action_dim = env.action_space.n // 2  # 3 actions: 1: hold, 2: up, 3: down
        print('action_dim:', action_dim)


        pg = PGNetwork(sess, state_dim, action_dim, PG_LEARNING_RATE)

        train(sess, env, pg, global_step)
Example #7
def main():

    parser = argparse.ArgumentParser()
    parser.add_argument('--batch_norm', action='store_true')
    parser.add_argument('--max_pool', action='store_true')
    parser.add_argument('--doubleQ', action='store_true')
    parser.add_argument('--log_name', type=str, default='default')
    parser.add_argument('--buf_size', type=int, default=1000000)
    parser.add_argument('--batch_size', type=int, default=32)
    parser.add_argument('--gamma', type=float, default=0.99)
    parser.add_argument('--learn_start', type=int, default=50000)
    parser.add_argument('--learn_freq', type=int, default=4)
    parser.add_argument('--frame_hist', type=int, default=4)
    parser.add_argument('--targ_up_freq', type=int, default=10000)
    parser.add_argument('--grad_clip', type=float, default=10.0)
    parser.add_argument('--lr_multiplier', type=float, default=1.0)
    parser.add_argument('--ep_start', type=float, default=1.0)

    args = parser.parse_args()

    # Get Atari games.
    benchmark = gym.benchmark_spec('Atari40M')

    # Change the index to select a different game.
    task = benchmark.tasks[3]

    # Run training
    seed = random.randint(0, 1000)
    print("Seed: {}".format(seed))
    env = get_env(task, seed)
    atari_learn(env, args, num_timesteps=task.max_timesteps)
Example #8
def main():
    # Get Atari games.
    benchmark = gym.benchmark_spec('Atari40M')

    # Change the index to select a different game.
    # VRR: We can use this to try multiple games. (0) Flat 3d space invaders (1) Breakout  (2) Racing Car (3) Pong (4) Weird pyramid (5) Seaquest (6) Old looking space invaders
    task = benchmark.tasks[5]

    # Run training
    seed = round(time.time())  # Seed from the current time (not a fixed seed).
    env = get_env(task, seed)

    #    #["[10, 30]_[20, 50]","[25, 10]_[15, 70]","[42, 10]_[20, 42]","[10, 10]_[60, 20]"]
    #    #origin = [[10,30],[25,10],[42,10],[10,10]]
    #    #h_and_w = [[20,50],[15,70],[20,42],[60,20]]
    #    origin = [[50,10],[10,50],[40,20],[10,15]]
    #    h_and_w = [[15,70],[60,20],[20,50],[20,42]]
    #
    #    obs = sensor_noise(env.reset(),origin[0][0],origin[0][1],h_and_w[0][0],h_and_w[0][1])
    #    obs = sensor_noise(obs,origin[1][0],origin[1][1],h_and_w[1][0],h_and_w[1][1])
    #    obs = sensor_noise(obs,origin[2][0],origin[2][1],h_and_w[2][0],h_and_w[2][1])
    #    obs = sensor_noise(obs,origin[3][0],origin[3][1],h_and_w[3][0],h_and_w[3][1])
    #    plt.imshow(obs[:,:,0])
    #    plt.pause(10.0)
    #    env.reset();

    session = get_session()
    #atari_learn(env, session, num_timesteps=task.max_timesteps/2)
    robust_rl(env, session)
Example #9
def q2_run(num_timesteps):
    schedulers = {"no_explore": ConstantSchedule(0.1),
                  "delayed_decay": PiecewiseSchedule([(0, 1.0), (0.25e6, 1.0), (1.25e6, 0.1)], outside_value=0.1),
                  "slower_decay": LinearSchedule(1500000, 0.1)}

    for name, exploration_schedule in schedulers.items():
        # Get Atari games.
        benchmark = gym.benchmark_spec('Atari40M')

        # Change the index to select a different game.
        task = benchmark.tasks[3]

        # Run training
        seed = 0  # Use a seed of zero (you may want to randomize the seed!)
        env = get_env(task, seed)
        env.reset()

        optimizer_spec = OptimizerSpec(constructor=optim.RMSprop, kwargs=dict(lr=LEARNING_RATE, alpha=ALPHA, eps=EPS))

        dqn_learning(
            env=env,
            q_func=DQN,
            runname=name,
            optimizer_spec=optimizer_spec,
            exploration=exploration_schedule,
            stopping_criterion=stopping_criterion2(num_timesteps),
            replay_buffer_size=REPLAY_BUFFER_SIZE,
            batch_size=BATCH_SIZE,
            gamma=GAMMA,
            learning_starts=LEARNING_STARTS,
            learning_freq=LEARNING_FREQ,
            frame_history_len=FRAME_HISTORY_LEN,
            target_update_freq=TARGET_UPDATE_FREQ
        )
Example #10
def atari_main():
    # Get Atari games.
    benchmark = gym.benchmark_spec('Atari40M')

    # Change the index to select a different game.
    # ['BeamRiderNoFrameskip-v4', 'BreakoutNoFrameskip-v4', 'EnduroNoFrameskip-v4',
    #  'PongNoFrameskip-v4', 'QbertNoFrameskip-v4', 'SeaquestNoFrameskip-v4',
    #  'SpaceInvadersNoFrameskip-v4']
    task = benchmark.tasks[1]

    print('available tasks: ', [t.env_id for t in benchmark.tasks])
    print('task: ', task.env_id, 'max steps: ', task.max_timesteps)

    # Run training
    seed = 0 # Use a seed of zero (you may want to randomize the seed!)
    env = get_env(task, seed)

    last_obs = env.reset()

    exploration_schedule = PiecewiseSchedule(
        [
            (0, 1.0),
            (1e6, 0.1),
            (task.max_timesteps / 2, 0.01),
        ], outside_value=0.01
    )

    dqn = DoubleDQN(image_shape=(84, 84, 1),
                    num_actions=env.action_space.n,
                    training_starts=50000,
                    target_update_freq=10000,
                    training_batch_size=32,
                    # training_starts=2000,
                    # target_update_freq=500,
                    # training_batch_size=3,
                    exploration=exploration_schedule
                   )

    reward_sum_episode = 0
    num_episodes = 0
    episode_rewards = deque(maxlen=100)
    for step in range(task.max_timesteps):
        if step > 0 and step % 1000 == 0:
            print('step: ', step, 'episodes:', num_episodes, 'epsilon:', exploration_schedule.value(step),
                  'learning rate:', dqn.get_learning_rate(), 'last 100 training loss mean', dqn.get_avg_loss(),
                  'last 100 episode mean rewards: ', np.mean(np.array(episode_rewards, dtype=np.float32)))
        env.render()
        action = dqn.choose_action(step, last_obs)
        obs, reward, done, info = env.step(action)
        reward_sum_episode += reward
        dqn.learn(step, action, reward, done, info)
        if done:
            last_obs = env.reset()
            episode_rewards.append(reward_sum_episode)
            reward_sum_episode = 0
            num_episodes += 1
        else:
            last_obs = obs
Example #11
def _upload_benchmark(training_dir, algorithm_id, benchmark_id, benchmark_run_tags, api_key, ignore_open_monitors, skip_videos):
    # We're uploading a benchmark run.
    directories = []
    env_ids = []
    for name, _, files in os.walk(training_dir):
        manifests = monitoring.detect_training_manifests(name, files=files)
        if manifests:
            env_info = monitoring.load_env_info_from_manifests(manifests, training_dir)
            env_ids.append(env_info['env_id'])
            directories.append(name)

    # Validate against benchmark spec
    try:
        spec = benchmark_spec(benchmark_id)
    except error.UnregisteredBenchmark:
        raise error.Error("Invalid benchmark id: {}. Are you using a benchmark registered in gym/benchmarks/__init__.py?".format(benchmark_id))

    spec_env_ids = [task.env_id for task in spec.tasks for _ in range(task.trials)]

    if not env_ids:
        raise error.Error("Could not find any evaluations in {}".format(training_dir))

    # This could be more stringent about mixing evaluations
    if sorted(env_ids) != sorted(spec_env_ids):
        logger.info("WARNING: Evaluations do not match spec for benchmark %s. In %s, we found evaluations for %s, expected %s", benchmark_id, training_dir, sorted(env_ids), sorted(spec_env_ids))

    tags = json.dumps(benchmark_run_tags)
    _create_with_retries = util.retry_exponential_backoff(
        resource.BenchmarkRun.create,
        (error.APIConnectionError,),
        max_retries=5,
        interval=3,
    )
    benchmark_run = _create_with_retries(benchmark_id=benchmark_id, algorithm_id=algorithm_id, tags=tags)
    benchmark_run_id = benchmark_run.id

    # Actually do the uploads.
    for training_dir in directories:
        # N.B. we don't propagate algorithm_id to Evaluation if we're running as part of a benchmark
        _upload_with_retries = util.retry_exponential_backoff(
            _upload,
            (error.APIConnectionError,),
            max_retries=5,
            interval=3,
        )
        _upload_with_retries(training_dir, None, None, benchmark_run_id, api_key, ignore_open_monitors, skip_videos)

    logger.info("""
****************************************************
You successfully uploaded your benchmark on %s to
OpenAI Gym! You can find it at:

    %s

****************************************************
    """.rstrip(), benchmark_id, benchmark_run.web_url())

    return benchmark_run_id
Example #12
def main():
    # Get Atari games.
    benchmark = gym.benchmark_spec('Atari40M')
    # Change the index to select a different game.
    task = benchmark.tasks[4]
    # Run training
    seed = 0  # Use a seed of zero (you may want to randomize the seed!)
    env = get_env(task, seed)
    session = get_session()
    atari_learn(env, session, num_timesteps=task.max_timesteps)
Example #13
def run_model(model_path, log_path, max_episode_count=500):
    # Get Atari games.
    benchmark = gym.benchmark_spec('Atari40M')
    # Change the index to select a different game.
    task = benchmark.tasks[3]

    # Run training
    seed = 0  # Use a seed of zero (you may want to randomize the seed!)
    env = get_env(task, seed, log_path)
    session = get_session()
    atari_run(env, session, model_path, max_episode_count)
Example #14
def main():
    # Get Atari games.
    benchmark = gym.benchmark_spec('Atari40M')

    # Change the index to select a different game. 3 is pong, 4 is Q*bert, 1 is breakout, 5 is seaquest, 6 is space invaders
    task = benchmark.tasks[6]

    # Run training
    seed = 0 # Use a seed of zero (you may want to randomize the seed!)
    env = get_env(task, seed)
    session = get_session()
    atari_learn(env, session, num_timesteps=task.max_timesteps)
Example #15
def gen_pong_env(seed):
    """Generate a pong environment, with all the bells and whistles."""
    benchmark = gym.benchmark_spec('Atari40M')
    task = benchmark.tasks[3]

    env_id = task.env_id
    env = gym.make(env_id)
    env.seed(seed)

    # Can wrap in gym.wrappers.Monitor here if we want to record.
    env = wrap_deepmind(env)
    return env
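The comment above notes that the environment can be wrapped in gym.wrappers.Monitor to record episodes. A minimal sketch of that variant, assuming an older Gym release where gym.wrappers.Monitor is available and that wrap_deepmind is importable from an atari_wrappers module (both the import path and the output directory are illustrative):

import gym
from atari_wrappers import wrap_deepmind  # hypothetical import path for the helper used above

def gen_recorded_pong_env(seed, monitor_dir='/tmp/pong-monitor'):
    """Variant of gen_pong_env that also records episodes via gym.wrappers.Monitor."""
    benchmark = gym.benchmark_spec('Atari40M')
    env = gym.make(benchmark.tasks[3].env_id)
    env.seed(seed)
    # Record raw frames before the DeepMind preprocessing is applied.
    env = gym.wrappers.Monitor(env, monitor_dir, force=True)
    return wrap_deepmind(env)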
Example #16
def main():
    # Get Atari games.
    benchmark = gym.benchmark_spec('Atari40M')

    # Change the index to select a different game.
    task = benchmark.tasks[3]

    # Run training
    seed = 0 # Use a seed of zero (you may want to randomize the seed!)
    env = get_env(task, seed)
    session = get_session()
    atari_learn(env, session, num_timesteps=task.max_timesteps)
Example #17
def main():
    # Get Atari games.
    benchmark = gym.benchmark_spec('Atari40M')

    # Change the index to select a different game.
    task = benchmark.tasks[2]

    # Run training
    env = mvc_env.MVC_env(7)

    graph_learn(env,
                num_timesteps=task.max_timesteps,
                q_func=Q_function_graph_model.Q_func)
Example #18
def _upload_benchmark(training_dir, algorithm_id, benchmark_id, benchmark_run_tags, api_key, ignore_open_monitors, skip_videos):
    # We're uploading a benchmark run.
    directories = []
    env_ids = []
    for name, _, files in os.walk(training_dir):
        manifests = monitoring.detect_training_manifests(name, files=files)
        if manifests:
            env_info = monitoring.load_env_info_from_manifests(manifests, training_dir)
            env_ids.append(env_info['env_id'])
            directories.append(name)

    # Validate against benchmark spec
    try:
        spec = benchmark_spec(benchmark_id)
    except error.UnregisteredBenchmark:
        raise error.Error("Invalid benchmark id: {}. Are you using a benchmark registered in gym/benchmarks/__init__.py?".format(benchmark_id))

    spec_env_ids = [task.env_id for task in spec.tasks for _ in range(task.trials)]

    if not env_ids:
        raise error.Error("Could not find any evaluations in {}".format(training_dir))

    # This could be more stringent about mixing evaluations
    if sorted(env_ids) != sorted(spec_env_ids):
        logger.info("WARNING: Evaluations do not match spec for benchmark %s. In %s, we found evaluations for %s, expected %s", benchmark_id, training_dir, sorted(env_ids), sorted(spec_env_ids))

    benchmark_run = resource.BenchmarkRun.create(benchmark_id=benchmark_id, algorithm_id=algorithm_id, tags=json.dumps(benchmark_run_tags))
    benchmark_run_id = benchmark_run.id

    # Actually do the uploads.
    for training_dir in directories:
        # N.B. we don't propagate algorithm_id to Evaluation if we're running as part of a benchmark
        _upload_with_retries = util.retry_exponential_backoff(
            _upload,
            (error.APIConnectionError,),
            max_retries=5,
            interval=3,
        )
        _upload_with_retries(training_dir, None, None, benchmark_run_id, api_key, ignore_open_monitors, skip_videos)

    logger.info("""
****************************************************
You successfully uploaded your benchmark on %s to
OpenAI Gym! You can find it at:

    %s

****************************************************
    """.rstrip(), benchmark_id, benchmark_run.web_url())

    return benchmark_run_id
Example #19
def main():
    # Get Atari games.
    benchmark = gym.benchmark_spec('Atari40M')

    # Change the index to select a different game.
    task = benchmark.tasks[2]

    # Run training
    # env = get_env(task, seed)
    env = mvc_env.MVC_env(7)
    # env = tsp_env.TSP_env(5, no_move_penalty=0)
    graph_learn(env,
                num_timesteps=task.max_timesteps,
                q_func=Q_function_graph_model.Q_func)
Example #20
def main():
    # Get Atari games.
    benchmark = gym.benchmark_spec('Atari40M')

    # Change the index to select a different game.
    task = benchmark.tasks[2]

    # Run training
    seed = 0  # Use a seed of zero (you may want to randomize the seed!)
    # env = get_env(task, seed)
    env = Knapsack(10, 3)
    #env = tsp_env.TSP_env(5, no_move_penalty=0,
    #                      use_alternative_state=True)
    knapsack_learn(env, num_timesteps=task.max_timesteps)
Example #21
def main():
    # Get Atari games.
    benchmark = gym.benchmark_spec('Atari40M')

    # Change the index to select a different game.
    task = benchmark.tasks[3]
    PROJECT_ROOT = os.path.dirname(os.path.realpath(__file__))
    logz.configure_output_dir(os.path.join(PROJECT_ROOT, "log/"+"_RAM_"+time.strftime("%d-%m-%Y_%H-%M-%S")))

    # Run training
    seed = 0 # Use a seed of zero (you may want to randomize the seed!)
    env = get_env(task, seed)
    session = get_session()
    atari_learn(env, session, num_timesteps=task.max_timesteps)
Example #22
def main():
    # Get Atari games.
    benchmark = gym.benchmark_spec('Atari40M')

    # Change the index to select a different game.
    task = benchmark.tasks[3]

    # Run training
    seed = 0  # Use a seed of zero (you may want to randomize the seed!)
    env = get_env(task, seed)
    session = get_session()
    atari_learn(env,
                session,
                num_timesteps=task.max_timesteps,
                dir_name='/Users/anil/Code/ai/deeprlcourse/hw3/dqn/atari/')
Example #23
def gen_vectorized_pong_env(n):
    """
    Generate a vectorized pong environment, with n simultaneous
    differently-seeded envs. For deterministic seeding, you
    should seed np.random.seed beforehand.
    """
    benchmark = gym.benchmark_spec('Atari40M')
    task = benchmark.tasks[3]

    env_id = task.env_id
    envs = [wrap_deepmind(gym.make(env_id)) for _ in range(n)]
    env = MultiprocessingEnv(envs)

    seeds = [int(s) for s in np.random.randint(0, 2 ** 30, size=n)]
    env.seed(seeds)
    return env
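A short usage sketch for the helper above, following the docstring's advice to seed NumPy first so the per-environment seeds are reproducible (the values are illustrative):

import numpy as np

np.random.seed(0)                 # fixes the per-env seeds drawn inside the helper
env = gen_vectorized_pong_env(4)  # four differently-seeded Pong environments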
Example #24
def main():
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('--cuda', '-gpu', action='store_true')
    args = parser.parse_args()

    # Get Atari games.
    benchmark = gym.benchmark_spec('Atari40M')

    # Change the index to select a different game.
    task = benchmark.tasks[3]

    # Run training
    seed = 0  # Use a seed of zero (you may want to randomize the seed!)
    env = get_env(task, seed)
    atari_learn(args, env, num_timesteps=task.max_timesteps)
Example #25
def main():
    # Get Atari games.
    benchmark = gym.benchmark_spec('Atari40M')

    # Change the index to select a different game.
    task = benchmark.tasks[3]

    # Run training
    if len(sys.argv) > 1:
        seed = int(sys.argv[1])
    else:
        seed = int(time.time())  # Seed from the current time when none is supplied.
    print("Seed: " + str(seed))
    env = get_env(task, seed)
    session = get_session()
    atari_learn(env, session, num_timesteps=task.max_timesteps)
Example #26
def main():
    # Get Atari games.
    benchmark = gym.benchmark_spec('Atari40M')

    # Change the index to select a different game.
    #task = benchmark.tasks[3]
    task = benchmark.tasks[0]  # beam rider
    #task = benchmark.tasks[1]  # breakout

    #task.env_id
    # Run training
    seed = 0  # Use a seed of zero (you may want to randomize the seed!)
    env = get_env(task, seed)
    #env.
    session = get_session()
    print("task:" + task.env_id + " max_timesteps:" + str(task.max_timesteps))
    atari_learn(env, session, num_timesteps=task.max_timesteps)
Example #27
def main():
    args = parse_args()

    # Get Atari games.
    benchmark = gym.benchmark_spec('Atari40M')

    # Change the index to select a different game.
    task = benchmark.tasks[3]

    # Run training
    seed = 0  # Use a seed of zero (you may want to randomize the seed!)
    env = get_env(task, seed, args["model_fn"])
    session = get_session()

    max_timesteps = args.get("max_timesteps", None) or task.max_timesteps
    results_file = args["results_file"]
    model_fn = getattr(models, args["model_fn"])

    atari_learn(env, session, num_timesteps=max_timesteps, model_fn=model_fn)
Example #28
def benchmark_score_from_local(benchmark_id, training_dir):
    spec = gym.benchmark_spec(benchmark_id)

    directories = []
    for name, _, files in os.walk(training_dir):
        manifests = gym.monitoring.detect_training_manifests(name, files=files)
        if manifests:
            directories.append(name)

    benchmark_results = defaultdict(list)
    for training_dir in directories:
        results = gym.monitoring.load_results(training_dir)

        env_id = results['env_info']['env_id']
        benchmark_result = spec.score_evaluation(env_id, results['data_sources'], results['initial_reset_timestamps'], results['episode_lengths'], results['episode_rewards'], results['episode_types'], results['timestamps'])
        # from pprint import pprint
        # pprint(benchmark_result)
        benchmark_results[env_id].append(benchmark_result)

    return gym.benchmarks.scoring.benchmark_aggregate_score(spec, benchmark_results)
Example #29
def main():
    parser = argparse.ArgumentParser(description='RL agents for atari')
    subparsers = parser.add_subparsers(title="subcommands", dest="subcommand")

    train_parser = subparsers.add_parser("train", help="train an RL agent for atari games")
    train_parser.add_argument("--task-id", type=int, required=True, help="0 = BeamRider, 1 = Breakout, 2 = Enduro, 3 = Pong, 4 = Qbert, 5 = Seaquest, 6 = Spaceinvaders")
    train_parser.add_argument("--gpu", type=int, default=None, help="ID of GPU to be used")
    train_parser.add_argument("--double-dqn", type=int, default=0, help="double dqn - 0 = No, 1 = Yes")
    train_parser.add_argument("--dueling-dqn", type=int, default=0, help="dueling dqn - 0 = No, 1 = Yes")

    args = parser.parse_args()

    # command
    if args.gpu is not None:
        if torch.cuda.is_available():
            torch.cuda.set_device(args.gpu)
            print("CUDA Device: %d" % torch.cuda.current_device())

    # Get Atari games.
    benchmark = gym.benchmark_spec('Atari40M')


    # Change the index to select a different game.
    # 0 = BeamRider
    # 1 = Breakout
    # 2 = Enduro
    # 3 = Pong
    # 4 = Qbert
    # 5 = Seaquest
    # 6 = Spaceinvaders
    # for i in benchmark.tasks:
    #     print(i)
    task = benchmark.tasks[args.task_id]
    # task = benchmark_class('Pong-v0')
    # Run training
    seed = 0 # Use a seed of zero (you may want to randomize the seed!)
    double_dqn = (args.double_dqn == 1)
    dueling_dqn = (args.dueling_dqn == 1)
    env = get_env(task, seed, task.env_id, double_dqn, dueling_dqn)
    print("Training on %s, double_dqn %d, dueling_dqn %d" %(task.env_id, double_dqn, dueling_dqn))
    atari_learn(env, task.env_id, num_timesteps=task.max_timesteps, double_dqn=double_dqn, dueling_dqn=dueling_dqn)
Example #30
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--frame', type=int, default=4)
    parser.add_argument('--log', type=str, default='progress.pkl')
    args = parser.parse_args()

    # Get Atari games.
    benchmark = gym.benchmark_spec('Atari40M')
    # print(benchmark)
    # Change the index to select a different game.
    task = benchmark.tasks[3]
    print('task.max_timesteps', task.max_timesteps)  #40000000
    # Run training
    seed = 0  # Use a seed of zero (you may want to randomize the seed!)
    env = get_env(task, seed)
    session = get_session()
    atari_learn(env,
                session,
                num_timesteps=task.max_timesteps,
                frame_history_len=args.frame,
                log_file=args.log)
Example #31
def main():

    # Logger
    # https://github.com/mwhittaker/homework/commit/cb043dbc980d898547f552e07f475696ce57f1d3
    format = "[%(asctime)-15s %(pathname)s:%(lineno)-3s] %(message)s"
    handler = logging.StreamHandler()
    handler.setFormatter(logging.Formatter(format))
    logger = logging.getLogger("dqn")
    logger.propagate = False
    logger.addHandler(handler)
    logger.setLevel(logging.DEBUG)

    # Get Atari games.
    benchmark = gym.benchmark_spec('Atari40M')

    # Change the index to select a different game.
    task = benchmark.tasks[3]

    # Run training
    seed = 0  # Use a seed of zero (you may want to randomize the seed!)
    env = get_env(task, seed)
    session = get_session()
    atari_learn(env, session, num_timesteps=task.max_timesteps)
Example #32
def bonus_run(num_timesteps):
    def make_range_black(arr: np.ndarray, start, end):
        arr[:, start:end, :] = 0

    frame_filters = {"no_left_side": lambda x: make_range_black(x, 0, x.shape[1] // 4),
                     "no_middle_side": lambda x: make_range_black(x, x.shape[1] // 4, x.shape[1] // 2), }

    for name, frame_filter in frame_filters.items():
        # Get Atari games.
        benchmark = gym.benchmark_spec('Atari40M')

        # Change the index to select a different game.
        task = benchmark.tasks[3]

        # Run training
        seed = 0  # Use a seed of zero (you may want to randomize the seed!)
        env = get_env(task, seed)
        env.reset()

        optimizer_spec = OptimizerSpec(constructor=optim.RMSprop, kwargs=dict(lr=LEARNING_RATE, alpha=ALPHA, eps=EPS))

        dqn_learning(
            env=env,
            q_func=DQN,
            runname=name,
            frame_filter=frame_filter,
            optimizer_spec=optimizer_spec,
            exploration=LinearSchedule(1000000, 0.1),
            stopping_criterion=stopping_criterion2(num_timesteps),
            replay_buffer_size=REPLAY_BUFFER_SIZE,
            batch_size=BATCH_SIZE,
            gamma=GAMMA,
            learning_starts=LEARNING_STARTS,
            learning_freq=LEARNING_FREQ,
            frame_history_len=FRAME_HISTORY_LEN,
            target_update_freq=TARGET_UPDATE_FREQ
        )
Example #33
                episode_count += 1

def get_env(task):
    env_id = task.env_id
    env = gym.make(env_id)
    env = wrap_deepmind(env)
    return env

gamma = 0.99 # discount rate for advantage estimation and reward discounting
s_size = 7056 # Observations are greyscale frames of 84 * 84 * 1
load_model = False
N = 20
k = 1.
model_path = './qrdqn'
 # Get Atari games.
benchmark = gym.benchmark_spec('Atari40M')
# Change the index to select a different game.
task = benchmark.tasks[3]

tf.reset_default_graph()

if not os.path.exists(model_path):
    os.makedirs(model_path)
    
env = get_env(task)
a_size = env.action_space.n

batch_size = 10
global_episodes = tf.Variable(0, dtype=tf.int32, name='global_episodes', trainable=False)
trainer = tf.train.AdamOptimizer(learning_rate=0.00015)
master_network = Q_Network(s_size, a_size, 'global', None)  # Generate global network
Example #34
import gym
benchmark = gym.benchmark_spec('Atari40M')
task = benchmark.tasks[3]
env = gym.make(task.env_id)
# env = gym.make('Pong-ram-v0')
for i_episode in range(20):
    observation = env.reset()
    for t in range(100):
        env.render()
        # print(observation)
        # action = env.action_space.sample()
        action = 2
        observation, reward, done, info = env.step(action)
        print(action)
        if done:
            print("Episode finished after {} timesteps".format(t + 1))
            break
Example #35
File: api.py Project: arboo/gym
def upload(training_dir, algorithm_id=None, writeup=None, tags=None, benchmark_id=None, api_key=None, ignore_open_monitors=False):
    """Upload the results of training (as automatically recorded by your
    env's monitor) to OpenAI Gym.

    Args:
        training_dir (Optional[str]): A directory containing the results of a training run.
        algorithm_id (Optional[str]): An algorithm id indicating the particular version of the algorithm (including choices of parameters) you are running (visit https://gym.openai.com/algorithms to create an id). If the id doesn't match an existing server id it will create a new algorithm using algorithm_id as the name
        benchmark_id (Optional[str]): The benchmark that these evaluations belong to. Will recursively search through training_dir for any Gym manifests. This feature is currently pre-release.
        writeup (Optional[str]): A Gist URL (of the form https://gist.github.com/<user>/<id>) containing your writeup for this evaluation.
        tags (Optional[dict]): A dictionary of key/values to store with the benchmark run (ignored for nonbenchmark evaluations). Must be jsonable.
        api_key (Optional[str]): Your OpenAI API key. Can also be provided as an environment variable (OPENAI_GYM_API_KEY).
    """

    if benchmark_id:
        # We're uploading a benchmark run.

        directories = []
        env_ids = []
        for name, _, files in os.walk(training_dir):
            manifests = monitoring.detect_training_manifests(name, files=files)
            if manifests:
                env_info = monitoring.load_env_info_from_manifests(manifests, training_dir)
                env_ids.append(env_info['env_id'])
                directories.append(name)

        # Validate against benchmark spec
        try:
            spec = benchmark_spec(benchmark_id)
        except error.UnregisteredBenchmark as e:
            raise error.Error("Invalid benchmark id: {}. Are you using a benchmark registered in gym/benchmarks/__init__.py?".format(benchmark_id))

        # TODO: verify that the number of trials matches
        spec_env_ids = [task.env_id for task in spec.tasks for _ in range(task.trials)]

        if not env_ids:
            raise error.Error("Could not find any evaluations in {}".format(training_dir))

        # This could be more stringent about mixing evaluations
        if sorted(env_ids) != sorted(spec_env_ids):
            logger.info("WARNING: Evaluations do not match spec for benchmark {}. In {}, we found evaluations for {}, expected {}".format(benchmark_id, training_dir, sorted(env_ids), sorted(spec_env_ids)))

        benchmark_run = resource.BenchmarkRun.create(benchmark_id=benchmark_id, algorithm_id=algorithm_id, tags=json.dumps(tags))
        benchmark_run_id = benchmark_run.id

        # Actually do the uploads.
        for training_dir in directories:
            # N.B. we don't propagate algorithm_id to Evaluation if we're running as part of a benchmark
            _upload(training_dir, None, writeup, benchmark_run_id, api_key, ignore_open_monitors)

        logger.info("""
****************************************************
You successfully uploaded your benchmark on %s to
OpenAI Gym! You can find it at:

    %s

****************************************************
        """.rstrip(), benchmark_id, benchmark_run.web_url())

        return benchmark_run_id
    else:
        if tags is not None:
            logger.warn("Tags will NOT be uploaded for this submission.")
        # Single evaluation upload
        benchmark_run_id = None
        evaluation = _upload(training_dir, algorithm_id, writeup, benchmark_run_id, api_key, ignore_open_monitors)

        logger.info("""
****************************************************
You successfully uploaded your evaluation on %s to
OpenAI Gym! You can find it at:

    %s

****************************************************
        """.rstrip(), evaluation.env, evaluation.web_url())

        return None
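A hedged usage sketch for the function above, covering the simple non-benchmark case; the results directory and algorithm id are illustrative, and the API key is read from the environment variable named in the docstring:

import os

upload('/tmp/pong-results',
       algorithm_id='my-dqn',
       api_key=os.environ.get('OPENAI_GYM_API_KEY'))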