def run_simulation(conf):
    agent_1 = a.make_agent(pid.P1, conf)
    agent_2 = a.make_agent(pid.P2, conf)
    total_games = args.g
    for i in range(total_games):
        simulate(agent_1, agent_2)
    prefix = conf.get_prefix()
    p.plot_simulation_results(agent_1, agent_2, prefix)
    p.plot_scores(agent_1, agent_2, prefix)
    conf.log()
    print('Done')

def exec_second_pass(config):
    start_time = time.time()
    config['result_dir'] = config['result_dir_prefix'] + '/lr-' + str(config['lr'])
    try:
        # We create the agent
        env = gym.make(config['env_name'])
        agent = make_agent(config, env)
        # We train the agent
        agent.train(save_every=-1)

        stats = get_stats(config['result_dir'], ["score"])
        mean_score = np.mean(stats['score'])
        stddev_score = np.std(stats['score'])
        result = {
            'lr': config['lr'],
            'mean_score': mean_score,
            'stddev_score': stddev_score,
        }

        seconds = int(round(time.time() - start_time))
        print("Run with lr: {} | {}".format(config['lr'], time.ctime()))
        print("%d seconds." % seconds)
    except Exception:
        result = {
            'lr': config['lr'],
            'mean_score': 0,
            'stddev_score': 0,
            'error': str(sys.exc_info()[0]),
            'error_message': str(sys.exc_info()[1]),
        }
    return result

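# A minimal sketch (not from the original source) of how exec_second_pass could be
# driven by a learning-rate sweep; the driver name and the lr grid below are
# illustrative assumptions only.
def second_pass_sketch(base_config):
    results = []
    for lr in [1e-4, 1e-3, 1e-2, 1e-1]:  # hypothetical learning-rate grid
        config = dict(base_config)
        config['lr'] = lr
        results.append(exec_second_pass(config))
    # Best learning rate first, mirroring how main() reads summary['results'][0]['lr']
    results.sort(key=lambda r: r['mean_score'], reverse=True)
    return {'results': results}
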
def eval_proc(file_name):
    print(file_name)
    f = open(os.path.join('./log_more', file_name), 'w+')
    types = ['RANDOM', 'RHCP', 'CDQN', 'MCT']
    # for role_id in [2, 3, 1]:
    #     for ta in types:
    #         agent = make_agent(ta, role_id)
    #         for i in range(1):
    #             env = make_env('MCT')
    #             st = StatCounter()
    #             for j in tqdm(range(100)):
    #                 winning_rate = eval_episode(env, agent)
    #                 st.feed(winning_rate)
    #             f.write('%s with role id %d against %s, winning rate: %f\n' % (ta, role_id, 'MCT', st.average))
    for role_id in [2, 3, 1]:
        agent = make_agent('MCT', role_id)
        for i in range(1):
            for te in types:
                env = make_env(te)
                st = StatCounter()
                for j in tqdm(range(100)):
                    winning_rate = eval_episode(env, agent)
                    st.feed(winning_rate)
                f.write('%s with role id %d against %s, winning rate: %f\n'
                        % ('MCT', role_id, te, st.average))
    f.close()

def onlytest():
    # env = make_env('RHCP')
    # env = make_env('RANDOM')
    # env = make_env('MCT')
    # env = make_env('CDQN')
    env = make_env('CDQN')
    agent = make_agent('RANDOM', 1)
    eval_episode(env, agent)

def main(_):
    config = flags.FLAGS.__flags.copy()
    # fixed_params must be a string to be passed in the shell, let's use JSON
    config["fixed_params"] = json.loads(config["fixed_params"])

    if config['fullsearch']:
        print('Hyperparameter search not implemented yet')
    else:
        agent = make_agent(config)

        if config['infer']:
            # Some code for inference ...
            agent.infer()
        elif config['test']:
            agent.test()
        else:
            # Some code for training ...
            agent.train()

def eval_proc(file_name):
    print(file_name)
    # os.path.join needs the file name as a separate argument, otherwise it is
    # simply concatenated to './log'
    f = open(os.path.join('./log', file_name), 'w+')
    # `types` is expected at module level, e.g. ['RANDOM', 'RHCP', 'CDQN', 'MCT']
    for te in types:
        for ta in types:
            for role_id in [2, 3, 1]:
                agent = make_agent(ta, role_id)
                for i in range(1):
                    env = make_env(te)
                    st = StatCounter()
                    with get_tqdm(total=100) as pbar:
                        for j in range(100):
                            winning_rate = eval_episode(env, agent)
                            st.feed(winning_rate)
                            pbar.update()
                    f.write('%s with role id %d against %s, winning rate: %f\n'
                            % (ta, role_id, te, st.average))
    f.close()

def run_params(nb_epochs, params, main_config):
    config = copy.deepcopy(main_config)
    config.update(params)
    config['result_dir'] = (config['result_dir_prefix'] + '/' + config['env_name'] + '/'
                            + config['agent_name'] + '/run-' + str(config['id']).zfill(3))
    config['max_iter'] = int(nb_epochs) * config['games_per_epoch']

    # If we are reusing a configuration, we remove its folder before the next training
    if os.path.exists(config['result_dir']):
        shutil.rmtree(config['result_dir'])

    try:
        # We create the agent
        env = gym.make(config['env_name'])
        agent = make_agent(config, env)
        # We train the agent
        agent.train(save_every=-1)
        agent.save()

        mean_score, stddev_score = get_score_stat(config['result_dir'])
        result = {
            'loss': -mean_score,
            'mean_score': mean_score,
            'stddev_score': stddev_score,
        }
    except Exception:
        result = {
            'loss': 0,
            'mean_score': 0,
            'stddev_score': 0,
            'error': str(sys.exc_info()[0]),
            'error_message': str(sys.exc_info()[1]),
        }

    # If we are training for fewer than 9 epochs, we remove the folder
    if nb_epochs < 9 and os.path.exists(config['result_dir']):
        shutil.rmtree(config['result_dir'])

    return result

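# A minimal sketch (not from the original source) of how the Hyperband driver built in
# main() is expected to call run_params; the budget handling, the survivor fraction and
# the helper name are illustrative assumptions.
def hyperband_round_sketch(main_config, sampled_params, nb_epochs):
    scored = []
    for i, params in enumerate(sampled_params):
        params = dict(params, id=i)  # run_params reads config['id'] to name the run folder
        out = run_params(nb_epochs, params, main_config)
        scored.append((out['loss'], params))
    scored.sort(key=lambda t: t[0])  # lower loss means higher mean score
    # Keep roughly the best third; rerun the survivors with a larger nb_epochs budget
    return [p for _, p in scored[:max(1, len(scored) // 3)]]
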
def test_params(counter, config, params):
    start_time = time.time()
    run_id = str(counter).zfill(4)
    config['result_dir'] = config['result_dir_prefix'] + '/run-' + run_id
    try:
        # We create the env and agent
        env = gym.make(config['env_name'])
        env.seed(config['random_seed'])
        agent = make_agent(config, env)
        # We train the agent
        agent.train(save_every=-1)

        stats = get_stats(config['result_dir'], ["score"])
        print(stats)
        mean_score = np.mean(stats['score'])
        stddev_score = np.std(stats['score'])
        result = {
            'run_id': run_id,
            'params': params,
            'mean_score': mean_score,
            'stddev_score': stddev_score,
        }

        seconds = int(round(time.time() - start_time))
        print("Run: {} | {}, mean_score {}".format(counter, time.ctime(), mean_score))
        print("%d seconds." % seconds)
    except Exception:
        result = {
            'params': params,
            'mean_score': 0,
            'stddev_score': 0,
            'error': str(sys.exc_info()[0]),
            'error_message': str(sys.exc_info()[1]),
        }
    return result

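# A minimal sketch (not from the original source) of a random-search driver around
# test_params; the driver name and the nb_runs default are assumptions, while
# get_agent_class(config).get_random_config is the sampler used elsewhere in main().
def random_search_sketch(config, nb_runs=20):
    get_random_config = get_agent_class(config).get_random_config
    results = []
    for counter in range(nb_runs):
        params = get_random_config()  # sample one hyperparameter set
        run_config = dict(config)
        run_config.update(params)
        results.append(test_params(counter, run_config, params))
    results.sort(key=lambda r: r['mean_score'], reverse=True)
    return {'results': results}
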
def test_mcagent_act(self):
    config = {
        'lr': 1,  # unused
        'agent_name': 'TabularMCAgent',
        'env_name': 'CartPole-v0',
        'random_seed': 0,
        'result_dir': dir + '/results',
        'discount': 1.,
        # 'debug': True
    }
    np.random.seed(0)
    config.update(get_agent_class(config).get_random_config())
    config['discount'] = 1.

    env = gym.make(config['env_name'])
    env.seed(0)
    agent = make_agent(config, env)

    act, state_id = agent.act(env.reset())
    self.assertEqual(act, 1)
    self.assertEqual(state_id, 144)

def exec_first_pass(counter, config, params):
    start_time = time.time()
    config['result_dir'] = config['result_dir_prefix'] + '/run-' + str(counter).zfill(3)
    try:
        # We create the agent
        env = gym.make(config['env_name'])
        agent = make_agent(config, env)
        # We train the agent
        agent.train(save_every=-1)

        mean_score, stddev_score = get_stats(config['result_dir'])
        result = {
            'params': params,
            'mean_score': mean_score,
            'stddev_score': stddev_score,
        }

        seconds = int(round(time.time() - start_time))
        print("Run: {} | {}, mean_score {}".format(counter, time.ctime(), mean_score))
        print("%d seconds." % seconds)
    except Exception:
        result = {
            'params': params,
            'mean_score': 0,
            'stddev_score': 0,
            'error': str(sys.exc_info()[0]),
            'error_message': str(sys.exc_info()[1]),
        }

    if os.path.exists(config['result_dir']):
        shutil.rmtree(config['result_dir'])
    return result

def main(_):
    config = flags.FLAGS.__flags.copy()
    env = gym.make(config['env_name'])
    agent = make_agent(config, env)

    # Fetch the learned Q-values and plot their magnitudes as a 3D bar chart
    qs = agent.sess.run(agent.Qs)

    fig = plt.figure()
    ax = fig.add_subplot(111, projection='3d')

    x = np.linspace(0, len(qs) - 1, len(qs))
    y = np.linspace(0, len(qs[0]) - 1, len(qs[0]))
    xpos, ypos = np.meshgrid(x, y, indexing='ij')
    xpos = xpos.flatten('F')
    ypos = ypos.flatten('F')

    num_elements = len(qs) * len(qs[0])
    zpos = np.zeros(num_elements)
    dx = np.ones(num_elements)
    dy = np.ones(num_elements)
    dz = np.abs(np.array(qs).flatten('F'))

    ax.bar3d(xpos, ypos, zpos, dx, dy, dz, color='#00ceaa')
    plt.savefig('test.png')

def test_mcagent_learn_from_episode(self):
    config = {
        'lr': 1,  # unused
        'agent_name': 'TabularMCAgent',
        'env_name': 'CartPole-v0',
        'random_seed': 0,
        'result_dir': dir + '/results',
        'discount': 1.,
        # 'debug': True
    }
    np.random.seed(0)
    config.update(get_agent_class(config).get_random_config())
    config['discount'] = 1.

    env = gym.make(config['env_name'])
    env.seed(0)
    agent = make_agent(config, env)

    agent.learn_from_episode(env)
    qs = agent.sess.run(agent.Qs)
    # for i, q in enumerate(qs):
    #     print(i, q)
    self.assertTrue(np.allclose(qs[126], [4.49999952, 1.99999976]))

def main(_):
    config = flags.FLAGS.__flags.copy()
    config["fixed_params"] = json.loads(config["fixed_params"])

    # if os.path.isfile(config['result_dir'] + '/config.json'):
    #     print("Overriding shell configuration with the one found in " + config['result_dir'])
    #     with open(config['result_dir'] + '/config.json', 'r') as f:
    #         config = json.loads(f.read())

    if config['hyperband']:
        print('Starting hyperband search')
        config['result_dir_prefix'] = dir + '/results/hyperband/' + str(int(time.time()))

        get_params = get_agent_class(config).get_random_config
        hb = Hyperband(get_params, run_params)
        results = hb.run(config, skip_last=True, dry_run=config['dry_run'])

        if not os.path.exists(config['result_dir_prefix']):
            os.makedirs(config['result_dir_prefix'])
        with open(config['result_dir_prefix'] + '/hb_results.json', 'w') as f:
            json.dump(results, f)
    elif config['fullsearch']:
        print('*** Starting full search')
        config['result_dir_prefix'] = (dir + '/results/fullsearch/' + str(int(time.time()))
                                       + '-' + config['agent_name'])
        os.makedirs(config['result_dir_prefix'])

        print('*** Starting first pass: full random search')
        summary = fullsearch.first_pass(config)
        with open(config['result_dir_prefix'] + '/fullsearch_results1.json', 'w') as f:
            json.dump(summary, f)

        print('*** Starting second pass: Learning rate search')
        best_agent_config = summary['results'][0]['params']
        summary = fullsearch.second_pass(config, best_agent_config)
        with open(config['result_dir_prefix'] + '/fullsearch_results2.json', 'w') as f:
            json.dump(summary, f)

        print('*** Starting third pass: Hyperband search with best lr')
        best_lr = summary['results'][0]['lr']
        summary = fullsearch.third_pass(config, best_lr)
        with open(config['result_dir_prefix'] + '/fullsearch_results3.json', 'w') as f:
            json.dump(summary, f)
    elif config['randomsearch']:
        print('*** Starting random search')
        config['result_dir_prefix'] = (dir + '/results/randomsearch/' + str(int(time.time()))
                                       + '-' + config['agent_name'])
        os.makedirs(config['result_dir_prefix'])

        summary = randomsearch.search(config)
        with open(config['result_dir_prefix'] + '/fullsearch_results1.json', 'w') as f:
            json.dump(summary, f)
    else:
        env = gym.make(config['env_name'])
        agent = make_agent(config, env)

        if config['play']:
            for i in range(config['play_nb']):
                agent.play(env)
        else:
            agent.train()
            agent.save()

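# Hypothetical command lines for the dispatcher above, assuming this file is the script
# entry point (called main.py here for illustration) and that flags with the same names
# as the config keys it reads (agent_name, env_name, hyperband, randomsearch, play,
# play_nb, ...) are defined elsewhere in the project; the flag definitions themselves
# are not shown in this file.
#
#   python main.py --agent_name=TabularMCAgent --env_name=CartPole-v0                    # plain training
#   python main.py --agent_name=TabularMCAgent --env_name=CartPole-v0 --hyperband=True   # hyperband search
#   python main.py --agent_name=TabularMCAgent --env_name=CartPole-v0 --play=True --play_nb=5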