Example #1
def simulateGame(player, opponent):
    # Returns fitness delta
    game = ConnectFour()
    illegal_moves = 0
    random_moves_left = 3
    while not game.isFinished():

        # TODO track stats?

        if game.isOurTurn():
            try:
                pickAndMakeMove(game, player)
            except IndexError:
                illegal_moves += 1
                if illegal_moves >= NUMBER_ILLEGAL_MOVES_ALLOWED:
                    # Penalise player
                    return -NUMBER_TO_SAMPLE
                else:
                    continue

            if game.isFinished():
                break

            if random_moves_left > 0:
                pickAndMakeMove(game, agents.RandomAgent())
                random_moves_left -= 1
            else:
                try:
                    pickAndMakeMove(game, opponent)
                except IndexError:
                    pickAndMakeMove(game, agents.RandomAgent())
        else: # Not our turn
            if random_moves_left > 0:
                pickAndMakeMove(game, agents.RandomAgent())
                random_moves_left -= 1
            else:
                try:
                    pickAndMakeMove(game, opponent)
                except IndexError:
                    pickAndMakeMove(game, agents.RandomAgent())

            if game.isFinished():
                break

            try:
                pickAndMakeMove(game, player)
            except IndexError:
                illegal_moves += 1
                if illegal_moves >= NUMBER_ILLEGAL_MOVES_ALLOWED:
                    # Penalise player
                    return -NUMBER_TO_SAMPLE
                else:
                    continue

    # Game is finished (or illegal move made)

    # TODO debug prints, or stats

    return game.score()
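The example above relies on a pickAndMakeMove helper imported from utilities (see Example #24) that is expected to raise IndexError when the agent proposes an illegal move; that is how simulateGame counts and penalises illegal moves. A minimal sketch of such a helper, assuming the possibleMoves()/playMove() game API shown in Example #24 and a hypothetical agent.pickMove(game) method (the real utilities module may differ):

def pickAndMakeMove(game, agent):
    # Hypothetical sketch: ask the agent for a column and play it.
    column = agent.pickMove(game)
    if column not in game.possibleMoves():
        # Surface illegal choices as IndexError, which simulateGame counts and penalises.
        raise IndexError(column)
    game.playMove(column)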
Example #2
    def __init__(self, env, feature_mapper):

        # Same mapper
        self.feature_mapper = feature_mapper

        # Play one episode to generate data for scaler
        random_agent = agents.RandomAgent(env.n_stocks)
        X = []

        done = False
        state = env.reset()
        X.append(self.feature_mapper(state))

        while not done:
            action = random_agent.get_action(state)
            next_state, reward, done, info = env.step(action)
            state = next_state.copy()
            X.append(self.feature_mapper(state))

        # Create and fit scaler
        self.scaler = StandardScaler()
        self.scaler.fit(X)

        # Record metadata
        self.size = len(X[0])
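Example #2 fits a StandardScaler (sklearn.preprocessing) on feature-mapped states gathered from one random-agent episode. A sketch of a companion method on the same class, assuming it keeps self.scaler and self.feature_mapper exactly as above (the real class may expose something different):

    def transform(self, state):
        # Sketch: map the raw state with the shared feature mapper, then apply
        # the scaler fitted in __init__; returns a 1-D scaled feature vector.
        return self.scaler.transform([self.feature_mapper(state)])[0]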
Example #3
File: main.py  Project: a100q100/marioai
def main():
    agent = agents.RandomAgent()
    task = marioai.Task()
    exp = marioai.Experiment(task, agent)

    exp.max_fps = 24
    task.env.level_type = 0
    exp.doEpisodes()
Example #4
    def __init__(self, env, monitor='output/', seed=None):
        self.env = env
        self.agents = {'universe': agents.A3C(env, monitor + 'universe/',
                                              CHECKPOINTS + '/universe/' + env + '/', 1),
                       'tensorpack': agents.TPAgent(env, monitor + 'tensorpack/',
                                                    CHECKPOINTS + '/tensorpack/' + env + '/' + env, 1),
                       'random': agents.RandomAgent()}
        self.seed = seed
        self.best = ''
Example #5
def next_turn(action):
    agent = agents.RandomAgent()
    if agent.make_move(1, action) is None:
        return (action, 0, True)
    else:
        move = agent.make_move(-1, action)
        if move is None:
            return (action, 1, True)
        else:
            return (move, 0, False)
Example #6
def get_agent(args, env):
    """Get agent by name"""
    if args.agent == 'random':
        agent = agents.RandomAgent(env.action_space.n)
    elif args.agent == 'mfec':
        agent = agents.MFECAgent(env.action_space.n,
                                 84*84, args.logdir,
                                 hash_bits=args.hash_bits,
                                 projection_size=args.projection_size,
                                 epsilon_steps=args.epsilon_steps)
    else:
        raise ValueError('unknown agent: {}'.format(args.agent))
    return agent
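A hypothetical call to get_agent, assuming an argparse namespace that carries the fields the function reads (agent, logdir, hash_bits, projection_size, epsilon_steps) and any Gym environment with a discrete action space; the names and values below are illustrative only:

import argparse
import gym

args = argparse.Namespace(agent='random', logdir='logs/', hash_bits=64,
                          projection_size=64, epsilon_steps=10000)
env = gym.make('CartPole-v0')
agent = get_agent(args, env)  # -> agents.RandomAgent(env.action_space.n)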
Example #7
File: main.py  Project: cjqian/flappy
def generate_training_data(episodes):
    env = gym.make('CartPole-v0')
    env._max_episode_steps = game_lib.TIMESTEPS

    agent = agents.RandomAgent(env)
    start = time.time()
    game_lib.play(env,
                  agent,
                  episodes=episodes,
                  score_threshold=80,
                  render=False,
                  save_training_data=True)

    print('Elapsed time: {t} seconds'.format(t=round(time.time() - start, 4)))

    env.close()
Example #8
def main():
    env = gym.make('gym_mmab:mmab-v0', n_players=3, n_arms=5)
    # env = gym.make('gym_mmab:mmab-v0')  # With no arguments, the defaults are n_players=3, n_arms=10


    agent0_candidates = [agents.Agent(0, env.n_arms), agents.RandomAgent(0, env.n_arms), agents.QAgent(0, env.n_arms)]
    saved_obs_history = []
    for agent0 in agent0_candidates:
        print("====================")
        print(f"Testing with {type(agent0).__name__}")
        print("====================")
        obs_history = test_selectedagent(env, agent0)
        saved_obs_history.append(obs_history)
        print()

    print("====================")
    print("Comparing cumulative rewards in last 100 rounds")
    for n, agent0 in enumerate(agent0_candidates):
        tot_rewards = np.array(saved_obs_history[n][-100:]).sum(axis=0)[0]
        print(f"{type(agent0).__name__:>14}: {tot_rewards}")
    print()
Example #9
def test_agents():
  """Tests for the stock agents."""
  result = True
  game = ttt.TicTacToe(3, 3)
  random = ag.RandomAgent('Random', game)
  dumb = ag.DumbAgent('Dumb', game)
  optimal = ag.MinimaxAgent('Minimax', game)
  # Smoke test for the random agent.
  game.take_action(random.select_move())
  game.undo_action()
  result = result and expect_equal(dumb.select_move(), 0, 'dumb first move')
  game.take_action(0)
  result = result and expect_equal(dumb.select_move(), 1, 'dumb second move')
  game.take_action(4)
  game.take_action(3)
  result = result and expect_equal(optimal.select_move(), 6, 'minimax move')
  result = result and expect_equal(optimal.optimal_moves(), [6],
                                   'minimax optimal moves')
  game.reset()
  # result = result and expect_equal(optimal.optimal_moves(), range(9), 'minimax all moves optimal')
  test_result(result, 'Agents Test')
Example #10
def main():
    game = ttt.TicTacToe(8, 4)
    random = ag.RandomAgent('Random', game)
    config = rl.Config(
        training_epochs=2,
        games_per_epoch=10,
        rollouts_per_move=20,
        rollout_depth=4,
        rollout_policy=functools.partial(po.alpha_zero_mcts_policy,
                                         c_puct=10.0),
        play_policy=functools.partial(po.alpha_zero_play_policy, tau=1.5),
        inference_policy=po.greedy_prior_policy,
        opponent_rollout_policy=None,
        opponent_play_policy=None,
        policy_target=functools.partial(po.alpha_zero_visit_counts_to_target,
                                        action_space=game.action_space(),
                                        tau=1.0),
        inference_rollouts_per_move=40,
        inference_rollout_depth=4)
    model = mo.KerasModel(game, [128], [64, 32], [16, 4], data_passes=100)
    agent = rl.RLAgent('RL Agent', game, model, config, [random], 100)
    agent.train(print_progress=True)
    gl.play_match(g=game, agent_a=agent, agent_b=random, num_games=4)
    gl.interactive_play(game, agent)
Example #11
                    type=str,
                    default=None,
                    help="weights files, only valid for --agent=neural")
parser.add_argument(
    '--lite-weights',
    type=str,
    default=None,
    help="tf lite weights files, must be set for --agent=neural_lite")
parser.add_argument('--trials',
                    type=int,
                    default=10,
                    help='num trials to run; new agent per trial')
opts = parser.parse_args()

evaluator = cartpole_fitness.CartPoleFitness(render=opts.env_render)

print("trial\ttotal_reward")
for trial_idx in range(opts.trials):
    if opts.agent == 'random':
        agent = agents.RandomAgent()
    elif opts.agent == 'neural':
        agent = agents.NeuralAgent()
        if opts.weights is not None:
            agent.set_weights_of_model(np.load(opts.weights))
    elif opts.agent == 'neural_lite':
        agent = agents.NeuralLiteAgent(tflite_file=opts.lite_weights)
    else:
        raise Exception("unexpected agent type [%s]" % opts.agent)
    print("%d\t%d" % (trial_idx, evaluator.fitness(agent)))
    sys.stdout.flush()
Example #12
import pickle
import matplotlib.pyplot as plt
test_p = pickle.load(open("agentdata/X1QLAGENT_GAMES_100007-19-46-16.p", "rb"))
from spades import Spades
from spades import run_x_games_and_pickle
import spades
import agents
agent_25k = pickle.load(
    open("agentdata/X2QLAGENT_GAMES_250007-20-50-0.p", "rb"))
agent_10k = pickle.load(open("agentdata/QLAGENT_GAMES_100007-21-1-43.p", "rb"))

if __name__ == "__main__":
    ql = agents.QLearningAgent("test", epsilon=0)
    agent_10k.epsilon = 0
    players = [agent_10k, agents.RandomAgent(2)]
    run_x_games_and_pickle(players, 2000)
Example #13
import agents
import pong_env
import pygame
import matplotlib.pyplot as plt
from datetime import datetime

# get the current date and time for the file name
now = datetime.now()

player_random = agents.RandomAgent(3)
player_pg = agents.PolicyGradientAgent(5, 3)

'''
#player pg uses saved model
model = "pg_model_1_3"
player_pg.from_load_model(model)
print("Model loaded.")
'''

num_play = 2
clock = pygame.time.Clock()
scores = []

break_learning = False
display = True
for i in range(num_play):
    done = False
    score = 0
    game = pong_env.Pong()
    while not done:
        if display:
Example #14
File: retecs.py  Project: Jcaner/retecs
    def train(self, no_scenarios, print_log, plot_graphs, save_graphs, collect_comparison=False):
        stats = {
            'scenarios': [],
            'rewards': [],
            'durations': [],
            'detected': [],
            'missed': [],
            'ttf': [],
            'napfd': [],
            'recall': [],
            'avg_precision': [],
            'result': [],
            'step': [],
            'env': self.scenario_provider.name,
            'agent': self.agent.name,
            'action_size': self.agent.action_size,
            'history_length': self.agent.histlen,
            'rewardfun': self.reward_function.__name__,
            'sched_time': self.scenario_provider.avail_time_ratio,
            'hidden_size': 'x'.join(str(x) for x in self.agent.hidden_size) if hasattr(self.agent, 'hidden_size') else 0
        }

        if collect_comparison:
            cmp_agents = {
                'heur_sort': agents.HeuristicSortAgent(self.agent.histlen),
                'heur_weight': agents.HeuristicWeightAgent(self.agent.histlen),
                'heur_random': agents.RandomAgent(self.agent.histlen)
            }

            stats['comparison'] = {}

            for key in cmp_agents.keys():
                stats['comparison'][key] = {
                    'detected': [],
                    'missed': [],
                    'ttf': [],
                    'napfd': [],
                    'recall': [],
                    'avg_precision': [],
                    'durations': []
                }

        sum_actions = 0
        sum_scenarios = 0
        sum_detected = 0
        sum_missed = 0
        sum_reward = 0

        for (i, sc) in enumerate(self.scenario_provider, start=1):
            if i > no_scenarios:
                break

            start = time.time()

            if print_log:
                print('ep %d:\tscenario %s\t' % (sum_scenarios + 1, sc.name), end='')

            (result, reward) = self.process_scenario(sc)

            end = time.time()

            # Statistics
            sum_detected += result[0]
            sum_missed += result[1]
            sum_reward += np.mean(reward)
            sum_actions += 1
            sum_scenarios += 1
            duration = end - start

            stats['scenarios'].append(sc.name)
            stats['rewards'].append(np.mean(reward))
            stats['durations'].append(duration)
            stats['detected'].append(result[0])
            stats['missed'].append(result[1])
            stats['ttf'].append(result[2])
            stats['napfd'].append(result[3])
            stats['recall'].append(result[4])
            stats['avg_precision'].append(result[5])
            stats['result'].append(result)
            stats['step'].append(sum_scenarios)

            if print_log:
                print(' finished, reward: %.2f,\trunning mean: %.4f,\tduration: %.1f,\tresult: %s' %
                      (np.mean(reward), sum_reward / sum_scenarios, duration, result))

            if collect_comparison:
                for key in stats['comparison'].keys():
                    start = time.time()
                    cmp_res = process_scenario(cmp_agents[key], sc, preprocess_discrete)
                    end = time.time()
                    stats['comparison'][key]['detected'].append(cmp_res[0])
                    stats['comparison'][key]['missed'].append(cmp_res[1])
                    stats['comparison'][key]['ttf'].append(cmp_res[2])
                    stats['comparison'][key]['napfd'].append(cmp_res[3])
                    stats['comparison'][key]['recall'].append(cmp_res[4])
                    stats['comparison'][key]['avg_precision'].append(cmp_res[5])
                    stats['comparison'][key]['durations'].append(end - start)

            # Data Dumping
            if self.dump_interval > 0 and sum_scenarios % self.dump_interval == 0:
                pickle.dump(stats, open(self.stats_file + '.p', 'wb'))

            if self.validation_interval > 0 and (sum_scenarios == 1 or sum_scenarios % self.validation_interval == 0):
                if print_log:
                    print('ep %d:\tRun test... ' % sum_scenarios, end='')

                self.run_validation(sum_scenarios)
                pickle.dump(self.validation_res, open(self.val_file + '.p', 'wb'))

                if print_log:
                    print('done')

        if self.dump_interval > 0:
            self.agent.save(self.agent_file)
            pickle.dump(stats, open(self.stats_file + '.p', 'wb'))

        if plot_graphs:
            plot_stats.plot_stats_single_figure(self.file_prefix, self.stats_file + '.p', self.val_file + '.p', 1,
                                                plot_graphs=plot_graphs, save_graphs=save_graphs)

        if save_graphs:
            plot_stats.plot_stats_separate_figures(self.file_prefix, self.stats_file + '.p', self.val_file + '.p', 1,
                                                   plot_graphs=False, save_graphs=save_graphs)

        return np.mean(stats['napfd'])
Example #15
    plt.ylabel("Cumulative Reward")
    plt.show()


def plotQ(Q):
    states = [[0, 0], [0, 1], [1, 0], [1, 1]]
    for state in states:
        for a in [0, 1]:
            print("Q[{},{}]={}".format(state, a, Q[env.asint(state), a]))


# Number of iterations
n_iter = 1000

# environment specs
env = EvidenceEnv(n=2, p=0.75)

agent = agents.RandomAgent(env)
runAgent()

# define agent
agent = agents.TabularQAgent(env)
plotQ(agent.Q)
runAgent()
plotQ(agent.Q)

actualQ = agent.Q
agent = agents.NeuralAgent(env, actualQ)
plotQ(agent.Q)
runAgent()
plotQ(agent.Q)
Example #16
import pickle
import matplotlib.pyplot as plt
test_p = pickle.load(open("agentdata/QLAGENT_GAMES_100007-18-2-16.p", "rb"))
from spades import Spades
from spades import run_x_games_and_pickle
import spades
import agents

if __name__ == "__main__":
    players = [agents.QLearningAgent(1), agents.RandomAgent(4)]
    run_x_games_and_pickle(players, 100000)


Example #17
def main():
    game = ck.Checkers(8, 3)
    random = ag.RandomAgent('Random', game)
    print(game.action_space())
    gl.interactive_play(game, random)
Example #18
    def set_agents(self, model_path_a, model_path_b, model_path_m):

        if model_path_a == 'human' or model_path_b == 'human':
            game_mode = 'pygame'
        else:
            game_mode = 'text'

        self.env = game.GameState(game_mode)

        if model_path_a == 'random':
            print('load player model:', model_path_a)
            self.player = agents.RandomAgent(BOARD_SIZE)
        elif model_path_a == 'puct':
            print('load player model:', model_path_a)
            self.player = agents.PUCTAgent(BOARD_SIZE, N_MCTS_PLAYER)
        elif model_path_a == 'uct':
            print('load player model:', model_path_a)
            self.player = agents.UCTAgent(BOARD_SIZE, N_MCTS_PLAYER)
        elif model_path_a == 'human':
            print('load player model:', model_path_a)
            self.player = agents.HumanAgent(BOARD_SIZE, self.env)
        elif model_path_a == 'web':
            print('load player model:', model_path_a)
            self.player = agents.WebAgent(BOARD_SIZE)
        else:
            print('load player model:', model_path_a)
            self.player = agents.ZeroAgent(BOARD_SIZE,
                                           N_MCTS_PLAYER,
                                           IN_PLANES_PLAYER,
                                           noise=False)
            self.player.model = model.PVNet(N_BLOCKS_PLAYER,
                                            IN_PLANES_PLAYER,
                                            OUT_PLANES_PLAYER,
                                            BOARD_SIZE).to(device)
            state_a = self.player.model.state_dict()
            my_state_a = torch.load(
                model_path_a, map_location='cuda:0' if use_cuda else 'cpu')
            for k, v in my_state_a.items():
                if k in state_a:
                    state_a[k] = v
            self.player.model.load_state_dict(state_a)

        if model_path_b == 'random':
            print('load enemy model:', model_path_b)
            self.enemy = agents.RandomAgent(BOARD_SIZE)
        elif model_path_b == 'puct':
            print('load enemy model:', model_path_b)
            self.enemy = agents.PUCTAgent(BOARD_SIZE, N_MCTS_ENEMY)
        elif model_path_b == 'uct':
            print('load enemy model:', model_path_b)
            self.enemy = agents.UCTAgent(BOARD_SIZE, N_MCTS_ENEMY)
        elif model_path_b == 'human':
            print('load enemy model:', model_path_b)
            self.enemy = agents.HumanAgent(BOARD_SIZE, self.env)
        elif model_path_b == 'web':
            print('load enemy model:', model_path_b)
            self.enemy = agents.WebAgent(BOARD_SIZE)
        else:
            print('load enemy model:', model_path_b)
            self.enemy = agents.ZeroAgent(BOARD_SIZE,
                                          N_MCTS_ENEMY,
                                          IN_PLANES_ENEMY,
                                          noise=False)
            self.enemy.model = model.PVNet(N_BLOCKS_ENEMY,
                                           IN_PLANES_ENEMY,
                                           OUT_PLANES_ENEMY,
                                           BOARD_SIZE).to(device)
            state_b = self.enemy.model.state_dict()
            my_state_b = torch.load(
                model_path_b, map_location='cuda:0' if use_cuda else 'cpu')
            for k, v in my_state_b.items():
                if k in state_b:
                    state_b[k] = v
            self.enemy.model.load_state_dict(state_b)

        # monitor agent
        self.monitor = agents.ZeroAgent(BOARD_SIZE,
                                        N_MCTS_MONITOR,
                                        IN_PLANES_ENEMY,
                                        noise=False)
        self.monitor.model = model.PVNet(N_BLOCKS_ENEMY,
                                         IN_PLANES_ENEMY,
                                         OUT_PLANES_ENEMY,
                                         BOARD_SIZE).to(device)
        state_b = self.monitor.model.state_dict()
        my_state_b = torch.load(
            model_path_m, map_location='cuda:0' if use_cuda else 'cpu')
        for k, v in my_state_b.items():
            if k in state_b:
                state_b[k] = v
        self.monitor.model.load_state_dict(state_b)
Example #19
import agents
import models
import Stockmarket
from helpers import play_game
import numpy as np

INITIAL_INVESTMENT = 20000.0

train_env = Stockmarket.StockMarket("train", INITIAL_INVESTMENT)


def identity(x):
    return x


feature_generator = models.FeatureGenerator(train_env, identity)

random_agent = agents.RandomAgent(train_env.n_stocks)
linear_agent = agents.LinearAgent(train_env.n_stocks,
                                  feature_generator,
                                  gamma=0.95,
                                  epsilon_decay=0.9995,
                                  epsilon_min=0.01,
                                  alpha=0.01,
                                  momentum=0.9)

print(play_game(train_env, random_agent))

# Train
print("Training [LinearAgent, Random]")
N = 50  # Should really do about 200
val = np.zeros((2, N))
for i in range(N):
Example #20
                                    histlen=args.histlen)

    ## If the action size is 1, we use MLPClassifier;
    ## for action size 2, we use MLPRegressor
    elif args.agent == 'network':
        if args.reward in ('binary',):
            action_size = 1
        else:
            action_size = 2

        agent = agents.NetworkAgent(state_size=state_size,
                                    action_size=action_size,
                                    hidden_size=args.hiddennet,
                                    histlen=args.histlen)
    elif args.agent == 'heur_random':
        agent = agents.RandomAgent(histlen=args.histlen)
    elif args.agent == 'heur_sort':
        agent = agents.HeuristicSortAgent(histlen=args.histlen)
    elif args.agent == 'heur_weight':
        agent = agents.HeuristicWeightAgent(histlen=args.histlen)
    else:
        print('Unknown Agent')
        sys.exit()

    if args.scenario_provider == 'random':
        scenario_provider = scenarios.RandomScenarioProvider()
    elif args.scenario_provider == 'incremental':
        scenario_provider = scenarios.IncrementalScenarioProvider(
            episode_length=args.no_scenarios)
    elif args.scenario_provider == 'paintcontrol':
        scenario_provider = scenarios.IndustrialDatasetScenarioProvider(
Example #21
def train_visual_module(img_width, img_height, pc_ensemble, hd_ensemble,
                        lab_config, level, level_boundary_min,
                        level_boundary_max):
    assert str(img_width) == lab_config[
        'width'], "DM-Lab camera width does not match the width of the visual module"
    assert str(img_height) == lab_config[
        'height'], "DM-Lab camera height does not match the height of the visual module"

    model = networks.VisualModule(img_width, img_height, pc_ensemble,
                                  hd_ensemble)

    # model = tf.keras.models.Sequential([
    #     tf.keras.layers.Dense(32, input_shape=(64,64,3))
    # ])

    # Prepare env and random agent
    observations = ['RGB', 'DEBUG.POS.ROT', 'DEBUG.POS.TRANS']
    env = deepmind_lab.Lab(level,
                           observations,
                           config=lab_config,
                           renderer='software')

    agent = agents.RandomAgent(env.action_spec(),
                               forbidden_actions=[
                                   'JUMP', 'FIRE', 'CROUCH',
                                   'LOOK_DOWN_UP_PIXELS_PER_FRAME'
                               ])

    episode_length = 100
    total_frames = episode_length * 1e6
    batch_size = 32
    epochs = 1000
    training_steps_per_epoch = total_frames // batch_size

    # Record training data

    def generate_batch():
        replay_buffer = buffers.ReplayBuffer(batch_size * episode_length)
        # pc_boundary_= (pc_ensemble.pos_max - pc_ensemble.pos_min)
        pc_boundary_scale = (pc_ensemble.pos_max - pc_ensemble.pos_min) / \
            (level_boundary_max - level_boundary_min)

        level_boundary_mean = (level_boundary_max -
                               level_boundary_min) / 2. + level_boundary_min

        while True:
            env.reset()
            # Collect observations
            for _ in range(batch_size):
                for _ in range(episode_length):
                    if not env.is_running():
                        print('Environment stopped early')
                        env.reset()
                        agent.reset()
                    obs = env.observations()
                    if not obs:
                        raise Exception('Observations empty!')

                    # Normalize observations
                    rgb = obs['RGB']
                    target_pos = obs['DEBUG.POS.TRANS'][:2]
                    target_rot = obs['DEBUG.POS.ROT'][1]
                    target_pos -= level_boundary_mean
                    target_pos *= pc_boundary_scale
                    target_rot = target_rot * ((2. * np.pi) / 360.)
                    replay_buffer.add([rgb, target_pos, target_rot])
                    action = agent.step()
                    env.step(action, num_steps=1)

            # Form batches
            # TODO make sure that the replay buffer is actually filled (no early environment stoppings, see above)
            target_pos_batch = np.zeros((batch_size, episode_length, 2))
            target_rot_batch = np.zeros((batch_size, episode_length, 1))
            rgb_batch = np.zeros(
                (batch_size, episode_length, 3, img_width, img_height))

            for i in range(batch_size):
                sampled_obs = replay_buffer.sample(episode_length)
                target_pos_batch[i, :, :] = np.array(
                    list(map(lambda x: x[1], sampled_obs)))
                target_rot_batch[i, :, 0] = np.array(
                    list(map(lambda x: x[2], sampled_obs)))
                rgb_batch[i, :, :, :, :] = np.array(
                    list(map(lambda x: x[0], sampled_obs)))

            replay_buffer.clear()

            targets = utils.encode_targets(target_pos_batch, target_rot_batch,
                                           [pc_ensemble], [hd_ensemble])

            # Compute training targets
            rgb_batch = np.swapaxes(rgb_batch, 2, 4)
            rgb_batch = np.swapaxes(rgb_batch, 2, 3)
            rgb_batch = tf.convert_to_tensor(rgb_batch)

            yield (rgb_batch), (targets[0], targets[1])

    # Prepare model training
    model.compile(optimizer=tf.optimizers.RMSprop(learning_rate=1e-5,
                                                  momentum=0.9,
                                                  clipvalue=1e-5),
                  loss={
                      'output_1': softmax_cross_entropy_logits_loss,
                      'output_2': softmax_cross_entropy_logits_loss
                  })

    # batch_generator = generate_batch(batch_size=10)
    # import ipdb; ipdb.set_trace()
    # for _ in range(epochs):
    #     for _ in range(training_steps_per_epoch):
    #         x, y = next(batch_generator)
    #         pc_pred, hd_pred = model.train_on_batch(x, y)

    # model.fit_generator(
    #     generate_batch(),
    #     epochs=epochs,
    #     steps_per_epoch=training_steps_per_epoch,
    #     verbose=1,
    # )
    model.fit(
        generate_batch(),
        epochs=epochs,
        steps_per_epoch=training_steps_per_epoch,
        verbose=1,
    )
Example #22
    def set_agents(self, model_path_a, model_path_b, model_path_m):

        # If either player is 'human', run the game in a pygame window; otherwise use text output only
        if model_path_a == 'human' or model_path_b == 'human':
            game_mode = 'pygame'
        else:
            game_mode = 'text'

        # Set the game mode of the env
        self.env = game.GameState(game_mode)

        # Set the player's model
        if model_path_a == 'random':
            print('load player model:', model_path_a)
            self.player = agents.RandomAgent(BOARD_SIZE)
        elif model_path_a == 'puct':
            print('load player model:', model_path_a)
            self.player = agents.PUCTAgent(BOARD_SIZE, N_MCTS_PLAYER)
        elif model_path_a == 'uct':
            print('load player model:', model_path_a)
            self.player = agents.UCTAgent(BOARD_SIZE, N_MCTS_PLAYER)
        elif model_path_a == 'human':
            print('load player model:', model_path_a)
            self.player = agents.HumanAgent(BOARD_SIZE, self.env)
        elif model_path_a == 'web':
            print('load player model:', model_path_a)
            self.player = agents.WebAgent(BOARD_SIZE)
        else:
            print('load player model:', model_path_a)
            self.player = agents.ZeroAgent(BOARD_SIZE,
                                           N_MCTS_PLAYER,
                                           IN_PLANES_PLAYER,
                                           noise=False)
            self.player.model = model.PVNet(N_BLOCKS_PLAYER, IN_PLANES_PLAYER,
                                            OUT_PLANES_PLAYER,
                                            BOARD_SIZE).to(device)
            state_a = self.player.model.state_dict()
            my_state_a = torch.load(
                model_path_a, map_location='cuda:0' if use_cuda else 'cpu')
            for k, v in my_state_a.items():
                if k in state_a:
                    state_a[k] = v
            self.player.model.load_state_dict(state_a)

        # Set the enemy player's model
        if model_path_b == 'random':
            print('load enemy model:', model_path_b)
            self.enemy = agents.RandomAgent(BOARD_SIZE)
        elif model_path_b == 'puct':
            print('load enemy model:', model_path_b)
            self.enemy = agents.PUCTAgent(BOARD_SIZE, N_MCTS_ENEMY)
        elif model_path_b == 'uct':
            print('load enemy model:', model_path_b)
            self.enemy = agents.UCTAgent(BOARD_SIZE, N_MCTS_ENEMY)
        elif model_path_b == 'human':
            print('load enemy model:', model_path_b)
            self.enemy = agents.HumanAgent(BOARD_SIZE, self.env)
        elif model_path_b == 'web':
            print('load enemy model:', model_path_b)
            self.enemy = agents.WebAgent(BOARD_SIZE)
        else:  # this branch runs when using already-created data (a saved model file)
            print('load enemy model:', model_path_b)
            # Set up the enemy agent
            self.enemy = agents.ZeroAgent(BOARD_SIZE,
                                          N_MCTS_ENEMY,
                                          IN_PLANES_ENEMY,
                                          noise=False)
            # Set up the enemy network model, move it to the device (GPU), and store it in agents.ZeroAgent().model
            self.enemy.model = model.PVNet(N_BLOCKS_ENEMY, IN_PLANES_ENEMY,
                                           OUT_PLANES_ENEMY,
                                           BOARD_SIZE).to(device)
            state_b = self.enemy.model.state_dict()  # dict of the network's parameter tensors
            my_state_b = torch.load(model_path_b,
                                    map_location='cuda:0'
                                    if use_cuda else 'cpu')  # load the saved parameter file
            # state_b maps each layer's keys (weights, biases, etc.) to their corresponding value tensors
            for k, v in my_state_b.items():
                if k in state_b:
                    state_b[k] = v
            self.enemy.model.load_state_dict(state_b)  # set the parameters on the model

        # monitor agent, same as above
        self.monitor = agents.ZeroAgent(BOARD_SIZE,
                                        N_MCTS_MONITOR,
                                        IN_PLANES_ENEMY,
                                        noise=False)
        self.monitor.model = model.PVNet(N_BLOCKS_ENEMY, IN_PLANES_ENEMY,
                                         OUT_PLANES_ENEMY,
                                         BOARD_SIZE).to(device)
        state_b = self.monitor.model.state_dict()
        my_state_b = torch.load(model_path_m,
                                map_location='cuda:0' if use_cuda else 'cpu')
        for k, v in my_state_b.items():
            if k in state_b:
                state_b[k] = v
        self.monitor.model.load_state_dict(state_b)
Example #23
    def train(self,
              no_scenarios,
              print_log,
              plot_graphs,
              save_graphs,
              collect_comparison=True):
        # stats is a dictionary on this object holding the fields listed below
        stats = {
            'scenarios': [],
            'rewards': [],
            'durations': [],
            'detected': [],
            'missed': [],
            'ttf': [],
            'napfd': [],
            'recall': [],
            'avg_precision': [],
            'result': [],
            'step': [],
            'env': self.scenario_provider.name,
            'agent': self.agent.name,
            # 'action_size': self.agent.action_size,
            'history_length': self.agent.histlen,
            'rewardfun': self.reward_function.__name__,
            'sched_time': self.scenario_provider.avail_time_ratio,
            'hidden_size': ('x'.join(str(x) for x in self.agent.hidden_size)
                            if hasattr(self.agent, 'hidden_size') else 0)
        }

        if collect_comparison:
            cmp_agents = {
                'heur_sort': agents.HeuristicSortAgent(self.agent.histlen),
                'heur_weight': agents.HeuristicWeightAgent(self.agent.histlen),
                'heur_random': agents.RandomAgent(self.agent.histlen)
            }

            stats['comparison'] = {}

            # initialize stats['comparison'][key] for each comparison agent
            for key in cmp_agents.keys():
                stats['comparison'][key] = {
                    'detected': [],
                    'missed': [],
                    'ttf': [],
                    'napfd': [],
                    'recall': [],
                    'avg_precision': [],
                    'durations': []
                }

        sum_actions = 0
        sum_scenarios = 0
        sum_detected = 0
        sum_missed = 0
        sum_reward = 0

        # enumerate yields (count, element) tuples
        # write_file.write("Agent is "+str(self.agent))

        for (i, sc) in enumerate(self.scenario_provider, start=1):
            if i > no_scenarios:
                break

            start = time.time()

            if print_log:
                print('ep %d:\tscenario %s\t' % (sum_scenarios + 1, sc.name),
                      end='')

            (result, reward) = self.process_scenario(sc)

            end = time.time()

            # Statistics of the CI cycle after Prioritization and Selection of test cases from the test suite

            sum_detected += result[0]
            sum_missed += result[1]

            # If test-case priorities are added in the future, np.average() could be used here,
            # with weights adjusted according to priority.

            sum_reward += np.mean(reward)
            sum_actions += 1
            sum_scenarios += 1
            duration = end - start

            stats['scenarios'].append(sc.name)
            stats['rewards'].append(np.mean(reward))
            stats['durations'].append(duration)
            stats['detected'].append(result[0])
            stats['missed'].append(result[1])
            ## TTF (time to failure) is the position at which our algorithm scheduled the first failing test case
            stats['ttf'].append(result[2])
            stats['napfd'].append(result[3])
            stats['recall'].append(result[4])
            stats['avg_precision'].append(result[5])
            stats['result'].append(result)
            stats['step'].append(sum_scenarios)

            if print_log:
                print(
                    ' finished, reward: %.2f,\trunning mean: %.4f,\tduration: %.1f,\tresult: %s'
                    % (np.mean(reward), sum_reward / sum_scenarios, duration,
                       result))

            global total_failures_detected
            global total_failures_missed
            total_failures_detected += result[0]
            total_failures_missed += result[1]

            # collect_comparison is True when args.comparable is set
            ## Collects the results of heur_sort, heur_random and heur_weight

            if collect_comparison:
                for key in stats['comparison'].keys():
                    start = time.time()
                    cmp_res = process_scenario(cmp_agents[key], sc,
                                               preprocess_discrete)

                    end = time.time()
                    stats['comparison'][key]['detected'].append(cmp_res[0])
                    stats['comparison'][key]['missed'].append(cmp_res[1])
                    stats['comparison'][key]['ttf'].append(cmp_res[2])
                    stats['comparison'][key]['napfd'].append(cmp_res[3])
                    stats['comparison'][key]['recall'].append(cmp_res[4])
                    stats['comparison'][key]['avg_precision'].append(
                        cmp_res[5])
                    stats['comparison'][key]['durations'].append(end - start)

            # Data dumping

            ## The two commented lines below would dump stats at a fixed interval; this is unnecessary
            ## here because the full stats are written at the end of the program.

            # if self.dump_interval > 0 and sum_scenarios % self.dump_interval == 0:
            # 	pickle.dump(stats, open(self.stats_file + '.p', 'wb'))

            if self.validation_interval > 0 and (
                    sum_scenarios == 1
                    or sum_scenarios % self.validation_interval == 0):
                if print_log:
                    print('ep %d:\tRun test... ' % sum_scenarios, end='')

                self.run_validation(sum_scenarios)

                pickle.dump(self.validation_res,
                            open(self.val_file + '.p', 'wb'))

                if print_log:
                    print('done')

        ## Dump the stats of all CI cycles into the stats file
        if self.dump_interval > 0:
            self.agent.save(self.agent_file)
            pickle.dump(stats, open(self.stats_file + '.p', 'wb'))

        ## Plotting Graphs
        if plot_graphs:
            plot_stats.plot_stats_single_figure(self.file_prefix,
                                                self.stats_file + '.p',
                                                self.val_file + '.p',
                                                1,
                                                plot_graphs=plot_graphs,
                                                save_graphs=save_graphs)
        ## Save the generated Graphs
        if save_graphs:
            plot_stats.plot_stats_separate_figures(self.file_prefix,
                                                   self.stats_file + '.p',
                                                   self.val_file + '.p',
                                                   1,
                                                   plot_graphs=False,
                                                   save_graphs=save_graphs)

        return np.mean(stats['napfd']), np.mean(stats['recall'])
Example #24
import os
import pickle
import sys

from game import ConnectFour
import agents

from utilities import pickMove, pickAndMakeMove

game = ConnectFour()
if len(sys.argv) >= 2:
    print("Using opponent from {}".format(sys.argv[1]))
    with open(sys.argv[1], 'rb') as f:
        opponent = pickle.load(f)
else:
    print("Using random agent")
    opponent = agents.RandomAgent()

while not game.isFinished():
    if game.isOurTurn():
        print(game)
        possibles = game.possibleMoves()
        column = input("Which column {}? ".format(possibles))
        try:
            game.playMove(int(column))
        except Exception as e:
            print("error occurred", e)
    else:
        try:
            pickAndMakeMove(game, opponent)
        except Exception:
            print("AI chose invalid move, trying random")