-
Notifications
You must be signed in to change notification settings - Fork 0
/
train_AI.py
638 lines (504 loc) · 28.4 KB
/
train_AI.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
### This script contains the functions related to the shared AI.
### When run, this script will train the shared AI for a designated number of generations.
# import required libraries
from utilities import sample_weights, categorize_tokens, expand_inventory, expand_vector
import numpy as np
import pandas as pd
import keras
from keras import layers
from keras import regularizers
from keras import optimizers
from keras.utils import to_categorical
from keras.callbacks import EarlyStopping
from keras.models import load_model
from scipy.stats import uniform, randint, bernoulli
### This section contains the functions that run the game.
## function setup_tokens
# returns a lineup of tokens shuffled to set up the game
def setup_tokens():
    """Build the starting token row.

    The row is made of four consecutive groups of eight tokens holding two
    copies each of the values 0-3, 4-7, 8-11 and 12-15. Every group is
    shuffled on its own, so the groups stay in ascending order along the row
    while the order inside each group is random.

    Returns:
        A 1-D integer array with the 32 token values in row order.
    """
    groups = []
    for lowest_value in (0, 4, 8, 12):
        # two copies of each of the four values belonging to this group
        group = np.repeat(np.arange(lowest_value, lowest_value + 4), 2)
        # shuffle in place; only the order inside the group is randomised
        np.random.shuffle(group)
        groups.append(group)
    # join the shuffled groups end to end, shallowest group first
    return np.concatenate(groups, axis=0)
## function die_roll
# takes an integer loot_count
# returns a number of spaces to move the player
def die_roll(loot_count):
    """Roll two dice and subtract the number of tokens being carried.

    Each die is drawn with ``randint.rvs(1, 4)``, i.e. from the values 1, 2
    and 3.

    Args:
        loot_count: number of tokens the player carries.

    Returns:
        The number of spaces to move, never less than zero.
    """
    first_die = randint.rvs(1, 4)
    second_die = randint.rvs(1, 4)
    # carried loot slows the player down, but cannot move them backwards
    return max(first_die + second_die - loot_count, 0)
### A class to hold the current game state
class GameState:
    """Mutable state of one game: token row, air, inventories, positions, scores.

    Conventions used throughout the class:
      * a space on the token row is an index into ``self.tokens``; the value
        -1 in the row marks a space whose token has been taken;
      * a player position of -1 means "on the submarine";
      * a heading of +1 means moving down the row, -1 means moving back up.
    """

    # initiate the game state by passing the number of players
    def __init__(self, players=6):
        """Set up a fresh game for ``players`` players (2 to 6)."""
        # players as defined on initiation
        assert players >= 2 and players <= 6, "Must be between 2 and 6 players."
        self.players = players
        # shuffle the initial tokens
        self.tokens = setup_tokens()
        # set initial air supply to 25
        self.air_supply = 25
        # give each player an inventory of zero tokens (list of arrays)
        self.inventory = [np.array([]) for _ in range(players)]
        # one [position, heading] row per player: on the sub (-1), heading down (+1)
        self.positions = np.tile([-1, 1], (players, 1))
        # set the active player to player 0
        self.active_player = 0
        # set each player's score to 0
        self.player_scores = np.zeros(players)
        # track the game round
        self.round = 1

    # bound function to update the air supply
    def update_air_supply(self):
        """Remove one unit of air per token the active player is carrying."""
        self.air_supply -= len(self.inventory[self.active_player])

    # bound function to collect a token
    def collect_token(self):
        """Move the token under the active player into their inventory."""
        player = self.active_player
        space = self.positions[player][0]
        # add the token to the active player's inventory
        self.inventory[player] = np.append(self.inventory[player], self.tokens[space])
        # mark the space as empty (set to -1)
        self.tokens[space] = -1

    # bound function to drop a token
    def drop_token(self, inventory_mask):
        """Put one carried token back on the row under the active player.

        Args:
            inventory_mask: array aligned with the active player's inventory;
                the first entry equal to 1 selects the token to drop.
        """
        player = self.active_player
        space = self.positions[player][0]
        # find the index of the selected token
        index_to_drop = np.where(inventory_mask == 1)[0][0]
        # place the token back in line
        self.tokens[space] = self.inventory[player][index_to_drop]
        # remove it from the player's inventory
        self.inventory[player] = np.delete(self.inventory[player], [index_to_drop])

    # bound function to start the next round
    def next_round(self, noise):
        """Close the current round and set the board up for the next one.

        Players who are on the submarine bank the sum of their inventory.
        Players who are not lose their tokens: these are appended to the end
        of the token row in stacks of up to three, each stack being a single
        space whose value is the sum of the stacked tokens.

        Previously the first stranded player found caused *every* player's
        inventory to be dumped onto the row, so returned players who had not
        been scored yet lost their loot and already-scored loot was put back
        into play. Only the stranded players' own tokens are dropped now.

        Args:
            noise: forwarded unchanged to ``death_drop``.
        """
        # reset air supply to 25
        self.air_supply = 25
        # remove blanks from token row
        self.tokens = np.delete(self.tokens, np.where(self.tokens == -1)[0])
        # number of tokens already placed on the stack currently being built;
        # shared by all stranded players so stacks fill up across players
        stacked = 0
        for i in range(self.players):
            # score any tokens that players got back to the sub with
            if self.positions[i, 0] == -1:
                self.player_scores[i] += np.sum(self.inventory[i])
                continue
            # this player did not make it back: request drops until the
            # inventory is empty
            while len(self.inventory[i]) > 0:
                # request which token to drop (returned as a one-hot mask)
                selected_token = death_drop(categorize_tokens(self.inventory[i]), self.pass_gamestate(), noise)
                # translate the mask back to an index
                selected_token = np.where(selected_token == 1)[0][0]
                if stacked == 0:
                    # first token of a stack: open a new space at the end of the row
                    self.tokens = np.append(self.tokens, self.inventory[i][selected_token])
                else:
                    # the stack is not full yet: add to the last space
                    self.tokens[-1] += self.inventory[i][selected_token]
                # remove the token from the player's inventory
                self.inventory[i] = np.delete(self.inventory[i], selected_token)
                # a stack holds three tokens; start a new one after that
                stacked += 1
                if stacked == 3:
                    stacked = 0
        # fully clear player inventories (scored tokens leave the game)
        self.inventory = [np.array([]) for _ in range(self.players)]
        # set the active player to the deepest player (read before positions are reset)
        self.active_player = np.argmax(self.positions[:, 0])
        # put each player on the sub (position -1) with downward heading (+1)
        self.positions = np.tile([-1, 1], (self.players, 1))
        # update the round counter
        self.round += 1

    # bound function that summarizes the gamestate in the vector format necessary to produce predictions
    def pass_gamestate(self):
        """Flatten the state into one feature vector seen from the active player.

        Per-player data (inventories, positions, scores) is rotated so the
        active player's entry comes first. The pieces are concatenated in the
        order: round, inventories, air supply, token row, player positions,
        scores.

        NOTE(review): the decision functions in this file append 1 or 32
        values and reshape to 245 / 276 columns, which implies this vector has
        244 entries for six players -- confirm against
        ``utilities.expand_inventory`` / ``utilities.expand_vector``.
        """
        active = self.active_player
        # computed once; the original evaluated this helper twice per call
        inventories = expand_inventory(self.inventory, self.players)
        tokens_inventories = np.concatenate((inventories[active:, :], inventories[:active, :])).flatten()
        token_positions = expand_vector(categorize_tokens(self.tokens))
        player_positions = np.concatenate((self.positions[active:, :], self.positions[:active, :])).flatten()
        intermediate_scores = np.concatenate((self.player_scores[active:], self.player_scores[:active])).flatten()
        return np.hstack((self.round, tokens_inventories, self.air_supply, token_positions, player_positions, intermediate_scores))
## function take_turn
# takes a GameState plus game decision models and advances it by a turn
def take_turn(game, model_turn_around, model_pick_up, model_drop_token, noise):
    """Play the active player's turn and hand over to the next player.

    A turn consists of: paying air for carried tokens, optionally turning
    around, rolling and moving (spaces held by other players are jumped over
    without using up the roll), then optionally picking up or dropping a
    token. A player who has already returned to the submarine is skipped.
    Afterwards the active player advances, and a new round is started when
    the air has run out or everybody is on the submarine.

    Args:
        game: the GameState to advance (modified in place).
        model_turn_around, model_pick_up, model_drop_token: models handed to
            the corresponding decision functions.
        noise: noise level handed to the decision functions.

    Returns:
        A tuple ``(continue_game, turn_taken, turn_around, pick_up, drop)``.
        ``continue_game`` is True while ``game.round <= 3``; ``turn_taken`` is
        False when the player was skipped; ``drop`` is the mask returned by
        ``drop_decision`` or an empty array when no drop was considered.
    """
    # reporter variables
    turn_taken = False
    turn_around = False
    pick_up = False
    drop = np.array([])
    player = game.active_player
    # on the sub (position -1) while heading up (-1) means the player is back
    already_back = game.positions[player, 0] == -1 and game.positions[player, 1] == -1
    if not already_back:
        # note that the turn was taken
        turn_taken = True
        # update the air supply
        game.update_air_supply()
        # only a player who has left the sub and still heads down may turn around
        if game.positions[player, 1] == 1 and game.positions[player, 0] != -1:
            if turn_around_decision(game.pass_gamestate(), model_turn_around, noise):
                # turn the player back toward the sub if True
                game.positions[player, 1] = -1
                turn_around = True
        # roll the dice
        roll = die_roll(len(game.inventory[player]))
        # spaces held by the other players; they do not move during this turn
        other_spaces = game.positions[np.arange(len(game.positions)) != player][:, 0]
        # move the player along the token lineup until they consume their moves or reach the end
        while roll > 0:
            if game.positions[player, 1] == 1:
                # moving down: stop on the deepest space not held by someone else
                free_spaces = np.setdiff1d(np.arange(len(game.tokens)), other_spaces)
                if game.positions[player, 0] == max(free_spaces):
                    break
            elif game.positions[player, 0] == -1:
                # moving up: stop at the sub
                break
            # move the player a step in the appropriate direction
            game.positions[player, 0] += game.positions[player, 1]
            # continue moving without consuming the roll if the space is already occupied
            if game.positions[player, 0] in other_spaces:
                continue
            # check to see if they have reached the sub
            if game.positions[player, 0] == -1:
                break
            # consume one movement from the roll
            roll = roll - 1
        # determine whether the active player picks up or drops a token
        space = game.positions[player, 0]
        # A player on the sub has no token space underneath them. Without this
        # guard, position -1 would index the LAST token of the row.
        if space != -1:
            # check to see if a token is available
            if game.tokens[space] > -1:
                if pick_up_decision(game.pass_gamestate(), model_pick_up, noise):
                    # pick up the token if True
                    game.collect_token()
                    pick_up = True
            elif len(game.inventory[player]) > 0:
                # select tokens to drop
                drop = drop_decision(categorize_tokens(game.inventory[player]), game.pass_gamestate(), model_drop_token, noise)
                # drop a token if any indicated
                if np.any(drop > 0):
                    game.drop_token(drop)
    # update the active player, wrapping around after the last seat
    game.active_player = (game.active_player + 1) % game.players
    # check to see if oxygen has run out or everyone is back on the sub
    if game.air_supply <= 0 or np.all(game.positions[:, 0] == -1):
        # start the next round
        game.next_round(noise)
    # return a boolean indicating whether the game continues alongside descriptions of the player's actions
    return (game.round <= 3, turn_taken, turn_around, pick_up, drop)
### This section contains functions necessary for training the AI.
## function instantiate_model
# takes a tuple input shape, and optionally a dropout_rate and l2 lambda value
# returns a compiled model
def instantiate_model(input_shape, dropout_rate = 0, l2_lambda = 0):
    """Build and compile the feed-forward network used for every decision.

    Architecture: input -> Dense(256, relu) -> Dropout -> Dense(128, relu)
    -> Dropout -> Dense(2, softmax). Both hidden layers use an L2 kernel
    regularizer with factor ``l2_lambda`` and both dropout layers use
    ``dropout_rate``.

    Args:
        input_shape: shape tuple of one input row, e.g. ``(245, )``.
        dropout_rate: rate applied by both dropout layers.
        l2_lambda: L2 regularisation factor of the hidden layers.

    Returns:
        The compiled keras model (binary cross-entropy loss, RMSprop
        optimizer, accuracy metric).
    """
    input_layer = layers.Input(shape=input_shape)
    # two hidden stages, each a dense layer followed by dropout
    hidden = input_layer
    for width in (256, 128):
        hidden = layers.Dense(
            units=width,
            activation="relu",
            kernel_regularizer=regularizers.l2(l2_lambda),
        )(hidden)
        hidden = layers.Dropout(rate=dropout_rate)(hidden)
    # two-way softmax output
    softmax_layer = layers.Dense(units=2, activation="softmax")(hidden)
    # input/output mapping
    model = keras.Model(inputs=input_layer, outputs=softmax_layer)
    # compile the model
    model.compile(
        loss="binary_crossentropy",
        optimizer=optimizers.RMSprop(),
        metrics=["accuracy"],
    )
    return model
# helper for simulate_game: put the active player's entry first
def _rotate_to_active(values, active_player):
    """Return ``values`` reordered to start at index ``active_player``."""
    return np.concatenate((values[active_player:], values[:active_player]))


# helper for simulate_game: append the current board state to the log columns
def _log_board_state(log, game, turn):
    """Append one snapshot of ``game`` to the state columns of ``log``."""
    active = game.active_player
    # computed once per snapshot; the original evaluated this helper twice
    inventories = expand_inventory(game.inventory, game.players)
    log["TurnID"].append(turn)
    log["Active_Player"].append(active)
    log["Round"].append(game.round)
    log["Tokens_Inventories"].append(_rotate_to_active(inventories, active).flatten())
    log["Air_Supply"].append(game.air_supply)
    log["Token_Positions"].append(expand_vector(categorize_tokens(game.tokens)))
    log["Player_Positions"].append(_rotate_to_active(game.positions, active).flatten())
    log["Intermediate_Scores"].append(_rotate_to_active(game.player_scores, active).flatten())


## function simulate_game
# takes an integer number of players plus models for game decisions
# returns a dataframe of board states and a dictionary of player results
def simulate_game(players, model_turn_around, model_pick_up, model_drop_token, noise):
    """Play one full game and log the board after every turn that was taken.

    Row 0 of the log is the initial board with all decision columns set to
    zero. Every later row holds the board as it is *after* a turn (already
    rotated for the player who moves next) together with the decisions made
    during that turn. Skipped turns (player already back on the sub) are not
    logged.

    Args:
        players: number of players handed to ``GameState``.
        model_turn_around, model_pick_up, model_drop_token: decision models
            forwarded to ``take_turn``.
        noise: noise level forwarded to ``take_turn``.

    Returns:
        ``(Game_Log, Placement)`` where ``Game_Log`` is a DataFrame with one
        row per logged board state and ``Placement`` maps each player index to
        1 for the winner and 0 otherwise. Tie breakers are ignored: when the
        top score is shared, every player is marked 0. (The original only
        recognised a tie when all scores were equal and otherwise gave a
        shared top score to the lowest player index.)
    """
    log = {
        # board state columns
        "TurnID": [],               # int, indexing identifier
        "Active_Player": [],        # int, indexing identifier
        "Round": [],                # int
        "Tokens_Inventories": [],   # flattened inventories, active player first
        "Air_Supply": [],           # int
        "Token_Positions": [],      # vector
        "Player_Positions": [],     # flattened [position, heading] pairs, active player first
        "Intermediate_Scores": [],  # vector, index 0 is active player
        # decision columns; the initial board has no decisions attached
        "Turn_Around": [0],         # bool, happens prior to board update
        "Pick_Up": [0],             # bool, happens after board update
        "Drop": [[0] * 32],         # vector, happens after board update
    }
    # initiate a new game state and document it
    game = GameState(players)
    turn = 0
    _log_board_state(log, game, turn)
    # take turns until the game is over
    continue_game = True
    while continue_game:
        # take a turn, collect the player actions taken
        continue_game, turn_taken, turn_around, pick_up, drop = take_turn(game, model_turn_around, model_pick_up, model_drop_token, noise)
        # nothing to log if the active player was already back at the sub
        if not turn_taken:
            continue
        # document the game state
        turn += 1
        _log_board_state(log, game, turn)
        # document the player decisions
        log["Turn_Around"].append(int(turn_around))
        log["Pick_Up"].append(int(pick_up))
        log["Drop"].append(expand_vector(drop))
    # wrap the board states into a dataframe (dict order gives the column order)
    Game_Log = pd.DataFrame(log)
    # document the winner
    # ignoring tie breakers, all players lose when tied
    Placement = {i: 0 for i in range(len(game.player_scores))}
    leaders = np.flatnonzero(game.player_scores == np.max(game.player_scores))
    if len(leaders) == 1:
        Placement[int(leaders[0])] = 1
    return Game_Log, Placement
## function build_training_set
# takes an integer number of games to simulate
# returns a list of couples containing the design matrix and target vector for each decision function to train
def build_training_set(games, model_turn_around, model_pick_up, model_drop_token, noise):
    """Simulate games and turn their logs into three training sets.

    Each design-matrix row is the flattened board state followed by the
    logged decision (turn-around flag, pick-up flag or drop vector); the
    target is the final placement (1 = won, 0 = did not win) of the player
    recorded as active in that row.

    Changes from the previous implementation:
      * rows are collected in lists and stacked once at the end instead of
        growing the matrices with ``np.vstack`` / ``np.append`` per row, which
        copied the whole matrix every time;
      * the first row of the first game is no longer seeded into the pick-up
        and drop sets before the loop adds it again, so it is counted once and
        only in the set it belongs to. The turn-around set is unchanged.

    Args:
        games: number of games to simulate.
        model_turn_around, model_pick_up, model_drop_token: decision models
            used to play the games.
        noise: noise level used for the game decisions.

    Returns:
        ``[(X_turn_around, Y_turn_around), (X_pick_up, Y_pick_up),
        (X_drop_token, Y_drop_token)]``. A set for which no row was produced
        is returned as an empty ``(0, 0)`` matrix with an empty target vector.
    """
    state_columns = ("Round", "Tokens_Inventories", "Air_Supply", "Token_Positions", "Player_Positions", "Intermediate_Scores")
    design_rows = {"Turn_Around": [], "Pick_Up": [], "Drop": []}
    outcomes = {"Turn_Around": [], "Pick_Up": [], "Drop": []}

    def add_example(frame, j, decision, scores):
        # one design row: the state columns followed by the decision column
        features = [frame[column].iloc[j] for column in state_columns]
        features.append(frame[decision].iloc[j])
        design_rows[decision].append(np.hstack(features))
        outcomes[decision].append(scores[frame["Active_Player"].iloc[j]])

    for _ in range(games):
        # randomly select a player count from 2 to 6
        #players = np.random.choice(np.arange(2, 7))
        players = 6
        # simulate a game; remove the "4th round" row logged after the game ended
        board_state, scores = simulate_game(players, model_turn_around, model_pick_up, model_drop_token, noise)
        board_state = board_state.drop(board_state.shape[0] - 1)
        # pick up and drop decisions occur after player movement and air supply are updated
        # shift those columns up one row to reflect this, then drop the now incomplete last row
        board_state_tokens = board_state.copy()
        for column in ("Air_Supply", "Player_Positions", "Pick_Up", "Drop"):
            board_state_tokens[column] = board_state_tokens[column].shift(-1)
        board_state_tokens = board_state_tokens.drop(board_state_tokens.shape[0] - 1)
        # turn around examples: only rows where a turn around decision was possible
        for j in range(board_state.shape[0]):
            # entry 1 of the flattened positions is the active player's heading
            if board_state["Player_Positions"].iloc[j][1] != -1:
                add_example(board_state, j, "Turn_Around", scores)
        # pick up / drop examples from the shifted dataframe
        for j in range(board_state_tokens.shape[0]):
            # entry 0 of the flattened positions is the first listed player's space
            space = board_state_tokens["Player_Positions"].iloc[j][0]
            # a non-zero entry means a token was available to be picked up
            if board_state_tokens["Token_Positions"].iloc[j][space] != 0:
                add_example(board_state_tokens, j, "Pick_Up", scores)
            # otherwise add to the drop data instead
            else:
                add_example(board_state_tokens, j, "Drop", scores)

    def stacked(decision):
        rows = design_rows[decision]
        design_matrix = np.vstack(rows) if rows else np.empty((0, 0))
        return (design_matrix, np.array(outcomes[decision]))

    return [stacked("Turn_Around"), stacked("Pick_Up"), stacked("Drop")]
## function train_model
# takes a model object, a design matrix, a target vector, target weights, and an optional verbose flag
# trains model until validation error stops decreasing, then returns the model object
def train_model(model, X, Y, weights, verbose = 0):
    """Fit ``model`` on ``(X, Y)`` with early stopping on the validation loss.

    Fixes over the previous implementation:
      * ``weights`` was accepted but never used; it is now passed to ``fit``
        as ``sample_weight``
        (assumes one weight per row of X -- TODO confirm against
        ``utilities.sample_weights``);
      * targets are one-hot encoded with ``num_classes=2`` so a target vector
        without any winner still matches the two-unit output layer;
      * the returned epoch count is the number of epochs actually run. The
        old ``stopped_epoch + 1`` reported 1 whenever early stopping never
        triggered, because ``stopped_epoch`` stays 0 in that case.

    Args:
        model: compiled keras model to train (updated in place).
        X: design matrix.
        Y: target vector of 0/1 outcomes.
        weights: per-sample weights, or None for unweighted training.
        verbose: verbosity flag forwarded to ``fit``.

    Returns:
        ``(model, epochs_run)``.
    """
    # stop once the validation loss has not improved for 5 epochs and go back to the best weights
    ES_callback = EarlyStopping(patience=5, restore_best_weights=True)
    sample_weight = None if weights is None else np.asarray(weights)
    # fit the model
    history = model.fit(
        x=X,
        y=to_categorical(Y, num_classes=2),
        sample_weight=sample_weight,
        batch_size=32,
        epochs=100,
        verbose=verbose,
        validation_split=0.2,
        callbacks=[ES_callback],
    )
    # return the trained model plus the number of epochs run
    return (model, len(history.history["loss"]))
## function compare_models
# takes two tuples of models and an integer number of games
# simulates games and returns the number of wins for each
def compare_models(model_set_1, model_set_2, games = 1000, noise = 0.1):
    """Pit two model sets against each other in six-player games.

    In every game one model set plays the even seats (players 0, 2, 4) and
    the other the odd seats; which set gets which is drawn at random per
    game. The game is credited to the set that owns the seat of the
    highest-scoring player (``np.argmax``, so a shared top score goes to the
    lowest seat index).

    Fixes over the previous implementation:
      * ``randint.rvs(0, 1)`` can only return 0 (the upper bound is
        exclusive), so model_set_1 always got the even seats;
        ``randint.rvs(0, 2)`` draws 0 or 1;
      * the models were chosen by the parity of a turn counter while the
        winner was credited by the parity of the seat. The two drift apart
        when a new round starts with the deepest player, so the models are
        now chosen by the active player's seat.

    Args:
        model_set_1, model_set_2: tuples ``(turn_around, pick_up, drop)`` of
            decision models.
        games: number of games to play.
        noise: noise level used for the game decisions.

    Returns:
        A length-2 array: wins of model_set_1, wins of model_set_2.
    """
    # initiate counter to track win totals
    wins = np.zeros(2)
    for _ in range(games):
        # initiate a new game state
        game = GameState(6)
        # randomly assign model_set_1 to the even (0) or odd (1) seats
        model_set_1_seats = randint.rvs(0, 2)
        # take turns until the game is over
        continue_game = True
        while continue_game:
            # the seat of the player about to move decides which models play
            if game.active_player % 2 == model_set_1_seats:
                models = model_set_1
            else:
                models = model_set_2
            continue_game = take_turn(game, *models, noise)[0]
        # document the winner
        if np.argmax(game.player_scores) % 2 == model_set_1_seats:
            wins[0] += 1
        else:
            wins[1] += 1
    # return the win totals
    return wins
### This section contains functions to make decisions during gameplay.
## function turn_around_decision
# takes a game state vector, the turn around model and a noise level
# returns 1 if the player turns around and 0 otherwise
def turn_around_decision(gamestate, model_turn_around, noise):
    """Decide at random, guided by the model, whether to turn around.

    The game state is extended with a trailing 1 ("turn around" chosen) and
    fed to the model; column 1 of the prediction is blended with uniform
    noise and used as the success probability of a Bernoulli draw.

    The input is reshaped to a single row of whatever length the game state
    has instead of a hard-coded 245 columns, and the blended probability is
    clipped to [0, 1] so rounding in the prediction cannot make the Bernoulli
    draw fail.

    Args:
        gamestate: 1-D game state vector (as built by
            ``GameState.pass_gamestate``).
        model_turn_around: object whose ``predict`` returns one row with at
            least two columns.
        noise: weight in [0, 1] of the uniform noise in the blend.

    Returns:
        The Bernoulli draw (1 = turn around, 0 = keep going).
    """
    # add "turn around true" to gamestate and shape it as a single row
    features = np.hstack((gamestate, 1)).reshape(1, -1)
    # predicted value of column 1 if the player turns around, blended with noise
    prob = (1 - noise) * model_turn_around.predict(features)[0][1] + noise * uniform.rvs()
    prob = float(np.clip(prob, 0.0, 1.0))
    # Bernoulli trial with probability
    return bernoulli.rvs(prob)
## function pick_up_decision
# takes a game state vector, the pick up model and a noise level
# returns 1 if the player picks up the token and 0 otherwise
def pick_up_decision(gamestate, model_pick_up, noise):
    """Decide at random, guided by the model, whether to pick up a token.

    The game state is extended with a trailing 1 ("pick up" chosen) and fed
    to the model; column 1 of the prediction is blended with uniform noise
    and used as the success probability of a Bernoulli draw.

    The input is reshaped to a single row of whatever length the game state
    has instead of a hard-coded 245 columns, and the blended probability is
    clipped to [0, 1] so rounding in the prediction cannot make the Bernoulli
    draw fail.

    Args:
        gamestate: 1-D game state vector (as built by
            ``GameState.pass_gamestate``).
        model_pick_up: object whose ``predict`` returns one row with at least
            two columns.
        noise: weight in [0, 1] of the uniform noise in the blend.

    Returns:
        The Bernoulli draw (1 = pick up, 0 = leave the token).
    """
    # add "pick up true" to gamestate and shape it as a single row
    features = np.hstack((gamestate, 1)).reshape(1, -1)
    # predicted value of column 1 if the player picks up the token, blended with noise
    prob = (1 - noise) * model_pick_up.predict(features)[0][1] + noise * uniform.rvs()
    prob = float(np.clip(prob, 0.0, 1.0))
    # Bernoulli trial with probability
    return bernoulli.rvs(prob)
## function drop_decision
# takes the player's inventory, a game state vector, the drop model and a noise level
# returns a mask over the inventory marking the token to drop (all zeroes = drop nothing)
def drop_decision(inventory, gamestate, model_drop_token, noise):
    """Choose which carried token to drop, if any.

    One candidate row is scored for "drop nothing" and one for each inventory
    slot. A candidate is the game state followed by a 32-entry vector that is
    all zero ("drop nothing") or has a single 1 at the slot being dropped.
    Column 1 of each prediction is blended with independent uniform noise and
    the candidate with the highest blended value wins.

    All candidates are scored with a single ``predict`` call instead of one
    call per candidate, and the row width follows the game state instead of
    a hard-coded 276 columns.

    Args:
        inventory: the active player's inventory; only its length is used.
        gamestate: 1-D game state vector (as built by
            ``GameState.pass_gamestate``).
        model_drop_token: object whose ``predict`` maps a 2-D batch to one
            row of at least two columns per input row.
        noise: weight in [0, 1] of the uniform noise in the blend.

    Returns:
        A float array with one entry per inventory slot: all zeroes when
        nothing should be dropped, otherwise a single 1 at the chosen slot.
    """
    slots = len(inventory)
    # create a vector of zeroes
    inventory_mask = np.zeros(slots)
    # row 0 keeps every flag at zero (drop nothing); row i + 1 flags slot i
    drop_options = np.zeros((slots + 1, 32))
    drop_options[np.arange(1, slots + 1), np.arange(slots)] = 1
    # extend a copy of the gamestate with each drop option
    candidates = np.hstack((np.tile(np.ravel(gamestate), (slots + 1, 1)), drop_options))
    # one batched prediction; column 1 of each row is the value being compared
    predictions = np.asarray(model_drop_token.predict(candidates))[:, 1]
    # blend every prediction with its own noise draw
    drop_probs = (1 - noise) * predictions + noise * uniform.rvs(size=slots + 1)
    # drop the highest probability token (index 0 means "drop nothing")
    best = np.argmax(drop_probs)
    if best != 0:
        inventory_mask[best - 1] = 1
    return inventory_mask
## function death_drop
# returns a mask over the inventory marking the token to drop
def death_drop(inventory, gamestate, noise):
    """Pick one inventory slot uniformly at random.

    Args:
        inventory: the inventory to choose from; only its length is used.
        gamestate: unused.
        noise: unused.

    Returns:
        A float array with one entry per inventory slot, holding a single 1
        at the chosen slot and zeroes elsewhere.
    """
    slots = len(inventory)
    # every slot is equally likely (the upper bound of randint is exclusive)
    chosen = randint.rvs(0, slots)
    # one-hot mask of the chosen slot
    return (np.arange(slots) == chosen).astype(float)
## function training_iteration
# takes a number of generations to train, a tuple of baseline models, and a number of games per generation
# compares models at the end of training to the baseline set
# returns trained models if they win at least 55% of the comparison games
# otherwise, returns baseline models
def training_iteration(generations, baseline_models, games_per_generation = 100):
    """Train a fresh set of decision models on games played by the baseline.

    Every generation simulates ``games_per_generation`` games with the
    baseline models, with the decision noise falling linearly from 0.5
    towards 0.1, and continues training the three new models on that data.
    After the last generation the new models play the baseline models.

    Args:
        generations: number of simulate-and-train rounds.
        baseline_models: tuple ``(turn_around, pick_up, drop)`` of models
            used to generate the training games and as the comparison
            opponent.
        games_per_generation: games simulated per generation.

    Returns:
        The tuple of newly trained models when they take at least 55% of the
        comparison wins, otherwise ``baseline_models``.
    """
    # fresh models in the order: turn around, pick up, drop token
    candidates = [
        instantiate_model((245, ), 0.3, 0),
        instantiate_model((245, ), 0.3, 0),
        instantiate_model((276, ), 0.3, 0),
    ]
    # simulate game data, use it to update models, and repeat
    for generation in range(generations):
        # set the current noise level for game decisions
        noise = 0.5 - 0.4 * (generation / generations)
        # simulate games using baseline models
        simulated_game_data = build_training_set(games_per_generation, *baseline_models, noise)
        # train each model on its own design matrix and target vector
        epochs_run = []
        for slot, (design_matrix, targets) in enumerate(simulated_game_data):
            candidates[slot], epochs = train_model(candidates[slot], design_matrix, targets, sample_weights(targets, 6))
            epochs_run.append(epochs)
        # print the number of epochs each model was trained prior to stopping
        print("Generation: %i" % generation)
        print(*epochs_run)
    trained_models = tuple(candidates)
    # compare the trained models to the baseline models
    results = compare_models(trained_models, baseline_models)
    improved = results[0] / np.sum(results) >= 0.55
    # keep the baseline unless the trained models win by a 55% to 45% margin
    if not improved:
        print("No Improvement")
        return baseline_models
    print("Improvement")
    return trained_models
## executed code
# continue until manually stopped
#while True:
# load the current baseline models
# baseline_models = (load_model("model_turn_around.h5"), load_model("model_pick_up.h5"), load_model("model_drop_token.h5"))
# train the models for 100 generations of 100 games
# new_models = training_iteration(100, baseline_models)
# save the returned models
# new_models[0].save("model_turn_around.h5")
# new_models[1].save("model_pick_up.h5")
# new_models[2].save("model_drop_token.h5")