def __init__(self, name, initial=None, constants=None):
    """
    Creates a game definition. It will automatically generate
    a full_time.lp file with the encoding using timesteps.

    Args:
        name (str): The name of the directory inside ./game_definitions:
            it must contain the following files
            - background.lp: Clingo file with all rules from the game
              in GDL format
            - default_initial.lp: Clingo file with all facts for the
              default initial state
            - all_initial.lp: Clingo file generating one stable model
              for each possible initial state
            - game_def.py: The game definition extending this class
        initial (str): Optional string or path to file to overwrite
            the default initial state
        constants (dic str->str): The dictionary of constants that must
            be passed to clingo on each execution. When omitted, every
            instance gets its own new empty dictionary.
    """
    self.name = name
    self.path = "./game_definitions/" + name
    self.background = self.path + "/background.lp"
    # The time-step encoding is only generated when the file is missing;
    # an existing full_time.lp is reused as it is.
    if not os.path.exists(self.path + "/full_time.lp"):
        log.info("Automatically generating full_time file")
        gdl_to_full_time(self.path, "/background.lp")
    self.full_time = self.path + "/full_time.lp"
    self.random_init = None
    # A `{}` default in the signature would be one single dictionary shared
    # by all instances created without constants, so it is created here.
    self.constants = {} if constants is None else constants
    if initial is None:
        self.initial = self.path + "/default_initial.lp"
    else:
        self.initial = initial
def render(self):
    """
    Logs the ascii representation of the latest step of the match.

    When the latest step has an even time step greater than zero, the
    step before it is logged as well, on top of the latest one.
    """
    steps = self.match.steps
    newest = steps[-1]
    shows_previous = newest.time_step > 0 and newest.time_step % 2 == 0
    if not shows_previous:
        log.info("\n" + newest.ascii)
        return
    log.info("\n" + steps[-2].ascii + "\n\n" + newest.ascii)
def divide_train_data(training_path,
                      action_size,
                      state_size,
                      clean=False,
                      test_size=0.1):
    """
    Reads a ';' separated training file and splits it into train and test.

    Args:
        training_path (str): Path to the csv file with the training data
        action_size (int): Number of columns encoding the action
        state_size (int): Number of columns encoding a state
        clean (bool): When True the data is passed through clean_data
            before splitting
        test_size (float): Value passed to train_test_split as test_size

    Returns:
        (dict, dict): The train and the test dictionary. Each one has the
        numpy arrays "input" (the first action_size + state_size columns),
        "next" (the following state_size columns) and "pred" (the second
        to last column).
    """
    frame = pd.read_csv(training_path, sep=';')
    if clean:
        frame = clean_data(frame, action_size, state_size)
    # Rows are shuffled once here, so the split itself must not shuffle
    shuffled = frame.sample(frac=1, random_state=rnd_state)
    shuffled = shuffled.reset_index(drop=True)
    train, test = train_test_split(shuffled,
                                   test_size=test_size,
                                   shuffle=False)
    log.info(
        "With a test size of {}, we get {} training and {} testing samples.".
        format(test_size, train.shape[0], test.shape[0]))

    input_end = action_size + state_size
    next_end = input_end + state_size

    def as_arrays(part):
        # Slices one partition into the three arrays used for training
        return {
            "input": part.iloc[:, 0:input_end].to_numpy(),
            "next": part.iloc[:, input_end:next_end].to_numpy(),
            "pred": part.iloc[:, -2:-1].to_numpy()
        }

    return as_arrays(train), as_arrays(test)
def choose_action(self, state, time_step=None, penalize_illegal=False):
    """
    The player chooses an action given a current state.

    The action with the highest prediction among the legal ones is
    selected. If every legal action was predicted with 0, a random legal
    action is returned instead.

    Args:
        state (State): The current state
        time_step (int): Not used by this player
        penalize_illegal (bool): When True, the action with the highest
            prediction over all actions is required to be legal

    Returns:
        action (Action): The selected action. Should be one from the list
        of state.legal_actions

    Raises:
        IllegalActionError: If penalize_illegal is set and the best
            predicted action is not legal in the state
    """
    encoder = self.game_def.encoder
    legal_actions_masked = encoder.mask_legal_actions(state)
    pi = self.net.predict_state(state)
    best_idx = np.argmax(pi)
    # Require best prediction to be legal
    if penalize_illegal and legal_actions_masked[best_idx] == 0:
        raise IllegalActionError("Invalid action",
                                 str(encoder.all_actions[best_idx]))
    # Check best prediction from all legal: the mask zeroes out the
    # predictions of the illegal actions
    legal_actions_pi = legal_actions_masked * pi
    if np.sum(legal_actions_pi) == 0:
        log.info(
            "All legal actions were predicted with 0 by {} choosing random"
            .format(self.name))
        # Here the index refers to state.legal_actions, not to the encoder
        random_idx = randint(0, len(state.legal_actions) - 1)
        return state.legal_actions[random_idx]
    best_idx = np.argmax(legal_actions_pi)
    best_name = encoder.all_actions[best_idx]
    return state.get_legal_action_from_str(str(best_name))
def choose_action(self, state):
    """
    The player chooses an action given a current state.

    The way of choosing depends on self.style:
        - "tree": looks up the state in self.tree_scores and takes the
          action with the maximum score (minimum when the scores belong
          to the other player). A random legal action is used when the
          state is not in the tree.
        - "rule": returns the last action of state.legal_actions
        - anything else: runs get_minmax_init from the current state,
          stores the learned rules and plays the first action of the
          returned match.

    Args:
        state (State): The current state

    Returns:
        action (Action): The selected action. Should be one from the list
        of state.legal_actions

    Raises:
        TimeoutError: If get_minmax_init returns no match
    """
    style = self.style
    legal = state.legal_actions

    if style == "rule":
        return legal[-1]

    if style == "tree":
        # Using tree
        state_facts = state.to_facts()
        if state_facts not in self.tree_scores:
            log.debug(
                "Minmax has no information in tree for current step, choosing random"
            )
            return legal[randint(0, len(legal) - 1)]
        scored_actions = self.tree_scores[state_facts].items()
        pick = max if self.scores_main_player == self.main_player else min
        best = pick(scored_actions, key=lambda i: i[1])
        wanted = Action.from_facts(best[0], self.game_def)
        # Return the instance that belongs to the state
        matching = [l_a for l_a in legal if l_a == wanted]
        return matching[0]

    print("Learning")
    # Using rules
    initial = fluents_to_asp_syntax(state.fluents, 0)
    known_rules = "\n".join(self.learned)
    match, tree, ex, ls, tl = get_minmax_init(self.game_def,
                                              self.main_player,
                                              initial,
                                              extra_fixed=known_rules,
                                              learning_rules=True)
    self.learned.extend(ls)
    if len(ls) > 0:
        log.info("{} learned new rules during game play".format(self.name))
    if match is None:
        raise TimeoutError
    action_name = str(match.steps[0].action.action)
    matching = [
        l_a for l_a in state.legal_actions
        if str(l_a.action) == action_name
    ]
    return matching[0]
def save_plot(plt, args, plot_type=None):
    """
    Saves the current plot as a png image inside benchmarks/img.

    Args:
        plt: Object used for saving the figure through its savefig method
        args (NameSpace): Provides game_name, plot_out and plot_type
        plot_type (str): Overwrites args.plot_type in the file name and
            in the logged message when given
    """
    # An empty plot_type falls back to the one defined in the arguments
    plot_type = plot_type or args.plot_type
    file_out = "benchmarks/img/{}/{}_{}.png".format(args.game_name,
                                                    args.plot_out,
                                                    plot_type)
    os.makedirs(os.path.dirname(file_out), exist_ok=True)
    plt.savefig(file_out, dpi=200, bbox_inches='tight')
    log.info(plot_type + " plot saved in " + file_out)
def remove_duplicates_training(file_name):
    """
    Rewrites a file in place keeping a single copy of every line.

    The first occurrence of each line is kept and the original order of
    the lines is preserved, so a header in the first line stays first.

    Args:
        file_name (str): Path of the file to rewrite
    """
    with open(file_name, "r") as csv_file:
        lines = csv_file.read().split("\n")
    # Splitting a text that ends with a new line leaves one empty entry at
    # the end that is not a line of the file
    if lines and lines[-1] == "":
        lines.pop()
    # dict keys keep the insertion order, a set would scramble the lines
    unique_lines = list(dict.fromkeys(lines))
    log.info("Removing duplicates in {}, from {} to {} lines".format(
        file_name, len(lines), len(unique_lines)))
    with open(file_name, "w") as writer:
        writer.writelines(line + "\n" for line in unique_lines)
def train(self, examples=None):
    """
    Trains the model with the examples given by the episodes

    Args:
        examples (list): One triple (input_state, target_pi, target_v)
            per training sample

    Raises:
        RuntimeError: If there is no loaded model
        ValueError: If no examples are given
    """
    if self.model is None:
        raise RuntimeError("A loaded model is required for training")
    # Without examples there is nothing to unzip into the three arrays
    if examples is None or len(examples) == 0:
        raise ValueError("At least one example is required for training")
    log.info("Training for {} epochs with batch size {}".format(
        self.args.n_epochs, self.args.batch_size))
    # Turn the list of triples into one array per component
    input_states, target_pis, target_vs = list(zip(*examples))
    input_states = np.asarray(input_states)
    target_pis = np.asarray(target_pis)
    target_vs = np.asarray(target_vs)
    history = self.model.fit(x=input_states,
                             y=[target_pis, target_vs],
                             batch_size=self.args.batch_size,
                             epochs=self.args.n_epochs,
                             verbose=0)
    log.info("Initial loss: {} Final loss: {}".format(
        history.history["loss"][0], history.history["loss"][-1]))
def train(self):
    """
    Trains the dynamics model and selects the final model by grid search.

    The training file is divided into train and test data. The loaded
    model is first fitted to predict the "next" labels, and it is then
    passed to run_3_fold_gridsearch, whose best model replaces self.model.
    The curves of both trainings are saved as pdf files inside
    approaches/supervised_ml/saved_models.

    Raises:
        RuntimeError: If there is no loaded model
    """
    if self.model is None:
        raise RuntimeError("A loaded model is required for training")

    encoder = self.game_def.encoder
    action_size = encoder.action_size
    state_size = encoder.state_size
    dyn_model = self.model
    training_file_path = "approaches/supervised_ml/train_data/{}/{}".format(
        self.game_def.name, self.args.training_file)
    train_data, test_data = divide_train_data(training_file_path,
                                              action_size,
                                              state_size,
                                              clean=True)

    # ----- Dynamics model
    early_stop = EarlyStopping(monitor='loss',
                               mode='min',
                               verbose=0,
                               patience=50,
                               min_delta=0.0005)
    log.info("Training dynamics model...")
    dyn_history = dyn_model.fit(train_data["input"],
                                train_data["next"],
                                epochs=500,
                                batch_size=50,
                                verbose=0,
                                validation_split=0.1,
                                callbacks=[early_stop])
    dyn_plot_path = "approaches/supervised_ml/saved_models/{}/{}_dynamic.pdf".format(
        self.game_def.name, self.model_name)
    os.makedirs(os.path.dirname(dyn_plot_path), exist_ok=True)
    show_acc_loss_curves(dyn_history).savefig(dyn_plot_path, format='pdf')
    results = test_model(dyn_model, test_data, test="next")
    log.info("Dynamics Network----- loss: {}, acc: {}".format(
        results[0], results[1]))

    # ----- Hyperparameter search space
    # Wider space kept as reference:
    # optimiser = ["nag","adam"]
    # learning_rate = [0.0001, 0.001, 0.01, 0.1]
    # batch_size = [10, 500, 100, 500]
    # reg_penalty = [0.01, 0.001, 0.0001]
    # max_epochs = [5000]
    # transfer = [True, False]
    # add_layers = [True, False]
    # The order of the keys fixes the position of every value inside the
    # tuples of combinations
    param_grid = {
        "optimiser": ["adam"],
        "learning_rate": [0.001],
        "batch_size": [500],
        "reg_penalty": [0.001],
        "epochs": [5000],
        "transfer": [True],
        "add_layers": [True, False],
    }
    # create list of all different parameter combinations
    combinations = list(product(*param_grid.values()))
    model, history = run_3_fold_gridsearch(
        train_data, test_data, combinations,
        "./approaches/supervised_ml/grid_search_reg.csv", dyn_model)

    # ----- Final model
    final_plot_path = "approaches/supervised_ml/saved_models/{}/{}.pdf".format(
        self.game_def.name, self.model_name)
    final_fig = show_acc_loss_curves(history)
    os.makedirs(os.path.dirname(final_plot_path), exist_ok=True)
    final_fig.savefig(final_plot_path, format='pdf')
    loss_test, acc_test = test_model(model, test_data, test="pred")
    loss_train, acc_train = test_model(model, train_data, test="pred")
    log.info("Final on train: loss: {} acc: {}".format(
        loss_train, acc_train))
    log.info("Final on test: loss: {} acc: {}".format(loss_test, acc_test))
    self.model = model
def __init__(self,
             game_def,
             possible_initial_states,
             player_name="a",
             opponent=None):
    """
    Creates the environment for a game with its spaces and its opponent.

    Args:
        game_def (GameDef): The game definition
        possible_initial_states: Passed to GameState together with the
            game definition
        player_name (str): Not used inside this constructor
        opponent (str): Name used for loading a StrategyPlayer as
            opponent. A RandomPlayer is used when it is None
    """
    self.game_def = game_def
    self.game_state = GameState(game_def, possible_initial_states)
    encoder = self.game_def.encoder
    # One discrete option per action, one binary value per state position
    self.action_space = Discrete(encoder.action_size)
    binary_positions = [Discrete(2) for _ in range(encoder.state_size)]
    self.observation_space = Tuple(binary_positions)
    self.reward_range = (-100, 100)
    if opponent is not None:
        log.info("Loading strategy player as opponent from " + opponent)
        self.opponent = StrategyPlayer(game_def, "startegy-" + opponent,
                                       "b")
    else:
        log.info("Using random player as opponent")
        self.opponent = RandomPlayer(game_def, "", "b")
def load_model_from_file(self):
    """
    Loads the model from a file using the model_name attribute.

    The model is created from <file_base>/<model_name>.json and its
    weights are read from <file_base>/<model_name>.h5. The loaded model
    is stored in self.model.
    """
    path = '{}/{}'.format(self.file_base, self.model_name)
    file_weights = "{}.h5".format(path)
    file_model = "{}.json".format(path)
    # load json and create model, the file is closed even if reading fails
    with open(file_model, 'r') as json_file:
        loaded_model_json = json_file.read()
    loaded_model = model_from_json(loaded_model_json)
    # load weights into new model
    loaded_model.load_weights(file_weights)
    log.info("Model loaded from {}".format(path))
    self.model = loaded_model
def add_to_training_dic(self, n_total, dic, node):
    """
    Adds the information from a node and its descendants to a dictionary

    Nodes without children are not added. When the step of the node is
    already in the dictionary, the entry is only replaced if the node has
    a larger n than the stored one.

    Args:
        n_total (int): Value of n used for normalizing the n of the node,
            the recursive calls pass the n of the parent
        dic (dict): Dictionary from steps to their training information,
            it is updated in place
        node: Node with the attributes step, children and n
    """
    step = node.step
    if step in dic:
        log.info("Duplicated step")
        log.info(step)
        if dic[step]['n'] >= node.n:
            return
    children = node.children
    if len(children) == 0:
        return
    dic[step] = dict(s_init=step.state,
                     action=step.action,
                     s_next=children[0].step.state,
                     p=node.n / n_total,
                     n=node.n)
    for child in children:
        self.add_to_training_dic(node.n, dic, child)
def print_in_file(self, file_name="tree_test.png"):
    """
    Function to plot generated tree as an image file

    Args:
        file_name (str): name of the image to be created, it is saved
            inside the directory ./img/
    """
    image_file_name = "./img/" + file_name

    def node_attributes(n):
        # The label is the ascii of the node, the rest comes from its style
        return 'label="{}" {}'.format(n.ascii, n.style(parent=n.parent))

    def edge_attributes(parent, child):
        return 'arrowhead=vee'

    os.makedirs(os.path.dirname(image_file_name), exist_ok=True)
    exporter = UniqueDotExporter(self.root,
                                 nodeattrfunc=node_attributes,
                                 edgeattrfunc=edge_attributes)
    exporter.to_picture(image_file_name)
    log.info("Tree image saved in {}".format(image_file_name))
def save_model(self, model_name=None):
    """
    Saves the model using its model name

    The model is written as json in <file_base>/<model_name>.json and its
    weights in <file_base>/<model_name>.h5. The directory of both files
    is created when it does not exist.

    Args:
        model_name (str): Value to overwrite the name for saving
    """
    if model_name is None:
        model_name = self.model_name
    path = '{}/{}'.format(self.file_base, model_name)
    file_weights = "{}/{}.h5".format(self.file_base, model_name)
    file_model = "{}/{}.json".format(self.file_base, model_name)
    # Both files only add an extension to path, so they share its directory
    os.makedirs(os.path.dirname(path), exist_ok=True)
    # serialize model to JSON
    with open(file_model, "w") as json_file:
        json_file.write(self.model.to_json())
    # serialize weights to HDF5
    self.model.save_weights(file_weights)
    log.info("Model saved in {}".format(path))
def add_to_training_dic(self, dic, node):
    """
    Adds the information from a node and its descendants to a dictionary

    Nodes without children are not added. When the step of the node is
    already in the dictionary, the entry is only replaced if the node has
    a larger n than the stored one.

    Args:
        dic (dict): Dictionary from steps to their training information,
            it is updated in place
        node: Node with the attributes step, children, prob and n
    """
    step = node.step
    if step in dic:
        log.info("Duplicated step")
        log.info(step)
        if dic[step]['n'] >= node.n:
            return
    children = node.children
    if len(children) == 0:
        return
    dic[step] = dict(s_init=step.state,
                     action=step.action,
                     s_next=children[0].step.state,
                     p=node.prob,
                     n=node.n)
    for child in children:
        self.add_to_training_dic(dic, child)
def clean_data(dataframe, action_size, state_size):
    """
    Removes the rows that repeat an input, keeping the one with highest n.

    Every row is joined into a single string and its first
    action_size + state_size characters are used as the key for finding
    duplicates. From the rows with the same key only the one with the
    largest value in column 'n' is kept.

    Args:
        dataframe (DataFrame): The training data, it must have a column 'n'
        action_size (int): Number of characters of the key for the action
        state_size (int): Number of characters of the key for the state

    Returns:
        DataFrame: The remaining rows converted with convert_cols to float
    """
    log.info("Cleaning data...")
    cleaned_df_str = convert_cols(dataframe, str)
    # NOTE(review): the key assumes that every input column is written as a
    # single character - confirm against the training files
    cleaned_df_str['combo'] = cleaned_df_str.apply(lambda x: ''.join(x),
                                                   axis=1)
    cleaned_df_str['combo'] = cleaned_df_str['combo'].str[:action_size +
                                                          state_size]
    # Column 'n' holds strings at this point: sorting it directly would be
    # lexicographic ("9" before "10"), so rows are ordered by its numeric
    # value. The stable sort keeps the file order among equal values.
    n_values = cleaned_df_str['n'].astype(float).to_numpy()
    descending_n = (-n_values).argsort(kind="mergesort")
    cleaned_df_str = cleaned_df_str.iloc[descending_n].drop_duplicates(
        ['combo'])
    # The axis has to be given by keyword, pandas 2 rejects it as positional
    cleaned_df_str = cleaned_df_str.drop(columns='combo')
    cleaned_df = convert_cols(cleaned_df_str, float)
    diff = dataframe.shape[0] - cleaned_df.shape[0]
    log.info(
        "Removed {} duplicates with different probability scores".format(diff))
    log.info("This leaves {} instances for training and testing".format(
        cleaned_df.shape[0]))
    return cleaned_df
def run_3_fold_gridsearch(train_data, test_data, combinations, filename,
                          dyn_model):
    """
    Runs a 3-fold grid search over the given parameter combinations.

    For every combination one model is created and trained per fold. The
    metrics of all folds are saved in a csv file, which is rewritten after
    every combination.

    Args:
        train_data (dict): The arrays "input" and "pred" used in the folds
        test_data (dict): Not used by this function
        combinations (list): One tuple per combination with the values
            ordered as (optimiser, learning_rate, batch_size, reg_penalty,
            epochs, transfer, add_layers)
        filename (str): Path of the ';' separated csv file for the results
        dyn_model: Passed to create_model as base_model

    Returns:
        (model, history): The model and the training history of the last
        fold for the combination with the best mean accuracy.
        (None, []) if no combination has a mean accuracy above 0.
    """
    num_splits = 3
    # Order of the columns in the csv: first the ones declared up front,
    # then the ones that only appear in the rows
    columns = [
        'transfer', 'deepened', 'optimiser', 'learning rate', 'batch size',
        'loss1', 'acc1', 'loss2', 'acc2', 'loss3', 'acc3', 'reg_penalty',
        'epoch_stopped1', 'epoch_stopped2', 'epoch_stopped3'
    ]
    rows = []
    best_model = None
    best_acc = 0
    best_history = []
    best_params = []
    # 3-fold grid search over the combinations defined above
    for i, combination in enumerate(combinations):
        optimiser = combination[0]
        learning_rate = combination[1]
        batch_size = combination[2]
        reg_penalty = combination[3]
        epochs = combination[4]
        transfer = combination[5]
        deepen = combination[6]
        # No random_state: it has no effect without shuffling and recent
        # versions of scikit-learn reject that combination
        kf = KFold(n_splits=num_splits, shuffle=False)
        metrics_dict = {}
        log.info("{}/{}: {} - folds completed: ".format(
            i + 1, len(combinations), combination))
        acc_total = 0
        for j, (train_index,
                test_index) in enumerate(kf.split(train_data["input"])):
            log.info("starting folding {}".format(j))
            X_train = train_data["input"][train_index]
            X_test = train_data["input"][test_index]
            y_train = train_data["pred"][train_index]
            y_test = train_data["pred"][test_index]
            model = create_model(optimiser=optimiser,
                                 learning_rate=learning_rate,
                                 c=reg_penalty,
                                 transfer_learning=transfer,
                                 deepen_model=deepen,
                                 base_model=dyn_model)
            es = EarlyStopping(monitor='loss',
                               mode='min',
                               verbose=0,
                               patience=80,
                               min_delta=0.001)
            hist = model.fit(X_train,
                             y_train,
                             epochs=epochs,
                             batch_size=batch_size,
                             verbose=0,
                             use_multiprocessing=True,
                             callbacks=[es],
                             validation_split=0.1)
            # try to evaluate the model
            loss, acc = model.evaluate(X_test, y_test, verbose=0)
            acc_total += acc
            metrics_dict[j + 1] = {
                "loss": loss,
                "acc": acc,
                "epoch_stopped": es.stopped_epoch
            }
        acc = acc_total / num_splits
        if acc > best_acc:
            log.info("New best model with acc {}".format(acc))
            # model and hist are the ones from the last fold
            best_model = model
            best_acc = acc
            best_history = hist
            best_params = combination
        row = {
            'transfer': transfer,
            'deepened': deepen,
            'optimiser': optimiser,
            'learning rate': learning_rate,
            'batch size': batch_size,
            'reg_penalty': reg_penalty
        }
        for fold in range(1, num_splits + 1):
            fold_metrics = metrics_dict[fold]
            row['epoch_stopped{}'.format(fold)] = fold_metrics["epoch_stopped"]
            row['loss{}'.format(fold)] = fold_metrics["loss"]
            row['acc{}'.format(fold)] = fold_metrics["acc"]
        rows.append(row)
        # DataFrame.append no longer exists in pandas 2, the frame is
        # built from all the rows collected so far
        pd.DataFrame(rows, columns=columns).to_csv(filename, sep=";")
    log.info("Best model found using parameters:")
    print(best_params)
    return best_model, best_history
def build(game_def, args):
    """
    Runs the required computation to build a player.
    For instance, creating a tree or training a model.
    The computed information should be stored to be accessed later on
    using the name_style

    Here a network is trained iteratively: in every iteration the examples
    of args.n_episodes episodes are used for training a copy of the best
    network, and the copy becomes the best network when it wins more
    matches against it.

    Args:
        game_def (GameDef): The game definition used for the creation
        args (NameSpace): A name space with all the attributes defined in
            add_parser_build_args
    """
    best_net = NetAlpha(game_def, args.model_name, model=None, args=args)
    best_net.load_model_from_args()
    game_def.get_random_initial()
    using_random = not args.train_rand is None
    if (using_random):
        log.info(
            "Using random seed {} for initial states in training".format(
                args.train_rand))
        game_def.get_random_initial()
        initial_states = game_def.random_init
        # A Random instance with the given seed makes the order reproducible
        random.Random(args.train_rand).shuffle(initial_states)
    else:
        log.info("Using default initial state in training {} ".format(
            game_def.initial))
        initial_states = [game_def.initial]
    # NOTE(review): number_initial is not read anywhere below
    number_initial = len(
        initial_states) if args.n_vs > len(initial_states) else args.n_vs
    for i in range(args.n_train):
        log.info("------- Iteration {} --------".format(i))
        training_examples = []
        for e in range(args.n_episodes):
            log.debug("\t\tEpisode {}...".format(e))
            new_examples = TreeZero.run_episode(game_def, best_net)
            training_examples += new_examples
        # NOTE(review): this assignment comes after run_episode, so episodes
        # of an iteration run with the previously set initial state -
        # confirm that this is intended
        game_def.initial = initial_states[i % len(initial_states)]
        new_net = best_net.copy()
        # Training new net
        log.info("Training net with {} examples".format(
            len(training_examples)))
        new_net.train(training_examples)
        # Comparing nets: the old net starts in the first pair of players
        # and the new one in the second
        log.info("Comparing networks...")
        p_old = AlphaZero(game_def, "training_old", "a", best_net)
        p_new = AlphaZero(game_def, "training_new", "a", new_net)
        benchmarks = Match.vs(game_def,
                              args.n_vs, [[p_old, p_new], [p_new, p_old]],
                              initial_states, ["old_net", "new_net"],
                              penalize_illegal=args.penalize_illegal)
        log.info(benchmarks)
        # The results under "b" are read as the ones of the new net and the
        # results under "a" as the ones of the old net
        new_wins = benchmarks["b"]["wins"]
        old_wins = benchmarks["a"]["wins"]
        log.info(
            "New: Wan {} Lost Illegal {}\nOld network: Wan {} Lost Illegal {}"
            .format(new_wins, benchmarks["b"]["matches_lost_by_illegal"],
                    old_wins, benchmarks["a"]["matches_lost_by_illegal"]))
        # Updating best net, only with strictly more wins
        if new_wins > old_wins:
            log.info(
                "{}--------------- New network is better {}vs{}------------------{}"
                .format(bcolors.FAIL, new_wins, old_wins, bcolors.ENDC))
            best_net = new_net
            # Saved under <model_name>/<iteration>
            best_net.save_model(
                model_name="{}/{}".format(best_net.model_name, i))
        if args.vis_tree:
            # Visualizing tree of best net
            game_def.initial = initial_states[0]
            state = game_def.get_initial_state()
            p_new.visualize_net(
                state, "train-{}-iter-{}-new".format(new_net.model_name, i))
    log.info("Saving model")
    best_net.save_model()
def build(game_def, args):
    """
    Runs the required computation to build a player.
    For instance, creating a tree or training a model.
    The computed information should be stored to be accessed later on
    using the name_style

    Here the background, the language bias and the examples are joined in
    a single file that is passed to ILASP. The output of ILASP is saved as
    a strategy together with the lines of the language bias that do not
    start with '#'.

    Args:
        game_def (GameDef): The game definition used for the creation
        args (NameSpace): A name space with all the attributes defined in
            add_parser_build_args

    Returns:
        dict: The time in milliseconds used for saving the strategy,
        under the key 'save_time'
    """
    args.rules_file_name = None
    args.tree_image_file_name = None
    args.train_file_name = None
    args.tree_name = None
    if args.ilasp_examples_file_name is None:
        log.debug("Generating examples using min_max_asp algorithm")
        args.ilasp_examples_file_name = 'temp_examples.las'
        PrunedMinmaxPlayer.build(game_def, args)

    base_path = './approaches/ilasp/{}/'.format(game_def.name)

    # ----- Join the three parts of the learning task in one file
    with open(args.background_path, 'r') as background_file:
        background_lines = background_file.readlines()
    with open('{}languages/{}'.format(base_path, args.language_bias_name),
              'r') as language_bias_file:
        langauage_bias_lines = language_bias_file.readlines()
    with open(
            '{}examples/{}'.format(base_path, args.ilasp_examples_file_name),
            'r') as examples_file:
        example_lines = examples_file.readlines()
    task_lines = background_lines + langauage_bias_lines + example_lines
    with open('{}temporal.las'.format(base_path), 'w') as complete_file:
        complete_file.write("".join(task_lines))

    # ----- Run ILASP
    command = [
        "ILASP ", "--clingo5 ", "--version=2i",
        '{}temporal.las'.format(base_path), "--multi-wc ", "--simple",
        "--max-rule-length=6", "--max-wc-length=5", "-ml=5", "-q"
    ]
    if args.ilasp_arg is not None:
        command.extend("--" + a for a in args.ilasp_arg)
    string_command = " ".join(command)
    log.info("Running ilasp command: \n{}".format(string_command))
    raw_output = subprocess.check_output(string_command, shell=True)
    result = raw_output.decode("utf-8")
    log.debug("Found strategy: \n{}".format(result))

    # ----- Save the strategy, only this part is timed
    t0 = time.time()
    strategy_file_path = '{}/strategies/{}'.format(base_path,
                                                   args.strategy_name)
    os.makedirs(os.path.dirname(strategy_file_path), exist_ok=True)
    langauage_bias_predicates = [
        l for l in langauage_bias_lines if l[0] != "#"
    ]
    result = result + "".join(langauage_bias_predicates)
    with open(strategy_file_path, 'w') as startegy:
        startegy.write(result)
    log.debug("Strategy saved in {}/strategies/{}".format(
        base_path, args.strategy_name))
    t1 = time.time()
    return {'save_time': round((t1 - t0) * 1000, 3)}
def simulate(game_def, players, depth=None, ran_init=False, signal_on=True):
    """
    Simulates a match between two players taking turns.

    Args:
        game_def (GameDef): The game definition
        players (Player,Player): A tuple of the players, the first one
            chooses the action in the even time steps
        depth:
            - n: Generate until depth n or terminal state reached
            - None: Generate until a terminal state is reached
        ran_init (bool): When True the initial state is obtained from
            game_def.get_random_initial()
        signal_on (bool): When True an alarm of 3 seconds limits the time
            for choosing each action

    Returns:
        (Match, dict): The simulated match and the average response time
        in milliseconds of each player, under the keys 'a' and 'b'
    """

    def handler(signum, frame):
        raise TimeoutError("Action time out")

    if signal_on:
        signal.signal(signal.SIGALRM, handler)
    if ran_init:
        initial = game_def.get_random_initial()
    else:
        initial = game_def.initial
    state = StateExpanded.from_game_def(game_def,
                                        initial,
                                        strategy=players[0].strategy)
    match = Match([])
    time_step = 0
    continue_depth = depth is None or time_step < depth
    log.debug("\n--------------- Simulating match ----------------")
    log.debug("\na: {}\nb: {}\n".format(players[0].name, players[1].name))
    letters = ['a', 'b']
    response_times = {'a': [], 'b': []}
    while not state.is_terminal and continue_depth:
        turn = time_step % 2
        if signal_on:
            signal.alarm(3)
        t0 = time.time()
        try:
            selected_action = players[turn].choose_action(state)
        except TimeoutError:
            log.info(
                "Time out for player {}, choosing random action".format(
                    letters[turn]))
            index = randint(0, len(state.legal_actions) - 1)
            selected_action = state.legal_actions[index]
        finally:
            # The alarm is cancelled on every path: if any other error
            # leaves this function, a pending alarm would otherwise raise
            # a TimeoutError later in unrelated code
            if signal_on:
                signal.alarm(0)
        t1 = time.time()
        response_times[letters[turn]].append(round((t1 - t0) * 1000, 3))
        step = Step(state, selected_action, time_step)
        match.add_step(step)
        time_step += 1
        continue_depth = depth is None or time_step < depth
        # The strategy used is the one of the player that moves next
        state = state.get_next(selected_action,
                               strategy_path=players[time_step % 2].strategy)
    # The last state is added as a step without action
    match.add_step(Step(state, None, time_step))
    log.debug(match)
    # A player without any action gets an average of 0
    average_times = {
        k: round(sum(lst) / max(len(lst), 1), 3)
        for k, lst in response_times.items()
    }
    return match, average_times
def on_train_end(self, logs):
    """
    Saves the collected information and logs the training time
    at the end of training

    Args:
        logs: Not used
    """
    elapsed = timeit.default_timer() - self.train_start
    self.save_to_file()
    message = 'Training finished, took {:.3f} seconds'.format(elapsed)
    log.info(message)
def on_train_begin(self, logs):
    """
    Starts the timer, resets the collected information and logs the
    number of training steps at the beginning of training

    Args:
        logs: Not used
    """
    self.train_start = timeit.default_timer()
    model = self.model
    self.metrics_names = model.metrics_names
    self.all_info = list()
    n_steps = self.params['nb_steps']
    log.info('Training for {} steps ...'.format(n_steps))
getattr(pc, "add_parser_build_args")(approach_parser) # ---------------------------- Setting default arguments ---------------------------- args = parser.parse_args() n = args.num_repetitions log.set_level(args.log.upper()) if args.const is None: constants = {} else: constants = {c.split("=")[0]: c.split("=")[1] for c in args.const} game_def = GameDef.from_name(args.game_name, constants=constants) using_random = not args.random_initial_state_seed is None using_fixed_initial = not args.initial is None if (using_random): log.info("Using random seed {} for initial states".format( args.random_initial_state_seed)) game_def.get_random_initial() initial_states = game_def.random_init random.Random(args.random_initial_state_seed).shuffle(initial_states) elif (using_fixed_initial): log.info("Using fixed initial state {}".format(args.initial)) initial_states = [game_def.path + "/" + args.initial] else: log.info("Using default initial state {}".format(game_def.initial)) initial_states = [game_def.initial] # ---------------------------- Computing VS ---------------------------- if args.selected_approach == 'vs': style_a = args.pA_style style_b = args.pB_style