def lookup_all_games():
    """Look up every known game and run a depth charge on its state machine.

    Games named in ``known_to_fail`` are skipped.  A game whose lookup raises
    ``lookup.LookupFailed`` is recorded rather than aborting the run; if any
    were recorded, they are logged and the function fails with an
    ``AssertionError`` whose message is the list of failed game names.
    """
    # ensure things are initialised
    from ggplib.util.init import setup_once
    setup_once()

    known_to_fail = (
        'amazonsTorus_10x10',
        'atariGoVariant_7x7',
        'gt_two_thirds_4p',
        'gt_two_thirds_6p',
        'linesOfAction',
    )

    failed = []
    for game in lookup.get_all_game_names():
        if game in known_to_fail:
            continue

        try:
            game_info = lookup.by_name(game, build_sm=False)
            assert game_info.game == game
            sm = game_info.get_sm()

            # run some depth charges to ensure we have valid statemachine
            interface.depth_charge(sm, 1)

            log.verbose("DONE GETTING Statemachine FOR GAME %s %s" % (game, sm))
        except lookup.LookupFailed as exc:
            log.warning("Failed to lookup %s: %s" % (game, exc))
            failed.append(game)

    if not failed:
        return

    log.error("Failed games %s" % (failed, ))
    assert False, failed
def cleanup(self, keep_sm=False):
    """Clean up the player and release the native objects held by this match.

    The player's own ``cleanup()`` is attempted first; any exception it raises
    is logged (message plus traceback) and swallowed so that the
    deallocation below still runs.

    Args:
        keep_sm: when true, the state machine is left allocated and
            ``self.sm`` is not cleared.
    """
    try:
        self.player.cleanup()
        if self.verbose:
            log.verbose("done cleanup player: %s" % self.player)

    except Exception as exc:
        # best effort: a misbehaving player must not prevent deallocation
        log.error("FAILED TO CLEANUP PLAYER: %s" % exc)
        log.error(traceback.format_exc())

    # cleanup c++ stuff
    if self.verbose:
        log.warning("cleaning up c++ stuff")

    # all the basestates
    for bs in self.states:
        interface.dealloc_basestate(bs)
    self.states = []

    if self.joint_move:
        interface.dealloc_jointmove(self.joint_move)
        self.joint_move = None

    if self.sm and not keep_sm:
        interface.dealloc_statemachine(self.sm)
        self.sm = None

    if self.verbose:
        log.info("match - done cleaning up")
def send_request_to_train_nn(self):
    """Send a ``RequestNetworkTrain`` message for the next step to the trainer.

    Must not be called while a training request is already outstanding.  The
    base training config is stamped with the next step number before being
    attached to the message, and ``self.training_in_progress`` is set once
    the message has been handed to the trainer's worker.
    """
    assert not self.training_in_progress

    upcoming_step = self.conf.current_step + 1
    log.verbose("send_request_to_train_nn() @ step %s" % upcoming_step)

    # the shared training config must describe the same game / generation
    training_config = self.conf.base_training_config
    assert training_config.game == self.conf.game
    assert training_config.generation_prefix == self.conf.generation_prefix
    training_config.next_step = upcoming_step

    request = msgs.RequestNetworkTrain()
    request.game = self.conf.game
    request.train_conf = training_config
    request.network_model = self.conf.base_network_model
    request.generation_description = self.conf.base_generation_description

    # send out message to train
    self.the_nn_trainer.worker.send_msg(request)
    log.info("sent out request to the_nn_trainer!")

    self.training_in_progress = True
def checkpoint(self):
    """Periodic housekeeping: save samples, maybe request training, re-arm.

    When samples have accumulated they are saved via ``save_sample_data()``.
    Once the count exceeds ``conf.num_samples_to_train`` the saved samples
    are recorded as pending (if nothing is pending yet) and, when no training
    is in progress, a training request is sent to the trainer if one exists.
    Finally any still-active scheduled checkpoint is cancelled and a new call
    is scheduled ``conf.checkpoint_interval`` later.
    """
    sample_count = len(self.accumulated_samples)
    log.verbose("entering checkpoint with %s sample accumulated" % sample_count)

    if sample_count > 0:
        gen_samples = self.save_sample_data()

        # NOTE(review): nesting reconstructed from whitespace-collapsed source;
        # confirm the training request is meant to be retried on every
        # checkpoint once enough samples exist.
        if sample_count > self.conf.num_samples_to_train:
            if self.pending_gen_samples is None:
                next_generation = self.get_generation_name(self.conf.current_step + 1)
                log.info("data done for: %s" % next_generation)
                self.pending_gen_samples = gen_samples

            if not self.training_in_progress:
                if self.the_nn_trainer is not None:
                    self.send_request_to_train_nn()
                else:
                    log.error("There is no trainer - please start")

    # cancel any existing cb
    previous_cb = self.checkpoint_cb
    if previous_cb is not None and previous_cb.active():
        previous_cb.cancel()

    # call checkpoint again in n seconds
    self.checkpoint_cb = reactor.callLater(self.conf.checkpoint_interval,
                                           self.checkpoint)
def go(sm, seconds_to_run):
    """Run random rollouts on ``sm`` for roughly ``seconds_to_run`` seconds.

    When the module-level ``rollouts_in_c`` flag is set the work is delegated
    to ``interface.depth_charge`` and its result is returned unchanged.
    Otherwise rollouts are played here until the deadline passes.

    Returns (python path):
        ``(msecs_taken, rollouts, num_state_changes)`` - elapsed wall time in
        milliseconds, number of completed rollouts, and the total number of
        state transitions over all rollouts.
    """
    mode_suffix = "(in c)" if rollouts_in_c else ""
    log.verbose("running depth charges for %s seconds %s" % (seconds_to_run,
                                                             mode_suffix))

    if rollouts_in_c:
        return interface.depth_charge(sm, seconds_to_run)

    role_count = len(sm.get_roles())

    # cache some objects
    joint_move = sm.get_joint_move()
    base_state = sm.new_base_state()

    # resolution is assumed to be good enough not to cheat too much here (we
    # return msecs_taken so it is all good)
    started_at = time.time()
    now = started_at
    deadline = started_at + seconds_to_run

    rollouts = 0
    num_state_changes = 0
    while now < deadline:
        # back to the initial state for a fresh rollout
        sm.reset()

        # number of moves played in this rollout
        depth = 0
        while not sm.is_terminal():
            # choose a random move for each role
            for role_index in range(role_count):
                legal_state = sm.get_legal_state(role_index)
                pick = random.randrange(0, legal_state.get_count())
                joint_move.set(role_index, legal_state.get_legal(pick))

            # play move, the base_state will be new state
            sm.next_state(joint_move, base_state)

            # update the state machine to new state
            sm.update_bases(base_state)
            depth += 1

        # simulate side effect of getting the scores from the statemachine
        for role_index in range(role_count):
            sm.get_goal_value(role_index)

        # stats
        rollouts += 1
        num_state_changes += depth

        # update the time
        now = time.time()

    msecs_taken = int(1000 * (now - started_at))
    return msecs_taken, rollouts, num_state_changes
def summary(self):
    """Write the keras model summary to the log, one verbose entry per line."""
    # print_fn is handed each summary line; collect them first, then log
    captured = []
    self.keras_model.summary(print_fn=captured.append)

    for text in captured:
        log.verbose(text)
def lazy_load(self, the_game_store):
    """Build the model and state machine on first use.

    Does nothing when ``self.sm`` is already set.  Otherwise builds from
    ``self.gdl_str`` via ``builder.build_sm`` (asking it to add the result to
    ``the_game_store``) and stores the result on ``self.model`` / ``self.sm``.
    """
    if self.sm is not None:
        return

    # ok here we can cache the game XXX
    built = builder.build_sm(self.gdl_str,
                             the_game_store=the_game_store,
                             add_to_game_store=True)
    self.model, self.sm = built

    log.verbose("Lazy loading done for %s" % self.game)
def start(self, meta_time=10, move_time=5, initial_basestate=None, game_depth=0):
    """Begin a new game: position the state machine and start every player.

    Args:
        meta_time: passed to each newly created ``Match``.
        move_time: passed to each newly created ``Match``.
        initial_basestate: if given, the state machine is moved to this state
            (which must not be terminal); otherwise it is reset.
        game_depth: forwarded to each match's ``do_start``.

    On the first call a ``Match`` is created per (player, role) and the
    matches are stored in state-machine role order.  On later calls the
    existing matches are fast-reset and restarted.
    """
    self.match_id = self.create_match_id()
    assert self.players

    if initial_basestate is None:
        # reset state machine, returns it to initial state.
        self.sm.reset()
    else:
        # update the state machine
        self.sm.update_bases(initial_basestate)

        # check the game isn't finished
        assert not self.sm.is_terminal()

    if self.matches is not None:
        # NOTE(review): self.matches is in role order while self.players is in
        # whatever order it was populated - confirm these always agree.
        for (player, role), match in zip(self.players, self.matches):
            match.fast_reset(self.match_id, player, role)
            match.do_start(initial_basestate=initial_basestate,
                           game_depth=game_depth)
        return

    created = []
    for player, role in self.players:
        match = Match(self.game_info, self.match_id, role, meta_time, move_time,
                      player, verbose=self.verbose, no_cleanup=True)
        created.append(match)

        # call do start...
        if self.verbose:
            log.verbose("Starting for %s / %s" % (match.role, match.player))
        match.do_start(initial_basestate=initial_basestate,
                       game_depth=game_depth)

    # reorder matches to roles (and check that we have them)
    self.matches = []
    for role in self.sm.get_roles():
        for candidate in created:
            if role == candidate.role:
                self.matches.append(candidate)
                break

    assert len(self.matches) == len(self.sm.get_roles())
def load(self, verbose=True):
    """Populate the idx and game mappings from the ``*.kif`` rulesheets.

    For each rulesheet (in sorted order, skipping names starting with
    ``tmp``) a ``GameInfo`` is created.  Its ``idx`` is read from the game's
    ``sig.json`` when that file exists; otherwise the symbol map is generated
    and the resulting ``idx`` is written to ``sig.json``.

    Args:
        verbose: when true, progress is logged.

    Raises:
        Exception: if a symbol map could not be created for a game, or if two
            games share the same ``idx``.
    """
    suffix = ".kif"

    if verbose:
        log.info("Building the database")

    filenames = self.rulesheets_store.listdir("*.kif")
    for fn in sorted(filenames):
        # skip tmp files
        if fn.startswith("tmp"):
            continue

        # Strip only the trailing extension: str.replace() would also remove
        # any ".kif" occurring inside the name and yield the wrong game name.
        game = fn[:-len(suffix)] if fn.endswith(suffix) else fn

        # get the gdl
        gdl_str = self.rulesheets_store.load_contents(fn)

        info = GameInfo(game, gdl_str)

        # first does the game directory exist?
        the_game_store = self.games_store.get_directory(game, create=True)
        if the_game_store.file_exists("sig.json"):
            info.idx = the_game_store.load_json("sig.json")['idx']

        else:
            if verbose:
                log.verbose("Creating signature for %s" % game)

            info.get_symbol_map()

            if info.symbol_map is None:
                log.warning("FAILED to add: %s" % game)
                raise Exception("FAILED TO add %s" % game)

            # save as json
            assert info.idx is not None
            the_game_store.save_json("sig.json", dict(idx=info.idx))

        assert info.idx is not None
        if info.idx in self.idx_mapping:
            other_info = self.idx_mapping[info.idx]
            log.warning("DUPE GAMES: %s %s!=%s" % (info.idx, game, other_info.game))
            raise Exception("Dupes not allowed in database")

        self.idx_mapping[info.idx] = info
        self.game_mapping[info.game] = info
def create_and_play(sm):
    """Play a fixed five-move game on a two-role state machine and check it.

    Asserts that the first role starts with nine legal moves including
    ``(mark 2 2)``, plays the scripted joint moves, then asserts the game is
    terminal with goal values 100 for role 0 and 0 for role 1.
    """
    def legal_moves(role_index, legal_state):
        # translate every legal of the given legal state into its move string
        return [sm.legal_to_move(role_index, legal_state.get_legal(idx))
                for idx in range(legal_state.get_count())]

    assert len(sm.get_roles()) == 2
    sm.reset()
    assert sm.get_initial_state()

    first_role_legals = sm.get_legal_state(0)

    # 9 possible moves initially
    assert first_role_legals.get_count() == 9

    opening_moves = legal_moves(0, first_role_legals)
    assert "(mark 2 2)" in opening_moves

    script = [
        ("(mark 2 2)", "noop"),
        ("noop", "(mark 3 3)"),
        ("(mark 2 3)", "noop"),
        ("noop", "(mark 1 1)"),
        ("(mark 2 1)", "noop"),
    ]

    # get some states
    joint_move = sm.get_joint_move()
    base_state = sm.new_base_state()

    for move in script:
        assert not sm.is_terminal()

        log.info("Playing %s" % (move, ))
        for ri in range(len(sm.get_roles())):
            legal_state = sm.get_legal_state(ri)
            available = legal_moves(ri, legal_state)
            log.verbose("%s moves %s" % (sm.get_roles()[ri], available))

            # locate the scripted move among this role's legals
            position = available.index(move[ri])
            joint_move.set(ri, legal_state.get_legal(position))

        # update state machine
        sm.next_state(joint_move, base_state)
        sm.update_bases(base_state)

    assert sm.is_terminal()
    assert sm.get_goal_value(0) == 100
    assert sm.get_goal_value(1) == 0
def main_3(game_file, output_file, seconds_to_run):
    """Run depth charges for a GDL file and write the results as key=value lines.

    Args:
        game_file: path of the GDL rulesheet to build the state machine from.
        output_file: path of the result file (overwritten).
        seconds_to_run: forwarded to ``go()``.

    The output always starts with a ``version=`` line.  On success it is
    followed by ``millisecondsTaken``, ``numStateChanges`` and
    ``numRollouts``; if anything in that step raises an ``Exception`` the
    traceback is printed and an ``errorMessage=`` line is written instead of
    whatever lines had not yet been written.
    """
    # builds without accessing database
    with open(game_file) as gdl_file:
        gdl_str = gdl_file.read()

    _, game_info = lookup.by_gdl(gdl_str)
    sm = game_info.get_sm()

    if debug:
        log.verbose("GAME_FILE %s" % game_file)
        log.verbose("OUTPUT_FILE %s" % output_file)
        log.verbose("SECONDS_TO_RUN %s" % seconds_to_run)

    # for the result; the context manager closes the file on every exit path
    with open(output_file, "w") as f:
        f.write("version=%s\n" % VERSION)

        try:
            msecs_taken, rollouts, num_state_changes = go(sm, seconds_to_run)

            # see gdl-perf
            f.write("millisecondsTaken=%s\n" % msecs_taken)
            f.write("numStateChanges=%s\n" % num_state_changes)
            f.write("numRollouts=%s\n" % rollouts)

        except Exception as exc:
            error_str = "Error %s" % exc
            traceback.print_exc()
            f.write("errorMessage=%s\n" % (error_str, ))
def play_single_move(self, last_move=None):
    """Ask every player for a move, apply the joint move, and return the moves.

    Args:
        last_move: the previous joint move, passed to each match's
            ``do_play``.

    Returns:
        A tuple of the moves returned by the players, in match order.

    Each returned move is matched against the role's legal moves; the
    assertion after the loop fails if any player's move was not found.
    """
    assert not self.finished()

    accepted = []
    played = []
    pairings = zip(self.matches, self.sm.get_roles())
    for role_index, (match, role) in enumerate(pairings):
        if self.verbose:
            log.verbose("===============================================================")
            log.verbose("do_play(%s) for %s / %s" % (last_move, role, match.player))

        move = match.do_play(last_move)
        played.append(move)

        # check the move is in the legals
        legal_state = self.sm.get_legal_state(role_index)
        legal_count = legal_state.get_count()
        choices = [legal_state.get_legal(ii) for ii in range(legal_count)]

        for choice in choices:
            if self.sm.legal_to_move(role_index, choice) == move:
                self.joint_move.set(role_index, choice)
                accepted.append(move)
                break

    # every player must have produced a legal move
    assert len(accepted) == len(self.matches)
    if self.verbose:
        log.verbose("playing %s" % (accepted,))

    self.sm.next_state(self.joint_move, self.next_basestate)
    self.sm.update_bases(self.next_basestate)

    return tuple(played)
def gather_data(self):
    """Load sample data files and return them as a list of ``LevelData``.

    The samples buffer (and buckets) are created on first use.  Each sample
    data set yielded by the buffer is transformed if it has not been already,
    copied into a new ``LevelData`` whose level is its position in the
    result, and given a validation split.

    Returns:
        list of ``LevelData``, one per sample data set, in iteration order.
    """
    # abbreviate, easier on the eyes
    conf = self.train_config

    if self.samples_buffer is None:
        # parenthesised form prints the same text on Python 2 and Python 3
        print("Recreating samples buffer")
        self.samples_buffer = SamplesBuffer()
        self.buckets = Buckets(conf.resample_buckets)

    total_samples = 0
    leveled_data = []
    for fn, sample_data in self.samples_buffer.files_to_sample_data(conf):
        assert sample_data.game == conf.game

        log.debug("Proccesing %s" % fn)
        log.debug("Game %s, with gen: %s and sample count %s" % (sample_data.game,
                                                                 sample_data.with_generation,
                                                                 sample_data.num_samples))

        if not sample_data.transformed:
            sample_data.transform_all(self.transformer)

        # the level index is this data set's position in the result list
        level_data = LevelData(len(leveled_data))

        for ins, outs in sample_data:
            level_data.add(ins, outs)

        log.verbose("Validation split")
        level_data.validation_split(conf.validation_split)

        leveled_data.append(level_data)
        total_samples += len(level_data)

    log.info("total samples: %s" % total_samples)
    return leveled_data
def check_running_processes(self):
    """Poll the tracked processes, enforce the timeout, and reschedule.

    Processes that have exited are dropped from ``self.procs`` after their
    return code and any stdout / stderr output are logged.  Once
    ``self.timeout_time`` has passed, each remaining command is sent SIGTERM
    once; one second later each is sent SIGKILL once.  While processes
    remain this method is scheduled again in 0.1 seconds; otherwise
    ``self.cb_on_completion()`` is called.
    """
    pending, self.procs = self.procs, []

    for cmd, proc in pending:
        retcode = proc.poll()
        if retcode is None:
            # still running, keep tracking it
            self.procs.append((cmd, proc))
        else:
            log.debug("cmd '%s' exited with return code: %s" % (cmd, retcode))
            out_text = proc.stdout.read().strip()
            err_text = proc.stderr.read().strip()
            if out_text:
                log.verbose("stdout:%s" % out_text)
            if err_text:
                log.warning("stderr:%s" % err_text)

    # first stage: ask politely
    if time.time() > self.timeout_time:
        for cmd, proc in self.procs:
            if cmd in self.killing:
                continue
            self.killing.add(cmd)
            log.warning("cmd '%s' taking too long, terminating" % cmd)
            os.kill(proc.pid, SIGTERM)

    # second stage: a second after the timeout, force it
    if time.time() > self.timeout_time + 1:
        for cmd, proc in self.procs:
            if cmd in self.terminating:
                continue
            self.terminating.add(cmd)
            log.warning("cmd '%s' didn't terminate gracefully, killing" % cmd)
            os.kill(proc.pid, SIGKILL)

    if not self.procs:
        self.cb_on_completion()
    else:
        reactor.callLater(0.1, self.check_running_processes)
def finalise_match(self, last_move):
    """Record final scores and deliver the last move to every player.

    Args:
        last_move: the final joint move, passed to each match's ``do_play``.

    The goal value of each role is stored in ``self.scores`` keyed by role.
    Each match is then given the final move (its ``do_play`` is expected to
    answer ``"done"``) and stopped.
    """
    if self.verbose:
        log.verbose("Played to depth %d" % self.get_game_depth())
        log.verbose("Last move %s" % (last_move, ))

    for ri, role in enumerate(self.sm.get_roles()):
        score = self.sm.get_goal_value(ri)
        self.scores[role] = score
        if self.verbose:
            log.verbose("Final score for %s : %s " % (role, score))

    # Need to do the final move for player
    for match in self.matches:
        # Call do_play() outside the assert: asserts are stripped under
        # "python -O", which would otherwise skip the final move entirely.
        response = match.do_play(last_move)
        assert response == "done"

        # and stop them
        match.do_stop()