def __init__(self, config):
    self.config = config

    # Create session
    self.session = tf.Session(config=tf.ConfigProto(
        gpu_options=tf.GPUOptions(allow_growth=True)))

    # Create networks
    self.prior_network = PolicyNetwork(
        scope=config.prior_network,
        temperature=config.prior_temperature,
        use_symmetry=config.use_symmetry)
    self.rollout_network = PolicyNetwork(
        scope=config.rollout_network,
        temperature=config.rollout_temperature,
        reuse=config.prior_network == config.rollout_network,
        use_symmetry=config.use_symmetry)
    self.value_network = ValueNetwork(
        scope=config.value_network,
        use_symmetry=config.use_symmetry)

    # Load networks from checkpoints
    run_dir = util.run_directory(config)
    util.restore_network_or_fail(self.session, run_dir, self.prior_network)
    util.restore_network_or_fail(self.session, run_dir, self.rollout_network)
    util.restore_network_or_fail(self.session, run_dir, self.value_network)

    # Create queues
    self.prior_queue = AllQueue()
    self.rollout_queue = AllQueue(maxsize=16)
    self.value_queue = AllQueue(maxsize=16)

    self.new_game()
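# AllQueue is not shown in this section; below is a minimal sketch of one
# plausible implementation, assuming its job is to buffer pending network
# requests so they can be drained and evaluated in a single batched forward
# pass. The drain_all/put interface is an assumption for illustration, not
# the repository's confirmed API.
import queue


class AllQueue(queue.Queue):
    """FIFO queue with a helper that drains every currently queued item."""

    def drain_all(self):
        # Collect everything queued so far, so the caller can run one
        # batched network evaluation instead of many single ones.
        items = []
        while True:
            try:
                items.append(self.get_nowait())
            except queue.Empty:
                return items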
def create_config():
    config = flags.FLAGS

    # Normalize the game name from CamelCase to snake_case,
    # e.g. 'MsPacman' -> 'ms_pacman'
    config.game = '_'.join(
        [g.lower() for g in re.findall('[A-Z]?[a-z]+', config.game)])
    config.num_actions = Atari.num_actions(config)

    # Flags may arrive as strings; parse them into Python literals.
    # ast.literal_eval is safer than eval for flag values
    # (requires `import ast` at module top).
    config.frameskip = ast.literal_eval(str(config.frameskip))
    config.input_shape = ast.literal_eval(str(config.input_shape))
    config.exploration_frame_shape = ast.literal_eval(
        str(config.exploration_frame_shape))

    # Reward clipping and reward scaling are mutually exclusive
    config.reward_clipping = (config.reward_clipping
                              and not config.reward_scaling)
    config.run_dir = util.run_directory(config)

    if not config.bootstrapped:
        config.num_bootstrap_heads = 1

    # 'async' is a reserved keyword from Python 3.7, so the flag is
    # read via getattr rather than attribute access
    if getattr(config, 'async') is None:
        config.num_threads = 1
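# A quick check of the name normalization above: the regex splits a
# CamelCase game name into words, which are lowered and joined with
# underscores. This snippet is illustrative and standalone.
import re


def snake_case_game(name):
    return '_'.join(g.lower() for g in re.findall('[A-Z]?[a-z]+', name))


assert snake_case_game('MsPacman') == 'ms_pacman'
assert snake_case_game('SpaceInvaders') == 'space_invaders'
assert snake_case_game('pong') == 'pong'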
def __init__(self, config):
    self.config = config
    self.run_dir = util.run_directory(config)
    self.position_targets = PositionTargets(config, self.run_dir)

    self.session = tf.Session(config=tf.ConfigProto(
        gpu_options=tf.GPUOptions(allow_growth=True)))

    self.value_network = ValueNetwork('value')
    util.restore_or_initialize_network(self.session, self.run_dir,
                                       self.value_network)

    # Train ops
    self.create_train_op(self.value_network)
    self.writer = tf.summary.FileWriter(self.run_dir)
    util.restore_or_initialize_scope(self.session, self.run_dir,
                                     self.training_scope.name)
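# create_train_op is defined elsewhere in the class; the sketch below shows
# one plausible shape for it, assuming the network exposes a scalar `value`
# output tensor and that regression targets arrive through a placeholder.
# The member names (`network.value`, `self.targets`), the Adam optimizer,
# and `config.learning_rate` are assumptions, not the repository's actual
# definitions.
def create_train_op(self, network):
    with tf.variable_scope('training') as self.training_scope:
        self.targets = tf.placeholder(tf.float32, [None], name='targets')
        # Squared-error regression of predicted value against targets
        self.loss = tf.reduce_mean(tf.square(network.value - self.targets))
        self.global_step = tf.train.get_or_create_global_step()
        optimizer = tf.train.AdamOptimizer(self.config.learning_rate)
        self.train_op = optimizer.minimize(self.loss,
                                           global_step=self.global_step)
        self.summary = tf.summary.scalar('loss', self.loss)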
def __init__(self, config):
    self.config = config
    session = tf.Session(config=tf.ConfigProto(
        gpu_options=tf.GPUOptions(allow_growth=True)))

    self.random_player = RandomPlayer()
    self.exploratory_network = PolicyNetwork(config.exploratory_network)
    self.exploratory_player = PolicyPlayer(self.exploratory_network, session)
    self.playout_network = PolicyNetwork(
        config.playout_network,
        reuse=config.exploratory_network == config.playout_network)
    self.playout_player = PolicyPlayer(self.playout_network, session)

    self.run_dir = util.run_directory(config)
    util.restore_network_or_fail(session, self.run_dir,
                                 self.exploratory_network)
    util.restore_network_or_fail(session, self.run_dir, self.playout_network)
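# PolicyPlayer is defined elsewhere; a minimal sketch under assumptions:
# the network exposes a `state` placeholder and a `policy` tensor of move
# probabilities summing to one. Those member names are illustrative, not
# confirmed by the source.
import numpy as np


class PolicyPlayer(object):
    def __init__(self, network, session):
        self.network = network
        self.session = session

    def move(self, state):
        # One forward pass, then sample a move from the policy distribution
        probs = self.session.run(
            self.network.policy,
            feed_dict={self.network.state: [state]})[0]
        return np.random.choice(len(probs), p=probs)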
def __init__(self, config):
    self.config = config
    self.run_dir = util.run_directory(config)
    self.session = tf.Session(config=tf.ConfigProto(
        gpu_options=tf.GPUOptions(allow_growth=True)))

    self.policy_network = PolicyNetwork('policy')
    self.policy_player = PolicyPlayer(self.policy_network, self.session)
    util.restore_or_initialize_network(self.session, self.run_dir,
                                       self.policy_network)

    # Train ops
    self.create_train_op(self.policy_network)
    self.writer = tf.summary.FileWriter(self.run_dir)
    util.restore_or_initialize_scope(self.session, self.run_dir,
                                     self.training_scope.name)

    self.opponents = Opponents(
        [RandomPlayer(), RandomThreatPlayer(), MaxThreatPlayer()])
    self.opponents.restore_networks(self.session, self.run_dir)
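# Opponents is defined elsewhere; the sketch below shows one plausible
# implementation consistent with the calls above: it holds a pool of
# opponent players and restores checkpoints for any that carry a network.
# The `sample` helper and the `network` attribute check are assumptions
# for illustration.
import random


class Opponents(object):
    def __init__(self, players):
        self.players = list(players)

    def restore_networks(self, session, run_dir):
        # Only network-backed opponents have weights to restore
        for player in self.players:
            network = getattr(player, 'network', None)
            if network is not None:
                util.restore_network_or_fail(session, run_dir, network)

    def sample(self):
        # Draw a random opponent for the next training game
        return random.choice(self.players)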