def testScreenSmallerThanMinimapRaises(self):
    with self.assertRaises(ValueError):
        features.Dimensions(screen=84, minimap=100)
def testScreenWidthHeightWithoutMinimapRaises(self):
    with self.assertRaises(ValueError):
        features.Dimensions(screen=(84, 80))
def testNoneNoneRaises(self):
    with self.assertRaises(ValueError):
        features.Dimensions(screen=None, minimap=None)
def __init__(self,
             reward_types,
             map_name=None,
             unit_type=None,  # was a mutable default ([]); the argument is overwritten below anyway
             generate_xai_replay=False,
             xai_replay_dimension=256,
             verbose=False):
    if map_name is None:
        map_name = MAP_NAME
    maps_dir = os.path.join(os.path.dirname(__file__), '..', 'maps')
    register_map(maps_dir, map_name)

    if generate_xai_replay:
        aif = features.AgentInterfaceFormat(
            feature_dimensions=features.Dimensions(screen=SCREEN_SIZE, minimap=SCREEN_SIZE),
            rgb_dimensions=sc2_env.Dimensions(
                screen=(xai_replay_dimension, xai_replay_dimension),
                minimap=(64, 64),
            ),
            action_space=actions.ActionSpace.FEATURES,
            camera_width_world_units=28,
            # use_camera_position=True,
        )
        step_mul_value = 4
    else:
        aif = features.AgentInterfaceFormat(
            feature_dimensions=features.Dimensions(screen=SCREEN_SIZE, minimap=SCREEN_SIZE),
            action_space=actions.ActionSpace.FEATURES,
            camera_width_world_units=50,
        )
        step_mul_value = 16

    self.sc2_env = sc2_env.SC2Env(
        map_name=map_name,
        agent_interface_format=aif,
        step_mul=step_mul_value,
        game_steps_per_episode=0,
        score_index=0,
        visualize=True,
    )

    self.current_obs = None
    self.actions_taken = 0
    self.decomposed_rewards = []
    self.verbose = verbose
    self.signal_of_end = False
    self.end_state = None
    self.get_income_signal = 2
    self.reward_types = reward_types
    self.last_decomposed_reward_dict = {}
    self.decomposed_reward_dict = {}
    for rt in reward_types:
        self.decomposed_reward_dict[rt] = 0
        self.last_decomposed_reward_dict[rt] = 0

    unit_type = [
        UNIT_TYPES['Marine'],
        UNIT_TYPES['Viking'],
        UNIT_TYPES['Colossus']
    ]
    self.input_screen_features = {
        "PLAYER_RELATIVE": [1, 4],
        "UNIT_TYPE": unit_type,
        'HIT_POINT': 0,
        'HIT_POINT_RATIO': 0,
        'SHIELD': 0,
        'SHIELD_RATIO': 0,
        'UNIT_DENSITY': 0
    }
def testScreenSizeWithoutMinimapRaises(self):
    with self.assertRaises(ValueError):
        features.Dimensions(screen=84)
def run_one_env(config, run_num=0, run_variables=None, rename_if_duplicate=False, output_file=None):
    if run_variables is None:
        run_variables = {}

    restore = True
    if not os.path.exists(config['model_dir']):
        restore = False
    elif rename_if_duplicate:
        restore = False
        time = datetime.datetime.now().strftime("%Y-%m-%d-%H-%M-%S")
        config['model_dir'] = config['model_dir'] + '_' + time

    # save a copy of the configuration file being used for a run in the run's folder (first time only)
    if not restore and not config['inference_only']:
        os.makedirs(config['model_dir'], exist_ok=True)
        with open(config['model_dir'] + '/config.json', 'w+') as fp:
            fp.write(json.dumps(config, indent=4))

    # if continuing from another model (say for transfer learning), we are restoring
    if config['copy_model_from'] != "":
        restore = True

    # variables for episode stats
    max_ep_score = None
    all_ep_scores = []
    last_n_ep_score = []
    all_ep_wins = []
    last_n_ep_wins = []
    win_count = 0
    # action use stats
    actions_used = {}

    if output_file is not None and not os.path.isfile(output_file):
        write_summary_file_header(output_file, run_variables)

    with sc2_env.SC2Env(
            map_name=config['env']['map_name'],
            players=[sc2_env.Agent(sc2_env.Race['random'], None)],
            agent_interface_format=features.AgentInterfaceFormat(
                feature_dimensions=features.Dimensions(config['env']['screen_size'],
                                                       config['env']['minimap_size']),
                action_space=actions.ActionSpace.FEATURES,
                use_feature_units=True,
                use_raw_units=True
            ),
            visualize=config['env']['visualize'],
            step_mul=config['env']['step_mul'],
            realtime=config['inference_only'] and config['inference_only_realtime']
    ) as env:
        tf.reset_default_graph()
        tf_config = tf.ConfigProto()
        # tf_config.gpu_options.allow_growth = True
        with tf.Session(config=tf_config) as sess:
            if 'use_scripted_bot' in config:
                if config['use_scripted_bot'] == 'noop':
                    rl_agent = scripted_bots.NoopBot()
                elif config['use_scripted_bot'] == 'random':
                    rl_agent = scripted_bots.RandomBot(config)
                elif config['use_scripted_bot'] == 'attack_weakest':
                    rl_agent = scripted_bots.AttackWeakestBot(config)
                elif config['use_scripted_bot'] == 'attack_weakest_nearest':
                    rl_agent = scripted_bots.AttackWeakestNearestBot(config)
                else:
                    rl_agent = DQNAgent(sess, config, restore)
            else:
                rl_agent = DQNAgent(sess, config, restore)

            # observations from the env are tuples of 1 Timestep per player
            obs = env.reset()[0]

            step = 0
            episode = 1
            episode_reward = 0
            # if we are using evaluation episodes, this will be true during those episodes
            eval_episode = False

            # Rewards from the map have to be integers, and some maps calculate
            # normalized float rewards and then multiply them by some factor.
            match = re.search(r"factor_(\d+)", config['env']['map_name'])
            factor = float(match.group(1)) if match else 1.0

            # For combat micro maps we may have a shaped reward or not,
            # but we independently want to record win/loss.
            match = re.match(r"^combat", config['env']['map_name'])
            win_loss = True if match else False

            new_episode = True
            while (config['max_steps'] == 0 or step <= config['max_steps']) and \
                    (config['max_episodes'] == 0 or episode <= config['max_episodes']):
                state = preprocess_state(obs, config)
                available_actions = obs.observation['available_actions']

                step_reward = obs.reward / factor
                if 'step_penalty' in config:
                    step_reward -= config['step_penalty']
                episode_reward += step_reward
                win = 0
                terminal = False

                # handle episode end
                if obs.step_type is StepType.LAST:
                    terminal = True
                    # if this map type uses this win/loss calc
                    if win_loss:
                        win = get_win_loss(obs)
                        win_count += win
                        if 'episode_extra_win_reward' in config:
                            step_reward += config['episode_extra_win_reward'] * win
                            episode_reward += config['episode_extra_win_reward'] * win
                    if eval_episode:
                        print("Eval Episode", episode, "finished. Steps:", step,
                              "Win:", win, "Score:", episode_reward)
                    else:
                        print("Episode", episode, "finished. Steps:", step,
                              "Win:", win, "Score:", episode_reward)

                    # don't add to run stats if doing an eval episode and not training
                    if not eval_episode or config['train_on_eval_episodes']:
                        if len(last_n_ep_score) == num_eps_summary_last:
                            last_n_ep_score.pop(0)
                            last_n_ep_wins.pop(0)
                        last_n_ep_score.append(episode_reward)
                        last_n_ep_wins.append(win)
                        all_ep_scores.append(episode_reward)
                        all_ep_wins.append(win)
                        if max_ep_score is None or episode_reward > max_ep_score:
                            max_ep_score = episode_reward
                    episode_reward = 0
                    episode += 1

                    # check for eval episode. can't have two eval eps in a row.
                    # repeat episode num after eval ep
                    if eval_episode and not config['train_on_eval_episodes']:
                        eval_episode = False
                        episode -= 1
                    else:
                        eval_episode = config['do_eval_episodes'] and \
                            episode % config['one_eval_episode_per'] == 0

                # we don't take an action (from the perspective of the agent)
                # on a terminal state, so no step++
                if not terminal and (not eval_episode or config['train_on_eval_episodes']):
                    step += 1

                # observe the reward if this state is not the first of a new episode
                if not new_episode:
                    rl_agent.observe(terminal=terminal, reward=step_reward,
                                     win=win, eval_episode=eval_episode)

                if not terminal:
                    new_episode = False
                    action = rl_agent.act(state, available_actions, eval_episode=eval_episode)
                    action_for_sc = get_action_function(obs, action, config)
                    if not config['inference_only'] and \
                            (not eval_episode or config['train_on_eval_episodes']):
                        action_name = actions.FUNCTIONS[action_for_sc.function].name
                        if action_name not in actions_used:
                            actions_used[action_name] = [0] * (episode - 1)
                            actions_used[action_name].append(1)
                        else:
                            # this action may not have been used for some episode(s)
                            actions_used[action_name] += [0] * (episode - len(actions_used[action_name]))
                            # increment count for this episode
                            actions_used[action_name][-1] += 1
                else:
                    # take dummy no_op action if this is a terminal state
                    action_for_sc = actions.FunctionCall(0, [])
                    # if this was a terminal state, the next state is going to be
                    # the beginning of an episode
                    new_episode = True

                # actions passed into env.step() are in a list with one action per player
                obs = env.step([action_for_sc])[0]

    # write out run stats to output file if doing a batch
    if output_file is not None:
        write_summary_file_line(
            output_file, last_n_ep_score, all_ep_scores, last_n_ep_wins, all_ep_wins,
            config, run_num, step, episode, max_ep_score, win_count, run_variables
        )
    if experiments_summary_file is not None:
        write_summary_file_line(
            experiments_summary_file, last_n_ep_score, all_ep_scores, last_n_ep_wins,
            all_ep_wins, config, run_num, step, episode, max_ep_score, win_count
        )

    # write out the stats of which actions were used to a file if training
    if not config['inference_only']:
        with open(config['model_dir'] + '/action_stats.csv', 'a+') as f:
            headers = []
            for key in actions_used:
                headers.append(key)
                # add 0s to end if needed
                actions_used[key] += [0] * (episode - len(actions_used[key]))
            f.write(','.join(val for val in headers) + '\n')
            # get some key
            sample_key = ""
            for key in actions_used:
                sample_key = key
                break
            for i in range(len(actions_used[sample_key])):
                episode_actions = []
                for key in actions_used:
                    episode_actions.append(actions_used[key][i])
                f.write(','.join(str(val) for val in episode_actions) + '\n')

    # print out some results of the run if we are doing inference only not in realtime
    if config['inference_only'] and not config['inference_only_realtime']:
        print('Inference_only summary for', config['model_dir'] + ':')
        print('Num episodes:', episode - 1)
        print('Win rate:', win_count / (episode - 1))
        print('Average score:', sum(all_ep_scores) / (episode - 1))
        print('Max score:', max_ep_score)
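# A minimal standalone sketch of the map-name reward-scaling convention used
# in run_one_env above: maps that emit integer rewards scaled by N encode it
# as a "factor_<N>" suffix in the map name (map names below are hypothetical).
import re

def reward_factor(map_name):
    match = re.search(r"factor_(\d+)", map_name)
    return float(match.group(1)) if match else 1.0

assert reward_factor("combat_micro_factor_1000") == 1000.0  # shaped-reward map
assert reward_factor("MoveToBeacon") == 1.0                 # unscaled map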
def run_game_with_agent(agent, mapname, iterations):
    ### dqn parameters
    frame_num = 4
    state_size = [84, 84, 7 * frame_num]
    action_size = 2
    learning_rate = 0.001
    eps_f = 0.05
    eps_s = 1.00

    # Q learning hyperparameters
    gamma = 0.95  # discounting rate

    ### TRAINING HYPERPARAMETERS
    total_episodes = 10  # prev 2000; total episodes for training
    batch_size = 2  # prev 100
    iter_num = 10  # prev 20

    ### Experience HYPERPARAMETERS
    print("pre training!")
    pretrain_length = batch_size  # number of experiences stored in the Memory when initialized for the first time
    experience_size = 3 * batch_size  # prev 800

    dqn_sc2 = DQN(state_size, action_size, learning_rate)
    game_data = []
    exp = experience(experience_size)
    decay_rate = 0.0005  # prev 0.0005

    with sc2_env.SC2Env(
            map_name=mapname,
            agent_interface_format=features.AgentInterfaceFormat(
                feature_dimensions=features.Dimensions(screen=84, minimap=64),
                use_feature_units=True),
            step_mul=100,  # if too low, nothing really happens
            visualize=True,
            game_steps_per_episode=0) as env:
        agent.setup(env.observation_spec(), env.action_spec())

        # pretrain
        state = State(4)
        for i in range(pretrain_length):
            timesteps = env.reset()
            agent.reset()
            state.reinitialize(transform_state(timesteps))  # initialize state
            while True:
                # NOTE: this binds a reference, not a copy; if State.get() does
                # not copy, the stored "old" state will mirror later add() calls.
                state_old = state
                [step_actions, step_actions_array] = random_step(timesteps[0])
                done = False
                if timesteps[0].last():
                    done = True
                reward = get_reward(timesteps[0], done)
                timesteps = env.step([step_actions])
                state.add(transform_state(timesteps))
                exp.add([
                    state_old.get(), step_actions_array, reward, state.get(), done
                ])
                if timesteps[0].last():
                    break

        # train
        agent.setup(env.observation_spec(), env.action_spec())
        saver = tf.train.Saver()
        agent.reset()
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            time = 0  # initialize time
            for i in range(iter_num):
                train_iteration(env, sess, agent, exp, saver, total_episodes,
                                dqn_sc2, eps_s, eps_f, decay_rate, time,
                                batch_size, gamma)
            test(env, agent, saver, dqn_sc2, exp)
    return []
def training_game():
    env = Environment(
        map_name="HallucinIce",
        visualize=True,
        game_steps_per_episode=150,
        agent_interface_format=features.AgentInterfaceFormat(
            feature_dimensions=features.Dimensions(screen=64, minimap=32)))

    input_shape = (_SIZE, _SIZE, 1)
    nb_actions = _SIZE * _SIZE  # one action per screen coordinate; should be an integer

    model = neural_network_model(input_shape, nb_actions)
    # memory: how many subsequent observations should be provided to the network?
    memory = SequentialMemory(limit=5000, window_length=_WINDOW_LENGTH)
    processor = SC2Proc()

    ### Policy
    # The agent's behaviour function: how the agent picks actions.
    # LinearAnnealedPolicy is a wrapper that anneals a policy parameter linearly
    # over time (open question from the original author: why is it rarely seen
    # wrapping anything other than eps-greedy?).
    # EpsGreedyQPolicy selects a uniformly random action with probability eps,
    # otherwise the action with the maximum Q-value.
    # BoltzmannQPolicy assumes action values follow a Boltzmann distribution,
    # which gives the probability of a state as a function of that state's energy.
    policy = LinearAnnealedPolicy(EpsGreedyQPolicy(),
                                  attr="eps",
                                  value_max=1,
                                  value_min=0.7,
                                  value_test=.0,
                                  nb_steps=1e6)
    # policy = BoltzmannQPolicy(tau=1., clip=(-500, 500))  # clip bounds values to [-500, 500]

    ### Agent
    # Deep Q-learning (combines Q-learning with a deep neural network).
    # Q-learning -- Bellman equation
    dqn = DQNAgent(model=model,
                   nb_actions=nb_actions,
                   memory=memory,
                   nb_steps_warmup=500,
                   target_model_update=1e-2,
                   policy=policy,
                   batch_size=150,
                   processor=processor)
    dqn.compile(Adam(lr=.001), metrics=["mae"])

    ## Save the parameters and load them when needed
    name = "HallucinIce"
    w_file = "dqn_{}_weights.h5f".format(name)
    check_w_file = "train_w" + name + "_weights.h5f"
    if SAVE_MODEL:
        check_w_file = "train_w" + name + "_weights_{step}.h5f"
    log_file = "training_w_{}_log.json".format(name)
    callbacks = [ModelIntervalCheckpoint(check_w_file, interval=1000)]
    callbacks += [FileLogger(log_file, interval=100)]

    if LOAD_MODEL:
        dqn.load_weights(w_file)

    dqn.fit(env,
            callbacks=callbacks,
            nb_steps=1e7,
            action_repetition=2,
            log_interval=1e4,
            verbose=2)
    dqn.save_weights(w_file, overwrite=True)
    dqn.test(env, action_repetition=2, nb_episodes=30, visualize=False)
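# For reference: LinearAnnealedPolicy interpolates the wrapped policy's `eps`
# linearly from value_max to value_min over nb_steps. A minimal sketch of that
# schedule (an illustration, not keras-rl's implementation):
def annealed_eps(step, value_max=1.0, value_min=0.7, nb_steps=1_000_000):
    fraction = min(step / nb_steps, 1.0)  # clamp once annealing is done
    return value_max + fraction * (value_min - value_max)

assert annealed_eps(0) == 1.0
assert abs(annealed_eps(500_000) - 0.85) < 1e-9  # halfway between 1.0 and 0.7
assert annealed_eps(2_000_000) == 0.7            # held at value_min afterwards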
def main(unused_argv):
    try:
        while True:
            with sc2_env.SC2Env(
                    map_name="MoveToBeacon",
                    players=[sc2_env.Agent(sc2_env.Race.terran)],
                    agent_interface_format=features.AgentInterfaceFormat(
                        # default size of feature screen and feature minimap
                        feature_dimensions=features.Dimensions(screen=84, minimap=64),
                        use_feature_units=True),
                    # 16 gives roughly 150 apm (8 would give 300 apm);
                    # a larger number here makes it run faster
                    step_mul=64,
                    game_steps_per_episode=0,
                    visualize=True) as env:

                # create a keras-rl env
                keras_env = PySC2ToKerasRL_env(env)
                obs = keras_env.reset()

                # Test agent (makes a marine run in a circle):
                # keras_agent = MoveToBeacon_KerasRL()
                # keras_agent.reset()
                # while True:  # play the game
                #     step_actions = keras_agent.step(obs)
                #     obs, reward, done, info = keras_env.step(step_actions)

                # Replace the simple agent with a learning one.
                # A simple model (taken from the keras-rl cartpole DQN example):
                nb_actions = keras_env.action_space.n
                model = Sequential()
                model.add(Flatten(input_shape=(1,) + keras_env.observation_space.shape))
                model.add(Dense(16))
                model.add(Activation('relu'))
                model.add(Dense(16))
                model.add(Activation('relu'))
                model.add(Dense(16))
                model.add(Activation('relu'))
                model.add(Dense(nb_actions))
                model.add(Activation('linear'))
                print(model.summary())
                output_filename = "DQN_Rewards_smallerObs_smallerActions.csv"
                # output_filename = "DQN Rewards.csv"

                # Finally, we configure and compile our agent. You can use every
                # built-in Keras optimizer and even the metrics!
                memory = SequentialMemory(limit=50000, window_length=1)
                policy = BoltzmannQPolicy()
                dqn = DQNAgent(model=model,
                               nb_actions=nb_actions,
                               memory=memory,
                               nb_steps_warmup=15,
                               target_model_update=1e-2,
                               policy=policy)
                dqn.compile(Adam(lr=1e-3), metrics=['mae'])

                # Okay, now it's time to learn something! (hopefully)
                hist = dqn.fit(keras_env, nb_steps=50000, visualize=False, verbose=2)
                with open(output_filename, 'w+', newline='') as csvfile:
                    # save the rewards over time
                    writer = csv.writer(csvfile)
                    writer.writerow(hist.history.get('episode_reward'))
                break  # kill the env
    except KeyboardInterrupt:
        pass
def testNegativeMinimapTupleRaises(self):
    with self.assertRaises(ValueError):
        features.Dimensions(screen=64, minimap=(-32, -32))
def __init__(self, map_name='MoveToBeacon', visualize=False, screen_dim=32,
             minimap_dim=32, mock=False):
    super().__init__()
    self.settings = {
        'map_name': map_name,
        'players': [sc2_env.Agent(sc2_env.Race.terran)],  # true for all mini-games
        'agent_interface_format': features.AgentInterfaceFormat(
            feature_dimensions=features.Dimensions(screen=screen_dim, minimap=minimap_dim),
            use_feature_units=True),
        'step_mul': 8,  # how many game steps pass between actions; default is 8 (300 APM), 16 means 150 APM
        'game_steps_per_episode': 0,  # the fixed length of a game; if 0: as long as needed
        'visualize': visualize,  # whether to draw the game
    }
    # see https://github.com/deepmind/pysc2/blob/master/docs/mini_games.md
    if map_name == "MoveToBeacon":
        # Fog of War disabled
        # No camera movement required (single-screen)
        action_set = ActionSet.Select_Army_Move_2D
    elif map_name == "CollectMineralShards":
        # Fog of War disabled
        # No camera movement required (single-screen)
        # action_set = ActionSet.Select_Army_Move_2D
        action_set = ActionSet.Select_Multi_Move_2D
    elif map_name == "FindAndDefeatZerglings":
        # Fog of War enabled
        # Camera movement required (map is larger than single-screen)
        # action_set = ActionSet.Select_Army_Attack_2D
        action_set = ActionSet.Attack_2D_Move_Camera
    elif map_name == "DefeatRoaches":
        # Fog of War disabled
        # No camera movement required (single-screen)
        # action_set = ActionSet.Select_Army_Attack_2D
        action_set = ActionSet.Select_Multi_Move_Attack_2D
    elif map_name == "DefeatZerglingsAndBanelings":
        # Fog of War disabled
        # No camera movement required (single-screen)
        # action_set = ActionSet.Select_Army_Attack_2D
        action_set = ActionSet.Select_Multi_Move_Attack_2D
    elif map_name == "CollectMineralsAndGas":
        # Fog of War disabled
        # No camera movement required (single-screen)
        action_set = ActionSet.Build_SCVs
    elif map_name == "BuildMarines":
        # Fog of War disabled
        # No camera movement required (single-screen)
        action_set = ActionSet.Build_Marines
    else:
        raise ValueError("map is not supported")

    self.mock = mock
    self._wrapped_env = self._init_env()
    self._space_converter = SpaceConverter(
        action_set=action_set,
        action_spec=self._wrapped_env.action_spec(),
        observation_spec=self._wrapped_env.observation_spec(),
        env_settings=self.settings,
        screen_dim=screen_dim)
    self.action_space = self._space_converter.get_action_space()
    self.observation_space = self._space_converter.get_observation_space()
    self._cur_timestep = None
def testNegativeScreenTupleRaises(self):
    with self.assertRaises(ValueError):
        features.Dimensions(screen=(-64, -64), minimap=32)
def testThreeTupleMinimapRaises(self):
    with self.assertRaises(ValueError):
        features.Dimensions(screen=64, minimap=(1, 2, 3))
def testTwoZeroesRaises(self):
    with self.assertRaises(ValueError):
        features.Dimensions(screen=(0, 0), minimap=(0, 0))
class Env:
    metadata = {'render.modes': ['human']}
    default_settings = {
        'map_name': "CollectMineralShards",
        'players': [sc2_env.Agent(sc2_env.Race.terran)],
        'agent_interface_format': features.AgentInterfaceFormat(
            feature_dimensions=features.Dimensions(screen=64, minimap=64),
            action_space=actions.ActionSpace.RAW,
            use_raw_units=True,
            raw_resolution=64),
        'step_mul': 2,
        'game_steps_per_episode': 0,
        'visualize': True,
        'realtime': False
    }

    def __init__(self, **kwargs):
        super().__init__()
        self.kwargs = kwargs
        self.env = None
        self.marine1 = None
        self.marine2 = None
        self.marine1_ID = None
        self.marine2_ID = None
        self.action_counter = 0
        self.state = np.zeros([3, 64, 64])

    def reset(self):
        if self.env is None:
            self.init_env()
        self.marine1 = None
        self.marine2 = None
        self.action_counter = 0
        self.state = np.zeros([3, 64, 64])
        raw_obs = self.env.reset()[0]
        return self.get_state_from_obs(raw_obs, True)

    def init_env(self):
        args = {**self.default_settings, **self.kwargs}
        self.env = sc2_env.SC2Env(**args)

    def get_state_from_obs(self, raw_obs, reset):
        marines = self.get_units_by_type(raw_obs, units.Terran.Marine)
        if reset:
            self.marine1_ID = marines[0].tag
            self.marine2_ID = marines[1].tag
            self.marine1 = marines[0]
            self.marine2 = marines[1]
        else:
            if self.marine1_ID == marines[0].tag:
                self.marine1 = marines[0]
                self.marine2 = marines[1]
            elif self.marine1_ID == marines[1].tag:
                self.marine1 = marines[1]
                self.marine2 = marines[0]
            else:
                assert False, "neither unit tag matches the tracked marines"
        shard_matrix = np.array(raw_obs.observation.feature_minimap.player_relative)
        shard_matrix[shard_matrix < 2] = 0
        marine1_matrix = np.zeros([64, 64])
        marine1_matrix[int(self.marine1.x), int(self.marine1.y)] = 1
        marine2_matrix = np.zeros([64, 64])
        marine2_matrix[int(self.marine2.x), int(self.marine2.y)] = 2
        self.state = np.stack([shard_matrix, marine1_matrix, marine2_matrix], axis=0)
        return self.state

    def step(self, action):
        raw_obs = self.take_action(action)
        new_state = self.get_state_from_obs(raw_obs, False)
        return new_state, int(raw_obs.reward), raw_obs.last()

    def take_action(self, action):
        if action < 4096:
            x = action % 64
            y = action // 64
            mapped_action = actions.RAW_FUNCTIONS.Move_pt("now", self.marine1.tag, [x, y])
        else:
            action = action - 4096
            x = action % 64
            y = action // 64
            mapped_action = actions.RAW_FUNCTIONS.Move_pt("now", self.marine2.tag, [x, y])
        raw_obs = self.env.step([mapped_action])[0]
        return raw_obs

    def get_units_by_type(self, obs, unit_type):
        unit_list = []
        for unit in obs.observation.raw_units:
            if unit.unit_type == unit_type:
                unit_list.append(unit)
        return unit_list

    def close(self):
        if self.env is not None:
            self.env.close()
        super().close()
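# The flat action index used by take_action() above packs (marine, x, y) into
# one integer: 0-4095 move marine1, 4096-8191 move marine2, with x = action % 64
# and y = action // 64. A round-trip sketch (helper names are illustrative):
def encode_action(marine_idx, x, y, size=64):
    return marine_idx * size * size + y * size + x

def decode_action(action, size=64):
    marine_idx, offset = divmod(action, size * size)
    y, x = divmod(offset, size)
    return marine_idx, x, y

assert decode_action(encode_action(0, x=10, y=3)) == (0, 10, 3)
assert decode_action(4096) == (1, 0, 0)  # first action of the second marine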
def run_game_with_agent(agent, mapname, iterations):
    ### dqn parameters
    frame_num = 4
    state_size = [84, 84, 7 * frame_num]
    action_size = 32 + 40 + 1
    learning_rate = 0.00025
    eps_f = 0.1
    eps_s = 1.00

    # Q learning hyperparameters
    gamma = 0.95  # discounting rate

    ### TRAINING HYPERPARAMETERS
    total_episodes = 200  # prev 2000; total episodes for training
    batch_size = 10  # prev 100
    iter_num = 10  # prev 20

    ### Experience HYPERPARAMETERS
    print("pre training!")
    pretrain_length = batch_size  # number of experiences stored in the Memory when initialized for the first time
    experience_size = 3 * batch_size  # prev 800

    dqn_sc2 = DQN(state_size, action_size, learning_rate, name="dqn_sc2")
    dqn_target = DQN(state_size, action_size, learning_rate, name="dqn_target")
    game_data = []
    exp = experience(experience_size)
    decay_rate = 0.0000009  # prev 0.0005

    with sc2_env.SC2Env(
            map_name=mapname,
            agent_interface_format=features.AgentInterfaceFormat(
                feature_dimensions=features.Dimensions(screen=84, minimap=64),
                use_feature_units=True),
            step_mul=20,  # if too low, nothing really happens
            visualize=True,
            game_steps_per_episode=0) as env:
        agent.setup(env.observation_spec(), env.action_spec())

        # pretrain
        state = State(4)
        for i in range(pretrain_length):
            timesteps = env.reset()
            agent.reset()
            state.reinitialize(transform_state(timesteps))  # initialize state
            while True:
                state_old = state
                [step_actions, step_actions_array] = random_step(timesteps[0])
                done = False
                if timesteps[0].last():
                    done = True
                reward = get_reward(timesteps[0], done)
                timesteps = env.step([step_actions])
                state.add(transform_state(timesteps))
                exp.add([
                    state_old.get(), step_actions_array, reward, state.get(), done
                ])
                if timesteps[0].last():
                    break

        # train
        agent.setup(env.observation_spec(), env.action_spec())
        saver = tf.train.Saver()
        agent.reset()
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            time = 0  # initialize time
            for i in range(iter_num):
                time = train_iteration(env, sess, agent, exp, saver, total_episodes,
                                       dqn_sc2, dqn_target, eps_s, eps_f, decay_rate,
                                       time, batch_size, gamma)

        # execute with what we have
        # print("testing!")
        # with tf.Session() as sess:
        #     agent.setup(env.observation_spec(), env.action_spec())
        #     # Load the model
        #     saver.restore(sess, "./models/dqn_split.ckpt")
        #     state = State(4)
        #     for i in range(10):
        #         agent.reset()
        #         timesteps = env.reset()
        #         state.reinitialize(transform_state(timesteps))
        #         totalScore = 0
        #         done = False
        #         while not done:
        #             # Take the biggest Q value (= the best action)
        #             Qs = sess.run(dqn_sc2.Yhat,
        #                           feed_dict={dqn_sc2.inputs_: state.get().reshape((1, *(state.get().shape)))})
        #             [step_actions, step_actions_array] = argmax_action(np.argmax(Qs), timesteps[0])
        #             state_old = state
        #             timesteps = env.step([step_actions])
        #             state.add(transform_state(timesteps))
        #             if timesteps[0].last():
        #                 done = True
        #             reward = get_reward(timesteps[0], done)
        #             exp.add([state_old.get(), step_actions_array, reward, state.get(), done])
        #             totalScore += reward
        #         print("TOTAL_SCORE", totalScore)
    return []
def main(unused_argv):
    agent = terranAgent()
    try:
        while True:
            with sc2_env.SC2Env(
                    map_name="HellionZerglings",
                    players=[sc2_env.Agent(sc2_env.Race.terran)],
                    agent_interface_format=features.AgentInterfaceFormat(
                        feature_dimensions=features.Dimensions(screen=84, minimap=64),
                        use_feature_units=True),
                    step_mul=16,
                    game_steps_per_episode=0,
                    visualize=True) as env:
                agent.setup(env.observation_spec(), env.action_spec())
                timesteps = env.reset()
                agent.reset()
                agent.terminal = False
                agent.s, agent.readout, agent.h_fc1 = agent.createNetwork()

                # define the cost function
                agent.a = tf.placeholder("float", [None, ACTIONS])
                agent.y = tf.placeholder("float", [None])
                agent.readout_action = tf.reduce_sum(
                    tf.multiply(agent.readout, agent.a), reduction_indices=1)
                agent.cost = tf.reduce_mean(tf.square(agent.y - agent.readout_action))
                agent.train_step = tf.train.AdamOptimizer(1e-6).minimize(agent.cost)

                # store the previous observations in replay memory
                agent.D = deque()

                # printing
                agent.a_file = open("logs_" + GAME + "/readout.txt", 'w')
                agent.h_file = open("logs_" + GAME + "/hidden.txt", 'w')

                agent.x_t = agent.grab_screen(timesteps[0])
                print("SHAPE: {}".format(agent.x_t.shape))
                agent.r_t = -0.1
                # agent.ret, agent.x_t = cv2.threshold(agent.x_t, 1, 255, cv2.THRESH_BINARY)
                agent.s_t = np.stack((agent.x_t, agent.x_t, agent.x_t, agent.x_t), axis=2)
                agent.a_t = np.zeros([ACTIONS])
                agent.a_t[0] = 1
                agent.action_index = 0
                agent.total_reward = 0
                agent.readout_t = 0
                agent.sess = tf.InteractiveSession()

                # saving and loading networks
                agent.saver = tf.train.Saver()
                agent.sess.run(tf.initialize_all_variables())
                agent.checkpoint = tf.train.get_checkpoint_state("saved_networks")
                # if agent.checkpoint and agent.checkpoint.model_checkpoint_path:
                #     agent.saver.restore(agent.sess, agent.checkpoint.model_checkpoint_path)
                #     print("Successfully loaded:", agent.checkpoint.model_checkpoint_path)
                # else:
                #     print("Could not find old network weights")

                agent.epsilon = INITIAL_EPSILON
                agent.t = 0
                while True:
                    step_actions = [agent.step(timesteps[0])]
                    timesteps = env.step(step_actions)
    except KeyboardInterrupt:
        pass
def __init__(self, map_name=None, unit_type=None, generate_xai_replay=False,
             xai_replay_dimension=256, verbose=False):
    if map_name is None:
        map_name = MAP_NAME
    maps_dir = os.path.join(os.path.dirname(__file__), '..', 'maps')
    print("maps directory: " + str(maps_dir))
    register_map(maps_dir, map_name)

    if generate_xai_replay:
        aif = features.AgentInterfaceFormat(
            feature_dimensions=features.Dimensions(screen=SCREEN_SIZE, minimap=SCREEN_SIZE),
            rgb_dimensions=sc2_env.Dimensions(
                # dimensions must be integers; 1.5 * an int is a float, so cast
                screen=(int(1.5 * xai_replay_dimension), xai_replay_dimension),
                minimap=(64, 64),
            ),
            action_space=actions.ActionSpace.FEATURES,
            camera_width_world_units=28,
            # use_camera_position=True,
        )
        step_mul_value = 4
        # step_mul_value = 16
    else:
        aif = features.AgentInterfaceFormat(
            feature_dimensions=features.Dimensions(screen=SCREEN_SIZE, minimap=SCREEN_SIZE),
            action_space=actions.ActionSpace.FEATURES,
            camera_width_world_units=100,
        )
        step_mul_value = 16

    np.set_printoptions(threshold=sys.maxsize, linewidth=sys.maxsize, precision=2)
    self.sc2_env = sc2_env.SC2Env(
        map_name=map_name,
        agent_interface_format=aif,
        step_mul=step_mul_value,
        game_steps_per_episode=0,
        score_index=0,
        visualize=True,
    )

    self.current_obs = None
    self.decomposed_rewards = []
    self.verbose = verbose
    self.miner_index = 0
    self.reset_steps = -1
    self.mineral_limiation = 1500
    self.norm_vector = np.array([700, 50, 40, 20, 50, 40, 20, 3,
                                 50, 40, 20, 50, 40, 20, 3,
                                 50, 40, 20, 50, 40, 20,
                                 50, 40, 20, 50, 40, 20,
                                 2000, 2000, 2000, 2000, 40])
    self.decision_point = 1
    self.signal_of_end = False
    self.end_state = None
    self.maker_cost_np = np.zeros(len(maker_cost))
    # Have to change the combine func if this changed
    self.pylon_cost = 300
    self.pylon_index = 7
    for i, mc in enumerate(maker_cost.values()):
        self.maker_cost_np[i] = mc

    self.last_decomposed_reward_dict = {}
    self.decomposed_reward_dict = {}
    self.num_waves = 0

    maps_dir = os.path.join(os.path.dirname(__file__), '..', 'maps')
    action_dict_path = os.path.join(os.path.dirname(__file__), 'action_1500_tow_2L.pt')
    print("actions path: " + action_dict_path)
    self.a_dict = torch.load(action_dict_path)
    self.action_space = self.a_dict['actions']
    self.action_space_dict = self.a_dict['mineral']

    # At the end of a reward type name:
    #   1 means positive for player 1, negative for player 2
    #   2 means positive for player 2, negative for player 1
    self.reward_types = list(reward_dict.values())
    for rt in self.reward_types:
        self.decomposed_reward_dict[rt] = 0
        self.last_decomposed_reward_dict[rt] = 0

    unit_type = [UNIT_TYPES['Marine'], UNIT_TYPES['Baneling'], UNIT_TYPES['Immortal']]
def main(unused_argv):
    agent = AiBot()
    epsilon = 0
    epsilon_min = 0
    eps_reduction_factor = 0.99
    save_game = False
    episode = 0
    path = ""
    save_buffer = False
    last_100 = deque(maxlen=100)
    iter = 1
    agent.actor_critic_agent = ActorCriticAgent(15, [
        "no_op", "build_scv", "build_supply_depot", "build_marine",
        "build_barracks", "return_scv", "attack"
    ], epsilon)
    game_results = []
    latest_result = 0
    all_rewards = []

    # Plot setup
    fig, ax = plt.subplots(num=1)
    ax.plot()
    ax.set_title("Score for each game over time")
    fig2, ax2 = plt.subplots(num=3)
    ax2.plot()
    ax2.set_title("win%")
    fig3, ax3 = plt.subplots(num=4)
    ax3.plot()
    ax3.set_title("totalreward")

    try:
        with sc2_env.SC2Env(
                map_name="AbyssalReef",
                players=[
                    sc2_env.Agent(sc2_env.Race.terran),
                    sc2_env.Bot(sc2_env.Race.terran, sc2_env.Difficulty.medium)
                ],
                agent_interface_format=features.AgentInterfaceFormat(
                    feature_dimensions=features.Dimensions(screen=84, minimap=64),
                    use_feature_units=True,
                    use_raw_units=True,
                    use_camera_position=True),
                step_mul=8,
                game_steps_per_episode=30000,
                visualize=False,
                disable_fog=True) as env:
            while True:
                agent.setup(env.observation_spec(), env.action_spec())
                timesteps = env.reset()
                agent.actor_critic_agent.build_index = 0
                if os.path.isfile('good_buffer.data'):
                    filehandler1 = open("good_buffer.data", 'rb')
                    agent.actor_critic_agent.good_buffer = pickle.load(filehandler1)
                agent.reset()
                if episode > 0:
                    all_rewards = all_rewards + [agent.actor_critic_agent.total_reward]
                    with open('all_rewards.txt', mode='w') as filehandle:
                        for i in all_rewards:
                            filehandle.write("%s\n" % i)
                    game_results = game_results + [latest_result]
                    with open('game_results.txt', mode='w') as filehandle:
                        for i in game_results:
                            filehandle.write("%s\n" % i)
                episode += 1
                if agent.actor_critic_agent.epsilon > epsilon_min:
                    agent.actor_critic_agent.epsilon *= eps_reduction_factor
                if agent.actor_critic_agent.buffer_epsilon > agent.actor_critic_agent.buffer_epsilon_min:
                    agent.actor_critic_agent.buffer_epsilon *= agent.actor_critic_agent.buffer_epsilon_decay
                if agent.actor_critic_agent.actor.IMITATION_WEIGHT > 0.0001:
                    agent.actor_critic_agent.actor.IMITATION_WEIGHT = imitation_factor(episode)
                else:
                    agent.actor_critic_agent.actor.IMITATION_WEIGHT = 0.0001
                print("Imitation weight: ", agent.actor_critic_agent.actor.IMITATION_WEIGHT)

                # For determining win/loss/tie
                agent.reward = 0
                # For plotting total reward
                agent.actor_critic_agent.total_reward = 0
                while True:
                    step_actions = [agent.step(timesteps[0], epsilon, episode)]
                    if timesteps[0].last():
                        state, oldscore, map = agent.game_state.get_state_now(timesteps[0])
                        # If it won
                        if agent.reward == 1:
                            last_100.append(1)
                            latest_result = 1
                            end_reward = 100
                        # If it lost
                        elif agent.reward == -1:
                            last_100.append(0)
                            latest_result = 0
                            end_reward = -100
                        # If time's up
                        else:
                            last_100.append(0)
                            latest_result = 0
                            end_reward = -5
                        agent.actor_critic_agent.total_reward += end_reward
                        if agent.actor_critic_agent.GOOD_GAME:
                            agent.actor_critic_agent.good_buffer.append([
                                agent.actor_critic_agent.prev_state[0],
                                agent.actor_critic_agent.prev_actions,
                                end_reward, state[0], True
                            ])
                        else:
                            agent.actor_critic_agent.buffer.append([
                                agent.actor_critic_agent.prev_state[0],
                                agent.actor_critic_agent.prev_actions,
                                end_reward, state[0], True
                            ])
                        if save_buffer and agent.reward == 1:
                            filehandler = open("good_buffer.data", 'wb')
                            pickle.dump(agent.actor_critic_agent.good_buffer, filehandler)
                            print(len(agent.actor_critic_agent.good_buffer))
                        if save_game:
                            agent.save_game(path, episode)
                        print("Score: ", timesteps[0].observation.score_cumulative.score)
                        ax.scatter(episode,
                                   timesteps[0].observation.score_cumulative.score,
                                   s=3, c='blue')
                        fig.savefig("score.png")
                        if len(last_100) == last_100.maxlen:
                            percent = sum(last_100) / 100
                            ax2.scatter(iter, percent, s=3, c='blue')
                            iter += 1
                            fig2.savefig("winpercent.png")
                        ax3.scatter(episode, agent.actor_critic_agent.total_reward,
                                    s=3, c='blue')
                        fig3.savefig("total_reward.png")
                        break
                    timesteps = env.step(step_actions)
    except KeyboardInterrupt:
        pass
def eval_genomes(genomes, config):
    global global_stats
    if params.parallel:
        """
        MULTITHREADING - currently creates threads equal to the number of agents
        and divides genomes between them; the speed increase caps out at 4 to 5 threads.
        """
        # TODO environment timeout handling in rare case of environment failing
        thread_list = []
        queue_lock = threading.Lock()

        # create queues
        agent_queue = queue.Queue(maxsize=params.num_of_agents)
        env_queue = queue.Queue(maxsize=params.num_of_agents)
        results = {}
        queue_lock.acquire()
        for agent in agents:
            agent_queue.put(agent)
        for env in envs:
            env_queue.put(env)
        queue_lock.release()

        # create threads and divide genomes between them for evaluation
        thread_id = 1
        total_genomes = len(genomes)
        # NOTE: integer division drops the remainder, so up to
        # num_of_agents - 1 genomes are skipped when the split is uneven
        per_thread = int(total_genomes / params.num_of_agents)
        for i in range(params.num_of_agents):
            thread = WorkerThread(
                thread_id=thread_id,
                genomes=genomes[i * per_thread:i * per_thread + per_thread],
                agent_queue=agent_queue,
                env_queue=env_queue,
                lock=queue_lock,
                config=config,
                results=results)
            thread_list.append(thread)
            thread_id += 1  # was missing; every worker got the same id otherwise

        # if need to send genome to each thread
        # for genome_id, genome in genomes:
        #     thread = WorkerThread(thread_id=thread_id, genome=genome,
        #                           agent_queue=agent_queue, env_queue=env_queue,
        #                           lock=queue_lock, config=config, results=results)
        #     thread_list.append(thread)
        #     thread_id += 1

        # start and join threads for one generation of genomes
        for thread in thread_list:
            thread.start()
        for thread in thread_list:
            thread.join()

        # collect and assign results
        for genome_id, genome in genomes:
            # assuming genome_id is the same as genome.key
            genome.fitness = results[genome_id]
    else:
        # serialised evaluation - env timeouts are handled
        agent, env = retrieve_agent_and_environment()
        for genome_id, genome in genomes:
            while True:
                try:
                    genome.fitness = eval_single_genome(genome, config, agent, env)
                    break
                except KeyboardInterrupt:
                    exit()
                except Exception as e:
                    print(e)
                    print("Exception during eval_single_genome")
                    if isinstance(e, protocol.ConnectionError):
                        print("timeout issues")
                    print("plotting graphs and pickling results")
                    print("attempting to restart env")
                    pickle_results([], good_genomes, [])
                    plot_graphs(config, global_stats, display=False, winner=None)
                    # close env
                    env.close()
                    # start new env
                    env = sc2_env.SC2Env(
                        map_name=params.map,
                        players=[sc2_env.Agent(sc2_env.Race.terran)],
                        agent_interface_format=features.AgentInterfaceFormat(
                            feature_dimensions=features.Dimensions(
                                screen=params.dimensions, minimap=params.dimensions),
                            use_feature_units=True),
                        step_mul=params.step_mul,
                        game_steps_per_episode=params.game_steps,
                        visualize=params.visualize,
                        random_seed=1)
                    # reinitialise agent
                    agent.reinitialize()
                    # eval_single_genome will run again at the top of the while loop
            print("Genome ID {}, Genome fitness {}".format(genome_id, genome.fitness))
            save_good_genome(genome, agent.genome_threshold)
        return_agent_and_environment(agent, env)
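# A generic, self-contained sketch of the divide-work-among-threads pattern in
# eval_genomes above (names are illustrative; `resources` plays the role of the
# (agent, env) pairs). Striding with items[i::n] also avoids dropping the
# remainder that int(total / n) chunking would skip.
import queue
import threading

def parallel_eval(items, resources, eval_fn):
    results = {}
    lock = threading.Lock()
    pool = queue.Queue()
    for r in resources:
        pool.put(r)

    def worker(chunk):
        res = pool.get()  # borrow a resource for this thread
        try:
            for item_id, item in chunk:
                value = eval_fn(res, item)
                with lock:
                    results[item_id] = value
        finally:
            pool.put(res)  # return it for the next generation

    n = len(resources)
    threads = [threading.Thread(target=worker, args=(items[i::n],))
               for i in range(n)]
    for t in threads:
        t.start()
    for t in threads:
        t.join()
    return results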
def run_thread(agent, map_name):
    while True:
        try:
            print("\nStarting episode %s for agent %s ..." % (agent.episode, agent.id))
            clean_sc2_temp_folder(tmp_maps_path, 8, 90)
            agent.rollouts_manager.empty_dict_rollouts()
            agent.episode_values = []
            agent.episode_cumulated_reward = 0
            agent.episode_step_count = 0
            agent.current_episode_actions = []
            agent.current_episode_rewards = []
            agent.current_episode_values = []
            L_players = [sc2_env.Agent(sc2_env.Race.terran)]
            with sc2_env.SC2Env(
                    map_name=map_name,
                    players=L_players,
                    agent_interface_format=features.AgentInterfaceFormat(
                        feature_dimensions=features.Dimensions(
                            screen=params['resolution'],
                            minimap=params['resolution']),
                        use_feature_units=True),
                    step_mul=params['step_mul'],
                    game_steps_per_episode=0,
                    visualize=False,
                    disable_fog=True) as env:
                agent.setup(env.observation_spec(), env.action_spec())
                timesteps = env.reset()
                agent.reset()
                global start_time
                start_time = time.time()
                while True:
                    step_actions = [agent.step(timesteps[0])]
                    if timesteps[0].last():
                        break
                    timesteps = env.step(step_actions)

                print("\nEpisode over for agent %s ..." % agent.id)

                # Summary parameters:
                available_actions_ratio = len(agent.current_episode_unique_actions) / \
                    len(agent.current_episode_available_actions)
                summary = tf.Summary()
                summary.value.add(tag='Perf/1_Reward',
                                  simple_value=float(agent.episode_cumulated_reward))
                summary.value.add(tag='Perf/2_Distinct actions',
                                  simple_value=float(len(agent.current_episode_unique_actions)))
                summary.value.add(tag='Perf/3_Average advantage',
                                  simple_value=float(np.mean(agent.advantages)))
                summary.value.add(tag='Perf/4_Previous actions ratio',
                                  simple_value=float(agent.previous_actions_ratio))
                summary.value.add(tag='Perf/5_Average value',
                                  simple_value=float(agent.average_value))
                summary.value.add(tag='Perf/6_Available actions ratio',
                                  simple_value=float(available_actions_ratio))
                summary.value.add(tag='Perf/7_Average agent return',
                                  simple_value=float(np.mean(agent.agent_return)))
                summary.value.add(tag='Perf/8_Random policy',
                                  simple_value=float(agent.random_policy))
                summary.value.add(tag='Perf/9_Episode length',
                                  simple_value=float(agent.current_episode_step_count))
                summary.value.add(tag='Losses/1_Value loss',
                                  simple_value=float(agent.value_loss))
                summary.value.add(tag='Losses/2_Policy loss',
                                  simple_value=float(agent.global_policy_loss))
                summary.value.add(tag='Losses/3_Entropy loss',
                                  simple_value=float(agent.entropy))
                summary.value.add(tag='Losses/4_Network loss',
                                  simple_value=float(agent.network_loss))
                # summary.value.add(tag='Losses/5_Grad norm', simple_value=float(agent.grad_norms))
                # summary.value.add(tag='Losses/6_Var norm', simple_value=float(agent.var_norms))
                for label in agent.dict_policy.keys():
                    policy = agent.dict_policy[label][0]
                    policy_len = len(policy)
                    indexed_label = agent.index_label(label) + ' | (%s)' % policy_len
                    summary.value.add(tag=indexed_label,
                                      histo=build_histo_summary(policy, policy_len))
                agent.summary_writer.add_summary(summary, agent.episode)
                agent.summary_writer.flush()

                if agent.episode > 0 and agent.episode % 20 == 0:
                    session_path = training_path + "sessions\\model_episode_%s.cptk" % (
                        str(agent.episode))
                    build_path(session_path)
                    saver.save(sess, session_path)
                    print("\nModel saved")
                agent.episode += 1
        except KeyboardInterrupt:
            break
        except pysc2.lib.remote_controller.RequestError:
            print("\n\npysc2.lib.remote_controller.RequestError for worker %s\n\n" % agent.name)
            env.close()
            print("\n\nenvironment closed for worker %s\n\n" % agent.name)
            time.sleep(2)
        except pysc2.lib.remote_controller.ConnectError:
            print()
        except pysc2.lib.protocol.ConnectionError:
            print("\n\npysc2.lib.protocol.ConnectionError for worker %s\n\n" % agent.name)
            # Picked from "https://github.com/inoryy/reaver-pysc2/blob/master/reaver/envs/sc2.py#L57-L69"
            # hacky fix for the websocket timeout issue...
            # this results in faulty reward signals, but I guess it beats completely crashing...
            env.close()
MAPNAME = 'Simple64'
APM = 300
APM = int(APM / 18.75)  # converted into the step multiplier passed to step_mul below
UNLIMIT = 0
VISUALIZE = True
REALTIME = True
SCREEN_SIZE = 84
MINIMAP_SIZE = 64

players = [sc2_env.Agent(sc2_env.Race.terran),
           sc2_env.Bot(sc2_env.Race.zerg, sc2_env.Difficulty.very_easy)]

interface = features.AgentInterfaceFormat(
    feature_dimensions=features.Dimensions(screen=SCREEN_SIZE, minimap=MINIMAP_SIZE),
    use_feature_units=True)


class Agent(base_agent.BaseAgent):
    def step(self, obs):
        super(Agent, self).step(obs)
        return actions.FUNCTIONS.no_op()


def main(args):
    agent = Agent()
    try:
        with sc2_env.SC2Env(map_name=MAPNAME, players=players,
                            agent_interface_format=interface,
                            step_mul=APM, game_steps_per_episode=UNLIMIT,
                            visualize=VISUALIZE, realtime=REALTIME) as env:
            # standard pysc2 agent loop
            agent.setup(env.observation_spec(), env.action_spec())
            timesteps = env.reset()
            agent.reset()
            while True:
                step_actions = [agent.step(timesteps[0])]
                if timesteps[0].last():
                    break
                timesteps = env.step(step_actions)
    except KeyboardInterrupt:
        pass
  player_id: 1
  minerals: 0
  vespene: 0
  food_cap: 10
  food_used: 0
  food_army: 0
  food_workers: 0
  idle_worker_count: 0
  army_count: 0
  warp_gate_count: 0
  larva_count: 0
}
game_loop: 20
"""

RECTANGULAR_DIMENSIONS = features.Dimensions(screen=(84, 80), minimap=(64, 67))
SQUARE_DIMENSIONS = features.Dimensions(screen=84, minimap=64)


class AvailableActionsTest(absltest.TestCase):

    always_expected = {
        "no_op", "move_camera", "select_point", "select_rect",
        "select_control_group"
    }

    def setUp(self):
        super(AvailableActionsTest, self).setUp()
        self.obs = text_format.Parse(observation_text_proto, sc_pb.Observation())
        self.hideSpecificActions(True)
def __init__(self, reward_types, map_name=None, unit_type=None,
             generate_xai_replay=False, xai_replay_dimension=256, verbose=False):
    if map_name is None:
        map_name = MAP_NAME
    maps_dir = os.path.join(os.path.dirname(__file__), '..', 'maps')
    print("maps directory: " + str(maps_dir))
    register_map(maps_dir, map_name)

    if generate_xai_replay:
        aif = features.AgentInterfaceFormat(
            feature_dimensions=features.Dimensions(screen=SCREEN_SIZE, minimap=SCREEN_SIZE),
            rgb_dimensions=sc2_env.Dimensions(
                screen=(xai_replay_dimension, xai_replay_dimension),
                minimap=(64, 64),
            ),
            action_space=actions.ActionSpace.FEATURES,
            camera_width_world_units=28,
            # use_camera_position=True,
        )
        step_mul_value = 4
    else:
        aif = features.AgentInterfaceFormat(
            feature_dimensions=features.Dimensions(screen=SCREEN_SIZE, minimap=SCREEN_SIZE),
            action_space=actions.ActionSpace.FEATURES,
            camera_width_world_units=100,
        )
        np.set_printoptions(threshold=sys.maxsize, linewidth=sys.maxsize, precision=1)
        step_mul_value = 16

    self.sc2_env = sc2_env.SC2Env(
        map_name=map_name,
        agent_interface_format=aif,
        step_mul=step_mul_value,
        game_steps_per_episode=0,
        score_index=0,
        visualize=True,
    )

    self.current_obs = None
    self.actions_taken = 0
    self.decomposed_rewards = []
    self.verbose = verbose
    self.decision_point = 1
    self.miner_index = 12
    self.reset_steps = -1
    self.norm_vector = np.array([1, 1, 1, 1, 50, 50, 1, 1, 1, 1, 50, 50, 100])
    self.signal_of_end = False
    self.end_state = None
    self.maker_cost_np = np.zeros(len(maker_cost))
    for i, mc in enumerate(maker_cost.values()):
        self.maker_cost_np[i] = mc

    self.reward_types = reward_types
    self.last_decomposed_reward_dict = {}
    self.decomposed_reward_dict = {}
    for rt in reward_types:
        self.decomposed_reward_dict[rt] = 0
        self.last_decomposed_reward_dict[rt] = 0

    unit_type = [UNIT_TYPES['Marine'], UNIT_TYPES['Viking'], UNIT_TYPES['Colossus']]
    self.input_screen_features = {
        "PLAYER_RELATIVE": [1, 4],
        "UNIT_TYPE": unit_type,
        'HIT_POINT': 0,
        'HIT_POINT_RATIO': 0,
        'SHIELD': 0,
        'SHIELD_RATIO': 0,
        'UNIT_DENSITY': 0
    }
def testScreenWidthWithoutHeightRaises(self):
    with self.assertRaises(ValueError):
        features.Dimensions(screen=(84, 0), minimap=64)
import sc2agents.learning.deep.keras.models as keras_models

DEFAULT_MINIMAP_SIZE = 64
DEFAULT_SCREEN_SIZE = 84
DEFAULT_STEP_MUL = 32
DEFAULT_DIFFICULTY = sc2_env.Difficulty.very_easy
DEFAULT_GAME_SPECIFICATION = {
    'map_name': "Eastwatch",
    'players': [
        sc2_env.Agent(sc2_env.Race.terran),
        sc2_env.Bot(sc2_env.Race.zerg, DEFAULT_DIFFICULTY)
    ],
    'agent_interface_format': features.AgentInterfaceFormat(
        feature_dimensions=features.Dimensions(screen=DEFAULT_SCREEN_SIZE,
                                               minimap=DEFAULT_MINIMAP_SIZE),
        use_feature_units=True),
    'step_mul': DEFAULT_STEP_MUL,
    'game_steps_per_episode': 0,
    'visualize': False
    # 'ensure_available_actions': False  # TODO enable after pysc2 release
}


def run_game(agent, game_specification=None):
    if game_specification is None:
        game_specification = DEFAULT_GAME_SPECIFICATION
    with sc2_env.SC2Env(**game_specification) as env:
        # standard pysc2 agent loop
        agent.setup(env.observation_spec(), env.action_spec())
        timesteps = env.reset()
        agent.reset()
        while True:
            step_actions = [agent.step(timesteps[0])]
            if timesteps[0].last():
                break
            timesteps = env.step(step_actions)
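# Because the game specification is a plain dict splatted into SC2Env, a run
# can override any field without touching the defaults. A hypothetical
# example (harder bot, headless):
custom_spec = dict(DEFAULT_GAME_SPECIFICATION)
custom_spec['players'] = [
    sc2_env.Agent(sc2_env.Race.terran),
    sc2_env.Bot(sc2_env.Race.zerg, sc2_env.Difficulty.medium),
]
# run_game(my_agent, game_specification=custom_spec)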
def testMinimapWidthAndHeightWithoutScreenRaises(self):
    with self.assertRaises(ValueError):
        features.Dimensions(minimap=(64, 67))
def training_game():
    env = Environment(
        map_name="ForceField",
        visualize=True,
        game_steps_per_episode=150,
        agent_interface_format=features.AgentInterfaceFormat(
            feature_dimensions=features.Dimensions(screen=64, minimap=32)))

    input_shape = (_SIZE, _SIZE, 1)
    nb_actions = 12  # number of actions

    model = neural_network_model(input_shape, nb_actions)
    memory = SequentialMemory(limit=5000, window_length=_WINDOW_LENGTH)
    processor = SC2Proc()

    # Policy
    policy = LinearAnnealedPolicy(EpsGreedyQPolicy(),
                                  attr="eps",
                                  value_max=1,
                                  value_min=0.2,
                                  value_test=.0,
                                  nb_steps=1e2)

    # Agent
    dqn = DQNAgent(
        model=model,
        nb_actions=nb_actions,
        memory=memory,
        enable_double_dqn=True,
        enable_dueling_network=True,
        # 2019-07-12 GU Zhan (Sam)
        # nb_steps_warmup=500, target_model_update=1e-2, policy=policy,
        nb_steps_warmup=2000,
        target_model_update=1e-2,
        policy=policy,
        batch_size=150,
        processor=processor,
        delta_clip=1)

    dqn.compile(Adam(lr=.001), metrics=["mae", "acc"])

    # Tensorboard callback
    timestamp = f"{datetime.datetime.now():%Y-%m-%d %I:%M%p}"
    # 2019-07-12 GU Zhan (Sam) folder name for Linux:
    # callbacks = keras.callbacks.TensorBoard(log_dir='./Graph/' + timestamp,
    #                                         histogram_freq=0, write_graph=True,
    #                                         write_images=False)
    # 2019-07-12 GU Zhan (Sam) folder name for Windows (raw string avoids
    # backslash-escape warnings):
    callbacks = keras.callbacks.TensorBoard(log_dir=r'.\Graph\issgz',
                                            histogram_freq=0,
                                            write_graph=True,
                                            write_images=False)

    # Save the parameters and load them when needed
    name = "agent"
    w_file = "dqn_{}_weights.h5f".format(name)
    check_w_file = "train_w" + name + "_weights.h5f"
    if SAVE_MODEL:
        check_w_file = "train_w" + name + "_weights_{step}.h5f"
    log_file = "training_w_{}_log.json".format(name)

    if LOAD_MODEL:
        dqn.load_weights(w_file)

    class Saver(Callback):
        def on_episode_end(self, episode, logs={}):
            if episode % 200 == 0:
                self.model.save_weights(w_file, overwrite=True)

    s = Saver()
    logs = FileLogger('DQN_Agent_log.csv', interval=1)

    # dqn.fit(env, callbacks=[callbacks, s, logs], nb_steps=600, action_repetition=2,
    dqn.fit(env,
            callbacks=[callbacks, s, logs],
            nb_steps=10000,
            action_repetition=2,
            log_interval=1e4,
            verbose=2)
    dqn.save_weights(w_file, overwrite=True)
    dqn.test(env, action_repetition=2, nb_episodes=30, visualize=False)
def testSingularZeroesRaises(self):
    with self.assertRaises(ValueError):
        features.Dimensions(screen=0, minimap=0)
def testEquality(self):
    self.assertEqual(features.Dimensions(screen=64, minimap=64),
                     features.Dimensions(screen=64, minimap=64))
    self.assertNotEqual(features.Dimensions(screen=64, minimap=64),
                        features.Dimensions(screen=64, minimap=32))
    self.assertNotEqual(features.Dimensions(screen=64, minimap=64), None)
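# Taken together, the tests pin down the Dimensions contract: screen and
# minimap must both be given, each as a positive int or a positive
# (width, height) tuple, the screen must not be smaller than the minimap,
# and equality is by value. Constructions the suite implies are valid:
square = features.Dimensions(screen=84, minimap=64)
rect = features.Dimensions(screen=(84, 80), minimap=(64, 67))
assert square == features.Dimensions(screen=84, minimap=64)
assert square != features.Dimensions(screen=64, minimap=64)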