def __init__(self, path="data/battle_model_3_players", total_step=1000, add_counter=10, add_interval=50): # some parameter map_size = 125 eps = 0.00 # init the game env = magent.GridWorld(utils.load_config(map_size)) handles = env.get_handles() models = [] models.append( DeepQNetwork(env, handles[0], 'trusty-battle-game-l1', use_conv=True)) # models.append(DeepQNetwork(env, handles[1], 'trusty-battle-game-l2', use_conv=True)) models.append( DeepQNetwork(env, handles[1], 'trusty-battle-game-r', use_conv=True)) # load model # tf.reset_default_graph() models[0].load(path, 1, 'trusty-battle-game-l1') # models[1].load(path, 1, 'trusty-battle-game-l2') # tf.reset_default_graph() models[2].load(path, 1, 'trusty-battle-game-r') # init environment env.reset() utils.generate_map(env, map_size, handles) # save to member variable self.env = env self.handles = handles self.eps = eps self.models = models self.map_size = map_size self.total_step = total_step self.add_interval = add_interval self.add_counter = add_counter self.done = False self.total_handles = [ self.env.get_num(self.handles[0]), self.env.get_num(self.handles[1]) ]
def __init__(self, path="data/against_v2", total_step=500): # some parameter map_size = 125 eps = 0.00 # init the game env = magent.GridWorld("battle", map_size=map_size) handles = env.get_handles() models = [] models.append( DeepQNetwork(env, handles[0], 'trusty-battle-game-l', use_conv=True)) models.append(DeepQNetwork(env, handles[1], 'battle', use_conv=True)) # load model # models[0].load(path, 999, 'against-a') # # models[0].load('data/battle_model_1000_vs_500', 1500, 'trusty-battle-game-l') # models[1].load(path, 999, 'battle') # models[0].load("data/battle_model_1000_vs_500", 1500, 'trusty-battle-game-l') models[1].load("data/battle_model_1000_vs_500", 1500, 'trusty-battle-game-r') # init environment env.reset() x0, y0, x1, y1 = utils.generate_map(env, map_size, handles) # generate_map(env, map_size, handles) # save to member variable self.env = env self.handles = handles self.eps = eps self.models = models self.map_size = map_size self.total_step = total_step self.done = False self.total_handles = [ self.env.get_num(self.handles[0]), self.env.get_num(self.handles[1]) ]
def reset_map(self, id):
    # load map info
    self.mapId = "generate" if np.random.uniform() < 0.1 else id
    if self.mapId == "generate":
        self.maps["generate"] = utils.generate_map()
    else:
        self.maps[self.mapId] = utils.random_gold(self.maps[self.mapId])
    self.map = json.loads(self.maps[self.mapId])
    self.userMatch = self.map_info(self.map)
    self.stepState.golds = self.userMatch.gameinfo.golds
    # self.map = json.loads(self.maps[self.mapId])
    self.energyOnMap = json.loads(self.maps[self.mapId])
    for x in range(len(self.map)):
        for y in range(len(self.map[x])):
            if self.map[x][y] > 0:  # gold
                self.energyOnMap[x][y] = -4
            else:  # obstacles
                self.energyOnMap[x][y] = ObstacleInfo.types[self.map[x][y]]
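# A minimal, self-contained sketch of the encoding used by reset_map above, assuming
# positive cells hold gold amounts and non-positive cells index an obstacle type.
# OBSTACLE_ENERGY below is a hypothetical stand-in for ObstacleInfo.types, not its real values.
OBSTACLE_ENERGY = {0: -1, -1: -10, -2: -5, -3: -40}  # assumed energy cost per obstacle type

raw_map = [[0, 50, -1],
           [-2, 0, -3]]

energy_on_map = [[-4 if cell > 0 else OBSTACLE_ENERGY[cell] for cell in row]
                 for row in raw_map]
# every gold cell costs a flat 4 energy to step on; other cells use the obstacle table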
# Get prediction for input image
print("Epoch %d, image %d of %d" % ((j + 1), (i + 1), n_predict))
unlabelled = utils.get_unlabelled(i, batch_size=1, test=True)
pred = sess.run(convnet.out_max, feed_dict={images: unlabelled})

# Compute accuracy and dsc if mask is available
if os.path.isfile("testing set/test_%d_mask.png" % (i + 1)):
    labels = utils.get_labelled(i, 1, test=True)
    accuracy, dsc = utils.compute_accuracy(pred, labels)
    print("Prediction percent accuracy: %.3f and DSC: %.3f" % (accuracy, dsc))
    epoch_acc[j] += accuracy
    epoch_dsc[j] += dsc

    logging.info("Creating output map")
    map = utils.generate_map(pred)
    scp.misc.imsave(
        'predictions test/pred_%d_epoch_%d_a_%.3f_d_%.3f.png' %
        (i + 1, j + 1, accuracy, dsc), map)
else:
    print("Mask not found. Cannot compute accuracy and DSC")
    logging.info("Creating output map")
    map = utils.generate_map(pred)
    scp.misc.imsave(
        'predictions test/pred_%d_epoch_%d.png' % (i + 1, j + 1), map)

# Stats for each epoch
epoch_acc = np.divide(epoch_acc, n_predict)
epoch_dsc = np.divide(epoch_dsc, n_predict)
print('Accuracy each epoch')
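# Hedged sketch, not the repo's utils.compute_accuracy: one common way to compute the
# pixel accuracy and Dice similarity coefficient (DSC) that the snippet above reports,
# assuming pred and labels are binary masks of the same shape.
import numpy as np

def accuracy_and_dsc(pred, labels):
    pred = np.asarray(pred).astype(bool)
    labels = np.asarray(labels).astype(bool)
    accuracy = 100.0 * np.mean(pred == labels)               # percent of matching pixels
    intersection = np.logical_and(pred, labels).sum()
    denom = pred.sum() + labels.sum()
    dsc = 2.0 * intersection / denom if denom > 0 else 1.0   # overlap between the two masks
    return accuracy, dsc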
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
"""

# Project site: https://github.com/bravikov/pylee

import utils

x0 = 2
y0 = 12
map_width = 15
map_height = 15

my_map = utils.generate_map(map_width, map_height)
utils.add_obstacle(my_map, 6, 5, 3, 4)
utils.print_map(my_map)

start_location = utils.Location()
start_location.x = x0
start_location.y = y0

locations = [start_location]
weight = 1

while len(locations) > 0:
    nextLocations = []
    for location in locations:
        utils.set_weight(my_map, location, weight)
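# Hedged sketch of how the truncated wave-expansion loop above typically continues
# (the Lee algorithm). It is self-contained and uses plain lists rather than pylee's
# utils helpers; the grid convention (0 = free, anything else = obstacle) is an assumption.
from collections import deque

def lee_wave(grid, start, goal):
    """Return a shortest 4-connected path from start to goal, or None if unreachable."""
    h, w = len(grid), len(grid[0])
    dist = [[None] * w for _ in range(h)]
    dist[start[1]][start[0]] = 0
    frontier = deque([start])
    # forward pass: breadth-first wave assigns each reachable cell its distance
    while frontier:
        x, y = frontier.popleft()
        if (x, y) == goal:
            break
        for nx, ny in ((x + 1, y), (x - 1, y), (x, y + 1), (x, y - 1)):
            if 0 <= nx < w and 0 <= ny < h and grid[ny][nx] == 0 and dist[ny][nx] is None:
                dist[ny][nx] = dist[y][x] + 1
                frontier.append((nx, ny))
    if dist[goal[1]][goal[0]] is None:
        return None
    # backtrace: from the goal, step to any neighbour whose distance is one less
    path, (x, y) = [goal], goal
    while (x, y) != start:
        for nx, ny in ((x + 1, y), (x - 1, y), (x, y + 1), (x, y - 1)):
            if 0 <= nx < w and 0 <= ny < h and dist[ny][nx] == dist[y][x] - 1:
                x, y = nx, ny
                path.append((x, y))
                break
    return path[::-1]

# example call on an empty 15x15 grid with the start used above:
# lee_wave([[0] * 15 for _ in range(15)], (2, 12), (14, 0))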
model = networks.SimpleNetwork(dim, arms=arms, num_locs=num_locs)
quantal_temp = 1
optimizer = tf.optimizers.Adam(1e-5)
model.build(input_shape=(batch_size, dim, dim, 1))

iterations = 80000
interval = 5000
file_name = '10_10_mu'

regrets = []
regrets_std = []
losses = []

for step in range(iterations):
    with tf.GradientTape() as tape:
        utilities = utils.generate_map(batch_size, dim, dim)
        inputs = tf.expand_dims(utilities, axis=3)
        policies = model(inputs, True)
        samples, selection = utils.sample_actions(policies, batch_size, num_locs, arms, dim)

        op_locs = []
        utilities_copy = tf.reshape(utilities, [batch_size, -1])
        for j in range(op_num_locs):
            indmax = tf.argmax(utilities_copy, axis=1)
            opponent = tf.expand_dims(tf.stack(
                [tf.math.floordiv(indmax, dim), tf.math.floormod(indmax, dim)],
                axis=1), axis=1)
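# Hedged sketch (an assumption; the loop above is cut off before this point): when the
# opponent greedily claims op_num_locs cells, the selected cell is usually masked out of
# utilities_copy so the next tf.argmax returns the next-best location rather than the same index.
indmax = tf.argmax(utilities_copy, axis=1)           # flat index of the current best cell
mask = tf.one_hot(indmax, dim * dim, on_value=float('-inf'), off_value=0.0)
utilities_copy = utilities_copy + tf.cast(mask, utilities_copy.dtype)  # exclude the chosen cell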
def play_a_round(env, map_size, handles, models, print_every, train=True, render=False, eps=None):
    env.reset()
    utils.generate_map(env, map_size, handles)

    step_ct = 0
    done = False

    n = len(handles)
    obs = [[] for _ in range(n)]
    ids = [[] for _ in range(n)]
    acts = [[] for _ in range(n)]
    nums = [env.get_num(handle) for handle in handles]
    total_reward = [0 for _ in range(n)]

    print("===== sample =====")
    print("eps %.2f number %s" % (eps, nums))
    start_time = time.time()
    counter = 10
    while not done:
        # take actions for every model
        for i in range(n):
            obs[i] = env.get_observation(handles[i])
            ids[i] = env.get_agent_id(handles[i])
            # let models infer actions in parallel (non-blocking)
            models[i].infer_action(obs[i], ids[i], 'e_greedy', eps, block=False)

        for i in range(n):
            acts[i] = models[i].fetch_action()  # fetch actions (blocking)
            env.set_action(handles[i], acts[i])

        # simulate one step
        done = env.step()

        # sample
        step_reward = []
        for i in range(n):
            rewards = env.get_reward(handles[i])
            pos = env.get_pos(handles[i])
            for (x, y) in pos:
                rewards -= ((1.0 * x / map_size - 0.5)**2 +
                            (1.0 * y / map_size - 0.5)**2) / 100
            if train:
                alives = env.get_alive(handles[i])
                # store samples in the replay buffer (non-blocking)
                models[i].sample_step(rewards, alives, block=False)
            s = sum(rewards)
            step_reward.append(s)
            total_reward[i] += s

        # render
        if render:
            env.render()

        # stat info
        nums = [env.get_num(handle) for handle in handles]

        # clear dead agents
        env.clear_dead()

        # check the return message of the previously called non-blocking sample_step()
        if args.train:
            for model in models:
                model.check_done()

        if step_ct % print_every == 0:
            print("step %3d, nums: %s reward: %s, total_reward: %s " %
                  (step_ct, nums, np.around(step_reward, 2), np.around(total_reward, 2)))
        step_ct += 1

        # if step_ct % 50 == 0 and counter >= 0:
        #     counter -= 1
        #     utils.add_agents(env, 0, 0, handles, map_size, True)
        #     # add_agents(env, np.random.randint(0, map_size - 1))
        #     # pos = []
        #     # x = np.random.randint(0, map_size - 1)
        #     # y = np.random.randint(0, map_size - 1)
        #     # for i in range(-4, 4):
        #     #     for j in range(-4, 4):
        #     #         pos.append((x + i, y + j))
        #     # env.add_agents(handles[g], method="custom", pos=pos)
        #     # pos = []
        #     # x = np.random.randint(0, map_size - 1)
        #     # y = np.random.randint(0, map_size - 1)
        #     # for i in range(-2, 2):
        #     #     for j in range(-4, 4):
        #     #         pos.append((x + i, y + j))
        #     # env.add_agents(handles[g + 1], method="custom", pos=pos)
        #     step_ct = 0

        if step_ct > 500:
            break

    sample_time = time.time() - start_time
    print("steps: %d, total time: %.2f, step average %.2f" %
          (step_ct, sample_time, sample_time / step_ct))

    # train
    total_loss, value = [0 for _ in range(n)], [0 for _ in range(n)]
    if train:
        print("===== train =====")
        start_time = time.time()

        # train models in parallel
        for i in range(n):
            models[i].train(print_every=1000, block=False)
        for i in range(n):
            total_loss[i], value[i] = models[i].fetch_train()

        train_time = time.time() - start_time
        print("train_time %.2f" % train_time)

    def round_list(l):
        return [round(x, 2) for x in l]

    return round_list(total_loss), nums, round_list(total_reward), round_list(value)
# set logger
buffer.init_logger(args.name)

# init the game
env = magent.GridWorld(utils.load_config(args.map_size))
# env.set_render_dir("build/render")

# two groups of agents
handles = env.get_handles()

# sample eval observation set
eval_obs = [None, None, None]
if args.eval:
    print("sample eval set...")
    env.reset()
    utils.generate_map(env, args.map_size, handles)
    for i in range(len(handles)):
        eval_obs[i] = buffer.sample_observation(env, handles, 2048, 500)

# load models
batch_size = 256
unroll_step = 8
target_update = 1200
train_freq = 5

if args.alg == 'dqn':
    RLModel = DeepQNetwork
    base_args = {
        'batch_size': batch_size,
        'memory_size': 2**21,
        'learning_rate': 1e-4,
batch_size = 32
dim = 5
arms = 8
num_locs = 3
op_num_locs = 2

model = networks.SimpleNetwork(dim, arms=arms, num_locs=num_locs)
quantal_temp = 1
optimizer = tf.optimizers.Adam(1e-5)
model.build(input_shape=(batch_size, dim, dim, 1))

iterations = 20000
interval = 2000
file_name = '5-5_3-2'

test_population = utils.generate_map(batch_size, dim, dim)
inputs = tf.expand_dims(test_population, axis=3)
policies = model(inputs, False)
samples, selection = utils.sample_actions(policies, batch_size, num_locs, arms, dim)
samples = samples.numpy()

op_locs = []
utilities_copy = tf.reshape(test_population, [batch_size, -1])
for j in range(op_num_locs):
    indmax = tf.argmax(utilities_copy, axis=1)
    opponent = tf.expand_dims(tf.stack(
        [tf.math.floordiv(indmax, dim), tf.math.floormod(indmax, dim)],
        axis=1), axis=1)
def save_prediction(self, pred, epoch, start_idx):
    map = utils.generate_map(pred)
    scp.misc.imsave(
        'predictions training/epoch_%d_img_%d.png' %
        (epoch + 1, start_idx + 1), map)
def play_a_round(env, map_size, handles, models, print_every, eps,
                 step_batch_size=None, train=True, train_id=0, render=False):
    """play a round of game"""
    env.reset()
    # generate_map(env, map_size, handles)
    x0, y0, x1, y1 = utils.generate_map(env, args.map_size, handles)

    step_ct = 0
    done = False

    n = len(handles)
    obs = [[] for _ in range(n)]
    ids = [[] for _ in range(n)]
    acts = [[] for _ in range(n)]
    nums = [env.get_num(handle) for handle in handles]
    total_reward = [0 for _ in range(n)]
    n_transition = 0
    pos_reward_num = 0
    total_loss, value = 0, 0

    print("===== sample =====")
    print("eps %s number %s" % (eps, nums))
    start_time = time.time()
    while not done:
        # take actions for every model
        for i in range(n):
            obs[i] = env.get_observation(handles[i])
            ids[i] = env.get_agent_id(handles[i])
            # let models infer action in parallel (non-blocking)
            models[i].infer_action(obs[i], ids[i], 'e_greedy', eps[i], block=False)

        for i in range(n):
            acts[i] = models[i].fetch_action()  # fetch actions (blocking)
            env.set_action(handles[i], acts[i])

        # simulate one step
        done = env.step()

        # sample
        step_reward = []
        for i in range(n):
            rewards = env.get_reward(handles[i])
            pos = env.get_pos(handles[i])
            if i == 0:
                for (x, y) in pos:
                    rewards -= ((1.0 * x / map_size - x1 / map_size)**2 +
                                (1.0 * y / map_size - y1 / map_size)**2) / 50
            elif i == 1:
                for (x, y) in pos:
                    rewards -= ((1.0 * x / map_size - x0 / map_size)**2 +
                                (1.0 * y / map_size - y0 / map_size)**2) / 50
            if train and i == train_id:
                alives = env.get_alive(handles[train_id])
                # store samples in replay buffer (non-blocking)
                models[train_id].sample_step(rewards, alives, block=False)
                pos_reward_num += len(rewards[rewards > 0])
            s = sum(rewards)
            step_reward.append(s)
            total_reward[i] += s

        # render
        if render:
            env.render()

        # stat info
        nums = [env.get_num(handle) for handle in handles]
        n_transition += nums[train_id]

        # clear dead agents
        env.clear_dead()

        # check return message of previous called non-blocking function sample_step()
        if train:
            models[train_id].check_done()

        if step_ct % print_every == 0:
            print("step %3d, nums: %s reward: %s, total_reward: %s, pos_rewards %d" %
                  (step_ct, nums, np.around(step_reward, 2),
                   np.around(total_reward, 2), pos_reward_num))
        step_ct += 1
        if step_ct > args.n_step:
            break

        if step_batch_size and n_transition > step_batch_size and train:
            total_loss, value = models[train_id].train(500)
            n_transition = 0

    sample_time = time.time() - start_time
    print("steps: %d, total time: %.2f, step average %.2f" %
          (step_ct, sample_time, sample_time / step_ct))

    # train
    if train:
        print("===== train =====")
        start_time = time.time()
        total_loss, value = models[train_id].train(500)
        train_time = time.time() - start_time
        print("train_time %.2f" % train_time)

    return total_loss, nums, total_reward, value
import random
import time

import utils
from uninformedSearch import dijkstra
from informedSearch import a_star
# from test import *

# set the file paths and generate the map
vertices_number = '100'
file_version = '0.1'
vertices_file = './graphs/graph' + vertices_number + '_' + file_version + '/v.txt'
edges_file = './graphs/graph' + vertices_number + '_' + file_version + '/e.txt'
map = utils.generate_map(vertices_file, edges_file)

# pick a random start point and end point (vertices are labelled 0..vertices_number-1)
start_point = str(random.randint(0, int(vertices_number) - 1))
end_point = str(random.randint(0, int(vertices_number) - 1))

# or set a specific start point and end point (note: both must be str)
# start_point = '0'
# end_point = '99'

# informed search - A*
informed_search_start_time = time.time()
informed_search_res = a_star(start_point, end_point, map)
informed_search_end_time = time.time()
informed_search_cost = informed_search_end_time - informed_search_start_time
print(f'With the informed search A-star, the minimum distance from {start_point} to {end_point} '
      f'is {informed_search_res[0]} and its time cost is {informed_search_cost} \n'
      f'Algorithm Step: {informed_search_res[1]}')
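# Hedged follow-up for comparison: this assumes the imported dijkstra shares a_star's
# call signature and return shape, i.e. (minimum distance, step count); it simply
# mirrors the timing block above for the uninformed search.
uninformed_search_start_time = time.time()
uninformed_search_res = dijkstra(start_point, end_point, map)
uninformed_search_end_time = time.time()
uninformed_search_cost = uninformed_search_end_time - uninformed_search_start_time
print(f'With the uninformed search Dijkstra, the minimum distance from {start_point} to {end_point} '
      f'is {uninformed_search_res[0]} and its time cost is {uninformed_search_cost} \n'
      f'Algorithm Step: {uninformed_search_res[1]}')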