Example #1
    def __init__(self,
                 path="data/battle_model_3_players",
                 total_step=1000,
                 add_counter=10,
                 add_interval=50):
        # some parameters
        map_size = 125
        eps = 0.00

        # init the game
        env = magent.GridWorld(utils.load_config(map_size))

        handles = env.get_handles()
        models = []
        models.append(
            DeepQNetwork(env,
                         handles[0],
                         'trusty-battle-game-l1',
                         use_conv=True))
        # models.append(DeepQNetwork(env, handles[1], 'trusty-battle-game-l2', use_conv=True))
        models.append(
            DeepQNetwork(env,
                         handles[1],
                         'trusty-battle-game-r',
                         use_conv=True))

        # load model
        # tf.reset_default_graph()
        models[0].load(path, 1, 'trusty-battle-game-l1')
        # models[1].load(path, 1, 'trusty-battle-game-l2')
        # tf.reset_default_graph()
        models[1].load(path, 1, 'trusty-battle-game-r')

        # init environment
        env.reset()
        utils.generate_map(env, map_size, handles)

        # save to member variable
        self.env = env
        self.handles = handles
        self.eps = eps
        self.models = models
        self.map_size = map_size
        self.total_step = total_step
        self.add_interval = add_interval
        self.add_counter = add_counter
        self.done = False
        self.total_handles = [
            self.env.get_num(self.handles[0]),
            self.env.get_num(self.handles[1])
        ]
    def __init__(self, path="data/against_v2", total_step=500):
        # some parameters
        map_size = 125
        eps = 0.00

        # init the game
        env = magent.GridWorld("battle", map_size=map_size)

        handles = env.get_handles()
        models = []
        models.append(
            DeepQNetwork(env,
                         handles[0],
                         'trusty-battle-game-l',
                         use_conv=True))
        models.append(DeepQNetwork(env, handles[1], 'battle', use_conv=True))

        # load model
        # models[0].load(path, 999, 'against-a')
        # # models[0].load('data/battle_model_1000_vs_500', 1500, 'trusty-battle-game-l')
        # models[1].load(path, 999, 'battle')
        #
        models[0].load("data/battle_model_1000_vs_500", 1500,
                       'trusty-battle-game-l')
        models[1].load("data/battle_model_1000_vs_500", 1500,
                       'trusty-battle-game-r')

        # init environment
        env.reset()
        x0, y0, x1, y1 = utils.generate_map(env, map_size, handles)
        # generate_map(env, map_size, handles)

        # save to member variable
        self.env = env
        self.handles = handles
        self.eps = eps
        self.models = models
        self.map_size = map_size
        self.total_step = total_step
        self.done = False
        self.total_handles = [
            self.env.get_num(self.handles[0]),
            self.env.get_num(self.handles[1])
        ]
Example #3
    def reset_map(self, id):  # load map info
        self.mapId = "generate" if np.random.uniform() < 0.1 else id
        if self.mapId == "generate":
            self.maps["generate"] = utils.generate_map()
        else:
            self.maps[self.mapId] = utils.random_gold(self.maps[self.mapId])

        self.map = json.loads(self.maps[self.mapId])
        self.userMatch = self.map_info(self.map)
        self.stepState.golds = self.userMatch.gameinfo.golds
        # self.map = json.loads(self.maps[self.mapId])
        self.energyOnMap = json.loads(self.maps[self.mapId])

        for x in range(len(self.map)):
            for y in range(len(self.map[x])):
                if self.map[x][y] > 0:  # gold
                    self.energyOnMap[x][y] = -4
                else:  # obstacles
                    self.energyOnMap[x][y] = ObstacleInfo.types[self.map[x][y]]
Example #4
            # Get prediction for input image
            print("Epoch %d, image %d of %d" % ((j + 1), (i + 1), n_predict))
            unlabelled = utils.get_unlabelled(i, batch_size=1, test=True)
            pred = sess.run(convnet.out_max, feed_dict={images: unlabelled})

            # Compute accuracy and dsc if mask is available
            if os.path.isfile("testing set/test_%d_mask.png" % (i + 1)):
                labels = utils.get_labelled(i, 1, test=True)
                accuracy, dsc = utils.compute_accuracy(pred, labels)
                print("Prediction percent accuracy: %.3f and DSC: %.3f" %
                      (accuracy, dsc))
                epoch_acc[j] += accuracy
                epoch_dsc[j] += dsc

                logging.info("Creating output map")
                map = utils.generate_map(pred)
                scp.misc.imsave(
                    'predictions test/pred_%d_epoch_%d_a_%.3f_d_%.3f.png' %
                    (i + 1, j + 1, accuracy, dsc), map)
            else:
                print("Mask not found. Cannot compute accuracy and DSC")
                logging.info("Creating output map")
                map = utils.generate_map(pred)
                scp.misc.imsave(
                    'predictions test/pred_%d_epoch_%d.png' % (i + 1, j + 1),
                    map)

# Stats for each epoch
epoch_acc = np.divide(epoch_acc, n_predict)
epoch_dsc = np.divide(epoch_dsc, n_predict)
print('Accuracy each epoch')
Example #5

# Project site: https://github.com/bravikov/pylee

import utils

x0 = 2
y0 = 12

map_width = 15
map_height = 15

my_map = utils.generate_map(map_width, map_height)

utils.add_obstacle(my_map, 6, 5, 3, 4)

utils.print_map(my_map)

start_location = utils.Location()
start_location.x = x0
start_location.y = y0

locations = [start_location]
weight = 1
while len(locations) > 0:
    nextLocations = []
    for location in locations:
        utils.set_weight(my_map, location, weight)
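        # --- hedged sketch: the excerpt ends here; the wave-expansion step below
        # is an assumption, not pylee's documented API. It assumes my_map is a
        # plain 2-D list indexed as my_map[y][x], where free unvisited cells hold
        # 0 and utils.set_weight() writes the wave number into the cell in place.
        for dx, dy in ((1, 0), (-1, 0), (0, 1), (0, -1)):
            neighbor = utils.Location()
            neighbor.x = location.x + dx
            neighbor.y = location.y + dy
            inside = 0 <= neighbor.x < map_width and 0 <= neighbor.y < map_height
            if inside and my_map[neighbor.y][neighbor.x] == 0:
                nextLocations.append(neighbor)  # frontier for the next wave
    locations = nextLocations
    weight += 1

utils.print_map(my_map)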
Example #6
model = networks.SimpleNetwork(dim, arms=arms, num_locs=num_locs)
quantal_temp = 1
optimizer = tf.optimizers.Adam(1e-5)
model.build(input_shape=(batch_size, dim, dim, 1))
iterations = 80000
interval = 5000

file_name = '10_10_mu'

regrets = []
regrets_std = []
losses = []

for step in range(iterations):
    with tf.GradientTape() as tape:
        utilities = utils.generate_map(batch_size, dim, dim)
        inputs = tf.expand_dims(utilities, axis=3)
        policies = model(inputs, True)
        samples, selection = utils.sample_actions(policies, batch_size,
                                                  num_locs, arms, dim)

        op_locs = []
        utilities_copy = tf.reshape(utilities, [batch_size, -1])

        for j in range(op_num_locs):
            indmax = tf.argmax(utilities_copy, axis=1)
            opponent = tf.expand_dims(tf.stack(
                [tf.math.floordiv(indmax, dim),
                 tf.math.floormod(indmax, dim)],
                axis=1),
                                      axis=1)
Example #7
def play_a_round(env,
                 map_size,
                 handles,
                 models,
                 print_every,
                 train=True,
                 render=False,
                 eps=None):
    env.reset()
    utils.generate_map(env, map_size, handles)

    step_ct = 0
    done = False

    n = len(handles)
    obs = [[] for _ in range(n)]
    ids = [[] for _ in range(n)]
    acts = [[] for _ in range(n)]
    nums = [env.get_num(handle) for handle in handles]
    total_reward = [0 for _ in range(n)]

    print("===== sample =====")
    print("eps %.2f number %s" % (eps, nums))
    start_time = time.time()
    counter = 10
    while not done:
        # take actions for every model
        for i in range(n):
            obs[i] = env.get_observation(handles[i])
            ids[i] = env.get_agent_id(handles[i])
            # let models infer action in parallel (non-blocking)
            models[i].infer_action(obs[i],
                                   ids[i],
                                   'e_greedy',
                                   eps,
                                   block=False)

        for i in range(n):
            acts[i] = models[i].fetch_action()  # fetch actions (blocking)
            env.set_action(handles[i], acts[i])

        # simulate one step
        done = env.step()

        # sample
        step_reward = []
        for i in range(n):
            rewards = env.get_reward(handles[i])
            pos = env.get_pos(handles[i])
            for (x, y) in pos:
                rewards -= ((1.0 * x / map_size - 0.5)**2 +
                            (1.0 * y / map_size - 0.5)**2) / 100
            if train:
                alives = env.get_alive(handles[i])
                # store samples in replay buffer (non-blocking)
                models[i].sample_step(rewards, alives, block=False)
            s = sum(rewards)
            step_reward.append(s)
            total_reward[i] += s

        # render
        if render:
            env.render()

        # stat info
        nums = [env.get_num(handle) for handle in handles]

        # clear dead agents
        env.clear_dead()

        # check the return status of the previously issued non-blocking sample_step() calls
        if args.train:
            for model in models:
                model.check_done()

        if step_ct % print_every == 0:
            print("step %3d,  nums: %s reward: %s,  total_reward: %s " %
                  (step_ct, nums, np.around(step_reward,
                                            2), np.around(total_reward, 2)))
        step_ct += 1
        # if step_ct % 50 == 0 and counter >= 0:
        #     counter -= 1
        #     utils.add_agents(env, 0, 0, handles, map_size, True)
        #     # add_agents(env, np.random.randint(0, map_size - 1))
        #     # pos = []
        #     # x = np.random.randint(0, map_size - 1)
        #     # y = np.random.randint(0, map_size - 1)
        #     # for i in range(-4, 4):
        #     #     for j in range(-4, 4):
        #     #         pos.append((x + i, y + j))
        #     # env.add_agents(handles[g], method="custom", pos=pos)

        #     # pos = []
        #     # x = np.random.randint(0, map_size - 1)
        #     # y = np.random.randint(0, map_size - 1)
        #     # for i in range(-2, 2):
        #     #     for j in range(-4, 4):
        #     #         pos.append((x + i, y + j))
        #     # env.add_agents(handles[g + 1], method="custom", pos=pos)

        #     step_ct = 0
        if step_ct > 500:
            break

    sample_time = time.time() - start_time
    print("steps: %d,  total time: %.2f,  step average %.2f" %
          (step_ct, sample_time, sample_time / step_ct))

    # train
    total_loss, value = [0 for _ in range(n)], [0 for _ in range(n)]
    if train:
        print("===== train =====")
        start_time = time.time()

        # train models in parallel
        for i in range(n):
            models[i].train(print_every=1000, block=False)
        for i in range(n):
            total_loss[i], value[i] = models[i].fetch_train()

        train_time = time.time() - start_time
        print("train_time %.2f" % train_time)

    def round_list(l):
        return [round(x, 2) for x in l]

    return round_list(total_loss), nums, round_list(total_reward), round_list(
        value)
Example #8
    # set logger
    buffer.init_logger(args.name)

    # init the game
    env = magent.GridWorld(utils.load_config(args.map_size))
    # env.set_render_dir("build/render")

    # two groups of agents
    handles = env.get_handles()

    # sample eval observation set
    eval_obs = [None, None, None]
    if args.eval:
        print("sample eval set...")
        env.reset()
        utils.generate_map(env, args.map_size, handles)
        for i in range(len(handles)):
            eval_obs[i] = buffer.sample_observation(env, handles, 2048, 500)

    # load models
    batch_size = 256
    unroll_step = 8
    target_update = 1200
    train_freq = 5

    if args.alg == 'dqn':
        RLModel = DeepQNetwork
        base_args = {
            'batch_size': batch_size,
            'memory_size': 2**21,
            'learning_rate': 1e-4,
Example #9
batch_size = 32
dim = 5
arms = 8
num_locs = 3
op_num_locs = 2
model = networks.SimpleNetwork(dim, arms=arms, num_locs=num_locs)
quantal_temp = 1
optimizer = tf.optimizers.Adam(1e-5)
model.build(input_shape=(batch_size, dim, dim, 1))
iterations = 20000
interval = 2000

file_name = '5-5_3-2'

test_population = utils.generate_map(batch_size, dim, dim)
inputs = tf.expand_dims(test_population, axis=3)
policies = model(inputs, False)
samples, selection = utils.sample_actions(policies, batch_size, num_locs, arms,
                                          dim)
samples = samples.numpy()

op_locs = []
utilities_copy = tf.reshape(test_population, [batch_size, -1])

for j in range(op_num_locs):
    indmax = tf.argmax(utilities_copy, axis=1)
    opponent = tf.expand_dims(tf.stack(
        [tf.math.floordiv(indmax, dim),
         tf.math.floormod(indmax, dim)], axis=1),
                              axis=1)
Example #10
    def save_prediction(self, pred, epoch, start_idx):
        map = utils.generate_map(pred)
        scp.misc.imsave(
            'predictions training/epoch_%d_img_%d.png' %
            (epoch + 1, start_idx + 1), map)
Example #11
def play_a_round(env,
                 map_size,
                 handles,
                 models,
                 print_every,
                 eps,
                 step_batch_size=None,
                 train=True,
                 train_id=0,
                 render=False):
    """play a round of game"""
    env.reset()
    # generate_map(env, map_size, handles)
    x0, y0, x1, y1 = utils.generate_map(env, args.map_size, handles)

    step_ct = 0
    done = False

    n = len(handles)
    obs = [[] for _ in range(n)]
    ids = [[] for _ in range(n)]
    acts = [[] for _ in range(n)]
    nums = [env.get_num(handle) for handle in handles]
    total_reward = [0 for _ in range(n)]
    n_transition = 0
    pos_reward_num = 0
    total_loss, value = 0, 0

    print("===== sample =====")
    print("eps %s number %s" % (eps, nums))
    start_time = time.time()
    while not done:
        # take actions for every model
        for i in range(n):
            obs[i] = env.get_observation(handles[i])
            ids[i] = env.get_agent_id(handles[i])
            # let models infer action in parallel (non-blocking)
            models[i].infer_action(obs[i],
                                   ids[i],
                                   'e_greedy',
                                   eps[i],
                                   block=False)

        for i in range(n):
            acts[i] = models[i].fetch_action()  # fetch actions (blocking)
            env.set_action(handles[i], acts[i])

        # simulate one step
        done = env.step()

        # sample
        step_reward = []
        for i in range(n):
            rewards = env.get_reward(handles[i])
            pos = env.get_pos(handles[i])
            if i == 0:
                for (x, y) in pos:
                    rewards -= ((1.0 * x / map_size - x1 / map_size)**2 +
                                (1.0 * y / map_size - y1 / map_size)**2) / 50
            elif i == 1:
                for (x, y) in pos:
                    rewards -= ((1.0 * x / map_size - x0 / map_size)**2 +
                                (1.0 * y / map_size - y0 / map_size)**2) / 50
            if train and i == train_id:
                alives = env.get_alive(handles[train_id])
                # store samples in replay buffer (non-blocking)
                models[train_id].sample_step(rewards, alives, block=False)
                pos_reward_num += len(rewards[rewards > 0])
            s = sum(rewards)
            step_reward.append(s)
            total_reward[i] += s

        # render
        if render:
            env.render()

        # stat info
        nums = [env.get_num(handle) for handle in handles]
        n_transition += nums[train_id]

        # clear dead agents
        env.clear_dead()

        # check the return status of the previously issued non-blocking sample_step() calls
        if train:
            models[train_id].check_done()

        if step_ct % print_every == 0:
            print(
                "step %3d,  nums: %s reward: %s,  total_reward: %s, pos_rewards %d"
                % (step_ct, nums, np.around(step_reward, 2),
                   np.around(total_reward, 2), pos_reward_num))
        step_ct += 1
        if step_ct > args.n_step:
            break

        if step_batch_size and n_transition > step_batch_size and train:
            total_loss, value = models[train_id].train(500)
            n_transition = 0

    sample_time = time.time() - start_time
    print("steps: %d,  total time: %.2f,  step average %.2f" %
          (step_ct, sample_time, sample_time / step_ct))

    # train
    if train:
        print("===== train =====")
        start_time = time.time()
        total_loss, value = models[train_id].train(500)
        train_time = time.time() - start_time
        print("train_time %.2f" % train_time)

    return total_loss, nums, total_reward, value
Example #12
import random
import time
import utils
from uninformedSearch import dijkstra
from informedSearch import a_star
# from test import *


# set the files path and generate the map
vertices_number = '100'
file_version = '0.1'
vertices_file = './graphs/graph' + vertices_number + '_' + file_version + '/v.txt'
edges_file = './graphs/graph' + vertices_number + '_' + file_version + '/e.txt'
map = utils.generate_map(vertices_file, edges_file)

# get the random start point and end point
start_point = str(random.randint(0, int(vertices_number) - 1))
end_point = str(random.randint(0, int(vertices_number) - 1))
# set the specific start point and end point (! both types should be str)
# start_point = '0'
# end_point = '99'

# informed search - A*
informed_search_start_time = time.time()
informed_search_res = a_star(start_point, end_point, map)
informed_search_end_time = time.time()
informed_search_cost = informed_search_end_time - informed_search_start_time
print(f'With the informed search A-star, the minimum distance from {start_point} to {end_point} '
      f'is {informed_search_res[0]} and its time cost is {informed_search_cost} \n'
      f'Algorithm Step: {informed_search_res[1]}')
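
# Uninformed search - Dijkstra
# (hedged sketch: the excerpt imports dijkstra but ends before using it; this
# assumes dijkstra shares a_star's (start, end, map) signature and returns
# (distance, steps) the same way, which is not confirmed by the excerpt)
uninformed_search_start_time = time.time()
uninformed_search_res = dijkstra(start_point, end_point, map)
uninformed_search_end_time = time.time()
uninformed_search_cost = uninformed_search_end_time - uninformed_search_start_time
print(f'With the uninformed search Dijkstra, the minimum distance from {start_point} to {end_point} '
      f'is {uninformed_search_res[0]} and its time cost is {uninformed_search_cost} \n'
      f'Algorithm Step: {uninformed_search_res[1]}')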