Exemplo n.º 1
0
def randomGame():
    """Play two identical rounds on a randomly generated 20x20 map.

    The map is populated through ``setRandomMap`` and the scoring
    parameters through ``setScore``. Every round sends the same three
    commands (one per agent id 0..2), then prints the console view and
    the current score.
    """
    print("\n== init random game ==")
    game = Game(20, 20)  # height, width
    game.setRandomMap(3, 3, 4)  # numbers of agents, targets, obstacles
    game.setScore(100, 20, 10, -2, -0.04, -20)

    game.printGodInfo()

    # (id, dx, dy) for each agent; the same moves are replayed every round.
    moves = ((0, 1, 1), (1, -1, 1), (2, 1, -1))

    for banner in ("\n== 1st round ==", "\n== 2ed round =="):
        print(banner)
        commands = [Command(agent_id, dx, dy) for agent_id, dx, dy in moves]

        game.runOneRound(commands)

        game.printConsoleInfo()
        print("Score: " + str(game.outputScore()))
Exemplo n.º 2
0
def testManualGameImageOutput2():
    """Group the visible targets by nearest agent and print each agent's pick.

    Builds a 20x20 game with 50 random targets and three manually placed
    agents, runs a single round, then:

    1. files every entry of ``game.consolemap.targets`` under the agent
       with the smallest ``target_agent_len`` (ties go to the lower agent
       id), and prints the resulting grouping;
    2. for each agent, prints the nearest target of its own group.

    An agent whose group is empty is skipped; the previous code indexed
    ``belongs[i][0]`` unconditionally and raised ``IndexError`` in that case.
    """
    print("\n== init manual setting game ==")
    height = 20
    width = 20
    # mode[i] flips to True once agent i has been given a target.
    mode = {0: False, 1: False, 2: False}
    # belongs[i] collects the targets for which agent i is the nearest agent.
    belongs = {0: [], 1: [], 2: []}
    game = Game(height, width)

    game.setRandomMap(0, 50, 0)  # numbers of agents, targets, obstacles
    game.setScore(100, 10, -0.01, -100)

    game.printGodMap()

    agents = {
        0: Agent(0, 0, 0, height, width, r=5),  # id, x, y, height, width
        1: Agent(1, width - 1, 0, height, width, r=5),
        2: Agent(2, int(width / 2), height - 1, height, width, r=5),
    }
    game.setAgents(agents)
    game.printConsoleMap()
    game.runOneRound([Command(0, 1, 1), Command(1, -1, 1), Command(2, -1, -1)])

    for item in game.consolemap.targets:
        # min() returns the first minimal element, so on equal distances the
        # lower agent id wins -- same tie-break as the former if-chain.
        nearest = min(agents, key=lambda k: target_agent_len(item, agents[k]))
        belongs[nearest].append(item)
    print(belongs)

    for i in agents:
        if mode[i] or not belongs[i]:
            # Already served, or nothing in range for this agent.
            continue
        target_find = min(belongs[i],
                          key=lambda t: target_agent_len(t, agents[i]))
        print(target_find)
        mode[i] = True

    game.printConsoleMap()
    '''for i in range(0, 10):
Exemplo n.º 3
0
def testManualGameImageOutput1():
    """Walk a single agent through three rounds on a hand-built 10x10 map.

    Obstacles sit on the diagonal cells (1,1)..(3,3) and targets on
    (4,4)..(6,6). One agent with ``r=3`` starts at (7, 6); the console map
    is printed after setup and after every round.
    """
    print("\n== init manual setting game ==")
    height = width = 10
    game = Game(height, width)

    game.setObstacles([{"x": k, "y": k} for k in (1, 2, 3)])
    game.setTargets([{"x": k, "y": k} for k in (4, 5, 6)])
    # Note: game.setScore(...) is not called in this scenario.

    game.printGodMap()

    # Agent(id, x, y, height, width, r=...)
    game.setAgents({0: Agent(0, 7, 6, height, width, r=3)})

    game.printConsoleMap()

    # Banner and (dx, dy) per round. Trailing comments give the position the
    # original author expected the agent to reach after the move.
    schedule = (
        ("\n== 1st round ==", (-1, -1)),  # (6, 5)
        ("\n== 2st round ==", (-1, 0)),  # (5, 5)
        ("\n== 3st round ==", (-1, 0)),  # (4, 5)
    )
    for banner, (dx, dy) in schedule:
        print(banner)
        game.runOneRound([Command(0, dx, dy)])
        game.printConsoleMap()
Exemplo n.º 4
0
def manualGame():
    """Play two identical rounds on a hand-built 20x20 map with three agents.

    Obstacles, targets and agents are placed explicitly, ``setScore`` is
    called without arguments, and each round sends the same three commands
    before printing the console view.
    """
    print("\n== init manual setting game ==")
    height = width = 20
    game = Game(height, width)

    game.setObstacles([{"x": 1, "y": 1}, {"x": 2, "y": 2}, {"x": 3, "y": 3}])
    game.setTargets([{"x": 10, "y": 10}, {"x": 12, "y": 12},
                     {"x": 13, "y": 13}])

    # Agent(id, x, y, height, width)
    game.setAgents({
        0: Agent(0, 4, 4, height, width),
        1: Agent(1, 15, 17, height, width),
        2: Agent(2, 12, 15, height, width),
    })
    game.setScore()

    game.printGodInfo()

    # (id, dx, dy) for each agent; replayed unchanged in both rounds.
    moves = ((0, 1, 1), (1, -1, 1), (2, 1, -1))

    for banner in ("\n== 1st round ==", "\n== 2ed round =="):
        print(banner)
        game.runOneRound([Command(*move) for move in moves])
        game.printConsoleInfo()
Exemplo n.º 5
0
def testManualGameImageOutput2():
    """Steer three agents over a random 40x40 map until nothing is left.

    The loop keeps running while ``game.consolemap.targets`` is non-empty or
    ``haveunseenspace`` reports unexplored area. Each round:

    1. every known target is filed under its nearest agent (ties go to the
       lower agent id);
    2. each agent picks the nearest target of its own group and moves with
       ``walk``; an agent without a target moves in the direction chosen by
       ``no_target_walk``;
    3. the commands are executed and agent pairs sharing a cell are counted
       as crashes.

    Changes from the previous version: the round counter no longer shadows
    the builtin ``round``, the fallback move table is built once instead of
    once per agent per round, and boolean flags are tested directly.
    """
    print("\n== init manual setting game ==")
    height = 40
    width = 40
    crash = 0  # number of agent pairs found on the same cell after a round

    # mode[i] is True while agent i holds a target for the current round.
    mode = {0: False, 1: False, 2: False}
    now_target = {0: [], 1: [], 2: []}

    # Moves for an agent without a target, keyed by the value returned from
    # no_target_walk().
    no_target_command = {
        0: {"dx": 0, "dy": 0},
        1: {"dx": 1, "dy": -1},
        2: {"dx": -1, "dy": -1},
        3: {"dx": -1, "dy": 1},
        4: {"dx": 1, "dy": 1},
    }

    game = Game(height, width)

    game.setRandomMap(0, 200, 0)  # numbers of agents, targets, obstacles

    game.printGodMap()

    agents = {
        0: Agent(0, 0, 0, height, width, r=5),  # id, x, y, height, width
        1: Agent(1, width - 1, 0, height, width, r=5),
        2: Agent(2, int(width / 2), height - 1, height, width, r=5),
    }

    game.setAgents(agents)
    game.runOneRoundwithoutMovement()
    game.printConsoleMap()
    round_no = 1

    while (game.consolemap.targets != []
           or haveunseenspace(game.consolemap.areas, height, width)):
        print("====the %d round" % round_no)

        found_target = game.consolemap.targets

        # Cluster the targets: belongs[i] holds those nearest to agent i.
        belongs = {0: [], 1: [], 2: []}
        for item in found_target:
            # min() keeps the first minimal key, so the lower id wins ties.
            nearest = min(agents,
                          key=lambda k: target_agent_len(item, agents[k]))
            belongs[nearest].append(item)

        cmd = []  # commands to execute this round

        for i in agents:
            if not mode[i]:  # try to assign a target to the agent
                now_target[i] = []
                for target_list in belongs[i]:
                    if now_target[i] == [] and target_list != []:
                        now_target[i] = target_list
                        mode[i] = True
                    elif target_agent_len(target_list,
                                          agents[i]) < target_agent_len(
                                              now_target[i], agents[i]):
                        now_target[i] = target_list

            if not mode[i]:  # no target: take the fallback move
                direction = no_target_walk(game.getmap(), agents[i])
                step = no_target_command[direction]
                cmd.append(Command(agents[i].id, step["dx"], step["dy"]))
                print("agent %d goes" % agents[i].id, step["dx"], "and",
                      step["dy"])
            else:
                cmd.append(walk(now_target[i], agents[i]))
                # The target is re-selected from scratch next round.
                mode[i] = False

        game.runOneRound(cmd)

        for i in agents:  # count pairs of agents sharing a cell
            for j in range(i + 1, len(agents)):
                if agents[i].x == agents[j].x and agents[i].y == agents[j].y:
                    crash += 1
        print(found_target)

        game.printConsoleMap()
        round_no += 1
    print("crush time: %d" % crash)
    print("finish")
Exemplo n.º 6
0
class ReplayBuffer():
    """Collect game transitions with a model and dump them to disk.

    The buffer owns a ``Game`` instance, drives it with the actions chosen
    by ``model_structure`` and writes every transition to ``path`` as a
    ``.pkl`` file through the external ``save_object`` helper.
    """

    def __init__(self, agent_num, model_structure, model_weight, height, width, path="./buffer", cuda=False):
        """Store the configuration, load the weights and create ``path``.

        Args:
            agent_num: number of agents placed in every game.
            model_structure: model object; must provide ``load_state_dict``
                and be callable as ``model(observation, hx, cx)``.
            model_weight: path of the weight file (may not exist yet).
            height: map height passed to ``Game``.
            width: map width passed to ``Game``.
            path: folder receiving the ``.pkl`` files; created if missing.
            cuda: run the model on the GPU when True.
        """
        self.model = model_structure
        self.model_weight = model_weight
        # Set before loadWeight(), which picks the map_location from it.
        self.cuda = cuda

        self.loadWeight()

        # Dimensions of the route-planning game environment.
        self.height = height
        self.width = width

        self.game = None

        # Recurrent state and latest observation; filled by initialGame().
        self.hx = None
        self.cx = None
        self.bot_observe = None

        self.agent_numbers = agent_num

        # Default score settings handed to Game.setScore().
        self.acquisition_sum = 700
        self.explored_target_sum = 70
        self.explored_sum = 40
        self.time_decrease = -0.00005
        self.crash_time_penalty = -0.0001
        self.crash_sum = -20

        # Folder of the replay buffer.
        self.path = path
        if not os.path.exists(path):
            os.makedirs(path)

        self.IOlimit = 20  # transitions gathered before each write-out
        self.buffer_limit = 1000

        print(cuda)

    def _observeAgents(self):
        """Return the observation images of all agents as one uint8 array."""
        images = [self.game.outputAgentImage(i) for i in range(self.agent_numbers)]
        return np.array(images).astype(np.uint8)

    def collect(self, game_round):
        """Collect transitions forever, writing them out in batches.

        Each transition is a list laid out as::

            [0] observation before the step (numpy array)
            [1] hx before the step          [2] cx before the step
            [3] model action output (CPU tensor)
            [4] score before the step
            [5] observation after the step (numpy array)
            [6] hx after the step (CPU)     [7] cx after the step (CPU)
            [8] score after the step

        After ``IOlimit`` transitions the batch is written to disk, one file
        per transition, and the model weights are reloaded. This method
        never returns.

        Args:
            game_round: a game is restarted once ``game.state`` reaches this
                value (or the game is no longer active).
        """
        if self.game is None:
            self.initialGame()

        if self.cuda:
            self.model.cuda()

        while True:
            data = []
            collect_iter = 0

            while collect_iter < self.IOlimit:
                if not (self.game.active and self.game.state < game_round):
                    # Game finished or round limit reached: start a new one.
                    self.initialGame()
                    continue

                if self.bot_observe is None:
                    # Was `np.array(bot_observe)`, an undefined name.
                    self.bot_observe = self._observeAgents()

                temp = [self.bot_observe, self.hx, self.cx]

                # Keep the tensor local so self.bot_observe stays a numpy
                # array even if the model call below raises.
                observe_tensor = torch.from_numpy(self.bot_observe).float()

                if self.cuda:
                    self.hx = self.hx.cuda()
                    self.cx = self.cx.cuda()
                    observe_tensor = observe_tensor.cuda()

                _, bot_command, (self.hx, self.cx) = self.model(observe_tensor, self.hx, self.cx)
                bot_command = bot_command.cpu()

                temp.append(bot_command)
                temp.append(self.game.outputScore())

                # Feed the model's commands to the game.
                commands = self.interpretAction(bot_command)
                self.game.runOneRound(commands)

                self.bot_observe = self._observeAgents()

                temp.append(self.bot_observe)
                temp.append(self.hx.cpu())
                temp.append(self.cx.cpu())
                temp.append(self.game.outputScore())

                data.append(temp)
                collect_iter += 1

                if collect_iter % 5 == 0:
                    print('Collecting Progress: ', collect_iter, ' / ', self.IOlimit, ' | Score: %02.5f' % self.game.outputScore())

            # One file per transition, named after the current timestamp.
            for sample in data:
                filename = self.path + '/' + datetime.datetime.now().isoformat() + '.pkl'
                save_object(filename, sample)

            del data
            # Pick up weights that may have been updated in the meantime.
            self.loadWeight()

    def loadWeight(self):
        """Load ``self.model_weight`` into the model if the file exists.

        On a CPU-only buffer the weights are mapped to the CPU so a file
        saved from a GPU can still be read. Prints a notice when the file is
        missing.
        """
        if os.path.exists(self.model_weight):
            map_location = None if self.cuda else 'cpu'
            self.model.load_state_dict(torch.load(self.model_weight, map_location=map_location))
        else:
            print('Model weight [{0}] not found'.format(self.model_weight))

    def initialGame(self):
        """Start a fresh random game and reset recurrent state and observation."""
        t = (self.height + self.width) / 2

        self.game = Game(self.height, self.width)
        self.game.setRandomMap(self.agent_numbers, int(t * 0.3) ** 2, 1)
        self.game.setScore(self.acquisition_sum, self.explored_target_sum, self.explored_sum, self.time_decrease, self.crash_time_penalty, self.crash_sum)

        # One round in which every agent is sent a (0, 0) move.
        commands = [Command(i, 0, 0) for i in range(self.agent_numbers)]
        self.game.runOneRound(commands)

        # Memory tensors of the model's LSTM, one row of 512 per agent.
        self.hx = torch.zeros([self.agent_numbers, 512], dtype=torch.float32)
        self.cx = torch.zeros([self.agent_numbers, 512], dtype=torch.float32)

        self.bot_observe = self._observeAgents()

        print('New Game!')

    def resetGameScore(self, acquisition_sum, explored_target_sum, explored_sum, time_decrease, crash_time_penalty, crash_sum):
        """Replace the score settings used by games created from now on."""
        self.acquisition_sum = acquisition_sum
        self.explored_target_sum = explored_target_sum
        self.explored_sum = explored_sum
        self.time_decrease = time_decrease
        self.crash_time_penalty = crash_time_penalty
        self.crash_sum = crash_sum

    def countFileNum(self, path):
        """Return how many entries of ``path`` end with ``.pkl``."""
        return sum(1 for name in os.listdir(path) if name.endswith('.pkl'))

    def grabFileName(self, path):
        """Return the names in ``path`` that end with ``.pkl``."""
        return [name for name in os.listdir(path) if name.endswith('.pkl')]

    def resetIOBatch(self, new_limit):
        """Set the number of transitions gathered before each write-out."""
        self.IOlimit = new_limit

    def resetBufferLimit(self, new_limit):
        """Set ``buffer_limit``."""
        self.buffer_limit = new_limit

    def interpretAction(self, command_tensor):
        """Turn the model output into one ``Command`` per row.

        The tensor is viewed as rows of 5 values; the index of the largest
        value in each row selects the move.
        """
        command_tensor = command_tensor.view(-1, 5)
        choices = torch.max(command_tensor, 1)[1].tolist()
        return [self.intoCommand(i, choice) for i, choice in enumerate(choices)]

    def intoCommand(self, i, command):
        """Map action index ``command`` to a ``Command`` for agent ``i``.

        0 -> (0, 1), 1 -> (1, 0), 2 -> (0, -1), 3 -> (-1, 0); any other
        value yields the (0, 0) move.
        """
        if command == 0: return Command(i, 0, 1)
        elif command == 1: return Command(i, 1, 0)
        elif command == 2: return Command(i, 0, -1)
        elif command == 3: return Command(i, -1, 0)
        return Command(i, 0, 0)
Exemplo n.º 7
0
class ReplayBuffer():
    """In-memory replay buffer filled by agents that act at random.

    The buffer owns a ``Game`` instance; every ``collect`` call plays one
    step with random actions and stores the transition in ``self.memory``
    (oldest entries are dropped once ``buffer_limit`` is exceeded).
    """

    def __init__(self, agent_num, height, width, model, modelpath, game_round, cuda=True):
        """Store the configuration and start the first game.

        Args:
            agent_num: number of agents placed in every game.
            height: map height passed to ``Game``.
            width: map width passed to ``Game``.
            model: model object kept in ``self.model``.
            modelpath: path of the weight file used by ``loadWeight``.
            game_round: round limit handed to ``Game`` and checked in
                ``collect``.
            cuda: move the model to the GPU in ``collect`` when True.
        """
        self.height = height
        self.width = width
        self.game_round = game_round
        self.agent_num = agent_num
        self.model = model
        self.modelpath = modelpath
        self.cuda = cuda

        # Placeholder; replaced by the first real observation in initialGame().
        self.state = torch.zeros([self.agent_num, 3, self.height, self.width], dtype=torch.float32)

        # Default score settings handed to Game.setScore().
        self.acquisition_sum = 400
        self.explored_target_sum = 70
        self.explored_sum = 40
        self.total_score = self.acquisition_sum + self.explored_target_sum + self.explored_sum
        self.time_decrease = -0.00005
        self.crash_time_penalty = -0.0001
        self.crash_sum = -400
        self.reg_val = 1

        self.memory = []
        self.buffer_limit = 1000

        self.game = None
        self.score = None
        self.initialGame()

    def _observe(self):
        """Return the float32 tensor of all agents' observation images."""
        frames = [self.game.outputAgentImage(i) for i in range(self.agent_num)]
        return torch.from_numpy(np.array(frames).astype(np.float32))

    def collect(self, model, verbose=1):
        """Play one random step and append the transition to ``self.memory``.

        ``self.model`` is first synchronised with ``model``'s state dict. If
        the game is over or has reached ``game_round``, a new game is
        started instead and nothing is stored.

        Args:
            model: source of the weights copied into ``self.model``.
            verbose: print the game score after the step when equal to 1.
        """
        if self.cuda:
            self.model.cuda()

        self.model.load_state_dict(model.state_dict())

        if not (self.game.active and self.game.state < self.game_round):
            self.initialGame()
            return

        action = self.select_action()

        self.game.runOneRound([intoCommand(i, action[i]) for i in range(self.agent_num)])

        next_state = self._observe()

        if verbose == 1:
            print(self.game.outputScore())

        # NOTE(review): no terminal flag is stored with the transition; the
        # previously computed `done` local was never used.
        self.memory.append(Data(self.state, action, next_state))

        self.state = next_state

        if len(self.memory) > self.buffer_limit:
            # Drop the oldest transition by position.
            del self.memory[0]

    def select_action(self):
        """Return one uniformly random action index in 0..4 per agent."""
        return torch.tensor([random.randint(0, 4) for _ in range(self.agent_num)])

    def save_state(self, path):
        """Show the first three channels of every agent's image with pyplot.

        NOTE(review): ``path`` is not used and nothing is written to disk.
        """
        # Two columns; at least two rows so up to four agents keep the
        # former 2x2 layout, more rows are added for larger teams.
        rows = max(2, -(-self.agent_num // 2))
        for i in range(self.agent_num):
            arr = np.asarray(self.game.outputAgentImage(i))
            # Channel-first image -> channel-last uint8 image of equal size
            # (the size used to be hard-coded to 32x32).
            img = np.zeros([arr.shape[1], arr.shape[2], 3], dtype=np.uint8)
            img[:, :, 0] = arr[0, :, :]
            img[:, :, 1] = arr[1, :, :]
            img[:, :, 2] = arr[2, :, :]
            plt.subplot(rows, 2, i + 1)
            plt.imshow(img)
        plt.show()

    def play(self, model, game_round):
        """Let ``model`` play the current game until it ends.

        Keeps a sliding window of four observations in ``self.states`` and
        feeds the latest three to ``model``; the index of the maximum along
        dimension 2 of the output is used as the action of each agent.

        Args:
            model: callable returning action scores.
            game_round: stop once ``game.state`` reaches this value.
        """
        while self.game.active and self.game.state < game_round:
            # Shift the window by one frame and append the newest one.
            self.states[0] = self.states[1]
            self.states[1] = self.states[2]
            self.states[2] = self.states[3]
            self.states[3] = self._observe()

            action = model(self.states[1:].unsqueeze(0)).max(2)[1].view(self.agent_num)
            print(action)
            commands = [intoCommand(i, action[i]) for i in range(self.agent_num)]
            self.game.runOneRound(commands)
            print(self.game.outputScore())
            sleep(0.1)

    def initialGame(self):
        """Start a fresh random game and reset the stored observation(s)."""
        t = (self.height + self.width) / 2

        self.game = Game(self.height, self.width, self.game_round)
        self.game.setRandomMap(self.agent_num, int(t * 0.3) ** 2, int(t * 0.1) ** 2)
        self.game.setScore(self.acquisition_sum, self.explored_target_sum, self.explored_sum, self.time_decrease, self.crash_time_penalty, self.crash_sum, self.reg_val)

        # One round in which every agent is sent a (0, 0) move.
        self.game.runOneRound([Command(i, 0, 0) for i in range(self.agent_num)])
        self.score = np.array([self.game.outputScore() for i in range(self.agent_num)])

        self.state = self._observe()

        # Frame window used by play(); it was never initialised before, so
        # play() failed with AttributeError.
        # NOTE(review): a zero-filled window of 4 frames is assumed here --
        # confirm against the input the model expects.
        self.states = torch.zeros((4,) + tuple(self.state.shape), dtype=self.state.dtype)

        print('New Game!')

    def loadWeight(self):
        """Load ``self.modelpath`` into the model if the file exists.

        Weights are mapped to the CPU when ``self.cuda`` is False. Prints a
        notice when the file is missing.
        """
        if os.path.exists(self.modelpath):
            map_location = None if self.cuda else 'cpu'
            self.model.load_state_dict(torch.load(self.modelpath, map_location=map_location))
        else:
            print('Model weight [{0}] not found'.format(self.modelpath))