def __init__(self):
    """Initialise a single-species maze environment with a fixed 10x10 maze.

    Creates the maze, a simulator, ``self.number`` Worker agents, their
    initial view states, a replay ``history`` string, and one shared
    DQN solver sized to the worker observation/action spaces.
    """
    # Hard-coded 10x10 maze description string.
    m = "Test,10,10,4,0,4,9,1111211111100000000111100110011000000001100001011111011000111000000001100101010110000000011111311111"
    # Alternative mazes kept for debugging:
    #   "Test,6,6,4,5,1,0,131111100051105001100051150001111121"
    #   "Test,10,10,4,0,4,9,1111211151100000000111100110011000000001100001511111011000111000000001100501050110000000011111311111"
    #   "Test,4,4,2,0,2,3,1125100110011531"
    #   or a randomly generated maze via MazeGenerator()
    self.maze = Maze(m)
    self.s = Simulator(self.maze)
    self.span = 6      # view span passed to Worker.getView — presumably a radius; confirm in Worker
    self.number = 2    # number of worker agents
    self.pList = []
    self.stateList = []
    self.finishedP = []
    # History format: "<mazeString>|0#<name>-<pos>...|" — consumed elsewhere, keep exact.
    self.history = m + "|" + "0"
    self.finished = 0
    self.count = 0
    for _ in range(self.number):
        p = Worker(self.maze)
        self.s.add(p)
        self.pList.append(p)
        state = np.asarray(p.getView(p.getPos(), self.span))
        self.stateList.append(state)
        self.history += "#" + p.getName() + "-" + p.getPos().CordToString()
    self.history += "|"
    # One discrete action index per member of Action (was a manual append loop).
    self.action_space_worker = np.asarray(list(range(len(Action))))
    # Observation is the flattened (2*span+1)^2 local view.
    self.observation_space_worker = math.pow(2 * self.span + 1, 2)
    self.shortestRoute = len(self.maze.GetOptimalRoute()[0])
    self.maze.printMaze()
    self.dqn_solver_worker = DQNSolver(int(self.observation_space_worker),
                                       len(self.action_space_worker))
    self.reward = 0
def __init__(self):
    """Initialise a centralised multi-worker maze environment.

    Unlike the per-agent variant, this builds a single *joint* action
    space (Cartesian product of each worker's actions) and a joint
    observation space sized as number-of-workers concatenated views.
    """
    # Fixed 6x6 maze description string.
    m = "Test,6,6,4,5,1,0,131111100051105001100051150001111121"
    # Alternatives kept for debugging:
    #   "Test,10,10,4,0,4,9,1111211151100000000111100110011000000001100001511111011000111000000001100501050110000000011111311111"
    #   "Test,4,4,2,0,2,3,1125100110011531"
    #   or a random maze via MazeGenerator()
    self.maze = Maze(m)
    self.s = Simulator(self.maze)
    self.span = 6      # view span passed to Worker.getView
    self.number = 2    # number of worker agents
    self.pList = []
    self.stateList = []
    self.finishedP = []
    self.history = m + "|" + "0"
    self.finished = 0
    for _ in range(self.number):
        p = Worker(self.maze)
        self.s.add(p)
        self.pList.append(p)
        state = np.asarray(p.getView(p.getPos(), self.span))
        self.stateList.append(state)
        self.history += "#" + p.getName() + "-" + p.getPos().CordToString()
    self.history += "|"
    import itertools
    # Joint action space: every combination of per-worker action indices.
    # (Removed a leftover debug print of the per-worker action list.)
    single_agent_actions = list(range(len(Action)))
    self.action_space = np.asarray(
        list(itertools.product(*([single_agent_actions] * self.number))))
    # Joint observation: one (2*span+1)^2 view per worker, concatenated.
    self.observation_space = math.pow(2 * self.span + 1, 2) * self.number
    self.shortestRoute = len(self.maze.GetOptimalRoute()[0])
    self.maze.printMaze()
def resetNewMaze(self):
    """Generate a brand-new random maze and respawn all workers in it.

    Rebuilds the simulator, worker list, state list and history string
    from scratch, then returns the list of initial worker view states.
    """
    m = MazeGenerator()
    self.maze = Maze(m)
    self.s = Simulator(self.maze)
    self.pList = []
    self.stateList = []
    self.history = m + "|" + "0"
    self.finished = 0
    for _ in range(self.number):
        worker = Worker(self.maze)
        self.pList.append(worker)
        self.s.add(worker)
        initial_view = worker.getView(worker.getPos(), self.span)
        self.stateList.append(np.asarray(initial_view))
        self.history += ("#" + worker.getName() + "-" +
                         worker.getPos().CordToString())
    self.history += "|"
    self.shortestRoute = len(self.maze.GetOptimalRoute()[0])
    self.maze.printMaze()
    return self.stateList
def resetNewMaze(self):
    """Generate a new, larger maze and fully reset agents, rewards and solvers.

    The maze grows by 10 in each dimension each call (bootstrapping a 5-high
    maze straight to 10x10), rewards are rescaled to the new maze area, and
    fresh DQN solvers are created. Returns the initial worker states.

    NOTE(review): relies on self.observation_space_* / self.action_space_*
    having been set by __init__ — confirm call order.
    """
    mazesizeh = self.maze.height + 10
    mazesizew = self.maze.width + 10
    if self.maze.height == 5:
        mazesizeh = 10
        mazesizew = 10
    m = MazeGenerator(mazesizeh, mazesizew)
    self.maze = Maze(m)
    self.s = Simulator(self.maze)
    self.pList = []    # workers
    self.qList = []    # queens
    self.mList = []    # spiders
    self.pPos = []
    self.qPos = []
    self.mPos = []
    self.pStateList = []
    self.qStateList = []
    self.mStateList = []
    self.history = m + "|" + "0"
    self.finished = 0
    self.finishedP = []
    self.finishedQ = []
    self.count = 0
    self.eaten = 0
    self.queenEaten = False
    self.queenLeft = False
    # --- rewards (exit/atQueen scale with maze area) ---
    self.wReward_not_possible = -50
    self.wReward_wall = -50
    self.wReward_entrance = -20
    self.wReward_finished_before = 0
    self.wReward_exit = self.maze.height * self.maze.width * 20
    self.wReward_towards_exit = -1
    self.wReward_toQueen = -1
    self.wReward_atQueen = self.maze.height * self.maze.width * 20
    self.wReward_repeat_pos = -20
    self.wReward_else = -3
    self.qReward_not_possible = -50
    self.qReward_wall = -50
    self.qReward_entrance = -20
    self.qReward_finished_before = 0
    self.qReward_exit = self.maze.height * self.maze.width * 20
    self.qReward_towards_exit = -1
    self.qReward_repeat_pos = -20
    self.qReward_else = -3
    self.sReward_not_possible = -50
    self.sReward_wall = -50
    self.sReward_eat = 1000
    self.sReward_eatQueen = 5000
    self.sReward_towards_prey = -1
    self.sReward_repeat_pos = -20
    self.sReward_else = -3
    # Book-keeping folder name (a longer reward-encoding variant was
    # commented out in the original).
    self.folderName = "Span_" + str(self.spanP) + "Dim_" + str(
        self.maze.height) + "_" + str(self.maze.width)
    self.maxIter = 10 * self.maze.height * self.maze.width
    # completeStop triggers only when the exit reward is disabled.
    self.completeStop = self.wReward_exit == 0
    # --- create agents ---
    for _ in range(self.wNumber):
        p = Worker(self.maze, self.spanP)
        self.s.add(p)
        self.pList.append(p)
        self.pPos.append(p.getPos())
    for _ in range(self.qNumber):
        q = Queen(self.maze)
        self.s.add(q)
        self.qList.append(q)
        self.qPos.append(q.getPos())
    for _ in range(self.sNumber):
        s = Spider(self.maze)
        self.s.add(s)
        self.mList.append(s)
        self.mPos.append(s.getPos())
    # BUG FIX: the original looped over range(...) while reading the stale
    # creation-loop locals p/q/s, so every state/history entry came from the
    # last-created agent (and raised NameError when a count was 0).
    # Iterate the agent lists, matching reset().
    for p in self.pList:
        state = np.asarray(
            p.getAugView(p.getPos(), self.spanP, self.pPos, self.qPos,
                         self.mPos))
        self.pStateList.append(state)
        self.history += "#" + p.getName() + "-" + p.getPos().CordToString()
    for q in self.qList:
        # Queens contribute to history only; no state is recorded here.
        self.history += "#" + q.getName() + "-" + q.getPos().CordToString()
    for s in self.mList:
        state = np.asarray(
            s.getAugView(s.getPos(), self.spanM, self.pPos, self.qPos,
                         self.mPos))
        self.mStateList.append(state)
        self.history += "#" + s.getName() + "-" + s.getPos().CordToString()
    self.history += "|"
    # (Removed a duplicated shortestRoute computation and a dead
    # action_space = [] local from the original.)
    self.shortestRoute = len(self.maze.GetOptimalRoute()[0])
    self.maze.printMaze()
    self.dqn_solver_worker = DQNSolver(int(self.observation_space_worker),
                                       len(self.action_space_worker))
    self.dqn_solver_queen = DQNSolver(int(self.observation_space_queen),
                                      len(self.action_space_queen))
    self.dqn_solver_spider = DQNSolver(int(self.observation_space_spider),
                                       len(self.action_space_spider))
    self.pReward = 0
    self.qReward = 0
    self.mReward = 0
    return self.pStateList
def reset(self):
    """Rebuild the current maze from its string and respawn every agent.

    Workers and queens restart at the maze entrance; spiders return to
    their individual start cells. Histories, counters and rewards are
    cleared. Returns the list of initial worker view states.
    """
    self.maze = Maze(self.maze.mazeString)
    self.history = self.maze.mazeString + "|" + "0"
    self.finished = 0
    self.count = 0
    self.eaten = 0
    self.queenEaten = False
    self.queenLeft = False
    self.finishedP = []
    self.finishedQ = []
    self.maxIter = 10 * self.maze.height * self.maze.width
    # Fresh agent populations.
    self.pList = [Worker(self.maze, self.spanP) for _ in range(self.wNumber)]
    self.qList = [Queen(self.maze) for _ in range(self.qNumber)]
    self.mList = [Spider(self.maze) for _ in range(self.sNumber)]
    # Place agents; each gets its own Cord instance (positions may be mutated).
    self.pPos = []
    self.qPos = []
    self.mPos = []
    for worker in self.pList:
        worker.setInitPos(
            Cord(self.maze.getInitialX(), self.maze.getInitialY()))
        self.pPos.append(worker.getPos())
    for queen in self.qList:
        queen.setInitPos(
            Cord(self.maze.getInitialX(), self.maze.getInitialY()))
        self.qPos.append(queen.getPos())
    for spider in self.mList:
        spider.setInitPos(Cord(spider.start.X, spider.start.Y))
        self.mPos.append(spider.getPos())
    # Record initial augmented views and the opening history entries.
    self.pStateList = []
    self.qStateList = []
    self.mStateList = []
    for worker in self.pList:
        view = worker.getAugView(worker.getPos(), self.spanP, self.pPos,
                                 self.qPos, self.mPos)
        self.pStateList.append(np.asarray(view))
        self.history += ("#" + worker.getName() + "-" +
                         worker.getPos().CordToString())
    for queen in self.qList:
        # Queens only contribute a history entry; no state is recorded.
        self.history += ("#" + queen.getName() + "-" +
                         queen.getPos().CordToString())
    for spider in self.mList:
        view = spider.getAugView(spider.getPos(), self.spanM, self.pPos,
                                 self.qPos, self.mPos)
        self.mStateList.append(np.asarray(view))
        self.history += ("#" + spider.getName() + "-" +
                         spider.getPos().CordToString())
    self.history += "|"
    self.shortestRoute = len(self.maze.GetOptimalRoute()[0])
    self.pReward = 0
    self.qReward = 0
    self.mReward = 0
    return self.pStateList
def __init__(self):
    """Initialise the multi-species (worker/queen/spider) maze environment.

    Generates a random 10x10 maze, configures per-species reward constants,
    creates the agent populations with their initial augmented views, and
    builds one DQN solver per species.
    """
    m = MazeGenerator(10, 10)
    # Alternative fixed maze strings kept for debugging:
    #   "Test,10,10,4,0,4,9,1111211111100000000111100110011000000001100001011111011000111000000001100101010110000000011111311111"
    #   "Test,6,6,4,5,1,0,131111100051105001100051150001111121"
    #   "Test,10,10,4,0,4,9,1111211151100000000111100110011000000001100001511111011000111000000001100501050110000000011111311111"
    #   "Test,4,4,2,0,2,3,1125100110011531"
    self.maze = Maze(m)
    self.s = Simulator(self.maze)
    self.spanP = 5     # worker/queen view span
    self.spanM = 5     # spider view span
    self.wNumber = 5   # workers
    self.qNumber = 0   # queens
    self.sNumber = 0   # spiders
    self.pList = []
    self.qList = []
    self.mList = []
    self.pPos = []
    self.qPos = []
    self.mPos = []
    self.pStateList = []
    self.qStateList = []
    self.mStateList = []
    self.finishedP = []
    self.finishedQ = []
    self.history = m + "|" + "0"
    self.finished = 0
    self.count = 0
    self.eaten = 0
    self.queenEaten = False
    self.queenLeft = False
    # --- rewards (exit/atQueen scale with maze area) ---
    self.wReward_not_possible = -50
    self.wReward_wall = -50
    self.wReward_entrance = -20
    self.wReward_finished_before = 0
    self.wReward_exit = self.maze.height * self.maze.width * 20
    self.wReward_towards_exit = -1
    self.wReward_toQueen = -1
    self.wReward_atQueen = self.maze.height * self.maze.width * 20
    self.wReward_repeat_pos = -20
    self.wReward_else = -3
    self.wReward_queenEaten = -5000
    self.qReward_not_possible = -50
    self.qReward_wall = -50
    self.qReward_entrance = -20
    self.qReward_finished_before = 0
    self.qReward_exit = self.maze.height * self.maze.width * 20
    self.qReward_towards_exit = -1
    self.qReward_repeat_pos = -20
    self.qReward_else = -3
    self.sReward_not_possible = -50
    self.sReward_wall = -50
    self.sReward_eat = 1000
    self.sReward_eatQueen = 5000
    self.sReward_towards_prey = -1
    self.sReward_repeat_pos = -20
    self.sReward_else = -3
    # Book-keeping folder name (needs updating for spiders; a longer
    # reward-encoding variant was commented out in the original).
    self.folderName = "Span_" + str(self.spanP) + "Dim_" + str(
        self.maze.height) + "_" + str(self.maze.width)
    self.maxIter = 10 * self.maze.height * self.maze.width
    self.completeStop = False
    # --- create agents ---
    for _ in range(self.wNumber):
        p = Worker(self.maze, self.spanP)
        self.s.add(p)
        self.pList.append(p)
        self.pPos.append(p.getPos())
    for _ in range(self.qNumber):
        q = Queen(self.maze)
        self.s.add(q)
        self.qList.append(q)
        self.qPos.append(q.getPos())
    for _ in range(self.sNumber):
        s = Spider(self.maze)
        self.s.add(s)
        self.mList.append(s)
        self.mPos.append(s.getPos())
    # BUG FIX: the original looped over range(...) while reading the stale
    # creation-loop locals p/q/s, so every state/history entry came from the
    # last-created agent (and raised NameError when a count was 0).
    # Iterate the agent lists, matching reset().
    for p in self.pList:
        state = np.asarray(
            p.getAugView(p.getPos(), self.spanP, self.pPos, self.qPos,
                         self.mPos))
        self.pStateList.append(state)
        self.history += "#" + p.getName() + "-" + p.getPos().CordToString()
    for q in self.qList:
        # Queens contribute to history only; no state is recorded here.
        self.history += "#" + q.getName() + "-" + q.getPos().CordToString()
    for s in self.mList:
        state = np.asarray(
            s.getAugView(s.getPos(), self.spanM, self.pPos, self.qPos,
                         self.mPos))
        self.mStateList.append(state)
        self.history += "#" + s.getName() + "-" + s.getPos().CordToString()
    self.history += "|"
    # Shared discrete action indices, one per member of Action
    # (was a manual append loop with a dead duplicate list).
    action_space = list(range(len(Action)))
    self.action_space_worker = np.asarray(action_space)
    self.observation_space_worker = math.pow(2 * self.spanP + 1, 2)
    self.action_space_queen = np.asarray(action_space)
    # Queen observes the padded full maze, not a local view.
    self.observation_space_queen = (self.maze.height + 2 * self.spanP) * (
        self.maze.width + 2 * self.spanP)
    self.action_space_spider = np.asarray(action_space)
    self.observation_space_spider = math.pow(2 * self.spanM + 1, 2)
    self.shortestRoute = len(self.maze.GetOptimalRoute()[0])
    self.maze.printMaze()
    self.dqn_solver_worker = DQNSolver(int(self.observation_space_worker),
                                       len(self.action_space_worker))
    self.dqn_solver_queen = DQNSolver(int(self.observation_space_queen),
                                      len(self.action_space_queen))
    self.dqn_solver_spider = DQNSolver(int(self.observation_space_spider),
                                       len(self.action_space_spider))
    self.pReward = 0
    self.qReward = 0
    self.mReward = 0