return np.array(adv)[::-1] if __name__ == "__main__": env = UnityEnv('test.app', 0,use_visual=True) ppo = PPO(env) all_ep_r = [] t = 0 for ep in range(EP_MAX): s = env.reset() ep_r = 0 done = False while not done: t+=1 env.render() a,v = ppo.choose_action(s) s_, r, done, _ = env.step(a) ppo.buffer_s.append(s) ppo.buffer_a.append(a) ppo.buffer_r.append(r) ppo.buffer_v.append(v) ppo.buffer_done.append(done) s = s_ ep_r += r # update ppo if (t+1) % BATCH == 0: print("updating...") t = 0 v_s_ = v
def cartpole():
    """Multi-agent DQN training loop against a Unity environment.

    Every agent starts with the SAME shared ``DQNSolver`` instance;
    ``learning_brain`` holds a deep-copied per-agent snapshot that is
    refreshed as agents die and is saved to ``.h5`` files when an
    episode terminates.

    NOTE(review): relies on module-level globals ``ENV_NAME``,
    ``UnityEnv``, ``ScoreLogger``, ``DQNSolver``, ``copy`` and ``np``.
    Runs forever (outer ``while True`` has no exit).
    """
    env = UnityEnv(environment_filename=ENV_NAME,
                   worker_id=2,
                   use_visual=False,
                   multiagent=True)
    score_logger = ScoreLogger(ENV_NAME)
    agents_brain = []   # one brain per agent (all aliases of dqn_solver below)
    agents_action = []
    index_list = []     # agent indices already recorded as "was alive"
    agents_alive = []   # per-agent alive flags from the observation vector
    count = 0           # cursor into index_list when copying brains back
    count1 = 0          # cursor used when flushing brains at episode end
    num_agents = env.number_agents
    print("___________Number of agents in cartpole __")
    print(num_agents)
    observation_space = env.observation_space.shape[0]
    action_space = env.action_space.n
    dqn_solver = DQNSolver(observation_space, action_space)
    print("__dqn solver______")
    print(dqn_solver)
    #model = tf.keras.models.load_model("")
    # NOTE(review): every slot holds the SAME solver object (shared weights
    # and replay memory); independent copies only exist via learning_brain.
    for x in range((env.number_agents)):
        agents_brain.append(dqn_solver)
    print("______agentbrain____")
    print(agents_brain)
    print("_Agent action___")
    print(agents_action)
    learning_brain = copy.deepcopy(agents_brain)
    run = 0
    state = env.reset()
    initialstate = copy.deepcopy(state)
    while True:  # episode loop
        run += 1
        env.reset()
        print("____________STATE____________-")
        print(state[0])
        # Restart every episode from the cached initial observation rather
        # than the fresh reset() result.
        state = copy.deepcopy(initialstate)
        agents_brain = []
        agents_action = []
        index_list = []
        agents_alive = []
        count = 0
        count1 = 0
        # assumes state[0][-5] encodes the live-agent count -- TODO confirm
        num_agents = int(state[0][-5])
        agents_brain = copy.deepcopy(learning_brain)
        print(learning_brain)
        print(agents_brain)
        print(state)
        #for x in range ( (env.number_agents - 1) ):
        step = 0
        while True:  # step loop within one episode
            step += 1
            env.render()
            print("___________STatte Lenth_______")
            print(len(state))
            print("______selffish___")
            print(state[0])
            agents_action = [1] * len(state)  # default action 1 for everyone
            copied_agents_alive = copy.deepcopy(agents_alive)
            print("__________numagents_____")
            # NOTE(review): only num_agents - 1 agents get a learned action;
            # the last keeps the default 1 -- unclear if intentional.
            for x in range(num_agents - 1):
                state[x] = np.reshape(state[x], [1, observation_space])
                agents_action[x] = agents_brain[x].act(state[x])
            print(agents_action)
            # NOTE(review): this env.step variant takes num_agents as a
            # second argument -- verify against the UnityEnv wrapper in use.
            state_next, reward, terminal, info = env.step(
                agents_action, num_agents)
            print("_______Reward________")
            print(reward)
            print("_____________NEXT STATE LENGTH____________")
            print(len(state_next))
            if (len(state_next) == 0):
                break
            # assumes slots [-13:-5] are per-agent alive flags -- TODO confirm
            agents_alive = state_next[0][-13:-5]
            num_agents = int(state_next[0][-5])
            print("_______num agnets in cartpole________")
            print(num_agents)
            print("_____index list")
            print(index_list)
            print(agents_alive)
            # Drop agents whose death was already recorded.
            agents_alive1 = np.delete(agents_alive, index_list)
            print("_______Alive agent list_______")
            print(agents_alive1)
            flag = False
            # del agents_alive[index_list[x]]
            # Record any newly-flagged agent index exactly once.
            for x in range(len(agents_alive)):
                if (agents_alive[x] == float(1)):
                    for y in range(len(index_list)):
                        if (index_list[y] == x):
                            flag = True
                    if (flag == False):
                        index_list.append(x)
                flag = False  # reset before checking the next agent
            # Copy the brain of each flagged agent back into learning_brain
            # and remove it from the active pool.
            index_to_remove = []
            for x in range(len(agents_alive1)):
                if (agents_alive1[x] == float(1)):
                    learning_brain[index_list[count]] = agents_brain[x]
                    index_to_remove.append(x)
                    count = count + 1
            agents_brain = [
                i for j, i in enumerate(agents_brain)
                if j not in index_to_remove
            ]
            print("____________AGENTS_BRAIN_________")
            print(len(agents_brain))
            print("_______________Terminal_____________")
            print(terminal)
            if (terminal[0] == True):
                print("Run: " + str(run) + ", exploration: " +
                      str(dqn_solver.exploration_rate) + ", score: " +
                      str(step))
                score_logger.add_score(step, run)
                # Flush the remaining active brains back, then persist all
                # learned brains for this run.
                for x in range(len(copied_agents_alive)):
                    learning_brain[x] = agents_brain[count1]
                    count1 = count1 + 1
                for x in range(len(learning_brain)):
                    learning_brain[x].save(str(run) + "brain" + str(x) + ".h5")
                break
            # Store transitions and train each surviving agent's brain.
            for x in range(num_agents - 1):
                state[x] = np.reshape(state[x], [1, observation_space])
                state_next[x] = np.reshape(state_next[x],
                                           [1, observation_space])
                agents_brain[x].remember(state[x], agents_action[x],
                                         reward[x], state_next[x],
                                         terminal[x])
                agents_brain[x].experience_replay()
            state = state_next
def cartpole():
    """Multi-agent DQN training loop with one independent brain per agent.

    Unlike the shared-brain variant, every agent gets its own
    ``DQNSolver``; the brain driving a given observation is selected by
    the agent id stored at ``state[x][0, 12]`` (1-based, hence ``- 1``).

    NOTE(review): relies on module-level globals ``ENV_NAME``,
    ``UnityEnv``, ``ScoreLogger``, ``DQNSolver`` and ``np``. Runs
    forever (outer ``while True`` has no exit).
    """
    env = UnityEnv(environment_filename=ENV_NAME,
                   worker_id=5,
                   use_visual=False,
                   multiagent = True)
    score_logger = ScoreLogger(ENV_NAME)
    agents_brain = []   # one independent DQNSolver per agent
    agents_action = []
    num_agents = env.number_agents
    observation_space = env.observation_space.shape[0]
    print("____________Observation_space")
    print(observation_space)
    action_space = env.action_space.n
    dqn_solver = DQNSolver(observation_space, action_space)
    for x in range ((env.number_agents)):
        agents_brain.append(DQNSolver(observation_space, action_space))
    print ("Length of BrainList: ",len(agents_brain))
    run = 0
    state = env.reset()
    print("______INITIAL______")
    print(state)
    #initialstate = copy.deepcopy(state)
    print("*****************************initial state for unity envirmonet**************")
    #print(initialstate)
    jk = 1  # completed-episode counter (for logging only)
    while True:  # episode loop
        run += 1
        state = env.reset()
        #state = copy.deepcopy(initialstate)
        # assumes state[0][-5] encodes the live-agent count -- TODO confirm
        num_agents = int(state[0][-5])
        print("_____________State _______________")
        print(int(state[0][12]))
        step = 0
        print("################################This is loop################################# :" , jk)
        while True:  # step loop within one episode
            step += 1
            env.render()
            agents_action = [1] * len(state)  # default action 1 for everyone
            print(state[0])
            print("*******************Length of state******************")
            print(len(state))
            # Route each observation to the brain of the agent that owns it
            # (agent id presumably lives at feature index 12 -- TODO confirm).
            for x in range(len(state)):
                state[x] = np.reshape(state[x], [1, observation_space])
                agents_action[x] = agents_brain[int(state[x][0,12]) - 1].act(state[x])
            print("Agents Actions List: ",agents_action)
            state_next, reward, terminal, info = env.step(agents_action)
            #print ("_____________STATE_NEXT___________")
            #print (state_next)
            if (len(state_next) == 0):
                break
            # assumes slots [-13:-5] are per-agent alive flags -- TODO confirm
            agents_alive = state_next[0][-13:-5]
            print ("Agents_alive: ", agents_alive)
            print ("Rewards: ",reward)
            num_agents = int(state_next[0][-5])
            print ("Number of agents: ",num_agents)
            print("_________Terminal list_______" , terminal)
            if (terminal[0] == True):
                # Persist every brain; NOTE(review): the episode-ending
                # break below is commented out, so training continues past
                # the terminal flag -- confirm this is intentional.
                print("**************************Brain saved******************************")
                for x in range(len(agents_brain)):
                    agents_brain[x].save(str(run) + "brain" + str(x) + ".h5")
                jk+=1
                print("#####################################Loop is######################## :" , jk)
                #break
            # Store transitions and train the owning brain of each agent.
            for x in range(len(state_next)):
                state[x] = np.reshape(state[x], [1, observation_space])
                state_next[x] = np.reshape(state_next[x], [1, observation_space])
                agents_brain[int(state_next[x][0,12]) - 1].remember(state[x], agents_action[x], reward[x], state_next[x], terminal[x])
                agents_brain[int(state_next[x][0,12]) - 1].experience_replay()
            state = state_next
def cartpole(): env = UnityEnv(environment_filename=ENV_NAME, worker_id=1, use_visual=False, multiagent=True) #score_logger = ScoreLogger(ENV_NAME) agents_brain = [] agents_action = [] pathname = "C:/HinaProgramm/testingFolder/Unity Environment" num_agents = env.number_agents print("Number of agents in enviroment : ", num_agents) observation_space = env.observation_space.shape[0] print("____________Observation_space______________") print(observation_space) print("__________Action Space________________") action_space = env.action_space.n print(action_space) dqn_solver = DQNSolver(observation_space, action_space) for x in range((num_agents)): agents_brain.append(DQNSolver(observation_space, action_space)) print("Length of BrainList: ", len(agents_brain)) run = 0 state = env.reset() #print("______INITIAL______") #print(state) initialstate = copy.deepcopy(state) #print("*****************************initial state for unity envirmonet**************") #print(initialstate) jk = 1 sharecount = 0 eatcount = 0 filecount = 0 #f = str(filecount) + "sahre.csv" f = open(str(filecount) + "sahre.csv", 'ab') #J = str(filecount) + "eat.csv" J = open(str(filecount) + "eat.csv", 'ab') while True: run += 1 env.reset() state = copy.deepcopy(initialstate) num_agents = int(state[0][-8]) print("_numagents__________", num_agents) print("_____________State _______________") print(int(state[0][12])) step = 0 print( "################################This is loop################################# :", jk) print("_____Run _______ :", run) while True: #print("************Number of agents *********") #print(env.number_agents) step += 1 env.render() agents_action = [1] * len(state) #print(state[0]) #print("*******************Length of state******************") #print(len(state)) for x in range(len(state)): state[x] = np.reshape(state[x], [1, observation_space]) agents_action[x] = agents_brain[int(state[x][0, 12]) - 1].act( state[x]) sharecount += agents_action.count(5) eatcount += 
agents_action.count(6) #print("Agents Actions List: ",agents_action) state_next, reward, terminal, info = env.step(agents_action) for x in range(len(agents_action)): if (agents_action[x] == 5): new = np.asarray([state_next[x]]) np.savetxt(f, new, delimiter=",") #f.write(str(state_next[x])+"\r\n") if (agents_action[x] == 6): #J.write(str(state_next[x])+"\r\n") new = np.asarray([state_next[x]]) np.savetxt(J, new, delimiter=",") print("_____________STATE_NEXT___________") print(state_next) if (len(state_next) == 0): #f.write(str(sharecount)) #J.write(str(eatcount)) #f.close() #J.close() filecount += 1 np.savetxt(f, sharecount, delimiter=",") np.savetxt(J, eatcount, delimiter=",") break agents_alive = state_next[0][-16:-8] print("Agents_alive: ", agents_alive) print("Rewards: ", reward) num_agents = int(state_next[0][-8]) print("Number of agents: ", num_agents) #print("_________Terminal list_______" , terminal) if (terminal[0] == True): print( "**************************Brain saved******************************" ) for x in range(len(agents_brain)): agents_brain[x].model.save(pathname + str(run) + "brain" + str(x) + ".h5") jk += 1 print( "#####################################Loop is######################## :", jk) #f.write(str(sharecount)) #J.write(str(eatcount)) #f.close() #J.close() filecount += 1 break for x in range(len(state_next)): state[x] = np.reshape(state[x], [1, observation_space]) state_next[x] = np.reshape(state_next[x], [1, observation_space]) agents_brain[int(state_next[x][0, 12]) - 1].remember( state[x], agents_action[x], reward[x], state_next[x], terminal[x]) agents_brain[int(state_next[x][0, 12]) - 1].experience_replay() state = state_next
class Drone_Racing(Environment):
    """Gym-style wrapper around the Unity drone-racing build.

    Controls the drone with a 9-way one-hot action vector. Frames are
    grouped into windows of ``skipping_state_fq`` steps: the agent's
    chosen action is only applied on the last frame of each window,
    while the preceding "skip" frames send the HOVER action (index 8).
    """

    worker_id = 0  # class-level counter, bumped once per constructed instance

    def __init__(self, platform):
        # Resolve the platform-specific environment build; anything other
        # than MAC/WINDOWS falls through to None.
        env_filename = None
        if platform == OSName.MAC:
            env_filename = EnvironmentName.DRONE_RACING_MAC.value
        elif platform == OSName.WINDOWS:
            env_filename = EnvironmentName.DRONE_RACING_WINDOWS.value

        # A random worker id keeps parallel Unity instances from colliding.
        self.env = UnityEnv(
            environment_filename=env_filename,
            worker_id=randrange(65536),
            use_visual=False,
            multiagent=False
        ).unwrapped
        super(Drone_Racing, self).__init__()
        Drone_Racing.worker_id += 1

        self.action_shape = self.get_action_shape()
        self.action_space = self.env.action_space
        self.continuous = False
        self.skipping_state_fq = 3     # window length: act every 3rd frame
        self.skipping_state_index = 0  # position within the current window
        self.WIN_AND_LEARN_FINISH_SCORE = 200

    def get_n_states(self):
        """Size of the flat observation vector."""
        return self.env.observation_space.shape[0]

    def get_n_actions(self):
        """First dimension of the action space's shape.

        NOTE(review): assumes ``action_space`` exposes ``.shape`` --
        confirm against the wrapped space type.
        """
        return self.env.action_space.shape[0]

    def get_state_shape(self):
        """Underlying observation space object."""
        return self.env.observation_space

    def get_action_shape(self):
        """Underlying action space object."""
        return self.env.action_space

    def get_action_space(self):
        """Underlying action space object (alias of get_action_shape)."""
        return self.env.action_space

    @property
    def action_meanings(self):
        """Human-readable labels for the 9 discrete actions, in index order."""
        return ["FORWARD", "BACKWARD", "RIGHT", "LEFT",
                "UP", "DOWN", "R_ROTATE", "L_ROTATE", "HOVER"]

    def reset(self):
        """Reset the wrapped environment and return its initial state."""
        return self.env.reset()

    def step(self, action):
        """Advance one frame, substituting HOVER during skip frames.

        Returns ``(next_state, reward, adjusted_reward, done, info)``;
        ``adjusted_reward`` equals ``reward`` (no shaping is applied) and
        ``info["skipping"]`` flags whether this frame was a skip frame.
        """
        one_hot = [0] * 9
        if self.is_skip_phase():
            one_hot[8] = 1  # HOVER while inside the skip window
        else:
            one_hot[action] = 1

        next_state, reward, done, info = self.env.step(one_hot)
        adjusted_reward = reward

        # End of a window resets the index; the flag reflects whether the
        # frame we just executed was a skip frame.
        skipping = self.is_skip_phase()
        info["skipping"] = skipping
        if not skipping:
            self.skipping_state_index = 0
        self.skipping_state_index += 1

        return next_state, reward, adjusted_reward, done, info

    def render(self):
        """Delegate rendering to the wrapped environment."""
        self.env.render()

    def close(self):
        """Delegate shutdown to the wrapped environment."""
        self.env.close()

    def is_skip_phase(self):
        """True except on the frame where the window counter hits its length."""
        return self.skipping_state_fq != self.skipping_state_index