def create_controls(self):
    """Build the statistics panel: metric labels on the left, values on the right."""
    src_add = self.srcTxt.GetValue()
    obj = stat.Stats()
    qsent, qrecv, rsent, rrecv, avgRtt = obj.data_collector(
        src_add, self.qlist, self.rlist, self.qrlist)

    wx.StaticLine(self, -1, (25, 50), (300, 1))
    wx.StaticText(self, -1, 'Queries Sent', (25, 100), style=wx.ALIGN_RIGHT)
    wx.StaticText(self, -1, 'Responses Received', (25, 120), style=wx.ALIGN_RIGHT)
    wx.StaticText(self, -1, 'Queries Received', (25, 140))
    wx.StaticText(self, -1, 'Responses Sent', (25, 160))
    wx.StaticText(self, -1, 'Average RTT', (25, 180))
    wx.StaticText(self, -1, 'Generated Msg/Received Msg', (25, 220))

    wx.StaticText(self, -1, str(qsent), (250, 100))
    wx.StaticText(self, -1, str(rrecv), (250, 120))
    wx.StaticText(self, -1, str(qrecv), (250, 140))
    wx.StaticText(self, -1, str(rsent), (250, 160))
    wx.StaticText(self, -1, str(avgRtt), (250, 180))
    wx.StaticText(self, -1, str(qsent + rsent) + '/' + str(qrecv + rrecv), (250, 220))
    wx.StaticLine(self, -1, (25, 260), (300, 1))
    self.Centre()
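# ---------------------------------------------------------------------------
# Minimal sketch of a window that could host create_controls() above. The
# class name StatsFrame, the widget defaults, and the empty capture lists are
# illustrative assumptions, not from the original source; it also assumes the
# project's own `stat` module (with Stats.data_collector) is importable.
# ---------------------------------------------------------------------------
import wx

class StatsFrame(wx.Frame):
    create_controls = create_controls  # bind the method defined above

    def __init__(self, parent, qlist, rlist, qrlist):
        super().__init__(parent, title='Traffic Statistics', size=(360, 320))
        self.srcTxt = wx.TextCtrl(self, value='127.0.0.1')  # source address input
        self.qlist, self.rlist, self.qrlist = qlist, rlist, qrlist
        self.create_controls()

if __name__ == '__main__':
    app = wx.App()
    StatsFrame(None, [], [], []).Show()
    app.MainLoop()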
def stats(albumdir, options):
    """ Prints out statistics. """
    import statistics
    stats = statistics.Stats()
    if options.recursive:
        for root, dirs, files in albumdir.walk():
            if len(dirs) > 0:
                for dir in dirs:
                    fp = FilePath(root, dir)
                    if isIgnored(options, fp):
                        continue
                    try:
                        stats = statistics.dirstat(fp, stats, options.verbose)
                    except NamingMuseException as strerr:
                        print(strerr)
def train(environment, agent, n_episodes=1000000, max_t=1000, solve_score=100.0):
    """ Run training loop.

    Params
    ======
        environment: environment object
        agent: agent object
        n_episodes (int): maximum number of training episodes
        max_t (int): maximum number of timesteps per episode
        solve_score (float): criteria for considering the environment solved
    """
    stats = statistics.Stats()
    stats_format = 'Buffer: {:6} NoiseW: {:.4}'

    for i_episode in range(1, n_episodes + 1):
        rewards = []
        state = environment.reset()

        # loop over steps
        for t in range(max_t):
            # select an action, disabling noise during evaluation
            if agent.evaluation_only:
                action = agent.act(state, add_noise=False)
            else:
                action = agent.act(state)

            # map from continuous actions to discrete actions:
            # the largest absolute value is the action chosen
            action = action.squeeze(0)
            action_tmp = [action[0:2], action[2:4], action[4:7], action[7:10]]
            env_action = []
            for a in action_tmp:
                idx = np.argmax(np.abs(a))  # pick the largest absolute value for each agent
                is_negative = a[idx] < 0    # True if the chosen action value is negative
                env_action.append((idx * 2) + is_negative)  # map to a discrete action index
            env_action = np.array(env_action)

            # DEBUG action mapping
            #print('idx: {}'.format(idx))
            #print('is_negative: {}'.format(is_negative))
            #print('env_action: {}'.format(env_action))

            # DEBUG playing against random agents
            #random_actions = np.random.random_sample(4) * 5
            #env_action = random_actions  # both teams take random actions
            #env_action = np.array((random_actions[0], env_action[1], random_actions[2], env_action[3]))  # +1B only red team takes random actions
            #env_action = np.array((env_action[0], env_action[1], random_actions[2], env_action[3]))  # +2B only red striker takes random actions
            #env_action = np.array((env_action[0], random_actions[1], random_actions[2], env_action[3]))  # +3B red striker and blue goalie take random actions
            #env_action = np.array((env_action[0], random_actions[1], env_action[2], random_actions[3]))  # +1R only blue team takes random actions
            #env_action = np.array((env_action[0], env_action[1], env_action[2], random_actions[3]))  # +2R only blue striker takes random actions
            #env_action = np.array((random_actions[0], env_action[1], env_action[2], random_actions[3]))  # +3R blue striker and red goalie take random actions
            #print('env_action: {}'.format(env_action))

            # take action in environment
            next_state, reward, done = environment.step(env_action)

            # update agent with returned information
            agent.step(state, action, reward, next_state, done)
            state = next_state
            rewards.append(reward)
            if all(done):
                break

        # every episode
        buffer_len = len(agent.memory)
        per_agent_rewards = []  # calculate per agent rewards
        for i in range(agent.n_agents):
            per_agent_reward = 0
            for step in rewards:
                per_agent_reward += step[i]
            per_agent_rewards.append(per_agent_reward)
        # track summed rewards across each team
        stats.update(t,
                     [np.sum((per_agent_rewards[0], per_agent_rewards[2]))],
                     [np.sum((per_agent_rewards[1], per_agent_rewards[3]))],
                     i_episode)
        stats.print_episode(
            i_episode, t, stats_format, buffer_len, agent.noise_weight,
            agent.agents[0].critic_loss, agent.agents[1].critic_loss,
            agent.agents[2].critic_loss, agent.agents[3].critic_loss,
            agent.agents[0].actor_loss, agent.agents[1].actor_loss,
            agent.agents[2].actor_loss, agent.agents[3].actor_loss,
            agent.agents[0].noise_val, agent.agents[1].noise_val,
            agent.agents[2].noise_val, agent.agents[3].noise_val,
            per_agent_rewards[0], per_agent_rewards[1],
            per_agent_rewards[2], per_agent_rewards[3])

        # every epoch (100 episodes)
        if i_episode % 100 == 0:
            stats.print_epoch(i_episode, stats_format, buffer_len, agent.noise_weight)
            save_name = 'checkpoints/episode.{}.'.format(i_episode)
            for i, save_agent in enumerate(agent.agents):
                torch.save(save_agent.actor_local.state_dict(), save_name + str(i) + '.actor.pth')
                torch.save(save_agent.critic_local.state_dict(), save_name + str(i) + '.critic.pth')

        # if solved
        if stats.is_solved(i_episode, solve_score):
            stats.print_solve(i_episode, stats_format, buffer_len, agent.noise_weight)
            save_name = 'checkpoints/solved.'
            for i, save_agent in enumerate(agent.agents):
                torch.save(save_agent.actor_local.state_dict(), save_name + str(i) + '.actor.pth')
                torch.save(save_agent.critic_local.state_dict(), save_name + str(i) + '.critic.pth')
            break
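# ---------------------------------------------------------------------------
# Standalone illustration of the continuous-to-discrete action mapping used
# in the loop above: take the index of the largest-magnitude entry, double
# it, and add 1 if that entry is negative. The sample vectors are invented.
# ---------------------------------------------------------------------------
import numpy as np

def to_discrete(a):
    idx = np.argmax(np.abs(a))  # position of the largest-magnitude entry
    is_negative = a[idx] < 0    # sign selects between the paired actions
    return int(idx * 2 + is_negative)

assert to_discrete(np.array([0.3, -0.9])) == 3       # entry 1 wins and is negative
assert to_discrete(np.array([0.7, 0.1, -0.2])) == 0  # entry 0 wins and is positive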
def train(environment, agent, n_episodes=10000, max_t=1000, solve_score=0.5):
    """ Run training loop.

    Params
    ======
        environment: environment object
        agent: agent object
        n_episodes (int): maximum number of training episodes
        max_t (int): maximum number of timesteps per episode
        solve_score (float): criteria for considering the environment solved
    """
    stats = statistics.Stats()
    stats_format = 'Buffer: {:6} NoiseW: {:.4}'

    for i_episode in range(1, n_episodes + 1):
        rewards = []
        state = environment.reset()

        # loop over steps
        for t in range(max_t):
            # select an action, disabling noise during evaluation
            if agent.evaluation_only:
                action = agent.act(state, add_noise=False)
            else:
                action = agent.act(state)

            # take action in environment
            next_state, reward, done = environment.step(action)

            # update agent with returned information
            agent.step(state, action, reward, next_state, done)
            state = next_state
            rewards.append(reward)
            if any(done):
                break

        # every episode
        buffer_len = len(agent.memory)
        per_agent_rewards = []  # calculate per agent rewards
        for i in range(agent.n_agents):
            per_agent_reward = 0
            for step in rewards:
                per_agent_reward += step[i]
            per_agent_rewards.append(per_agent_reward)
        # use max over all agents as episode reward
        stats.update(t, [np.max(per_agent_rewards)], i_episode)
        stats.print_episode(
            i_episode, t, stats_format, buffer_len, agent.noise_weight,
            agent.agents[0].critic_loss, agent.agents[1].critic_loss,
            agent.agents[0].actor_loss, agent.agents[1].actor_loss,
            agent.agents[0].noise_val, agent.agents[1].noise_val,
            per_agent_rewards[0], per_agent_rewards[1])

        # every epoch (100 episodes)
        if i_episode % 100 == 0:
            stats.print_epoch(i_episode, stats_format, buffer_len, agent.noise_weight)
            save_name = 'checkpoints/episode.{}.'.format(i_episode)
            for i, save_agent in enumerate(agent.agents):
                torch.save(save_agent.actor_local.state_dict(), save_name + str(i) + '.actor.pth')
                torch.save(save_agent.critic_local.state_dict(), save_name + str(i) + '.critic.pth')

        # if solved
        if stats.is_solved(i_episode, solve_score):
            stats.print_solve(i_episode, stats_format, buffer_len, agent.noise_weight)
            save_name = 'checkpoints/solved.'
            for i, save_agent in enumerate(agent.agents):
                torch.save(save_agent.actor_local.state_dict(), save_name + str(i) + '.actor.pth')
                torch.save(save_agent.critic_local.state_dict(), save_name + str(i) + '.critic.pth')
            break
def train(environment, agent, n_episodes=1000, max_t=1000, solve_score=30.0,
          graph_when_done=True):
    """ Run training loop for DDPG (the agent saves actor and critic networks).

    Params
    ======
        environment: environment object
        agent: agent object
        n_episodes (int): maximum number of training episodes
        max_t (int): maximum number of timesteps per episode
        solve_score (float): criteria for considering the environment solved
        graph_when_done (bool): whether to show matplotlib graphs of the training run
    """
    stats = statistics.Stats()

    for i_episode in range(1, n_episodes + 1):
        rewards = []
        state = environment.reset()

        # loop over steps
        for t in range(max_t):
            # select an action
            action = agent.act(state)
            # take action in environment
            next_state, reward, done = environment.step(action)
            # update agent with returned information
            agent.step(state, action, reward, next_state, done)
            state = next_state
            rewards.append(reward)
            if any(done):
                break

        # every episode
        buffer_len = len(agent.memory)
        per_agent_rewards = []  # calculate per agent rewards
        for i in range(agent.n_agents):
            per_agent_reward = 0
            for step in rewards:
                per_agent_reward += step[i]
            per_agent_rewards.append(per_agent_reward)
        stats.update(t, [np.mean(per_agent_rewards)], i_episode)
        stats.print_episode(i_episode, agent.alpha, buffer_len, t)

        # every epoch (100 episodes)
        if i_episode % 100 == 0:
            stats.print_epoch(i_episode, agent.alpha, buffer_len)
            save_name = 'checkpoints/episode.{}'.format(i_episode)
            torch.save(agent.actor_local.state_dict(), save_name + '.actor.pth')
            torch.save(agent.critic_local.state_dict(), save_name + '.critic.pth')

        # if solved
        if stats.is_solved(i_episode, solve_score):
            stats.print_solve(i_episode, agent.alpha, buffer_len)
            torch.save(agent.actor_local.state_dict(), 'checkpoints/solved.actor.pth')
            torch.save(agent.critic_local.state_dict(), 'checkpoints/solved.critic.pth')
            break

    # training finished
    if graph_when_done:
        stats.plot(agent.loss_list)
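# ---------------------------------------------------------------------------
# Hedged driver sketch for the loop above, kept commented out because the
# environment/agent modules, class names, and constructor arguments are
# assumptions for illustration only; substitute the project's real classes.
# ---------------------------------------------------------------------------
# from reacher_environment import Environment  # hypothetical module
# from ddpg_agent import Agent                 # hypothetical module
#
# env = Environment()
# agent = Agent(state_size=33, action_size=4, n_agents=20)  # assumed sizes
# train(env, agent, n_episodes=500, solve_score=30.0)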
def train(PATH, environment, agent, timestamp, n_episodes=10000, max_t=1000,
          score_threshold=0.5):
    """Train with MADDPG."""
    start = time.time()
    total_scores = deque(maxlen=100)
    stats = statistics.Stats()
    stats_format = "Buffer: {:6} NoiseW: {:.4}"

    for i_episode in range(1, n_episodes + 1):
        scores = []
        states = environment.reset()

        for t in range(max_t):
            # select actions, disabling noise during evaluation
            if agent.evaluation_only:
                actions = agent.act(states, add_noise=False)
            else:
                actions = agent.act(states)
            next_states, rewards, dones = environment.step(actions)
            agent.step(states, actions, rewards, next_states, dones)
            states = next_states
            scores.append(rewards)
            if np.any(dones):
                break

        buffer_len = len(agent.memory)
        per_agent_rewards = []
        for i in range(agent.num_agents):
            per_agent_reward = 0
            for step in scores:
                per_agent_reward += step[i]
            per_agent_rewards.append(per_agent_reward)
        # use max over all agents as episode reward
        stats.update(t, [np.max(per_agent_rewards)], i_episode)
        stats.print_episode(
            i_episode, t, stats_format, buffer_len, agent.noise_weight,
            agent.agents[0].critic_loss, agent.agents[1].critic_loss,
            agent.agents[0].actor_loss, agent.agents[1].actor_loss,
            agent.agents[0].noise_val, agent.agents[1].noise_val,
            per_agent_rewards[0], per_agent_rewards[1])

        if i_episode % 500 == 0:
            stats.print_epoch(i_episode, stats_format, buffer_len, agent.noise_weight)
            save_name = f"../results/{timestamp}_episode_{i_episode}"
            for i, save_agent in enumerate(agent.agents):
                torch.save(save_agent.actor.state_dict(), save_name + f"A{i}_actor.pth")
                torch.save(save_agent.critic.state_dict(), save_name + f"A{i}_critic.pth")

        # stop once the rolling average clears the score threshold
        if stats.is_solved(i_episode, score_threshold):
            stats.print_solve(i_episode, stats_format, buffer_len, agent.noise_weight)
            save_name = f"../results/{timestamp}_solved_episode_{i_episode}"
            for i, save_agent in enumerate(agent.agents):
                torch.save(save_agent.actor.state_dict(), save_name + f"A{i}_actor.pth")
                torch.save(save_agent.critic.state_dict(), save_name + f"A{i}_critic.pth")
            break
def train(environment, agent, n_episodes=2500, max_t=1000, solve_score=0.5,
          best_margin=0.35, notebook=False):
    """ Run training loop.

    Params
    ======
        environment: environment object
        agent: agent object
        n_episodes (int): maximum number of training episodes
        max_t (int): maximum number of timesteps per episode
        solve_score (float): criteria for considering the environment solved
        best_margin (float): criteria for stopping after best result (above solve_score)
        notebook (boolean): flag for returning scores for plotting in notebook
    """
    stat = statistics.Stats()
    stats_format = 'Buffer: {:6} NoiseW: {:.4}'
    solved_flag = False

    for i_episode in range(1, n_episodes + 1):
        rewards = []
        state = environment.reset()

        # loop over steps
        for t in range(max_t):
            # select an action; if evaluating, noise will not be used by the agent
            action = agent.act(state)

            # take action in environment
            next_state, reward, done = environment.step(action)

            # update agent with returned information
            agent.step(state, action, reward, next_state, done)
            state = next_state
            rewards.append(reward)
            if any(done):
                break

        # every episode
        buffer_len = len(agent.memory)
        per_agent_rewards = []  # calculate per agent rewards
        for i in range(agent.n_agents):
            per_agent_reward = 0
            for step in rewards:
                per_agent_reward += step[i]
            per_agent_rewards.append(per_agent_reward)
        # use max over all agents as episode reward
        stat.update(t, [np.max(per_agent_rewards)], i_episode)
        stat.print_episode(
            i_episode, t, stats_format, buffer_len, agent.noise_weight,
            agent.agents[0].critic_loss, agent.agents[1].critic_loss,
            agent.agents[0].actor_loss, agent.agents[1].actor_loss,
            agent.agents[0].noise_val, agent.agents[1].noise_val,
            per_agent_rewards[0], per_agent_rewards[1])

        # every epoch (100 episodes)
        if i_episode % 100 == 0:
            stat.print_epoch(i_episode, stats_format, buffer_len, agent.noise_weight)
            save_name = 'checkpoints/episode.{}.'.format(i_episode)
            for i, save_agent in enumerate(agent.agents):
                torch.save(save_agent.actor_local.state_dict(), save_name + str(i) + '.actor.pth')
                torch.save(save_agent.critic_local.state_dict(), save_name + str(i) + '.critic.pth')

        # first time solved: checkpoint, then keep training toward a better score
        if stat.is_solved(i_episode, solve_score) and not solved_flag:
            solved_flag = True
            best = False
            stat.print_solve(i_episode, stats_format, best, buffer_len, agent.noise_weight)
            save_name = 'checkpoints/solved.'
            for i, save_agent in enumerate(agent.agents):
                torch.save(save_agent.actor_local.state_dict(), save_name + str(i) + '.actor.pth')
                torch.save(save_agent.critic_local.state_dict(), save_name + str(i) + '.critic.pth')
        # solved with margin: checkpoint the best result and stop
        elif stat.is_solved(i_episode, solve_score + best_margin):
            best = True
            stat.print_solve(i_episode, stats_format, best, buffer_len, agent.noise_weight)
            save_name = 'checkpoints/solved_best.'
            for i, save_agent in enumerate(agent.agents):
                torch.save(save_agent.actor_local.state_dict(), save_name + str(i) + '.actor.pth')
                torch.save(save_agent.critic_local.state_dict(), save_name + str(i) + '.critic.pth')
            if notebook:
                return stat
            else:
                break
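# ---------------------------------------------------------------------------
# All of the training loops above lean on a project-local statistics.Stats
# helper with the same small surface: update, print_episode, print_epoch,
# is_solved, print_solve, and (in the DDPG variant) plot. Below is a minimal
# sketch of that interface, reconstructed from the call sites; the 100-episode
# moving-average window and every implementation detail are assumptions, not
# confirmed by the source.
# ---------------------------------------------------------------------------
import numpy as np

class Stats:
    def __init__(self, window=100):
        self.scores = []      # one aggregate score per episode
        self.window = window  # moving-average window (assumed)

    def update(self, t, *args):
        # last positional argument is the episode index; any lists before it
        # are score lists (the soccer loop passes one per team)
        *score_lists, i_episode = args
        self.scores.append(float(np.mean([s for lst in score_lists for s in lst])))

    def moving_avg(self):
        return float(np.mean(self.scores[-self.window:]))

    def is_solved(self, i_episode, solve_score):
        return i_episode >= self.window and self.moving_avg() >= solve_score

    def print_episode(self, i_episode, *args):
        print('\rEpisode {}  MovingAvg: {:.3f}'.format(i_episode, self.moving_avg()), end='')

    def print_epoch(self, i_episode, *args):
        print('\nEpisode {}  MovingAvg: {:.3f}'.format(i_episode, self.moving_avg()))

    print_solve = print_epoch  # same report at solve time (assumed)

    def plot(self, loss_list=None):
        import matplotlib.pyplot as plt
        plt.plot(self.scores)
        plt.xlabel('Episode')
        plt.ylabel('Score')
        plt.show()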