def simulate_training_episodes(agent, environment, episodes=10000, max_timesteps=1000,
                               visual_evaluation_frequency=0, evaluate_trained_agent=False):
    utility.print_line()
    print("Simulating {} training episode(s) for a maximum of {} timestep(s) each".format(episodes, max_timesteps))
    utility.print_line()
    for i in range(episodes):
        total_reward = 0
        state = environment.reset()
        for t in range(max_timesteps):
            action = agent.determine_action(state)
            next_state, reward, done = environment.step(action)
            agent.step(state, action, reward, next_state, done)
            total_reward += reward
            state = next_state
            if done:
                break
        print("Episode: {:>5}".format(i + 1), sep=" ", end="", flush=True)
        print(" | Total timesteps: {:>4}".format(t + 1), sep=" ", end="", flush=True)
        print(" | Total reward gained: {:>5}".format(total_reward), sep=" ", end="", flush=True)
        print(" | Episode ended: {}".format(done))
        if visual_evaluation_frequency and (i + 1) % visual_evaluation_frequency == 0:
            print()
            print("Visually evaluating agent after episode {}".format(i + 1))
            simulate_visual_test_episode(agent, environment, verbose=True)
    if evaluate_trained_agent:
        print()
        print("Evaluating trained agent")
        simulate_test_episodes(agent, environment)
def simulate_visual_test_episode(agent, environment):
    utility.print_line()
    print("Simulating a visual test episode")
    utility.print_line()
    total_reward = 0
    total_time_steps = 0
    observation = environment.reset()
    done = False
    while not done:
        environment.render()
        time.sleep(0.05)
        action = agent.select_action(observation)
        next_observation, reward, done, _ = environment.step(action)
        total_reward += reward
        total_time_steps += 1
        observation = next_observation
    print("Total time steps: {:>4}".format(total_time_steps), sep=" ", end="", flush=True)
    print(" | Total reward gained: {:>5}".format(total_reward))
    print()
def simulate_training_experiments(agent_type, environment, experiments=10000, timesteps=10000,
                                  verbose=False, output_path="./output"):
    utility.print_line()
    print("Simulating {} training experiment(s) of {} timestep(s) each".format(experiments, timesteps))
    utility.print_line()
    state_space_size = environment.compute_state_space_size()
    action_space_size = environment.compute_action_space_size()
    mean_rewards = np.zeros(timesteps)
    mean_starting_state_max_q_values = np.zeros(timesteps)
    mean_normalized_entropies = np.zeros(timesteps)
    starting_state = environment.reset()
    for i in range(experiments):
        if verbose:
            print("Simulating experiment: {:>5}/{}".format(i + 1, experiments))
        state_visits = np.zeros(state_space_size)
        agent = utility.create_agent(agent_type, state_space_size, action_space_size)
        state = environment.reset()
        for t in range(timesteps):
            action = agent.determine_action(state)
            next_state, reward, done = environment.step(action)
            agent.step(state, action, reward, next_state, done)
            # For the current timestep, compute the incremental means of the rewards, the
            # starting state max q values, and the normalized entropies over the current experiment.
            mean_rewards[t] += (reward - mean_rewards[t]) / (i + 1)
            mean_starting_state_max_q_values[t] += (agent.compute_max_q_value(starting_state)
                                                    - mean_starting_state_max_q_values[t]) / (i + 1)
            state_visits[next_state] += 1
            probabilities = state_visits / np.sum(state_visits)
            normalized_entropy = -np.nansum(probabilities * np.log2(probabilities)) / np.log2(state_space_size)
            mean_normalized_entropies[t] += (normalized_entropy - mean_normalized_entropies[t]) / (i + 1)
            state = next_state
            if done:
                state = environment.reset()
    # Create dataframes and store the data in the given output path.
    environment_type = utility.determine_environment_type(environment)
    data = pd.DataFrame()
    data["mean_reward"] = mean_rewards
    data["mean_starting_state_max_q_value"] = mean_starting_state_max_q_values
    data["mean_normalized_entropy"] = mean_normalized_entropies
    meta_data = pd.DataFrame()
    meta_data["timesteps"] = [timesteps]
    meta_data["experiments"] = [experiments]
    meta_data["agent_type"] = [agent_type]
    meta_data["environment_type"] = [environment_type]
    meta_data["grid_dimension_size"] = [environment.grid.shape[0]]
    utility.save_dataframe(data, output_path,
                           utility.join_strings("-", "training-experiments", agent_type, environment_type))
    utility.save_dataframe(meta_data, output_path,
                           utility.join_strings("-", "training-experiments", agent_type, environment_type, "meta"))
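# A minimal standalone sketch (not part of the pipeline) of the two statistics updated per
# timestep above, assuming nothing beyond numpy. The incremental mean identity
# mean_n = mean_{n-1} + (x_n - mean_{n-1}) / n reproduces the batch mean without storing the
# samples, and the visit-count entropy is divided by log2 of the state space size so that it
# lies in [0, 1]. The visit counts below are hypothetical values for illustration only.
def _statistics_demo():
    samples = np.random.rand(1000)
    mean = 0.0
    for n, x in enumerate(samples, start=1):
        mean += (x - mean) / n  # incremental mean update, as in the loop above
    assert np.isclose(mean, samples.mean())
    state_visits = np.array([5.0, 3.0, 0.0, 2.0])  # hypothetical per-state visit counts
    probabilities = state_visits / np.sum(state_visits)
    # np.nansum drops the nan produced by the 0 * log2(0) term of unvisited states.
    normalized_entropy = -np.nansum(probabilities * np.log2(probabilities)) / np.log2(len(state_visits))
    assert 0.0 <= normalized_entropy <= 1.0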
def handle(self): print "New Connection from: " + self.client_address[0], ', on port: ' , self.client_address[1] # M1 i = 0 while(i < 4): utility.print_user_log(self.client_address, "KEY ESTABLISHMENT PROTOCOL") self.key_est_result = self.key_establishment_protocol() if self.key_est_result is None: i = i + 1 utility.print_user_log(self.client_address, "Errors during protocol number: %d" %i) fail = utility.pack_message('FAIL') ret = utility.send_data(self.request, fail) if ret is False: utility.print_user_log(self.client_address,"[ERROR] Error during sending data ") return None #self.request.close() elif self.key_est_result == 201: #utility.print_user_log(self.client_address, 'DISC') utility.print_user_log(self.client_address, "Client Disconnected") return None elif self.key_est_result == 203: i = i + 1 utility.print_user_log(self.client_address, "Errors during protocol number: %d" %i) fail = utility.pack_message('COPY') ret = utility.send_data(self.request, fail) else: utility.print_user_log(self.client_address,"Key establishment protocol has done correctly \n") i = 0 break if (i >= 2): utility.print_user_log(self.client_address, "To many errors! Disconnecting...") return None # after return, finish() will be called while(True): utility.print_line() utility.print_user_log(self.client_address, "Listening for requests...") data = utility.recv_data(self.request, 0) #print data if not data or data is None: utility.print_user_log(self.client_address, 'Client Disconnected' ) break # after return, finish() will be called data_to_send = self.manage_request(data) if data_to_send[0] is False: utility.print_user_log(self.client_address,"[ERROR] Unable to manage the request ") ret = utility.send_data(self.request, "FAIL") if ret is False: utility.print_user_log(self.client_address,"[ERROR] Error during sending data ") else: #print data_to_send[1] ret = utility.send_data(self.request, data_to_send[1]) if ret is False: utility.print_user_log(self.client_address,"[ERROR] Error during sending data ")
def finish(self):
    # utility.print_user_log(self.client_address, "Finish function")
    utility.print_user_log(self.client_address, "Cleaning the connection...")
    self.request.close()
    if hasattr(self, 'session_key'):
        utility.print_user_log(self.client_address, "Deleting secret information...")
        del self.session_key
        # del self.client_key
    self.database.disconnect()
    del self.database
    utility.print_user_log(self.client_address, "Cleaning completed")
    utility.print_line()
def simulate_training_timesteps(agent, environment, timesteps=10000, verbose=False, output_path="./output"):
    utility.print_line()
    print("Simulating {} training timestep(s)".format(timesteps))
    utility.print_line()
    rewards = []
    max_q_values = []
    starting_state_max_q_values = []
    starting_state = environment.reset()
    state = environment.reset()
    for t in range(timesteps):
        if verbose:
            print("Simulating timestep: {:>5}/{}".format(t, timesteps))
        action = agent.determine_action(state)
        next_state, reward, done = environment.step(action)
        agent.step(state, action, reward, next_state, done)
        rewards.append(reward)
        max_q_values.append(agent.compute_max_q_value(state))
        starting_state_max_q_values.append(agent.compute_max_q_value(starting_state))
        state = next_state
        if done:
            state = environment.reset()
    # Create dataframes and store the data in the given output path.
    environment_type = utility.determine_environment_type(environment)
    agent_type = utility.determine_agent_type(agent)
    grid_dimension_size = environment.grid.shape[0]
    data = pd.DataFrame()
    data["reward"] = rewards
    data["max_q_value"] = max_q_values
    data["starting_state_max_q_value"] = starting_state_max_q_values
    meta_data = pd.DataFrame()
    meta_data["timesteps"] = [timesteps]
    meta_data["agent_type"] = [agent_type]
    meta_data["environment_type"] = [environment_type]
    meta_data["grid_dimension_size"] = [grid_dimension_size]
    utility.save_dataframe(data, output_path,
                           utility.join_strings("-", "training-timesteps", agent_type, environment_type))
    utility.save_dataframe(meta_data, output_path,
                           utility.join_strings("-", "training-timesteps", agent_type, environment_type, "meta"))
def simulate_training_experiments(algorithm_name, environment, experiments, episodes):
    utility.print_line()
    print("Simulating {} training experiment(s) of {} episode(s) each".format(experiments, episodes))
    utility.print_line()
    observation_space_size, action_space_size = utility.compute_environment_space_sizes(environment)
    experiment_total_rewards = np.zeros((experiments, episodes))
    for i in range(experiments):
        print("Simulating experiment: {:>5}/{}".format(i + 1, experiments))
        utility.control_randomness(i, environment)
        agent = utility.create_agent(algorithm_name, observation_space_size, action_space_size)
        experiment_total_rewards[i] = simulate_training_episodes(agent, environment, episodes)
    return experiment_total_rewards
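# A hypothetical sketch of what a per-experiment seeding helper such as
# utility.control_randomness could look like; the real implementation lives in the utility
# module and may differ. It assumes the environment exposes a Gym-style seed() method.
def _control_randomness_sketch(experiment_index, environment):
    import random
    random.seed(experiment_index)     # Python's built-in RNG
    np.random.seed(experiment_index)  # numpy's global RNG
    if hasattr(environment, "seed"):
        environment.seed(experiment_index)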
def simulate_training_episodes(agent, environment, episodes, visual_evaluation_frequency=0, verbose=False):
    utility.print_line()
    print("Simulating {} training episode(s)".format(episodes))
    utility.print_line()
    episode_total_rewards = np.zeros(episodes)
    for i in range(episodes):
        total_reward = 0
        total_time_steps = 0
        observation = environment.reset()
        done = False
        while not done:
            action = agent.select_action(observation)
            next_observation, reward, done, _ = environment.step(action)
            agent.step(observation, action, reward, next_observation, done)
            total_reward += reward
            total_time_steps += 1
            observation = next_observation
        episode_total_rewards[i] = total_reward
        if verbose:
            print("Episode: {:>5}".format(i + 1), sep=" ", end="", flush=True)
            print(" | Total time steps: {:>4}".format(total_time_steps), sep=" ", end="", flush=True)
            print(" | Total reward gained: {:>5}".format(total_reward))
        if visual_evaluation_frequency and (i + 1) % visual_evaluation_frequency == 0:
            print()
            print("Visually evaluating agent after episode {}".format(i + 1))
            simulate_visual_test_episode(agent, environment)
    return episode_total_rewards
def simulate_test_episodes(agent, environment, episodes=10000):
    utility.print_line()
    print("Simulating {} test episode(s)".format(episodes))
    utility.print_line()
    gamma = agent.gamma
    total_rewards = []
    discounted_returns = []
    for i in range(episodes):
        total_reward = 0
        discounted_return_coefficient = 0
        state = environment.reset()
        done = False
        t = 0
        while not done:
            action = agent.determine_action(state)
            next_state, reward, done = environment.step(action)
            total_reward += reward
            discounted_return_coefficient += gamma ** t
            discounted_returns.append(reward * discounted_return_coefficient)
            state = next_state
            t += 1
        total_rewards.append(total_reward)
    # Print the data in the console.
    environment_type = utility.determine_environment_type(environment)
    agent_type = utility.determine_agent_type(agent)
    grid_dimension_size = environment.grid.shape[0]
    print("Total timesteps: {}".format(len(discounted_returns)), sep=" ", end="", flush=True)
    print(" | Agent type: {}".format(agent_type), sep=" ", end="", flush=True)
    print(" | Environment type: {}".format(environment_type), sep=" ", end="", flush=True)
    print(" | Grid dimension size: {}".format(grid_dimension_size), sep=" ", end="", flush=True)
    print(" | Discount factor: {}".format(gamma))
    print("Mean total reward over all episodes: {}".format(np.mean(total_rewards)))
    print("Mean discounted return over all timesteps: {}".format(np.mean(discounted_returns)))
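# For reference, a minimal standalone sketch of the textbook discounted return of one
# episode, G = sum over t of gamma^t * r_t. This is an illustration only; the per-timestep
# quantity logged above follows the original code's own accumulation scheme, which weights
# each reward by the running sum of discount factors instead.
def _discounted_return_demo(rewards=(1.0, 0.0, 0.0, 1.0), gamma=0.9):
    return sum((gamma ** t) * r for t, r in enumerate(rewards))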
def simulate_visual_test_episode(agent, environment, max_timesteps=40, verbose=False):
    utility.print_line()
    print("Simulating a visual test episode for a maximum of {} timestep(s)".format(max_timesteps))
    utility.print_line()
    plt.ion()
    total_reward = 0
    state = environment.reset()
    for t in range(max_timesteps):
        if verbose:
            print("Timestep: {:>4}".format(t), sep=" ", end="", flush=True)
            print(" | Agent's cell: {}".format(environment.agent_cell), sep=" ", end="", flush=True)
        utility.visualize_grid(agent, environment)
        plt.pause(2.5)
        action = agent.determine_action(state)
        next_state, reward, done = environment.step(action)
        if verbose:
            print(" | Action taken: {:>7}".format(environment.agent_action), sep=" ", end="", flush=True)
            print(" | Reward given: {:>3}".format(reward))
        total_reward += reward
        state = next_state
        if done:
            break
    print("Total timesteps: {}".format(t + 1), sep=" ", end="", flush=True)
    print(" | Total reward gained: {}".format(total_reward), sep=" ", end="", flush=True)
    print(" | Episode ended: {}".format(done))
    print()
    plt.ioff()
    plt.close()
def summary(self):
    print()
    print("ADVERSARIAL")
    utility.print_line()
    self._adversarial.summary()
    print()
    print("DISCRIMINATOR")
    utility.print_line()
    self._discriminator.summary()
    print()
    print("GENERATOR")
    utility.print_line()
    self._generator.summary()
def train(self, images, epochs, batch_size, saving_frequency, output_path):
    batches = int(images.shape[0] / batch_size)
    training_generator = self._data_generator.flow(images, batch_size=int(batch_size / 2))
    discriminator_history_real = []
    discriminator_history_fake = []
    generator_history = []
    for epoch in range(1, epochs + 1):
        discriminator_statistics_real = []
        discriminator_statistics_fake = []
        generator_statistics = []
        for _ in range(batches):
            # Select a mini batch of real images randomly, with size half of batch size. Account for
            # the case where the size of images is not divisible by batch size.
            real_images = training_generator.next()
            if real_images.shape[0] != int(batch_size / 2):
                real_images = training_generator.next()
            real_labels = np.ones((int(batch_size / 2), 1))
            # Generate fake images from noise, with size half of batch size.
            noise = np.random.normal(0, 1, (int(batch_size / 2), 100))
            fake_images = self._generator.predict(noise)
            fake_labels = np.zeros((int(batch_size / 2), 1))
            # Train the discriminator.
            discriminator_statistics_real.append(self._discriminator.train_on_batch(real_images, real_labels))
            discriminator_statistics_fake.append(self._discriminator.train_on_batch(fake_images, fake_labels))
            # Sample data points from the noise distribution, with size of batch size, and create
            # real labels for them.
            noise = np.random.normal(0, 1, (batch_size, 100))
            real_labels = np.ones((batch_size, 1))
            # Train the generator.
            generator_statistics.append(self._adversarial.train_on_batch(noise, real_labels))
        discriminator_history_real.append(np.average(discriminator_statistics_real, axis=0))
        discriminator_history_fake.append(np.average(discriminator_statistics_fake, axis=0))
        generator_history.append(np.average(generator_statistics, axis=0))
        # Print the statistics for the current epoch.
        print()
        print("Epoch %d/%d" % (epoch, epochs))
        utility.print_line()
        print("Discriminator: [Loss real: %f | Accuracy real: %.2f%% | Loss fake: %f | Accuracy fake: %.2f%%]"
              % (discriminator_history_real[-1][0], 100 * discriminator_history_real[-1][1],
                 discriminator_history_fake[-1][0], 100 * discriminator_history_fake[-1][1]))
        print("Generator: [Loss: %f]" % generator_history[-1])
        if epoch % saving_frequency == 0:
            # Save a sample of fake images, the generator, the discriminator and the training history
            # up to the current epoch. Use a separate name for the sample so the original images
            # argument is not shadowed.
            saving_directory_path = "{}/epoch-{}".format(output_path, str(epoch))
            generated_images = utility.generate_images(self._generator, 10)
            utility.save(generated_images, saving_directory_path)
            self.save_models(saving_directory_path)
            self._save_training_plots(saving_directory_path, discriminator_history_real,
                                      discriminator_history_fake, generator_history)
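# A minimal standalone sketch of the mini-batch bookkeeping used in train() above: half a
# batch of real images labeled 1, half a batch of generated images labeled 0, and a full
# batch of noise labeled 1 for the generator update. Shapes only; no model is involved, so
# this runs with numpy alone. The latent size of 100 matches the noise dimension hard-coded
# in train().
def _gan_batch_demo(batch_size=64, latent_dim=100):
    half = int(batch_size / 2)
    real_labels = np.ones((half, 1))             # discriminator targets for real images
    fake_labels = np.zeros((half, 1))            # discriminator targets for generated images
    discriminator_noise = np.random.normal(0, 1, (half, latent_dim))
    generator_noise = np.random.normal(0, 1, (batch_size, latent_dim))
    generator_labels = np.ones((batch_size, 1))  # generator is trained against "real" targets
    assert discriminator_noise.shape == (half, latent_dim)
    assert generator_noise.shape == (batch_size, latent_dim)
    assert real_labels.shape == fake_labels.shape == (half, 1)
    assert generator_labels.shape == (batch_size, 1)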