Example #1
def simulate_training_episodes(agent, environment, episodes=10000, max_timesteps=1000, visual_evaluation_frequency=0,
                               evaluate_trained_agent=False):
  utility.print_line()
  print("Simulating {} training episode(s) for a maximum of {} timestep(s) each".format(episodes, max_timesteps))
  utility.print_line()

  for i in range(episodes):
    total_reward = 0
    state = environment.reset()
    for t in range(max_timesteps):
      action = agent.determine_action(state)
      next_state, reward, done = environment.step(action)
      agent.step(state, action, reward, next_state, done)

      total_reward += reward
      state = next_state

      if done:
        break

    print("Episode: {:>5}".format(i + 1), sep=" ", end="", flush=True)
    print(" | Total timesteps: {:>4}".format(t + 1), sep=" ", end="", flush=True)
    print(" | Total reward gained: {:>5}".format(total_reward), sep=" ", end="", flush=True)
    print(" | Episode ended: {}".format(done))

    if visual_evaluation_frequency and (i + 1) % visual_evaluation_frequency == 0:
      print()
      print("Visually evaluating agent after episode {}".format(i + 1))
      simulate_visual_test_episode(agent, environment, verbose=True)

  if evaluate_trained_agent:
    print()
    print("Evaluating trained agent")
    simulate_test_episodes(agent, environment)
Example #2
def simulate_visual_test_episode(agent, environment):
    utility.print_line()
    print("Simulating a visual test episode")
    utility.print_line()

    total_reward = 0
    total_time_steps = 0
    observation = environment.reset()
    done = False
    while not done:
        environment.render()
        time.sleep(0.05)

        action = agent.select_action(observation)
        next_observation, reward, done, _ = environment.step(action)

        total_reward += reward
        total_time_steps += 1
        observation = next_observation

    print("Total time steps: {:>4}".format(total_time_steps),
          sep=" ",
          end="",
          flush=True)
    print(" | Total reward gained: {:>5}".format(total_reward))
    print()
Example #3
def simulate_training_experiments(agent_type, environment, experiments=10000, timesteps=10000, verbose=False,
                                  output_path="./output"):
  utility.print_line()
  print("Simulating {} training experiment(s) of {} timestep(s) each".format(experiments, timesteps))
  utility.print_line()

  state_space_size = environment.compute_state_space_size()
  action_space_size = environment.compute_action_space_size()
  mean_rewards = np.zeros(timesteps)
  mean_starting_state_max_q_values = np.zeros(timesteps)
  mean_normalized_entropies = np.zeros(timesteps)
  starting_state = environment.reset()
  for i in range(experiments):
    if verbose:
      print("Simulating experiment: {:>5}/{}".format(i + 1, experiments))

    state_visits = np.zeros(state_space_size)
    agent = utility.create_agent(agent_type, state_space_size, action_space_size)
    state = environment.reset()
    for t in range(timesteps):
      action = agent.determine_action(state)
      next_state, reward, done = environment.step(action)
      agent.step(state, action, reward, next_state, done)

      # For the current timestep, compute the incremental means of the rewards, the
      # starting state max q values, and the normalized entropies over the current experiment.
      mean_rewards[t] += (reward - mean_rewards[t]) / (i + 1)
      mean_starting_state_max_q_values[t] += (agent.compute_max_q_value(starting_state)
                                              - mean_starting_state_max_q_values[t]) / (i + 1)
      state_visits[next_state] += 1
      probabilities = state_visits / np.sum(state_visits)
      normalized_entropy = -np.nansum(probabilities * np.log2(probabilities)) / np.log2(state_space_size)
      mean_normalized_entropies[t] += (normalized_entropy - mean_normalized_entropies[t]) / (i + 1)

      state = next_state

      if done:
        state = environment.reset()

  # Create dataframes and store the data in the given output path.
  environment_type = utility.determine_environment_type(environment)

  data = pd.DataFrame()
  data["mean_reward"] = mean_rewards
  data["mean_starting_state_max_q_value"] = mean_starting_state_max_q_values
  data["mean_normalized_entropy"] = mean_normalized_entropies

  meta_data = pd.DataFrame()
  meta_data["timesteps"] = [timesteps]
  meta_data["experiments"] = [experiments]
  meta_data["agent_type"] = [agent_type]
  meta_data["environment_type"] = [environment_type]
  meta_data["grid_dimension_size"] = [environment.grid.shape[0]]

  utility.save_dataframe(data, output_path, utility.join_strings("-", "training-experiments", agent_type, environment_type))
  utility.save_dataframe(meta_data, output_path, utility.join_strings("-", "training-experiments", agent_type,
                                                                      environment_type, "meta"))
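
# The loop above keeps incremental (running) means across experiments and a normalized
# state-visit entropy per timestep. A minimal, self-contained sketch of both formulas,
# using made-up sample values (not data from the project):
import numpy as np

# Incremental mean: m_n = m_{n-1} + (x_n - m_{n-1}) / n, which matches np.mean over all samples.
samples = [2.0, 5.0, 11.0, 4.0]
running_mean = 0.0
for n, x in enumerate(samples, start=1):
    running_mean += (x - running_mean) / n
assert np.isclose(running_mean, np.mean(samples))

# Normalized entropy: H(p) / log2(|S|); it is 1.0 for uniform visits and 0.0 when a single
# state absorbs all visits (np.nansum discards the 0 * log2(0) terms).
state_visits = np.array([3, 1, 0, 4])
probabilities = state_visits / np.sum(state_visits)
normalized_entropy = -np.nansum(probabilities * np.log2(probabilities)) / np.log2(state_visits.size)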
Example #4
    def handle(self):
        print "New Connection from: " + self.client_address[0], ', on port: ' , self.client_address[1]
        # M1
        i = 0
        while i < 4:
            utility.print_user_log(self.client_address, "KEY ESTABLISHMENT PROTOCOL")
            self.key_est_result = self.key_establishment_protocol()
            if self.key_est_result is None:
                i += 1
                utility.print_user_log(self.client_address, "Errors during the protocol: %d" % i)
                fail = utility.pack_message('FAIL')
                ret = utility.send_data(self.request, fail)
                if ret is False:
                    utility.print_user_log(self.client_address,"[ERROR] Error during sending data ")
                    return None
                #self.request.close()
            elif self.key_est_result == 201:
                #utility.print_user_log(self.client_address, 'DISC')
                utility.print_user_log(self.client_address, "Client Disconnected")
                return None
            elif self.key_est_result == 203:
                i += 1
                utility.print_user_log(self.client_address, "Errors during the protocol: %d" % i)
                fail = utility.pack_message('COPY')
                ret = utility.send_data(self.request, fail)

            else:
                utility.print_user_log(self.client_address,"Key establishment protocol has done correctly \n")
                i = 0
                break
        if (i >= 2):
            utility.print_user_log(self.client_address, "To many errors! Disconnecting...")
            return None
             # after return, finish() will be called
        
        while True:
            utility.print_line()
            utility.print_user_log(self.client_address, "Listening for requests...")
            data = utility.recv_data(self.request, 0)
            #print data
            if not data:
                utility.print_user_log(self.client_address, "Client Disconnected")
                # After handle() returns, finish() will be called.
                break
            data_to_send = self.manage_request(data)
            if data_to_send[0] is False:
                utility.print_user_log(self.client_address,"[ERROR] Unable to manage the request ")
                ret = utility.send_data(self.request, "FAIL") 
                if ret is False:
                    utility.print_user_log(self.client_address,"[ERROR] Error during sending data ")
            else:
                #print data_to_send[1]
                ret = utility.send_data(self.request, data_to_send[1]) 
                if ret is False:
                    utility.print_user_log(self.client_address,"[ERROR] Error during sending data ")
Example #5
    def finish(self):
        #utility.print_user_log(self.client_address, "Finish function")
        utility.print_user_log(self.client_address, "Cleaning the connection...")
        self.request.close()
        if hasattr(self, 'session_key'):
            utility.print_user_log(self.client_address, "Deleting secret information...")
            del self.session_key
            #del self.client_key
            self.database.disconnect()
            del self.database
        utility.print_user_log(self.client_address, "Cleaning completed")
        utility.print_line()
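
# The handle()/finish() pair above relies on socketserver's request lifecycle: finish() is
# invoked by the server after handle() returns, including on early returns. A minimal,
# standalone illustration (EchoHandler and the port below are hypothetical, not part of
# this project):
import socketserver

class EchoHandler(socketserver.BaseRequestHandler):
    def handle(self):
        data = self.request.recv(1024)
        if not data:
            return  # finish() still runs after this early return.
        self.request.sendall(data)

    def finish(self):
        # Cleanup hook called by the server once handle() has returned.
        print("Connection from {} closed".format(self.client_address[0]))

# Usage sketch:
# with socketserver.TCPServer(("localhost", 9999), EchoHandler) as server:
#     server.serve_forever()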
Example #6
def simulate_training_timesteps(agent, environment, timesteps=10000, verbose=False, output_path="./output"):
  utility.print_line()
  print("Simulating {} training timestep(s)".format(timesteps))
  utility.print_line()

  rewards = []
  max_q_values = []
  starting_state_max_q_values = []
  starting_state = environment.reset()
  state = environment.reset()
  for t in range(timesteps):
    if verbose:
      print("Simulating timestep: {:>5}/{}".format(t, timesteps))

    action = agent.determine_action(state)
    next_state, reward, done = environment.step(action)
    agent.step(state, action, reward, next_state, done)

    rewards.append(reward)
    max_q_values.append(agent.compute_max_q_value(state))
    starting_state_max_q_values.append(agent.compute_max_q_value(starting_state))

    state = next_state

    if done:
      state = environment.reset()

  # Create dataframes and store the data in the given output path.
  environment_type = utility.determine_environment_type(environment)
  agent_type = utility.determine_agent_type(agent)
  grid_dimension_size = environment.grid.shape[0]

  data = pd.DataFrame()
  data["reward"] = rewards
  data["max_q_value"] = max_q_values
  data["starting_state_max_q_value"] = starting_state_max_q_values

  meta_data = pd.DataFrame()
  meta_data["timesteps"] = [timesteps]
  meta_data["agent_type"] = [agent_type]
  meta_data["environment_type"] = [environment_type]
  meta_data["grid_dimension_size"] = [grid_dimension_size]

  utility.save_dataframe(data, output_path, utility.join_strings("-", "training-timesteps", agent_type, environment_type))
  utility.save_dataframe(meta_data, output_path, utility.join_strings("-", "training-timesteps", agent_type,
                                                                      environment_type, "meta"))
Example #7
def simulate_training_experiments(algorithm_name, environment, experiments,
                                  episodes):
    utility.print_line()
    print("Simulating {} training experiment(s) of {} episode(s) each".format(
        experiments, episodes))
    utility.print_line()

    observation_space_size, action_space_size = utility.compute_environment_space_sizes(
        environment)

    experiment_total_rewards = np.zeros((experiments, episodes))
    for i in range(experiments):
        print("Simulating experiment: {:>5}/{}".format(i + 1, experiments))
        utility.control_randomness(i, environment)
        agent = utility.create_agent(algorithm_name, observation_space_size,
                                     action_space_size)
        experiment_total_rewards[i] = simulate_training_episodes(
            agent, environment, episodes)

    return experiment_total_rewards
Example #8
def simulate_training_episodes(agent,
                               environment,
                               episodes,
                               visual_evaluation_frequency=0,
                               verbose=False):
    utility.print_line()
    print("Simulating {} training episode(s)".format(episodes))
    utility.print_line()

    episode_total_rewards = np.zeros(episodes)
    for i in range(episodes):
        total_reward = 0
        total_time_steps = 0
        observation = environment.reset()
        done = False
        while not done:
            action = agent.select_action(observation)
            next_observation, reward, done, _ = environment.step(action)
            agent.step(observation, action, reward, next_observation, done)

            total_reward += reward
            total_time_steps += 1
            observation = next_observation

        episode_total_rewards[i] = total_reward

        if verbose:
            print("Episode: {:>5}".format(i + 1), sep=" ", end="", flush=True)
            print(" | Total time steps: {:>4}".format(total_time_steps),
                  sep=" ",
                  end="",
                  flush=True)
            print(" | Total reward gained: {:>5}".format(total_reward))

        if visual_evaluation_frequency and (
                i + 1) % visual_evaluation_frequency == 0:
            print()
            print("Visually evaluating agent after episode {}".format(i + 1))
            simulate_visual_test_episode(agent, environment)

    return episode_total_rewards
Example #9
def simulate_test_episodes(agent, environment, episodes=10000):
  utility.print_line()
  print("Simulating {} test episode(s)".format(episodes))
  utility.print_line()

  gamma = agent.gamma
  total_rewards = []
  discounted_returns = []
  for i in range(episodes):
    total_reward = 0
    discounted_return_coefficient = 0
    state = environment.reset()
    done = False
    t = 0
    while not done:
      action = agent.determine_action(state)
      next_state, reward, done = environment.step(action)

      total_reward += reward
      discounted_return_coefficient += gamma ** t
      discounted_returns.append(reward * discounted_return_coefficient)
      state = next_state

      t += 1

    total_rewards.append(total_reward)

  # Print the data in the console.
  environment_type = utility.determine_environment_type(environment)
  agent_type = utility.determine_agent_type(agent)
  grid_dimension_size = environment.grid.shape[0]

  print("Total timesteps: {}".format(len(discounted_returns)), sep=" ", end="", flush=True)
  print(" | Agent type: {}".format(agent_type), sep=" ", end="", flush=True)
  print(" | Environment type: {}".format(environment_type), sep=" ", end="", flush=True)
  print(" | Grid dimension size: {}".format(grid_dimension_size), sep=" ", end="", flush=True)
  print(" | Discount factor: {}".format(gamma))

  print("Mean total reward over all episodes: {}".format(np.mean(total_rewards)))
  print("Mean discounted return over all timesteps: {}".format(np.mean(discounted_returns)))
Example #10
def simulate_visual_test_episode(agent, environment, max_timesteps=40, verbose=False):
  utility.print_line()
  print("Simulating a visual test episode for a maximum of {} timestep(s)".format(max_timesteps))
  utility.print_line()

  plt.ion()

  total_reward = 0
  state = environment.reset()
  for t in range(max_timesteps):

    if verbose:
      print("Timestep: {:>4}".format(t), sep=" ", end="", flush=True)
      print(" | Agent's cell: {}".format(environment.agent_cell), sep=" ", end="", flush=True)

    utility.visualize_grid(agent, environment)
    plt.pause(2.5)

    action = agent.determine_action(state)
    next_state, reward, done = environment.step(action)

    if verbose:
      print(" | Action taken: {:>7}".format(environment.agent_action), sep=" ", end="", flush=True)
      print(" | Reward given: {:>3}".format(reward))

    total_reward += reward
    state = next_state

    if done:
      break

  print("Total timesteps: {}".format(t + 1), sep=" ", end="", flush=True)
  print(" | Total reward gained: {}".format(total_reward), sep=" ", end="", flush=True)
  print(" | Episode ended: {}".format(done))
  print()

  plt.ioff()
  plt.close()
    def summary(self):
        print()
        print("ADVERSARIAL")
        utility.print_line()
        self._adversarial.summary()
        print()
        print("DISCRIMINATOR")
        utility.print_line()
        self._discriminator.summary()
        print()
        print("GENERATOR")
        utility.print_line()
        self._generator.summary()
    def train(self, images, epochs, batch_size, saving_frequency, output_path):
        batches = int(images.shape[0] / batch_size)
        training_generator = self._data_generator.flow(images,
                                                       batch_size=int(
                                                           batch_size / 2))

        discriminator_history_real = []
        discriminator_history_fake = []
        generator_history = []
        for epoch in range(1, epochs + 1):
            discriminator_statistics_real = []
            discriminator_statistics_fake = []
            generator_statistics = []
            for _ in range(batches):
                # Select a mini batch of real images randomly, with size half of batch size. Account for the
                # case where the size of images is not divisible by batch size.
                real_images = training_generator.next()
                if real_images.shape[0] != int(batch_size / 2):
                    real_images = training_generator.next()
                real_labels = np.ones((int(batch_size / 2), 1))

                # Generate fake images from noise, with size half of batch size.
                noise = np.random.normal(0, 1, (int(batch_size / 2), 100))
                fake_images = self._generator.predict(noise)
                fake_labels = np.zeros((int(batch_size / 2), 1))

                # Train the discriminator.
                discriminator_statistics_real.append(
                    self._discriminator.train_on_batch(real_images,
                                                       real_labels))
                discriminator_statistics_fake.append(
                    self._discriminator.train_on_batch(fake_images,
                                                       fake_labels))

                # Sample data points from the noise distribution, with size of batch size and create
                # real labels for them.
                noise = np.random.normal(0, 1, (batch_size, 100))
                real_labels = np.ones((batch_size, 1))

                # Train the generator.
                generator_statistics.append(
                    self._adversarial.train_on_batch(noise, real_labels))

            discriminator_history_real.append(
                np.average(discriminator_statistics_real, axis=0))
            discriminator_history_fake.append(
                np.average(discriminator_statistics_fake, axis=0))
            generator_history.append(np.average(generator_statistics, axis=0))

            # Print the statistics for the current epoch.
            print()
            print("Epoch %d/%d" % (epoch, epochs))
            utility.print_line()
            print(
                "Discriminator: [Loss real: %f | Accuracy real: %.2f%% | Loss fake: %f | Accuracy fake: %.2f%%]"
                % (discriminator_history_real[-1][0],
                   100 * discriminator_history_real[-1][1],
                   discriminator_history_fake[-1][0],
                   100 * discriminator_history_fake[-1][1]))
            print("Generator: [Loss: %f]" % generator_history[-1])

            if epoch % saving_frequency == 0:
                # Save a sample of fake images, the generator, the discriminator and the training history up
                # to the current epoch.
                saving_directory_path = "{}/epoch-{}".format(
                    output_path, str(epoch))
                generated_images = utility.generate_images(self._generator, 10)
                utility.save(generated_images, saving_directory_path)
                self.save_models(saving_directory_path)
                self._save_training_plots(saving_directory_path,
                                          discriminator_history_real,
                                          discriminator_history_fake,
                                          generator_history)
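
# The training loop above assumes self._adversarial stacks the generator and a frozen
# discriminator, so that train_on_batch(noise, real_labels) updates only the generator.
# A minimal sketch of how such a composite model is commonly wired in Keras; the exact
# construction, optimizer, and hyperparameters used in this project may differ:
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam

def build_adversarial(generator, discriminator):
    # Freeze the discriminator so adversarial training only adjusts the generator's weights.
    discriminator.trainable = False
    adversarial = Sequential([generator, discriminator])
    adversarial.compile(loss="binary_crossentropy", optimizer=Adam(0.0002, 0.5))
    return adversarial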