Example #1
 def bootstrap(self, model_paths, env, mixture):
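     # With probability retraining_prob, retrain an existing agent sampled
     # from the NE mixture; otherwise train a brand-new model from scratch.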
     if np.random.rand() < self.retraining_prob:
         path = np.random.choice(model_paths, 1, p=mixture)[0]
         path = glob.glob(path + "/*.zip")[0]
         return helper.loadAgent(path, self.model_type)
     else:
         return helper.make_new_model(self.model_type,
                                      self.policy,
                                      env,
                                      n_steps=self.training_steps)
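The mixture passed to bootstrap is the current Nash equilibrium distribution over the existing agents, so np.random.choice draws a starting checkpoint in proportion to its equilibrium weight. A minimal standalone sketch of that sampling step (the paths and probabilities below are made up for illustration):

 import numpy as np

 # Hypothetical checkpoint directories and an NE mixture over them
 model_paths = ["runs/pelican_0", "runs/pelican_1", "runs/pelican_2"]
 mixture = np.array([0.5, 0.3, 0.2])  # probabilities must sum to 1

 retraining_prob = 0.8
 if np.random.rand() < retraining_prob:
     # Retrain: sample one existing agent according to the mixture
     path = np.random.choice(model_paths, 1, p=mixture)[0]
     print("retrain from", path)
 else:
     print("train a fresh model from scratch")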
Example #2
 def compute_initial_payoffs(self):
     # If the initial agents were appended together as a nested list, unwrap it
     if len(self.initial_pelicans) > 0:
         self.pelicans = self.pelicans[0]
     if len(self.initial_panthers) > 0:
         self.panthers = self.panthers[0]
     # On the first iteration, when starting from pre-trained models, we need to build the corresponding payoff matrix.
     # The last agent of each set is left out (it is added in the normal cycle flow).
     # The two sets may start with different numbers of agents, so zip_longest pads the shorter one.
     for j, (pelican, panther) in enumerate(itertools.zip_longest(self.pelicans[:-1], self.panthers[:-1])):
         if pelican is not None:
             path = glob.glob(pelican + "/*.zip")[0]
             self.pelican_model = helper.loadAgent(path, self.model_type)
         else:
             self.pelican_model = None
         if panther is not None:
             path = glob.glob(panther + "/*.zip")[0]
             self.panther_model = helper.loadAgent(path, self.model_type)
         else:
             self.panther_model = None
         self.compute_payoff_matrix(self.pelicans[:j + 1],
                                    self.panthers[:j + 1])
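itertools.zip_longest pads the shorter of the two agent lists with None, which is why the loop above checks each side for None before loading a model. A minimal sketch of that padding behaviour:

 import itertools

 pelicans = ["pelican_0", "pelican_1", "pelican_2"]
 panthers = ["panther_0"]

 # The shorter list is padded with None rather than truncated
 for pelican, panther in itertools.zip_longest(pelicans, panthers):
     print(pelican, panther)
 # pelican_0 panther_0
 # pelican_1 None
 # pelican_2 None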
Example #3
    def run_pnm(self):

        panther_agent_filepath, pelican_agent_filepath = self.initialAgents()

        # Initialise the previous iteration's NE values, used by the stopping criterion
        value_to_pelican = 0.
        mixture_pelicans = np.array([1.])
        mixture_panthers = np.array([1.])

        # Create DataFrames for plotting purposes
        df_cols = ["NE_Payoff", "Pelican_BR_Payoff", "Panther_BR_Payoff", "Pelican_supp_size", "Panther_supp_size"]
        df = pd.DataFrame(columns = df_cols)
        # Second DataFrame for periodic rigorous exploitability checks
        exploit_df_cols = ["iter", "NE_Payoff", "Pelican_BR_Payoffs", "Panther_BR_Payoffs"]
        exploit_df = pd.DataFrame(columns = exploit_df_cols)

        # Train best responses until Nash equilibrium is found or max_iterations are reached
        logger.info('Parallel Nash Memory (PNM)')
        for self.pnm_iteration in range(self.max_pnm_iterations):
            start = time.time()

            logger.info("*********************************************************")
            logger.info('PNM iteration ' + str(self.pnm_iteration + 1) + ' of ' + str(self.max_pnm_iterations))
            logger.info("*********************************************************")

            self.pelicans.append(pelican_agent_filepath)
            self.panthers.append(panther_agent_filepath)

            if self.pnm_iteration == 0:
                self.compute_initial_payoffs()

            # Compute the payoff matrices and solve the corresponding LPs.
            # Only the pelican payoffs are computed; the panther's matrix is the negative transpose (the game is zero-sum).
            logger.info('Computing payoffs and mixtures')
            self.compute_payoff_matrix(self.pelicans, self.panthers)
            logger.info("=================================================")
            logger.info("New matrix game:")
            logger.info("As numpy array:")
            logger.info('\n' + str(self.payoffs))
            logger.info("As dataframe:")
            tmp_df = pd.DataFrame(self.payoffs).rename_axis('Pelican', axis = 0).rename_axis('Panther', axis = 1)
            logger.info('\n' + str(tmp_df))

            # save payoff matrix
            np.save('%s/payoffs_%d.npy' % (self.pnm_logs_exp_path, self.pnm_iteration), self.payoffs)

            def get_support_size(mixture):
                # Return the size of the support of the mixed strategy
                return sum(1 for m in mixture if m > 0)

            # Check if we have found a stable NE; if so we are done (after updating the plotting DataFrame)
            if self.pnm_iteration > 0:
                # Both best-response payoffs (against the previous iteration's NE), in terms of pelican payoff
                br_value_pelican = np.dot(mixture_pelicans, self.payoffs[-1, :-1])
                br_value_panther = np.dot(mixture_panthers, self.payoffs[:-1, -1])

                ssize_pelican = get_support_size(mixture_pelicans)
                ssize_panther = get_support_size(mixture_panthers)

                logger.info("@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@")
                logger.info("\nPelican BR payoff: %.3f,\n"
                            "Value of Game: %.3f,\n"
                            "Panther BR payoff: %.3f,\n"
                            "Pelican Supp Size: %d,\n"
                            "Panther Supp Size: %d\n" % (br_value_pelican,
                                                         value_to_pelican,
                                                         br_value_panther,
                                                         ssize_pelican,
                                                         ssize_panther))
                logger.info("@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@")
                values = dict(zip(df_cols, [value_to_pelican,
                                            br_value_pelican,
                                            br_value_panther,
                                            ssize_pelican,
                                            ssize_panther]))
                # DataFrame.append is removed in pandas >= 2.0; concat is the supported equivalent
                df = pd.concat([df, pd.DataFrame([values])], ignore_index=True)

                # Write to csv file
                df_path = os.path.join(self.exp_path, 'values_iter_%02d.csv' % self.pnm_iteration)
                df.to_csv(df_path, index = False)
                helper.get_fig(df)
                fig_path = os.path.join(self.exp_path, 'values_iter_%02d.pdf' % self.pnm_iteration)
                plt.savefig(fig_path)
                print("==========================================")
                print("WRITTEN VALUES DF TO CSV: %s" % df_path)
                print("==========================================")

                # here value_to_pelican is from the last time the subgame was solved
                if abs(br_value_pelican - value_to_pelican) < self.stopping_eps and\
                   abs(br_value_panther - value_to_pelican) < self.stopping_eps:

                    print('Stable Nash Equilibrium found')
                    break

            logger.info("SOLVING NEW GAME:")
            # solve game for pelican
            (mixture_pelicans, value_to_pelican) = lp_solve.solve_zero_sum_game(self.payoffs)
            # with np.printoptions(precision=3):
            logger.info(mixture_pelicans)
            mixture_pelicans /= np.sum(mixture_pelicans)
            # with np.printoptions(precision=3):
            logger.info("After normalisation:")
            logger.info(mixture_pelicans)
            np.save('%s/mixture_pelicans_%d.npy' % (self.pnm_logs_exp_path, self.pnm_iteration), mixture_pelicans)

            # solve game for panther
            (mixture_panthers, value_panthers) = lp_solve.solve_zero_sum_game(-self.payoffs.transpose())
            # with np.printoptions(precision=3):
            logger.info(mixture_panthers)
            mixture_panthers /= np.sum(mixture_panthers)
            # with np.printoptions(precision=3):
            logger.info("After normalisation:")
            logger.info(mixture_panthers)
            np.save('%s/mixture_panthers_%d.npy' % (self.pnm_logs_exp_path, self.pnm_iteration), mixture_panthers)

            # end of logging matrix game and solution
            logger.info("=================================================")

            # Train from scratch or retrain an existing model for pelican
            logger.info('Training pelican')
            
            self.pelican_model = self.bootstrap(self.pelicans, self.pelican_env, mixture_pelicans)
                
            pelican_agent_filepath = self.train_agent_against_mixture('pelican',
                                                                      self.pelicans_tmp_exp_path,
                                                                      self.pelican_model,
                                                                      self.pelican_env,
                                                                      self.panthers,
                                                                      mixture_panthers,
                                                                      self.training_steps)

            # Train from scratch or retrain an existing model for panther
            logger.info('Training panther')
            
            self.panther_model = self.bootstrap(self.panthers, self.panther_env, mixture_panthers)
            
            panther_agent_filepath = self.train_agent_against_mixture('panther',
                                                                     self.panthers_tmp_exp_path,
                                                                     self.panther_model,
                                                                     self.panther_env,
                                                                     self.pelicans,
                                                                     mixture_pelicans,
                                                                     self.training_steps)

            logger.info("PNM iteration lasted: %d seconds" % (time.time() - start))

            if self.pnm_iteration > 0 and self.pnm_iteration % self.testing_interval == 0:
                # Find best pelican (protagonist) against panther (opponent) mixture
                candidate_pelican_rbbr_fpaths, candidate_pelican_rbbr_win_percentages = self.iter_train_against_mixture(
                                                self.exploit_n_rbbrs,       # number of resource-bounded best responses
                                                self.pelicans_tmp_exp_path,
                                                self.pelican_model,         # driving agent that we train
                                                self.pelican_env,           # a single env or a subproc vectorised env
                                                self.pelicans,              # filepaths to existing models
                                                mixture_pelicans,           # mixture for bootstrapping
                                                self.panthers,              # opponent policy filepaths
                                                mixture_panthers)           # opponent mixture

                logger.info("################################################")
                logger.info('candidate_pelican_rbbr_win_percentages: %s' %  np.round(candidate_pelican_rbbr_win_percentages,2))
                logger.info("################################################")
                br_values_pelican = np.round(candidate_pelican_rbbr_win_percentages,2).tolist()

                candidate_panther_rbbr_fpaths, candidate_panther_rbbr_win_percentages = self.iter_train_against_mixture(
                                                self.exploit_n_rbbrs,       # number of resource-bounded best responses
                                                self.panthers_tmp_exp_path,
                                                self.panther_model,         # driving agent that we train
                                                self.panther_env,           # a single env or a subproc vectorised env
                                                self.panthers,              # filepaths to existing models
                                                mixture_panthers,           # mixture for bootstrapping
                                                self.pelicans,              # opponent policy filepaths
                                                mixture_pelicans)           # opponent mixture

                logger.info("################################################")
                logger.info('candidate_panther_rbbr_win_percentages: %s' % np.round(candidate_panther_rbbr_win_percentages,2))
                logger.info("################################################")
                br_values_panther = [1-p for p in np.round(candidate_panther_rbbr_win_percentages,2)]

                values = dict(zip(exploit_df_cols, [self.pnm_iteration,
                                                    value_to_pelican, 
                                                    br_values_pelican,
                                                    br_values_panther]))
                exploit_df = pd.concat([exploit_df, pd.DataFrame([values])], ignore_index=True)

                # add medians
                exploit_df['pelican_median'] = exploit_df['Pelican_BR_Payoffs'].apply(np.median)
                exploit_df['panther_median'] = exploit_df['Panther_BR_Payoffs'].apply(np.median)

                # Write to csv file
                df_path = os.path.join(self.exp_path, 'exploit_iter_%02d.csv' % self.pnm_iteration)

                tmp_df = exploit_df.set_index('iter')
                tmp_df.to_csv(df_path, index = True)

                helper.get_fig_with_exploit(df, tmp_df)
                fig_path = os.path.join(self.exp_path, 'values_with_exploit_iter_%02d.pdf' % self.pnm_iteration)
                plt.savefig(fig_path)
                print("==========================================")
                print("WRITTEN EXPLOIT DF TO CSV: %s" % df_path)
                print("==========================================")

                if self.video_flag:
                    # Occasionally output useful things along the way: make videos
                    verbose = False
                    video_path =  os.path.join(self.exp_path, 'pelican_pnm_iter_%02d.mp4' % self.pnm_iteration)
                    basewidth,hsize = helper.make_video_VEC_ENV(self.pelican_model, 
                                                                self.pelican_env, 
                                                                video_path,
                                                                fps=self.fps,
                                                                basewidth=self.basewidth,
                                                                n_steps=self.video_steps,
                                                                verbose=verbose)
                                                                
                    video_path =  os.path.join(self.exp_path, 'panther_pnm_iter_%02d.mp4' % self.pnm_iteration)
                    basewidth,hsize = helper.make_video_VEC_ENV(self.panther_model, 
                                                                self.panther_env, 
                                                                video_path,
                                                                fps=self.fps,
                                                                basewidth=self.basewidth,
                                                                n_steps=self.video_steps,
                                                                verbose=verbose)


        # Saving final mixture and corresponding agents
        logger.info("################################################")
        logger.info("Saving final pelican mixtures and agents:")
        support_pelicans = np.nonzero(mixture_pelicans)[0]
        mixture_pelicans = mixture_pelicans[support_pelicans]
        np.save(self.exp_path + '/final_mixture_pelicans.npy', mixture_pelicans)
        logger.info("Final pelican mixture saved to: %s" % (self.exp_path + '/final_mixture_pelicans.npy'))
        for i, idx in enumerate(support_pelicans):
            self.pelican_model = helper.loadAgent(glob.glob(self.pelicans[idx] + "/*.zip")[0], self.model_type)
            agent_filepath, _, _ = helper.save_model_with_env_settings(self.pelicans_tmp_exp_path,
                                                                       self.pelican_model,
                                                                       self.model_type,
                                                                       self.pelican_env,
                                                                       self.basicdate + "_ps_" + str(i))
            logger.info("Saving pelican %d to %s" % (i, agent_filepath))
        support_panthers = np.nonzero(mixture_panthers)[0]
        mixture_panthers = mixture_panthers[support_panthers]
        np.save(self.exp_path + '/final_mixture_panthers.npy', mixture_panthers)
        logger.info("Final panther mixture saved to: %s" % (self.exp_path + '/final_mixture_panthers.npy'))
        for i, idx in enumerate(support_panthers):
            self.panther_model = helper.loadAgent(glob.glob(self.panthers[idx] + "/*.zip")[0], self.model_type)
            agent_filepath, _, _ = helper.save_model_with_env_settings(self.panthers_tmp_exp_path,
                                                                       self.panther_model,
                                                                       self.model_type,
                                                                       self.panther_env,
                                                                       self.basicdate + "_ps_" + str(i))
            logger.info("Saving panther %d to %s" % (i, agent_filepath))
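lp_solve.solve_zero_sum_game is a project-specific helper, but the computation it performs is the classic linear program for a zero-sum matrix game: find the row player's mixture that maximises the payoff it can guarantee against every opponent column. A minimal sketch of an equivalent solver built on scipy.optimize.linprog (illustrative only; solve_zero_sum_game_lp is a hypothetical stand-in, not the project's implementation):

 import numpy as np
 from scipy.optimize import linprog

 def solve_zero_sum_game_lp(payoffs):
     # Variables z = [x_1, ..., x_n, v]; linprog minimises, so minimise -v
     n_rows, n_cols = payoffs.shape
     c = np.zeros(n_rows + 1)
     c[-1] = -1.0
     # One constraint per opponent column j: v - sum_i x_i * payoffs[i, j] <= 0
     A_ub = np.hstack([-payoffs.T, np.ones((n_cols, 1))])
     b_ub = np.zeros(n_cols)
     # The mixture x must be a probability distribution
     A_eq = np.append(np.ones(n_rows), 0.0).reshape(1, -1)
     b_eq = np.array([1.0])
     bounds = [(0, None)] * n_rows + [(None, None)]  # v may be negative
     res = linprog(c, A_ub=A_ub, b_ub=b_ub, A_eq=A_eq, b_eq=b_eq, bounds=bounds)
     return res.x[:-1], res.x[-1]

 # Matching pennies: the unique NE is the uniform mixture with game value 0
 mixture, value = solve_zero_sum_game_lp(np.array([[1.0, -1.0], [-1.0, 1.0]]))
 print(np.round(mixture, 3), round(value, 3))  # [0.5 0.5] 0.0

As in run_pnm above, the column player's mixture falls out of the same routine applied to -payoffs.T, since the game is zero-sum.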