def __init__(
            self,
            # alpha=0.01, gamma=0.9, epsilon=0.9,
            verbose=False):
        """Set up the state/action lists and the per-player lookup tables.

        Attributes created:
            states: the integers 0..7.
            actions: five integer move offsets (-4, -1, 0, 1, 4).
            S, A: copied from a freshly constructed ``SoccerEnv``. Both are
                fed to ``pd.MultiIndex.from_tuples``, so they are expected
                to be sequences of tuples.
            q_table_A, q_table_B: float64 frames filled with 1, rows indexed
                by ``S`` and columns by ``A``.
            vi_A, vi_B: float64 frames filled with 1, rows indexed by ``S``
                with a single ``'value'`` column.
            verbose: the ``verbose`` argument, stored unchanged.

        Args:
            verbose: Flag stored on the instance; it is not read here.
        """
        self.states = [0, 1, 2, 3, 4, 5, 6, 7]
        # Five distinct move offsets; the list used to repeat -1 and omit +1.
        self.actions = [-4, -1, 0, 1, 4]

        # Build the environment once; only its S and A attributes are needed.
        env = SoccerEnv()
        self.S = env.S
        self.A = env.A

        def ones_table(columns):
            # Fresh float64 frame filled with 1, one row per entry of self.S.
            # A new index is built on every call so no two frames share one.
            return pd.DataFrame(
                1,
                index=pd.MultiIndex.from_tuples(self.S),
                columns=columns,
                dtype=np.float64)

        self.q_table_A = ones_table(pd.MultiIndex.from_tuples(self.A))
        self.q_table_B = ones_table(pd.MultiIndex.from_tuples(self.A))
        self.verbose = verbose
        self.vi_A = ones_table(['value'])
        self.vi_B = ones_table(['value'])
    def __init__(
            self,
            # alpha=0.01, gamma=0.9, epsilon=0.9,
            verbose=False):
        """Set up the state/action lists and zero-filled Q-tables.

        Attributes created:
            states: the integers 0..7.
            actions: five integer move offsets (-4, -1, 0, 1, 4).
            S, A: copied from a freshly constructed ``SoccerEnv``. Both are
                fed to ``pd.MultiIndex.from_tuples``, so they are expected
                to be sequences of tuples.
            q_table_A, q_table_B: float64 frames filled with 0, rows indexed
                by ``S`` and columns by ``A``.
            verbose: the ``verbose`` argument, stored unchanged.

        Args:
            verbose: Flag stored on the instance; it is not read here.
        """
        self.states = [0, 1, 2, 3, 4, 5, 6, 7]
        # Five distinct move offsets; the list used to repeat -1 and omit +1.
        self.actions = [-4, -1, 0, 1, 4]

        # Build the environment once; only its S and A attributes are needed.
        env = SoccerEnv()
        self.S = env.S
        self.A = env.A

        def zero_q_table():
            # Q-values start at zero. An earlier variant that drew random
            # integers in [-10, 10) and divided by 1000 was commented out
            # in favour of this.
            return pd.DataFrame(
                0,
                index=pd.MultiIndex.from_tuples(self.S),
                columns=pd.MultiIndex.from_tuples(self.A),
                dtype=np.float64)

        self.q_table_A = zero_q_table()
        self.q_table_B = zero_q_table()
        self.verbose = verbose
    def __init__(self,
                 # alpha=0.01, gamma=0.9, epsilon=0.9,
                 verbose=False):
        """Create the state/action lists and two zero-filled Q-tables.

        ``q_table_A`` and ``q_table_B`` are float64 frames with one row per
        entry of ``states`` (0..7) and one column per entry of ``actions``.
        ``S`` and ``A`` are copied from ``SoccerEnv`` but are not used for
        the tables in this variant.

        Args:
            verbose: Flag stored on the instance; it is not read here.
        """
        self.states = list(range(8))
        self.actions = [-4, -1, 0, 1, 4]
        self.S = SoccerEnv().S
        self.A = SoccerEnv().A

        # Two independent frames built from the same recipe, A first then B.
        self.q_table_A, self.q_table_B = (
            pd.DataFrame(
                0,
                index=self.states,
                columns=self.actions,
                dtype=np.float64,
            )
            for _ in range(2)
        )
        self.verbose = verbose
    def __init__(
            self,
            # alpha=0.01, gamma=0.9, epsilon=0.9,
            verbose=False):
        """Create the state/action lists and two zero-filled Q-tables.

        ``S`` and ``A`` are copied from ``SoccerEnv`` and used as the row
        and column MultiIndex of ``q_table_A`` and ``q_table_B``. Both are
        passed to ``pd.MultiIndex.from_tuples``, so they are expected to be
        sequences of tuples.

        Args:
            verbose: Flag stored on the instance; it is not read here.
        """
        self.states = list(range(8))
        self.actions = [-4, 4, 1, -1, 0]
        self.S = SoccerEnv().S
        self.A = SoccerEnv().A

        def blank_q_table():
            # Fresh float64 frame of zeros: rows from S, columns from A.
            row_index = pd.MultiIndex.from_tuples(self.S)
            column_index = pd.MultiIndex.from_tuples(self.A)
            return pd.DataFrame(0,
                                index=row_index,
                                columns=column_index,
                                dtype=np.float64)

        self.q_table_A = blank_q_table()
        self.q_table_B = blank_q_table()
        self.verbose = verbose
    #     "ce_Q/time_ce_a_%0.5f_adecay_%0.5f_timeout%f.png" % (start_alpha, alpha_decay_rate, timeout))
    # plt.clf()
    #
    # q_output_A.to_csv('ce_Q/ce_A_a_%0.5f_adecay_%0.5f_timeout%f.csv' % (start_alpha, alpha_decay_rate, timeout))
    # q_output_B.to_csv('ce_Q/ce_B_a_%0.5f_adecay_%0.5f_timeout%f.csv' % (start_alpha, alpha_decay_rate, timeout))

    # end of game
    print('game over')
    # env.destroy()


if __name__ == "__main__":
    seed = 1  # seed
    np.random.seed(seed)
    '''soccer env'''
    env = SoccerEnv()
    env.seed(seed)

    for alpha in [
            # 1,
            # 0.75,
            0.5,
            # 0.25
    ]:
        for alpha_decay_rate in [
                # 1,
                # 0.99999,
                # # 0.9999,
                0.9995,
                #    0.999,
        ]: