def __init__(self, verbose=False):
    """Initialise the state/action spaces and the constant-valued tables.

    Args:
        verbose: Flag stored unchanged on the instance as ``self.verbose``.

    The constructor sets:
        * ``states`` / ``actions`` -- hard-coded integer lists.
        * ``S`` / ``A`` -- read from ``SoccerEnv`` instances; they are fed
          to ``pd.MultiIndex.from_tuples``, so they are expected to be
          iterables of tuples.
        * ``q_table_A`` / ``q_table_B`` -- float64 frames filled with 1,
          rows indexed by ``S`` and columns by ``A``.
        * ``vi_A`` / ``vi_B`` -- float64 frames filled with 1, rows indexed
          by ``S`` with a single ``'value'`` column.
    """
    self.states = [0, 1, 2, 3, 4, 5, 6, 7]
    # Fixed: this list used to read [-4, -1, 0, -1, 4], i.e. -1 appeared
    # twice and +1 was missing.  Presumably these are cell offsets on a
    # 4-wide grid (up / left / stay / right / down) -- confirm against
    # SoccerEnv.
    self.actions = [-4, -1, 0, 1, 4]

    # NOTE(review): two separate SoccerEnv instances are created here, as in
    # the original code.  One shared instance would probably do, but the
    # constructor is not visible from this block -- confirm before merging.
    self.S = SoccerEnv().S
    self.A = SoccerEnv().A

    def ones_frame(columns):
        # A fresh row index is built for every frame so that no two tables
        # share an Index object.
        return pd.DataFrame(
            1,
            index=pd.MultiIndex.from_tuples(self.S),
            columns=columns,
            dtype=np.float64,
        )

    def ones_q_table():
        # Every entry starts at 1.0 (a random initialisation was tried
        # previously and is disabled).
        return ones_frame(pd.MultiIndex.from_tuples(self.A))

    self.q_table_A = ones_q_table()
    self.q_table_B = ones_q_table()
    self.verbose = verbose
    self.vi_A = ones_frame(['value'])
    self.vi_B = ones_frame(['value'])
def __init__(self, verbose=False):
    """Initialise the state/action spaces and two zero-filled tables.

    Args:
        verbose: Flag stored unchanged on the instance as ``self.verbose``.

    The constructor sets:
        * ``states`` / ``actions`` -- hard-coded integer lists.
        * ``S`` / ``A`` -- read from ``SoccerEnv`` instances; they are fed
          to ``pd.MultiIndex.from_tuples``, so they are expected to be
          iterables of tuples.
        * ``q_table_A`` / ``q_table_B`` -- float64 frames filled with 0,
          rows indexed by ``S`` and columns by ``A``.
    """
    self.states = [0, 1, 2, 3, 4, 5, 6, 7]
    # Fixed: this list used to read [-4, -1, 0, -1, 4], i.e. -1 appeared
    # twice and +1 was missing.  Presumably these are cell offsets on a
    # 4-wide grid (up / left / stay / right / down) -- confirm against
    # SoccerEnv.
    self.actions = [-4, -1, 0, 1, 4]

    # NOTE(review): two separate SoccerEnv instances are created here, as in
    # the original code.  One shared instance would probably do, but the
    # constructor is not visible from this block -- confirm before merging.
    self.S = SoccerEnv().S
    self.A = SoccerEnv().A

    def zero_q_table():
        # Fresh indexes per table so the two frames never share an Index
        # object.  (A small random-integer initialisation was tried
        # previously and is disabled; tables start at exactly 0.0.)
        return pd.DataFrame(
            0,
            index=pd.MultiIndex.from_tuples(self.S),
            columns=pd.MultiIndex.from_tuples(self.A),
            dtype=np.float64,
        )

    self.q_table_A = zero_q_table()
    self.q_table_B = zero_q_table()
    self.verbose = verbose
def __init__(self, verbose=False):
    """Create the integer state/action lists and two zero-filled tables.

    Args:
        verbose: Flag stored unchanged on the instance as ``self.verbose``.

    ``q_table_A`` and ``q_table_B`` are float64 frames of zeros with one row
    per entry of ``self.states`` and one column per entry of
    ``self.actions``.  ``self.S`` and ``self.A`` are read from ``SoccerEnv``
    but are not used for the tables in this constructor.
    """
    self.states = list(range(8))
    self.actions = [-4, -1, 0, 1, 4]

    # Each attribute is taken from its own freshly constructed environment.
    self.S = SoccerEnv().S
    self.A = SoccerEnv().A

    def blank_table():
        # New frame on every call; rows are states, columns are actions.
        return pd.DataFrame(
            data=0,
            index=self.states,
            columns=self.actions,
            dtype=np.float64,
        )

    self.q_table_A = blank_table()
    self.q_table_B = blank_table()
    self.verbose = verbose
def __init__(self, verbose=False):
    """Create the state/action spaces and two zero-filled tables.

    Args:
        verbose: Flag stored unchanged on the instance as ``self.verbose``.

    ``q_table_A`` and ``q_table_B`` are float64 frames of zeros whose rows
    are a ``MultiIndex`` built from ``self.S`` and whose columns are a
    ``MultiIndex`` built from ``self.A``.
    """
    self.states = list(range(8))
    self.actions = [-4, 4, 1, -1, 0]

    # Each attribute is taken from its own freshly constructed environment.
    self.S = SoccerEnv().S
    self.A = SoccerEnv().A

    def blank_table():
        # Indexes are rebuilt per call so the two tables stay independent.
        row_index = pd.MultiIndex.from_tuples(self.S)
        column_index = pd.MultiIndex.from_tuples(self.A)
        return pd.DataFrame(
            data=0,
            index=row_index,
            columns=column_index,
            dtype=np.float64,
        )

    self.q_table_A = blank_table()
    self.q_table_B = blank_table()
    self.verbose = verbose
# "ce_Q/time_ce_a_%0.5f_adecay_%0.5f_timeout%f.png" % (start_alpha, alpha_decay_rate, timeout)) # plt.clf() # # q_output_A.to_csv('ce_Q/ce_A_a_%0.5f_adecay_%0.5f_timeout%f.csv' % (start_alpha, alpha_decay_rate, timeout)) # q_output_B.to_csv('ce_Q/ce_B_a_%0.5f_adecay_%0.5f_timeout%f.csv' % (start_alpha, alpha_decay_rate, timeout)) # end of game print('game over') # env.destroy() if __name__ == "__main__": seed = 1 # seed np.random.seed(seed) '''soccer env''' env = SoccerEnv() env.seed(seed) for alpha in [ # 1, # 0.75, 0.5, # 0.25 ]: for alpha_decay_rate in [ # 1, # 0.99999, # # 0.9999, 0.9995, # 0.999, ]: