from Env.CustomEnv.StablizerOneD import StablizerOneD
import random
import matplotlib.pyplot as plt

# Run a random policy in the 1-D stabilizer environment and record the trajectory.
env = StablizerOneD()
state = env.reset()
print(state)

stateSet = [state]
for i in range(1000):
    action = random.randint(0, env.nbActions - 1)
    state, reward, done, _ = env.step(action)
    # Uncomment for per-step debugging:
    # print('step ' + str(i), state, reward, done)
    stateSet.append(state)
    if done:
        break

print(stateSet)

# Plot the visited states with the environment's built-in trajectory renderer.
fig = plt.figure(2)
ax = plt.axes()
env.render_traj(stateSet, ax)
plt.show()
from copy import deepcopy
import torch.optim as optim

# DQN hyperparameters.
config = {}
config['epsThreshold'] = 0.1           # epsilon for epsilon-greedy exploration
config['targetNetUpdateStep'] = 100    # steps between target-network syncs
config['memoryCapacity'] = 200         # replay-buffer size
config['trainBatchSize'] = 32
config['gamma'] = 0.9                  # discount factor
config['learningRate'] = 0.001
config['netGradClip'] = 1              # gradient-clipping threshold
config['logFlag'] = True
config['logFileName'] = 'StabilizerOneDLog/traj'
config['logFrequency'] = 100
config['priorityMemoryOption'] = False
config['netUpdateOption'] = 'doubleQ'  # use the double-Q target
config['netUpdateFrequency'] = 1
config['priorityMemory_absErrUpper'] = 5

# Size the policy and target networks from the environment's dimensions.
env = StablizerOneD()
N_S = env.stateDim
N_A = env.nbActions

netParameter = dict()
netParameter['n_feature'] = N_S
netParameter['n_hidden'] = [100]
netParameter['n_output'] = N_A

# MultiLayerNetRegression is this project's fully connected network class
# (its import is not shown in the original source).
policyNet = MultiLayerNetRegression(netParameter['n_feature'],
                                    netParameter['n_hidden'],
                                    netParameter['n_output'])
targetNet = deepcopy(policyNet)
optimizer = optim.Adam(policyNet.parameters(), lr=config['learningRate'])
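# Since config['netUpdateOption'] is 'doubleQ', the learner presumably forms its
# TD target with double Q-learning. The sketch below is illustrative only: the
# batch tensors (states, actions, rewards, nextStates, doneMasks) and the helper
# names are assumptions, not taken from this project's code.
import torch
import torch.nn.functional as F

def double_q_target(policyNet, targetNet, rewards, nextStates, doneMasks, gamma):
    # Double Q-learning: the policy net selects the greedy next action and the
    # target net evaluates it, which reduces overestimation bias.
    with torch.no_grad():
        nextActions = policyNet(nextStates).argmax(dim=1, keepdim=True)
        nextQ = targetNet(nextStates).gather(1, nextActions).squeeze(1)
        return rewards + gamma * nextQ * (1.0 - doneMasks)

def train_step(states, actions, rewards, nextStates, doneMasks):
    # One gradient step on a replay batch; actions is a LongTensor of indices.
    target = double_q_target(policyNet, targetNet, rewards, nextStates,
                             doneMasks, config['gamma'])
    q = policyNet(states).gather(1, actions.unsqueeze(1)).squeeze(1)
    loss = F.smooth_l1_loss(q, target)
    optimizer.zero_grad()
    loss.backward()
    torch.nn.utils.clip_grad_norm_(policyNet.parameters(), config['netGradClip'])
    optimizer.step()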
def _thunk():
    # Environment factory for one parallel worker; `config` and the worker
    # index `i` are captured from the enclosing scope.
    env = StablizerOneD(config, i)
    return env
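# _thunk is the usual closure pattern for vectorized environments: wrapping it
# in a factory gives each worker its own zero-argument constructor. A minimal
# usage sketch; make_env and nWorkers are illustrative names, not from the source.
def make_env(config, i):
    def _thunk():
        return StablizerOneD(config, i)
    return _thunk

nWorkers = 4  # illustrative worker count
envFns = [make_env(config, i) for i in range(nWorkers)]
envs = [fn() for fn in envFns]  # a vector-env wrapper would invoke these, one per subprocess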