from copy import deepcopy
import random

import matplotlib.pyplot as plt
import torch.optim as optim

from Env.CustomEnv.StablizerOneD import StablizerOneD

env = StablizerOneD()
state = env.reset()
print(state)


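# Roll out a random policy for up to 1000 steps, recording every visited state
# so the trajectory can be plotted below.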
stateSet = []
stateSet.append(state)
for i in range(1000):
    action = random.randint(0, env.nbActions - 1)
    state, reward, done, _ = env.step(action)
    #print('step ' + str(i))
    #print(state)
    #print(reward)
    #print(done)
    stateSet.append(state)
    if done:
        break


print(stateSet)
fig = plt.figure(2)
ax = plt.axes()
env.render_traj(stateSet, ax)
plt.show()
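
# Hyperparameters for the DQN-style training setup (double-Q updates, periodic
# target-network syncing, replay memory, trajectory logging).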
config = dict()
config['epsThreshold'] = 0.1
config['targetNetUpdateStep'] = 100
config['memoryCapacity'] = 200
config['trainBatchSize'] = 32
config['gamma'] = 0.9
config['learningRate'] = 0.001
config['netGradClip'] = 1
config['logFlag'] = True
config['logFileName'] = 'StabilizerOneDLog/traj'
config['logFrequency'] = 100
config['priorityMemoryOption'] = False
config['netUpdateOption'] = 'doubleQ'
config['netUpdateFrequency'] = 1
config['priorityMemory_absErrUpper'] = 5

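# Recreate the environment and define the network architecture:
# state dimension -> one hidden layer of 100 units -> number of actions.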
env = StablizerOneD()
N_S = env.stateDim
N_A = env.nbActions

netParameter = dict()
netParameter['n_feature'] = N_S
netParameter['n_hidden'] = [100]
netParameter['n_output'] = N_A

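# Build the policy network (MultiLayerNetRegression is assumed to come from the
# project's network module; its import is not shown in this snippet) and a
# target network as a deep copy, used by the 'doubleQ' update configured above.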
policyNet = MultiLayerNetRegression(netParameter['n_feature'],
                                    netParameter['n_hidden'],
                                    netParameter['n_output'])

targetNet = deepcopy(policyNet)

optimizer = optim.Adam(policyNet.parameters(), lr=config['learningRate'])

# 'make_env' is a wrapper name introduced here so this fragment is runnable;
# the (config, i) constructor signature is taken from the original snippet.
def make_env(config, i):
    def _thunk():
        env = StablizerOneD(config, i)
        return env
    return _thunk
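
# Example usage (assumption: the thunks would be handed to whatever vectorized
# environment wrapper the project uses, which is not shown in this snippet):
# envFns = [make_env(config, i) for i in range(4)]
# envs = [fn() for fn in envFns]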