from VFA_Net import NeuralNetwork

# Two-layer layout: 1 input -> 64 units ("quadratic" activation) -> 1 output
# ("none" activation).
nn_arq = [
    {"input_dim": 1, "output_dim": 64, "activation": "quadratic"},
    {"input_dim": 64, "output_dim": 1, "activation": "none"},
]

model = NeuralNetwork(nn_arq, bias=True, double=False)

# All problem data are 1x1 integer arrays.
# NOTE(review): the names suggest linear dynamics (A, B) and quadratic cost
# weights (Q, R) — confirm against where they are used.
A = np.array([[1]])
B = np.array([[1]])
Q = np.array([[1]])
R = np.array([[1]])

# Initial state and initial input, also 1x1.
x0 = np.array([[-1]])
u0 = np.array([[0]])

# number of time steps to simulate
T = 30

# number of iterations of the dynamical systems for training
NUM_TRIALS = 250

ALPHA = 100
GAMMA = 0.9
"output_dim": 1, "activation": "none" }, ] ALPHA = 100 GAMMA = 1 NUM_TRIALS = 100000 BATCH_SIZE = 1 def loss(target, prediction, alpha=1): return float((target - alpha * prediction)**2) model = NeuralNetwork(nn_arq, bias=True, double=True, seed=1) env = Blackjack() # %% def process(state): """ inputs a state, returns a parameterization of the state uncomment return statements for different types of feature mappings, don't forget to change the input dim of the neural net """ ## normalized vector
# Critic layout: 3 inputs -> 512 units ("relu" activation) -> 1 output
# ("none" activation).
ctc_arq = [
    {"input_dim": 3, "output_dim": 512, "activation": "relu"},
    {"input_dim": 512, "output_dim": 1, "activation": "none"},
]

# The actor is built from act_arq, which is defined outside this excerpt.
ACTOR = PGNet(act_arq, bias=True, double=True)
CRITIC = NeuralNetwork(ctc_arq, bias=True, double=True)

ALPHA = 100
GAMMA = 1
NUM_TRIALS = 500000
BATCH_SIZE = 1


def loss(target, prediction, alpha=1):
    """Return the squared difference between ``target`` and ``alpha * prediction``.

    The result is converted to a plain Python ``float``.
    """
    # NOTE(review): if the inputs are arrays with ndim > 0, float() relies on
    # size-1 array conversion, which newer NumPy releases deprecate — confirm
    # what callers pass in.
    residual = target - alpha * prediction
    return float(residual ** 2)


env = Blackjack()

# %%
import numpy as np

# Network layout: 3 inputs -> 512 units ("relu" activation) -> 1 output
# ("none" activation).
nn_arq = [
    {"input_dim": 3, "output_dim": 512, "activation": "relu"},
    {"input_dim": 512, "output_dim": 1, "activation": "none"},
]

ALPHA = 100
GAMMA = 1
NUM_TRIALS = 100000
BATCH_SIZE = 1


def loss(target, prediction, alpha=1):
    """Return the squared difference between ``target`` and ``alpha * prediction``.

    The result is converted to a plain Python ``float``.
    """
    # NOTE(review): if the inputs are arrays with ndim > 0, float() relies on
    # size-1 array conversion, which newer NumPy releases deprecate — confirm
    # what callers pass in.
    residual = target - alpha * prediction
    return float(residual ** 2)


model = NeuralNetwork(nn_arq, bias=True, double=False, zero=True, seed=1)
env = Blackjack()


# %%
def process(state):
    """
    Map a state to a (3, 1) feature column for the network.

    The first two entries of ``state`` are divided by the matching entries of
    ``max(env.state_space)``; the third entry is passed through unchanged.

    Swap in a different return statement for other feature mappings, and
    don't forget to change the input dim of the neural net accordingly.
    """
    ## normalized vector
    # NOTE(review): max() over the state space picks the largest element under
    # Python's default ordering; its second component is the column maximum
    # only if the space is laid out accordingly — confirm against Blackjack.
    upper = max(env.state_space)
    features = [state[0] / upper[0], state[1] / upper[1], state[2]]
    return np.array(features).reshape((3, 1))
# TMAX = 20000 seems unnecessary: it does the same thing as NUM_SAMPLES.
# It has been replaced in the implementation; that can always be reverted.

## slope and offset of the function to be learned
AA = 0.7
BB = 0.3


def f_star(x):
    """Evaluate the target line ``AA * x + BB`` at ``x``."""
    scaled = AA * x
    return scaled + BB


def loss(target, prediction, alpha=1):
    """Return ``(target - alpha * prediction)**2`` scaled by ``1 / alpha**2``.

    The result is converted to a plain Python ``float``.
    """
    # NOTE(review): if the inputs are arrays with ndim > 0, float() relies on
    # size-1 array conversion, which newer NumPy releases deprecate — confirm
    # what callers pass in.
    residual = target - alpha * prediction
    scale = 1 / (alpha**2)
    return float(scale * np.square(residual))


model = NeuralNetwork(nn_arq, bias=True, double=True, initVar=1, initVarLast=1)


def process(x):
    """Wrap ``x`` into a 1x1 NumPy array."""
    return np.array(x).reshape((1, 1))


def sample_train(nsample):
    """Draw ``nsample`` standard-normal inputs and their ``f_star`` targets.

    Returns a pair ``(xtrain, ytrain)``; ``xtrain`` has shape ``(nsample, 1)``.
    """
    inputs = np.random.randn(nsample).reshape(nsample, 1)
    targets = f_star(inputs)
    return inputs, targets  # xtrain, ytrain


def plot_loss(y):
    fig, ax = plt.subplots()
    label_fontsize = 18
from VFA_Net import NeuralNetwork

# Two-layer layout: 1 input -> 64 units ("quadratic" activation) -> 1 output
# ("none" activation).
nn_arq = [
    {"input_dim": 1, "output_dim": 64, "activation": "quadratic"},
    {"input_dim": 64, "output_dim": 1, "activation": "none"},
]

# NOTE(review): `double` is passed as the string "yes" here — confirm that
# NeuralNetwork expects a string rather than a boolean for this argument.
model = NeuralNetwork(nn_arq, bias=False, double="yes")

# All problem data are 1x1 arrays.
# NOTE(review): the names suggest linear dynamics (A, B), a state cost (Q) and
# two alternative input costs (R1, R2) — confirm against where they are used.
A = np.array([[1]])
B = np.array([[1]])
Q = np.array([[1]])
R1 = np.array([[1]])
R2 = np.array([[1.2]])

# Initial state and initial input, also 1x1.
x0 = np.array([[-1]])
u0 = np.array([[0]])

# number of time steps to simulate
T = 30

# number of iterations of the dynamical systems for training
NUM_TRIALS = 1000

ALPHA = 100