示例#1
0
from VFA_Net import NeuralNetwork

# Layer specification, one dict per layer: 1 -> 64 with "quadratic"
# activation, then 64 -> 1 with activation "none".
nn_arq = [
    dict(input_dim=1, output_dim=64, activation="quadratic"),
    dict(input_dim=64, output_dim=1, activation="none"),
]

# Instantiate the network from the nn_arq layer spec. The bias/double flags are
# passed straight through to NeuralNetwork (their semantics live in VFA_Net).
model = NeuralNetwork(nn_arq, bias=True, double=False)

# 1x1 integer arrays.
# NOTE(review): presumably the system (A, B) and cost (Q, R) terms of a scalar
# linear-quadratic problem — confirm against the code that consumes them.
A = np.array([[1]])
B = np.array([[1]])
Q = np.array([[1]])
R = np.array([[1]])

# 1x1 arrays; NOTE(review): presumably the initial state and initial input — confirm.
x0 = np.array([[-1]])
u0 = np.array([[0]])

# number of time steps to simulate
T = 30
# number of iterations of the dynamical systems for training
NUM_TRIALS = 250
# NOTE(review): presumably the scaling factor applied to the network output — confirm where it is consumed
ALPHA = 100
# NOTE(review): presumably the discount factor — confirm where it is consumed
GAMMA = 0.9
示例#2
0
        "output_dim": 1,
        "activation": "none"
    },
]

# Training hyperparameters.
# NOTE(review): ALPHA is presumably the value passed as `alpha` to loss() — confirm in the training loop.
ALPHA = 100
# NOTE(review): presumably the discount factor (1 = undiscounted) — confirm.
GAMMA = 1
NUM_TRIALS = 100000
BATCH_SIZE = 1


def loss(target, prediction, alpha=1):
    """Return the squared difference between target and alpha * prediction as a float."""
    residual = target - alpha * prediction
    return float(residual**2)


# Network built from the nn_arq layer spec with a fixed seed (seed=1).
model = NeuralNetwork(nn_arq, bias=True, double=True, seed=1)

# Blackjack environment instance (class defined outside this view).
env = Blackjack()

# %%


def process(state):
    """
    inputs a state, returns a parameterization of the state
    
    uncomment return statements for different types of feature mappings, 
    don't forget to change the input dim of the neural net
    """

    ## normalized vector
示例#3
0
# Layer specification used for CRITIC: 3 -> 512 with "relu", then 512 -> 1
# with activation "none".
ctc_arq = [
    {"input_dim": 3, "output_dim": 512, "activation": "relu"},
    {"input_dim": 512, "output_dim": 1, "activation": "none"},
]

# Two networks: ACTOR is a PGNet built from act_arq (defined outside this view),
# CRITIC is a NeuralNetwork built from ctc_arq. Both use bias=True, double=True.
ACTOR = PGNet(act_arq, bias=True, double=True)
CRITIC = NeuralNetwork(ctc_arq, bias=True, double=True)

# Training hyperparameters.
# NOTE(review): ALPHA is presumably the value passed as `alpha` to loss() — confirm in the training loop.
ALPHA = 100
# NOTE(review): presumably the discount factor (1 = undiscounted) — confirm.
GAMMA = 1
NUM_TRIALS = 500000
BATCH_SIZE = 1


def loss(target, prediction, alpha=1):
    """Squared error of the alpha-scaled prediction against target, as a float."""
    scaled_prediction = alpha * prediction
    squared_error = (target - scaled_prediction) ** 2
    return float(squared_error)


# Blackjack environment instance (class defined outside this view).
env = Blackjack()

# %%
import numpy as np

# Layer specification, one dict per layer: 3 -> 512 with "relu", then
# 512 -> 1 with activation "none".
nn_arq = [
    {
        "input_dim": 3,
        "output_dim": 512,
        "activation": "relu",
    },
    {
        "input_dim": 512,
        "output_dim": 1,
        "activation": "none",
    },
]

# Training hyperparameters.
# NOTE(review): ALPHA is presumably the value passed as `alpha` to loss() — confirm in the training loop.
ALPHA = 100
# NOTE(review): presumably the discount factor (1 = undiscounted) — confirm.
GAMMA = 1
NUM_TRIALS = 100000
BATCH_SIZE = 1

def loss(target, prediction, alpha=1):
    """Return (target - alpha * prediction) squared, converted to a float."""
    error = target - alpha * prediction
    return float(error ** 2)

# Network built from the nn_arq layer spec with a fixed seed (seed=1); the
# bias/double/zero flags are passed straight through to NeuralNetwork.
model = NeuralNetwork(nn_arq, bias=True, double=False, zero=True, seed=1)
   
# Blackjack environment instance; process() below reads env.state_space.
env = Blackjack()

# %% 

def process(state):
    """
    Map an environment state to a normalized feature column of shape (3, 1).

    The first two components of ``state`` are divided by the corresponding
    components of ``max(env.state_space)``; the third component is passed
    through unchanged. If a different feature mapping is used here, the input
    dimension of the neural net must be changed to match.

    Parameters
    ----------
    state : indexable
        State with at least three entries (``state[0]``, ``state[1]``,
        ``state[2]`` are read).

    Returns
    -------
    numpy.ndarray
        Array of shape (3, 1).
    """
    # Scan the state space once instead of once per normalized component.
    # NOTE(review): if states are tuples, max() compares them lexicographically,
    # so upper[1] is the second entry of the largest state, not necessarily the
    # largest second entry — confirm this is the intended normalizer.
    upper = max(env.state_space)

    ## normalized vector
    features = [state[0] / upper[0], state[1] / upper[1], state[2]]
    return np.array(features).reshape((3, 1))
# TMAX = 20000 appears to be unnecessary: it does the same thing as NUM_SAMPLES, so NUM_SAMPLES replaces it in the implementation (this can be reverted if TMAX is needed).

## slope (AA) and offset (BB) of the linear function to be learned
AA = 0.7
BB = 0.3


def f_star(x):
    """Evaluate the linear target function AA * x + BB at x."""
    scaled = AA * x
    return scaled + BB


def loss(target, prediction, alpha=1):
    """Return the alpha-normalized squared error as a Python float.

    Computes ``(1 / alpha**2) * (target - alpha * prediction)**2``.

    Parameters
    ----------
    target, prediction : scalar or single-element array
        Values to compare; the difference must hold exactly one element.
    alpha : number, optional
        Scaling applied to ``prediction``; the result is divided by
        ``alpha**2``. Defaults to 1.

    Returns
    -------
    float
        The normalized squared error.

    Raises
    ------
    ValueError
        If the result holds more than one element.
    """
    residual = np.asarray(target - alpha * prediction)
    scaled_sq_error = (1 / (alpha**2)) * np.square(residual)
    # Pull the single element out explicitly: calling float() directly on an
    # array with ndim > 0 (e.g. shape (1, 1)) is deprecated since NumPy 1.25.
    return float(np.asarray(scaled_sq_error).item())


# Network built from nn_arq (defined outside this view); initVar and
# initVarLast are both set to 1 and passed straight through to NeuralNetwork.
model = NeuralNetwork(nn_arq, bias=True, double=True, initVar=1, initVarLast=1)


def process(x):
    """Wrap a single value as a NumPy array of shape (1, 1)."""
    as_array = np.array(x)
    return as_array.reshape((1, 1))


def sample_train(nsample):
    """Draw nsample standard-normal inputs and evaluate f_star on them.

    Returns the pair (xtrain, ytrain), where xtrain has shape (nsample, 1)
    and ytrain is f_star(xtrain).
    """
    inputs = np.random.randn(nsample).reshape((nsample, 1))
    targets = f_star(inputs)
    return inputs, targets


def plot_loss(y):
    fig, ax = plt.subplots()
    label_fontsize = 18
示例#6
0
from VFA_Net import NeuralNetwork

# Layer specification, one dict per layer: 1 -> 64 with "quadratic"
# activation, then 64 -> 1 with activation "none".
nn_arq = [
    {"input_dim": 1, "output_dim": 64, "activation": "quadratic"},
    {"input_dim": 64, "output_dim": 1, "activation": "none"},
]

# Network built from the nn_arq layer spec, without bias terms.
# NOTE(review): `double` is given the string "yes" here, whereas every other
# NeuralNetwork call in view passes a bool — confirm which form NeuralNetwork
# expects before changing it.
model = NeuralNetwork(nn_arq, bias=False, double="yes")

# 1x1 arrays; R2 is the only float-valued one (1.2).
# NOTE(review): presumably the system (A, B) and cost (Q, R1, R2) terms of a
# scalar linear-quadratic problem — confirm against the code that consumes them.
A = np.array([[1]])
B = np.array([[1]])
Q = np.array([[1]])
R1 = np.array([[1]])
R2 = np.array([[1.2]])

# 1x1 arrays; NOTE(review): presumably the initial state and initial input — confirm.
x0 = np.array([[-1]])
u0 = np.array([[0]])

# number of time steps to simulate
T = 30
# number of iterations of the dynamical systems for training
NUM_TRIALS = 1000
# NOTE(review): presumably the scaling factor applied to the network output — confirm where it is consumed
ALPHA = 100