Example #1
def DQN(state_dim,
        action_dim,
        n_actions,
        gamma,
        layers=(32, ),
        initial_params=None,
        target_update_freq=500):

    Q = MLPQFunction(state_dim,
                     n_actions,
                     layers=layers,
                     initial_params=initial_params)
    Q_target = MLPQFunction(state_dim,
                            n_actions,
                            layers=layers,
                            initial_params=initial_params)

    if initial_params is None:
        # Start the target network from Q's weights; copy so the two
        # networks do not share the same underlying parameter array.
        Q_target._w = Q._w.copy()
    operator = DQNOperator(state_dim, action_dim, gamma, Q_target,
                           target_update_freq)

    return Q, operator
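
For orientation, a minimal sketch of how this factory might be called; the dimensions and hyperparameters below are illustrative assumptions, not values taken from the snippet:

# Hypothetical usage of the DQN factory above; every number is a placeholder.
state_dim, action_dim, n_actions = 4, 1, 2
gamma = 0.99
Q, operator = DQN(state_dim, action_dim, n_actions, gamma,
                  layers=(32, 32), target_update_freq=500)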
Example #2
elif env == "three-room-gw":
    mdps = [
        ThreeRoomGridworld(np.array([gw_size, gw_size]), door_x=(d1, d2))
        for (d1, d2) in zip(doors, doors2)
    ]
eval_states = [np.array([0., 0.]) for _ in range(10)]

state_dim = mdps[0].state_dim
action_dim = 1
n_actions = mdps[0].action_space.n
K = n_basis**2

# Create BellmanOperator (it operates on the K-dimensional RBF feature space)
operator = MellowBellmanOperator(kappa, tau, xi, mdps[0].gamma, K, action_dim)
# Create Q Function (layers=None: no hidden layers, so Q is linear in the K features)
Q = MLPQFunction(K, n_actions, layers=None)
# Create RBFs
rbf = build_features_gw_state(gw_size, n_basis, state_dim)
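
For intuition (this is not the repo's actual build_features_gw_state), a uniform grid of Gaussian bumps over a gw_size x gw_size gridworld produces exactly the K = n_basis**2 features the linear Q above consumes; a self-contained sketch:

import numpy as np

def gaussian_rbf_features(state, gw_size, n_basis):
    # n_basis x n_basis Gaussian centers on a uniform grid over the world
    c = np.linspace(0., gw_size, n_basis)
    cx, cy = np.meshgrid(c, c)
    centers = np.stack([cx.ravel(), cy.ravel()], axis=1)  # (n_basis**2, 2)
    width = (gw_size / (n_basis - 1)) ** 2
    # One Gaussian response per center -> K = n_basis**2 features
    return np.exp(-np.sum((state - centers) ** 2, axis=1) / (2. * width))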


def run(mdp, seed=None):
    return learn(mdp,
                 Q,
                 operator,
                 max_iter=max_iter,
                 buffer_size=buffer_size,
                 batch_size=batch_size,
                 alpha_adam=alpha_adam,
                 alpha_sgd=alpha_sgd,
                 lambda_=lambda_,
                 n_weights=n_weights,
Example #3
]
mdps = [MountainCarEnv(vel[i]) for i in range(n_runs)]
n_eval_episodes = 5

state_dim = mdps[0].state_dim
action_dim = 1
n_actions = mdps[0].action_space.n

# Create BellmanOperator
operator = MellowBellmanOperator(kappa, tau, xi, mdps[0].gamma, state_dim,
                                 action_dim)
# Create Q Function: one hidden layer of size l1, plus a second of size l2
# only when l2 > 0
layers = [l1]
if l2 > 0:
    layers.append(l2)
Q = MLPQFunction(state_dim, n_actions, layers=layers)


def run(mdp, seed=None):
    return learn(mdp,
                 Q,
                 operator,
                 max_iter=max_iter,
                 buffer_size=buffer_size,
                 batch_size=batch_size,
                 alpha_adam=alpha_adam,
                 alpha_sgd=alpha_sgd,
                 lambda_=lambda_,
                 n_weights=n_weights,
                 train_freq=train_freq,
                 eval_freq=eval_freq,
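
The snippet is cut off mid-call, but the run(mdp, seed) wrapper in Examples 2 and 3 suggests one independent learning run per task. A possible driver, assuming joblib is available (the actual launch code is not shown):

from joblib import Parallel, delayed

# Hypothetical driver; n_jobs and the seeding scheme are assumptions.
results = Parallel(n_jobs=len(mdps))(
    delayed(run)(mdp, seed=i) for i, mdp in enumerate(mdps))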
Example #4
    gamma = 0.99
    state_dim = 2
    action_dim = 1
    n_actions = 10

    # torch.manual_seed(300)
    # np.random.seed(300)
    # Create BellmanOperator
    operator = MellowBellmanOperator(kappa, tau, xi, gamma, state_dim,
                                     action_dim)
    operator2 = mellow(kappa, tau, xi, gamma, state_dim, action_dim)
    # Create Q Function
    layers = [l1]
    if l2 > 0:
        layers.append(l2)
    Q = MLPQFunction(state_dim, n_actions, layers=layers)
    Q2 = mlp(state_dim, n_actions, layers)

    # Draw one random weight vector and share it between both implementations
    w = torch.randn(Q._w.size).numpy()
    Q._w = w
    Q2._w = w
    weights = torch.randn(5, w.shape[0], requires_grad=True)

    # Random transition batch; the column widths suggest the layout
    # [t, state, action, reward, next_state, absorbing]
    samples = np.random.randn(10,
                              1 + state_dim + action_dim + 1 + state_dim + 1)
    samples[:, -1] = 0.  # mark no transition as absorbing
    # np.random.random_integers was removed from NumPy; randint's upper bound
    # is exclusive, so this draws actions in {0, ..., n_actions - 1}
    samples[:, action_dim + state_dim] = np.random.randint(
        0, n_actions, size=samples.shape[0])
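
To make the column arithmetic above concrete, a small helper that slices one row under the assumed [t, state, action, reward, next_state, absorbing] layout (the labels are inferred from the widths, not stated in the code):

def split_sample(sample, state_dim, action_dim):
    # Assumed layout: [t, state, action, reward, next_state, absorbing]
    i = 1  # skip the leading scalar (presumably a time index)
    s = sample[i:i + state_dim]; i += state_dim
    a = sample[i:i + action_dim]; i += action_dim
    r = sample[i]; i += 1
    s_next = sample[i:i + state_dim]; i += state_dim
    absorbing = sample[i]
    return s, a, r, s_next, absorbing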