Exemplos de include_bias em Python, exemplos de simplepg.simple_utils.include_bias em Python

Exemplo n.º 1

0

Exibir arquivo

def point_get_grad_logp_action(theta, ob, action):
    """
    Return the outer product of the residual ``action - theta.dot(ob_1)``
    with ``ob_1``, where ``ob_1 = include_bias(ob)``. This is the gradient
    of ``-0.5 * ||action - theta.dot(ob_1)||^2`` with respect to theta.

    :param theta: A matrix of size |A| * (|S|+1)
    :param ob: A vector of size |S|
    :param action: A vector of size |A|
    :return: A matrix of size |A| * (|S|+1)
    """
    # Augment the observation once and reuse it for both the mean and the
    # outer product (include_bias is defined elsewhere; presumably it appends
    # a constant bias entry so the length matches theta's columns).
    ob_1 = include_bias(ob)
    residual = action - theta.dot(ob_1)
    return np.outer(residual, ob_1)

Exemplo n.º 2

0

Exibir arquivo

def point_get_grad_logp_action(theta, ob, action):
    """
    Return ``(action - theta.dot(ob_1)) ob_1^T`` with
    ``ob_1 = include_bias(ob)``, i.e. the gradient of
    ``-0.5 * ||action - theta.dot(ob_1)||^2`` with respect to theta.

    :param theta: A matrix of size |A| * (|S|+1)
    :param ob: A vector of size |S|
    :param action: A vector of size |A|
    :return: A matrix of size |A| * (|S|+1)
    """
    # See Lec4a page 49 for the full equation with indices:
    #     grad = (a - theta s) s^T
    #
    # Keep in mind that:
    # - a^T b denotes a dot product   (the first factor is transposed)
    # - a b^T denotes an outer product (the second factor is transposed)
    # - the bias must be added to the observation, which *changes its shape*
    ob_1 = include_bias(ob)
    return np.outer(action - np.dot(theta, ob_1), ob_1)

Exemplo n.º 3

0

Exibir arquivo

Arquivo: main.py Projeto: stjordanis/Deep-RL-Bootcamp-Labs

def point_get_action(theta, ob, rng=np.random):
    """
    Draw an action from a normal distribution whose mean is
    ``theta.dot(include_bias(ob))`` and whose scale is 1.

    :param theta: A matrix of size |A| * (|S|+1)
    :param ob: A vector of size |S|
    :param rng: Source of randomness exposing ``normal(loc=, scale=)``
    :return: A vector of size |A|
    """
    augmented = include_bias(ob)
    return rng.normal(loc=theta.dot(augmented), scale=1.)

Exemplo n.º 4

0

Exibir arquivo

Arquivo: main.py Projeto: stjordanis/Deep-RL-Bootcamp-Labs

def compute_logits(theta, ob):
    """
    Compute one logit per row of theta from the bias-augmented observation.

    :param theta: A matrix of size |A| * (|S|+1)
    :param ob: A vector of size |S|
    :return: A vector of size |A|
    """
    # Row vector times theta^T: entry i is the dot product of the augmented
    # observation with theta's i-th row.
    return include_bias(ob).dot(theta.T)

Exemplo n.º 5

0

Exibir arquivo

Arquivo: main.py Projeto: vmuthuk2/deepbootcamp

def point_get_grad_logp_action(theta, ob, action):
    """
    Outer product of the residual ``action - theta.dot(ob_1)`` with the
    bias-augmented observation ``ob_1 = include_bias(ob)``.

    :param theta: A matrix of size |A| * (|S|+1)
    :param ob: A vector of size |S|
    :param action: A vector of size |A|
    :return: A matrix of size |A| * (|S|+1)
    """
    augmented = include_bias(ob)
    residual = action - theta.dot(augmented)
    grad = np.outer(residual, augmented)
    return grad

Exemplo n.º 6

0

Exibir arquivo

def point_get_action(theta, ob, rng=np.random):
    """
    Sample an action: normally distributed around the linear prediction
    ``theta.dot(include_bias(ob))`` with scale 1.

    :param theta: A matrix of size |A| * (|S|+1)
    :param ob: A vector of size |S|
    :param rng: Object providing ``normal``; defaults to ``np.random``
    :return: A vector of size |A|
    """
    center = theta.dot(include_bias(ob))
    sample = rng.normal(loc=center, scale=1.)
    return sample

Exemplo n.º 7

0

Exibir arquivo

def compute_logits(theta, ob):
    """
    Return ``include_bias(ob).dot(theta.T)``: a linear score for each row
    of theta.

    :param theta: A matrix of size |A| * (|S|+1)
    :param ob: A vector of size |S|
    :return: A vector of size |A|
    """
    features = include_bias(ob)
    weights_t = theta.T
    return features.dot(weights_t)

Exemplo n.º 8

0

Exibir arquivo

Arquivo: main.py Projeto: stjordanis/Deep-RL-Bootcamp-Labs

def point_get_logp_action(theta, ob, action):
    """
    Evaluate ``-0.5 * log(2*pi) * |A| - 0.5 * sum((action - mean)^2)``,
    where ``mean = theta.dot(include_bias(ob))`` and |A| is the number of
    rows of theta.

    :param theta: A matrix of size |A| * (|S|+1)
    :param ob: A vector of size |S|
    :param action: A vector of size |A|
    :return: A scalar
    """
    residual = action - theta.dot(include_bias(ob))
    # Constant term: one -0.5*log(2*pi) per row of theta.
    log_norm = -0.5 * np.log(2 * np.pi) * theta.shape[0]
    # Quadratic term: half the squared length of the residual.
    half_sq_dist = 0.5 * np.sum(np.square(residual))
    return log_norm - half_sq_dist

Exemplo n.º 9

0

Exibir arquivo

Arquivo: main.py Projeto: krprls/DeepRLBootcamp2017

def point_get_logp_action(theta, ob, action):
    """
    Log-density style score of ``action`` around the linear prediction
    ``theta.dot(include_bias(ob))``: a constant ``-0.5*log(2*pi)`` per row
    of theta minus half the sum of squared residuals.

    :param theta: A matrix of size |A| * (|S|+1)
    :param ob: A vector of size |S|
    :param action: A vector of size |A|
    :return: A scalar
    """
    augmented = include_bias(ob)
    deviations = action - theta.dot(augmented)
    n_rows = theta.shape[0]
    return (
        -0.5 * np.log(2 * np.pi) * n_rows
        - 0.5 * np.sum(np.square(deviations))
    )

Exemplo n.º 10

0

Exibir arquivo

def cartpole_get_grad_logp_action(theta, ob, action):
    """
    Return ``outer(e_action - softmax(logits), include_bias(ob))``, where
    ``logits = compute_logits(theta, ob)`` and ``e_action`` is the one-hot
    vector for ``action``.

    ``softmax`` and ``compute_logits`` are helpers defined elsewhere; if
    ``softmax`` is the standard softmax, this is the gradient of
    ``log softmax(logits)[action]`` with respect to theta.

    :param theta: A matrix of size |A| * (|S|+1)
    :param ob: A vector of size |S|
    :param action: An integer
    :return: A matrix of size |A| * (|S|+1)
    """
    # Start from -softmax and add 1 at the chosen action, which equals
    # e_action - softmax. For NumPy arrays unary minus yields a new array,
    # so the in-place update does not touch the helper's return value.
    dlogits = -softmax(compute_logits(theta, ob))
    dlogits[action] += 1
    return np.outer(dlogits, include_bias(ob))

Exemplo n.º 11

0

Exibir arquivo

Arquivo: main.py Projeto: arvindpereira/deeprlbootcamp

def point_get_grad_logp_action(theta, ob, action):
    """
    Return ``(action - theta.dot(ob_1)) ob_1^T`` with
    ``ob_1 = include_bias(ob)``: the gradient of
    ``-0.5 * ||action - theta.dot(ob_1)||^2`` with respect to theta.

    :param theta: A matrix of size |A| * (|S|+1)
    :param ob: A vector of size |S|
    :param action: A vector of size |A|
    :return: A matrix of size |A| * (|S|+1)
    """
    ob_1 = include_bias(ob)
    # (a - theta s) outer s
    return np.outer(action - np.dot(theta, ob_1), ob_1)

Exemplo n.º 12

0

Exibir arquivo

def point_get_grad_logp_action(theta, ob, action):
    """
    Return the outer product of ``zs = action - theta.dot(ob_1)`` with
    ``ob_1 = include_bias(ob)``, i.e. the gradient of ``-0.5 * sum(zs**2)``
    with respect to theta.

    :param theta: A matrix of size |A| * (|S|+1)
    :param ob: A vector of size |S|
    :param action: A vector of size |A|
    :return: A matrix of size |A| * (|S|+1)
    """
    ob_1 = include_bias(ob)
    mean = theta.dot(ob_1)
    zs = action - mean
    return np.outer(zs, ob_1)

Exemplo n.º 13

0

Exibir arquivo

Arquivo: main.py Projeto: stjordanis/Deep-RL-Bootcamp-Labs

def point_get_grad_logp_action(theta, ob, action):
    """
    Gradient with respect to theta of
    ``-0.5 * ||action - theta.dot(ob_1)||^2``, where
    ``ob_1 = include_bias(ob)``.

    :param theta: A matrix of size |A| * (|S|+1)
    :param ob: A vector of size |S|
    :param action: A vector of size |A|
    :return: A matrix of size |A| * (|S|+1)
    """
    ob_1 = include_bias(ob)
    # Residual between the given action and the linear prediction.
    zs = action - theta.dot(ob_1)
    # Row i of the result is zs[i] * ob_1.
    return np.outer(zs, ob_1)

Exemplo n.º 14

0

Exibir arquivo

def point_get_grad_logp_action(theta, ob, action):
    """
    Return ``outer(action - theta.dot(ob_1), ob_1)`` with
    ``ob_1 = include_bias(ob)``; this is the derivative of
    ``-0.5 * ||action - theta.dot(ob_1)||^2`` with respect to theta.

    :param theta: A matrix of size |A| * (|S|+1)
    :param ob: A vector of size |S|
    :param action: A vector of size |A|
    :return: A matrix of size |A| * (|S|+1)
    """
    ob_1 = include_bias(ob)
    return np.outer(action - theta.dot(ob_1), ob_1)

Exemplo n.º 15

0

Exibir arquivo

def cartpole_get_grad_logp_action(theta, ob, action):
    """
    Return ``outer(a - p, ob_1)`` where ``a`` is the one-hot vector for
    ``action``, ``p = softmax(compute_logits(theta, ob))`` and
    ``ob_1 = include_bias(ob)``.

    ``softmax`` and ``compute_logits`` are helpers defined elsewhere; if
    ``softmax`` is the standard softmax, this is the gradient of
    ``log p[action]`` with respect to theta.

    :param theta: A matrix of size |A| * (|S|+1)
    :param ob: A vector of size |S|
    :param action: An integer
    :return: A matrix of size |A| * (|S|+1)
    """
    # One-hot encoding of the chosen action, one entry per row of theta.
    a = np.zeros(theta.shape[0])
    a[action] = 1
    p = softmax(compute_logits(theta, ob))
    ob_1 = include_bias(ob)
    return np.outer(a - p, ob_1)

Exemplo n.º 16

0

Exibir arquivo

Arquivo: main.py Projeto: amalF/DeepRL-Bootcamp

def cartpole_get_grad_logp_action(theta, ob, action):
    """
    Outer product of (one-hot(action) - softmax(logits)) with the
    bias-augmented observation, where ``logits = compute_logits(theta, ob)``.

    :param theta: A matrix of size |A| * (|S|+1)
    :param ob: A vector of size |S|
    :param action: An integer
    :return: A matrix of size |A| * (|S|+1)
    """
    # Row `action` of the identity matrix is the one-hot vector for it.
    chosen = np.eye(theta.shape[0])[action]
    features = include_bias(ob)
    probs = softmax(compute_logits(theta, ob))
    return np.outer((chosen - probs), features)

Exemplo n.º 17

0

Exibir arquivo

def point_get_grad_logp_action(theta, ob, action):
    """
    Return ``zs ob_1^T`` with ``zs = action - theta.dot(ob_1)`` and
    ``ob_1 = include_bias(ob)``: the gradient of ``-0.5 * sum(zs**2)``
    with respect to theta.

    :param theta: A matrix of size |A| * (|S|+1)
    :param ob: A vector of size |S|
    :param action: A vector of size |A|
    :return: A matrix of size |A| * (|S|+1)
    """
    ob_1 = include_bias(ob)
    mean = theta.dot(ob_1)
    zs = action - mean
    # np.outer is the (column vector) x (row vector) product of two 1-D
    # vectors, so no manual reshaping is needed.
    return np.outer(zs, ob_1)

Exemplo n.º 18

0

Exibir arquivo

Arquivo: main.py Projeto: stjordanis/Deep-RL-Bootcamp-Labs

def cartpole_get_grad_logp_action(theta, ob, action):
    """
    Return ``outer(a - p, ob_1)``: ``a`` is one-hot at ``action``,
    ``p = softmax(compute_logits(theta, ob))`` and
    ``ob_1 = include_bias(ob)``.

    ``softmax`` and ``compute_logits`` are defined elsewhere; if ``softmax``
    is the standard softmax, the result is the gradient of
    ``log p[action]`` with respect to theta.

    :param theta: A matrix of size |A| * (|S|+1)
    :param ob: A vector of size |S|
    :param action: An integer
    :return: A matrix of size |A| * (|S|+1)
    """
    p = softmax(compute_logits(theta, ob))
    # Indicator vector of the chosen action (length = rows of theta).
    a = np.zeros(theta.shape[0])
    a[action] = 1
    return np.outer(a - p, include_bias(ob))

Exemplo n.º 19

0

Exibir arquivo

Arquivo: main.py Projeto: gongbo-yang-cloudminds/deeprlbootcamp

def point_get_grad_logp_action(theta, ob, action):
    """
    Return ``outer(action - mean, ob_1)`` with ``ob_1 = include_bias(ob)``
    and ``mean = theta.dot(ob_1)``: the gradient of
    ``-0.5 * ||action - mean||^2`` with respect to theta.

    :param theta: A matrix of size |A| * (|S|+1)
    :param ob: A vector of size |S|
    :param action: A vector of size |A|
    :return: A matrix of size |A| * (|S|+1)
    """
    ob_1 = include_bias(ob)
    mean = theta.dot(ob_1)
    # No transpose is needed: transposing a 1-D vector is a no-op and
    # np.outer treats both arguments as flat vectors.
    return np.outer(action - mean, ob_1)

Exemplo n.º 20

0

Exibir arquivo

def cartpole_get_grad_logp_action(theta, ob, action):
    """
    Outer product of (one-hot(action) - softmax(logits)) with the
    bias-augmented observation; the logits are
    ``include_bias(ob).dot(theta.T)``.

    :param theta: A matrix of size |A| * (|S|+1)
    :param ob: A vector of size |S|
    :param action: An integer
    :return: A matrix of size |A| * (|S|+1)
    """
    features = include_bias(ob)
    # Indicator vector for the selected action.
    indicator = np.zeros(theta.shape[0])
    indicator[action] = 1.
    probs = softmax(features.dot(theta.T))
    return np.outer(indicator - probs, features)

Exemplo n.º 21

0

Exibir arquivo

def cartpole_get_grad_logp_action(theta, ob, action):
    """
    Compute ``outer(e_action - softmax(compute_logits(theta, ob)),
    include_bias(ob))``, where ``e_action`` is one-hot at ``action``.

    :param theta: A matrix of size |A| * (|S|+1)
    :param ob: A vector of size |S|
    :param action: An integer
    :return: A matrix of size |A| * (|S|+1)
    """
    features = include_bias(ob)
    one_hot = np.zeros(theta.shape[0])
    one_hot[action] = 1
    probs = softmax(compute_logits(theta, ob))
    return np.outer(one_hot - probs, features)

Exemplo n.º 22

0

Exibir arquivo

Arquivo: main.py Projeto: swiatkowski/deep-rl-bootcamp

def cartpole_get_grad_logp_action(theta, ob, action):
    """
    Outer product of (one-hot(action) - softmax(logits)) with the
    transposed bias-augmented observation, where
    ``logits = compute_logits(theta, ob)``.

    :param theta: A matrix of size |A| * (|S|+1)
    :param ob: A vector of size |S|
    :param action: An integer
    :return: A matrix of size |A| * (|S|+1)
    """
    probs = softmax(compute_logits(theta, ob))
    # Indicator with the same shape and dtype as the probabilities.
    indicator = np.zeros_like(probs)
    indicator[action] = 1
    features = include_bias(ob)
    return np.outer(indicator - probs, np.transpose(features))

Exemplo n.º 23

0

Exibir arquivo

Arquivo: main.py Projeto: sagarchaturvedi1/deep_rl_bootcamp

def cartpole_get_grad_logp_action(theta, ob, action):
    """
    Return ``outer(e - softmax(logits), ob_1)`` where ``e`` is one-hot at
    ``action``, ``ob_1 = include_bias(ob)`` and
    ``logits = ob_1.dot(theta.T)``.

    ``softmax`` is a helper defined elsewhere; if it is the standard
    softmax, this is the gradient of ``log softmax(logits)[action]`` with
    respect to theta.

    :param theta: A matrix of size |A| * (|S|+1)
    :param ob: A vector of size |S|
    :param action: An integer
    :return: A matrix of size |A| * (|S|+1)
    """
    ob_1 = include_bias(ob)
    # Size the one-hot vector from theta (one entry per action) rather than
    # hard-coding 2, so the function works for any number of actions.
    e = np.zeros(theta.shape[0])
    e[action] = 1
    step_1 = e - softmax(ob_1.dot(theta.T))
    return np.outer(step_1, ob_1)

Exemplo n.º 24

0

Exibir arquivo

Arquivo: main.py Projeto: xipengwang/Deep-RL-Bootcamp

def point_get_grad_logp_action(theta, ob, action):
    """
    Outer product of the residual ``action - theta.dot(s)`` with
    ``s = include_bias(ob)``.

    :param theta: A matrix of size |A| * (|S|+1)
    :param ob: A vector of size |S|
    :param action: A vector of size |A|
    :return: A matrix of size |A| * (|S|+1)
    """
    # Treating s and a as column vectors, the quadratic term is
    #     -0.5 * (a - theta s)^T (a - theta s)
    # and its derivative with respect to theta is (a - theta s) s^T.
    s = include_bias(ob)
    residual = action - theta.dot(s)
    return np.outer(residual, s)

Exemplo n.º 25

0

Exibir arquivo

def cartpole_get_grad_logp_action(theta, ob, action):
    """
    Return ``outer(e_a - probs, ob_1)`` where ``ob_1 = include_bias(ob)``,
    ``probs = softmax(theta.dot(ob_1))`` and ``e_a`` is one-hot at
    ``action``.

    ``softmax`` is a helper defined elsewhere; if it is the standard
    softmax, this is the gradient of ``log probs[action]`` with respect to
    theta.

    :param theta: A matrix of size |A| * (|S|+1)
    :param ob: A vector of size |S|
    :param action: An integer
    :return: A matrix of size |A| * (|S|+1)
    """
    ob_1 = include_bias(ob)
    # One-hot vector marking the chosen action.
    e_a = np.zeros((theta.shape[0], ))
    e_a[action] = 1
    probs = softmax(theta.dot(ob_1))
    return np.outer(e_a - probs, ob_1)

Exemplo n.º 26

0

Exibir arquivo

Arquivo: main.py Projeto: mabirck/Deep_RL_Bootcamp

def cartpole_get_grad_logp_action(theta, ob, action):
    """
    Return ``outer(one_hot - p, include_bias(ob))`` where
    ``p = softmax(compute_logits(theta, ob))`` and ``one_hot`` marks
    ``action``.

    ``softmax`` and ``compute_logits`` are defined elsewhere; if ``softmax``
    is the standard softmax, this is the gradient of ``log p[action]`` with
    respect to theta.

    :param theta: A matrix of size |A| * (|S|+1)
    :param ob: A vector of size |S|
    :param action: An integer
    :return: A matrix of size |A| * (|S|+1)
    """
    p = softmax(compute_logits(theta, ob))
    one_hot = np.zeros(theta.shape[0])
    one_hot[action] = 1
    return np.outer(one_hot - p, include_bias(ob))

Exemplo n.º 27

0

Exibir arquivo

Arquivo: main.py Projeto: Steven1791/DeepRL-Bootcamp

def point_get_grad_logp_action(theta, ob, action):
    """
    Return ``outer(zs, ob_1)`` with ``ob_1 = include_bias(ob)`` and
    ``zs = action - theta.dot(ob_1)``: the gradient of
    ``-0.5 * sum(zs**2)`` with respect to theta.

    :param theta: A matrix of size |A| * (|S|+1)
    :param ob: A vector of size |S|
    :param action: A vector of size |A|
    :return: A matrix of size |A| * (|S|+1)
    """
    # The bias is folded into the observation, so theta needs no separate
    # bias term.
    ob_1 = include_bias(ob)
    mean = theta.dot(ob_1)
    zs = action - mean
    return np.outer(zs, ob_1)

Exemplo n.º 28

0

Exibir arquivo

def cartpole_get_grad_logp_action(theta, ob, action):
    """
    Return ``outer(dlogits, ob_1)`` where ``ob_1 = include_bias(ob)`` and
    ``dlogits = e_action - softmax(theta.dot(ob_1))``.

    ``softmax`` is a helper defined elsewhere; if it is the standard
    softmax, ``dlogits`` is the derivative of ``log softmax(logits)[action]``
    with respect to the logits and the result is its gradient with respect
    to theta.

    :param theta: A matrix of size |A| * (|S|+1)
    :param ob: A vector of size |S|
    :param action: An integer
    :return: A matrix of size |A| * (|S|+1)
    """
    ob_1 = include_bias(ob)
    logits = theta.dot(ob_1)
    probs = softmax(logits)
    # -probs plus 1 at the chosen action equals one_hot(action) - probs.
    dlogits = -probs
    dlogits[action] += 1
    return np.outer(dlogits, ob_1)

Exemplo n.º 29

0

Exibir arquivo

Arquivo: main.py Projeto: vmuthuk2/deepbootcamp

def cartpole_get_grad_logp_action(theta, ob, action):
    """
    Gradient with respect to theta of ``log softmax(logits)[action]``,
    where ``logits = theta.dot(ob_1)`` and ``ob_1 = include_bias(ob)``.
    The result is ``outer(e - softmax(logits), ob_1)`` with ``e`` one-hot
    at ``action``.

    :param theta: A matrix of size |A| * (|S|+1)
    :param ob: A vector of size |S|
    :param action: An integer
    :return: A matrix of size |A| * (|S|+1)
    """
    ob_1 = include_bias(ob)
    e = np.zeros(theta.shape[0])
    e[action] = 1

    logits = theta.dot(ob_1)
    # Subtract the maximum logit before exponentiating. Softmax is invariant
    # to a constant shift, and this keeps np.exp from overflowing to inf
    # (which would turn the probabilities into nan) for large logits.
    pi_exp = np.exp(logits - np.max(logits))
    pi_soft = pi_exp / np.sum(pi_exp)

    return np.outer(e - pi_soft, ob_1)

Exemplo n.º 30

0

Exibir arquivo

def point_get_grad_logp_action(theta, ob, action):
    """
    Return ``(action - theta.dot(ob_1)) ob_1^T`` with
    ``ob_1 = include_bias(ob)``: the gradient of
    ``-0.5 * ||action - theta.dot(ob_1)||^2`` with respect to theta.

    :param theta: A matrix of size |A| * (|S|+1)
    :param ob: A vector of size |S|
    :param action: A vector of size |A|
    :return: A matrix of size |A| * (|S|+1)
    """
    ob_1 = include_bias(ob)
    mean = theta.dot(ob_1)  # theta times the bias-augmented observation
    zs = action - mean
    # np.outer already forms zs * ob_1^T from two 1-D vectors; an explicit
    # transpose of a 1-D vector would be a no-op.
    return np.outer(zs, ob_1)

Exemplo n.º 31

0

Exibir arquivo

Arquivo: main.py Projeto: whathelll/DeepRLBootCampLabs

def point_get_grad_logp_action(theta, ob, action):
    """
    Return ``outer(zs, ob_1)`` with ``ob_1 = include_bias(ob)`` and
    ``zs = action - theta.dot(ob_1)``, which is the gradient of
    ``-0.5 * sum(zs**2)`` with respect to theta.

    :param theta: A matrix of size |A| * (|S|+1)
    :param ob: A vector of size |S|
    :param action: A vector of size |A|
    :return: A matrix of size |A| * (|S|+1)
    """
    ob_1 = include_bias(ob)
    mean = theta.dot(ob_1)
    zs = action - mean
    return np.outer(zs, ob_1)

Exemplo n.º 32

0

Exibir arquivo

Arquivo: main.py Projeto: JirenJin/DRL-Labs-Solutions

def cartpole_get_grad_logp_action(theta, ob, action):
    """
    Gradient with respect to theta of ``log softmax(logits)[action]``,
    where ``logits = compute_logits(theta, ob)`` (a helper defined
    elsewhere). The result is ``outer(ea - pi_theta, include_bias(ob))``
    with ``ea`` one-hot at ``action`` and ``pi_theta = softmax(logits)``.

    :param theta: A matrix of size |A| * (|S|+1)
    :param ob: A vector of size |S|
    :param action: An integer
    :return: A matrix of size |A| * (|S|+1)
    """
    ob_1 = include_bias(ob)
    ea = np.zeros(theta.shape[0])
    ea[action] = 1
    logits = compute_logits(theta, ob)
    # Shift by the maximum logit before exponentiating: softmax is unchanged
    # by a constant shift, and this avoids overflow in np.exp for large
    # logits. The exponentials are computed once and reused.
    exp_logits = np.exp(logits - np.max(logits))
    pi_theta = exp_logits / np.sum(exp_logits)
    return np.outer(ea - pi_theta, ob_1)

Exemplo n.º 33

0

Exibir arquivo

def point_get_grad_logp_action(theta, ob, action):
    """
    Return ``outer(action - mean, ob_1)`` with ``ob_1 = include_bias(ob)``
    and ``mean = theta.dot(ob_1)``.

    :param theta: A matrix of size |A| * (|S|+1)
    :param ob: A vector of size |S|
    :param action: A vector of size |A|
    :return: A matrix of size |A| * (|S|+1)
    """
    # Derivation (s~ is the bias-augmented observation, mu = theta s~):
    #   log p = -d/2 log(2 pi) - 1/2 (a - mu)^T (a - mu)
    #   d/dtheta log p = -1/2 * 2 * (-1) * (a - theta s~) s~^T
    #                  = (a - theta s~) s~^T
    ob_1 = include_bias(ob)
    mean = theta.dot(ob_1)  # mu = theta s~
    return np.outer(action - mean, ob_1)

Exemplo n.º 34

0

Exibir arquivo

def point_get_logp_action(theta, ob, action):
    """
    Evaluate ``-d/2 * log(2*pi) - 1/2 * sum((action - mu)^2)`` with
    ``mu = theta.dot(include_bias(ob))`` and ``d`` the number of rows of
    theta.

    :param theta: A matrix of size |A| * (|S|+1)
    :param ob: A vector of size |S|
    :param action: A vector of size |A|
    :return: A scalar
    """
    # Log-density of a d-dimensional Gaussian with covariance cov:
    #   log p = -d/2 log(2 pi) - 1/2 log det(cov)
    #           - 1/2 (x - mu)^T cov^(-1) (x - mu)
    # With cov = I, log det(cov) = 0 and cov^(-1) = I, leaving
    #   log p = -d/2 log(2 pi) - 1/2 (x - mu)^T (x - mu)
    mu = theta.dot(include_bias(ob))
    diff = action - mu
    const_term = -0.5 * np.log(2 * np.pi) * theta.shape[0]
    quad_term = 0.5 * np.sum(np.square(diff))
    return const_term - quad_term

Exemplo n.º 35

0

Exibir arquivo

def cartpole_get_grad_logp_action(theta, ob, action):
    """
    Return ``outer(diff, include_bias(ob))`` where
    ``diff = one_hot(action) - softmax(compute_logits(theta, ob))``.

    ``softmax`` and ``compute_logits`` are helpers defined elsewhere; if
    ``softmax`` is the standard softmax, this is the gradient of
    ``log softmax(logits)[action]`` with respect to theta.

    :param theta: A matrix of size |A| * (|S|+1)
    :param ob: A vector of size |S|
    :param action: An integer
    :return: A matrix of size |A| * (|S|+1)
    """
    # One-hot encoding of the chosen action, one entry per row of theta.
    action_vec = np.zeros(theta.shape[0])
    action_vec[action] = 1

    diff = action_vec - softmax(compute_logits(theta, ob))
    return np.outer(diff, include_bias(ob))

Exemplo n.º 36

0

Exibir arquivo

Arquivo: rollout.py Projeto: stjordanis/Deep-RL-Bootcamp-Labs

def cartpole_get_action(theta, ob, rng=np.random):
    """
    Compute the logits ``include_bias(ob).dot(theta.T)`` and hand them to
    ``weighted_sample`` (defined elsewhere) together with ``rng``.

    :param theta: Weight matrix; its transpose is multiplied by the
        bias-augmented observation
    :param ob: Observation passed to ``include_bias``
    :param rng: Random source forwarded to ``weighted_sample``
    :return: Whatever ``weighted_sample`` returns for these logits
    """
    features = include_bias(ob)
    scores = features.dot(theta.T)
    return weighted_sample(scores, rng=rng)

Exemplo n.º 37

0

Exibir arquivo

Arquivo: rollout.py Projeto: stjordanis/Deep-RL-Bootcamp-Labs

def point_get_action(theta, ob, rng=np.random):
    """
    Sample from a normal distribution with mean
    ``theta.dot(include_bias(ob))`` and scale 1.

    :param theta: Weight matrix applied to the bias-augmented observation
    :param ob: Observation passed to ``include_bias``
    :param rng: Random source exposing ``normal(loc=, scale=)``
    :return: The sample drawn by ``rng.normal``
    """
    loc = theta.dot(include_bias(ob))
    return rng.normal(loc=loc, scale=1.)