Example #1
def point_get_grad_logp_action(theta, ob, action):
    """
    :param theta: A matrix of size |A| * (|S|+1)
    :param ob: A vector of size |S|
    :param action: A vector of size |A|
    :return: A matrix of size |A| * (|S|+1)
    """
    grad = np.zeros_like(theta)

    act = theta.dot(include_bias(ob))

    grad = np.outer((action - act), include_bias(ob))

    return grad
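Every example on this page calls include_bias(ob), a helper from the lab's utilities that is not shown here. A minimal sketch of what it is assumed to do (append a constant 1 so the last column of theta acts as the bias, turning a size-|S| observation into the size-(|S|+1) vector the docstrings describe):

import numpy as np

def include_bias(ob):
    """Assumed helper: append a constant 1.0 to the observation vector so
    theta's extra column plays the role of a bias term."""
    return np.append(ob, 1.0)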
Example #2
def point_get_grad_logp_action(theta, ob, action):
    """
    :param theta: A matrix of size |A| * (|S|+1)
    :param ob: A vector of size |S|
    :param action: A vector of size |A|
    :return: A matrix of size |A| * (|S|+1)
    """
    "*** YOUR CODE HERE ***"
    # See Lec4a page 49 for the full equation with indices
    # grad = (a - theta^T s) s^T
    #
    # Keep in mind that:
    # - a^T b denotes a dot product (the first factor is transposed)
    # - a b^T denotes an outer product (the second factor is transposed)
    # - the bias entry has to be appended to the state/observation, which *changes its shape*
    ob_1 = include_bias(ob)
    grad = np.outer(action - np.dot(theta, ob_1), ob_1)

    # A messier alternative that uses * broadcasting as an implied outer product
    # (elementwise product followed by a sum along axis 0 acts as the dot product):
    # mean = (theta.T * ob_1[:, None]).sum(0)
    # zs = action - mean
    # # expand dimensions so broadcasting produces the outer product
    # grad = zs[:, np.newaxis] * ob_1[:, np.newaxis].T
    return grad
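Written out as an equation (using the shapes from the docstring, where theta is |A| x (|S|+1) and \tilde{s} = include_bias(s), so the mean is theta times \tilde{s} rather than the lecture's theta^T s), the unit-covariance Gaussian policy gives

\log \pi_\theta(a \mid s) = -\tfrac{|A|}{2}\log(2\pi) - \tfrac{1}{2}\lVert a - \theta\,\tilde{s}\rVert^{2},
\qquad
\nabla_\theta \log \pi_\theta(a \mid s) = (a - \theta\,\tilde{s})\,\tilde{s}^{\top}

which is exactly what np.outer(action - theta.dot(ob_1), ob_1) computes, and whose log-probability appears in Examples #8/#9.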
Example #3
def point_get_action(theta, ob, rng=np.random):
    """
    :param theta: A matrix of size |A| * (|S|+1)
    :param ob: A vector of size |S|
    :return: A vector of size |A|
    """
    ob_1 = include_bias(ob)
    mean = theta.dot(ob_1)
    return rng.normal(loc=mean, scale=1.)
Example #4
def compute_logits(theta, ob):
    """
    :param theta: A matrix of size |A| * (|S|+1)
    :param ob: A vector of size |S|
    :return: A vector of size |A|
    """
    ob_1 = include_bias(ob)
    logits = ob_1.dot(theta.T)
    return logits
Example #5
def point_get_grad_logp_action(theta, ob, action):
    """
    :param theta: A matrix of size |A| * (|S|+1)
    :param ob: A vector of size |S|
    :param action: A vector of size |A|
    :return: A matrix of size |A| * (|S|+1)
    """
    ob_1 = include_bias(ob)
    return np.outer(action - theta.dot(ob_1), ob_1)
Example #6
def point_get_action(theta, ob, rng=np.random):
    """
    :param theta: A matrix of size |A| * (|S|+1)
    :param ob: A vector of size |S|
    :return: A vector of size |A|
    """
    ob_1 = include_bias(ob)
    mean = theta.dot(ob_1)
    return rng.normal(loc=mean, scale=1.)
Example #7
def compute_logits(theta, ob):
    """
    :param theta: A matrix of size |A| * (|S|+1)
    :param ob: A vector of size |S|
    :return: A vector of size |A|
    """
    ob_1 = include_bias(ob)
    logits = ob_1.dot(theta.T)
    return logits
Example #8
def point_get_logp_action(theta, ob, action):
    """
    :param theta: A matrix of size |A| * (|S|+1)
    :param ob: A vector of size |S|
    :param action: A vector of size |A|
    :return: A scalar
    """
    ob_1 = include_bias(ob)
    mean = theta.dot(ob_1)
    zs = action - mean
    return -0.5 * np.log(2 * np.pi) * theta.shape[0] - 0.5 * np.sum(np.square(zs))
Example #9
def point_get_logp_action(theta, ob, action):
    """
    :param theta: A matrix of size |A| * (|S|+1)
    :param ob: A vector of size |S|
    :param action: A vector of size |A|
    :return: A scalar
    """
    ob_1 = include_bias(ob)
    mean = theta.dot(ob_1)
    zs = action - mean
    return -0.5 * np.log(2 * np.pi) * theta.shape[0] - 0.5 * np.sum(np.square(zs))
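A quick way to sanity-check the analytic gradient (Examples #12/#13) against the log-probability above is a finite-difference comparison. A minimal sketch, assuming point_get_logp_action and point_get_grad_logp_action are in scope:

import numpy as np

def check_point_grad(theta, ob, action, eps=1e-6):
    """Compare the analytic gradient with a central finite-difference
    estimate of point_get_logp_action; the max absolute gap should be tiny."""
    analytic = point_get_grad_logp_action(theta, ob, action)
    numeric = np.zeros_like(theta)
    for i in range(theta.shape[0]):
        for j in range(theta.shape[1]):
            t_plus, t_minus = theta.copy(), theta.copy()
            t_plus[i, j] += eps
            t_minus[i, j] -= eps
            numeric[i, j] = (point_get_logp_action(t_plus, ob, action)
                             - point_get_logp_action(t_minus, ob, action)) / (2 * eps)
    return np.max(np.abs(analytic - numeric))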
Example #10
def cartpole_get_grad_logp_action(theta, ob, action):
    """
    :param theta: A matrix of size |A| * (|S|+1)
    :param ob: A vector of size |S|
    :param action: An integer
    :return: A matrix of size |A| * (|S|+1)
    """
    grad = np.zeros_like(theta)
    # despite the variable name, this builds e_a - softmax(logits):
    # the negated probabilities with 1 added at the chosen action
    log_soft = -softmax(compute_logits(theta, ob))
    log_soft[action] += 1
    grad = np.outer(log_soft, include_bias(ob))
    return grad
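Starting with Example #10, the cartpole variants also rely on a softmax helper that is not shown on this page. A minimal, numerically stable sketch of what it is assumed to compute:

import numpy as np

def softmax(logits):
    """Assumed helper: exponentiate and normalize, shifting by the max
    first so large logits do not overflow."""
    shifted = logits - np.max(logits)
    exps = np.exp(shifted)
    return exps / np.sum(exps)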
Example #11
def point_get_grad_logp_action(theta, ob, action):
    """
    :param theta: A matrix of size |A| * (|S|+1)
    :param ob: A vector of size |S|
    :param action: A vector of size |A|
    :return: A matrix of size |A| * (|S|+1)
    """
    grad = np.zeros_like(theta)
    ob_1 = include_bias(ob)
    grad = np.outer(action - np.dot(theta, ob_1), ob_1)
    # (a - theta'.s).outer(S)
    return grad
Example #12
def point_get_grad_logp_action(theta, ob, action):
    """
    :param theta: A matrix of size |A| * (|S|+1)
    :param ob: A vector of size |S|
    :param action: A vector of size |A|
    :return: A matrix of size |A| * (|S|+1)
    """
    "*** YOUR CODE HERE ***"
    ob_1 = include_bias(ob)
    mean = theta.dot(ob_1)
    zs = action - mean
    return np.outer(zs, ob_1)
Example #13
def point_get_grad_logp_action(theta, ob, action):
    """
    :param theta: A matrix of size |A| * (|S|+1)
    :param ob: A vector of size |S|
    :param action: A vector of size |A|
    :return: A matrix of size |A| * (|S|+1)
    """
    "*** YOUR CODE HERE ***"
    ob_1 = include_bias(ob)
    mean = theta.dot(ob_1)
    zs = action - mean
    return np.outer(zs, ob_1)
Example #14
def point_get_grad_logp_action(theta, ob, action):
    """
    :param theta: A matrix of size |A| * (|S|+1)
    :param ob: A vector of size |S|
    :param action: A vector of size |A|
    :return: A matrix of size |A| * (|S|+1)
    """
    grad = np.zeros_like(theta)
    "*** YOUR CODE HERE ***"
    ob_1 = include_bias(ob)
    grad = np.outer(action - theta.dot(ob_1), ob_1)
    return grad
Example #15
def cartpole_get_grad_logp_action(theta, ob, action):
    """
    :param theta: A matrix of size |A| * (|S|+1)
    :param ob: A vector of size |S|
    :param action: An integer
    :return: A matrix of size |A| * (|S|+1)
    """
    "*** YOUR CODE HERE ***"
    a = np.zeros(theta.shape[0])
    a[action] = 1
    p = softmax(compute_logits(theta, ob))
    ob_1 = include_bias(ob)
    return np.outer(a - p, ob_1)
Example #16
def cartpole_get_grad_logp_action(theta, ob, action):
    """
    :param theta: A matrix of size |A| * (|S|+1)
    :param ob: A vector of size |S|
    :param action: An integer
    :return: A matrix of size |A| * (|S|+1)
    """
    #grad = np.zeros_like(theta)
    one_hot_actions = np.eye(theta.shape[0])
    ob_1 = include_bias(ob)
    pi = softmax(compute_logits(theta, ob))
    grad = np.outer((one_hot_actions[action] - pi), ob_1)
    return grad
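For the cartpole variants (Example #10 and the other cartpole_get_grad_logp_action examples on this page), the formula being implemented can be written out as follows, with e_a the one-hot vector for the chosen action and \tilde{s} = include_bias(s):

\pi_\theta(a \mid s) = \operatorname{softmax}(\theta\,\tilde{s})_a,
\qquad
\nabla_\theta \log \pi_\theta(a \mid s) = \bigl(e_a - \operatorname{softmax}(\theta\,\tilde{s})\bigr)\,\tilde{s}^{\top}

which is the np.outer(one_hot - probs, ob_1) pattern every cartpole example above and below reduces to.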
Example #17
def point_get_grad_logp_action(theta, ob, action):
    """
    :param theta: A matrix of size |A| * (|S|+1)
    :param ob: A vector of size |S|
    :param action: A vector of size |A|
    :return: A matrix of size |A| * (|S|+1)
    """
    grad = np.zeros_like(theta)
    ob_1 = include_bias(ob)
    mean = theta.dot(ob_1)
    zs = action - mean
    grad = np.dot(zs.reshape(zs.shape[0], 1), ob_1.reshape(1, ob_1.shape[0]))
    return grad
Example #18
def cartpole_get_grad_logp_action(theta, ob, action):
    """
    :param theta: A matrix of size |A| * (|S|+1)
    :param ob: A vector of size |S|
    :param action: An integer
    :return: A matrix of size |A| * (|S|+1)
    """
    "*** YOUR CODE HERE ***"
    a = np.zeros(theta.shape[0])
    a[action] = 1
    p = softmax(compute_logits(theta, ob))
    ob_1 = include_bias(ob)
    return np.outer(a - p, ob_1)
Example #19
def point_get_grad_logp_action(theta, ob, action):
    """
    :param theta: A matrix of size |A| * (|S|+1)
    :param ob: A vector of size |S|
    :param action: A vector of size |A|
    :return: A matrix of size |A| * (|S|+1)
    """
    "*** YOUR CODE HERE *** --> DONE"
    grad = np.zeros_like(theta)
    ob_1 = include_bias(ob)
    mean = theta.dot(ob_1)
    grad = np.outer((action - mean), np.transpose(ob_1))
    return grad
Example #20
def cartpole_get_grad_logp_action(theta, ob, action):
    """
    :param theta: A matrix of size |A| * (|S|+1)
    :param ob: A vector of size |S|
    :param action: An integer
    :return: A matrix of size |A| * (|S|+1)
    """
    ob_1 = include_bias(ob)
    logits = ob_1.dot(theta.T)
    ea = np.zeros(theta.shape[0])
    ea[action] = 1.
    grad = np.outer(ea - softmax(logits), ob_1)
    return grad
Example #21
def cartpole_get_grad_logp_action(theta, ob, action):
    """
    :param theta: A matrix of size |A| * (|S|+1)
    :param ob: A vector of size |S|
    :param action: An integer
    :return: A matrix of size |A| * (|S|+1)
    """
    #grad = np.zeros_like(theta)
    #"*** YOUR CODE HERE ***"
    ob_1 = include_bias(ob)
    ea = np.zeros(theta.shape[0])
    ea[action] = 1
    grad = np.outer(ea - softmax(compute_logits(theta, ob)), ob_1)
    return grad
Example #22
def cartpole_get_grad_logp_action(theta, ob, action):
    """
    :param theta: A matrix of size |A| * (|S|+1)
    :param ob: A vector of size |S|
    :param action: An integer
    :return: A matrix of size |A| * (|S|+1)
    """

    softmax_logits = softmax(compute_logits(theta, ob))
    e = np.zeros_like((softmax_logits))
    e[action] = 1
    ob_1 = include_bias(ob)
    grad = np.outer(e - softmax_logits, ob_1.T)
    return grad
Example #23
def cartpole_get_grad_logp_action(theta, ob, action):
    """
    :param theta: A matrix of size |A| * (|S|+1)
    :param ob: A vector of size |S|
    :param action: An integer
    :return: A matrix of size |A| * (|S|+1)
    """
    e = np.zeros(theta.shape[0])  # one-hot action vector; avoid hard-coding the 2 cartpole actions
    ob_1 = include_bias(ob)
    e[action] = 1
    step_1 = e - softmax(ob_1.dot(theta.T))
    grad = np.outer(step_1, ob_1)
    "*** YOUR CODE HERE ***"
    return grad
Example #24
def point_get_grad_logp_action(theta, ob, action):
    """
    :param theta: A matrix of size |A| * (|S|+1)
    :param ob: A vector of size |S|
    :param action: A vector of size |A|
    :return: A matrix of size |A| * (|S|+1)
    """
    # If s and a are column vectors, the log-likelihood term is
    #   -0.5 * (a - theta*s)^T * I * (a - theta*s)
    # and its gradient w.r.t. theta is (a - theta*s) * s^T

    ob_1 = include_bias(ob)
    zs = action - theta.dot(ob_1)
    grad = np.outer(zs,ob_1)
    return grad
Example #25
def cartpole_get_grad_logp_action(theta, ob, action):
    """
    :param theta: A matrix of size |A| * (|S|+1)
    :param ob: A vector of size |S|
    :param action: An integer
    :return: A matrix of size |A| * (|S|+1)
    """
    grad = np.zeros_like(theta)
    "*** YOUR CODE HERE ***"
    ob_1 = include_bias(ob)
    e_a = np.zeros((theta.shape[0], ))
    e_a[action] = 1
    probs = softmax(theta.dot(ob_1))
    grad = np.outer(e_a - probs, ob_1)
    return grad
Example #26
def cartpole_get_grad_logp_action(theta, ob, action):
    """
    :param theta: A matrix of size |A| * (|S|+1)
    :param ob: A vector of size |S|
    :param action: An integer
    :return: A matrix of size |A| * (|S|+1)
    """
    #print(theta.shape)
    grad = np.zeros_like(theta)
    "*** YOUR CODE HERE ***"
    p = softmax(compute_logits(theta, ob))
    one_hot = np.zeros(theta.shape[0])
    one_hot[action] = 1
    grad = np.outer((one_hot - p), include_bias(ob))
    return grad
Example #27
def point_get_grad_logp_action(theta, ob, action):
    """
    :param theta: A matrix of size |A| * (|S|+1)
    :param ob: A vector of size |S|
    :param action: A vector of size |A|
    :return: A matrix of size |A| * (|S|+1)
    """
    grad = np.zeros_like(theta)
    "*** YOUR CODE HERE ***"
    
    ob_1 = include_bias(ob)  # append the bias entry so no separate bias parameter is needed
    mean = theta.dot(ob_1)
    zs = action - mean
    grad = np.outer(zs, ob_1)
    return grad
Example #28
def cartpole_get_grad_logp_action(theta, ob, action):
    """
    :param theta: A matrix of size |A| * (|S|+1)
    :param ob: A vector of size |S|
    :param action: An integer
    :return: A matrix of size |A| * (|S|+1)
    """
    grad = np.zeros_like(theta)
    ob_1 = include_bias(ob)
    logits = theta.dot(ob_1)
    probs = softmax(logits)
    dlogits = -probs
    dlogits[action] += 1
    grad = np.outer(dlogits, ob_1)
    "*** YOUR CODE HERE ***"
    return grad
Example #29
def cartpole_get_grad_logp_action(theta, ob, action):
    """
    :param theta: A matrix of size |A| * (|S|+1)
    :param ob: A vector of size |S|
    :param action: An integer
    :return: A matrix of size |A| * (|S|+1)
    """

    ob_1 = include_bias(ob)
    e = np.zeros(theta.shape[0])
    e[action] = 1

    pi_exp = np.exp(theta.dot(ob_1))
    pi_soft = pi_exp / np.sum(pi_exp)

    return np.outer(e - pi_soft, ob_1)
Example #30
def point_get_grad_logp_action(theta, ob, action):
    """
    :param theta: A matrix of size |A| * (|S|+1)
    :param ob: A vector of size |S|
    :param action: A vector of size |A|
    :return: A matrix of size |A| * (|S|+1)
    """
    grad = np.zeros_like(theta)
    "*** YOUR CODE HERE ***"
    # grad = (a - theta*s) s^T   (the lecture writes theta^T s; here theta is already |A| x (|S|+1))
    ob_1 = include_bias(ob)
    mean = theta.dot(ob_1)  # mean of the Gaussian policy, theta @ ob_1
    zs = action - mean
    grad = np.outer(zs, np.transpose(ob_1))

    return grad
Example #31
def point_get_grad_logp_action(theta, ob, action):
    """
    :param theta: A matrix of size |A| * (|S|+1)
    :param ob: A vector of size |S|
    :param action: A vector of size |A|
    :return: A matrix of size |A| * (|S|+1)
    """
    # grad = np.zeros_like(theta)
    ob_1 = include_bias(ob)
    mean = theta.dot(ob_1)
    zs = action - mean
    grad = np.outer(zs, ob_1)

    # print(grad.shape)
    "*** YOUR CODE HERE ***"
    return grad
Example #32
def cartpole_get_grad_logp_action(theta, ob, action):
    """
    :param theta: A matrix of size |A| * (|S|+1)
    :param ob: A vector of size |S|
    :param action: An integer
    :return: A matrix of size |A| * (|S|+1)
    """
    grad = np.zeros_like(theta)
    "*** YOUR CODE HERE ***"
    ob_1 = include_bias(ob)
    ea = np.zeros(theta.shape[0])
    ea[action] = 1
    logits = compute_logits(theta, ob)
    pi_theta = np.exp(logits) / np.sum(np.exp(logits))
    grad = np.outer((ea - pi_theta), ob_1)
    return grad
Example #33
def point_get_grad_logp_action(theta, ob, action):
    """
    :param theta: A matrix of size |A| * (|S|+1)
    :param ob: A vector of size |S|
    :param action: A vector of size |A|
    :return: A matrix of size |A| * (|S|+1)
    """
    # log p = -d/2 log(2pi) - 1/2 [a - mu]^T [a - mu]
    # grad  = d/dtheta( -1/2 [a - theta*s~]^T [a - theta*s~] )
    #       = -1/2 * 2 * (-1) * [a - theta*s~] s~^T
    #       = [a - theta*s~] s~^T
    ob_1 = include_bias(ob)
    mean = theta.dot(ob_1)  # mu = theta @ s~
    grad = np.outer((action - mean), ob_1)

    "*** YOUR CODE HERE ***"
    return grad
Example #34
def point_get_logp_action(theta, ob, action):
    """
    :param theta: A matrix of size |A| * (|S|+1)
    :param ob: A vector of size |S|
    :param action: A vector of size |A|
    :return: A scalar
    """
    # Gaussian: p(x) = (2pi)^(-d/2) * det(cov)^(-1/2) * exp(-1/2 [x-mu]^T cov^(-1) [x-mu])
    # log p     = -d/2 log(2pi) - 1/2 log(det(cov)) - 1/2 [x-mu]^T cov^(-1) [x-mu]
    # with cov = I, det(I) = 1 and I^(-1) = I, so
    # log p     = -d/2 log(2pi) - 1/2 [x-mu]^T [x-mu]
    ob_1 = include_bias(ob)
    mean = theta.dot(ob_1)  # mu = theta @ s~
    zs = action - mean  # a - mu
    return -0.5 * np.log(2 * np.pi) * theta.shape[0] - 0.5 * np.sum(
        np.square(zs))
Example #35
def cartpole_get_grad_logp_action(theta, ob, action):
    """
    :param theta: A matrix of size |A| * (|S|+1)
    :param ob: A vector of size |S|
    :param action: An integer
    :return: A matrix of size |A| * (|S|+1)
    """
    grad = np.zeros_like(theta)

    "*** YOUR CODE HERE ***"
    action_vec = np.zeros(theta.shape[0])

    action_vec[action] = 1
    diff = action_vec - softmax(compute_logits(theta, ob))
    grad = np.outer(diff, include_bias(ob))

    return grad
Example #36
def cartpole_get_action(theta, ob, rng=np.random):
    ob_1 = include_bias(ob)
    logits = ob_1.dot(theta.T)
    return weighted_sample(logits, rng=rng)
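Example #36 samples a discrete action with weighted_sample, another lab helper not shown here. A minimal sketch, assuming it draws an action index with probability proportional to exp(logits):

import numpy as np

def weighted_sample(logits, rng=np.random):
    """Assumed helper: sample an index with probability softmax(logits)."""
    probs = softmax(logits)  # softmax as sketched after Example #10
    return int(rng.choice(len(probs), p=probs))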
Example #37
def point_get_action(theta, ob, rng=np.random):
    ob_1 = include_bias(ob)
    mean = theta.dot(ob_1)
    return rng.normal(loc=mean, scale=1.)
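Taken together, these functions supply the per-step ingredients of a vanilla policy-gradient (REINFORCE) update. A rough sketch of how they are typically combined, assuming a hypothetical Gym-style env with reset()/step() returning a 4-tuple, and the point-mass functions above in scope:

import numpy as np

def reinforce_episode(env, theta, learning_rate=0.01, rng=np.random):
    """One REINFORCE update: roll out an episode with the Gaussian policy,
    then ascend the sum of grad-log-prob terms weighted by the episode return."""
    ob = env.reset()
    grads, rewards, done = [], [], False
    while not done:
        action = point_get_action(theta, ob, rng=rng)
        grads.append(point_get_grad_logp_action(theta, ob, action))
        ob, reward, done, _ = env.step(action)
        rewards.append(reward)
    episode_return = float(np.sum(rewards))
    theta += learning_rate * episode_return * np.sum(grads, axis=0)
    return theta, episode_return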