import numpy as np


def grad_max_2deg_penalty(a, f_vals, X, X_grad, ind, n, beta):
    """
    Gradient of the smooth-maximum (log-sum-exp) penalty max_2deg_penalty
    with respect to the flattened coefficient vector a
    """
    d = X.shape[1]
    b = a[:d]
    B = a[d:].reshape((d, d))
    Y = f_vals[:, ind] + X_grad @ b + qform_q(B + B.T, X_grad, X) + 2 * np.trace(B)
    # softmax weights, computed stably by subtracting the maximum first
    Y_max = np.max(np.abs(Y))
    grad_exp = np.exp(beta * (np.abs(Y) - Y_max)) / np.sum(
        np.exp(beta * (np.abs(Y) - Y_max)))
    # gradient w.r.t. b
    nabla_b = X_grad.T @ (grad_exp * np.sign(Y))
    # gradient w.r.t. B
    nabla_f_B = np.matmul(X_grad.reshape((n, d, 1)), X.reshape((n, 1, d)))
    nabla_f_B = nabla_f_B + nabla_f_B.transpose(
        (0, 2, 1)) + 2 * np.eye(d).reshape((1, d, d))
    nabla_B = np.sum(nabla_f_B * (grad_exp * np.sign(Y)).reshape((n, 1, 1)),
                     axis=0)
    # stack gradients together
    grad = np.zeros((d + 1) * d, dtype=np.float64)
    grad[:d] = nabla_b
    grad[d:] = nabla_B.ravel()
    return grad
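
# qform_q is defined elsewhere in this module; from its call sites here it
# evaluates the row-wise quadratic form x_grad_i^T A x_i over the trajectory.
# The sketch below is an assumed reference implementation for readers, not
# necessarily the module's own (possibly optimized) version.
def qform_q_reference(A, X_grad, X):
    # np.array of shape (n,) with entries X_grad[i] @ A @ X[i]
    return np.sum((X_grad @ A) * X, axis=1)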

def grad_qform_2_ESVM(a, f_vals, X, X_grad, W, ind, n, alpha=0.0):
    """
    Gradient of the ESVM objective qform_2_ESVM with respect to a.

    Arguments:
        a - np.array of shape ((d+1)*d,), the flattened (d+1, d) coefficient array:
            a[:d] - coefficients of the linear terms;
            a[d:] - coefficients of the quadratic terms, reshaped to (d, d)
        alpha - ridge regularization coefficient
    """
    d = X_grad.shape[1]
    b = a[:d]
    B = a[d:].reshape((d, d))
    Y = f_vals[:, ind] + X_grad @ b + qform_q(B + B.T, X_grad, X) + 2 * np.trace(B)
    # gradient w.r.t. b
    nabla_b = 2. / n * (X_grad * PWP_fast(Y, W).reshape((n, 1))).sum(axis=0)
    # gradient w.r.t. B
    nabla_f_B = np.matmul(X_grad.reshape((n, d, 1)), X.reshape((n, 1, d)))
    nabla_f_B = nabla_f_B + nabla_f_B.transpose(
        (0, 2, 1)) + 2 * np.eye(d).reshape((1, d, d))
    nabla_B = 2. / n * np.sum(nabla_f_B * PWP_fast(Y, W).reshape((n, 1, 1)),
                              axis=0)
    # add ridge term
    nabla_B += 2 * alpha * B
    # stack gradients together
    grad = np.zeros((d + 1) * d, dtype=np.float64)
    grad[:d] = nabla_b
    grad[d:] = nabla_B.ravel()
    return grad

def qform_2_ESVM(a, f_vals, X, X_grad, W, ind, n, alpha=0.0):
    """
    ESVM objective: spectral variance of the controlled trajectory for 2nd
    order polynomial control variates, plus a ridge penalty on B.

    Arguments:
        a - np.array of shape ((d+1)*d,), the flattened (d+1, d) coefficient array:
            a[:d] - coefficients of the linear terms;
            a[d:] - coefficients of the quadratic terms, reshaped to (d, d)
        alpha - ridge regularization coefficient
    """
    d = X_grad.shape[1]
    b = a[:d]
    B = a[d:].reshape((d, d))
    x_cur = f_vals[:, ind] + X_grad @ b + qform_q(B + B.T, X_grad, X) + 2 * np.trace(B)
    return Spectral_var(x_cur, W) + alpha * np.sum(B**2)
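
# PWP_fast and Spectral_var are likewise defined elsewhere in the module.
# From the way grad_qform_2_ESVM uses (2/n) * PWP_fast(Y, W) as the inner
# gradient of Spectral_var(Y, W), a consistent reading -- stated here as an
# assumption, not as the module's actual implementation -- is that
# Spectral_var(Y, W) is the spectral variance estimate (1/n) * Yc^T W Yc with
# Yc = Y - mean(Y) and W a symmetric matrix of lag-window weights, while
# PWP_fast(Y, W) applies P W P to Y, where P = I - (1/n) 11^T centers a vector.
def Spectral_var_reference(Y, W):
    Yc = Y - np.mean(Y)
    return (Yc @ W @ Yc) / len(Y)


def PWP_reference(Y, W):
    # P W P @ Y: center, multiply by W, center again
    Z = W @ (Y - np.mean(Y))
    return Z - np.mean(Z)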

def qform_2_LS(a, f_vals, X, X_grad, ind, n):
    """
    Least-squares objective for 2nd order polynomials as control variates:
    mean of the squared controlled trajectory.

    Arguments:
        a - np.array of shape ((d+1)*d,), the flattened (d+1, d) coefficient array:
            a[:d] - coefficients of the linear terms;
            a[d:] - coefficients of the quadratic terms, reshaped to (d, d)
    Returns:
        function value for index ind, a scalar
    """
    d = X.shape[1]
    b = a[:d]
    B = a[d:].reshape((d, d))
    x_cur = f_vals[:, ind] + X_grad @ b + qform_q(B + B.T, X_grad, X) + 2 * np.trace(B)
    return np.mean(x_cur**2)

def qform_2_ZV(a, f_vals, X, X_grad, ind, n):
    """
    Empirical variance objective for the ZV-2 method.

    Arguments:
        a - np.array of shape ((d+1)*d,), the flattened (d+1, d) coefficient array:
            a[:d] - coefficients of the linear terms;
            a[d:] - coefficients of the quadratic terms, reshaped to (d, d)
    Returns:
        function value for index ind, a scalar
    """
    d = X.shape[1]
    b = a[:d]
    B = a[d:].reshape((d, d))
    x_cur = f_vals[:, ind] + X_grad @ b + qform_q(B + B.T, X_grad, X) + 2 * np.trace(B)
    # unbiased sample variance of the controlled trajectory
    return 1. / (n - 1) * np.dot(x_cur - np.mean(x_cur), x_cur - np.mean(x_cur))
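
# Sanity note: the ZV-2 objective above is exactly the unbiased sample
# variance of the controlled trajectory, i.e. np.var(x_cur, ddof=1).
# A quick self-contained check on synthetic data (illustrative only),
# kept as a comment so it does not run on import:
#
#     rng = np.random.default_rng(0)
#     x = rng.standard_normal(100)
#     lhs = np.dot(x - x.mean(), x - x.mean()) / (len(x) - 1)
#     assert np.isclose(lhs, np.var(x, ddof=1))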

def max_2deg_penalty(a, f_vals, X, X_grad, ind, n, beta):
    """
    Smooth-maximum (log-sum-exp) penalty for 2nd order polynomials as control
    variates; approximates max_i |Y_i| from above.

    Arguments:
        a - np.array of shape ((d+1)*d,), the flattened (d+1, d) coefficient array:
            a[:d] - coefficients of the 1st order terms;
            a[d:] - coefficients of the 2nd order terms, reshaped to (d, d)
        beta - smoothing parameter; larger beta gives a tighter approximation
               of the true maximum
    Returns:
        function value for index ind, a scalar
    """
    d = X.shape[1]
    b = a[:d]
    B = a[d:].reshape((d, d))
    Y = f_vals[:, ind] + X_grad @ b + qform_q(B + B.T, X_grad, X) + 2 * np.trace(B)
    # subtract the maximum before exponentiating for numerical stability
    Y_max = np.max(np.abs(Y))
    return Y_max + 1. / beta * np.log(
        np.sum(np.exp(beta * (np.abs(Y) - Y_max))))
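
# The gradient grad_max_2deg_penalty above can be validated against this
# objective with a finite-difference check. A hedged sketch on synthetic
# inputs (every name below is hypothetical test data, not part of the
# module); kept as a comment so it does not run on import:
#
#     from scipy.optimize import check_grad
#     rng = np.random.default_rng(0)
#     n, d, beta = 50, 3, 10.0
#     X = rng.standard_normal((n, d))
#     X_grad = rng.standard_normal((n, d))   # stand-in for grad log-density
#     f_vals = rng.standard_normal((n, 1))
#     a0 = rng.standard_normal((d + 1) * d)
#     err = check_grad(max_2deg_penalty, grad_max_2deg_penalty, a0,
#                      f_vals, X, X_grad, 0, n, beta)
#     print(err)  # should be small on well-scaled data, e.g. ~1e-6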

def grad_qform_2_LS(a, f_vals, X, X_grad, ind, n):
    """
    Gradient of the least-squares objective qform_2_LS with respect to a
    """
    d = X.shape[1]
    b = a[:d]
    B = a[d:].reshape((d, d))
    Y = f_vals[:, ind] + X_grad @ b + qform_q(B + B.T, X_grad, X) + 2 * np.trace(B)
    # gradient w.r.t. b
    nabla_b = 2. / n * X_grad.T @ Y
    # gradient w.r.t. B
    nabla_f_B = np.matmul(X_grad.reshape((n, d, 1)), X.reshape((n, 1, d)))
    nabla_f_B = nabla_f_B + nabla_f_B.transpose(
        (0, 2, 1)) + 2 * np.eye(d).reshape((1, d, d))
    nabla_B = 2. / n * np.sum(nabla_f_B * Y.reshape((n, 1, 1)), axis=0)
    # stack gradients together
    grad = np.zeros((d + 1) * d, dtype=np.float64)
    grad[:d] = nabla_b
    grad[d:] = nabla_B.ravel()
    return grad
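
# Typical downstream use: hand an objective/gradient pair to a quasi-Newton
# optimizer. A hedged usage sketch for the least-squares pair (variable names
# are illustrative; in the actual pipeline f_vals, X, X_grad would come from
# an MCMC trajectory and its log-density gradients):
#
#     from scipy.optimize import minimize
#     res = minimize(qform_2_LS, np.zeros((d + 1) * d),
#                    args=(f_vals, X, X_grad, ind, n),
#                    jac=grad_qform_2_LS, method='L-BFGS-B')
#     b_opt = res.x[:d]
#     B_opt = res.x[d:].reshape((d, d))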