def rings_log_pdf_grad(X, sigma=0.1, radia=np.array([1, 3])):
    """Gradient of the log-density of a mixture of concentric rings.

    The first two columns of ``X`` are modelled as a mixture over the radii
    in ``radia``: component ``k`` has unnormalised log-density
    ``-0.5 * (||x|| - radia[k])**2 / sigma**2`` and mixture weight
    proportional to its circumference ``2 * pi * radia[k]``.  Any further
    columns get the gradient of an independent zero-mean Gaussian with
    standard deviation ``sigma``.

    Parameters
    ----------
    X : ndarray of shape (N, D) with D >= 2
        Points at which to evaluate the gradient.  ``N`` may be zero.
    sigma : float
        Width of every ring (and of the Gaussian on the extra columns).
    radia : array-like
        Ring radii.

    Returns
    -------
    ndarray of shape (N, D)
        Gradient of the log-density with respect to each row of ``X``.
    """
    radia = np.asarray(radia, dtype=float)
    weights = 2 * np.pi * radia
    weights = weights / np.sum(weights)
    log_weights = np.log(weights)

    # radial distance of every point to every ring, shape (N, K)
    norms = np.linalg.norm(X[:, :2], axis=1)
    diffs = norms[:, np.newaxis] - radia

    log_pdf_components = -0.5 * diffs**2 / (sigma**2)
    log_pdf = logsumexp(log_pdf_components + log_weights, axis=1)

    # posterior weight of each ring for each point, computed in log space
    log_ratios = log_pdf_components - log_pdf[:, np.newaxis]
    responsibilities = np.exp(log_ratios + log_weights)

    # derivative along the radial direction: responsibility-weighted sum of
    # the per-ring derivatives -(r - radia[k]) / sigma^2
    grad_1d = np.sum(-diffs / (sigma**2) * responsibilities, axis=1)

    # project the radial derivative onto the x/y axes
    angles = np.arctan2(X[:, 1], X[:, 0])

    result = np.zeros(np.shape(X))
    result[:, 0] = np.cos(angles) * grad_1d
    result[:, 1] = np.sin(angles) * grad_1d

    if X.shape[1] > 2:
        # zero-mean Gaussian with standard deviation sigma on the rest
        result[:, 2:] = -X[:, 2:] / (sigma**2)

    return result
def calculate_marginal_and_conditional(self, confidence_mass):
    """Turn the received factor messages into normalised beliefs.

    Sums the two message components of every factor in ``self.factors``,
    normalises both sums in log space and stores the resulting
    distributions, their means and variances, and an interval around the
    mode of the conditional on ``self``.

    Attributes written: ``log_marginal``, ``log_conditional``, ``marginal``,
    ``conditional``, ``marginal_mean``, ``marginal_variance``,
    ``conditional_mean``, ``conditional_variance``,
    ``conditional_conf_lower`` and ``conditional_conf_upper``.

    :param confidence_mass: passed through to ``self.cumulate_mass_around``
        together with the conditional CDF and the index of its mode.
    """
    assert len(self.received_msgs) == len(self.neighbours), "message from at least one factor is missing"

    # a clamped variable accumulates into a single entry, otherwise one per state
    n_entries = self.n_states if self.clamped_value is None else 1
    marginal = np.zeros(n_entries)
    conditional = np.zeros(n_entries)

    # messages are in log space, so they are combined by summation
    for factor in self.factors:
        msg_sp, msg_ms = self.received_msgs[factor]
        marginal += msg_sp
        conditional += msg_ms

    # renormalise in log space, then leave log space
    self.log_marginal = marginal - logsumexp(marginal)
    self.log_conditional = conditional - logsumexp(conditional)
    self.marginal = np.exp(self.log_marginal)
    self.conditional = np.exp(self.log_conditional)

    # first and second moments over the state indices 0 .. n_states - 1
    states = np.arange(self.n_states)
    self.marginal_mean = np.sum(self.marginal * states)
    self.marginal_variance = np.sum(self.marginal * states**2) - self.marginal_mean**2
    self.conditional_mean = np.sum(self.conditional * states)
    self.conditional_variance = np.sum(self.conditional * states**2) - self.conditional_mean**2

    # without a single usable entry there is no mode to centre an interval on
    if np.all(np.isnan(self.conditional)):
        self.conditional_conf_upper = 0
        self.conditional_conf_lower = 0
        return

    # interval around the mode, stored as offsets from the mode
    center = np.nanargmax(self.conditional)
    conditional_cdf = np.cumsum(self.conditional)
    left, right = self.cumulate_mass_around(conditional_cdf, confidence_mass, center)
    self.conditional_conf_lower = center - left
    self.conditional_conf_upper = right - center
def update(self, Z, num_new=1, log_weights=None):
    """Fold the last ``num_new`` rows of ``Z`` into the running estimate.

    Updates ``self.mu``, ``self.L_C`` and ``self.log_sum_weights`` in place.
    On the very first call (``self.log_sum_weights is None``) the first new
    point initialises the mean, and ``self.L_C`` is set to
    ``sqrt(self.gamma2) * I``.

    :param Z: sequence of points; only the trailing ``num_new`` entries are
        treated as new.
    :param num_new: number of trailing entries of ``Z`` that are new.
        ``0`` makes the call a no-op.
    :param log_weights: optional log-weights, one per entry of ``Z``.
        Defaults to all zeros (i.e. unit weights).
    :raises RuntimeError: if the updated ``self.L_C`` contains NaN or inf.
    """
    assert len(Z) >= num_new

    # dont do anything if no data observed
    if num_new == 0:
        return

    if log_weights is not None:
        assert len(log_weights) == len(Z)
    else:
        log_weights = np.zeros(len(Z))

    Z_new = Z[-num_new:]
    log_weights_new = log_weights[-num_new:]

    # first update: use first of the new points and log-weights, then discard
    if self.log_sum_weights is None:
        # start from a scaled identity factor; gamma2 acts as a regulariser
        # that keeps the system well-posed before real data arrives
        self.L_C = np.eye(self.D) * np.sqrt(self.gamma2)
        self.log_sum_weights = log_weights_new[0]
        self.mu = Z_new[0]

        Z_new = Z_new[1:]
        log_weights_new = log_weights_new[1:]

    # dont do anything if no data is left after the initialisation
    if len(Z_new) == 0:
        return

    # generate lmbdas that correspond to weighted averages
    lmbdas = log_weights_to_lmbdas(self.log_sum_weights, log_weights_new)

    # keep a copy of the previous factor for the diagnostic message below
    old_L_C = np.array(self.L_C, copy=True)
    self.mu, self.L_C = update_mean_cov_L_lmbda(Z_new, self.mu, self.L_C, lmbdas)

    if np.any(np.isnan(self.L_C)) or np.any(np.isinf(self.L_C)):
        logger.warning(
            "Numerical error while updating Cholesky factor of C.\n"
            "Before update:\n%s\n"
            "After update:\n%s\n"
            "Updating data:\n%s\n"
            "Updating log weights:\n%s\n"
            "Updating lmbdas:\n%s\n",
            str(old_L_C), str(self.L_C), str(Z_new), str(log_weights_new), str(lmbdas))
        raise RuntimeError("Numerical error while updating Cholesky factor of C.")

    # Only the weights that were actually processed above may be added to
    # the running total.  Using all of ``log_weights`` would re-count the
    # history in ``Z`` (and the weight consumed by the initialisation) and
    # disagree with the running sum used to derive ``lmbdas``.
    self.log_sum_weights = logsumexp(list(log_weights_new) + [self.log_sum_weights])
def update(self, Z, num_new=1, log_weights=None):
    """Fold the last ``num_new`` rows of ``Z`` into the running estimate.

    Updates ``self.mu``, ``self.L_C`` and ``self.log_sum_weights`` in place.
    On the very first call (``self.log_sum_weights is None``) the first new
    point initialises the mean, and ``self.L_C`` is set to
    ``sqrt(self.gamma2) * I``.

    :param Z: sequence of points; only the trailing ``num_new`` entries are
        treated as new.
    :param num_new: number of trailing entries of ``Z`` that are new.
        ``0`` makes the call a no-op.
    :param log_weights: optional log-weights, one per entry of ``Z``.
        Defaults to all zeros (i.e. unit weights).
    :raises RuntimeError: if the updated ``self.L_C`` contains NaN or inf.
    """
    assert len(Z) >= num_new

    # dont do anything if no data observed
    if num_new == 0:
        return

    if log_weights is not None:
        assert len(log_weights) == len(Z)
    else:
        log_weights = np.zeros(len(Z))

    Z_new = Z[-num_new:]
    log_weights_new = log_weights[-num_new:]

    # first update: use first of the new points and log-weights, then discard
    if self.log_sum_weights is None:
        # start from a scaled identity factor; gamma2 acts as a regulariser
        # that keeps the system well-posed before real data arrives
        self.L_C = np.eye(self.D) * np.sqrt(self.gamma2)
        self.log_sum_weights = log_weights_new[0]
        self.mu = Z_new[0]

        Z_new = Z_new[1:]
        log_weights_new = log_weights_new[1:]

    # dont do anything if no data is left after the initialisation
    if len(Z_new) == 0:
        return

    # generate lmbdas that correspond to weighted averages
    lmbdas = log_weights_to_lmbdas(self.log_sum_weights, log_weights_new)

    # keep a copy of the previous factor for the diagnostic message below
    old_L_C = np.array(self.L_C, copy=True)
    self.mu, self.L_C = update_mean_cov_L_lmbda(Z_new, self.mu, self.L_C, lmbdas)

    if np.any(np.isnan(self.L_C)) or np.any(np.isinf(self.L_C)):
        logger.warning(
            "Numerical error while updating Cholesky factor of C.\n"
            "Before update:\n%s\n"
            "After update:\n%s\n"
            "Updating data:\n%s\n"
            "Updating log weights:\n%s\n"
            "Updating lmbdas:\n%s\n",
            str(old_L_C), str(self.L_C), str(Z_new), str(log_weights_new), str(lmbdas))
        raise RuntimeError("Numerical error while updating Cholesky factor of C.")

    # Only the weights that were actually processed above may be added to
    # the running total.  Using all of ``log_weights`` would re-count the
    # history in ``Z`` (and the weight consumed by the initialisation) and
    # disagree with the running sum used to derive ``lmbdas``.
    self.log_sum_weights = logsumexp(list(log_weights_new) + [self.log_sum_weights])
def log_weights_to_lmbdas(log_sum_old_weights, log_new_weights, boundary_check_min_number=1e-5):
    """Convert a stream of log-weights into running-average coefficients.

    Each new weight is first added (in log space) to the running total;
    its coefficient is then the ratio ``new_weight / running_total``.
    Coefficients are finally pushed away from 0 and 1 by
    ``boundary_check_min_number``.

    :param log_sum_old_weights: log of the total weight seen before the call.
    :param log_new_weights: log-weights of the new terms, in arrival order.
    :param boundary_check_min_number: margin kept from both ends of (0, 1).
    :return: array with one coefficient per entry of ``log_new_weights``.
    """
    margin = boundary_check_min_number
    lmbdas = np.zeros(len(log_new_weights))

    running_log_total = log_sum_old_weights
    for idx, log_weight in enumerate(log_new_weights):
        # the running total already includes the weight being processed
        running_log_total = logsumexp([running_log_total, log_weight])
        lmbdas[idx] = np.exp(log_weight - running_log_total)

    # keep every coefficient strictly inside (0, 1); the upper check is
    # evaluated after the lower clamp has been applied
    too_small = lmbdas < margin
    lmbdas[too_small] = margin
    too_large = (1 - lmbdas) < margin
    lmbdas[too_large] = 1 - margin

    return lmbdas
def predict_log_proba(self, X):
    """Return normalised per-class log-probabilities for every row of ``X``.

    For each class in ``self.classes_`` the rows of ``X`` are all labelled
    with that class, converted with
    ``self._convert_to_data_object_in_scoring`` and scored with
    ``self._anomaly_detector._LogProbabilityOfData``.  The scores of each
    row are then normalised across classes in log space.

    :param X: ``ndarray`` with one sample per row.
    :return: array with one row per sample and one column per class, in
        the order of ``self.classes_``.
    :raises ValueError: if ``X`` is not an ``ndarray`` (previously ``None``
        was returned silently in that case).
    """
    if not isinstance(X, ndarray):
        raise ValueError("Unknown type of data to score: %s" % (type(X),))

    n_samples = len(X)
    logps = []
    for clazz in self.classes_:
        # score every sample under the hypothesis that it belongs to clazz
        data_object = self._convert_to_data_object_in_scoring(
            X, y=array([clazz] * n_samples)
        )
        logps.append(
            self._anomaly_detector._LogProbabilityOfData(data_object, n_samples)
        )

    # transpose to (sample, class) and normalise each sample's scores
    LogPs = [x - logsumexp(x) for x in array(logps).T]
    return array(LogPs)
def log_weights_to_lmbdas(log_sum_old_weights, log_new_weights, boundary_check_min_number=1e-5):
    """Compute the mixing coefficient of each new weighted term.

    The running log-total starts at ``log_sum_old_weights``.  For every
    entry of ``log_new_weights`` the total is extended by that entry and
    the coefficient ``exp(log_new_weight - log_total)`` is recorded.  The
    result is clamped so that all values stay at least
    ``boundary_check_min_number`` away from 0 and from 1.

    :param log_sum_old_weights: log of the accumulated weight so far.
    :param log_new_weights: sequence of log-weights to process in order.
    :param boundary_check_min_number: numerical safety margin for (0, 1).
    :return: array of coefficients, same length as ``log_new_weights``.
    """
    coefficients = np.zeros(len(log_new_weights))
    log_total = log_sum_old_weights

    for position, log_w in enumerate(log_new_weights):
        log_total = logsumexp([log_total, log_w])
        coefficients[position] = np.exp(log_w - log_total)

    # numerical checks: values must lie in (0, 1); apply the lower bound
    # first and test the upper bound on the already-adjusted values
    floor = boundary_check_min_number
    ceiling = 1 - boundary_check_min_number
    coefficients = np.where(coefficients < floor, floor, coefficients)
    coefficients = np.where((1 - coefficients) < floor, ceiling, coefficients)

    return coefficients
def predict_log_proba(self, X):
    """Return normalised per-class log-probabilities for every row of ``X``.

    For each class in ``self.classes_`` the rows of ``X`` are all labelled
    with that class, converted with
    ``self._convert_to_data_object_in_scoring`` and scored with
    ``self._anomaly_detector._LogProbabilityOfData``.  The scores of each
    row are then normalised across classes in log space.

    :param X: ``ndarray`` with one sample per row.
    :return: array with one row per sample and one column per class, in
        the order of ``self.classes_``.
    :raises ValueError: if ``X`` is not an ``ndarray`` (previously ``None``
        was returned silently in that case).
    """
    if not isinstance(X, ndarray):
        raise ValueError("Unknown type of data to score: %s" % (type(X),))

    n_samples = len(X)
    logps = []
    for clazz in self.classes_:
        # score every sample under the hypothesis that it belongs to clazz
        data_object = self._convert_to_data_object_in_scoring(
            X, y=array([clazz] * n_samples)
        )
        logps.append(
            self._anomaly_detector._LogProbabilityOfData(data_object, n_samples)
        )

    # transpose to (sample, class) and normalise each sample's scores
    LogPs = [x - logsumexp(x) for x in array(logps).T]
    return array(LogPs)
def _logsum(self, x, dim): """Calculates the sum of x[:^(dim-1), i, ...] for each value of i. Returns a vector of size x.shape[dim].""" # handle case when there is no summation to be done if x.ndim == 1 and dim == 0: return x # make stationary axis the first one x = np.rollaxis(x, dim) # flatten other axes xflat = np.reshape(x, (x.shape[0], -1)) # calculate sum x_sum = logsumexp(xflat, axis=1) return x_sum
def predict_log_proba(self, X):
    """Return normalised per-class log-probabilities for the data in ``X``.

    ``X`` may be a ``pyisc.DataObject`` (its ``class_column`` must equal
    ``self.class_column``; it is converted with ``as_2d_array``) or an
    ``ndarray`` (which is copied).  The data is scored with
    ``self.compute_logp`` and the scores are normalised in log space.

    :param X: ``pyisc.DataObject`` or ``ndarray`` to score.
    :return: array holding, for each row of the transposed
        ``compute_logp`` output, the scores minus their log-sum-exp.
    :raises ValueError: if ``X`` is of neither supported type.
    """
    assert self.class_column > -1

    X1 = None
    if isinstance(X, pyisc.DataObject):
        assert X.class_column == self.class_column
        X1 = X.as_2d_array()
    elif isinstance(X, ndarray):
        X1 = X.copy()

    if X1 is None:
        # one formatted message instead of a (text, type) argument tuple,
        # so that str(error) is readable
        raise ValueError("Unknown type of data to score: %s" % (type(X),))

    logps = self.compute_logp(X1)
    # normalise each row of the transposed scores in log space
    LogPs = [x - logsumexp(x) for x in array(logps).T]
    return array(LogPs)