def __init__(self, original_mean=0, a=-1, b=1, original_std=0.1):
    """Set up a normal distribution truncated to the interval ``[a, b]``.

    Parameters
    ----------
    original_mean : float
        Mean of the underlying (untruncated) normal distribution.
    a : float
        Lower truncation bound, expressed on the original scale.
    b : float
        Upper truncation bound, expressed on the original scale.
    original_std : float
        Standard deviation of the underlying (untruncated) normal.

    Attributes set
    --------------
    a, b : bounds converted to standard-normal units (what scipy expects).
    true_mean, true_sigma : parameters of the untruncated normal.
    mean, sigma : mean and standard deviation of the truncated distribution.
    """
    # scipy's truncnorm takes its bounds in units of standard deviations
    # around ``loc``, so the caller's bounds have to be standardized first.
    self.a = (a - original_mean) / original_std
    self.b = (b - original_mean) / original_std
    self.true_sigma = original_std
    self.true_mean = original_mean

    # ``truncnorm.stats`` returns (mean, *variance*) for moments="mv"; the
    # square root is needed so that ``sigma`` is a standard deviation, in
    # line with ``true_sigma``.
    mean, variance = truncnorm.stats(a=self.a,
                                     b=self.b,
                                     loc=self.true_mean,
                                     scale=self.true_sigma,
                                     moments="mv")
    self.mean = mean
    self.sigma = variance ** 0.5
def limit(self, value):
    """Store the truncation limit and derive the matching scale factor.

    ``value`` is used as a symmetric truncation point (``-value`` to
    ``value``) of a standard normal distribution.
    """
    self._limit = value

    # Truncating a normal distribution shrinks its spread, so the input
    # standard deviation has to be inflated to compensate. For example, a
    # truncation at 2 standard deviations requires an input standard
    # deviation of 1.136847 for the random samples to keep unit standard
    # deviation.
    truncated_variance = truncnorm.stats(-value, value, moments="v")
    self._scale = 1 / np.sqrt(truncated_variance)
def get_expected_mean(self):
    """Return the mean of the normal distribution after truncation.

    The distribution has location ``self.mean[0]`` and scale
    ``self.variance_sqrt``; its lower truncation point sits at zero on the
    original scale. Only a single-component ``self.mean`` is supported.

    Returns
    -------
    float
        Mean of the truncated distribution.
    """
    assert len(self.mean) == 1  # Only coded case for now.

    center = self.mean[0]
    spread = self.variance_sqrt

    # Bounds are passed to scipy in standardized units: (0 - center) / spread
    # is the point zero.
    lower = -center / spread
    # NOTE(review): the upper bound is also in standardized units but scales
    # with ``spread``; presumably it is meant as "effectively unbounded" --
    # confirm, since a small ``spread`` makes this a real truncation.
    upper = 100.0 * spread

    expected = truncnorm.stats(lower,
                               upper,
                               loc=center,
                               scale=spread,
                               moments='m')
    return float(expected)
def log_prob(self, value):
    """Evaluate the log-probability of ``value``.

    The value is mapped through ``self._to_std_rv`` and scored by the
    parent class; ``self._log_scale`` is then subtracted from the result.
    When ``self._validate_args`` is set, the sample is validated first.
    """
    if self._validate_args:
        self._validate_sample(value)
    standardized = self._to_std_rv(value)
    parent_log_prob = super(TruncatedNormal, self).log_prob(standardized)
    return parent_log_prob - self._log_scale


if __name__ == '__main__':
    # Side-by-side comparison of this implementation against scipy's
    # truncnorm for one fixed parameter set.
    from scipy.stats import truncnorm

    loc, scale = 1., 2.
    a, b = 1., 2.
    tn_pt = TruncatedNormal(loc, scale, a, b)
    mean_pt = tn_pt.mean.item()
    var_pt = tn_pt.variance.item()

    # scipy expects the truncation bounds in standardized units.
    alpha = (a - loc) / scale
    beta = (b - loc) / scale
    sp_params = dict(loc=loc, scale=scale)
    mean_sp, var_sp = truncnorm.stats(alpha, beta, moments='mv', **sp_params)

    print('mean', mean_pt, mean_sp)
    print('var', var_pt, var_sp)
    print('cdf',
          tn_pt.cdf(1.4).item(),
          truncnorm.cdf(1.4, alpha, beta, **sp_params))
    print('icdf',
          tn_pt.icdf(0.333).item(),
          truncnorm.ppf(0.333, alpha, beta, **sp_params))
    print('logpdf',
          tn_pt.log_prob(1.5).item(),
          truncnorm.logpdf(1.5, alpha, beta, **sp_params))
    print('entropy',
          tn_pt.entropy.item(),
          truncnorm.entropy(alpha, beta, **sp_params))
def impute(self, censored_X, censored_y, uncensored_X, uncensored_y):
    """Impute the censored y-values and return them as a column vector.

    The model is first trained on the uncensored data only. Then, in each
    iteration, every censored value is replaced by the mean of a normal
    distribution (model prediction) truncated to the interval
    ``[censored value, self.threshold]``, and the model is retrained on the
    uncensored plus imputed data. Iteration stops once the mean relative
    change of the imputed values is at most ``self.change_threshold`` or
    ``self.max_iter`` iterations have been run.

    Parameters
    ----------
    censored_X : array
        X matrix of censored data
    censored_y : array
        y matrix of censored data
    uncensored_X : array
        X matrix of uncensored data
    uncensored_y : array
        y matrix of uncensored data

    Returns
    -------
    array of shape (n_censored, 1), or None
        Imputed y-values; ``None`` when there is nothing to impute.
    """
    if censored_X.shape[0] == 0:
        self.logger.critical("Nothing to impute, return None")
        return None

    censored_y = censored_y.flatten()
    uncensored_y = uncensored_y.flatten()
    n_uncensored = uncensored_y.shape[0]

    # first learn model without censored data
    self.model.train(uncensored_X, uncensored_y)

    self.logger.debug("Going to impute %d y-values with %s" %
                      (censored_X.shape[0], str(self.model)))

    imputed_y = None  # define this, if imputation fails

    # Define variables
    y = None

    it = 1
    change = 0

    while True:
        self.logger.debug("Iteration %d of %d" % (it, self.max_iter))

        # predict censored y values
        y_mean, y_var = self.model.predict(censored_X)
        y_var[y_var < self.var_threshold] = self.var_threshold
        y_stdev = np.sqrt(y_var)[:, 0]
        y_mean = y_mean[:, 0]

        # ignore the warnings of truncnorm.stats
        # since we handle them appropriately
        with warnings.catch_warnings():
            warnings.filterwarnings(
                'ignore',
                r'invalid value encountered in (subtract|true_divide).*')
            warnings.filterwarnings(
                'ignore',
                r'divide by zero encountered in (true_divide|log).*')
            imputed_y = truncnorm.stats(
                a=(censored_y - y_mean) / y_stdev,
                b=(self.threshold - y_mean) / y_stdev,
                loc=y_mean,
                scale=y_stdev,
                moments='m')

        imputed_y = np.array(imputed_y)

        nans = ~np.isfinite(imputed_y)
        n_nans = np.count_nonzero(nans)
        if n_nans > 0:
            # Replace all nans with maximum of predicted perf and censored
            # value; this happens if the prediction is far smaller than the
            # censored data point
            self.logger.debug("Going to replace %d nan-value(s) with "
                              "max(captime, predicted mean)" % n_nans)
            imputed_y[nans] = np.maximum(censored_y[nans], y_mean[nans])

        if it > 1:
            # Calc mean difference between imputed values this and last
            # iteration, assume imputed values are always concatenated
            # after uncensored values
            previous = y[n_uncensored:]
            change = np.mean(np.abs(imputed_y - previous) / previous)

        # lower all values that are higher than threshold
        # should probably never happen
        imputed_y[imputed_y >= self.threshold] = self.threshold

        self.logger.debug("Change: %f" % change)

        X = np.concatenate((uncensored_X, censored_X))
        y = np.concatenate((uncensored_y, imputed_y))

        if change > self.change_threshold or it == 1:
            self.model.train(X, y)
        else:
            break

        it += 1
        if it > self.max_iter:
            break

    self.logger.debug("Imputation used %d/%d iterations, last_change=%f" %
                      (it - 1, self.max_iter, change))

    # replace all y > cutoff with PAR10 values (i.e., threshold)
    # NOTE: the builtin ``float`` replaces the ``np.float`` alias, which was
    # removed from NumPy; both denote 64-bit floats.
    imputed_y = np.array(imputed_y, dtype=float)
    imputed_y[imputed_y >= self.cutoff] = self.threshold

    if not np.isfinite(imputed_y).all():
        self.logger.critical("Imputed values are not finite, %s" %
                             str(imputed_y))
    return np.reshape(imputed_y, [imputed_y.shape[0], 1])
# Density at ``x`` from the "robust" computation: the log-probability of the
# standardized point under ``norm`` minus log(std) and ``logZhat``, then
# exponentiated. (``norm``, ``x``, ``mu``, ``std`` and ``logZhat`` are defined
# before this excerpt -- presumably ``norm`` is a standard normal and
# ``logZhat`` the log-normalizer of the truncation; confirm.)
robust_pdf = torch.exp(
    norm.log_prob((x - mu) / std) - (torch.log(torch.Tensor([std])) + logZhat))
print(f"Robust pdf: {robust_pdf[0]}\nTheoretical pdf: {theoretical_pdf}")
print("=============================")

########################################################
# Variances
########################################################
# Reference variance from scipy for the bounds ``clip_a`` / ``clip_b``.
# The original note says the upper limit is set VERY high to simulate an
# infinite upper bound; ``clip_b`` is assigned outside this excerpt, so
# that cannot be checked here.
theoretical_variance = truncnorm.stats(clip_a,
                                       clip_b,
                                       loc=mu,
                                       scale=std,
                                       moments='v')
print(
    f"Theoretical variance: {theoretical_variance}\nRobust variance: {sigmaHat.tolist()[0]}"
)
print("=============================\n\n")

# Reference entropy from scipy, printed next to the value in ``entropy``.
theoretical_entropy = truncnorm.entropy(clip_a, clip_b, loc=mu, scale=std)
print(
    f"Theoretical entropy: {theoretical_entropy}\nRobust entropy: {entropy.tolist()[0]}"
)
print("=============================\n\n")

#fig = plt.figure(figsize=(20,20))
#sns.distplot(samples, kde=False)
def impute(self, censored_X, censored_y, uncensored_X, uncensored_y):
    """Impute the censored y-values and return them.

    The model is first trained on the uncensored data only. Then, in each
    iteration, every censored value is replaced by the mean of a normal
    distribution (model prediction) truncated to the interval
    ``[censored value, self.cutoff * 10]``, and the model is retrained on
    the uncensored plus imputed data. Iteration stops once the mean
    relative change of the imputed values is at most
    ``self.change_threshold`` or the iteration counter exceeds
    ``self.max_iter``.

    Parameters
    ----------
    censored_X : array
        X matrix of censored data
    censored_y : array
        y matrix of censored data
    uncensored_X : array
        X matrix of uncensored data
    uncensored_y : array
        y matrix of uncensored data

    Returns
    -------
    array or None
        Imputed y-values (one entry per censored row); ``None`` when there
        is nothing to impute.
    """
    if censored_X.shape[0] == 0:
        self.logger.critical("Nothing to impute, return None")
        return None

    n_uncensored = uncensored_y.shape[0]

    # first learn model without censored data
    self.model.train(uncensored_X, uncensored_y)

    self.logger.debug("Going to impute %d y-values with %s" %
                      (censored_X.shape[0], str(self.model)))

    imputed_y = None  # define this, if imputation fails

    # Define variables
    y = None

    it = 0
    change = 0

    while True:
        self.logger.debug("Iteration %d of %d" % (it, self.max_iter))

        # predict censored y values
        y_mean, y_var = self.model.predict(censored_X)
        y_var[y_var < self.var_threshold] = self.var_threshold
        y_stdev = numpy.sqrt(y_var)

        # Evaluated row by row on purpose: the inputs may be column
        # vectors, and the per-row call keeps the output shape aligned
        # with ``censored_y`` without relying on broadcasting.
        upper = self.cutoff * 10
        imputed_y = numpy.array([
            truncnorm.stats(a=(lower - mean) / stdev,
                            b=(upper - mean) / stdev,
                            loc=mean,
                            scale=stdev,
                            moments='m')
            for lower, mean, stdev in zip(censored_y, y_mean, y_stdev)
        ])

        not_finite = ~numpy.isfinite(imputed_y)
        n_not_finite = numpy.count_nonzero(not_finite)
        if n_not_finite > 0:
            # Replace all nans with threshold
            self.logger.debug("Going to replace %d nan-value(s) with "
                              "threshold" % n_not_finite)
            imputed_y[not_finite] = self.threshold

        if it > 0:
            # Calc mean difference between imputed values this and last
            # iteration, assume imputed values are always concatenated
            # after uncensored values
            previous = y[n_uncensored:]
            change = numpy.mean(abs(imputed_y - previous) / previous)

        # lower all values that are higher than threshold
        imputed_y[imputed_y >= self.threshold] = self.threshold

        self.logger.debug("Change: %f" % change)

        X = numpy.concatenate((uncensored_X, censored_X))
        y = numpy.concatenate((uncensored_y, imputed_y))

        if change > self.change_threshold or it == 0:
            self.model.train(X, y)
        else:
            break

        it += 1
        if it > self.max_iter:
            break

    self.logger.debug("Imputation used %d/%d iterations, last_change=%f" %
                      (it, self.max_iter, change))

    # NOTE: the builtin ``float`` replaces the ``numpy.float`` alias, which
    # was removed from NumPy; both denote 64-bit floats.
    imputed_y = numpy.array(imputed_y, dtype=float)
    imputed_y[imputed_y >= self.threshold] = self.threshold

    if not numpy.isfinite(imputed_y).all():
        self.logger.critical("Imputed values are not finite, %s" %
                             str(imputed_y))
    return imputed_y
def obtain_batch_bandit_feedback(self, n_rounds: int) -> BanditFeedback:
    """Generate one batch of synthetic logged bandit feedback.

    Parameters
    ----------
    n_rounds: int
        Number of rounds for synthetic bandit feedback data.

    Returns
    ---------
    bandit_feedback: BanditFeedback
        Generated synthetic bandit feedback dataset.

    Raises
    ------
    ValueError
        If `n_rounds` is not a positive integer.

    """
    is_positive_int = isinstance(n_rounds, int) and n_rounds > 0
    if not is_positive_int:
        raise ValueError(
            f"n_rounds must be a positive integer, but {n_rounds} is given"
        )

    round_idx = np.arange(n_rounds)
    context = self.random_.normal(size=(n_rounds, self.dim_context))

    # draw one action per round from the behavior policy
    if self.behavior_policy_function is not None:
        # context-dependent policy: each round has its own distribution
        behavior_policy_ = self.behavior_policy_function(
            context=context,
            action_context=self.action_context,
            random_state=self.random_state,
        )
        candidate_actions = np.arange(self.n_actions)
        action = np.array([
            self.random_.choice(candidate_actions, p=behavior_policy_[i])
            for i in round_idx
        ])
    else:
        # context-free policy: the same distribution is used in every round
        behavior_policy_ = np.tile(self.behavior_policy, (n_rounds, 1))
        action = self.random_.choice(np.arange(self.n_actions),
                                     p=self.behavior_policy,
                                     size=n_rounds)
    pscore = behavior_policy_[round_idx, action]

    expected_reward_ = self.calc_expected_reward(context)
    reward = self.sample_reward_given_expected_reward(
        expected_reward_, action)

    if self.reward_type == "continuous":
        # correct expected_reward_, as we use truncated normal distribution here
        untruncated_mean = expected_reward_
        lower = (self.reward_min - untruncated_mean) / self.reward_std
        upper = (self.reward_max - untruncated_mean) / self.reward_std
        expected_reward_ = truncnorm.stats(a=lower,
                                           b=upper,
                                           loc=untruncated_mean,
                                           scale=self.reward_std,
                                           moments="m")

    return dict(
        n_rounds=n_rounds,
        n_actions=self.n_actions,
        context=context,
        action_context=self.action_context,
        action=action,
        position=np.zeros(n_rounds, dtype=int),
        reward=reward,
        expected_reward=expected_reward_,
        pscore=pscore,
    )
def _em_step_body(Z_row, r_lower_row, r_upper_row, sigma, num_ord,
                  num_ord_updates):
    """
    The body of the em algorithm for each row

    Returns a new latent row, latent imputed row and C matrix, which, when
    added to the empirical covariance gives the expected covariance

    Args:
        Z_row (array): (potentially missing) latent entries for one data point
        r_lower_row (array): (potentially missing) lower range of ordinal entries for one data point
        r_upper_row (array): (potentially missing) upper range of ordinal entries for one data point
        sigma (matrix): estimate of covariance
        num_ord (int): the number of ordinal columns
        num_ord_updates (int): number of times to estimate ordinal mean

    Returns:
        C (matrix): results in the updated covariance when added to the empirical covariance
        Z_imp_row (array): Z_row with latent ordinals updated and missing entries imputed
        Z_row (array): input Z_row with latent ordinals updated (modified in place)
    """
    p = Z_row.shape[0]
    Z_imp_row = np.copy(Z_row)
    C = np.zeros((p, p))

    obs_indices = np.where(~np.isnan(Z_row))[0]
    missing_indices = np.where(np.isnan(Z_row))[0]
    # positions (within obs_indices) and column indices of observed ordinals;
    # ordinal columns are those with index below num_ord
    ord_in_obs = np.where(obs_indices < num_ord)[0]
    ord_obs_indices = obs_indices[ord_in_obs]
    has_missing = len(missing_indices) > 0

    # obtain correlation sub-matrices
    # obtain submatrices by indexing a "cartesian-product" of index arrays
    sigma_obs_obs = sigma[np.ix_(obs_indices, obs_indices)]
    sigma_obs_missing = sigma[np.ix_(obs_indices, missing_indices)]
    sigma_missing_missing = sigma[np.ix_(missing_indices, missing_indices)]

    # precompute pseudo-inverse
    sigma_obs_obs_inv = np.linalg.pinv(sigma_obs_obs)

    # precompute sigma_obs_obs_inv * sigma_obs_missing
    if has_missing:
        J_obs_missing = np.matmul(sigma_obs_obs_inv, sigma_obs_missing)

    # initialize vector of variances for observed ordinal dimensions
    var_ordinal = np.zeros(p)

    # OBSERVED ORDINAL ELEMENTS
    # when there is an observed ordinal to be imputed and another observed
    # dimension, impute this ordinal
    if len(obs_indices) >= 2 and len(ord_obs_indices) >= 1:
        for update_iter in range(num_ord_updates):
            is_last_update = update_iter == num_ord_updates - 1
            # used to efficiently compute conditional mean
            sigma_obs_obs_inv_Z_row = np.matmul(sigma_obs_obs_inv,
                                                Z_row[obs_indices])
            for j in ord_obs_indices:
                # position of column j inside the observed sub-matrix
                j_in_obs = np.where(obs_indices == j)[0][0]
                # Plain floats are extracted here (``np.asscalar`` no longer
                # exists in NumPy) so that truncnorm.stats returns scalars
                # that can be stored in single array slots below.
                new_var_ij = float(
                    1.0 / sigma_obs_obs_inv[j_in_obs, j_in_obs])
                new_mean_ij = float(
                    Z_row[j] -
                    new_var_ij * sigma_obs_obs_inv_Z_row[j_in_obs])
                new_std_ij = np.sqrt(new_var_ij)
                # the boundaries must be de-meaned and normalized
                mean, var = truncnorm.stats(
                    a=(r_lower_row[j] - new_mean_ij) / new_std_ij,
                    b=(r_upper_row[j] - new_mean_ij) / new_std_ij,
                    loc=new_mean_ij,
                    scale=new_std_ij,
                    moments='mv')
                if np.isfinite(var):
                    var_ordinal[j] = var
                    if is_last_update:
                        # update the variance estimate
                        C[j, j] = C[j, j] + var
                if np.isfinite(mean):
                    Z_row[j] = mean

    Z_obs = Z_row[obs_indices]
    # mean expectation and imputation
    Z_imp_row[obs_indices] = Z_obs

    # MISSING ELEMENTS
    if has_missing:
        Z_imp_row[missing_indices] = np.matmul(J_obs_missing.T, Z_obs)
        # conditional covariance of the missing entries given the observed
        cond_cov_missing = sigma_missing_missing - np.matmul(
            J_obs_missing.T, sigma_obs_missing)
        missing_block = np.ix_(missing_indices, missing_indices)

        # variance expectation and imputation
        if (len(ord_obs_indices) >= 1 and len(obs_indices) >= 2
                and np.sum(var_ordinal) > 0):
            diag_var_ord = np.diag(var_ordinal[ord_obs_indices])
            cov_missing_obs_ord = np.matmul(J_obs_missing[ord_in_obs].T,
                                            diag_var_ord)
            C[np.ix_(missing_indices,
                     ord_obs_indices)] += cov_missing_obs_ord
            C[np.ix_(ord_obs_indices,
                     missing_indices)] += cov_missing_obs_ord.T
            C[missing_block] += cond_cov_missing + np.matmul(
                cov_missing_obs_ord, J_obs_missing[ord_in_obs])
        else:
            C[missing_block] += cond_cov_missing

    return C, Z_imp_row, Z_row
raise Exception('BilinearTensorLayer must be called on a list of tensors ' '(at least 2). Got: ' + str(inputs)) e1 = inputs[0] e2 = inputs[1] batch_size = K.shape(e1)[0] k = self.output_dim # print([e1,e2]) feed_forward_product = K.dot(K.concatenate([e1,e2]), self.V) # print(feed_forward_product) bilinear_tensor_products = [ K.sum((e2 * K.dot(e1, self.W[0])) + self.b, axis=1) ] # print(bilinear_tensor_products) for i in range(k)[1:]: btp = K.sum((e2 * K.dot(e1, self.W[i])) + self.b, axis=1) bilinear_tensor_products.append(btp) result = K.tanh(K.reshape(K.concatenate(bilinear_tensor_products, axis=0), (batch_size, k)) + feed_forward_product) # print(result) return result def compute_output_shape(self, input_shape): # print (input_shape) batch_size = input_shape[0][0] return (batch_size, self.output_dim) if __name__ == '__main__': import matplotlib.pyplot as plt from scipy.stats import truncnorm fig,ax =plt.subplots(1,1) a,b =0.1,2.0 mean, var, skew, kurt = truncnorm.stats(a, b, moments='mvsk') print mean,var,skew,kurt
# -*- coding: utf-8 -*-
"""
Created on Fri Jan 10 14:22:02 2020

@author: Paul Vincent Nonat
"""
import matplotlib.pyplot as plt
import numpy as np  # required below (np.inf, np.linspace, np.allclose)
from scipy.stats import truncnorm

fig, ax = plt.subplots(1, 1)

# Standard normal truncated to [0, inf), i.e. a half-normal distribution.
a, b = 0, np.inf
mean, var, skew, kurt = truncnorm.stats(a, b, moments='mvsk')

# Plot the density between the 0% and the 99% quantile.
x = np.linspace(truncnorm.ppf(0, a, b), truncnorm.ppf(0.99, a, b), 100)
ax.plot(x,
        truncnorm.pdf(x, a, b),
        'r-',
        lw=5,
        alpha=1,
        label='truncnorm pdf')

mean = 2

# The same density again, via a "frozen" distribution object.
rv = truncnorm(a, b)
ax.plot(x, rv.pdf(x), 'k-', lw=2, label='frozen pdf')

# NOTE(review): ``b`` (= inf) is passed as a quantile here although valid
# quantiles lie in [0, 1], and the result of ``np.allclose`` is discarded --
# this looks like an unfinished ppf/cdf round-trip check; confirm intent.
vals = truncnorm.ppf([0.1, 0.1, b], a, b)
np.allclose([0.0001, 1.5, 2], truncnorm.cdf(vals, a, b))

# Histogram of random samples drawn from the same distribution.
r = truncnorm.rvs(a, b, size=1000)
ax.hist(r, density=True, histtype='stepfilled', alpha=1)
ax.legend(loc='best', frameon=False)
def obtain_batch_bandit_feedback(self, n_rounds: int) -> BanditFeedback:
    """Synthesize one batch of logged bandit data.

    Parameters
    ----------
    n_rounds: int
        Data size of the synthetic logged bandit data.

    Returns
    ---------
    bandit_feedback: BanditFeedback
        Synthesized logged bandit data.

    """
    check_scalar(n_rounds, "n_rounds", int, min_val=1)
    contexts = self.random_.normal(size=(n_rounds, self.dim_context))

    # calc expected reward given context and action
    expected_reward_ = self.calc_expected_reward(contexts)
    if RewardType(self.reward_type) == RewardType.CONTINUOUS:
        # correct expected_reward_, as we use truncated normal distribution here
        untruncated_mean = expected_reward_
        lower = (self.reward_min - untruncated_mean) / self.reward_std
        upper = (self.reward_max - untruncated_mean) / self.reward_std
        expected_reward_ = truncnorm.stats(a=lower,
                                           b=upper,
                                           loc=untruncated_mean,
                                           scale=self.reward_std,
                                           moments="m")

    # calculate the action choice probabilities of the behavior policy
    if self.behavior_policy_function is not None:
        pi_b_logits = self.behavior_policy_function(
            context=contexts,
            action_context=self.action_context,
            random_state=self.random_state,
        )
    else:
        # without a dedicated policy function the expected rewards serve
        # as logits
        pi_b_logits = expected_reward_

    # create some deficient actions based on the value of `n_deficient_actions`
    if self.n_deficient_actions > 0:
        pi_b = np.zeros_like(pi_b_logits)
        n_supported_actions = self.n_actions - self.n_deficient_actions
        # per round, keep the actions with the largest Gumbel noise as the
        # supported ones
        gumbel_noise = self.random_.gumbel(size=(n_rounds, self.n_actions))
        descending_order = np.argsort(gumbel_noise, axis=1)[:, ::-1]
        supported_actions = descending_order[:, :n_supported_actions]
        row_idx = np.tile(np.arange(n_rounds), (n_supported_actions, 1)).T
        supported_actions_idx = (row_idx, supported_actions)
        # unsupported actions keep probability zero
        pi_b[supported_actions_idx] = softmax(
            self.beta * pi_b_logits[supported_actions_idx])
    else:
        pi_b = softmax(self.beta * pi_b_logits)

    # sample actions for each round based on the behavior policy
    actions = sample_action_fast(pi_b, random_state=self.random_state)

    # sample rewards based on the context and action
    rewards = self.sample_reward_given_expected_reward(
        expected_reward_, actions)

    return dict(
        n_rounds=n_rounds,
        n_actions=self.n_actions,
        context=contexts,
        action_context=self.action_context,
        action=actions,
        position=None,  # position effect is not considered in synthetic data
        reward=rewards,
        expected_reward=expected_reward_,
        pi_b=pi_b[:, :, np.newaxis],
        pscore=pi_b[np.arange(n_rounds), actions],
    )
def obtain_batch_bandit_feedback(self, n_rounds: int) -> BanditFeedback:
    """Obtain batch logged bandit feedback.

    Parameters
    ----------
    n_rounds: int
        Number of rounds for synthetic bandit feedback data.

    Returns
    ---------
    bandit_feedback: BanditFeedback
        Generated synthetic bandit feedback dataset.

    Raises
    ------
    AssertionError
        If `n_rounds` is not a positive integer.
    ValueError
        If `self.reward_type` is neither "binary" nor "continuous".

    """
    # The type check comes first so that a non-numeric `n_rounds` fails this
    # assertion instead of raising a TypeError from the comparison.
    assert isinstance(n_rounds, int) and n_rounds > 0, (
        f"n_rounds must be a positive integer, but {n_rounds} is given")

    round_idx = np.arange(n_rounds)
    context = self.random_.normal(size=(n_rounds, self.dim_context))

    # sample actions for each round based on the behavior policy
    if self.behavior_policy_function is None:
        behavior_policy_ = np.tile(self.behavior_policy, (n_rounds, 1))
        action = self.random_.choice(np.arange(self.n_actions),
                                     p=self.behavior_policy,
                                     size=n_rounds)
    else:
        behavior_policy_ = self.behavior_policy_function(
            context=context,
            action_context=self.action_context,
            random_state=self.random_state,
        )
        candidate_actions = np.arange(self.n_actions)
        action = np.array([
            self.random_.choice(candidate_actions, p=behavior_policy_[i])
            for i in round_idx
        ])
    pscore = behavior_policy_[round_idx, action]

    # sample reward for each round based on the reward function
    if self.reward_function is None:
        expected_reward_ = np.tile(self.expected_reward, (n_rounds, 1))
    else:
        expected_reward_ = self.reward_function(
            context=context,
            action_context=self.action_context,
            random_state=self.random_state,
        )
    expected_reward_factual = expected_reward_[round_idx, action]

    if self.reward_type == "binary":
        reward = self.random_.binomial(n=1, p=expected_reward_factual)
    elif self.reward_type == "continuous":
        # rewards follow a unit-variance normal truncated to [0, 1e10]
        min_, max_ = 0, 1e10
        mean, std = expected_reward_factual, 1.0
        a, b = (min_ - mean) / std, (max_ - mean) / std
        reward = truncnorm.rvs(a=a,
                               b=b,
                               loc=mean,
                               scale=std,
                               random_state=self.random_state)
        # correct expected_reward_, as we use truncated normal distribution here
        mean = expected_reward_
        a, b = (min_ - mean) / std, (max_ - mean) / std
        expected_reward_ = truncnorm.stats(a=a,
                                           b=b,
                                           loc=mean,
                                           scale=std,
                                           moments="m")
    else:
        # Without this branch `reward` would be unbound at the return below.
        raise ValueError(
            "reward_type must be either 'binary' or 'continuous', "
            f"but {self.reward_type} is given")

    return dict(
        n_rounds=n_rounds,
        n_actions=self.n_actions,
        context=context,
        action_context=self.action_context,
        action=action,
        position=np.zeros(n_rounds, dtype=int),
        reward=reward,
        expected_reward=expected_reward_,
        pscore=pscore,
    )