def cumsum(v, strict=False):
    if not strict:
        return np.cumsum(v, axis=0)
    else:
        out = np.zeros_like(v)
        out[1:] = np.cumsum(v[:-1], axis=0)
        return out
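# A minimal usage sketch (an illustrative addition, assuming numpy is imported
# as np): strict=True yields an exclusive cumulative sum, shifted by one step.
v = np.array([[1.], [2.], [3.]])
print(cumsum(v))               # [[1.] [3.] [6.]]
print(cumsum(v, strict=True))  # [[0.] [1.] [3.]]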
def lagrangian(self, angles, omegas):
    y = np.cumsum(self.lengths * np.cos(angles))
    x_dot = np.cumsum(self.lengths * np.cos(angles) * omegas)
    y_dot = np.cumsum(-self.lengths * np.sin(angles) * omegas)
    V = np.sum(y * self.masses) * self.g
    T = 0.5 * np.sum(self.masses * (x_dot**2 + y_dot**2))
    return T - V
def transition_matrix(self):
    if self._transition_matrix is not None:
        return self._transition_matrix

    As, rs, ps = self.Ps, self.rs, self.ps

    # Fill in the transition matrix one block at a time
    K_total = self.total_num_states
    P = np.zeros((K_total, K_total))
    starts = np.concatenate(([0], np.cumsum(rs)[:-1]))
    ends = np.cumsum(rs)
    for (i, j), Aij in np.ndenumerate(As):
        block = P[starts[i]:ends[i], starts[j]:ends[j]]

        # Diagonal blocks (stay in sub-state or advance to next sub-state)
        if i == j:
            for k in range(rs[i]):
                # p(z_{t+1} = (.,i+k) | z_t = (.,i)) = (1-p)^k p for 0 <= k <= r - i
                block += (1 - ps[i])**k * ps[i] * np.diag(np.ones(rs[i] - k), k=k)

        # Off-diagonal blocks (exit to a new super state)
        else:
            # p(z_{t+1} = (j,1) | z_t = (k,i)) = (1-p_k)^{r_k-i+1} * A[k, j]
            block[:, 0] = (1 - ps[i])**np.arange(rs[i], 0, -1) * Aij

    assert np.allclose(P.sum(1), 1)
    assert (0 <= P).all() and (P <= 1.).all()

    # Cache the transition matrix
    self._transition_matrix = P
    return P
def get_repaneled_airfoil(self, n_points_per_side=100):
    # Returns a repaneled version of the airfoil with cosine-spaced coordinates
    # on the upper and lower surfaces.
    # Inputs:
    #   n_points_per_side is the number of points PER SIDE (upper and lower) of
    #   the airfoil. 100 is a good number.
    # Notes: The number of points defining the final airfoil will be
    # n_points_per_side*2-1, since one point (the leading edge point) is shared
    # by both the upper and lower surfaces.
    upper_original_coors = self.upper_coordinates()  # Note: includes leading edge point, be careful about duplicates
    lower_original_coors = self.lower_coordinates()  # Note: includes leading edge point, be careful about duplicates

    # Find distances between coordinates, assuming linear interpolation
    upper_distances_between_points = np.sqrt(
        np.power(upper_original_coors[:-1, 0] - upper_original_coors[1:, 0], 2) +
        np.power(upper_original_coors[:-1, 1] - upper_original_coors[1:, 1], 2))
    lower_distances_between_points = np.sqrt(
        np.power(lower_original_coors[:-1, 0] - lower_original_coors[1:, 0], 2) +
        np.power(lower_original_coors[:-1, 1] - lower_original_coors[1:, 1], 2))
    upper_distances_from_TE = np.hstack((0, np.cumsum(upper_distances_between_points)))
    lower_distances_from_LE = np.hstack((0, np.cumsum(lower_distances_between_points)))
    upper_distances_from_TE_normalized = upper_distances_from_TE / upper_distances_from_TE[-1]
    lower_distances_from_LE_normalized = lower_distances_from_LE / lower_distances_from_LE[-1]

    # Generate a cosine-spaced list of points from 0 to 1
    s = cosspace(n_points=n_points_per_side)

    x_upper_func = sp_interp.PchipInterpolator(
        x=upper_distances_from_TE_normalized, y=upper_original_coors[:, 0])
    y_upper_func = sp_interp.PchipInterpolator(
        x=upper_distances_from_TE_normalized, y=upper_original_coors[:, 1])
    x_lower_func = sp_interp.PchipInterpolator(
        x=lower_distances_from_LE_normalized, y=lower_original_coors[:, 0])
    y_lower_func = sp_interp.PchipInterpolator(
        x=lower_distances_from_LE_normalized, y=lower_original_coors[:, 1])

    x_coors = np.hstack((x_upper_func(s), x_lower_func(s)[1:]))
    y_coors = np.hstack((y_upper_func(s), y_lower_func(s)[1:]))
    coordinates = np.column_stack((x_coors, y_coors))

    # Make a new airfoil with the coordinates
    name = self.name + ", repaneled to " + str(n_points_per_side) + " pts"
    new_airfoil = Airfoil(name=name, coordinates=coordinates, repanel=False)
    return new_airfoil
def project_simplex_bounded(r, lb, ub):
    assert lb.sum() <= 1 and ub.sum() >= 1 and np.all(lb <= ub), 'not feasible'
    lambdas = np.append(lb - r, ub - r)
    idx = np.argsort(lambdas)
    lambdas = lambdas[idx]
    active = np.cumsum((idx < r.size) * 2 - 1)[:-1]
    diffs = np.diff(lambdas, n=1)
    totals = lb.sum() + np.cumsum(active * diffs)
    i = np.searchsorted(totals, 1.0)
    lam = (1 - totals[i]) / active[i] + lambdas[i + 1]
    return np.clip(r + lam, lb, ub)
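# A minimal usage sketch (an illustrative addition, not from the source):
# project a point onto the probability simplex subject to box constraints.
r = np.array([0.4, 0.9, -0.2])
p = project_simplex_bounded(r, lb=np.zeros(3), ub=np.ones(3))
print(p, p.sum())  # [0.25 0.75 0.  ], sums to 1.0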
def flatten(value):
    """Flattens any nesting of tuples, arrays, or dicts.
    Returns 1D numpy array and an unflatten function.
    Doesn't preserve mixed numeric types (e.g. floats and ints).
    Assumes dict keys are sortable."""
    if isinstance(getval(value), np.ndarray):
        shape = value.shape

        def unflatten(vector):
            return np.reshape(vector, shape)
        return np.ravel(value), unflatten
    elif isinstance(getval(value), (float, int)):
        return np.array([value]), lambda x: x[0]
    elif isinstance(getval(value), (tuple, list)):
        constructor = type(getval(value))
        if not value:
            return np.array([]), lambda x: constructor()
        flat_pieces, unflatteners = zip(*map(flatten, value))
        split_indices = np.cumsum([len(vec) for vec in flat_pieces[:-1]])

        def unflatten(vector):
            pieces = np.split(vector, split_indices)
            return constructor(
                unflatten(v) for unflatten, v in zip(unflatteners, pieces))
        return np.concatenate(flat_pieces), unflatten
    elif isinstance(getval(value), dict):
        items = sorted(iteritems(value), key=itemgetter(0))
        keys, flat_pieces, unflatteners = zip(*[(k,) + flatten(v) for k, v in items])
        split_indices = np.cumsum([len(vec) for vec in flat_pieces[:-1]])

        def unflatten(vector):
            pieces = np.split(vector, split_indices)
            return {key: unflattener(piece)
                    for piece, unflattener, key in zip(pieces, unflatteners, keys)}
        return np.concatenate(flat_pieces), unflatten
    else:
        raise Exception("Don't know how to flatten type {}".format(type(value)))
def simple_five_pop_demo(x=np.random.normal(size=30)):
    assert len(x) == 30

    # make all params positive
    x = np.exp(x)

    # # allow negative growth rates
    # for i in range(15, 20):
    #     x[i] = np.log(x[i])
    # # make times increasing
    # for i in range(1, 15):
    #     x[i] = x[i] + x[i-1]

    t = np.cumsum(x[:15])
    # allow negative growth rates
    g = np.log(x[15:20])

    model = momi.DemographicModel(1.0, .25)
    for pop in range(1, 6):
        model.add_leaf(pop)
    model.set_size(5, t[0], g=g[0])
    model.set_size(4, t[1], g=g[1])
    model.set_size(3, t[2], g=g[2])
    model.set_size(2, t[3], g=g[3])
    model.set_size(1, t[4], g=g[4])
    model.move_lineages(5, 4, t=t[5], N=x[20])
    model.set_size(3, t=t[6], N=x[21])
    model.set_size(2, t=t[7], N=x[22])
    model.set_size(1, t[8], N=x[23])
    model.move_lineages(4, 3, t[9], N=x[24])
    model.set_size(2, t[10], N=x[25])
    model.set_size(1, t[11], N=x[26])
    model.move_lineages(3, 2, t[12], N=x[27])
    model.set_size(1, t[13], N=x[28])
    model.move_lineages(2, 1, t[14], N=x[29])
    return model
def Fit(self, X, Y, **kwargs):
    self.cov = np.cov(Y.T)
    if not self.cov.shape:
        # you could be supplied with a 1-feature data set, in which case
        # self.cov is just a number
        self.eigval = self.cov
        self.eigvec = np.eye(1)
        self.cov = self.cov.reshape(-1, 1)
    else:
        self.eigval, self.eigvec = np.linalg.eigh(self.cov)
        idx = self.eigval.argsort()[::-1]
        self.eigval = self.eigval[idx]
        self.eigvec = self.eigvec[:, idx]

    if self.percentage is not None:
        total_val = sum(self.eigval)
        running_fraction = np.cumsum(self.eigval) / total_val
        self.component = np.searchsorted(running_fraction, self.percentage)
        if self.component == 0:
            self.component = 1

    assert (self.component <= Y.shape[1]), \
        "number of components cannot exceed number of variables"
    self.reconstruction_error = np.sum(self.eigval[self.component:]) / self.cov.shape[0]
    if self.reconstruction_error is None or np.isnan(self.reconstruction_error):
        self.reconstruction_error = 0
    self.eigval = self.eigval[0:self.component]
    self.eigvec = self.eigvec[:, 0:self.component]
def _initialize_with_pca(self, datas, inputs=None, masks=None, tags=None, num_iters=20):
    for data in datas:
        assert data.shape[1] == self.N

    N_offsets = np.cumsum(self.N_vec)[:-1]
    pcas = []

    split_datas = list(zip(*[np.split(data, N_offsets, axis=1) for data in datas]))
    split_masks = list(zip(*[np.split(mask, N_offsets, axis=1) for mask in masks]))
    assert len(split_masks) == len(split_datas) == self.P

    for em, dps, mps in zip(self.emissions_models, split_datas, split_masks):
        pcas.append(em._initialize_with_pca(dps, inputs, mps, tags))

    # Combine the PCA objects
    from sklearn.decomposition import PCA
    pca = PCA(self.D)
    pca.components_ = block_diag(*[p.components_ for p in pcas])
    pca.mean_ = np.concatenate([p.mean_ for p in pcas])

    # Not super pleased with this, but it should work...
    pca.noise_variance_ = np.concatenate(
        [p.noise_variance_ * np.ones(n) for p, n in zip(pcas, self.N_vec)])
    return pca
def pca_with_imputation(D, datas, masks, num_iters=20):
    if isinstance(datas, (list, tuple)) and isinstance(masks, (list, tuple)):
        data = np.concatenate(datas)
        mask = np.concatenate(masks)

    if np.any(~mask):
        # Fill in missing data with mean to start
        fulldata = data.copy()
        for n in range(fulldata.shape[1]):
            fulldata[~mask[:, n], n] = fulldata[mask[:, n], n].mean()

        for itr in range(num_iters):
            # Run PCA on imputed data
            pca = PCA(D)
            x = pca.fit_transform(fulldata)

            # Fill in missing data with PCA predictions
            pred = pca.inverse_transform(x)
            fulldata[~mask] = pred[~mask]
    else:
        pca = PCA(D)
        x = pca.fit_transform(data)

    # Unpack xs
    xs = np.split(x, np.cumsum([len(data) for data in datas])[:-1])
    assert len(xs) == len(datas)
    assert all([x.shape[0] == data.shape[0] for x, data in zip(xs, datas)])

    return pca, xs
def render(self):
    low, high = 0.1, 0.9
    angles, _ = self.state
    canvas = np.zeros((self.num_pix, self.num_pix)) + low
    radius = np.sum(self.lengths)
    joint_coords_x = np.cumsum(self.lengths * np.sin(angles)) / radius / 1.2
    joint_coords_y = np.cumsum(self.lengths * np.cos(angles)) / radius / 1.2
    joint_coords_x = np.concatenate((np.zeros(1), joint_coords_x))
    joint_coords_y = np.concatenate((np.zeros(1), joint_coords_y))
    joint_coords = np.concatenate(
        (joint_coords_x[:, None], joint_coords_y[:, None]), axis=1)
    canvas_coords = array_meshgrid(self.num_pix)
    for point_A, point_B in zip(joint_coords[:-1], joint_coords[1:]):
        D = distance_to_segment(point_A, point_B, canvas_coords)
        canvas = np.maximum(canvas, high * np.exp(-((D / self.width) * 20)**4))
    return canvas
def inv_cdf_sampler(target, n=1, bounds=(-10, 10, 1000)):
    """
    Random variable sampler using the interpolated inverse CDF method.

    Args:
        target (callable): (possibly unnormalized) density function,
            evaluated on a grid of ticks.
        n (int): number of samples; must be a positive integer or None.
            If n is a positive int, returns an np.array of length n.
            If n is None, returns a scalar sample from the distribution.
        bounds (tuple or list): (lower bound, upper bound, number of ticks).
            e.g. (-10, 10, 10000) creates 10000 ticks between -10 and 10.

    Returns:
        float or np.array([float]) of samples
    """
    ll = np.linspace(*bounds)
    cdf_data = np.cumsum(target(ll)) * (ll[1] - ll[0])
    cdf_data /= cdf_data[-1]
    cdf_inv = sp.interpolate.interp1d(cdf_data, ll)
    return cdf_inv(np.random.uniform(size=n))
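# A minimal usage sketch (an illustrative addition, assuming numpy as np and
# scipy as sp): sample from a standard normal by passing its unnormalized
# density; normalization happens inside via the cumsum-based CDF.
samples = inv_cdf_sampler(lambda x: np.exp(-0.5 * x**2), n=1000)
print(samples.mean(), samples.std())  # roughly 0 and 1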
def unflatten(vector):
    split_ixs = np.cumsum(lengths)
    pieces = np.split(vector, split_ixs)
    return {key: unflattener(piece)
            for piece, unflattener, key in zip(pieces, unflatteners, keys)}
def initialize(self, x, u, **kwargs):
    localize = kwargs.get('localize', True)

    Ts = [_x.shape[0] for _x in x]
    if localize:
        from sklearn.cluster import KMeans
        km = KMeans(self.nb_states, random_state=1)
        km.fit(np.vstack(x))
        zs = np.split(km.labels_, np.cumsum(Ts)[:-1])
        zs = [z[:-1] for z in zs]
    else:
        zs = [npr.choice(self.nb_states, size=T - 1) for T in Ts]

    _cov = np.zeros((self.nb_states, self.dm_obs, self.dm_obs))
    for k in range(self.nb_states):
        ts = [np.where(z == k)[0] for z in zs]
        xs = [np.hstack((_x[t, :], _u[t, :])) for t, _x, _u in zip(ts, x, u)]
        ys = [_x[t + 1, :] for t, _x in zip(ts, x)]

        coef_, intercept_, sigma = linear_regression(xs, ys)
        self.A[k, ...] = coef_[:, :self.dm_obs]
        self.B[k, ...] = coef_[:, self.dm_obs:]
        self.c[k, :] = intercept_

        _cov[k, ...] = sigma

    self.cov = _cov
def get_downsampled_mcl(self, mcl_fractions):
    # Returns the mean camber line in downsampled form
    mcl = self.mcl_coordinates

    # Find distances along mcl, assuming linear interpolation
    mcl_distances_between_points = np.sqrt(
        np.power(mcl[:-1, 0] - mcl[1:, 0], 2) +
        np.power(mcl[:-1, 1] - mcl[1:, 1], 2))
    mcl_distances_cumulative = np.hstack((0, np.cumsum(mcl_distances_between_points)))
    mcl_distances_cumulative_normalized = mcl_distances_cumulative / mcl_distances_cumulative[-1]

    mcl_downsampled_x = np.interp(
        x=mcl_fractions,
        xp=mcl_distances_cumulative_normalized,
        fp=mcl[:, 0])
    mcl_downsampled_y = np.interp(
        x=mcl_fractions,
        xp=mcl_distances_cumulative_normalized,
        fp=mcl[:, 1])

    mcl_downsampled = np.column_stack((mcl_downsampled_x, mcl_downsampled_y))
    return mcl_downsampled
def forward(self, x, input, tag):
    assert x.shape[1] == self.D
    D_offsets = np.cumsum(self.D_vec)[:-1]
    datas = []
    for em, xp in zip(self.emissions_models, np.split(x, D_offsets, axis=1)):
        datas.append(em.forward(xp, input, tag))
    return np.concatenate(datas, axis=2)
def main(argv):
    del argv  # Unused.

    x_scale = 0.1
    y_scale = 1.
    T = 50
    x_list = np.cumsum(x_scale * np.random.randn(T))
    y_list = np.array([x_list[t] + y_scale * np.random.randn() for t in range(T)])

    marginal = make_marginal_fn()
    marginal_grad = grad(lambda y_list, scales: marginal(y_list, *scales), 1)

    x_scale_est = 0.1
    y_scale_est = 1.
    step_size = 0.5 / T
    for i in range(100):
        t0 = time.time()
        x_scale_grad, y_scale_grad = marginal_grad(y_list, (x_scale_est, y_scale_est))
        x_scale_est *= np.exp(step_size * x_scale_est * x_scale_grad)
        y_scale_est *= np.exp(step_size * y_scale_est * y_scale_grad)
        print('{}\t{}\t{}\t{}\t{}'.format(
            time.time() - t0, i,
            marginal(y_list, x_scale_est, y_scale_est),
            x_scale_est, y_scale_est))
def initialize(self, datas, inputs=None, masks=None, tags=None, init_method="random"):
    Ts = [data.shape[0] for data in datas]

    # Get initial discrete states
    if init_method.lower() == 'kmeans':
        # KMeans clustering
        from sklearn.cluster import KMeans
        km = KMeans(self.K)
        km.fit(np.vstack(datas))
        zs = np.split(km.labels_, np.cumsum(Ts)[:-1])
    elif init_method.lower() == 'random':
        # Random assignment
        zs = [npr.choice(self.K, size=T) for T in Ts]
    else:
        raise Exception('Not an accepted initialization type: {}'.format(init_method))

    # Make a one-hot encoding of z and treat it as HMM expectations
    Ezs = [one_hot(z, self.K) for z in zs]
    expectations = [(Ez, None, None) for Ez in Ezs]

    # Set the variances all at once to use the setter
    self.m_step(expectations, datas, inputs, masks, tags)
def to_diffable_arr(proba_KV, min_eps=MIN_EPS, do_force_safe=False):
    ''' Transform normalized topics to unconstrained space.

    Args
    ----
    proba_KV : 2D array, size K x V
        minimum value of any entry must be min_eps
        each row should sum to 1.0

    Returns
    -------
    reals_KVm1 : 2D array, size K x (V-1)
        unconstrained real values

    Examples
    --------
    >>> np.set_printoptions(precision=3)
    >>> V = 4
    >>> unif_1V = np.ones((1,V)) / float(V)
    >>> to_diffable_arr(unif_1V)
    array([[ 2.22e-16, -1.11e-16,  0.00e+00]])

    >>> rand_1V = np.asarray([[ 0.11, 0.22, 0.33, 0.20, 0.14 ]])
    >>> to_diffable_arr(rand_1V)
    array([[-0.704, -0.015,  0.663,  0.357]])
    '''
    assert proba_KV.ndim == 2
    K, V = proba_KV.shape
    offset_Vm1 = -1.0 * np.log(V - np.arange(1.0, V))
    cumsum_KV1m = np.maximum(1e-100, 1.0 - np.cumsum(proba_KV[:, :-1], axis=1))
    fracs_KV = np.hstack([proba_KV[:, :1], proba_KV[:, 1:] / cumsum_KV1m])
    reals_KVm1 = inv_logistic_sigmoid(fracs_KV[:, :-1]) - offset_Vm1
    return reals_KVm1
def _simplex_projection(x):
    u = np.sort(x)[::-1]
    idcs = np.arange(1, u.shape[0] + 1)
    rho_nz = u + 1. / idcs * (1. - np.cumsum(u)) > 0
    rho = idcs[rho_nz].max()
    lmb = 1. / rho * (1. - u[:rho].sum())
    out = np.maximum(x + lmb, 0.)
    return out / out.sum()
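# A minimal usage check (an illustrative addition, not from the source): the
# projection is nonnegative and sums to one.
p = _simplex_projection(np.array([0.5, 0.8, -0.1]))
print(p, p.sum())  # [0.35 0.65 0.  ], sums to 1.0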
def get_e_log_cluster_probabilities_from_e_log_stick(e_log_v, e_log_1mv):
    zeros_shape = e_log_v.shape[0:-1] + (1,)

    e_log_stick_remain = np.concatenate(
        [np.zeros(zeros_shape), np.cumsum(e_log_1mv, axis=-1)], axis=-1)
    e_log_new_stick = np.concatenate((e_log_v, np.zeros(zeros_shape)), axis=-1)

    return (e_log_stick_remain + e_log_new_stick).squeeze()
def rank_by_variance(X, q, var_percentage=0.8):
    if q is not None:
        return q
    U, Σ, V = np.linalg.svd(X, full_matrices=False)
    rank_sorted = np.cumsum(Σ) / np.sum(Σ)
    rank = np.sum(rank_sorted < var_percentage) + 1
    return rank
def sample(self, n_samples=2000, observed_states=None, random_state=None):
    """Generate random samples from the model.

    Parameters
    ----------
    n_samples : int
        Number of samples to generate.
    observed_states : array
        If provided, states are not sampled.
    random_state : RandomState or an int seed
        A random number generator instance. If None is given, the
        object's random_state is used.

    Returns
    -------
    samples : array_like, length (``n_samples``)
        List of samples
    states : array_like, shape (``n_samples``)
        List of hidden states (accounting for tied states by giving
        them the same index)
    """
    if random_state is None:
        random_state = self.random_state
    random_state = check_random_state(random_state)

    samples = np.zeros(n_samples)
    # Integer dtype so the states can be used as array indices below.
    states = np.zeros(n_samples, dtype=int)

    if observed_states is None:
        startprob_pdf = np.exp(np.copy(self._log_startprob))
        startdist = stats.rv_discrete(
            name='custm',
            values=(np.arange(startprob_pdf.shape[0]), startprob_pdf),
            seed=random_state)
        states[0] = startdist.rvs(size=1)[0]

        transmat_pdf = np.exp(np.copy(self._log_transmat))
        transmat_cdf = np.cumsum(transmat_pdf, 1)

        nrand = random_state.rand(n_samples)
        for idx in range(1, n_samples):
            newstate = (transmat_cdf[states[idx - 1]] > nrand[idx - 1]).argmax()
            states[idx] = newstate
    else:
        states = observed_states

    mu = np.copy(self._mu_)
    precision = np.copy(self._precision_)
    for idx in range(n_samples):
        mean_ = self._mu_[states[idx]]
        var_ = np.sqrt(1 / precision[states[idx]])
        samples[idx] = norm.rvs(loc=mean_, scale=var_, size=1,
                                random_state=random_state)
    states = self._process_sequence(states)
    return samples, states
def stochastic_iterate_averaging(estimate, start):
    N = estimate.shape[0]
    if N - start <= 0:
        # NOTE: raising a bare string is invalid in Python 3; use an exception.
        raise ValueError(
            "Start of stationary distribution must be lower than number of iterates")

    window_lengths = np.reshape(np.arange(start, N) - start + 1, [-1, 1])
    estimate_iters = np.cumsum(estimate[start:, :], axis=0) / window_lengths
    estimate_mean = estimate_iters[-1]

    return (estimate_iters, estimate_mean)
def _invert(self, data, input, mask, tag):
    assert data.shape[1] == self.N
    N_offsets = np.cumsum(self.N_vec)[:-1]
    states = []
    for em, dp, mp in zip(self.emissions_models,
                          np.split(data, N_offsets, axis=1),
                          np.split(mask, N_offsets, axis=1)):
        states.append(em._invert(dp, input, mp, tag))
    return np.column_stack(states)
def mpi_split(work_size, comm_size):
    base = work_size // comm_size
    leftover = int(work_size % comm_size)
    sizes = numpy.ones(comm_size, dtype=int) * base
    sizes[:leftover] += 1
    offsets = numpy.zeros(comm_size, dtype=int)
    offsets[1:] = numpy.cumsum(sizes)[:-1]
    return sizes, offsets
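# A minimal usage sketch (an illustrative addition): split 10 work items
# across 3 MPI ranks; the leftover item goes to the first rank.
sizes, offsets = mpi_split(10, 3)
print(sizes, offsets)  # [4 3 3] [0 4 7]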
def stick_forward(x_):
    x = x_.T

    # reverse cumsum
    x0 = x[:-1]
    s = np.cumsum(x0[::-1], 0)[::-1] + x[-1]
    z = x0 / s
    Km1 = x.shape[0] - 1
    k = np.arange(Km1)[(slice(None),) + (None,) * (x.ndim - 1)]
    eq_share = logit(1. / (Km1 + 1 - k))  # - np.log(Km1 - k)
    y = logit(z) - eq_share
    return y.T
def resampling(w, rs):
    """
    Stratified resampling with "nograd_primitive" to ensure autograd
    takes no derivatives through it.
    """
    N = w.shape[0]
    bins = np.cumsum(w)
    ind = np.arange(N)
    u = (ind + rs.rand(N)) / N
    return np.digitize(u, bins)
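# A minimal usage sketch (an illustrative addition, not from the source):
# resample particle indices in proportion to normalized weights w.
rs = np.random.RandomState(0)
w = np.array([0.1, 0.2, 0.3, 0.4])
print(resampling(w, rs))  # indices into w; heavier particles recur more often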
def get_e_num_large_clusters_from_ez(e_z,
                                     threshold=0,
                                     n_samples=None,
                                     unif_samples=None):
    """
    Computes the expected number of clusters with at least ``threshold``
    observations from cluster belongings e_z.

    Parameters
    ----------
    e_z : ndarray
        Array whose (n, k)th entry is the probability of the nth
        datapoint belonging to cluster k.
    n_samples : int
        Number of Monte Carlo samples used to compute the expected
        number of clusters.
    unif_samples : ndarray, optional
        The user may pass in a precomputed array of uniform random
        variables on which the reparameterization trick is applied to
        compute the expected number of clusters.

    Returns
    -------
    tuple
        Monte Carlo mean and variance of the number of clusters with at
        least ``threshold`` observations in a dataset the same size as e_z.
    """
    n_obs = e_z.shape[0]
    n_clusters = e_z.shape[1]

    # draw uniform samples
    if unif_samples is None:
        assert n_samples is not None
        unif_samples = np.random.random((n_obs, n_samples))
    else:
        assert unif_samples is not None
        assert unif_samples.shape[0] == n_obs
        n_samples = unif_samples.shape[1]

    e_z_cumsum = np.cumsum(e_z, axis=1)

    num_heavy_clusters_vec = np.zeros(n_samples)

    # z_sample is an n_obs x n_samples matrix of cluster belongings
    z_sample = _get_clusters_from_ez_and_unif_samples(e_z_cumsum, unif_samples)

    for i in range(n_clusters):
        # get number of clusters with at least enough points above the threshold
        num_heavy_clusters_vec += np.sum(z_sample == i, axis=0) > threshold

    return np.mean(num_heavy_clusters_vec), np.var(num_heavy_clusters_vec)
def tau_update(e_z, alpha):
    k_approx = np.shape(e_z)[1]
    sum_e_z = np.sum(e_z, axis=0)
    sum_e_z_upper = np.cumsum(sum_e_z[::-1])[::-1]
    # cum_sum_z = np.concatenate(([0.0], np.cumsum(sum_e_z)[:-2]))

    tau_update = np.zeros((k_approx - 1, 2))
    tau_update[:, 0] = sum_e_z[:-1] + 1
    tau_update[:, 1] = alpha + sum_e_z_upper[1:]

    return tau_update
def simple_admixture_demo(x=np.random.normal(size=7)):
    t = np.cumsum(np.exp(x[:5]))
    p = 1.0 / (1.0 + np.exp(x[5:]))

    ret = momi.DemographicModel(1., .25)
    ret.add_leaf("b")
    ret.add_leaf("a")
    ret.move_lineages("a", 2, t[1], p=1. - p[1])
    ret.move_lineages("a", 3, t[0], p=1. - p[0])
    ret.move_lineages(2, 3, t[2])
    ret.move_lineages(3, "b", t[3])
    ret.move_lineages("a", "b", t[4])
    return ret
def inds_to_effect_change(leverage, desired_delta):
    # Argsort sorts low to high.
    # We are removing points, so multiply by -1.
    sort_inds = np.argsort(leverage * np.sign(desired_delta))
    deltas = -1 * np.cumsum(leverage[sort_inds])
    change_sign_inds = np.argwhere(
        np.sign(desired_delta) * (desired_delta - deltas) <= 0.)
    if len(change_sign_inds) > 0:
        first_ind_change_sign = np.min(change_sign_inds)
        remove_inds = sort_inds[:(first_ind_change_sign + 1)]
        return remove_inds
    else:
        return None
def projectSimplex(mat):
    """ project each row vector to the simplex
    """
    nPoints, nVars = mat.shape
    mu = np.fliplr(np.sort(mat, axis=1))
    sum_hist = np.cumsum(mu, axis=1)
    flag = (mu - 1. / np.tile(np.arange(1, nVars + 1), (nPoints, 1)) * (sum_hist - 1) > 0)

    f_flag = lambda flagPoint: len(flagPoint) - 1 - flagPoint[::-1].argmax()
    # Materialize the map so it can be used for fancy indexing under Python 3,
    # where map() returns a lazy iterator.
    lastTrue = np.array(list(map(f_flag, flag)))
    sm_row = sum_hist[np.arange(nPoints), lastTrue]

    theta = (sm_row - 1) * 1. / (lastTrue + 1.)

    w = np.maximum(mat - np.tile(theta, (nVars, 1)).T, 0.)
    return w
def projectSimplex_vec(v):
    """ project vector v onto the probability simplex

    Parameters
    ----------
    v : shape (nVars,)
        input vector

    Returns
    -------
    w : shape (nVars,)
        projection of v onto the probability simplex
    """
    nVars = v.shape[0]
    mu = np.sort(v, kind='quicksort')[::-1]
    sm_hist = np.cumsum(mu)
    flag = (mu - 1. / np.arange(1, nVars + 1) * (sm_hist - 1) > 0)

    lastTrue = len(flag) - 1 - flag[::-1].argmax()
    sm_row = sm_hist[lastTrue]

    theta = 1. / (lastTrue + 1) * (sm_row - 1)

    w = np.maximum(v - theta, 0.)
    return w
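# A minimal usage check (an illustrative addition): the projected vector lies
# on the probability simplex.
w = projectSimplex_vec(np.array([0.3, 0.3, 0.6]))
print(w, w.sum())  # approx [0.233 0.233 0.533], sums to 1.0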
def fun(x):
    return to_scalar(np.cumsum(x))

d_fun = lambda x: to_scalar(grad(fun)(x))
def moving_average(a, n=10):
    ret = np.cumsum(a, dtype=float)
    ret[n:] = ret[n:] - ret[:-n]
    return ret[n - 1:] / n
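# A minimal usage sketch (an illustrative addition): a window-3 moving average
# of [1..5]; only positions with a full window are returned.
print(moving_average(np.array([1, 2, 3, 4, 5]), n=3))  # [2. 3. 4.]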
def unpack_all_params(all_params):
    all_layer_params = np.array_split(all_params, np.cumsum(num_params_each_layer))
    return all_layer_params
def unpack_layer_params(params):
    gp_params = np.array_split(params, np.cumsum(num_params_each_output))
    return gp_params
def sample(self, n_samples=2000, observed_states=None, init_samples=None,
           init_state=None, random_state=None):
    """Generate random samples from the model.

    Parameters
    ----------
    n_samples : int
        Number of samples to generate.
    observed_states : array
        If provided, states are not sampled.
    random_state : RandomState or an int seed
        A random number generator instance. If None is given, the
        object's random_state is used.
    init_state : int
        If provided, initial state is not sampled.
    init_samples : array, default: None
        If provided, initial samples (for AR) are not sampled.
    E : array-like, shape (n_samples, n_inputs)
        Feature matrix of individual inputs.

    Returns
    -------
    samples : array_like, length (``n_samples``)
        List of samples
    states : array_like, shape (``n_samples``)
        List of hidden states (accounting for tied states by giving
        them the same index)
    """
    if random_state is None:
        random_state = self.random_state
    random_state = check_random_state(random_state)

    samples = np.zeros(n_samples)
    # Integer dtype so the states can be used as array indices below.
    states = np.zeros(n_samples, dtype=int)

    order = self.n_lags

    if init_state is None:
        startprob_pdf = np.exp(np.copy(self._log_startprob))
        start_dist = stats.rv_discrete(
            name='custm',
            values=(np.arange(startprob_pdf.shape[0]), startprob_pdf),
            seed=random_state)
        start_state = start_dist.rvs(size=1)[0]
    else:
        start_state = init_state

    if self.n_lags > 0:
        if init_samples is None:
            """
            n_init_samples = order + 10
            noise = np.sqrt(1.0/self._precision_[start_state]) * \
                random_state.randn(n_init_samples)

            pad_after = n_init_samples - order - 1
            col = np.pad(1*self._alpha_[start_state, :], (1, pad_after),
                         mode='constant')
            row = np.zeros(n_init_samples)
            col[0] = row[0] = 1

            A = toeplitz(col, row)
            init_samples = np.dot(pinv(A), noise + self._mu_[start_state])
            # TODO: fix bug with n_lags > 1, blows up
            """
            init_samples = 0.01 * np.ones((self.n_lags, self.n_features))  # temporary fix

    if observed_states is None:
        transmat_pdf = np.exp(np.copy(self._log_transmat))
        transmat_cdf = np.cumsum(transmat_pdf, 1)

        states[0] = (transmat_cdf[start_state] > random_state.rand()).argmax()

        transmat_pdf = np.exp(self._log_transmat)
        transmat_cdf = np.cumsum(transmat_pdf, 1)

        nrand = random_state.rand(n_samples)
        for idx in range(1, n_samples):
            newstate = (transmat_cdf[states[idx - 1]] > nrand[idx - 1]).argmax()
            states[idx] = newstate
    else:
        states = observed_states

    precision = np.copy(self._precision_)
    for idx in range(n_samples):
        state_ = int(states[idx])
        var_ = np.sqrt(1 / precision[state_])

        if self.n_lags == 0:
            mean_ = np.copy(self._mu_[state_])
        else:
            mean_ = np.copy(self._mu_[state_])

            for lag in range(1, order + 1):
                if idx < lag:
                    prev_ = init_samples[len(init_samples) - lag]
                else:
                    prev_ = samples[idx - lag]

                mean_ += np.copy(self._alpha_[state_, lag - 1]) * prev_

        samples[idx] = norm.rvs(loc=mean_, scale=var_, size=1,
                                random_state=random_state)
    states = self._process_sequence(states)
    return samples, states
def sample(self, n_samples=2000, observed_states=None, init_samples=None,
           init_state=None, random_state=None):
    """Generate random samples from the model.

    Parameters
    ----------
    n_samples : int
        Number of samples to generate.
    observed_states : array
        If provided, states are not sampled.
    random_state : RandomState or an int seed
        A random number generator instance. If None is given, the
        object's random_state is used.
    init_state : int
        If provided, initial state is not sampled.
    init_samples : array, default: None
        If provided, initial samples (for AR) are not sampled.
    E : array-like, shape (n_samples, n_inputs)
        Feature matrix of individual inputs.

    Returns
    -------
    samples : array_like, shape (``n_samples``, ``n_features``)
        List of samples
    states : array_like, shape (``n_samples``)
        List of hidden states (accounting for tied states by giving
        them the same index)
    """
    if random_state is None:
        random_state = self.random_state
    random_state = check_random_state(random_state)

    samples = np.zeros((n_samples, self.n_features))
    # Integer dtype so the states can be used as array indices below.
    states = np.zeros(n_samples, dtype=int)

    order = self.n_lags

    if init_state is None:
        startprob_pdf = np.exp(np.copy(self._log_startprob))
        start_dist = stats.rv_discrete(
            name='custm',
            values=(np.arange(startprob_pdf.shape[0]), startprob_pdf),
            seed=random_state)
        start_state = start_dist.rvs(size=1)[0]
    else:
        start_state = init_state

    if self.n_lags > 0:
        if init_samples is None:
            init_samples = 0.01 * np.ones((self.n_lags, self.n_features))  # TODO: better init

    if observed_states is None:
        transmat_pdf = np.exp(np.copy(self._log_transmat))
        transmat_cdf = np.cumsum(transmat_pdf, 1)

        states[0] = (transmat_cdf[start_state] > random_state.rand()).argmax()

        transmat_pdf = np.exp(self._log_transmat)
        transmat_cdf = np.cumsum(transmat_pdf, 1)

        nrand = random_state.rand(n_samples)
        for idx in range(1, n_samples):
            newstate = (transmat_cdf[states[idx - 1]] > nrand[idx - 1]).argmax()
            states[idx] = newstate
    else:
        states = observed_states

    precision = np.copy(self._precision_)
    for idx in range(n_samples):
        state_ = int(states[idx])
        covar_ = np.linalg.inv(precision[state_])

        if self.n_lags == 0:
            mean_ = np.copy(self._mu_[state_])
        else:
            mean_ = np.copy(self._mu_[state_])

            for lag in range(1, order + 1):
                if idx < lag:
                    prev_ = init_samples[len(init_samples) - lag]
                else:
                    prev_ = samples[idx - lag]

                mean_ += np.copy(self._alpha_[state_, lag - 1]) * prev_

        samples[idx] = self.multivariate_t_rvs(mean_, covar_, random_state)
    states = self._process_sequence(states)
    return samples, states