def alt_least_squares_prox_iter(A, first, size, fixed_vecs, factors, lambda_, Lk, x_start):
    num_fixed = fixed_vecs.shape[0]
    YTY = fixed_vecs.T.dot(fixed_vecs)
    eye = np.eye(num_fixed)
    lambda_eye = lambda_ * np.eye(factors)
    half_L_eye = 0.5 * Lk * np.eye(factors)
    solve_vecs = np.zeros((size, factors))
    x0 = x_start.reshape((-1, factors))
    for i in range(size):
        if first:
            counts_i = A[i]
        else:
            counts_i = A[:, i].T
        CuI = np.eye(counts_i.shape[0])
        np.fill_diagonal(CuI, counts_i)
        pu = counts_i.copy()
        pu[np.where(pu != 0)] = 1.0
        YTCuIY = fixed_vecs.T.dot(CuI).dot(fixed_vecs)
        YTCupu = fixed_vecs.T.dot(CuI + eye).dot(pu.T)
        xu = spsolve(YTY + YTCuIY + lambda_eye + half_L_eye,
                     YTCupu + 0.5 * Lk * x0[i])
        solve_vecs[i] = xu
    return solve_vecs
def rbk_relative_σ(db, X, Y=None):  # TODO: this should take 2 values
    D = sklearn.metrics.pairwise.pairwise_distances(X, metric='euclidean', n_jobs=1)
    K = np.exp(-(D * D * db['Σ']) / 2.0)
    np.fill_diagonal(K, 0)
    return K
def distance_to_weights(D):
    """Compute the weight matrix W from the distance matrix D.

    The weight matrix corresponding to a distance matrix `D = [d_ij]` is
    given by `W = [w_ij]` with

    .. math:: w_{ij} = \\frac{1}{\\sqrt{1 - \\cos^2(d_{ij})}}

    Since this is undefined when `d_ij = 0`, the diagonal entries of `W`
    are set to 1.

    Parameters
    ----------
    D : ndarray (n, n)
        Distance matrix. Must be square and contain no off-diagonal zeros.

    Returns
    -------
    W : ndarray (n, n)
        Weight matrix.
    """
    # TODO: no longer identical to pmds version. This version should
    # always be used.
    W_inv = 1 - np.cos(D)**2
    W = np.sqrt((W_inv + np.eye(D.shape[0]))**-1)
    np.fill_diagonal(W, 1)
    return W
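A minimal usage sketch (hypothetical values; assumes numpy and the function above are in scope): for an off-diagonal distance of pi/4 the formula gives w_ij = 1/sqrt(1 - cos^2(pi/4)) = sqrt(2).

D = np.array([[0.0, np.pi / 4],
              [np.pi / 4, 0.0]])
W = distance_to_weights(D)
# W has ones on the diagonal (set explicitly) and sqrt(2) ≈ 1.414 off it.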
def hess_lnpost(ws, fdensity, alpha, sig):
    print('hess')
    mo = np.exp(-4.)
    ws = ws.reshape((n_grid, n_grid))
    # l1: diagonal of the likelihood term
    lsis = np.array([-1 * np.sum(psi(index)**2) / sig_noise**2
                     for (index, w) in np.ndenumerate(ws)])
    lsis = lsis.reshape((n_grid, n_grid))
    l1 = lsis  # * np.sum((Psi(ws) - data) / 2 / sig_noise**2)
    # l2: diagonal of the prior term
    xsi = (1. - fdensity) * gaussian(np.log(ws), loc=np.log(mo), scale=sig) / ws \
        + fdensity * (ws**alpha / w_norm)
    dxsi = -1 * gaussian(np.log(ws), loc=np.log(mo), scale=sig) * (1. - fdensity) / ws**2 \
        - (1. - fdensity) * np.log(ws / mo) * np.exp(-np.log(ws / mo)**2 / 2 / sig**2) \
        / np.sqrt(2 * np.pi) / ws**2 / sig**3 \
        + fdensity * alpha * ws**(alpha - 1) / w_norm
    dxsi_st = -1 * gaussian(np.log(ws), loc=np.log(mo), scale=sig) * (1. - fdensity) / ws**2 \
        - (1. - fdensity) * np.log(ws / mo) * np.exp(-np.log(ws / mo)**2 / 2 / sig**2) \
        / np.sqrt(2 * np.pi) / ws**2 / sig**3
    ddxsi_st = -1 * dxsi_st / ws - dxsi_st * np.log(ws / mo) / ws / sig**2 \
        - (1. - fdensity) * (1 / np.sqrt(2 * np.pi) / sig) \
        * np.exp(-np.log(ws / mo)**2 / 2 / sig**2) \
        * (1 / sig**2 - np.log(ws / mo) / sig**2 - 1) / ws**3
    ddxsi = ddxsi_st + fdensity * alpha * (alpha - 1) * ws**(alpha - 2) / w_norm
    l2 = -1 * (dxsi / xsi)**2 + ddxsi / np.absolute(xsi)
    l_tot = l1 + l2
    # Those are the diagonal terms; the off-diagonal block below is disabled.
    hess_m = np.zeros((n_grid**2, n_grid**2))
    np.fill_diagonal(hess_m, l_tot)
    # for i in range(0, n_grid**2):
    #     for j in range(i + 1, n_grid**2):
    #         ind1 = (int(i / n_grid), i % n_grid)
    #         ind2 = (int(j / n_grid), j % n_grid)
    #         hess_m[i, j] = -1 * np.sum(psi(ind1) * psi(ind2)) / sig_noise**2
    # hess_m = symmetrize(hess_m)
    print('hess fin')
    return -1 * hess_m
def corrmat_from_vec(v):
    """Convert a vector of correlations to a matrix.

    Elements of v are read out row-wise.
    """
    C = vec_to_U(v)
    C = C + C.T  # symmetrize
    np.fill_diagonal(C, 1)
    return C
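The helper vec_to_U is not shown in this snippet; a minimal version consistent with the docstring (elements read out row-wise into the strict upper triangle) might look like the sketch below.

import numpy as np

def vec_to_U(v):
    # Recover n from len(v) = n*(n-1)/2, then fill the strict upper
    # triangle row-wise (np.triu_indices enumerates row by row).
    n = int((1 + np.sqrt(1 + 8 * len(v))) / 2)
    U = np.zeros((n, n))
    U[np.triu_indices(n, k=1)] = v
    return U

# Round trip: three pairwise correlations -> a 3 x 3 correlation
# matrix with unit diagonal.
C = corrmat_from_vec(np.array([0.2, -0.1, 0.5]))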
def _init_params(self, data, lengths=None, params='stmp'):
    X = data['obs']

    if 's' in params:
        self.startprob_.fill(1.0 / self.n_components)

    if 't' in params or 'm' in params or 'p' in params:
        kmmod = cluster.KMeans(n_clusters=self.n_unique,
                               random_state=self.random_state).fit(X)
        kmeans = kmmod.cluster_centers_

    if 't' in params:
        # TODO: estimate transitions from data (!) / consider n_tied=1
        if self.n_tied == 0:
            transmat = np.ones([self.n_components, self.n_components])
            np.fill_diagonal(transmat, 10.0)
            self.transmat_ = transmat  # .90 for self-transition
        else:
            transmat = np.zeros((self.n_components, self.n_components))
            transmat[range(self.n_components),
                     range(self.n_components)] = 100.0  # diagonal
            transmat[range(self.n_components - 1),
                     range(1, self.n_components)] = 1.0  # diagonal + 1
            transmat[[r * (self.n_chain) - 1 for r in range(1, self.n_unique + 1)
                      for c in range(self.n_unique - 1)],
                     [c * (self.n_chain) for r in range(self.n_unique)
                      for c in range(self.n_unique) if c != r]] = 1.0
            self.transmat_ = np.copy(transmat)

    if 'm' in params:
        mu_init = np.zeros((self.n_unique, self.n_features))
        for u in range(self.n_unique):
            for f in range(self.n_features):
                mu_init[u][f] = kmeans[u, f]
        self.mu_ = np.copy(mu_init)

    if 'p' in params:
        precision_init = np.zeros((self.n_unique, self.n_features, self.n_features))
        for u in range(self.n_unique):
            if self.n_features == 1:
                precision_init[u] = np.linalg.inv(np.cov(X[kmmod.labels_ == u], bias=1))
            else:
                precision_init[u] = np.linalg.inv(np.cov(np.transpose(X[kmmod.labels_ == u])))
        self.precision_ = np.copy(precision_init)
def cost(X):
    Y = np.dot(X, X.T)
    # Shift the exponentials by the maximum value to reduce numerical
    # trouble due to possible overflows.
    s = np.triu(Y, 1).max()
    expY = np.exp((Y - s) / epsilon)
    # Zero out the diagonal
    np.fill_diagonal(expY, np.zeros(n))
    u = np.triu(expY, 1).sum()
    return s + epsilon * np.log(u)
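This is a log-sum-exp smooth maximum of the strictly upper-triangular inner products: shifting by s keeps the exponentials bounded, and the result sits just above max_{i<j} Y_ij. A hedged sanity-check sketch, assuming cost is defined at module level so that n and epsilon resolve to the globals set here:

n, epsilon = 4, 1e-4
X = np.random.randn(n, 3)
Y = np.dot(X, X.T)
gap = cost(X) - np.triu(Y, 1).max()
# gap is non-negative and at most epsilon * log(n * (n - 1) / 2).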
def fit(kernel_x, kernel_y, base_density, X, Y, lmbda):
    n_y, d_y = Y.shape
    K_X = kernel_x.kernel(X)
    h = compute_h(kernel_y, base_density, Y, K_X)
    G = compute_G(kernel_y, Y, K_X)
    np.fill_diagonal(G, np.diag(G) + n_y * lmbda)
    cho_lower = lg.cho_factor(G)
    beta = lg.cho_solve(cho_lower, h / lmbda)
    return beta.reshape(n_y, d_y)
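A side note on the fill_diagonal idiom above: it forms G + n_y * lmbda * I in place, without allocating an identity matrix. A small equivalence sketch with toy values:

import numpy as np

G = np.random.randn(5, 5)
G = G @ G.T                                # Gram-like PSD matrix
G_ridge = G + 5 * 0.1 * np.eye(5)          # explicit ridge shift
np.fill_diagonal(G, np.diag(G) + 5 * 0.1)  # in-place ridge shift
assert np.allclose(G, G_ridge)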
def egrad(Y):
    """Derivative of the cost function."""
    # tmp = -1*(np.ones(D.shape) - (Y.T@Y)**2 + np.eye(D.shape[0]))**(-0.5)
    zero_tol = 1e-12
    ip = acos_validate(Y.T @ Y)
    tmp = np.ones(D.shape) - ip**2
    idx = np.where(np.abs(tmp) < zero_tol)
    tmp[idx] = 1                # Avoid division by zero.
    tmp = -1 * tmp**(-0.5)
    fill_val = np.min(tmp)      # All entries are negative.
    tmp[idx] = fill_val         # Make non-diagonal zeros large.
    np.fill_diagonal(tmp, 0)    # Ignore known zeros on diagonal.
    return 2 * Y @ ((np.arccos(np.abs(ip)) - D) * tmp * np.sign(ip))
def hess_k(ws, fdensity, alpha, sig, psf_k):
    # mo = np.exp(-4.)
    # ws = real_to_complex(ws)
    # ws = ws.reshape((n_grid, n_grid))
    # ws = np.real(fft.ifft2(ws))
    # calc l1; we only get the diagonal here
    l1 = -1 * (psf_k**2 / sig_noise**2 / n_grid**2).flatten()
    hess_l1 = np.zeros((2 * n_grid**2, 2 * n_grid**2), dtype=complex)
    np.fill_diagonal(hess_l1, complex_to_real(l1))
    l_tot = hess_l1
    return l_tot
def m_step(self, expectations, datas, inputs, masks, tags, samples, **kwargs):
    # Update the transition matrix between super states
    P = sum([np.sum(Ezzp1, axis=0) for _, Ezzp1, _ in expectations]) + 1e-16
    np.fill_diagonal(P, 0)
    P /= P.sum(axis=-1, keepdims=True)
    self.Ps = P

    # Fit negative binomial models for each duration based on sampled states
    states, durations = map(np.concatenate, zip(*[rle(z_smpl) for z_smpl in samples]))
    for k in range(self.K):
        self.rs[k], self.ps[k] = \
            fit_negative_binomial_integer_r(durations[states == k],
                                            self.r_min, self.r_max)

    # Reset the transition matrix
    self._transition_matrix = None
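The transition update above zeroes the diagonal of the expected transition counts before row-normalizing, so self-transitions are handled by the duration model rather than by Ps. A standalone sketch of that step with made-up counts:

import numpy as np

P = np.array([[5., 2., 1.],
              [1., 7., 2.],
              [2., 1., 9.]]) + 1e-16  # expected transition counts
np.fill_diagonal(P, 0)                # forbid self-transitions
P /= P.sum(axis=-1, keepdims=True)    # rows now sum to 1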
def __init__(self, K, D, M=0):
    super(NegativeBinomialSemiMarkovTransitions, self).__init__(K, D, M=M)

    # Initialize the super state transition probabilities
    self.Ps = npr.rand(K, K)
    np.fill_diagonal(self.Ps, 0)
    self.Ps /= self.Ps.sum(axis=1, keepdims=True)

    # Initialize the negative binomial duration probabilities
    self.rs = npr.randint(1, 11, size=K)
    # self.rs = np.ones(K, dtype=int)
    # self.ps = npr.rand(K)
    self.ps = 0.5 * np.ones(K)

    # Initialize the transition matrix
    self._trans_matrix = None
def fubinistudy(X):
    """Distance matrix of X using the Fubini-Study metric.

    Parameters
    ----------
    X : ndarray (complex, d, n)
        Data.

    Returns
    -------
    D : ndarray (real, n, n)
        Distance matrix.
    """
    D = np.arccos(np.sqrt((X.conj().T @ X) * (X.conj().T @ X).conj().T))
    np.fill_diagonal(D, 0)  # Things work better if the diagonal is exactly zero.
    return np.real(D)
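A usage sketch (assumes the columns of X are unit vectors, as the metric requires; the random data here are hypothetical):

import numpy as np

rng = np.random.default_rng(0)
X = rng.normal(size=(4, 10)) + 1j * rng.normal(size=(4, 10))
X /= np.linalg.norm(X, axis=0, keepdims=True)  # normalize each column
D = fubinistudy(X)
# D is real and symmetric, has an exactly-zero diagonal, and its
# entries lie in [0, pi/2].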
def __init__(self, K, D, M=0, r_min=1, r_max=20):
    assert K > 1, "Explicit duration models only work if num states > 1."
    super(NegativeBinomialSemiMarkovTransitions, self).__init__(K, D, M=M)

    # Initialize the super state transition probabilities
    self.Ps = npr.rand(K, K)
    np.fill_diagonal(self.Ps, 0)
    self.Ps /= self.Ps.sum(axis=1, keepdims=True)

    # Initialize the negative binomial duration probabilities
    self.r_min, self.r_max = r_min, r_max
    self.rs = npr.randint(r_min, r_max + 1, size=K)
    # self.rs = np.ones(K, dtype=int)
    # self.ps = npr.rand(K)
    self.ps = 0.5 * np.ones(K)

    # Initialize the transition matrix
    self._transition_matrix = None
def complex_as_matrix(z, n):
    """Represent a complex number as a matrix.

    Parameters
    ----------
    z : complex float
    n : int (even)

    Returns
    -------
    Z : ndarray (n, n)
        Real-valued n*n tri-diagonal matrix representing z in the ring
        of n*n matrices.
    """
    Z = np.zeros((n, n))
    ld = np.zeros(n - 1)
    ld[0::2] = np.imag(z)
    np.fill_diagonal(Z[1:], ld)
    Z = Z - Z.T
    np.fill_diagonal(Z, np.real(z))
    return Z
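Since this builds the standard embedding of C into 2x2 rotation-scaling blocks [[a, -b], [b, a]], matrix multiplication should mirror complex multiplication; a quick sanity-check sketch:

import numpy as np

z1, z2 = 1 + 2j, 3 - 1j
Z1 = complex_as_matrix(z1, 4)
Z2 = complex_as_matrix(z2, 4)
assert np.allclose(Z1 @ Z2, complex_as_matrix(z1 * z2, 4))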
def Kx_D_given_W(db, setX=None, setW=None):
    # Use the provided X / W if given, otherwise fall back to the stored ones.
    # (The original second if/else overwrote outX and silently ignored setX.)
    X = db['data'].X if setX is None else setX
    W = db['W'] if setW is None else setW
    outX = X.dot(W)
    #print(outX[0:5, 0:5])

    if db['kernel_type'] == 'rbf':
        Kx = rbk_sklearn(outX, db['data'].σ)
    elif db['kernel_type'] == 'relative':
        Kx = rbk_relative_σ(db, outX)
    elif db['kernel_type'] == 'rbf_slow':
        Kx = rbk_sklearn(outX, db['data'].σ)
    elif db['kernel_type'] == 'linear':
        Kx = outX.dot(outX.T)
    elif db['kernel_type'] == 'polynomial':
        Kx = poly_sklearn(outX, db['poly_power'], db['poly_constant'])
    elif db['kernel_type'] == 'squared':
        Kx = squared_kernel(outX)
    elif db['kernel_type'] == 'multiquadratic':
        Kx = multiquadratic_kernel(outX)
    elif db['kernel_type'] == 'mkl':  # multiple kernel learning
        Kx = mkl_kernel(db)
    else:
        print('\nError in kernel_lib.py, within Kx_D_given_W, '
              'unrecognized kernel type : %s\n\n' % db['kernel_type'])
        sys.exit()

    np.fill_diagonal(Kx, 0)  # Set diagonal of adjacency matrix to 0
    D = compute_inverted_Degree_matrix(Kx)
    #if np.isnan(D).any():
    #    Kx = Kx - np.min(Kx)
    #    D = compute_inverted_Degree_matrix(Kx)
    return [Kx, D]
def doubleIntAI(simulation, iterations):
    # environment parameters
    x = np.zeros((hidden_states, temp_orders_states))     # position
    v = np.zeros((hidden_causes, temp_orders_states - 1))
    y = np.zeros((obs_states, temp_orders_states))
    eta = np.zeros((hidden_causes, temp_orders_states - 1))

    ### free energy variables
    # parameters for generative model
    if simulation == 0:
        alpha = np.exp(2)
        alpha2 = np.exp(1)
    elif simulation == 1:
        alpha = np.exp(1)
        alpha2 = np.exp(.5)
    elif simulation == 2:
        alpha = np.exp(-1)
        alpha2 = np.exp(0)
    elif simulation == 3:
        alpha = np.exp(2)
        alpha2 = np.exp(1)
    beta = np.exp(1)

    A_gm = np.array([[0, 1, 0], [-alpha, -alpha2, 0], [0, 0, 0]])  # state transition matrix
    B_gm = np.array([[0, 0, 0], [0, beta, 0], [0, 0, 0]])          # input matrix
    H_gm = np.array([[1, 0, 0], [0, 1, 0], [0, 0, 0]])             # measurement matrix

    # actions
    a = np.zeros((hidden_states, temp_orders_states - 1))

    # states
    mu_x = np.zeros((hidden_states, temp_orders_states))

    # inputs
    v = np.zeros((hidden_causes, temp_orders_causes - 1))

    # minimisation variables and parameters
    dFdmu_x = np.zeros((hidden_states, temp_orders_states))
    Dmu_x = np.zeros((hidden_states, temp_orders_states))
    k_mu_x = 1          # learning rate perception
    k_a = np.exp(14)    # learning rate action

    # noise on sensory input (world - generative process)
    gamma_z = 0 * np.ones((obs_states, obs_states))  # log-precisions
    #gamma_z[:, 1] = gamma_z[:, 0] - np.log(2 * gamma)
    pi_z = np.zeros((obs_states, obs_states))
    np.fill_diagonal(pi_z, np.exp(gamma_z))
    sigma_z = np.linalg.inv(splin.sqrtm(pi_z))
    z = np.random.randn(iterations, obs_states)

    # noise on motion of hidden states (world - generative process)
    gamma_w = 2  # log-precision
    pi_w = np.zeros((hidden_states, hidden_states))
    np.fill_diagonal(pi_w, np.exp(gamma_w))
    sigma_w = np.linalg.inv(splin.sqrtm(pi_w))
    w = np.random.randn(iterations, hidden_states)

    # agent's estimates of the noise (agent - generative model)
    mu_gamma_z = -8 * np.identity(obs_states)  # log-precisions
    mu_gamma_z[1, 1] = mu_gamma_z[0, 0] - np.log(2 * gamma)
    mu_gamma_z[2, 2] = mu_gamma_z[1, 1] - np.log(2 * gamma)
    mu_pi_z = np.exp(mu_gamma_z) * np.identity(obs_states)

    mu_gamma_w = -1 * np.identity(hidden_states)  # log-precision
    mu_gamma_w[1, 1] = mu_gamma_w[0, 0] - np.log(2 * gamma)
    mu_gamma_w[2, 2] = mu_gamma_w[1, 1] - np.log(2 * gamma)
    mu_pi_w = np.exp(mu_gamma_w) * np.identity(hidden_states)

    # history
    y_history = np.zeros((iterations, obs_states, temp_orders_states))
    psi_history = np.zeros((iterations, obs_states, temp_orders_states - 1))
    mu_x_history = np.zeros((iterations, hidden_states, temp_orders_states))
    a_history = np.zeros((iterations, obs_states, temp_orders_states))
    FE_history = np.zeros((iterations,))
    v_history = np.zeros((iterations, hidden_causes, temp_orders_states - 1))

    x = 300 * np.random.rand(hidden_states, temp_orders_states) - 150
    x[1, 0] = x[0, 1]
    x[2, 0] = x[1, 1]
    x[2, 1] = 0.

    # if the initialisation is too random, then this agent becomes "disillusioned"
    mu_x[0, 0] = x[0, 0] + .1 * np.random.randn()
    mu_x[1, 0] = x[0, 1] + .1 * np.random.randn()
    mu_x[0, 1] = mu_x[1, 0]

    # automatic differentiation
    dFdmu_states = grad(F, 1)

    for i in range(iterations - 1):
        if simulation == 3 and i >= iterations / 2:
            v[1, 0] = 50

        # save mu_x at the very beginning, since the first jump is rather quick
        mu_x_history[i, :, :] = mu_x

        y[:, :] = getObservation(x, v, a, np.dot(np.dot(C, sigma_w), w[i, :]))
        y[2, 0] = y[1, 1]  # manually assign the acceleration as observed by the agent
        psi = y[:, :-1] + np.dot(np.dot(D, sigma_z), z[i, :, None])

        ### minimise free energy ###
        # perception
        dFdmu_x = dFdmu_states(psi, mu_x, eta, mu_pi_z, mu_pi_w, A_gm, B_gm, H_gm)
        Dmu_x = mode_path(mu_x)

        # action
        dFdy = np.dot(mu_pi_z, (psi - mu_x[:, :-1]))
        dyda = np.ones((obs_states, temp_orders_states - 1))

        # save history
        y_history[i, :] = y
        psi_history[i, :] = psi
        mu_x_history[i, :, :] = mu_x
        a_history[i] = a
        v_history[i] = v
        FE_history[i] = F(psi, mu_x, eta, mu_gamma_z, mu_pi_w, A_gm, B_gm, H_gm)

        # update equations
        mu_x += dt * k_mu_x * (Dmu_x - dFdmu_x)
        a[1, 0] += dt * -k_a * dyda.transpose().dot(dFdy)

    return psi_history, mu_x_history, a_history, v_history
def rbk_sklearn(data, σ):
    gammaV = 1.0 / (2 * σ * σ)
    rbk = sklearn.metrics.pairwise.rbf_kernel(data, gamma=gammaV)
    np.fill_diagonal(rbk, 0)  # Set diagonal of adjacency matrix to 0
    return rbk
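A usage sketch (hypothetical data; the degree computation is a guess at what compute_inverted_Degree_matrix in Kx_D_given_W does with this zero-diagonal affinity):

import numpy as np

data = np.random.randn(6, 2)
A = rbk_sklearn(data, 1.0)       # RBF affinity with zeroed self-similarity
d = A.sum(axis=1)                # node degrees
D_inv_sqrt = np.diag(d ** -0.5)  # e.g. for a normalized graph Laplacian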
for key in keys_del:
    try:
        del theta_dict_sv[key]
    except KeyError:
        print("key not found")

# TODO: test affinity shocks on nonzero other values
id = 5
b_test = np.array([0.3, 1., 1., 1., 0.1, 0.4])
epsilon = np.zeros((pecmy.N, pecmy.N))
wv_m = pecmy.war_vals(b_test, m, theta_dict, epsilon)  # calculate war values
ids_j = np.delete(np.arange(pecmy.N), id)
wv_m_i = wv_m[:, id][ids_j]

tau_hat_nft = 1.25 / pecmy.ecmy.tau
np.fill_diagonal(tau_hat_nft, 1)

ge_x_sv = np.ones(pecmy.x_len)
ge_dict = pecmy.ecmy.rewrap_ge_dict(ge_x_sv)
tau_hat_sv = ge_dict["tau_hat"]
tau_hat_sv[id] = tau_hat_nft[id]  # start slightly above free trade
ge_dict_sv = pecmy.ecmy.geq_solve(tau_hat_sv, np.ones(pecmy.N))
ge_x_sv = pecmy.ecmy.unwrap_ge_dict(ge_dict_sv)

test = pecmy.br(ge_x_sv, b_test, m, wv_m_i, id)
test

# wv_m
# wv_m_i
# # m = pecmy.M / np.ones((pecmy.N, pecmy.N))
# # m = m.T
def MI2AMI(y, n_clusters, r, k, init, var_distrib, nj,
           nan_mask, target_nb_pseudo_obs=500, it=50,
           eps=1E-05, maxstep=100, seed=None, perform_selec=True,
           dm=[], max_patience=1):  # dm: hack to remove
    '''Complete the missing values using a trained M1DGMM

    y (numobs x p ndarray): The observations containing mixed variables
    n_clusters (int): The number of clusters to look for in the data
    r (list): The dimension of latent variables through the first 2 layers
    k (list): The number of components of the latent Gaussian mixture layers
    init (dict): The initialisation parameters for the algorithm
    var_distrib (p 1darray): An array containing the types of the variables in y
    nj (p 1darray): For binary/count data: the maximum value the variable can
        take. For ordinal data: the number of existing categories for each variable
    nan_mask (ndarray): A mask array equal to True when the observation value is
        missing, False otherwise
    target_nb_pseudo_obs (int): The number of pseudo-observations to generate
    it (int): The maximum number of MCEM iterations of the algorithm
    eps (float): If the likelihood increases by less than eps, the algorithm stops
    maxstep (int): The maximum number of optimisation steps for each variable
    seed (int): The random state seed to set (only for numpy-generated data for
        the moment)
    perform_selec (bool): Whether to perform architecture selection or not
    dm (np array): The distance matrix of the observations. If not given, M1DGMM
        computes it
    n_neighbors (int): The number of neighbors to use for NA imputation
    ------------------------------------------------------------------------------
    returns (dict): The predicted classes, the likelihood through the EM steps
        and a continuous representation of the data
    '''

    # !!! Hack
    cols = y.columns

    # Formatting
    if not isinstance(nan_mask, np.ndarray):
        nan_mask = np.asarray(nan_mask)
    if not isinstance(y, np.ndarray):
        y = np.asarray(y)

    assert len(k) < 2  # Not implemented for deeper MDGMM for the moment

    # Keep complete observations
    complete_y = y[~np.isnan(y.astype(float)).any(1)]
    completed_y = deepcopy(y)

    out = M1DGMM(complete_y, 'auto', r, k, init, var_distrib, nj, it,
                 eps, maxstep, seed, perform_selec=perform_selec,
                 dm=dm, max_patience=max_patience, use_silhouette=True)

    # Compute the associations
    vc = vars_contributions(pd.DataFrame(complete_y, columns=cols), out['Ez.y'],
                            assoc_thr=0.0,
                            title='Contribution of the variables to the latent dimensions',
                            storage_path=None)

    # Unpacking the model from the M1DGMM output
    #p = y.shape[1]
    k = out['best_k']
    r = out['best_r']
    mu = out['mu'][0]
    lambda_bin = np.array(out['lambda_bin'])
    lambda_ord = out['lambda_ord']
    lambda_categ = out['lambda_categ']
    lambda_cont = np.array(out['lambda_cont'])

    nj_bin = nj[pd.Series(var_distrib).isin(['bernoulli', 'binomial'])].astype(int)
    nj_ord = nj[var_distrib == 'ordinal'].astype(int)
    nj_categ = nj[var_distrib == 'categorical'].astype(int)

    nb_cont = np.sum(var_distrib == 'continuous')
    nb_bin = np.sum(var_distrib == 'binomial')

    y_std = complete_y[:, var_distrib == 'continuous'].astype(float).std(axis=0,
                                                                         keepdims=True)
    cat_features = var_distrib != 'categorical'

    # Compute the associations between variables and use them as weights
    # for the optimisation
    assoc = cosine_similarity(vc, dense_output=True)
    np.fill_diagonal(assoc, 0.0)
    assoc = np.abs(assoc)
    weights = assoc / assoc.sum(1, keepdims=True)

    #==============================================
    # Optimisation sandbox
    #==============================================

    # Define the observation generated by the center of each cluster
    cluster_obs = [impute(mu[kk, :, 0], var_distrib, lambda_bin, nj_bin,
                          lambda_categ, nj_categ, lambda_ord, nj_ord,
                          lambda_cont, y_std) for kk in range(k[0])]

    # Use only the observed variables as references
    types = {'bin': ['bernoulli', 'binomial'], 'categ': ['categorical'],
             'cont': ['continuous'], 'ord': 'ordinal'}

    # Gradient optimisation
    nan_indices = np.where(nan_mask.any(1))[0]
    imputed_y = np.zeros_like(y)
    numobs = y.shape[0]

    #************************************
    # Linear constraint to stay in the support of continuous variables
    #************************************

    lb = np.array([])
    ub = np.array([])
    A = np.array([[]]).reshape((0, r[0]))

    if nb_bin > 0:
        ## Corrected binomial bounds (ub is actually +inf)
        bin_indices = var_distrib[np.logical_or(var_distrib == 'bernoulli',
                                                var_distrib == 'binomial')]
        binomial_indices = bin_indices == 'binomial'

        lb_bin = np.nanmin(y[:, var_distrib == 'binomial'], 0)
        lb_bin = logit(lb_bin / nj_bin[binomial_indices]) - lambda_bin[binomial_indices, 0]
        ub_bin = np.nanmax(y[:, var_distrib == 'binomial'], 0)
        ub_bin = logit(ub_bin / nj_bin[binomial_indices]) - lambda_bin[binomial_indices, 0]
        A_bin = lambda_bin[binomial_indices, 1:]

        ## Concatenate the constraints
        lb = np.concatenate([lb, lb_bin])
        ub = np.concatenate([ub, ub_bin])
        A = np.concatenate([A, A_bin], axis=0)

    if nb_cont > 0:
        ## Corrected Gaussian bounds
        lb_cont = np.nanmin(y[:, var_distrib == 'continuous'], 0) / y_std[0] \
            - lambda_cont[:, 0]
        ub_cont = np.nanmax(y[:, var_distrib == 'continuous'], 0) / y_std[0] \
            - lambda_cont[:, 0]
        A_cont = lambda_cont[:, 1:]

        ## Concatenate the constraints
        lb = np.concatenate([lb, lb_cont])
        ub = np.concatenate([ub, ub_cont])
        A = np.concatenate([A, A_cont], axis=0)

    lc = LinearConstraint(A, lb, ub, keep_feasible=True)

    zz = []
    fun = []
    for i in range(numobs):
        if i in nan_indices:
            # Design the nan masks for the optimisation process
            nan_mask_i = nan_mask[i]
            weights_i = weights[nan_mask_i].mean(0)

            # Look for the best starting point
            cluster_dist = [error(y[i, ~nan_mask_i], obs[~nan_mask_i],
                                  cat_features[~nan_mask_i], weights_i)
                            for obs in cluster_obs]
            z02 = mu[np.argmin(cluster_dist), :, 0]

            # Formatting
            vars_i = {type_alias: np.where(~nan_mask_i[np.isin(var_distrib, vartype)])[0]
                      for type_alias, vartype in types.items()}

            complete_categ = [l for idx, l in enumerate(lambda_categ)
                              if idx in vars_i['categ']]
            complete_ord = [l for idx, l in enumerate(lambda_ord)
                            if idx in vars_i['ord']]

            opt = minimize(stat_all, z02,
                           args=(y[i, ~nan_mask_i], var_distrib[~nan_mask_i],
                                 weights_i[~nan_mask_i],
                                 lambda_bin[vars_i['bin']], nj_bin[vars_i['bin']],
                                 complete_categ,
                                 nj_categ[vars_i['categ']],
                                 complete_ord,
                                 nj_ord[vars_i['ord']],
                                 lambda_cont[vars_i['cont']], y_std[:, vars_i['cont']]),
                           tol=eps, method='trust-constr', jac=grad_stat,
                           constraints=lc, options={'maxiter': 1000})

            z = opt.x
            zz.append(z)
            fun.append(opt.fun)

            imputed_y[i] = impute(z, var_distrib, lambda_bin, nj_bin, lambda_categ,
                                  nj_categ, lambda_ord, nj_ord, lambda_cont, y_std)
        else:
            imputed_y[i] = y[i]

    completed_y = np.where(nan_mask, imputed_y, y)

    out['completed_y'] = completed_y
    out['zz'] = zz
    out['fun'] = fun
    return out
# v = np.ones(N)
# v = np.array([1.08, 1.65, 1.61, 1.05, 1.05, 1.30])
# v = np.repeat(1.4, N)

# TODO: try just running the inner loop. The problem is that values of v change
# with theta as well; no reason we should run theta until convergence rather
# than iterating on v first.

imp.reload(policies)
imp.reload(economy)
pecmy = policies.policies(data, params, ROWname)
pecmy.W

m_diag = np.diagonal(pecmy.m)
m_frac = pecmy.m / m_diag
m_frac[:, N - 1]

tau_min_mat = copy.deepcopy(pecmy.ecmy.tau)
np.fill_diagonal(tau_min_mat, 5)

theta_dict = dict()
theta_dict["eta"] = 1.
theta_dict["c_hat"] = 25.
theta_dict["alpha1"] = 0.
theta_dict["alpha2"] = 0.
theta_dict["gamma"] = 0.
theta_dict["C"] = np.repeat(25., pecmy.N)

theta_x = pecmy.unwrap_theta(theta_dict)

# opt.root(pecmy.pp_wrap_alpha, .5, args=(.99, ))['x']
# pecmy.W ** - .75

np.reshape(np.repeat(np.max(pecmy.ecmy.tau + pecmy.tau_buffer, axis=1), pecmy.N),
           (pecmy.N, pecmy.N)) / pecmy.ecmy.tau
def ord_params_GLLVM(y_ord, nj_ord, lambda_ord_old, ps_y, pzl1_ys, zl1_s, AT,
                     tol=1E-5, maxstep=100):
    '''Determine the GLLVM coefficients related to the ordinal variables by
    optimizing each column's coefficients separately.

    y_ord (numobs x nb_ord nd-array): The ordinal data
    nj_ord (list of int): The number of modalities for each ordinal variable
    lambda_ord_old (list of nb_ord_j x (nj_ord + r1) elements): The ordinal
        coefficients of the previous iteration
    ps_y ((numobs, S) nd-array): p(s | y) for all s in Omega
    pzl1_ys (nd-array): p(z1 | y, s)
    zl1_s ((M1, r1, s1) nd-array): z1 | s
    AT ((r1 x r1) nd-array): Var(z1)^{-1/2}
    tol (int): Controls when to stop the optimisation process
    maxstep (int): The maximum number of optimisation steps
    ----------------------------------------------------------------------
    returns (list of nb_ord_j x (nj_ord + r1) elements): The new ordinal
        coefficients
    '''
    #****************************
    # Ordinal link parameters
    #****************************

    r0 = zl1_s.shape[1]
    S0 = zl1_s.shape[2]
    nb_ord = len(nj_ord)

    new_lambda_ord = []
    for j in range(nb_ord):
        enc = OneHotEncoder(categories='auto')
        y_oh = enc.fit_transform(y_ord[:, j][..., n_axis]).toarray()

        # Define the constraints such that the threshold coefficients are ordered
        nb_constraints = nj_ord[j] - 2
        nb_params = nj_ord[j] + r0 - 1

        lcs = np.full(nb_constraints, -1)
        lcs = np.diag(lcs, 1)
        np.fill_diagonal(lcs, 1)
        lcs = np.hstack([lcs[:nb_constraints, :],
                         np.zeros([nb_constraints, nb_params - (nb_constraints + 1)])])
        linear_constraint = LinearConstraint(lcs, np.full(nb_constraints, -np.inf),
                                             np.full(nb_constraints, 0),
                                             keep_feasible=True)

        opt = minimize(ord_loglik_j, lambda_ord_old[j],
                       args=(y_oh, zl1_s, S0, ps_y, pzl1_ys, nj_ord[j]),
                       tol=tol, method='trust-constr', jac=ord_grad_j,
                       constraints=linear_constraint, hess='2-point',
                       options={'maxiter': maxstep})

        res = opt.x
        if not opt.success:  # If the optimisation fails, keep the old estimate
            res = lambda_ord_old[j]
            warnings.warn('One of the ordinal optimisations has failed', RuntimeWarning)

        # Ensure identifiability for lambda_j
        new_lambda_ord_j = (res[-r0:].reshape(1, r0) @ AT[0]).flatten()
        new_lambda_ord_j = np.hstack([deepcopy(res[:nj_ord[j] - 1]), new_lambda_ord_j])
        new_lambda_ord.append(new_lambda_ord_j)

    return new_lambda_ord
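To make the ordering constraint concrete: for nj_ord[j] = 4 (three thresholds t1 < t2 < t3) and r0 = 2, nb_constraints = 2 and nb_params = 5, and lcs encodes t1 - t2 <= 0 and t2 - t3 <= 0. A standalone sketch of that construction:

import numpy as np

nb_constraints, nb_params = 2, 5
lcs = np.diag(np.full(nb_constraints, -1), 1)
np.fill_diagonal(lcs, 1)
lcs = np.hstack([lcs[:nb_constraints, :],
                 np.zeros([nb_constraints, nb_params - (nb_constraints + 1)])])
# lcs == [[ 1, -1,  0,  0,  0],
#         [ 0,  1, -1,  0,  0]]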
def m_step(self, expectations, datas, inputs, masks, tags,
           optimizer="adam", num_iters=5, **kwargs):
    """
    To find the most likely labels, ell_labels, use coordinate descent:
        {label_k} = argmax E_{z~p(z|x)}[log p(z)]
        likelihood(theta) = E_{z~p(z|x)}[sum_{t=1}^{T-1} log p(z_{t+1} | z_t; theta)]
    weights entries are E[z_t = k], E[z_t = k, z_{t+1} = k'], log p(x_{1:T})
    """
    K = self.K
    zzps = np.concatenate([Ezzp1 for _, Ezzp1, _ in expectations])  # T by K by K
    ell_labels, dist_norm, L, log_p = self.ell_labels, self.dist_norm, self.L, self.log_p

    for itr in range(num_iters):
        for kk in range(K):  # index kk
            ### Create a matrix with all possible values for the kk-th label
            ### while keeping the other K-1 labels fixed.
            ### I. 'changing'
            k = ell_labels[kk]
            ell_labels_new = np.array([ell_labels] * K)
            ell_labels_new[:, kk] = ell_labels
            ### II. 'swapping': add a line
            np.fill_diagonal(ell_labels_new, np.repeat(k, K))

            log_L = np.zeros(K)
            ### for every possible swapping
            for l in range(K):  # row index
                ell_labels_new_eg = ell_labels_new[l, :]
                log_p_new = log_p[ell_labels_new_eg]

                ### compute the log-transition matrix
                dist_labeled = np.zeros((K, K))
                for i in range(K):
                    for j in range(K):
                        dist_labeled[i, j] = dist_norm[ell_labels_new_eg[i],
                                                       ell_labels_new_eg[j]]
                log_Ps = -dist_labeled / L
                log_Ps += np.diag(log_p_new)
                log_Ps -= logsumexp(log_Ps, axis=1, keepdims=True)

                ### compute the log-likelihood
                log_L[l] = np.sum(zzps * log_Ps[None, :, :])

            ### update the kk-th label with the MLE
            ell_labels = ell_labels_new[np.argmax(log_L), :]

    self.ell_labels = ell_labels
def hess_k(ws, fdensity, alpha, sig, psf_k):
    print('hess_k begin')
    mo = np.exp(-4.)
    ws = real_to_complex(ws)
    ws = ws.reshape((n_grid, n_grid))
    ws = np.real(fft.ifft2(ws))

    # l1: the likelihood term; we only get the diagonal here
    l1 = -1 * (psf_k**2 / sig_noise**2 / n_grid**2).flatten()

    # l2: the Hessian of the prior is messy
    xsi = (1. - fdensity) * gaussian(np.log(ws), loc=np.log(mo), scale=sig) / ws \
        + fdensity * (ws**alpha / w_norm)
    dxsi = -1 * gaussian(np.log(ws), loc=np.log(mo), scale=sig) * (1. - fdensity) / ws**2 \
        - (1. - fdensity) * np.log(ws / mo) * np.exp(-np.log(ws / mo)**2 / 2 / sig**2) \
        / np.sqrt(2 * np.pi) / ws**2 / sig**3 \
        + fdensity * alpha * ws**(alpha - 1) / w_norm
    dxsi_st = -1 * gaussian(np.log(ws), loc=np.log(mo), scale=sig) * (1. - fdensity) / ws**2 \
        - (1. - fdensity) * np.log(ws / mo) * np.exp(-np.log(ws / mo)**2 / 2 / sig**2) \
        / np.sqrt(2 * np.pi) / ws**2 / sig**3
    ddxsi_st = -1 * dxsi_st / ws - dxsi_st * np.log(ws / mo) / ws / sig**2 \
        - (1. - fdensity) * (1 / np.sqrt(2 * np.pi) / sig) \
        * np.exp(-np.log(ws / mo)**2 / 2 / sig**2) \
        * (1 / sig**2 - np.log(ws / mo) / sig**2 - 1) / ws**3
    ddxsi = ddxsi_st + fdensity * alpha * (alpha - 1) * ws**(alpha - 2) / w_norm
    l2 = -1 * (dxsi / xsi)**2 + ddxsi / np.absolute(xsi)

    # This is the Hessian of the prior w.r.t. m_x, not m_k.
    l2_k = fft.ifft2(l2).flatten() / n_grid**2

    # We assume the Hessian of l2 is diagonal. Under the assumption k = -k',
    # only the zeroth element appears along the diagonal.
    # Fill the entire matrix and inspect it.
    hess_m = np.zeros((n_grid**2, n_grid**2), dtype=complex)
    hess_l1 = np.zeros((n_grid**2, n_grid**2), dtype=complex)
    np.fill_diagonal(hess_l1, l1)
    off = []
    for i in range(0, n_grid**2):
        for j in range(0, n_grid**2):
            hess_m[i, j] = l2_k[int(np.absolute(i - j))]
            # check the off-diagonals to make sure they are small
            if i != j:
                off.append(l2_k[int(np.absolute(i - j))])
    hess_m = hess_l1 + hess_m

    # print('Sigma Real is:', np.std(np.real(off)))
    # print('Sigma Imag is:', np.std(np.imag(off)))
    # fig, ax = plt.subplots(1, 2)
    # ax[0].imshow(np.real(hess_m)); ax[0].set_title('Real Hessian')
    # ax[1].imshow(np.imag(hess_m)); ax[1].set_title('Imaginary Hessian')
    # plt.show()

    # Keep only the diagonal and shift it to be component-wise positive.
    l_tot = np.diagonal(hess_m)
    l_minr = min(np.real(l_tot))
    l_mini = min(np.imag(l_tot))
    if l_minr < 0:
        l_tot = l_tot - l_minr + 0.1
    if l_mini < 0:
        l_tot = l_tot - 1j * (l_mini + 0.1)

    # hess_m = np.zeros((n_grid**2, n_grid**2))
    # np.fill_diagonal(hess_m, l_tot)
    # return hess_m
    l_tot = complex_to_real(l_tot)
    return l_tot